Package org.apache.nutch.parse
Class ParseOutputFormat
- java.lang.Object
-
- org.apache.hadoop.mapreduce.OutputFormat<Text,Parse>
-
- org.apache.nutch.parse.ParseOutputFormat
-
public class ParseOutputFormat extends OutputFormat<Text,Parse>
-
-
Constructor Summary
Constructors
Constructor | Description
ParseOutputFormat() |
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
void | checkOutputSpecs(JobContext context) |
static String | filterNormalize(String fromUrl, String toUrl, String fromHost, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters, URLExemptionFilters exemptionFilters, URLNormalizers normalizers) |
static String | filterNormalize(String fromUrl, String toUrl, String origin, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters, URLExemptionFilters exemptionFilters, URLNormalizers normalizers, String urlNormalizerScope) |
OutputCommitter | getOutputCommitter(TaskAttemptContext context) |
RecordWriter<Text,Parse> | getRecordWriter(TaskAttemptContext context) |
String | getUniqueFile(TaskAttemptContext context, String name) |
-
-
-
Method Detail
-
getOutputCommitter
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException
- Specified by:
getOutputCommitter in class OutputFormat<Text,Parse>
- Throws:
IOException
-
checkOutputSpecs
public void checkOutputSpecs(JobContext context) throws IOException
- Specified by:
checkOutputSpecs in class OutputFormat<Text,Parse>
- Throws:
IOException
-
getUniqueFile
public String getUniqueFile(TaskAttemptContext context, String name)
-
getRecordWriter
public RecordWriter<Text,Parse> getRecordWriter(TaskAttemptContext context) throws IOException
- Specified by:
getRecordWriter in class OutputFormat<Text,Parse>
- Throws:
IOException
-
filterNormalize
public static String filterNormalize(String fromUrl, String toUrl, String fromHost, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters, URLExemptionFilters exemptionFilters, URLNormalizers normalizers)
-
filterNormalize
public static String filterNormalize(String fromUrl, String toUrl, String origin, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters, URLExemptionFilters exemptionFilters, URLNormalizers normalizers, String urlNormalizerScope)
-
-