public abstract class AbstractDataset extends java.lang.Object implements Dataset
| Modifier and Type | Class and Description |
|---|---|
protected static class |
AbstractDataset.SplitFilter |
Dataset.Encoding
| Modifier and Type | Field and Description |
|---|---|
protected boolean |
addDeterminer |
protected boolean |
addRoot |
protected java.util.Set<java.lang.String> |
configuredOptions |
protected TreeVisitor |
customTreeVisitor |
protected Dataset.Encoding |
encoding |
protected java.util.regex.Pattern |
fileNameNormalizer |
protected java.lang.String |
flatFileName |
protected java.lang.String |
lexMapOptions |
protected Mapper |
lexMapper |
protected boolean |
makeFlatFile |
protected int |
maxLen |
protected java.lang.String |
morphDelim |
protected java.util.Properties |
options
Provides access for subclasses to the dataset parameters.
|
protected java.lang.String |
outFileName |
protected java.util.List<java.lang.String> |
outputFileList |
protected java.util.List<java.io.File> |
pathsToData |
protected java.util.List<java.io.File> |
pathsToMappings |
protected java.lang.String |
posMapOptions |
protected Mapper |
posMapper |
protected boolean |
removeDashTags |
protected boolean |
removeEscapeTokens |
protected java.util.Set<java.lang.String> |
requiredOptions |
protected java.io.FileFilter |
splitFilter |
protected java.lang.StringBuilder |
toStringBuffer |
protected Treebank |
treebank |
protected java.lang.String |
treeFileExtension |
| Constructor and Description |
|---|
AbstractDataset() |
| Modifier and Type | Method and Description |
|---|---|
abstract void |
build()
Generic method for loading, processing, and writing a dataset.
|
protected java.util.Set<java.lang.String> |
buildSplitMap(java.lang.String path) |
java.util.List<java.lang.String> |
getFilenames()
Returns the filenames written by Dataset.build(). |
boolean |
setOptions(java.util.Properties opts)
Sets options for a dataset.
|
java.lang.String |
toString() |
protected final java.util.List<java.lang.String> outputFileList
protected Mapper posMapper
protected java.lang.String posMapOptions
protected Mapper lexMapper
protected java.lang.String lexMapOptions
protected Dataset.Encoding encoding
protected final java.util.List<java.io.File> pathsToData
protected final java.util.List<java.io.File> pathsToMappings
protected java.io.FileFilter splitFilter
protected boolean addDeterminer
protected boolean removeDashTags
protected boolean addRoot
protected boolean removeEscapeTokens
protected int maxLen
protected java.lang.String morphDelim
protected TreeVisitor customTreeVisitor
protected java.lang.String outFileName
protected java.lang.String flatFileName
protected boolean makeFlatFile
protected final java.util.regex.Pattern fileNameNormalizer
protected Treebank treebank
protected final java.util.Set<java.lang.String> configuredOptions
protected final java.util.Set<java.lang.String> requiredOptions
protected final java.lang.StringBuilder toStringBuffer
protected java.lang.String treeFileExtension
protected java.util.Properties options
public abstract void build()
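Description copied from interface: Dataset
Generic method for loading, processing, and writing a dataset.
public boolean setOptions(java.util.Properties opts)
Description copied from interface: Dataset
Sets options for a dataset.
Specified by: setOptions in interface Dataset
Parameters: opts - A map from parameter types defined in ConfigParser to values
protected java.util.Set<java.lang.String> buildSplitMap(java.lang.String path)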
public java.util.List<java.lang.String> getFilenames()
Description copied from interface: Dataset
Returns the filenames written by Dataset.build().
Specified by: getFilenames in interface Dataset
public java.lang.String toString()
Overrides: toString in class java.lang.Object