FreeLing
3.1
|
Class "idioma" implements a visible Markov model that calculates the probability that a text is in a certain language. More...
#include <idioma.h>
Public Member Functions | |
idioma () | |
null constructor | |
idioma (const std::wstring &) | |
Constructor, given the model file to load. | |
double | sequence_probability (std::wistream &, size_t &) const |
Calculates the probability that the text is in the instance language. | |
double | compute_probability (const std::wstring &, double s=1.0) const |
Compute normalized language probability for given string. | |
void | train (const std::wstring &, const std::wstring &, const std::wstring &) |
Create a new model for the language from the given input file and store it in the given filename, with the given language code. | |
void | train (std::wistream &f, const std::wstring &, const std::wstring &) |
std::wstring | get_language_code () const |
Get ISO code for current language | |
Private Member Functions | |
std::wstring | from_writable (const std::wstring &) const |
Convert a trigram from its writable representation in the model file | |
std::wstring | to_writable (const std::wstring &) const |
Convert a trigram to a writable representation for the model file | |
double | ProbA (const std::wstring &, wchar_t) const |
Consult method for transition probabilities. | |
double | ProbPi (const std::wstring &) const |
Consult method for initial probabilities. | |
void | increment (std::map< std::wstring, double > &, const std::wstring &, double n=1.0) |
Increase occurrences of an n-gram. | |
void | increment (std::map< std::pair< std::wstring, std::wstring >, double > &, const std::wstring &, const std::wstring &, double n=1.0) |
Increase occurrences of two chained trigrams. | |
void | initial_trigram (std::wistream &, wchar_t &, wchar_t &, wchar_t &) const |
Initial trigram: two fictitious '\n' plus the first actual letter. | |
std::wstring | trigram (wchar_t, wchar_t, wchar_t) const |
build actual trigram from iterators | |
void | create_model (std::wistream &f) |
Create new model from given stream, with given language code. | |
void | save_model (const std::wstring &) const |
Save current model in given file. | |
Private Attributes | |
std::wstring | LangCode |
std::map< std::wstring, double > | pa_nom |
State transitions probabilities. | |
std::map< std::wstring, double > | ppi_nom |
Initial probabilities. | |
std::map< std::wstring, double > | pi |
auxiliary for training | |
std::map< std::wstring, double > | A |
std::map< std::pair < std::wstring, std::wstring > , double > | tB |
auxiliary for training | |
std::map< std::pair < std::wstring, std::wstring > , double > | bB |
std::map< std::pair < std::wstring, std::wstring > , double > | uB |
double | scale |
scale factor to apply to resulting probability (useful to equalize models among languages) |
Class "idioma" implements a visible Markov model that calculates the probability that a text is in a certain language.
null constructor
freeling::idioma::idioma | ( | const std::wstring & | ) |
Constructor, given the model file to load.
double freeling::idioma::compute_probability | ( | const std::wstring & | , |
double | s = 1.0 |
||
) | const |
Compute normalized language probability for given string.
void freeling::idioma::create_model | ( | std::wistream & | f | ) | [private] |
Create new model from given stream, with given language code.
std::wstring freeling::idioma::from_writable | ( | const std::wstring & | ) | const [private] |
Convert a trigram from its writable representation in the model file
std::wstring freeling::idioma::get_language_code | ( | ) | const |
Get ISO code for current language
void freeling::idioma::increment | ( | std::map< std::wstring, double > & | , |
const std::wstring & | , | ||
double | n = 1.0 |
||
) | [private] |
Increase occurrences of an n-gram.
void freeling::idioma::increment | ( | std::map< std::pair< std::wstring, std::wstring >, double > & | , |
const std::wstring & | , | ||
const std::wstring & | , | ||
double | n = 1.0 |
||
) | [private] |
Increase occurrences of two chained trigrams.
void freeling::idioma::initial_trigram | ( | std::wistream & | , |
wchar_t & | , | ||
wchar_t & | , | ||
wchar_t & | |||
) | const [private] |
Initial trigram: two fictitious '\n' plus the first actual letter.
double freeling::idioma::ProbA | ( | const std::wstring & | , |
wchar_t | |||
) | const [private] |
Consult method for transition probabilities.
double freeling::idioma::ProbPi | ( | const std::wstring & | ) | const [private] |
Consult method for initial probabilities.
void freeling::idioma::save_model | ( | const std::wstring & | ) | const [private] |
Save current model in given file.
double freeling::idioma::sequence_probability | ( | std::wistream & | , |
size_t & | |||
) | const |
Calculates the probability that the text is in the instance language.
std::wstring freeling::idioma::to_writable | ( | const std::wstring & | ) | const [private] |
Convert a trigram to a writable representation for the model file
void freeling::idioma::train | ( | const std::wstring & | , |
const std::wstring & | , | ||
const std::wstring & | |||
) |
Create a new model for the language from the given input file and store it in the given filename, with the given language code.
void freeling::idioma::train | ( | std::wistream & | f, |
const std::wstring & | , | ||
const std::wstring & | |||
) |
std::wstring freeling::idioma::trigram | ( | wchar_t | , |
wchar_t | , | ||
wchar_t | |||
) | const [private] |
build actual trigram from iterators
std::map<std::wstring,double> freeling::idioma::A [private] |
std::map<std::pair<std::wstring,std::wstring>,double> freeling::idioma::bB [private] |
std::wstring freeling::idioma::LangCode [private] |
std::map<std::wstring,double> freeling::idioma::pa_nom [private] |
State transitions probabilities.
std::map<std::wstring,double> freeling::idioma::pi [private] |
auxiliary for training
std::map<std::wstring,double> freeling::idioma::ppi_nom [private] |
Initial probabilities.
double freeling::idioma::scale [private] |
scale factor to apply to resulting probability (useful to equalize models among languages)
std::map<std::pair<std::wstring,std::wstring>,double> freeling::idioma::tB [private] |
auxiliary for training
std::map<std::pair<std::wstring,std::wstring>,double> freeling::idioma::uB [private] |