// FreeLing 3.1
00001 00003 // 00004 // FreeLing - Open Source Language Analyzers 00005 // 00006 // Copyright (C) 2004 TALP Research Center 00007 // Universitat Politecnica de Catalunya 00008 // 00009 // This library is free software; you can redistribute it and/or 00010 // modify it under the terms of the GNU General Public 00011 // License as published by the Free Software Foundation; either 00012 // version 3 of the License, or (at your option) any later version. 00013 // 00014 // This library is distributed in the hope that it will be useful, 00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 // General Public License for more details. 00018 // 00019 // You should have received a copy of the GNU General Public 00020 // License along with this library; if not, write to the Free Software 00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00022 // 00023 // contact: Lluis Padro (padro@lsi.upc.es) 00024 // TALP Research Center 00025 // despatx C6.212 - Campus Nord UPC 00026 // 08034 Barcelona. 
SPAIN 00027 // 00029 00030 #ifndef _PROBABILITIES 00031 #define _PROBABILITIES 00032 00033 #include <map> 00034 00035 #include "freeling/windll.h" 00036 #include "freeling/morfo/language.h" 00037 #include "freeling/morfo/processor.h" 00038 #include "freeling/morfo/tagset.h" 00039 00040 namespace freeling { 00041 00042 const std::wstring RE_FZ=L"^[FZ]"; 00043 00048 00049 class WINDLL probabilities : public processor { 00050 private: 00052 freeling::regexp RE_PunctNum; 00053 00055 double ProbabilityThreshold; 00057 const tagset *Tags; 00058 00061 double BiassSuffixes; 00062 00064 double LidstoneLambda; 00065 00067 bool activate_guesser; 00068 00070 std::map<std::wstring,double> single_tags; 00072 std::map<std::wstring,std::map<std::wstring,double> > class_tags; 00074 std::map<std::wstring,std::map<std::wstring,double> > lexical_tags; 00076 std::map<std::wstring,double> unk_tags; 00078 std::map<std::wstring,std::map<std::wstring,double> > unk_suffs; 00080 double theeta; 00082 std::wstring::size_type long_suff; 00083 00085 void smoothing(word &) const; 00087 double compute_probability(const std::wstring &, double, const std::wstring &) const; 00089 double guesser(word &, double) const; 00090 00091 public: 00093 probabilities(const std::wstring &, double); 00095 ~probabilities(); 00096 00098 void annotate_word(word &) const; 00099 00101 void set_activate_guesser(bool); 00102 00104 void analyze(sentence &) const; 00105 00107 using processor::analyze; 00108 }; 00109 00110 } // namespace 00111 00112 #endif