/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.ie;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.AnnotationPipeline;
import edu.stanford.nlp.pipeline.NERCombinerAnnotator;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;
import edu.stanford.nlp.pipeline.WordsToSentencesAnnotator;
import edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.TestCase;

public class NERBenchmarkSlowITest
extends TestCase {
    /** Redwood logging channel used by the test methods of this class. */
    private static Redwood.RedwoodChannels log = Redwood.channels(NERBenchmarkSlowITest.class);

    // --- CoNLL data: absolute input paths and the result-file names handed to the eval script ---
    private static final String CONLL_BASE_DIR = "/u/nlp/data/ner/conll/";
    private static final String CONLL_TRAIN = "/u/nlp/data/ner/conll/eng.train";
    private static final String CONLL_DEV = "/u/nlp/data/ner/conll/eng.testa";
    private static final String CONLL_TEST = "/u/nlp/data/ner/conll/eng.testb";
    private static final String CONLL_OUTPUT_TRAIN = "conll_output_train.txt";
    private static final String CONLL_OUTPUT_DEV = "conll_output_dev.txt";
    private static final String CONLL_OUTPUT_TEST = "conll_output_test.txt";

    // --- OntoNotes data: input paths read by evalOnto and the result-file names it writes ---
    // NOTE(review): ONTO_DEV / ONTO_TEST look like ONTO_BASE_DIR concatenated with the file name
    // without a "/" separator ("ontonotes" + "onto-3class-dev.tsv"). Confirm the real location of
    // the .tsv files; if they live inside the ontonotes directory these two paths are wrong.
    private static final String ONTO_BASE_DIR = "/u/nlp/data/ner/ontonotes";
    private static final String ONTO_DEV = "/u/nlp/data/ner/ontonotesonto-3class-dev.tsv";
    private static final String ONTO_TEST = "/u/nlp/data/ner/ontonotesonto-3class-test.tsv";
    private static final String ONTO_OUTPUT_DEV = "onto_output_dev.txt";
    private static final String ONTO_OUTPUT_TEST = "onto_output_test.txt";

    /** Relative path of the evaluation script; runEvalScript uses the same path as a literal. */
    private static final String CONLL_EVAL = "../../scripts/ner/eval_conll_cmd.sh";
    /** Matches the text "FB1:" followed by two spaces and a decimal number; group 1 is the number. */
    private static final Pattern FB1_Pattern = Pattern.compile("FB1:  (\\d+\\.\\d+)");

    // Annotators and pipelines shared by all test methods; created once by setUp().
    private static NERCombinerAnnotator conllNERAnnotator = null;
    private static AnnotationPipeline conllNERAnnotationPipeline = null;
    private static NERCombinerAnnotator ontoNERAnnotator = null;
    private static AnnotationPipeline ontoNERAnnotationPipeline = null;

    // CoNLL 2003 F1 values for the dev and test splits. They are not referenced by any method
    // visible in this file (testConLLDev / testConLLTest only log a message).
    private static final Double CONLL03_DEV_TOTAL_F1 = 93.2;
    private static final Double CONLL03_DEV_LOC_F1 = 95.38;
    private static final Double CONLL03_DEV_MISC_F1 = 88.96;
    private static final Double CONLL03_DEV_ORG_F1 = 88.17;
    private static final Double CONLL03_DEV_PER_F1 = 96.76;
    private static final Double CONLL03_TEST_TOTAL_F1 = 88.8;
    private static final Double CONLL03_TEST_LOC_F1 = 89.84;
    private static final Double CONLL03_TEST_MISC_F1 = 79.94;
    private static final Double CONLL03_TEST_ORG_F1 = 84.69;
    private static final Double CONLL03_TEST_PER_F1 = 94.83;

    // Expected OntoNotes F1 values; testOntoDev / testOntoTest compare against them with a
    // tolerance of 0.01.
    private static final Double ONTO_DEV_TOTAL_F1 = 89.93;
    private static final Double ONTO_DEV_LOC_F1 = 90.53;
    private static final Double ONTO_DEV_ORG_F1 = 85.12;
    private static final Double ONTO_DEV_PER_F1 = 93.31;
    private static final Double ONTO_TEST_TOTAL_F1 = 90.79;
    private static final Double ONTO_TEST_LOC_F1 = 91.17;
    private static final Double ONTO_TEST_ORG_F1 = 88.87;
    private static final Double ONTO_TEST_PER_F1 = 92.88;

    /**
     * Creates the shared CoNLL and OntoNotes NER annotators and the two pipelines that use them.
     *
     * <p>The annotators live in static fields, so the construction is skipped once both of them
     * exist. Each pipeline consists of a tokenizer configured with
     * {@code tokenize.whitespace=true}, a sentence splitter and one of the NER annotators.
     *
     * @throws Exception if one of the annotators cannot be constructed
     */
    public void setUp() throws Exception {
        boolean annotatorsReady = conllNERAnnotator != null && ontoNERAnnotator != null;
        if (annotatorsReady) {
            return;
        }

        // The same Properties object is used for both annotators; only "ner.model" is changed
        // between the two constructor calls, so the order of the statements below matters.
        Properties annotatorProps = new Properties();
        annotatorProps.setProperty("ner.useSUTime", "false");
        annotatorProps.setProperty("ner.applyNumericClassifiers", "false");
        annotatorProps.setProperty("ner.model", "edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz");
        annotatorProps.setProperty("applyNumericClassifiers", "false");
        conllNERAnnotator = new NERCombinerAnnotator(annotatorProps);

        annotatorProps.setProperty("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
        ontoNERAnnotator = new NERCombinerAnnotator(annotatorProps);

        Properties whitespaceTokenizerProps = new Properties();
        whitespaceTokenizerProps.setProperty("tokenize.whitespace", "true");

        conllNERAnnotationPipeline = new AnnotationPipeline();
        conllNERAnnotationPipeline.addAnnotator(new TokenizerAnnotator(false, whitespaceTokenizerProps));
        conllNERAnnotationPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
        conllNERAnnotationPipeline.addAnnotator(conllNERAnnotator);

        ontoNERAnnotationPipeline = new AnnotationPipeline();
        ontoNERAnnotationPipeline.addAnnotator(new TokenizerAnnotator(false, whitespaceTokenizerProps));
        ontoNERAnnotationPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
        ontoNERAnnotationPipeline.addAnnotator(ontoNERAnnotator);
    }

    /**
     * Extracts F1 scores from the text printed by the evaluation script.
     *
     * <p>Every line containing {@code "FB1:  <number>"} contributes one entry. The key is
     * {@code "LOC"}, {@code "MISC"}, {@code "ORG"} or {@code "PER"} when the line contains that
     * text (checked in this order) and {@code "TOTAL"} otherwise. Lines without a score are
     * ignored, so they can no longer overwrite {@code "TOTAL"} with the score of an earlier line.
     *
     * @param results raw script output, lines separated by {@code '\n'}; may be {@code null}
     * @return map from entity key to F1 value; empty when no line carries a score
     */
    public HashMap<String, Double> parseResults(String results) {
        HashMap<String, Double> f1Results = new HashMap<>();
        if (results == null) {
            return f1Results;
        }
        for (String line : results.split("\n")) {
            Matcher m = FB1_Pattern.matcher(line);
            if (!m.find()) {
                // Header, blank or trailing line: nothing to record.
                continue;
            }
            // If a line carries several scores the last one wins.
            String f1 = m.group(1);
            while (m.find()) {
                f1 = m.group(1);
            }

            String key;
            if (line.contains("LOC")) {
                key = "LOC";
            } else if (line.contains("MISC")) {
                key = "MISC";
            } else if (line.contains("ORG")) {
                key = "ORG";
            } else if (line.contains("PER")) {
                key = "PER";
            } else {
                key = "TOTAL";
            }
            f1Results.put(key, Double.parseDouble(f1));
        }
        return f1Results;
    }

    /**
     * Maps an NER tag to the short label written to the evaluation file.
     *
     * <p>{@code ORGANIZATION}/{@code ORG} become {@code ORG}, {@code LOCATION}/{@code LOC}
     * become {@code LOC}, {@code PERSON}/{@code PER} become {@code PER}; {@code MISC} and
     * {@code O} are returned unchanged.
     *
     * @param origTag tag to convert
     * @return the short label
     * @throws Exception if {@code origTag} is {@code null} or not one of the tags listed above
     */
    public String convert(String origTag) throws Exception {
        if (origTag == null) {
            // A switch on a null String would fail with a bare NullPointerException.
            throw new Exception("System outputting invalid label " + origTag);
        }
        switch (origTag) {
            case "ORGANIZATION":
            case "ORG":
                return "ORG";
            case "LOCATION":
            case "LOC":
                return "LOC";
            case "PERSON":
            case "PER":
                return "PER";
            case "MISC":
                return "MISC";
            case "O":
                return "O";
            default:
                throw new Exception("System outputting invalid label " + origTag);
        }
    }

    /**
     * Runs the evaluation script on a results file and returns what the script prints.
     *
     * <p>Every line of the script's standard output is echoed to {@code System.out} and appended
     * to the returned text followed by {@code '\n'}.
     *
     * @param resultsFile path of the file to score; passed to the script as its single argument
     * @return the script's standard output, or an empty string if it printed nothing
     * @throws IOException if the script cannot be started or its output cannot be read
     */
    public String runEvalScript(String resultsFile) throws IOException {
        // The argument is passed separately rather than spliced into a command string, so a
        // path containing whitespace is not split into several arguments.
        Process p = new ProcessBuilder("../../scripts/ner/eval_conll_cmd.sh", resultsFile).start();
        StringBuilder output = new StringBuilder();
        try (BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                System.out.println(inputLine);
                output.append(inputLine).append('\n');
            }
        }
        return output.toString();
    }

    /**
     * Tags one CoNLL split with {@code conllNERAnnotationPipeline} and scores the result.
     *
     * <p>Each document read from the input file is joined into one space-separated string and
     * annotated. For every token a line {@code word<TAB>_<TAB>gold<TAB>predicted} is written to
     * the results file, which is then passed to {@link #runEvalScript(String)}.
     *
     * @param dataset {@code "train"}, {@code "dev"} or {@code "test"}
     * @return F1 values parsed from the script output by {@link #parseResults(String)}
     * @throws IOException if the input or results file cannot be opened or the script fails
     * @throws Exception if {@code dataset} is not a known name or a predicted tag cannot be
     *         converted
     */
    public HashMap<String, Double> evalConll(String dataset) throws IOException, Exception {
        String inputFile;
        String resultsFile;
        switch (dataset) {
            case "train":
                resultsFile = CONLL_OUTPUT_TRAIN;
                inputFile = CONLL_TRAIN;
                break;
            case "dev":
                resultsFile = CONLL_OUTPUT_DEV;
                inputFile = CONLL_DEV;
                break;
            case "test":
                resultsFile = CONLL_OUTPUT_TEST;
                inputFile = CONLL_TEST;
                break;
            default:
                throw new Exception("Not a valid dataset name provided!");
        }

        SeqClassifierFlags flags = new SeqClassifierFlags();
        flags.entitySubclassification = "noprefix";
        CoNLLDocumentReaderAndWriter rw = new CoNLLDocumentReaderAndWriter();
        rw.init(flags);

        // Both resources are closed even when an assertion or conversion fails part-way through.
        try (PrintWriter writer = new PrintWriter(resultsFile);
             BufferedReader reader = IOUtils.readerFromString(inputFile)) {
            Iterator<List<CoreLabel>> documents = rw.getIterator(reader);
            while (documents.hasNext()) {
                List<CoreLabel> goldLabels = documents.next();

                // Every word is preceded by one space, including the first one.
                StringBuilder docText = new StringBuilder();
                for (CoreLabel goldLabel : goldLabels) {
                    docText.append(' ').append(goldLabel.word());
                }
                Annotation docAnnotation = new Annotation(docText.toString());
                conllNERAnnotationPipeline.annotate(docAnnotation);
                List<CoreLabel> predictLabels = docAnnotation.get(CoreAnnotations.TokensAnnotation.class);

                assertEquals("# gold outputs not same as # predicted!\n", goldLabels.size(), predictLabels.size());
                for (int i = 0; i < goldLabels.size(); ++i) {
                    CoreLabel gold = goldLabels.get(i);
                    CoreLabel predict = predictLabels.get(i);
                    String goldTag = gold.get(CoreAnnotations.AnswerAnnotation.class);
                    String predictTag = this.convert(predict.get(CoreAnnotations.NamedEntityTagAnnotation.class));
                    String goldWord = gold.get(CoreAnnotations.TextAnnotation.class);
                    assertEquals("Gold and Predict words don't match!\n", goldWord, predict.get(CoreAnnotations.TextAnnotation.class));
                    writer.println(goldWord + "\t_\t" + goldTag + "\t" + predictTag);
                }
            }
        }
        return this.parseResults(this.runEvalScript(resultsFile));
    }

    /**
     * Reads a tab-separated OntoNotes file into sentences of tokens.
     *
     * <p>A line that splits on tab into exactly two fields is a token: the first field is set
     * as the word and the second as the NER tag. Any other line ends the current sentence.
     * Tokens still pending at the end of the file form the last sentence, so a file without a
     * trailing separator line does not lose it.
     *
     * @param file path of the file to read
     * @return the sentences in file order; never contains an empty sentence
     */
    public List<List<CoreLabel>> readTokensFromOntoFile(String file) {
        List<List<CoreLabel>> sentences = new ArrayList<>();
        List<CoreLabel> currSentenceTokens = new ArrayList<>();
        for (String line : IOUtils.linesFromFile(file)) {
            String[] entries = line.split("\t");
            if (entries.length == 2) {
                CoreLabel token = new CoreLabel();
                token.setWord(entries[0]);
                token.setNER(entries[1]);
                currSentenceTokens.add(token);
            } else if (!currSentenceTokens.isEmpty()) {
                sentences.add(currSentenceTokens);
                currSentenceTokens = new ArrayList<>();
            }
        }
        // Flush the sentence that is not followed by a separator line.
        if (!currSentenceTokens.isEmpty()) {
            sentences.add(currSentenceTokens);
        }
        return sentences;
    }

    /**
     * Tags one OntoNotes split with {@code ontoNERAnnotationPipeline} and scores the result.
     *
     * <p>Each sentence returned by {@link #readTokensFromOntoFile(String)} is joined into one
     * space-separated string and annotated. Gold and predicted tags are both passed through
     * {@link #convert(String)} and written as {@code word<TAB>_<TAB>gold<TAB>predicted}; the
     * file is then passed to {@link #runEvalScript(String)}.
     *
     * @param dataset {@code "dev"} or {@code "test"}
     * @return F1 values parsed from the script output by {@link #parseResults(String)}
     * @throws IOException if the results file cannot be opened or the script fails
     * @throws Exception if {@code dataset} is not a known name or a tag cannot be converted
     */
    public HashMap<String, Double> evalOnto(String dataset) throws IOException, Exception {
        String inputFile;
        String resultsFile;
        switch (dataset) {
            case "dev":
                resultsFile = ONTO_OUTPUT_DEV;
                inputFile = ONTO_DEV;
                break;
            case "test":
                resultsFile = ONTO_OUTPUT_TEST;
                inputFile = ONTO_TEST;
                break;
            default:
                throw new Exception("Not a valid dataset name provided!");
        }

        List<List<CoreLabel>> ontoSentences = this.readTokensFromOntoFile(inputFile);
        // The writer is closed even when an assertion or conversion fails part-way through.
        try (PrintWriter writer = new PrintWriter(resultsFile)) {
            for (List<CoreLabel> sentenceLabels : ontoSentences) {
                // Every word is preceded by one space, including the first one.
                StringBuilder sentenceText = new StringBuilder();
                for (CoreLabel label : sentenceLabels) {
                    sentenceText.append(' ').append(label.word());
                }
                Annotation sentenceAnnotation = new Annotation(sentenceText.toString());
                ontoNERAnnotationPipeline.annotate(sentenceAnnotation);
                List<CoreLabel> predictLabels = sentenceAnnotation.get(CoreAnnotations.TokensAnnotation.class);

                // Same guard as in evalConll: the lists are walked in parallel below.
                assertEquals("# gold outputs not same as # predicted!\n", sentenceLabels.size(), predictLabels.size());
                for (int i = 0; i < sentenceLabels.size(); ++i) {
                    CoreLabel gold = sentenceLabels.get(i);
                    CoreLabel predict = predictLabels.get(i);
                    String goldTag = this.convert(gold.get(CoreAnnotations.NamedEntityTagAnnotation.class));
                    String predictTag = this.convert(predict.get(CoreAnnotations.NamedEntityTagAnnotation.class));
                    String goldWord = gold.get(CoreAnnotations.TextAnnotation.class);
                    assertEquals("Gold and Predict words don't match!\n", goldWord, predict.get(CoreAnnotations.TextAnnotation.class));
                    writer.println(goldWord + "\t_\t" + goldTag + "\t" + predictTag);
                }
            }
        }
        return this.parseResults(this.runEvalScript(resultsFile));
    }

    /**
     * CoNLL dev benchmark entry point. The body only writes a log message; no evaluation is
     * run and no F1 value is asserted here.
     */
    public void testConLLDev() {
        final String banner = "Evaluating on CoNLL Dev";
        try {
            log.log(banner);
        } catch (Exception failure) {
            // Anything thrown while logging is itself only logged.
            log.log(failure);
        }
    }

    /**
     * CoNLL test benchmark entry point. The body only writes a log message; no evaluation is
     * run and no F1 value is asserted here.
     */
    public void testConLLTest() {
        final String banner = "Evaluating on CoNLL Test";
        try {
            log.log(banner);
        } catch (Exception failure) {
            // Anything thrown while logging is itself only logged.
            log.log(failure);
        }
    }

    /**
     * Evaluates the OntoNotes dev split and compares the total, LOC, ORG and PER F1 values
     * with the expected constants, using a tolerance of 0.01.
     *
     * <p>NOTE(review): any {@link Exception} raised during evaluation (missing data file,
     * script failure, missing score) is only logged, so in that case the method returns
     * without having checked anything.
     */
    public void testOntoDev() {
        try {
            HashMap<String, Double> parsedF1 = this.evalOnto("dev");
            Double totalF1 = parsedF1.get("TOTAL");
            Double locF1 = parsedF1.get("LOC");
            Double orgF1 = parsedF1.get("ORG");
            Double perF1 = parsedF1.get("PER");
            // The messages name the dev split so a failure is not mistaken for one of testOntoTest.
            assertEquals(String.format("Onto Total Dev F1 should be %.2f but was %.2f", ONTO_DEV_TOTAL_F1, totalF1), ONTO_DEV_TOTAL_F1, totalF1, 0.01);
            assertEquals(String.format("Onto LOC Dev F1 should be %.2f but was %.2f", ONTO_DEV_LOC_F1, locF1), ONTO_DEV_LOC_F1, locF1, 0.01);
            assertEquals(String.format("Onto ORG Dev F1 should be %.2f but was %.2f", ONTO_DEV_ORG_F1, orgF1), ONTO_DEV_ORG_F1, orgF1, 0.01);
            assertEquals(String.format("Onto PER Dev F1 should be %.2f but was %.2f", ONTO_DEV_PER_F1, perF1), ONTO_DEV_PER_F1, perF1, 0.01);
        } catch (Exception e) {
            log.log(e);
        }
    }

    /**
     * Evaluates the OntoNotes test split and compares the total, LOC, ORG and PER F1 values
     * with the expected constants, using a tolerance of 0.01.
     *
     * <p>NOTE(review): any {@link Exception} raised during evaluation is only logged, so in
     * that case the method returns without having checked anything.
     */
    public void testOntoTest() {
        try {
            HashMap<String, Double> scores = this.evalOnto("test");
            // One row per checked score: map key, failure-message format, expected value.
            String[] keys = {"TOTAL", "LOC", "ORG", "PER"};
            String[] messageFormats = {
                "Onto Total Test F1 should be %.2f but was %.2f",
                "Onto LOC Test F1 should be %.2f but was %.2f",
                "Onto ORG Test F1 should be %.2f but was %.2f",
                "Onto PER Test F1 should be %.2f but was %.2f",
            };
            Double[] expected = {ONTO_TEST_TOTAL_F1, ONTO_TEST_LOC_F1, ONTO_TEST_ORG_F1, ONTO_TEST_PER_F1};
            for (int row = 0; row < keys.length; ++row) {
                Double actual = scores.get(keys[row]);
                String message = String.format(messageFormats[row], expected[row], actual);
                assertEquals(message, expected[row], actual, 0.01);
            }
        } catch (Exception failure) {
            log.log(failure);
        }
    }
}

