/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ie.KBPRelationExtractor;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.SentenceAnnotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.time.TimeAnnotations;
import edu.stanford.nlp.time.Timex;
import edu.stanford.nlp.util.ArgumentParser;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.SystemUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

public class WikidictAnnotator
extends SentenceAnnotator {
    private static final Redwood.RedwoodChannels log = Redwood.channels(WikidictAnnotator.class);
    private static final Pattern NUMBER_PATTERN = Pattern.compile("[0-9.]+");
    @ArgumentParser.Option(name="threads", gloss="The number of threads to run this annotator on")
    private int threads = 1;
    @ArgumentParser.Option(name="wikidict", gloss="The location of the <text, link, score> TSV file")
    private String wikidictPath = "edu/stanford/nlp/models/kbp/english/wikidict.tab.gz";
    @ArgumentParser.Option(name="threshold", gloss="The score threshold under which to discard links")
    private double threshold = 0.0;
    @ArgumentParser.Option(name="caseless", gloss="Ignore case when looking up entries in wikidict")
    private boolean wikidictCaseless = false;
    private final Map<String, String> dictionary = new HashMap<String, String>(21000000);

    public WikidictAnnotator(String name, Properties properties) {
        ArgumentParser.fillOptions((Object)this, name, properties);
        long startTime = System.currentTimeMillis();
        log.info("Reading Wikidict from " + this.wikidictPath);
        try {
            int i = 0;
            String[] fields = new String[3];
            for (String line : IOUtils.readLines(this.wikidictPath, "UTF-8")) {
                double score;
                if (line.charAt(0) == '\t') continue;
                StringUtils.splitOnChar(fields, line, '\t');
                if (i % 1000000 == 0) {
                    log.info("Loaded " + i + " entries from Wikidict [" + SystemUtils.getMemoryInUse() + "MB memory used; " + Redwood.formatTimeDifference(System.currentTimeMillis() - startTime) + " elapsed]");
                }
                if (this.threshold > 0.0 && (score = Double.parseDouble(fields[2])) < this.threshold) continue;
                String surfaceForm = fields[0];
                if (this.wikidictCaseless) {
                    surfaceForm = surfaceForm.toLowerCase();
                }
                String link = fields[1].intern();
                this.dictionary.put(surfaceForm, link);
                ++i;
            }
            log.info("Done reading Wikidict (" + this.dictionary.size() + " links read; " + Redwood.formatTimeDifference(System.currentTimeMillis() - startTime) + " elapsed)");
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public WikidictAnnotator(Properties properties) {
        this("entitylink", properties);
    }

    public static String normalizeTimex(String timex) {
        if (timex.contains("T") && !"PRESENT".equals(timex)) {
            return timex.substring(0, timex.indexOf("T"));
        }
        return timex;
    }

    public Optional<String> link(CoreMap mention) {
        String surfaceForm = mention.get(CoreAnnotations.OriginalTextAnnotation.class) == null ? (String)mention.get(CoreAnnotations.TextAnnotation.class) : (String)mention.get(CoreAnnotations.OriginalTextAnnotation.class);
        String mentionSurfaceFormKey = this.wikidictCaseless ? surfaceForm.toLowerCase() : surfaceForm;
        String ner = (String)mention.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        if (ner != null && (KBPRelationExtractor.NERTag.DATE.name.equalsIgnoreCase(ner) || "TIME".equalsIgnoreCase(ner) || "SET".equalsIgnoreCase(ner)) && mention.get(TimeAnnotations.TimexAnnotation.class) != null && ((Timex)mention.get(TimeAnnotations.TimexAnnotation.class)).value() != null) {
            Timex timex = (Timex)mention.get(TimeAnnotations.TimexAnnotation.class);
            if (!(timex.value() == null || timex.value().equals("PRESENT") || timex.value().equals("PRESENT_REF") || timex.value().equals("PAST") || timex.value().equals("PAST_REF") || timex.value().equals("FUTURE") || timex.value().equals("FUTURE_REF"))) {
                return Optional.of(WikidictAnnotator.normalizeTimex(timex.value()));
            }
            return Optional.empty();
        }
        if (ner != null && "ORDINAL".equalsIgnoreCase(ner) && mention.get(CoreAnnotations.NumericValueAnnotation.class) != null) {
            Number numericValue = (Number)mention.get(CoreAnnotations.NumericValueAnnotation.class);
            return Optional.of(numericValue.toString());
        }
        if (NUMBER_PATTERN.matcher(surfaceForm).matches()) {
            return Optional.of(surfaceForm);
        }
        if (ner != null && !"O".equals(ner) && this.dictionary.containsKey(mentionSurfaceFormKey)) {
            return Optional.of(this.dictionary.get(mentionSurfaceFormKey));
        }
        return Optional.empty();
    }

    @Override
    protected int nThreads() {
        return this.threads;
    }

    @Override
    protected long maxTime() {
        return -1L;
    }

    @Override
    protected void doOneSentence(Annotation annotation, CoreMap sentence) {
        for (CoreLabel token : (List)sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            token.set(CoreAnnotations.WikipediaEntityAnnotation.class, "O");
        }
        for (CoreMap mention : (List)sentence.get(CoreAnnotations.MentionsAnnotation.class)) {
            Optional<String> canonicalName = this.link(mention);
            if (!canonicalName.isPresent()) continue;
            mention.set(CoreAnnotations.WikipediaEntityAnnotation.class, canonicalName.get());
            for (CoreLabel token : (List)mention.get(CoreAnnotations.TokensAnnotation.class)) {
                token.set(CoreAnnotations.WikipediaEntityAnnotation.class, canonicalName.get());
            }
        }
    }

    @Override
    protected void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
    }

    @Override
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return Collections.singleton(CoreAnnotations.WikipediaEntityAnnotation.class);
    }

    @Override
    public Set<Class<? extends CoreAnnotation>> requires() {
        HashSet<Class> requirements = new HashSet<Class>(Arrays.asList(CoreAnnotations.TextAnnotation.class, CoreAnnotations.TokensAnnotation.class, CoreAnnotations.SentencesAnnotation.class, CoreAnnotations.OriginalTextAnnotation.class, CoreAnnotations.MentionsAnnotation.class));
        return Collections.unmodifiableSet(requirements);
    }

    public static void main(String[] args) throws IOException {
        Properties props = StringUtils.argsToProperties(args);
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,entitymentions,entitylink");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        IOUtils.console("sentence> ", line -> {
            Annotation ann = new Annotation((String)line);
            pipeline.annotate(ann);
            List tokens = (List)((CoreMap)((List)ann.get(CoreAnnotations.SentencesAnnotation.class)).get(0)).get(CoreAnnotations.TokensAnnotation.class);
            System.err.println(StringUtils.join(tokens.stream().map(x -> (String)x.get(CoreAnnotations.WikipediaEntityAnnotation.class)), "  "));
        });
    }
}

