/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import java.util.List;
import java.util.Properties;
import org.junit.Assert;
import org.junit.Test;

/**
 * Integration tests that run a {@code tokenize, ssplit} pipeline configured for Chinese
 * ({@code tokenize.language=zh}) over one fixed document and check the resulting sentences.
 *
 * <p>Each test varies only the tokenizer options and the sentence-splitter newline handling
 * ({@code ssplit.newlineIsSentenceBreak}, {@code ssplit.eolonly}, {@code ssplit.isOneSentence})
 * and compares every produced sentence, rendered as space-separated tokens, with an expected string.
 */
public class WordsToSentencesAnnotatorITest {
    /**
     * Document under test. It consists of a title line, a blank line (two consecutive newlines),
     * and a body paragraph that contains a single newline in the middle of a word and ends with an
     * ideographic full stop (U+3002) followed by a bracketed trailer.
     */
    private static final String HEADLINE = "\u201c\u4e60\u4e3b\u5e2d\u6b27\u6d32\u884c\u201d\u6f2b\u8bc4\u2463\uff1a\u4e3a\u4e16\u754c\u7ecf\u6d4e\u8054\u52a8\u589e\u957f\u8d21\u732e\u201c\u4e2d\u56fd\u65b9\u7565\u201d\n\n7\u65e5\uff0c\u56fd\u5bb6\u4e3b\u5e2d\u4e60\u8fd1\u5e73\u5728\u201c\u4e16\u754c\u6865\u57ce\u201d\u6c49\u5821\u51fa\u5e2d\u4e8c\u5341\u56fd\u96c6\u56e2\uff08G20\uff09\u9886\u5bfc\u4eba\u7b2c\u5341\u4e8c\u6b21\u5cf0\u4f1a\u5e76\u53d1\u8868\u9898\u4e3a\u300a\u575a\n\u6301\u5f00\u653e\u5305\u5bb9 \u63a8\u52a8\u8054\u52a8\u589e\u957f\u300b\u7684\u91cd\u8981\u8bb2\u8bdd\uff0c\u63d0\u51fa\u56db\u70b9\u201c\u4e2d\u56fd\u4e3b\u5f20\u201d\uff0c\u4e3aG20\u672a\u6765\u53d1\u5c55\u89c4\u5212\u84dd\u56fe\uff0c\u4e3a\u4e16\u754c\u7ecf\u6d4e\u8054\u52a8\u589e\u957f\u6307\u660e\u65b9\u5411\uff0c\u53d7\u5230\u4e0e\u4f1a\u5404\u65b9\u548c\u56fd\u9645\u793e\u4f1a\u9ad8\u5ea6\u8bc4\u4ef7\u3002[\u8be6\u7ec6]";

    /**
     * {@code ssplit.newlineIsSentenceBreak=two} with {@code tokenizeNLs} enabled: expects three
     * sentences (title, body, trailer). The blank line separates title and body, while the single
     * newline inside the body does not split it.
     */
    @Test
    public void testTwoNewlineIsSentenceBreakTokenizeNLs() {
        String[] expected = {
            "\u201c \u4e60 \u4e3b\u5e2d \u6b27\u6d32\u884c \u201d \u6f2b\u8bc4\u2463 \uff1a \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u8d21\u732e \u201c \u4e2d\u56fd \u65b9\u7565 \u201d",
            "7 \u65e5 \uff0c \u56fd\u5bb6 \u4e3b\u5e2d \u4e60\u8fd1\u5e73 \u5728 \u201c \u4e16\u754c \u6865 \u57ce \u201d \u6c49\u5821 \u51fa\u5e2d \u4e8c\u5341 \u56fd \u96c6\u56e2 \uff08 G20 \uff09 \u9886\u5bfc\u4eba \u7b2c\u5341\u4e8c \u6b21 \u5cf0\u4f1a \u5e76 \u53d1\u8868 \u9898 \u4e3a \u300a \u575a\u6301 \u5f00\u653e \u5305\u5bb9 \u63a8\u52a8 \u8054\u52a8 \u589e\u957f \u300b \u7684 \u91cd\u8981 \u8bb2\u8bdd \uff0c \u63d0\u51fa \u56db\u70b9 \u201c \u4e2d\u56fd \u4e3b\u5f20 \u201d \uff0c \u4e3a G20 \u672a\u6765 \u53d1\u5c55 \u89c4\u5212 \u84dd\u56fe \uff0c \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u6307\u660e \u65b9\u5411 \uff0c \u53d7\u5230 \u4e0e\u4f1a \u5404 \u65b9 \u548c \u56fd\u9645 \u793e\u4f1a \u9ad8\u5ea6 \u8bc4\u4ef7 \u3002",
            "[ \u8be6\u7ec6 ]"
        };
        Properties props = PropertiesUtils.asProperties(
            "annotators", "tokenize, ssplit",
            "tokenize.language", "zh",
            "tokenize.options", "tokenizeNLs,invertible,ptb3Escaping=true",
            "ssplit.boundaryTokenRegex", "[.\u3002]|[!?\uff01\uff1f]+",
            "ssplit.newlineIsSentenceBreak", "two",
            "segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz",
            "segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese",
            "segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz",
            "segment.sighanPostProcessing", "true");
        assertSentenceSplit(props, expected);
    }

    /**
     * {@code ssplit.newlineIsSentenceBreak=two} without {@code tokenizeNLs} in the tokenizer
     * options: expects the same three sentences as the {@code tokenizeNLs} variant.
     */
    @Test
    public void testTwoNewlineIsSentenceBreak() {
        String[] expected = {
            "\u201c \u4e60 \u4e3b\u5e2d \u6b27\u6d32\u884c \u201d \u6f2b\u8bc4\u2463 \uff1a \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u8d21\u732e \u201c \u4e2d\u56fd \u65b9\u7565 \u201d",
            "7 \u65e5 \uff0c \u56fd\u5bb6 \u4e3b\u5e2d \u4e60\u8fd1\u5e73 \u5728 \u201c \u4e16\u754c \u6865 \u57ce \u201d \u6c49\u5821 \u51fa\u5e2d \u4e8c\u5341 \u56fd \u96c6\u56e2 \uff08 G20 \uff09 \u9886\u5bfc\u4eba \u7b2c\u5341\u4e8c \u6b21 \u5cf0\u4f1a \u5e76 \u53d1\u8868 \u9898 \u4e3a \u300a \u575a\u6301 \u5f00\u653e \u5305\u5bb9 \u63a8\u52a8 \u8054\u52a8 \u589e\u957f \u300b \u7684 \u91cd\u8981 \u8bb2\u8bdd \uff0c \u63d0\u51fa \u56db\u70b9 \u201c \u4e2d\u56fd \u4e3b\u5f20 \u201d \uff0c \u4e3a G20 \u672a\u6765 \u53d1\u5c55 \u89c4\u5212 \u84dd\u56fe \uff0c \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u6307\u660e \u65b9\u5411 \uff0c \u53d7\u5230 \u4e0e\u4f1a \u5404 \u65b9 \u548c \u56fd\u9645 \u793e\u4f1a \u9ad8\u5ea6 \u8bc4\u4ef7 \u3002",
            "[ \u8be6\u7ec6 ]"
        };
        Properties props = PropertiesUtils.asProperties(
            "annotators", "tokenize, ssplit",
            "tokenize.language", "zh",
            "tokenize.options", "invertible,ptb3Escaping=true",
            "ssplit.boundaryTokenRegex", "[.\u3002]|[!?\uff01\uff1f]+",
            "ssplit.newlineIsSentenceBreak", "two",
            "segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz",
            "segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese",
            "segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz",
            "segment.sighanPostProcessing", "true");
        assertSentenceSplit(props, expected);
    }

    /**
     * {@code ssplit.newlineIsSentenceBreak=always} with {@code tokenizeNLs} enabled: expects four
     * sentences, because the single newline inside the body now also ends a sentence (splitting the
     * word that straddles it).
     */
    @Test
    public void testNewlineIsSentenceBreakTokenizeNLs() {
        String[] expected = {
            "\u201c \u4e60 \u4e3b\u5e2d \u6b27\u6d32\u884c \u201d \u6f2b\u8bc4\u2463 \uff1a \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u8d21\u732e \u201c \u4e2d\u56fd \u65b9\u7565 \u201d",
            "7 \u65e5 \uff0c \u56fd\u5bb6 \u4e3b\u5e2d \u4e60\u8fd1\u5e73 \u5728 \u201c \u4e16\u754c \u6865 \u57ce \u201d \u6c49\u5821 \u51fa\u5e2d \u4e8c\u5341 \u56fd \u96c6\u56e2 \uff08 G20 \uff09 \u9886\u5bfc\u4eba \u7b2c\u5341\u4e8c \u6b21 \u5cf0\u4f1a \u5e76 \u53d1\u8868 \u9898 \u4e3a \u300a \u575a",
            "\u6301 \u5f00\u653e \u5305\u5bb9 \u63a8\u52a8 \u8054\u52a8 \u589e\u957f \u300b \u7684 \u91cd\u8981 \u8bb2\u8bdd \uff0c \u63d0\u51fa \u56db\u70b9 \u201c \u4e2d\u56fd \u4e3b\u5f20 \u201d \uff0c \u4e3a G20 \u672a\u6765 \u53d1\u5c55 \u89c4\u5212 \u84dd\u56fe \uff0c \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u6307\u660e \u65b9\u5411 \uff0c \u53d7\u5230 \u4e0e\u4f1a \u5404 \u65b9 \u548c \u56fd\u9645 \u793e\u4f1a \u9ad8\u5ea6 \u8bc4\u4ef7 \u3002",
            "[ \u8be6\u7ec6 ]"
        };
        Properties props = PropertiesUtils.asProperties(
            "annotators", "tokenize, ssplit",
            "tokenize.language", "zh",
            "tokenize.options", "tokenizeNLs,invertible,ptb3Escaping=true",
            "ssplit.boundaryTokenRegex", "[.\u3002]|[!?\uff01\uff1f]+",
            "ssplit.newlineIsSentenceBreak", "always",
            "segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz",
            "segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese",
            "segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz",
            "segment.sighanPostProcessing", "true");
        assertSentenceSplit(props, expected);
    }

    /**
     * {@code ssplit.newlineIsSentenceBreak=never} with default tokenizer options: expects two
     * sentences, with title and body merged into one that ends at the ideographic full stop, plus
     * the trailer.
     */
    @Test
    public void testNewlineIsSentenceBreakNever() {
        String[] expected = {
            "\u201c \u4e60 \u4e3b\u5e2d \u6b27\u6d32\u884c \u201d \u6f2b\u8bc4\u2463 \uff1a \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u8d21\u732e \u201c \u4e2d\u56fd \u65b9\u7565 \u201d 7 \u65e5 \uff0c \u56fd\u5bb6 \u4e3b\u5e2d \u4e60\u8fd1\u5e73 \u5728 \u201c \u4e16\u754c \u6865 \u57ce \u201d \u6c49\u5821 \u51fa\u5e2d \u4e8c\u5341 \u56fd \u96c6\u56e2 \uff08 G20 \uff09 \u9886\u5bfc\u4eba \u7b2c\u5341\u4e8c \u6b21 \u5cf0\u4f1a \u5e76 \u53d1\u8868 \u9898 \u4e3a \u300a \u575a\u6301 \u5f00\u653e \u5305\u5bb9 \u63a8\u52a8 \u8054\u52a8 \u589e\u957f \u300b \u7684 \u91cd\u8981 \u8bb2\u8bdd \uff0c \u63d0\u51fa \u56db\u70b9 \u201c \u4e2d\u56fd \u4e3b\u5f20 \u201d \uff0c \u4e3a G20 \u672a\u6765 \u53d1\u5c55 \u89c4\u5212 \u84dd\u56fe \uff0c \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u6307\u660e \u65b9\u5411 \uff0c \u53d7\u5230 \u4e0e\u4f1a \u5404 \u65b9 \u548c \u56fd\u9645 \u793e\u4f1a \u9ad8\u5ea6 \u8bc4\u4ef7 \u3002",
            "[ \u8be6\u7ec6 ]"
        };
        Properties props = PropertiesUtils.asProperties(
            "annotators", "tokenize, ssplit",
            "tokenize.language", "zh",
            "ssplit.boundaryTokenRegex", "[.\u3002]|[!?\uff01\uff1f]+",
            "ssplit.newlineIsSentenceBreak", "never",
            "segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz",
            "segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese",
            "segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz",
            "segment.sighanPostProcessing", "true");
        assertSentenceSplit(props, expected);
    }

    /**
     * {@code ssplit.eolonly=true}: expects three sentences, one per non-empty input line. The
     * ideographic full stop does not start a new sentence here, so the trailer stays attached to
     * the last line.
     */
    @Test
    public void testEolOnly() {
        String[] expected = {
            "\u201c \u4e60 \u4e3b\u5e2d \u6b27\u6d32\u884c \u201d \u6f2b\u8bc4\u2463 \uff1a \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u8d21\u732e \u201c \u4e2d\u56fd \u65b9\u7565 \u201d",
            "7 \u65e5 \uff0c \u56fd\u5bb6 \u4e3b\u5e2d \u4e60\u8fd1\u5e73 \u5728 \u201c \u4e16\u754c \u6865 \u57ce \u201d \u6c49\u5821 \u51fa\u5e2d \u4e8c\u5341 \u56fd \u96c6\u56e2 \uff08 G20 \uff09 \u9886\u5bfc\u4eba \u7b2c\u5341\u4e8c \u6b21 \u5cf0\u4f1a \u5e76 \u53d1\u8868 \u9898 \u4e3a \u300a \u575a",
            "\u6301 \u5f00\u653e \u5305\u5bb9 \u63a8\u52a8 \u8054\u52a8 \u589e\u957f \u300b \u7684 \u91cd\u8981 \u8bb2\u8bdd \uff0c \u63d0\u51fa \u56db\u70b9 \u201c \u4e2d\u56fd \u4e3b\u5f20 \u201d \uff0c \u4e3a G20 \u672a\u6765 \u53d1\u5c55 \u89c4\u5212 \u84dd\u56fe \uff0c \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u6307\u660e \u65b9\u5411 \uff0c \u53d7\u5230 \u4e0e\u4f1a \u5404 \u65b9 \u548c \u56fd\u9645 \u793e\u4f1a \u9ad8\u5ea6 \u8bc4\u4ef7 \u3002 [ \u8be6\u7ec6 ]"
        };
        Properties props = PropertiesUtils.asProperties(
            "annotators", "tokenize, ssplit",
            "tokenize.language", "zh",
            "tokenize.options", "invertible",
            "ssplit.boundaryTokenRegex", "[.\u3002]|[!?\uff01\uff1f]+",
            "ssplit.eolonly", "true",
            "segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz",
            "segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese",
            "segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz",
            "segment.sighanPostProcessing", "true");
        assertSentenceSplit(props, expected);
    }

    /**
     * {@code ssplit.isOneSentence=true}: expects the whole document to come back as a single
     * sentence, regardless of newlines and sentence-final punctuation.
     */
    @Test
    public void testIsOneSentence() {
        String[] expected = {
            "\u201c \u4e60 \u4e3b\u5e2d \u6b27\u6d32\u884c \u201d \u6f2b\u8bc4\u2463 \uff1a \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u8d21\u732e \u201c \u4e2d\u56fd \u65b9\u7565 \u201d 7 \u65e5 \uff0c \u56fd\u5bb6 \u4e3b\u5e2d \u4e60\u8fd1\u5e73 \u5728 \u201c \u4e16\u754c \u6865 \u57ce \u201d \u6c49\u5821 \u51fa\u5e2d \u4e8c\u5341 \u56fd \u96c6\u56e2 \uff08 G20 \uff09 \u9886\u5bfc\u4eba \u7b2c\u5341\u4e8c \u6b21 \u5cf0\u4f1a \u5e76 \u53d1\u8868 \u9898 \u4e3a \u300a \u575a\u6301 \u5f00\u653e \u5305\u5bb9 \u63a8\u52a8 \u8054\u52a8 \u589e\u957f \u300b \u7684 \u91cd\u8981 \u8bb2\u8bdd \uff0c \u63d0\u51fa \u56db\u70b9 \u201c \u4e2d\u56fd \u4e3b\u5f20 \u201d \uff0c \u4e3a G20 \u672a\u6765 \u53d1\u5c55 \u89c4\u5212 \u84dd\u56fe \uff0c \u4e3a \u4e16\u754c \u7ecf\u6d4e \u8054\u52a8 \u589e\u957f \u6307\u660e \u65b9\u5411 \uff0c \u53d7\u5230 \u4e0e\u4f1a \u5404 \u65b9 \u548c \u56fd\u9645 \u793e\u4f1a \u9ad8\u5ea6 \u8bc4\u4ef7 \u3002 [ \u8be6\u7ec6 ]"
        };
        Properties props = PropertiesUtils.asProperties(
            "annotators", "tokenize, ssplit",
            "tokenize.language", "zh",
            "ssplit.boundaryTokenRegex", "[.\u3002]|[!?\uff01\uff1f]+",
            "ssplit.isOneSentence", "true",
            "segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz",
            "segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese",
            "segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz",
            "segment.sighanPostProcessing", "true");
        assertSentenceSplit(props, expected);
    }

    /**
     * Builds a pipeline from {@code props}, annotates {@link #HEADLINE}, and asserts that the
     * produced sentences match {@code expected} in number and, position by position, in their
     * space-joined token text.
     *
     * @param props    pipeline configuration for the test case
     * @param expected expected sentences, each written as its tokens joined by single spaces
     */
    private static void assertSentenceSplit(Properties props, String[] expected) {
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation document = new Annotation(HEADLINE);
        pipeline.annotate(document);

        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
        // int arguments widen to the assertEquals(long, long) overload.
        Assert.assertEquals(expected.length, sentences.size());

        // The size assertion above guarantees both sequences have expected.length entries.
        for (int i = 0; i < expected.length; ++i) {
            String sentenceText =
                SentenceUtils.listToString(sentences.get(i).get(CoreAnnotations.TokensAnnotation.class));
            Assert.assertEquals("Bad sentence #" + i, expected[i], sentenceText);
        }
    }
}

