#!/usr/bin/env perl

# SynCha Lite
# Release version, kept as a quoted string: a bare 0.3.1 literal (two dots)
# is parsed by Perl as a v-string, i.e. three raw characters, not as text.
my $VERSION = '0.3.1';

use strict;
use warnings;
use Getopt::Std;

# Help text printed when -h is given.  The defaults quoted here mirror the
# values assigned to %options after option parsing.
my $usage = <<USG;
syncha [args] [file..]
[args]
-I 0: raw sentence   (default)
   1: cabocha format

-O 0: cabocha format (default)

-n #num: extract candidates in preceding #num sentences for zero anaphora resolution
         (default: -n 2)
-c #num: cache size on NP coreference resolution
         (default: -c 7)
USG

# Parse the command-line switches into %options.
#   -I, -O, -n, -c each take a value; -h is a flag that prints the usage.
# getopts returns false on an unknown switch or a missing value; stop with
# the usage text in that case instead of continuing with a bad command line.
my %options;
getopts("I:O:n:c:h", \%options) or die $usage;
die $usage if $options{h};

# Fill in defaults for any switch the user did not supply.
$options{I} = 0 unless defined $options{I};
$options{O} = 0 unless defined $options{O};
$options{n} = 2 unless defined $options{n};
$options{c} = 7 unless defined $options{c};

# Installation root of SynCha; its src/ directory holds the helper scripts
# loaded below and is put at the front of the module search path.
my $Bin = "/opt/local/share/syncha";
unshift @INC, "$Bin/src";

# Load the helper scripts at run time, in this fixed order.
for my $script ('common.pl', 'coreference.pl', 'predicate.pl',
                'event-noun.pl', 'optimize.pl') {
    require $script;
}

# Input comes from the first file argument, or from STDIN (spelled '-') when
# no argument is given.  Definedness/length is tested rather than truth so
# that a file literally named "0" is not mistaken for "no argument".
my $infile = (defined $ARGV[0] && length $ARGV[0]) ? $ARGV[0] : '-';

# Read STDIN as UTF-8 text, including when '-' was passed explicitly.
if ($infile eq '-') {
    binmode STDIN, ':utf8';
}

use IPC::Open2;

# Start cabocha as a two-way child process: $OUT is the handle we read its
# output from, $IN is the handle we write its input to.  Both carry UTF-8.
my ($OUT, $IN);
open2($OUT, $IN, 'cabocha -f 1 -n 1') or die $!;
for my $pipe ($OUT, $IN) {
    binmode $pipe, ':utf8';
}

# Build the analysis components; the first three share the same
# constructor arguments (install root and parsed options).
my @model_args  = ($Bin, \%options);
my $coref_model = Coreference->new(@model_args);
my $pred_model  = Predicate->new(@model_args);
my $noun_model  = EventNoun->new(@model_args);
my $opt_model   = Optimize->new;

# binmode STDOUT, ':utf8';

use utf8;
use open ':utf8';

# Input records are terminated by the string "EOT\n".
$/ = "EOT\n";

# Reject an unsupported -I value up front with a real message; dying with $!
# here would print an unrelated (or empty) system error.
my $input_mode = $options{I};
unless ($input_mode == 0 || $input_mode == 1) {
    die "syncha: unsupported -I value '$input_mode' (expected 0 or 1)\n";
}

# Open the input with an explicit read mode and a lexical handle.  A
# two-argument open would let a crafted file name (leading '>' or a
# trailing '|') overwrite files or run commands.  '-' still means STDIN.
my $reading_stdin = ($infile eq '-');
my $in_fh;
if ($reading_stdin) {
    binmode STDIN, ':utf8';
    $in_fh = \*STDIN;
} else {
    open $in_fh, '<:utf8', $infile
        or die "syncha: cannot open '$infile': $!\n";
}

while (my $record = <$in_fh>) {
    chomp $record;    # strips the trailing "EOT\n" record separator

    # Turn the record into the sentence list, either by running the raw
    # text through the cabocha pipe or by reading cabocha-format input.
    my @sent;
    if ($input_mode == 0) {
        @sent = open_raw_input($record, $IN, $OUT);
    } else {
        @sent = open_cabocha_input($record);
    }

    # coreference resolution
    $coref_model->analyze(\@sent);

    # predicate argument structure analysis
    my $out;
    if (@sent) {
        $out = $opt_model->solve(\@sent,
                                 $pred_model->analyze(\@sent),
                                 $noun_model->analyze(\@sent));
    } else {
        # Nothing to analyse: emit only the record terminator.
        $out = "EOT\n";
    }

    # NOTE(review): the output layer is (re)applied on every record, as in
    # the original; presumably a helper may reset it -- confirm before
    # hoisting this out of the loop.
    binmode STDOUT, ':utf8';
    print $out;

    # Restore the record separator for the next read; presumably the helper
    # routines change it while talking to cabocha -- confirm.
    $/ = "EOT\n";
}
close $in_fh unless $reading_stdin;

