=head1 NAME

iPE::Model::Emission::ELUT - extended Markov chain model (Look-Up Table)

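=head1 DESCRIPTION

An emission model built on L<iPE::Model::Emission::LUT>.  For every position
of a region whose symbol is not the missing-data symbol C<0>, the model takes
that position together with the C<footprint> positions before it, removes the
missing-data symbols, and counts the last C<order + 1> symbols that remain.
When fewer than C<order + 1> symbols remain, the key is padded on the left
with C<0>.  Counting requires the sequences to be loaded.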

=head1 FUNCTIONS

=cut

package iPE::Model::Emission::ELUT;
use iPE;
use iPE::Globals;
use base("iPE::Model::Emission::LUT");
use strict;

# Runs the parent class initialization, then checks that the model settings
# provide a footprint.
# Dies, naming the model, when the footprint is undefined.
sub init {
    my $this = shift;

    $this->SUPER::init();

    # NOTE(review): the message mentions the order as well, but only the
    # footprint is actually checked here.
    if (!defined($this->footprint)) {
        die("ELUT requires an order and footprint in the data attribute.\n".
            "Error found in ".$this->name."\n");
    }
}

# Accessor: returns the "footprint" entry of this model's settings hash
# (undef when the entry is absent).
sub footprint {
    my ($this) = @_;
    return $this->settings->{footprint};
}

# Returns the model order, or the empty string when the order is undefined.
sub getZoeHeaderEnd {
    my ($this) = @_;
    return defined($this->order) ? $this->order : "";
}

# Counts a region with the null flag set to 0.
# Dispatches to _count when the region's sequence is loaded and to
# _countUnloaded otherwise.  The complete argument list is forwarded with the
# flag appended after it.
sub countRegion {
    my ($this, $region) = @_;
    my $counter = $region->seq->loaded ? \&_count : \&_countUnloaded;
    return $counter->(@_, 0);
}

# Counts a region with the null flag set to 1.
# Dispatches to _count when the region's sequence is loaded and to
# _countUnloaded otherwise.  The complete argument list is forwarded with the
# flag appended after it.
sub countNullRegion {
    my ($this, $region) = @_;
    my $counter = $region->seq->loaded ? \&_count : \&_countUnloaded;
    return $counter->(@_, 1);
}


# Tallies emission counts for one region whose sequence is loaded.
#
# Arguments:
#   $this   - the model; supplies order, footprint, posCounts and nullCounts.
#   $region - the region; supplies start, end, weight and strRef (a reference
#             to the sequence string).
#   $null   - true to add to nullCounts, false to add to posCounts.
#
# For each position from start+footprint through end whose symbol is not the
# missing-data symbol, the window made of that position and the $footprint
# positions before it is taken and the missing-data symbols are removed from
# it.  The result is normalized to exactly order+1 symbols: padded on the left
# with the missing-data symbol when too short, or cut down to its last
# order+1 symbols when too long.  The region weight is then added to the
# count stored under that key.
#
# Dies if a normalized key does not end up order+1 symbols long.
#
# Open design questions recorded by the original author:
#   * A run such as
#         1....................1....1
#     is currently binned as
#         ..111
#     although it should probably be treated as
#         1..11
#   * The padded sequences could be built in a more elaborate way, e.g. by
#     splitting the footprint into zones that are examined separately (five
#     5-base zones for a 25 bp footprint); the zones could even be passed in
#     with the data tag.
#   * Any such change to the scoring scheme has to be kept in sync with the
#     scoring code in zoe.
#   * The missing-data symbol was changed from "." to "0" because "." confused
#     the LUT scanner on the Arrayseq sequence.
sub _count {
    my ($this, $region, $null) = @_;

    my $buck = $null ? $this->nullCounts : $this->posCounts;

    # Fetch everything the loop needs once, up front.
    my $key_length = $this->order + 1;
    my $footprint  = $this->footprint;
    my $weight     = $region->weight;
    my $end        = $region->end;
    my $strRef     = $region->strRef;

    # NOTE: this forces the missing-data symbol to be 0.  It could be passed
    # along in the data tag instead.
    my $dont_care_symbol = "0";

    # Quote the symbol so that it is always matched literally, even if it is
    # ever changed to a regex metacharacter such as ".".
    my $dont_care_re = qr/\Q$dont_care_symbol\E/;

    for (my $pos = $region->start + $footprint; $pos <= $end; $pos++) {

        # Positions that carry no data themselves are never counted.
        next if substr($$strRef, $pos, 1) eq $dont_care_symbol;

        # Current position plus its footprint, with missing data squeezed out.
        my $reduced_str = substr($$strRef, $pos - $footprint, $footprint + 1);
        $reduced_str =~ s/$dont_care_re//g;

        # Positive: symbols to pad.  Negative: surplus symbols to drop.
        my $missing = $key_length - length($reduced_str);

        my $padded_str;
        if ($missing > 0) {
            # Too little context: pad on the left to indicate missing data.
            $padded_str = ($dont_care_symbol x $missing) . $reduced_str;
        }
        elsif ($missing < 0) {
            # Too much context: keep only the last order+1 symbols.
            $padded_str = substr($reduced_str, -$missing, $key_length);
        }
        else {
            $padded_str = $reduced_str;
        }

        if (length($padded_str) != $key_length) {
            die("something is wrong with this padded string... $padded_str");
        }

        $buck->{$padded_str} += $weight;
    }
}

# Always dies: the ELUT model refuses to count sequences that are not loaded.
# Any arguments are ignored.
sub _countUnloaded {
    my $message = join("",
        "ELUT model cannot count sequences that are not loaded.\n",
        "To load sequences, set the loadSequences option in your instance file\n",
        "to true.\n");
    die($message);
}

=head1 SEE ALSO

L<iPE::Model::Emission>

=head1 AUTHOR

Aaron Tenney (tenney@cse.wustl.edu)

=cut

1;
