#!/usr/bin/env tclsh

# segexpr : Evaluate logical expressions of segment lists
# Written by Peter Shawhan - library code starting in 2003, wrapper in 2005
# Copyright 2003, 2004, 2005, 2006, 2012 Peter Shawhan

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

#----------------------------------------------------------------------
#-- "segments.tcl"
#-- Tcl library for operating on segment lists
#-- Written by Peter Shawhan, July 2003

package provide segments 3.1

#==============================================================================
namespace eval ::segments {

    #-- Commands to be imported with "namespace import"
    namespace export SegRead SegWrite SegCoalesce SegBare SegSort SegInvert \
	    SegUnion SegIntersection SegSum SegLengthCut \
	    SegPlayground SegPlaygroundMask SegNonPlaygroundMask \
	    SegIsPlayground

}


#==============================================================================
proc ::segments::SegRead { filename } {

    set seglist [list]

    set re_special {^(between |elapsed time | science data )}
    set re_line {^(.*?)(\d{9,10}\.?\d*)[\s\-,;]+(\d{9,10}\.?\d*)(.*)$}

    #-- Check whether this is an http URL or a regular file

    if [regexp {^\s*http:} $filename] {
	#-- This is an http URL
	package require http

	#-- We may need to loop, if there is URL redirection
	set url $filename
	set origurl $url
	while { 1 } {

	    #-- Read contents into memory
	    if { [catch {http::geturl $url} httpToken] } {
		return -code error $httpToken
	    }

	    upvar #0 $httpToken httpstate

	    #-- Parse the "meta" array, if it exists
	    if { [info exists httpstate(meta)] } {
		array set httpmeta $httpstate(meta)
	    } else {
		set httpmeta(null) ""
	    }

	    #-- Check the transfer status
	    if { $httpstate(status) != "ok" } {
		if { [info exists httpstate(error)] } {
		    set errmsg $httpstate(error)
		} else {
		    set errmsg "http status is $httpstate(status)"
		}
		return -code error $errmsg
	    }

	    #-- Check the HTTP status code
	    set status $httpstate(http)
	    regexp {^(\S+) (\S+) (.+)$} $status match httpver statcode stattext

	    if { [string equal $statcode "200"] } {
		#-- We got the file we wanted!

		foreach line [split $httpstate(body) "\n"] {
		    #-- Remove comments
		    regsub {\#.*$} $line {} line
		    #-- Ignore certain lines from conlog listings
		    if [regexp -nocase $re_special $line] continue
		    if {! [regexp $re_line $line \
			       match prefix start end suffix]} continue
		    if [catch {
			set annots [eval list "$prefix $suffix"]
		    } ] {
			set annots [list]
		    }
		    lappend seglist [concat [list $start $end] $annots ]
		}
		#-- End loop over lines in file

		unset $httpToken
		break   ;##- Break out of the while loop
	    }

	    ;##- Check for a URL redirection
	    if { [string match "3*" $statcode] && \
		     [info exists httpmeta(Location)] } {
		set url $httpmeta(Location)
		unset $httpToken
		continue   ;##- Go back and read from the revised URL
	    }

	    #-- If we get here, then there was an error
	    unset $httpToken
	    return -code error "Unable to read $origurl: $statcode $stattext"
    
	}
	#-- End of while loop

    } else {
	#-- This is an ordinary file

	if [catch {
	    set fid [open $filename]
	} errmsg] {
	    return -code error $errmsg
	}
	while { [gets $fid line] >= 0 } {
	    #-- Remove comments
	    regsub {\#.*$} $line {} line
	    #-- Ignore certain lines from conlog listings
	    if [regexp -nocase $re_special $line] continue
	    if {! [regexp $re_line $line \
		       match prefix start end suffix]} continue
	    if [catch {
		set annots [eval list "$prefix $suffix"]
	    } ] {
		set annots [list]
	    }
	    lappend seglist [concat [list $start $end] $annots ]
	}
	#-- End loop over lines in file

	close $fid
    }

    return $seglist
}

#==============================================================================
proc ::segments::SegWrite { seglist filename args } {

    #-- Set defaults
    set include(index) 0
    set include(duration) 0

    #-- Check optional arguments
    set opt ""
    foreach arg $args {
	if { [string match "-*" $arg] } {
	    set opt [string range $arg 1 end]
	} else {
	    #-- Store the value
	    if { $opt == "include" } {
		foreach item [regexp -all -inline {\w+} $arg] {
		    set include($item) 1
		}
	    }
	}
    }

    set index 0
    if { [string equal $filename "-"] } {
	set fid stdout
    } else {
	set fid [open $filename w]
    }
    foreach seg $seglist {
	incr index
	if { $include(index) } {
	    puts -nonewline $fid [format "%5d " $index]
	}

        foreach {start stop} $seg break
	puts -nonewline $fid "$start $stop"

	if { $include(duration) } {
	    set dur [expr {$stop-$start}]
	    if { [string is integer $dur] } {
		puts -nonewline $fid [format " %6d" $dur ]
	    } else {
		puts -nonewline $fid [format " %13.6f" $dur ]
	    }
	}

	if { [llength $seg] > 2 } {
	    puts $fid [format " %s" [lrange $seg 2 end] ]
	} else {
	    puts $fid ""
	}
    }
    if { ! [string equal $filename "-"] } {
	close $fid
    }

    return $index
}

#==============================================================================
proc ::segments::SegCoalesce { seglist } {

    #-- Check for an empty list
    if { [llength $seglist] == 0 } { return $seglist }

    #-- Sort the list by start time
    set seglist [lsort -real -index 0 $seglist]

    #-- Coalesce the list, checking each segment for validity as we go
    set outlist [list]
    set ostop -1
    foreach seg $seglist {
	set start [lindex $seg 0]
	set stop [lindex $seg 1]
	if { $start > $ostop } {
	    #-- Disconnected, so flush out the existing segment (if any)
	    if { $ostop >= 0 } {
		lappend outlist [list $ostart $ostop]
	    }
	    set ostart $start
	    set ostop $stop
	} elseif { $stop > $ostop } {
	    #-- Extend the current segment
	    set ostop $stop
	}
    }
    #-- Flush out the final segment (if any)
    if { $ostop >= 0 } {
	lappend outlist [list $ostart $ostop]
    }

    return $outlist
}

#==============================================================================
proc ::segments::SegBare { seglist } {

    #-- Check for an empty list
    if { [llength $seglist] == 0 } { return $seglist }

    #-- Strip off all annotations
    set outlist [list]
    foreach seg $seglist {
	lappend outlist [lrange $seg 0 1]
    }

    return $outlist
}

#==============================================================================
proc ::segments::SegSort { seglist } {

    #-- Check for an empty list
    if { [llength $seglist] == 0 } { return $seglist }

    #-- Sort by start time
    return [lsort -real -index 0 $seglist]
}

#==============================================================================
proc ::segments::SegInvert { seglist } {

    #-- Check for an empty list
    if { [llength $seglist] == 0 } {
	#-- Return a segment representing all time
	return [list [list 0 1999999999] ]
    }

    #-- Make sure the list is coalesced
    if [catch {
	set seglist [SegCoalesce $seglist]
    } errmsg] {
	return -code error $errmsg
    }

    #-- Go through the list, checking for validity as we go
    set outlist [list]
    set ostart 0
    foreach seg $seglist {
	set start [lindex $seg 0]
	set stop [lindex $seg 1]
	if { $start < 0 || $stop < $start || $start < $ostart } {
	    return -code error "Invalid list"
	}
	if { $start > 0 } {
	    lappend outlist [list $ostart $start]
	}
	set ostart $stop
    }
    if { $ostart < 1999999999 } {
	lappend outlist [list $ostart 1999999999]
    }

    return $outlist
}

#==============================================================================
proc ::segments::SegUnion { {seglist1 {}} {seglist2 {}} args } {

    #-- If more than two lists were passed, use recursion
    if { [llength $args] > 0 } {
	set seglist2 [eval [list SegUnion $seglist2] $args]
    }

    #-- If we get here, then we know there are exactly two lists to deal with

    set length1 [llength $seglist1]
    set length2 [llength $seglist2]

    #-- Initialize list of output segments
    set ostart -1
    set seglist {}

    set i1 -1
    set i2 -1
    set start1 -1
    set start2 -1

    while { 1 } {

	#-- If necessary, get a segment from list 1
	if { $start1 == -1 } {
	    incr i1
	    if { $i1 < $length1 } {
		foreach { start1 stop1 } [lindex $seglist1 $i1] break
	    } elseif { $i2 == $length2 } {
		break
	    }
	}

	#-- If necessary, get a segment from list 2
	if { $start2 == -1 } {
	    incr i2
	    if { $i2 < $length2 } {
		foreach { start2 stop2 } [lindex $seglist2 $i2] break
	    } elseif { $i1 == $length1 } {
		break
	    }
	}

	#-- Pick the earlier segment from the two lists
	if { $start1 > -1 && ( $start2 == -1 || $start1 <= $start2) } {
	    set ustart $start1
	    set ustop $stop1
	    #-- Mark this segment has having been consumed
	    set start1 -1
	} elseif { $start2 > -1 } {
	    set ustart $start2
	    set ustop $stop2
	    #-- Mark this segment has having been consumed
	    set start2 -1
	} else {
	    break
	}

	#-- If the output segment is blank, initialize it; otherwise, see
	#-- whether the new segment extends it or is disjoint
	if { $ostart == -1 } {
	    set ostart $ustart
	    set ostop $ustop
	} elseif { $ustart <= $ostop } {
	    if { $ustop > $ostop } {
		#-- This extends the output segment
		set ostop $ustop
	    } else {
		#-- This lies entirely within the current output segment
	    }
	} else {
	    #-- Flush the current output segment, and replace it with the
	    #-- new segment
	    lappend seglist [list $ostart $ostop]
	    set ostart $ustart
	    set ostop $ustop
	}

    }

    #-- Flush out the final output segment (if any)
    if { $ostart != -1 } {
	lappend seglist [list $ostart $ostop]
    }

    return $seglist
}

#==============================================================================
proc ::segments::SegIntersection { {seglist1 {}} args } {

    #-- If only one list was passed, just return it
    if { [llength $args] == 0 } {
	return $seglist1
    }

    #-- If more than two lists were passed, use recursion
    if { [llength $args] > 1 } {
	set seglist2 [eval SegIntersection $args]
    } else {
	set seglist2 [lindex $args 0]
    }

    #-- If we get here, then we know there are exactly two lists to deal with

    set length1 [llength $seglist1]
    set length2 [llength $seglist2]

    #-- Initialize list of output segments
    set ostart -1
    set outlist [list]
    set iseg2 -1
    set start2 -1
    set stop2 -1

    foreach seg1 $seglist1 {
	set start1 [lindex $seg1 0]
	set stop1 [lindex $seg1 1]
	set annotations [lrange $seg1 2 end]

	#-- Loop over segments from the second list which overlap this segment
	while { $start2 < $stop1 } {
	    if { $stop2 > $start1 } {
		#-- These overlap

		#-- Find the overlapping range
		if { $start1 < $start2 } {
		    set ostart $start2
		} else {
		    set ostart $start1
		}
		if { $stop1 > $stop2 } {
		    set ostop $stop2
		} else {
		    set ostop $stop1
		}

		lappend outlist [concat [list $ostart $ostop] $annotations]
	    }

	    if { $stop2 > $stop1 } { break }

	    #-- Step forward
	    incr iseg2
	    if { $iseg2 < [llength $seglist2] } {
		set seg2 [lindex $seglist2 $iseg2]
		set start2 [lindex $seg2 0]
		set stop2 [lindex $seg2 1]
	    } else {
		#-- Pseudo-segment in the far future
		set start2 2000000000
		set stop2 2000000000
	    }

	}
	#-- End loop over list 2

    }
    #-- End loop over list 1

    return $outlist
}

#==============================================================================
proc ::segments::SegSum { seglist } {
    set sum 0
    foreach seg $seglist {
	foreach {start stop} $seg break
	if { $stop < $start } {
	    return -code error "Invalid list"
	}
	set sum [expr {$sum+($stop-$start)}]
    }
    return $sum
}

#==============================================================================
proc ::segments::SegLengthCut { seglist oper limit } {
    #-- Select segments longer or shorter than some limit

    #-- Check arguments
    if { [lsearch -exact [list < <= == >= > = != <> ] $oper] == -1 } {
	return -code error "Invalid operator passed to SegLengthCut"
    }
    if { ! [string is double $limit] } {
	return -code error "Invalid length limit passed to SegLengthCut"
    }

    #-- Map specified operator to valid Tcl operator
    if { [string equal $oper "=" ] } {
	set oper "=="
    } elseif { [string equal $oper "<>" ] } {
	set oper "!="
    }

    #-- Short cut for special case
    if { [string equal $oper ">="] && $limit == 0 } {
	return $seglist
    }

    #-- Construct expression test for use within loop below
    set test "\$length $oper $limit"

    set newseglist [list]
    foreach seg $seglist {
	foreach {start stop} $seg break
	set length [expr {$stop-$start}]
	if $test {
	    lappend newseglist $seg
	}
    }

    return $newseglist
}

#==============================================================================
proc ::segments::SegPlayground { start stop {pad 0} } {
    #-- Generate a list of playground segments between two times

    set seglist [list]

    #-- First deal with the S1 part (if any) of the specified interval
    if { $start < 725000000 } {
	foreach {playstart playstop} [list \
					      714265025 714266239 \
					      714357173 714357806 \
					      714382917 714388358 \
					      714462428 714464278 \
					      714559457 714560204 \
					      714636028 714640435 \
					      714821115 714823144 \
					      714941207 714944756 \
					      715042855 715049751 \
					      715087222 715087871 \
					      715219955 715224063 \
					      715344556 715345510 \
					      715482685 715483545 ] {
	    if { $start > $playstart } { set playstart $start }
	    if { $stop < $playstop } { set playstop $stop }
	    if { $playstop > $playstart } {
		lappend seglist \
		       [list [expr {$playstart-$pad}] [expr {$playstop+$pad}] ]
	    }
	}
	set start 725000000
    }

    #-- Now deal with the S2/S3 part of the interval
    set gps [expr {729273613 + 6370*int(($start-1000-729273613)/6370)} ]
    while { $gps < $stop } {
	set gps1 [expr {$gps-$pad}]
	set gps2 [expr {$gps+600+$pad}]
	if { $gps1 < $start } { set gps1 $start }
	if { $gps2 > $stop } { set gps2 $stop }
	if { $gps2 > $gps1 } {
	    lappend seglist [list $gps1 $gps2 ]
	}
	incr gps 6370
    }

    return $seglist
}

#==============================================================================
proc ::segments::SegPlaygroundMask { seglist {pad 0} } {
    #-- Take an existing segment list and return the parts which are playground

    set playsegs [::segments::SegPlayground \
		      [lindex [lindex $seglist 0] 0] \
		      [lindex [lindex $seglist end] 1] $pad ]
    return [SegIntersection $seglist $playsegs]
}

#==============================================================================
proc ::segments::SegNonPlaygroundMask { seglist {pad 0} } {
    #-- Take existing segment list and return parts which are NOT playground

    set playsegs [::segments::SegPlayground \
		      [lindex [lindex $seglist 0] 0] \
		      [lindex [lindex $seglist end] 1] [expr {-$pad}] ]
    return [SegIntersection $seglist [SegInvert $playsegs] ]
}

#==============================================================================
proc ::segments::SegIsPlayground { args } {
    #-- Determine whether input (a segment list, a single segment, a time
    #-- range, or a single time) is entirely within the playground

    #-- Figure out what kind of argument(s) was/were passed
    if { [llength $args] == 1 } {
	#-- This should be a segment list, a segment, or a scalar GPS time
	set seglist [lindex $args 0]
	if { [llength $seglist] == 0 } {
	    #-- This is an empty list, which we consider to be in the
	    #-- playground!  (At least, it is not outside the playground.)
	    return 1
	}
	if { [llength $seglist] == 1 && [llength [lindex $seglist 0]] == 1 } {
	    #-- Scalar time value
	    set gps [lindex $seglist 0]
	    if { ! [string is double $gps] } {
		return -code error "Argument passed to SegIsPlayground is\
                        not a valid GPS time"
	    }
	    #-- Treat this as a one-second-long segment
	    set seglist [list [list $gps [expr {$gps+1}]]]
	} elseif { [llength $seglist] == 2 && \
		       [llength [lindex $seglist 0]] == 1 && \
		       [llength [lindex $seglist 1]] == 1 } {
	    #-- This is a segment; convert it to a one-element segment list
	    set seglist [list $seglist]
	}
    } elseif { [llength $args] == 2 } {
	#-- Two time values
	set seglist [list $args]
    } else {
	return -code error "Too many arguments passed to SegIsPlayground"
    }

    #-- Figure out what part(s) are playground data
    set playlist [::segments::SegPlaygroundMask $seglist]

    #-- See whether SegPlaygroundMask actually modified anything
    if { [llength $playlist] != [llength $seglist] } { return 0 }
    foreach seg1 $playlist seg2 $seglist {
	foreach {seg1t1 seg1t2} $seg1 break
	foreach {seg2t1 seg2t2} $seg2 break
	if { $seg1t1 != $seg2t1 || $seg1t2 != $seg2t2 } { return 0 }
    }

    #-- If we get here, then this is entirely playground data
    return 1
}


#==============================================================================
# The following commands are executed immediately when this file is sourced,
# i.e. when you do 'package require segments'

namespace import ::segments::*

#----------------------------------------------------------------------

#-- Check command-line arguments
if { $argc < 1 } {
    puts {
Usage:    segexpr <expression> [<outfile>] [<output_options>]
This utility reads segment lists from files, calculates an expression specified
by the user, and writes the output either to stdout or to a file.

<expression> is built out of functions operating on segment lists.  A filename
appearing in the expression represents the segment list contained within the
file.  Filename expansion wildcards (*,?,[]) can be used; if multiple filenames
are matched, then they are all used as a set, which is handy for doing
intersections or unions of several files.  The available functions are:
  intersection(list1,list2[,list3...])  Intersection of 2 or more segment lists
      and()  is an alias for  intersection()
  union(list1,list2[,list3...])         Union of 2 or more segment lists
      or()  is an alias for  union()
  not(list)                             Invert segment list
      invert()  and  veto()  are aliases for  not()
  playground()                          Mask to keep only playground times
  nonplayground()                       Mask to keep only non-playground times
  bare()                                Strip away any annotations
Function names are case-insensitive.  Argument lists may be separated by spaces
instead of commas, if desired.  The entire expression should be enclosed in
single quotes to prevent the shell from trying to interpret parentheses and
wildcards.  Arbitrarily complex expressions may be formed by nesting function
calls.  Some examples:
  segexpr 'union(mysegs.txt,yoursegs.txt)'
  segexpr 'union(mysegs.txt,yoursegs.txt)' oursegs.txt -include duration
  segexpr 'intersection(anasegs.txt,veto(vetotimes.txt))' unvetoed_anasegs.txt
As a special case, an expression consisting of a single filename or filename
pattern without any function calls is interpreted to mean the union of all
matching filename(s).  This can be used, for instance, to add a serial number
and/or duration to each segment, as described in <output_options>.  Example:
  segexpr mybaresegs.txt withinfo.txt -include index,duration

<outfile> is the name of a file to write the output to.  If it is omitted or is
equal to '-' (a dash), then output is directed to standard output instead of to
a file.

<output_options> can be used, if desired, to control the format of the output
segment list.  It can be anything which is understood by the 'SegWrite'
function in the 'segments' Tcl library.  Currently, it can be '-include index'
to assign a serial number to each output segment at the beginning of the line;
'-include duration' to include the duration of the segment immediately after
the start and end times; or '-include index,duration' to include both.
}
  exit 1
}

#-- Parse command-line arguments
set expr [lindex $argv 0]

if { $argc >= 2 && ! [regexp {^-.} [lindex $argv 1]] } {
    set outfile [lindex $argv 1]
    set optstart 2
} else {
    #-- This will cause the output to go to stdout
    set outfile "-"
    set optstart 1
}

if { $argc > $optstart } {
    set outargs [lrange $argv $optstart end]
} else {
    set outargs ""
}

#-- If expression contains no parentheses, make sure it is a single name
if { ! [regexp {\(} $expr] } {
    if { [regexp {^ *\S+ *$} $expr] } {
	set expr "or($expr)"
    } else {
	puts "Invalid segment expression"
	exit 2
    }
}

#-- Deconstruct expression into tokens
set functionOrFilename {[^,\(\) ]+\(?}
set separator {[, ]+}
set closeParen {\)}
set re "(?:$functionOrFilename|$separator|$closeParen)"
set tokens [regexp -all -inline $re $expr]
###hack
##puts [join $tokens " "]

#-- Loop over tokens, converting each piece into Tcl code
set tclcmd ""
set nestlevel 0
foreach token $tokens {

    if [regexp {^(.+)\($} $token - function] {
	#-- This is a function name
	if { $nestlevel > 0 } { append tclcmd " \[" }
	incr nestlevel

	#-- Match this against available function names (case-insensitive)
	set function [string tolower $function]
	switch -exact -- $function {
	    intersection - and { append tclcmd "SegIntersection" }
	    union - or { append tclcmd "SegUnion" }
	    invert - not - veto { append tclcmd "SegInvert" }
	    playground { append tclcmd "SegPlaygroundMask" }
	    nonplayground { append tclcmd "SegNonPlaygroundMask" }
	    bare { append tclcmd "SegBare" }
	    default {
		puts stderr "Invalid function name: $function"
		puts stderr "EXITING"
		exit 1
	    }
	}

    } elseif [regexp $separator $token] {
	append tclcmd " "

    } elseif [regexp $closeParen $token] {
	incr nestlevel -1
	if { $nestlevel > 0 } { append tclcmd " \]" }

    } else {
	#-- This should be a filename or a glob pattern
	set filelist [glob -nocomplain $token]
	if { [llength $filelist] == 0 } {
	    puts "No matching file: $token"
	    exit 1
	}
	#-- Read segment list from each file (if not already read),
	#-- and add to the Tcl command
	foreach file $filelist {
	    if { ! [info exist segs($file)] } {
		if [catch {
		    set segs($file) [SegRead $file]
		} errmsg] {
		    puts "Error reading segment list from $file: $errmsg"
		    exit 1
		}
	    }
	    append tclcmd " \$segs($file)"
	}

    }

}

###hack
##puts $tclcmd

#-- Check that the parenthesis nesting level worked out right
if { $nestlevel != 0 } {
    puts "Mismatched parentheses in expression"
    exit 1
}

#-- Execute the Tcl command
set outsegs [eval $tclcmd]

#-- Write to an output file or to stdout
eval [concat [list SegWrite $outsegs $outfile] $outargs]
