#!/usr/bin/python

import hashlib
import os
import sys
import getopt
import collections

# Registry of every SimpleDagJob built so far, keyed by the job's archived
# path; consulted while ancestor/descendant links are followed so that a job
# is only constructed once.
paths_to_jobs = dict()

# Lightweight record for one file attached to a job: its base name, its full
# path, and the command associated with it.
dagFile = collections.namedtuple("Dag_File", ["file_name", "file_path", "command"])

class SimpleDagJob(object):
  """One archived makeflow job, rebuilt from its directory in the archive.

  Constructing a job reads its ``run_info`` file, lists its ``input_files``
  and ``outputs`` directories, and then recursively constructs every job
  linked from its ``ancestors`` and ``descendants`` directories.  Each job
  registers itself in the module-level ``paths_to_jobs`` map before it
  recurses, so a job reachable along several links is only built once and
  links that point back at a job under construction do not recurse forever.
  """

  def __init__(self, path, local_path = ""):
    """Load the job archived at ``path``.

    path       -- directory of the job inside the archive; used as the key
                  in ``paths_to_jobs``.
    local_path -- path of the user's file that led to this job (only used
                  when printing); empty for jobs found by following links.

    Raises whatever the underlying file operations raise (for example when
    ``run_info`` or one of the expected sub-directories is missing) and
    ValueError when a numeric ``run_info`` line is not an integer.
    """
    self.archived_path = path
    self.local_path = local_path
    # Register before recursing so related jobs link back to this instance
    # instead of constructing it a second time.
    paths_to_jobs[self.archived_path] = self
    self.command = ""
    self.wrapped_command = ""
    self.source_makeflow_path = ""
    self.batch_job_info = {}
    self.input_files= []
    self.output_files= []
    self.ancestors = []
    self.descendants = []
    # run_info must be read first: output file records carry self.command.
    self.recover_run_info()
    self.create_input_output_files()
    self.recover_ancestors()
    self.recover_descendants()
    self.get_makeflow_path()

  def add_input_file(self, file_path):
    """Record ``file_path`` as an input of this job (with an empty command)."""
    file_name = file_path.split("/")[-1]
    dag_file = dagFile(file_path = file_path, file_name = file_name, command = "")
    self.input_files.append(dag_file)

  def add_output_file(self, file_path):
    """Record ``file_path`` as an output of this job, tagged with its command."""
    file_name = file_path.split("/")[-1]
    dag_file = dagFile(file_path = file_path, file_name = file_name, command = self.command)
    self.output_files.append(dag_file)

  def create_input_output_files(self):
    """Fill ``output_files`` and ``input_files`` from the job's directories.

    Entries of ``input_files`` are resolved with ``os.path.realpath``;
    entries of ``outputs`` are kept as paths inside the job directory.
    """
    output_file_dir = os.path.join(self.archived_path, "outputs")
    for entry in os.listdir(output_file_dir):
      self.add_output_file(os.path.join(output_file_dir, entry))

    input_file_dir = os.path.join(self.archived_path, "input_files")
    for entry in os.listdir(input_file_dir):
      self.add_input_file(os.path.realpath(os.path.join(input_file_dir, entry)))

  def _recover_related_jobs(self, dir_name, related):
    """Append the job behind each entry of ``<archived_path>/<dir_name>``.

    Every entry is resolved with ``os.path.realpath`` and looked up in
    ``paths_to_jobs``; unknown paths are constructed (which registers them).
    Jobs are appended to ``related`` one at a time, so the list is already
    partly filled while later entries are still being constructed.
    """
    link_dir = os.path.join(self.archived_path, dir_name)
    for entry in os.listdir(link_dir):
      job_path = os.path.realpath(os.path.join(link_dir, entry))
      if job_path in paths_to_jobs:
        related.append(paths_to_jobs[job_path])
      else:
        related.append(SimpleDagJob(job_path))

  def recover_ancestors(self):
    """Populate ``ancestors`` from the job's ``ancestors`` directory."""
    self._recover_related_jobs("ancestors", self.ancestors)

  def recover_descendants(self):
    """Populate ``descendants`` from the job's ``descendants`` directory."""
    self._recover_related_jobs("descendants", self.descendants)

  def recover_run_info(self):
    """Read ``run_info``: two command lines followed by six integer lines.

    The first line becomes ``command``, the second ``wrapped_command``; the
    remaining lines are stored in ``batch_job_info`` in the order listed
    below.  Raises ValueError if one of those lines is not an integer.
    """
    run_info_path = os.path.join(self.archived_path, "run_info")
    numeric_fields = ('submitted', 'started', 'finished',
                      'exited_normally', 'exit_code', 'exit_signal')
    with open(run_info_path, "r") as f:
      self.command = f.readline().rstrip()
      self.wrapped_command = f.readline().rstrip()
      for field in numeric_fields:
        self.batch_job_info[field] = int(f.readline().rstrip())

  def _walk(self, files_attr, links_attr):
    """Yield ``(job, distance)`` breadth-first, starting at this job.

    files_attr -- attribute holding the files of interest
                  (``"input_files"`` or ``"output_files"``).
    links_attr -- attribute holding the jobs to continue with
                  (``"ancestors"`` or ``"descendants"``).

    A job whose ``files_attr`` list is empty is neither yielded nor
    expanded.  Jobs are marked as seen when they are queued, so each job is
    yielded at most once, and the first-in-first-out order yields jobs in
    order of increasing distance.
    """
    seen = set([self.archived_path])
    pending = collections.deque([(self, 0)])
    while pending:
      job, distance = pending.popleft()
      if not getattr(job, files_attr):
        continue
      yield job, distance
      for linked in getattr(job, links_attr):
        if linked.archived_path not in seen:
          seen.add(linked.archived_path)
          pending.append((linked, distance + 1))

  def print_immediate_inputs(self):
    """Print the path of every input file, then an empty line."""
    for f in self.input_files:
      print(f.file_path)
    print('')

  def print_all_inputs(self):
    """Print, per reachable ancestor job, its distance and its input files."""
    for job, distance in self._walk("input_files", "ancestors"):
      print(distance)
      job.print_immediate_inputs()

  def print_immediate_outputs(self):
    """Print the path of every output file, then an empty line."""
    for f in self.output_files:
      print(f.file_path)
    print('')

  def print_all_outputs(self):
    """Print, per reachable descendant job, its distance and its output files."""
    for job, distance in self._walk("output_files", "descendants"):
      print(distance)
      job.print_immediate_outputs()

  def print_job(self):
    """Print a summary of this job followed by its immediate inputs."""
    print("file: %s" % (self.local_path))
    print("Created by job archived at path: %s" % (self.archived_path))
    print("Command used to create this file: %s" % (self.wrapped_command))
    print("makeflow file archived at path: %s" % (self.source_makeflow_path))
    print("Inputs:")
    self.print_immediate_inputs()

  def get_makeflow_path(self):
    """Set ``source_makeflow_path`` from the job at the top of the chain.

    Follows the first ancestor of each job until a job without ancestors is
    reached and records ``<that job>/source_makeflow``.
    NOTE(review): this runs at the end of ``__init__``, so for jobs built
    while another job is still loading its own ancestors the chain may be
    incomplete; the job constructed by the caller is only evaluated after
    all recursive construction has returned.
    """
    job = self
    while job.ancestors:
      job = job.ancestors[0]
    self.source_makeflow_path = os.path.join(job.archived_path, "source_makeflow")

def usage():
  """Print the command-line help text to stdout and exit with status 1."""
  # A single parenthesised argument prints identically under the Python 2
  # print statement and the Python 3 print function.
  print("""usage: makeflow_recover [options] <file>
  options:
    --info                   print out basic info about the specified file and the associated job
    -i, --inputs             list immediate input files required to create file
    --inputs-all             list both immediate input files and all other files that the specified file relied on directly or indirectly
    -h, --help               print this message
    -o, --outputs            list sibling output files
    --outputs-all            list both sibling output files and all other files that relied directly or indirectly on the specified file
    --path=<path_to_archive>   path to search for the makeflow archive (use if when preserving the makeflow you specified a archive path)

  """)
  sys.exit(1)

def parse_args():
  """Parse ``sys.argv`` into a dict of settings.

  Returns a dict with boolean entries 'inputs', 'outputs', 'inputs-all',
  'outputs-all' and 'info', the archive location under 'path' (default
  ``/tmp/makeflow.archive.<effective uid>``) and the file to look up under
  'file'.  'info' is switched on when no other action was requested.

  On an unknown option, ``-h``/``--help``, a missing file argument or a file
  that does not exist, ``usage()`` is called, which prints the help text and
  exits the process.
  """
  arg_map = {'inputs': False, 'outputs': False, "file": None, "inputs-all": False,
            "outputs-all": False, "info": False, "path": "/tmp/makeflow.archive.%s" %(os.geteuid())}
  # Exact option spelling -> arg_map key it switches on.
  flag_for_option = {
    "-o": "outputs", "--outputs": "outputs",
    "-i": "inputs", "--inputs": "inputs",
    "--outputs-all": "outputs-all",
    "--inputs-all": "inputs-all",
    "--info": "info",
  }
  try:
    opts, args = getopt.getopt(sys.argv[1:], ":hio", ['help', 'inputs', 'outputs', 'inputs-all', 'outputs-all', 'info', 'path='])
  except getopt.GetoptError as err:
    # Say what was wrong before showing the help text.
    print(str(err))
    usage()
  for o, a in opts:
    if o in ("-h", '--help'):
      usage()
    elif o == "--path":
      arg_map['path'] = a
    else:
      # getopt only returns options it was told about, so every remaining
      # spelling has an entry in the table.
      arg_map[flag_for_option[o]] = True
  # Default to --info when no action was requested, however many times
  # --path was given.
  if not any(arg_map[flag] for flag in flag_for_option.values()):
    arg_map['info'] = True
  if not args:
    usage()
  arg_map['file'] = args[0]
  if not arg_map['file'] or not os.path.isfile(arg_map['file']):
    print("Cannot find file %s" % (arg_map['file']))
    usage()
  return arg_map

def file_sha1_hexdigest(file_name, chunk_size = 65536):
  """Return the hexadecimal SHA-1 digest of the contents of ``file_name``.

  The file is read as bytes in blocks of ``chunk_size`` so that binary files
  and files without newlines are hashed without decoding them or loading an
  arbitrarily long "line" into memory.
  """
  sha1_hash = hashlib.sha1()
  with open(file_name, "rb") as f:
    chunk = f.read(chunk_size)
    while chunk:
      sha1_hash.update(chunk)
      chunk = f.read(chunk_size)
  return sha1_hash.hexdigest()

def main():
  """Look the requested file up in the archive and print what was asked for."""
  arguments = parse_args()

  # calculate archive_id and create archived file path: the archive keeps a
  # link at files/<first two hex digits>/<remaining digits> of the SHA-1.
  file_name = arguments['file']
  hex_digest = file_sha1_hexdigest(file_name)
  file_path = os.path.join(arguments['path'], "files", hex_digest[0:2], hex_digest[2:])

  if not os.path.islink(file_path):
    print("File has not been archived")
    return

  # The link resolves to the directory of the job associated with the file.
  resolved_job_path = os.path.realpath(file_path)
  job = SimpleDagJob(resolved_job_path, file_name)
  if arguments['inputs']:
    print("Inputs")
    job.print_immediate_inputs()
  if arguments['outputs']:
    print("Outputs")
    job.print_immediate_outputs()
  if arguments['inputs-all']:
    print("Inputs-all")
    job.print_all_inputs()
  if arguments['outputs-all']:
    print("Outputs-all")
    job.print_all_outputs()
  if arguments['info']:
    job.print_job()

if __name__ == "__main__":
  main()
