#!/usr/bin/env python

import string
import os
import sys
import optparse
import subprocess

#### Classes ###################################################################

class CommandError(Exception):
    """Error raised when a shell command run by command() fails.

    Attributes:
        value -- the payload given to the constructor (in this file, a
                 human-readable message describing the failed command).
    """

    def __init__(self, value):
        # Forward the payload to Exception so that ``args`` is populated.
        # Without this, ``e.args`` is empty and the exception cannot be
        # re-created from its args (which pickle and copy rely on).
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # Kept as repr() so existing output (quoted message) is unchanged.
        return repr(self.value)

#### Functions #################################################################

# simple wrapper for os.system
def command(cmd, pass_options=False):
    """Run *cmd* through the shell with os.system.

    cmd          -- the shell command line to execute
    pass_options -- when true, forward this script's own options to the
                    command (currently only --verbose)

    Reads the module-level ``options`` object that main() sets up.

    Raises CommandError if os.system returns a non-zero value. The number
    in the message is the raw value returned by os.system, not a decoded
    exit code.
    """
    # Append forwarded options first so the verbose trace below shows the
    # command exactly as it is going to be executed.
    if pass_options and options.verbose:
        cmd += " --verbose"
    if options.verbose:
        sys.stderr.write("Running \"" + cmd + "\"\n")
    status_code = os.system(cmd)
    if status_code != 0:
        raise CommandError("Command %s exited with non-zero status %d." % (cmd, status_code))

def which(cmd):
    """Locate *cmd* on the PATH using the external ``which`` utility.

    cmd -- name of the executable to look up

    Returns the path printed by ``which`` (whitespace-stripped), or None
    when the command cannot be found.
    """
    # Pass an argument list instead of a shell string so that *cmd* is never
    # interpreted by a shell.
    try:
        proc = subprocess.Popen(["which", cmd],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError:
        # The ``which`` utility itself is not available.
        return None
    stdout_value, stderr_value = proc.communicate()

    # Some ``which`` implementations report a miss on stderr, others only
    # through a non-zero exit status with no output at all; treat both as
    # "not found".
    if proc.returncode != 0 or stderr_value:
        return None

    # On Python 3 the pipe yields bytes; normalise to the native str type.
    if not isinstance(stdout_value, str):
        stdout_value = stdout_value.decode()

    path = stdout_value.strip()
    if not path:
        return None
    return path

#### Main ######################################################################

def main(argv=None):
    """Run md5sumWrapper over the given files as a para batch.

    argv -- full argument vector including the program name; defaults to
            sys.argv.

    Creates a temporary batch directory in the current working directory,
    writes one job line per input file, runs ``para make`` (over ssh when a
    head node is given with -c/--cluster or $CLUSTER), cats the per-job
    output files to stdout and finally removes the batch directory.

    Side effect: stores the parsed options in the module-global ``options``,
    which command() reads.

    Returns 10 on a usage error, when md5sumWrapper cannot be found, or when
    a command fails; returns None on success.
    """
    global options

    if argv is None:
        argv = sys.argv

    # parse the args
    parser = optparse.OptionParser(usage="Usage: %prog [options] file1 file2 ... fileN",
                description="Runs md5sum on a para cluster. If $CLUSTER is set, ssh to $CLUSTER to run para. Can be overridden with -c/--cluster. If neither are set, assumes you are running it on the headnode.", version="%prog 0.9")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose")
    parser.add_option("-c", "--cluster", action="store", dest="cluster", help="use the given cluster headnode")
    parser.add_option("-m", "--max-jobs", action="store", type="int", default=40, dest="max_job")

    # Parse the vector we were given (minus the program name) rather than
    # implicitly falling back to sys.argv, so callers can pass their own.
    (options, args) = parser.parse_args(argv[1:])

    # output a usage message
    if not args:
        parser.print_help()
        return 10

    # find the path to the md5sum wrapper; an empty result counts as missing
    md5path = which("md5sumWrapper")
    if not md5path:
        sys.stderr.write("Could not find md5sumWrapper in path.\n")
        return 10

    if options.verbose:
        sys.stderr.write("Path to md5sumWrapper: %s\n" % md5path)
        sys.stderr.write("Max number of concurrent jobs: %d\n" % options.max_job)

    # form the temp filename; pid + hostname keeps concurrent runs apart
    para_dir = "tmp_%d_%s_para_dir" % (os.getpid(), os.uname()[1])
    job_list_filename = "%s/jobs" % para_dir

    # find what head node to use: the command line wins over $CLUSTER
    cluster_head_node = options.cluster or os.environ.get("CLUSTER")

    if options.verbose:
        if cluster_head_node:
            sys.stderr.write("Connecting to head node %s to run para\n" % cluster_head_node)
        else:
            sys.stderr.write("Running para locally\n")

    # make the parasol dir; from here on it must always be cleaned up
    os.mkdir(para_dir)
    status = None
    try:
        try:
            output_paths = _write_job_list(job_list_filename, md5path, args, para_dir)

            # run the para command
            para_cmd = "para make -batch=%s -maxJob=%d %s" % (para_dir, options.max_job, job_list_filename)
            if cluster_head_node:
                # The remote side changes into our working directory first.
                # NOTE(review): presumably the directory is on a filesystem
                # shared with the head node -- confirm.
                command('ssh -x %s "cd %s ; %s"' % (cluster_head_node, os.getcwd(), para_cmd))
            else:
                command(para_cmd)

            # cat the files, in the same order as the input files
            command("cat " + " ".join(output_paths))
        except CommandError:
            # sys.exc_info() instead of "except X, e" / "except X as e" so
            # this parses on every Python version.
            sys.stderr.write("%s\n" % (sys.exc_info()[1].value,))
            status = 10
    finally:
        # clean up, whatever happened above
        if not _remove_para_dir(para_dir):
            status = 10

    return status


def _write_job_list(job_list_filename, md5path, files, para_dir):
    """Write one job line per input file; return the output paths in order."""
    output_paths = ["%s/%05d" % (para_dir, index) for index in range(len(files))]
    job_list = open(job_list_filename, "w")
    try:
        for input_path, output_path in zip(files, output_paths):
            # Job line: wrapper path, input file, numbered output file.
            job_list.write("%s %s %s\n" % (md5path, input_path, output_path))
    finally:
        job_list.close()
    return output_paths


def _remove_para_dir(para_dir):
    """Remove the temporary batch directory; return True on success."""
    try:
        command("rm -rf %s" % para_dir)
    except CommandError:
        sys.stderr.write("%s\n" % (sys.exc_info()[1].value,))
        return False
    return True

#### Module ####################################################################

if __name__ == "__main__":
    # Script entry point: use main()'s return value as the exit status.
    exit_status = main()
    sys.exit(exit_status)
