# for emacs: -*- mode: sh; -*-

# Cavia porcellus -- Broad Institute Release 3.0 (Feb 12 2008)

# Template from bosTau4.txt

#########################################################################
# DOWNLOAD SEQUENCE (DONE - 2008-04-02 - Tim and Kate)
    ssh kkstore05
    mkdir /cluster/store12/cavPor3
    ln -s /cluster/store12/cavPor3 /cluster/data
    mkdir /cluster/data/cavPor3/broad
    cd /cluster/data/cavPor3/broad

    wget --timestamping \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/guineaPig/cavPor3/assembly.agp \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/guineaPig/cavPor3/assembly.bases.gz \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/guineaPig/cavPor3/assembly.quals.gz

    # (disabled: the only URL was commented out as "Not helpful", which left
    # a bare "wget --timestamping" with no URL -- it would just error out,
    # so comment out the whole command)
    #wget --timestamping \
#Not helpful ftp://ftp.broad.mit.edu/pub/assemblies/mammals/guineaPig/cavPor3/assembly_supers.qual.gz

qaToQac assembly.quals.gz stdout | qacAgpLift assembly.agp stdin cavPor3.qual.qac

########## From broad inst. cavPor3/BasicStats.out
# --------------------------------------------------------------------------------
# Sat Feb 02 03:52:11 2008 run (pid=21060), using Tue Jan 22 11:07:31 EST 2008 make
# BasicStats PRE=/wga/dev/WGAdata DATA=projects/Guineapig RUN=run/work           \
#            SUBDIR=post5 QUAL_STATS=True OUTFILE=BasicStats.out
# --------------------------------------------------------------------------------
# Supercontigs having < 3 reads or < 1kb sequence are ignored.
# 8 gaps <= -1000; 0 gaps <= -10000; 0 gaps <= -100000
# fraction of gaps < -10kb or more than 4 deviations below zero: 0.0222%
# 667 gaps > 10kb, 60 gaps > 50kb, 0 gaps > 200kb, 0 gaps > 1Mb
# 93.8% of reads were used in the assembly (95.55% of bases, 96.3% of Q20 bases)
# 0.00512% of reads were used multiply in the assembly
# 61603 contigs, having N50 length 80583
# total contig length: 2663352932, spanning 2722377657 bases (with 2.17% in gaps)
# 3143 supercontigs, having N50 length 27408292 (not including gaps)
# 2.48% of assembly in supers of size < 200000 (67399955 bases)
# Assembly base coverage: 6.79X.  Assembly Q20 coverage:  5.99X.
# 100% of bases have q >= 1
# 99.57% of bases have q >= 20
# 99.02% of bases have q >= 30
# 98.4% of bases have q >= 40
# 97.72% of bases have q >= 50

   cut -f 1 assembly.agp | uniq -c | wc -l 
   # Number of scaffolds: 3143
   cut -f 1 assembly.agp | uniq -c | sort -b -n -r | cut -c 1-8 | uniq -c | sort -n -r -b | head -3
   # Number of scaffolds with single contig: 2176 thus 967 multi-contig scaffolds   




#########################################################################
# Create .ra file and run makeGenomeDb.pl
    ssh kkstore05
    cd /cluster/data/cavPor3
cat << _EOF_ >cavPor3.config.ra
# Config parameters for makeGenomeDb.pl:
db cavPor3
clade mammal
genomeCladePriority 35
scientificName Cavia porcellus
commonName Guinea Pig
assemblyDate Feb. 2008
assemblyLabel Broad Institute cavPor3 
orderKey 99
#mitoAcc AJ222767
mitoAcc 5679797
fastaFiles /cluster/data/cavPor3/broad/assembly.bases.gz
agpFiles /cluster/data/cavPor3/broad/assembly.agp
qualFiles /cluster/data/cavPor3/broad/cavPor3.qual.qac
dbDbSpeciesDir guineaPig
_EOF_

# use 'screen' make sure on kkstore05
    makeGenomeDb.pl -verbose=2 cavPor3.config.ra > makeGenomeDb.out 2>&1 &

# 'ctrl-a ctrl-d' (screen detach) returns to previous shell
cut -f 2 chrom.sizes | ave stdin
# Q1 7169.500000
# median 13298.000000
# Q3 55788.500000
# average 866164.007952
# min 3002.000000
# max 88675666.000000
# count 3144
# total 2723219641.000000
# standard deviation 5316243.688364

	# NOTES -- STUFF THAT YOU WILL HAVE TO DO --
	#
	#
	# Template trackDb.ra and .html's have been created, but they all need editing!
	#
	# cd /cluster/data/cavPor3/TemporaryTrackDbCheckout/kent/src/hg/makeDb/trackDb/guineaPig/cavPor3
	#
	# Search for '***' notes in each file in and make corrections (sometimes the
	# files used for a previous assembly might make a better template):
	#  description.html /cluster/data/cavPor3/html/{trackDb.ra,gap.html,gold.html}
	#
	# Then cd ../.. (to trackDb/) and
	# - edit makefile to add cavPor3 to DBS.
	# - (if necessary) cvs add guineaPig
	# - cvs add guineaPig/cavPor3
	# - cvs add guineaPig/cavPor3/*.{ra,html}
	# - cvs ci -m "Added cavPor3 to DBS." makefile
	# - cvs ci -m "Initial descriptions for cavPor3." guineaPig/cavPor3
	# - (if necessary) cvs ci guineaPig
        cvs ci -m "Initial description page for browser gateway of Cavia porcellus (guinea pig)" description.html
        cvs ci -m "Initial description page of gap location track for Cavia porcellus (guinea pig)" gap.html
        cvs ci -m "Initial description page of assembly track for Cavia porcellus (guinea pig)" gold.html
	# - Run make update DBS=cavPor3 and make alpha when done.
	# - (optional) Clean up /cluster/data/cavPor3/TemporaryTrackDbCheckout
	# - cvsup your ~/kent/src/hg/makeDb/trackDb and make future edits there.
		
   


#########################################################################
# REPEATMASKER (DONE - 2008-04-05 - Tim)
    ssh kkstore06
    screen # use a screen to manage this job
    mkdir /cluster/data/cavPor3/bed/repeatMasker
    cd /cluster/data/cavPor3/bed/repeatMasker
    doRepeatMasker.pl -buildDir=/cluster/data/cavPor3/bed/repeatMasker \
        cavPor3 > do.log 2>&1 &

    # Note: can run simpleRepeats simultaneously
    #### When done with RM:
    ssh pk
    para time
    # CPU time in finished jobs:   26236391s  437273.18m  7287.89h  303.66d  0.832 y
    # IO & Wait Time:                548808s    9146.80m   152.45h    6.35d  0.017 y
    # Average job time:                4735s      78.91m     1.32h    0.05d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:            5871s      97.85m     1.63h    0.07d
    # Submission to last job:        130217s    2170.28m    36.17h    1.51d


    time nice -n +19 featureBits cavPor3 rmsk > fb.cavPor3.rmsk.txt 2>&1 &
    # 732765485 bases of 2663369733 (27.513%) in intersection

    # RepeatMasker and lib version from do.log:
    #    RepeatMasker version development-$Id: cavPor3.txt,v 1.22 2010/04/21 19:22:27 hiram Exp $
    #    Jan 11 2008 (open-3-1-9) version of RepeatMasker
    #    CC   RELEASE 20071204;

    # Compare coverage to previous assembly:
    # skip this since cavPor 2 was never added to browser
    #featureBits cavPor2 rmsk

#########################################################################
# SIMPLE REPEATS TRF (DONE - 2008-04-04 - Tim)
    ssh kkstore05
    screen # use a screen to manage this job
    mkdir /cluster/data/cavPor3/bed/simpleRepeat
    cd /cluster/data/cavPor3/bed/simpleRepeat
    # 
    doSimpleRepeat.pl -buildDir=/cluster/data/cavPor3/bed/simpleRepeat \
	cavPor3 > do.log 2>&1 &

    #### When done
    ssh pk
    para time
    # Completed: 73 of 73 jobs
    # CPU time in finished jobs:      13220s     220.33m     3.67h    0.15d  0.000 y
    # IO & Wait Time:                   679s      11.32m     0.19h    0.01d  0.000 y
    # Average job time:                 190s       3.17m     0.05h    0.00d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:             397s       6.62m     0.11h    0.00d
    # Submission to last job:          1320s      22.00m     0.37h    0.02d

    featureBits cavPor3 simpleRepeat
    # 33641656 bases of 2663369733 (1.263%) in intersection
    #	after RM run is done, add this mask:
    cd /cluster/data/cavPor3
    twoBitMask cavPor3.rmsk.2bit -add bed/simpleRepeat/trfMask.bed cavPor3.2bit

    twoBitToFa cavPor3.2bit stdout | faSize stdin
    #  2723219641 bases (59849908 N's 2663369733 real 1930234123 upper 733135610 lower) in 3144 sequences in 1 files
    #  Total size: mean 866164.0 sd 5317089.3 min 3002 (scaffold_3142) max 88675666 (scaffold_0) median 13298
    #  N count: mean 19036.2 sd 100029.3
    #  U count: mean 613942.2 sd 3764766.9
    #  L count: mean 233185.6 sd 1462738.3
    #  %26.92 masked total, %27.53 masked real
    # >>> NOTE: 2723219641 bases matches chrom.sizes above, but featureBits
    # >>> reports 2663369733 -- featureBits counts only non-gap ("real") bases
    # >>> (see the faSize N count above and "featureBits cavPor3 -not gap" below)

    twoBitToFa cavPor3.rmsk.2bit stdout | faSize stdin
    # 2723219641 bases (59849908 N's 2663369733 real 1931053208 upper 732316525 lower) in 3144 sequences in 1 files
    # Total size: mean 866164.0 sd 5317089.3 min 3002 (scaffold_3142) max 88675666 (scaffold_0) median 13298
    # N count: mean 19036.2 sd 100029.3
    # U count: mean 614202.7 sd 3766351.7
    # L count: mean 232925.1 sd 1461161.8
    # %26.89 masked total, %27.50 masked real

    #########################################################################
    # Link to it from /gbdb:  (DONE - 2008-04-10 - Tim)
    ln -s /cluster/data/cavPor3/cavPor3.2bit /gbdb/cavPor3/cavPor3.2bit

#########################################################################
# Create OOC file for genbank runs (DONE - 2008-04-07 - Tim)
# use same repMatch value as bosTau2
    ssh kkstore05
    cd /cluster/data/cavPor3
    blat cavPor3.2bit /dev/null /dev/null -tileSize=11 \
    	-makeOoc=cavPor3.11.1005.ooc -repMatch=1005
    # Wrote 26905 overused 11-mers to cavPor3.11.1005.ooc
    # -rw-rw-r--  1 tdreszer protein    107628 Apr  7 13:22 cavPor3.11.1005.ooc
    blat cavPor3.2bit /dev/null /dev/null -tileSize=11 \
	-makeOoc=cavPor3.11.ooc -repMatch=1024
    # Wrote 25815 overused 11-mers to cavPor3.11.ooc
    # -rw-rw-r--  1 tdreszer protein    103268 Apr  7 13:17 cavPor3.11.ooc

    ssh kkr1u00
    mkdir /iscratch/i/cavPor3
    cd /iscratch/i/cavPor3
    cp -p /cluster/data/cavPor3/cavPor3.2bit .
    # fan the 2bit copy out to the other iscratch nodes (kkr2u00 .. kkr8u00)
    # so cluster jobs can read it locally
    for R in 2 3 4 5 6 7 8
do
    rsync -a --progress ./ kkr${R}u00:/iscratch/i/cavPor3/
done

#########################################################################
# Run WindowMasker because RepeatMasker coverage is low (DONE - 2008-04-08 - Tim)
    screen
    mkdir /cluster/data/cavPor3/bed/windowMasker
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/windowMasker
    nice doWindowMasker.pl -workhorse=kolossus \
        -buildDir=/cluster/data/cavPor3/bed/windowMasker cavPor3 > wmRun.log 2>&1 &

    #	load this initial data to get ready to clean it
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/windowMasker
    hgLoadBed cavPor3 windowmaskerSdust windowmasker.sdust.bed.gz
    # Loaded 14554107 elements of size 3

    #	eliminate the gaps from the masking
    featureBits cavPor3 -not gap -bed=notGap.bed
    # 2663369733 bases of 2663369733 (100.000%) in intersection
    featureBits cavPor3 windowmaskerSdust
    # 990884347 bases of 2663369733 (37.204%) in intersection

    time nice -n +19 featureBits cavPor3 windowmaskerSdust notGap.bed \
        -bed=stdout | gzip -c > cleanWMask.bed.gz
    # 931034439 bases of 2663369733 (34.957%) in intersection

    #	reload track to get it clean
    hgLoadBed cavPor3 windowmaskerSdust cleanWMask.bed.gz
    # Loaded 14547726 elements of size 4
    featureBits cavPor3 windowmaskerSdust
    # 931034439 bases of 2663369733 (34.957%) in intersection 

    #	mask the sequence with this clean mask
    zcat cleanWMask.bed.gz \
	| twoBitMask ../../cavPor3.unmasked.2bit stdin \
	    -type=.bed cavPor3.cleanWMSdust.2bit
    twoBitToFa cavPor3.cleanWMSdust.2bit stdout | faSize stdin \
        > cavPor3.cleanWMSdust.faSize.txt
    cat cavPor3.cleanWMSdust.faSize.txt
    # 2723219641 bases (59849908 N's 2663369733 real 1732335294 upper 931034439 lower) in 3144 sequences in 1 files
    # Total size: mean 866164.0 sd 5317089.3 min 3002 (scaffold_3142) max 88675666 (scaffold_0) median 13298
    # N count: mean 19036.2 sd 100029.3
    # U count: mean 550997.2 sd 3547541.7
    # L count: mean 296130.5 sd 1686844.5
    # %34.19 masked total, %34.96 masked real

#########################################################################
#  Starting Genbank (Done - 2008-04-10 - Tim)
    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    # edit etc/genbank.conf to add the following entry:

# cavPor3 (C. porcellus) 3144 scaffolds 967 are multi-contig and 2176 are single contig
cavPor3.serverGenome = /cluster/data/cavPor3/cavPor3.2bit
cavPor3.clusterGenome = /iscratch/i/cavPor3/cavPor3.2bit
cavPor3.ooc = /cluster/data/cavPor3/cavPor3.11.ooc
cavPor3.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
cavPor3.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
cavPor3.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
cavPor3.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
cavPor3.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
cavPor3.lift = no
cavPor3.downloadDir = cavPor3
cavPor3.perChromTables = no
cavPor3.refseq.mrna.native.load = no
cavPor3.refseq.mrna.xeno.load = yes
cavPor3.genbank.mrna.xeno.load = yes
cavPor3.genbank.est.native.load = yes
        
    cvs ci -m "Added cavPor3." etc/genbank.conf
    # update /cluster/data/genbank/:
    make etc-update
    cvs ci -m "Turned off native and on xeno." etc/genbank.conf

    # Edit src/lib/gbGenome.c to add new species.
    # static char *cavPorNames[] = {"Cavia porcellus", NULL};
    # static struct dbToSpecies dbToSpeciesMap[] = { ...>>> {"cavPor", cavPorNames}, 

    cvs ci -m "Added guinea pig." src/lib/gbGenome.c
    make install-server

    ssh genbank
    screen  # control this business with a screen since it takes a while
    cd /cluster/data/genbank

    # This is a call to a script that will push our jobs out to the cluster
    # since it's a big job.  
    time nice -n +19 bin/gbAlignStep -initial cavPor3 &
    #	logFile: var/build/logs/2008.03.10-14:14:43.cavPor3.initalign.log
    #	real    567m7.431s
    # > For comparison:  logFile: var/build/logs/2008.04.08-14:37:23.bosTau4.initalign.log
    # >                  real    45m6.595s
    # Batch failed after 4 tries on /cluster/genbank/genbank/bin/gbBlat genbank.164.0/cavPor3/full/psl/est.eb.native.1/scaffold_41/scaffold_41.job genbank.164.0/cavPor3/full/psl/est.eb.native.1/scaffold_41/scaffold_41.psl
    # command failed: ssh -x kk cd /cluster/data/genbank/build/work/initial.cavPor3/align\; para make -maxPush=200000 align.jobs </dev/null at /cluster/genbank/genbank/bin/../lib/gbCommon.pm line 238. at /cluster/genbank/genbank/bin/../lib/gbCommon.pm line 238.
    # command failed: gbAlignRun -workdir=/cluster/data/genbank/build/work/initial.cavPor3/align  at /cluster/genbank/genbank/bin/../lib/gbCommon.pm line 238. at /cluster/genbank/genbank/bin/../lib/gbCommon.pm line 238.
    #[1]+  Exit 2                  time nice -n +19 bin/gbAlignStep -initial cavPor3
    ## Determined the machine kkr4u58 was not answering ssh requests and all failed jobs were
    ## on that machine:
    parasol machine remove kkr4u58.kilokluster.ucsc.edu
    para push -retries=6
    ### After manual restart of the para jobsList:
    #Checking finished jobs
    #Completed: 21128 of 21128 jobs
    #CPU time in finished jobs:   15970173s  266169.54m  4436.16h  184.84d  0.506 y
    #IO & Wait Time:                685589s   11426.49m   190.44h    7.94d  0.022 y
    #Average job time:                 788s      13.14m     0.22h    0.01d
    #Longest running job:                0s       0.00m     0.00h    0.00d
    #Longest finished job:            1581s      26.35m     0.44h    0.02d
    #Submission to last job:        135502s    2258.37m    37.64h    1.57d
    #Estimated complete:                 0s       0.00m     0.00h    0.00d
    ####### Once parasol jobs are finished (handholding) run:
    time nice -n +19 bin/gbAlignStep -initial -continue=finish cavPor3 &
    # logFile: var/build/logs/2008.04.10-09:11:21.cavPor3.initalign.log
    # real    97m14.229s
    # gbAlignInstall: complete: real=2.02
    # genbank 2008.04.10-10:48:36 cavPor3.initalign: finish

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad cavPor3 &
    #	logFile: var/dbload/hgwdev/logs/2008.04.10-11:25:04.dbload.log
    #	real    16m25.416s

    # enable daily alignment and update of hgwdev (DONE - 2008-03-11 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # add cavPor3 to:
        etc/align.dbs
        etc/hgwdev.dbs
    cvs ci -m "Added cavPor3." etc/align.dbs etc/hgwdev.dbs
    make etc-update

#########################################################################
#  REstarting Genbank (beg: 2008-05-22   Done: 2008-05-23 - Tim)

    ### QA found empty refLink table which led MarkD to determine that in genbank.conf cavPor3 used default params 
    ### which were not appropriate.  The last 4 lines listed below were added, and genbank started again
    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    # edit etc/genbank.conf to add the following entry:

# cavPor3 (C. porcellus) 3144 scaffolds 967 are multi-contig and 2176 are single contig
cavPor3.serverGenome = /cluster/data/cavPor3/cavPor3.2bit
cavPor3.clusterGenome = /iscratch/i/cavPor3/cavPor3.2bit
cavPor3.ooc = /cluster/data/cavPor3/cavPor3.11.ooc
cavPor3.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
cavPor3.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
cavPor3.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
cavPor3.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
cavPor3.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
cavPor3.lift = no
cavPor3.downloadDir = cavPor3
cavPor3.perChromTables = no
cavPor3.refseq.mrna.native.load = no
cavPor3.refseq.mrna.xeno.load = yes
cavPor3.genbank.mrna.xeno.load = yes
cavPor3.genbank.est.native.load = yes
    ### The last 4 lines listed above were added, and genbank started again
        
    # update /cluster/data/genbank/:
    make etc-update
    cvs ci -m "Turned off native and on xeno." etc/genbank.conf

    # Edit src/lib/gbGenome.c to add new species.
    # static char *cavPorNames[] = {"Cavia porcellus", NULL};
    # static struct dbToSpecies dbToSpeciesMap[] = { ...>>> {"cavPor", cavPorNames}, 

    #cvs ci -m "Added guinea pig." src/lib/gbGenome.c
    make install-server
    
    ### Mark Diekhans recommends:
    # You are not really realigning, but aligning additional.  If you
    # just kick off an initial alignment, it *should* align the
    # missing partitions of the data.  However, sometimes weird things
    # happen when a new, full release is made, and a new refseq just
    # came out.  So kick on the alignment and let me know when its
    # done and I can take a look.
    # 
    # Also, to just reload everything, include the -drop flag on your
    # command line.  That is usually easiest; why waste your time
    # figuring out weird cases when the computer can just do the work.

    ssh genbank
    screen  # control this business with a screen since it takes a while
    cd /cluster/data/genbank

    # This is a call to a script that will push our jobs out to the cluster
    # since it's a big job.  
    time nice -n +19 bin/gbAlignStep -initial cavPor3 &
    # logFile: var/build/logs/2008.05.22-15:44:34.cavPor3.initalign.log
    # real    182m21.821s
    # gbAlignInstall: complete: real=2.35
    # genbank 2008.05.22-18:46:56 cavPor3.initalign: finish
    
    # Mark Diekhans: Looks good, load it.

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad cavPor3 &
    #   logFile: var/dbload/hgwdev/logs/2008.05.23-11:39:52.dbload.log
    #   real    17m53.123s

############################################################################
#	DONE - 2008-04-10 - Tim
#	Reset default position to Math1=ATOH1=NM_005172=scaffold_15:19047435-19048046
#	(NOTE(review): the SQL below uses start 19047434, one less than noted
#	here -- presumably intentional, but verify the intended coordinate)
    hgsql -e \
'update dbDb set defaultPos="scaffold_15:19047434-19048046" where name="cavPor3";' \
	hgcentraltest
    #	And there was a mistake in the description date entry
    hgsql -e \
'update dbDb set description="Feb. 2008" where name="cavPor3";' \
	hgcentraltest

#########################################################################
## genscan run (DONE - 2008-04-10 - Tim)
##	create hard masked sequence
    ssh kkstore05
    cd /cluster/data/cavPor3
    mkdir hardMasked
    # Hard-mask each scaffold for genscan: pull every sequence named in
    # chrom.sizes out of the 2bit and convert soft-masked (lowercase) bases
    # to N's, one .hard.fa file per scaffold
    for C in `cut -f1 chrom.sizes`
do
    echo "hardMasked/${C}.hard.fa"
    twoBitToFa -seq=${C} cavPor3.2bit stdout \
	| maskOutFa stdin hard hardMasked/${C}.hard.fa
    ls -ld "hardMasked/${C}.hard.fa"
done

    #	And, make sure there aren't any sequences in this lot that have
    #	become all N's with no sequence left in them.  This drives genscan nuts
    echo hardMasked/*.hard.fa | xargs faCount > faCount.hard.txt

    #	the lowest three are:
    egrep -v "^#|^total" faCount.hard.txt \
	| awk '{print $1,$2-$7}' | sort -k2,2nr | tail -3
    # scaffold_2949 425
    # scaffold_3101 357
    # scaffold_3130 0
    #	remove the all-N scaffold so genscan does not blow up on it
    #	(fixed path typo: was "harMasked/", which does not exist --
    #	the directory created above is "hardMasked/")
    rm hardMasked/scaffold_3130.hard.fa
    ##### Saving this junk from bosTau4.txt because it will be useful next time
    ##  #	There are a whole bunch of these, and many with just a few bases.
    ##  #	Actually, before removing these for genscan, run the cpgIsland
    ##  #	business first since it can work on them all.
    ##  #	So, remove any with less than 100 bases of sequence
    ##  egrep -v "^#|^total" faCount.hard.txt | awk '{size=$2-$7; if (size < 100){printf "hardMasked/%s.hard.fa\n", $1}}' | xargs rm

    #	now get them over to a kluster location
    mkdir /san/sanvol1/scratch/cavPor3/hardChunks
    cd /san/sanvol1/scratch/cavPor3/hardChunks
    #	creating 4,000,000 sized chunks, the chroms stay together as
    #	single pieces.  The contigs get grouped together into 4,000,000
    #	sized fasta files.  You don't want to break these things up
    #	because genscan will be doing its own internal 2.4 million
    #	window on these pieces, and the gene names are going to be
    #	constructed from the sequence name in these fasta files.
    echo /cluster/data/cavPor3/hardMasked/*.hard.fa | xargs cat \
	| faSplit about stdin ls c_

    ssh hgwdev
    mkdir /cluster/data/cavPor3/bed/genscan
    cd /cluster/data/cavPor3/bed/genscan
    # Check out hg3rdParty/genscanlinux to get latest genscan:
    cvs co hg3rdParty/genscanlinux

    # Run on small cluster (more mem than big cluster).
    ssh memk
    cd /cluster/data/cavPor3/bed/genscan
    # Make 3 subdirectories for genscan to put their output files in
    mkdir gtf pep subopt
    # Generate a list file, genome.list, of all the hard-masked contigs that 
    # *do not* consist of all-N's (which would cause genscan to blow up)
    #	Since we split on gaps, we have no chunks like that.  You can
    #	verify with faCount on the chunks.
    ls -1Sr /san/sanvol1/scratch/cavPor3/hardChunks/c_*.fa > genome.list

    # Create run-time script to operate gsBig in a cluster safe manner
    cat << \_EOF_ > runGsBig
#!/bin/csh -fe
# Cluster-safe wrapper for gsBig: copy one fasta chunk to node-local
# /scratch/tmp, run gsBig there, copy the three result files back into the
# batch directories (gtf/, pep/, subopt/), then remove the scratch copy.
#   $1 = directory holding the input chunks
#   $2 = chunk file name (e.g. c_0.fa); :r strips the extension for $fileRoot
set runDir = `pwd`
set srcDir = $1
set inFile = $2
set fileRoot = $inFile:r
# private node-local work directory named after the chunk
mkdir /scratch/tmp/$fileRoot
cp -p $srcDir/$inFile /scratch/tmp/$fileRoot
pushd /scratch/tmp/$fileRoot
/cluster/bin/x86_64/gsBig $inFile $fileRoot.gtf -trans=$fileRoot.pep -subopt=$fileRoot.bed -exe=$runDir/hg3rdParty/genscanlinux/genscan -par=$runDir/hg3rdParty/genscanlinux/HumanIso.smat -tmp=/scratch/tmp -window=2400000
popd
# copy results back to the batch directory, then clean up scratch
cp -p /scratch/tmp/$fileRoot/$fileRoot.gtf gtf
cp -p /scratch/tmp/$fileRoot/$fileRoot.pep pep
cp -p /scratch/tmp/$fileRoot/$fileRoot.bed subopt
rm -fr /scratch/tmp/$fileRoot
_EOF_
    # << happy emacs
    chmod +x runGsBig
    cat << \_EOF_ > template
#LOOP
runGsBig /san/sanvol1/scratch/cavPor3/hardChunks $(file1) {check out line gtf/$(root1).gtf} {check out line pep/$(root1).pep} {check out line subopt/$(root1).bed} 
#ENDLOOP
_EOF_
    # << happy emacs

    gensub2 genome.list single template jobList
    para create jobList
    para try, check, push, check, ...

    ##[tdreszer@memk /cluster/data/cavPor3/bed/genscan] para check
    ##172 jobs in batch
    ##13 jobs (including everybody's) in Parasol queue.
    ##Checking finished jobs
    ##crashed: 7
    ##ranOk: 165
    ##total jobs in batch: 172
    ##[tdreszer@memk /cluster/data/cavPor3/bed/genscan] para status | grep -v done
    ##172 jobs in batch
    ##13 jobs (including everybody's) in Parasol queue.
    ##Checking finished jobs
    ###state  tries   real    cpu     host    jobid   cmd
    ##crash   1       60.00   55.30   mkr0u0  825047  runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_19.fa gtf/c_19.gtf pep/c_19.pep subopt/c_19.bed
    ##crash   1       215.00  213.48  mkr0u6  825117  runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_152.fa gtf/c_152.gtf pep/c_152.pep subopt/c_152.bed
    ##crash   1       97.00   96.01   mkr0u0  825153  runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_111.fa gtf/c_111.gtf pep/c_111.pep subopt/c_111.bed
    ##crash   1       149.00  146.78  mkr0u0  825158  runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_107.fa gtf/c_107.gtf pep/c_107.pep subopt/c_107.bed
    ##crash   1       638.00  636.81  mkr0u3  825178  runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_71.fa gtf/c_71.gtf pep/c_71.pep subopt/c_71.bed
    ##crash   1       904.00  901.75  mkr0u6  825185  runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_32.fa gtf/c_32.gtf pep/c_32.pep subopt/c_32.bed
    ##crash   1       1238.00 1232.05 mkr0u6  825196  runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_01.fa gtf/c_01.gtf pep/c_01.pep subopt/c_01.bed
    ##[tdreszer@memk /cluster/data/cavPor3/bed/genscan] para problems | grep Insuff
    ##Insufficient memory error: results may be unreliable.
    ##Insufficient memory error: results may be unreliable.
    ##Insufficient memory error: results may be unreliable.
    ##Insufficient memory error: results may be unreliable.
    ##Insufficient memory error: results may be unreliable.
    ##Insufficient memory error: results may be unreliable.
    ##Insufficient memory error: results may be unreliable.
    ####### However, these are not simply the largest files:
    [tdreszer@memk /cluster/data/cavPor3/bed/genscan] ll /san/sanvol1/scratch/cavPor3/hardChunks/c_19.fa
    -rw-rw-r--  1 tdreszer protein 4408036 Apr  8 15:39 /san/sanvol1/scratch/cavPor3/hardChunks/c_19.fa
    [tdreszer@memk /cluster/data/cavPor3/bed/genscan] ll /san/sanvol1/scratch/cavPor3/hardChunks/c_152.fa
    -rw-rw-r--  1 tdreszer protein 8304826 Apr  8 15:40 /san/sanvol1/scratch/cavPor3/hardChunks/c_152.fa
    [tdreszer@memk /cluster/data/cavPor3/bed/genscan] ll /san/sanvol1/scratch/cavPor3/hardChunks/c_111.fa
    -rw-rw-r--  1 tdreszer protein 18155094 Apr  8 15:40 /san/sanvol1/scratch/cavPor3/hardChunks/c_111.fa
    [tdreszer@memk /cluster/data/cavPor3/bed/genscan] ll /san/sanvol1/scratch/cavPor3/hardChunks/c_107.fa
    -rw-rw-r--  1 tdreszer protein 20080719 Apr  8 15:40 /san/sanvol1/scratch/cavPor3/hardChunks/c_107.fa
    [tdreszer@memk /cluster/data/cavPor3/bed/genscan] ll /san/sanvol1/scratch/cavPor3/hardChunks/c_71.fa
    -rw-rw-r--  1 tdreszer protein 36860211 Apr  8 15:40 /san/sanvol1/scratch/cavPor3/hardChunks/c_71.fa
    [tdreszer@memk /cluster/data/cavPor3/bed/genscan] ll /san/sanvol1/scratch/cavPor3/hardChunks/c_32.fa
    -rw-rw-r--  1 tdreszer protein 49549701 Apr  8 15:40 /san/sanvol1/scratch/cavPor3/hardChunks/c_32.fa
    [tdreszer@memk /cluster/data/cavPor3/bed/genscan] ll /san/sanvol1/scratch/cavPor3/hardChunks/c_01.fa
    -rw-rw-r--  1 tdreszer protein 82754438 Apr  8 15:39 /san/sanvol1/scratch/cavPor3/hardChunks/c_01.fa
    # Okay, try splitting the failed c_*.fa files into individual sequences; rt=retry
    ssh memk
    pushd /san/sanvol1/scratch/cavPor3/hardChunks
    faSplit sequence c_19.fa 1000 c_rt19_
    ## Wrong one!   faSplit sequence c_151.fa 1000 c_rt151_
    faSplit sequence c_111.fa 1000 c_rt111_
    faSplit sequence c_107.fa 1000 c_rt107_
    faSplit sequence c_71.fa 1000 c_rt71_
    faSplit sequence c_32.fa 1000 c_rt32_
    faSplit sequence c_01.fa 1000 c_rt01_
    popd # cd /cluster/data/cavPor3/bed/genscan
    ls -1Sr /san/sanvol1/scratch/cavPor3/hardChunks/c_rt*.fa > genome_rt.list

    gensub2 genome_rt.list single template jobList_rt   
    para create jobList_rt
    para try, check, push, check, ...

    # Checking finished jobs
    # crashed: 6
    # ranOk: 71
    # total jobs in batch: 77
    ## missed one:
    pushd /san/sanvol1/scratch/cavPor3/hardChunks
    faSplit sequence c_152.fa 1000 c_rt152_
    popd # cd /cluster/data/cavPor3/bed/genscan
    ls -1Sr /san/sanvol1/scratch/cavPor3/hardChunks/c_rt152*.fa > genome_rt2.list

    gensub2 genome_rt2.list single template jobList_rt2
    para create jobList_rt2
    para try, check, push, check, ...
    # Checking finished jobs
    # crashed: 1
    # ranOk: 10
    # total jobs in batch: 11

    ######## After breaking failed chunks by sequence, the 7 culprit scaffolds are known:
    # faCount /san/sanvol1/scratch/cavPor3/hardChunks/c_rt19_0010.fa
    # faCount /san/sanvol1/scratch/cavPor3/hardChunks/c_rt151_0010.fa
    # faCount /san/sanvol1/scratch/cavPor3/hardChunks/c_rt111_0010.fa
    # faCount /san/sanvol1/scratch/cavPor3/hardChunks/c_rt107_0010.fa
    # faCount /san/sanvol1/scratch/cavPor3/hardChunks/c_rt71_0025.fa
    # faCount /san/sanvol1/scratch/cavPor3/hardChunks/c_rt32_0005.fa
    # faCount /san/sanvol1/scratch/cavPor3/hardChunks/c_rt01_0000.fa
    #seq                len       A        C        G        T        N      cpg
    #scaffold_115     4065651   736071   696850   700707   740890  1191133  60230
    #scaffold_81      7649460  1502224  1297711  1285486  1482474  2081565  87359
    #scaffold_45     15715546  2926604  2362405  2351615  2933678  5141244 126807
    #scaffold_41     16993173  3764581  2421095  2432646  3805601  4569250 103547
    #scaffold_21     33939536  7356221  4629107  4649855  7388452  9915901 190985
    #scaffold_13     48363610 10226185  7754898  7730208 10253556 12398763 432844
    #scaffold_1      81131790 17393288 12255795 12237937 17366862 21877908 561145

    ### Punting.  Hiram suggests that we just wait till the alignments fill us in.

    # At this point, all scaffolds have been successfully genscanned EXCEPT the 7 listed above

    # cat and lift the results into single files
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/genscan
    sort -k1,1 -k4.4n gtf/c_*.gtf > genscan.gtf
    sort -k1,1 -k2,2n subopt/c_*.bed > genscanSubopt.bed 
    cat pep/c_*.pep > genscan.pep

    # Load into the database as so:
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/genscan
    ldHgGene cavPor3 -gtf genscan genscan.gtf
    # Read 64818 transcripts in 416957 lines in 1 files
    # 64818 groups 2179 seqs 1 sources 1 feature types
    # 64818 gene predictions

    hgPepPred cavPor3 generic genscanPep genscan.pep
    hgLoadBed cavPor3 genscanSubopt genscanSubopt.bed
    #	Loaded 537376 elements of size 6

    #	let's check the numbers
    time nice -n +19 featureBits cavPor3 genscan
    #   76489707 bases of 2663369733 (2.872%) in intersection

#########################################################################
## genscan RErun (BEG: 2008-05-22  DONE: 2008-05-22 - Tim)

### ### QA found missing genscan data, so retrying the unlucky 7 by splitting on gaps
    ssh memk
    pushd /san/sanvol1/scratch/cavPor3/hardChunks
    faSplit gap c_rt01_0000.fa 1000000 c_rt_01_0000_gap_ 
    faSplit gap c_rt32_0005.fa 1000000 c_rt_32_0005_gap_ 
    faSplit gap c_rt71_0025.fa 1000000 c_rt_71_0025_gap_ 
    faSplit gap c_rt107_0010.fa 1000000 c_rt_107_0010_gap_ 
    faSplit gap c_rt111_0010.fa 1000000 c_rt_111_0010_gap_ 
    faSplit gap c_rt151_0010.fa 1000000 c_rt_151_0010_gap_ 
    faSplit gap c_rt19_0010.fa 1000000 c_rt_19_0010_gap_ 
    popd # cd /cluster/data/cavPor3/bed/genscan
    ls -1Sr /san/sanvol1/scratch/cavPor3/hardChunks/c_rt*_gap_*.fa > genome_gap.list

    gensub2 genome_gap.list single template jobList_gap   
    para create jobList_gap
    para try, check, push, check, ...
    # 219 jobs in batch
    # 52852 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # .......
    # ranOk: 219
    # total jobs in batch: 219

    # cat and lift the results into single files
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/genscan
    sort -k1,1 -k4.4n gtf/c_*.gtf > genscan_full.gtf
    sort -k1,1 -k2,2n subopt/c_*.bed > genscanSubopt_full.bed 
    cat pep/c_*.pep > genscan_full.pep

    # Load into the database as so:
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/genscan
    ldHgGene cavPor3 -gtf genscan genscan_full.gtf
    # Read 70478 transcripts in 454271 lines in 1 files
    # 70478 groups 2398 seqs 1 sources 1 feature types
    # 70478 gene predictions
    
    hgPepPred cavPor3 generic genscanPep genscan_full.pep
    hgLoadBed cavPor3 genscanSubopt genscanSubopt_full.bed
    #   Loaded 605836 elements of size 6

    #   let's check the numbers
    time nice -n +19 featureBits cavPor3 genscan
    #   76489707 bases of 2663369733 (2.872%) in intersection
    
### ### ### Total defeat.  faSplit on gaps results in new sequence names that do not match the known scaffold_nnnn sequences.
    
    # Clean up mess:
    cd /cluster/data/cavPor3/bed/genscan
    pushd /san/sanvol1/scratch/cavPor3/hardChunks
    rm c_rt_*_gap_*.fa
    popd # cd /cluster/data/cavPor3/bed/genscan
    rm gtf/c_rt_*_gap_*.gtf
    rm pep/c_rt_*_gap_*.pep
    rm subopt/c_rt_*_gap_*.bed
    
    # Try the seven culprits one more time.
cat << \_EOF_ > genome_oneMoreTime.list
/san/sanvol1/scratch/cavPor3/hardChunks/c_rt19_0010.fa
/san/sanvol1/scratch/cavPor3/hardChunks/c_rt151_0010.fa
/san/sanvol1/scratch/cavPor3/hardChunks/c_rt111_0010.fa
/san/sanvol1/scratch/cavPor3/hardChunks/c_rt107_0010.fa
/san/sanvol1/scratch/cavPor3/hardChunks/c_rt71_0025.fa
/san/sanvol1/scratch/cavPor3/hardChunks/c_rt32_0005.fa
/san/sanvol1/scratch/cavPor3/hardChunks/c_rt01_0000.fa
_EOF_

    ssh memk
    cd /cluster/data/cavPor3/bed/genscan
    gensub2 genome_oneMoreTime.list single template jobList_oneMoreTime   
    para create jobList_oneMoreTime
    para push, check, ...
    #  para status
    # 7 jobs in batch
    # 0 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # #state  tries   real    cpu     host    jobid   cmd
    # crash   1       58.00   54.08   mkr0u5  1182093 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt19_0010.fa gtf/c_rt19_0010.gtf pep/c_rt19_0010.pep subopt/c_rt19_0010.bed
    # done    1       228.00  224.16  mkr0u6  1182094 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt151_0010.fa gtf/c_rt151_0010.gtf pep/c_rt151_0010.pep subopt/c_rt151_0010.bed
    # crash   1       57.00   52.47   mkr0u3  1182095 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt111_0010.fa gtf/c_rt111_0010.gtf pep/c_rt111_0010.pep subopt/c_rt111_0010.bed
    # crash   1       106.00  99.41   mkr0u7  1182096 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt107_0010.fa gtf/c_rt107_0010.gtf pep/c_rt107_0010.pep subopt/c_rt107_0010.bed
    # crash   1       617.00  602.73  mkr0u5  1182097 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt71_0025.fa gtf/c_rt71_0025.gtf pep/c_rt71_0025.pep subopt/c_rt71_0025.bed
    # crash   1       922.00  896.75  mkr0u3  1182098 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt32_0005.fa gtf/c_rt32_0005.gtf pep/c_rt32_0005.pep subopt/c_rt32_0005.bed
    # crash   1       1307.00 1258.05 mkr0u7  1182099 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt01_0000.fa gtf/c_rt01_0000.gtf pep/c_rt01_0000.pep subopt/c_rt01_0000.bed
    
    # One down, 6 to go.  Try lowering window size on gsBig

    mv runGsBig runGsBig.24
    cat << \_EOF_ > runGsBig
#!/bin/csh -fe
set runDir = `pwd`
set srcDir = $1
set inFile = $2
set fileRoot = $inFile:r
mkdir /scratch/tmp/$fileRoot
cp -p $srcDir/$inFile /scratch/tmp/$fileRoot
pushd /scratch/tmp/$fileRoot
/cluster/bin/x86_64/gsBig $inFile $fileRoot.gtf -trans=$fileRoot.pep -subopt=$fileRoot.bed -exe=$runDir/hg3rdParty/genscanlinux/genscan -par=$runDir/hg3rdParty/genscanlinux/HumanIso.smat -tmp=/scratch/tmp -window=1200000
popd
cp -p /scratch/tmp/$fileRoot/$fileRoot.gtf gtf
cp -p /scratch/tmp/$fileRoot/$fileRoot.pep pep
cp -p /scratch/tmp/$fileRoot/$fileRoot.bed subopt
rm -fr /scratch/tmp/$fileRoot
_EOF_
    # << happy emacs
    chmod +x runGsBig

    para push, check, ...
    #  para status
    # 7 jobs in batch
    # 0 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # #state  tries   real    cpu     host    jobid   cmd
    # done    2       58.00   54.08   mkr0u5  1182093 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt19_0010.fa gtf/c_rt19_0010.gtf pep/c_rt19_0010.pep subopt/c_rt19_0010.bed
    # done    1       228.00  224.16  mkr0u6  1182094 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt151_0010.fa gtf/c_rt151_0010.gtf pep/c_rt151_0010.pep subopt/c_rt151_0010.bed
    # crash   2       57.00   52.47   mkr0u3  1182095 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt111_0010.fa gtf/c_rt111_0010.gtf pep/c_rt111_0010.pep subopt/c_rt111_0010.bed
    # done    2       106.00  99.41   mkr0u7  1182096 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt107_0010.fa gtf/c_rt107_0010.gtf pep/c_rt107_0010.pep subopt/c_rt107_0010.bed
    # done    2       617.00  602.73  mkr0u5  1182097 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt71_0025.fa gtf/c_rt71_0025.gtf pep/c_rt71_0025.pep subopt/c_rt71_0025.bed
    # crash   2       922.00  896.75  mkr0u3  1182098 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt32_0005.fa gtf/c_rt32_0005.gtf pep/c_rt32_0005.pep subopt/c_rt32_0005.bed
    # done    2       1307.00 1258.05 mkr0u7  1182099 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt01_0000.fa gtf/c_rt01_0000.gtf pep/c_rt01_0000.pep subopt/c_rt01_0000.bed
    
    # All but two!
    
    para push, check, ...
    #  para status
    # 7 jobs in batch
    # 0 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # #state  tries   real    cpu     host    jobid   cmd
    # done    2       58.00   54.08   mkr0u5  1182093 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt19_0010.fa gtf/c_rt19_0010.gtf pep/c_rt19_0010.pep subopt/c_rt19_0010.bed
    # done    1       228.00  224.16  mkr0u6  1182094 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt151_0010.fa gtf/c_rt151_0010.gtf pep/c_rt151_0010.pep subopt/c_rt151_0010.bed
    # done    3       57.00   52.47   mkr0u3  1182095 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt111_0010.fa gtf/c_rt111_0010.gtf pep/c_rt111_0010.pep subopt/c_rt111_0010.bed
    # done    2       106.00  99.41   mkr0u7  1182096 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt107_0010.fa gtf/c_rt107_0010.gtf pep/c_rt107_0010.pep subopt/c_rt107_0010.bed
    # done    2       617.00  602.73  mkr0u5  1182097 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt71_0025.fa gtf/c_rt71_0025.gtf pep/c_rt71_0025.pep subopt/c_rt71_0025.bed
    # done    3       922.00  896.75  mkr0u3  1182098 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt32_0005.fa gtf/c_rt32_0005.gtf pep/c_rt32_0005.pep subopt/c_rt32_0005.bed
    # done    2       1307.00 1258.05 mkr0u7  1182099 runGsBig /san/sanvol1/scratch/cavPor3/hardChunks c_rt01_0000.fa gtf/c_rt01_0000.gtf pep/c_rt01_0000.pep subopt/c_rt01_0000.bed
    ### All completed!

    # cat and lift the results into single files
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/genscan
    sort -k1,1 -k4.4n gtf/c_*.gtf > genscan_full.gtf
    sort -k1,1 -k2,2n subopt/c_*.bed > genscanSubopt_full.bed 
    cat pep/c_*.pep > genscan_full.pep

    # Load into the database as so:
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/genscan
    ldHgGene cavPor3 -gtf genscan genscan_full.gtf
    # Read 70161 transcripts in 453022 lines in 1 files
    # 70161 groups 2185 seqs 1 sources 1 feature types
    # 70161 gene predictions
    
    hgPepPred cavPor3 generic genscanPep genscan_full.pep
    hgLoadBed cavPor3 genscanSubopt genscanSubopt_full.bed
    #   Loaded 603786 elements of size 6

    #   let's check the numbers
    time nice -n +19 featureBits cavPor3 genscan
    #   82960328 bases of 2663369733 (3.115%) in intersection
    

#########################################################################
# CPGISLANDS (DONE - 2008-04-10 - Tim)
    ssh hgwdev
    mkdir /cluster/data/cavPor3/bed/cpgIsland
    cd /cluster/data/cavPor3/bed/cpgIsland

    # Build software from Asif Chinwalla (achinwal@watson.wustl.edu)
    cvs co hg3rdParty/cpgIslands
    cd hg3rdParty/cpgIslands
    make
    #	There was a problem in here, in both cpg.c and cpg_lh.c:
    #   cpg_lh.c:74: warning: conflicting types for built-in function 'malloc'
    #	warning: conflicting types for built-in function 'malloc'
    #	commented out line 74 ONLY to get this to build
    #	gcc readseq.c cpg_lh.c -o cpglh.exe
    cd ../..
    ln -s hg3rdParty/cpgIslands/cpglh.exe .
    
    # There may be warnings about "bad character" for IUPAC ambiguous 
    # characters like R, S, etc.  Ignore the warnings.  
    mkdir results
    echo ../../hardMasked/*.hard.fa | sed -e "s/ /\n/g" | while read F
    do
	FA=${F/*\/}
	C=${FA/.hard.fa/}
	echo "./cpglh.exe ${FA} > results/${C}.cpg"
	nice -n +19 ./cpglh.exe ${F} > results/${C}.cpg
    done > cpglh.out 2>&1 &
    #	about 5 minutes

    # Transform cpglh output to bed +
    cat << \_EOF_ > filter.awk
{
$2 = $2 - 1;
width = $3 - $2;
printf("%s\t%d\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\t%s\n",
       $1, $2, $3, $5,$6, width,
       $6, width*$7*0.01, 100.0*2*$6/width, $7, $9);
}
_EOF_
    #	<< happy emacs
    catDir results | awk -f filter.awk | sort -k1,1 -k2,2n > cpgIsland.bed

    ssh hgwdev
    cd /cluster/data/cavPor3/bed/cpgIsland
    hgLoadBed cavPor3 cpgIslandExt -tab \
      -sqlTable=$HOME/kent/src/hg/lib/cpgIslandExt.sql cpgIsland.bed
    #	Loaded 36373 elements of size 10
    featureBits cavPor3 cpgIslandExt
    #  21746514 bases of 2663369733 (0.817%) in intersection

#########################################################################
# READ before BLASTZ: http://genomewiki.ucsc.edu/index.php/Mm9_multiple_alignment
#########################################################################
# BLASTZ/CHAIN/NET Hg18-cavPor3 (START - 2008-04-10 - Tim; DONE 2008-04-15)
    ssh kkstore02 # store11->kkstore02-10
    screen # use a screen to manage this multi-day job
    mkdir /cluster/data/hg18/bed/blastzCavPor3.2008-04-10
    cd /cluster/data/hg18/bed/
    ln -s blastzCavPor3.2008-04-10 blastz.cavPor3
    cd blastzCavPor3.2008-04-10

    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: Human Hg18
SEQ1_DIR=/cluster/bluearc/scratch/data/hg18/nib
SEQ1_LEN=/cluster/data/hg18/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/cluster/bluearc/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/cluster/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/hg18/bed/blastzCavPor3.2008-04-10
TMPDIR=/scratch/tmp
_EOF_
    # << this line keeps emacs coloring happy

    # NOTE: be sure to ls the data in above script on workhorse machine before starting script
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -syntenicNet > do.log 2>&1
    # <ctl-A><ctl-d>  ps -ef | grep blastzCavPor3
    # real    1320m35.523s
    # HOWEVER, doBlastzChainNet failed because the run.blastz/para.results were corrupt.
    cd /cluster/data/hg18/bed/blastzCavPor3.2008-04-10/run.blastz
    para recover jobList jobListRecovered
    # wc -l jobListRecovered -> 9
    para create jobListRecovered
    para push
    # completed all jobs.  Now:
    ssh kkstore02
    screen -r -d
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	   -continue=cat -syntenicNet > do.log 2>&1 &
    #real    210m38.768s # low because of the restart that had to occur
    tail do.log
    # *** All done!
    # *** Make sure that goldenPath/hg18/vsCavPor3/README.txt is accurate.
    # *** Add {chain,net}CavPor3 tracks to trackDb.ra if necessary.

    cat fb.hg18.chainCavPor3Link.txt
    #   1267036494 bases of 2881515245 (43.971%) in intersection

    ######### Change locations in DEF due to Hiram's new methods    
    cd /cluster/data/hg18/bed/blastzCavPor3.2008-04-10
    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: Human Hg18
SEQ1_DIR=/scratch/data/hg18/nib
SEQ1_LEN=/scratch/data/hg18/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/scratch/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/hg18/bed/blastzCavPor3.2008-04-10
TMPDIR=/scratch/tmp
_EOF_
   

    mkdir /cluster/data/cavPor3/bed/blastz.hg18.swap
    ssh kkstore05 # where cavPor3 is located

    screen
    cd /cluster/data/cavPor3/bed/blastz.hg18.swap
    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/hg18/bed/blastzCavPor3.2008-04-10/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -swap -syntenicNet > do.log 2>&1
    # real    216m13.808s
    # chainSplit chain cavPor3.hg18.all.chain.gz
    # sh: pipe error: Too many open files
    # Can't open chain/scaffold_621.chain to append: Too many open files
    
    #	broken down during netSynteny.csh due to too many open files on
    #	a chainSplit
    #	However, there is no need to split when we have scaffolds.
    #   Kate fixes doBlastzChainNet.pl and retry:

    cd /cluster/data/cavPor3/bed/blastz.hg18.swap
    time nice -n +19 ~kate/kent/src/hg/utils/automation/doBlastzChainNet.pl \
	/cluster/data/hg18/bed/blastzCavPor3.2008-04-10/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -swap -continue=syntenicNet -syntenicNet > do.log 2>&1 &
    # real     29m49.617s
    # *** All done!
    # *** Make sure that (/usr/local/apache/htdocs/goldenPath/)cavPor3/vsHg18/README.txt is accurate.
    # *** Add {chain,net}Hg18 tracks to trackDb.ra if necessary.

    cat fb.cavPor3.chainHg18Link.txt
    # 1281925834 bases of 2663369733 (48.132%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET mm9-cavPor3 (START - 2008-04-10 - DONE 2008-04-14 - Tim)
    ssh kkstore06 # store4->kkstore04-10
    screen # use a screen to manage this multi-day job
    mkdir /cluster/data/mm9/bed/blastzCavPor3.2008-04-10
    cd /cluster/data/mm9/bed/
    ln -s blastzCavPor3.2008-04-10 blastz.cavPor3
    cd blastzCavPor3.2008-04-10

    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: Mouse Mm9
SEQ1_DIR=/cluster/bluearc/scratch/data/mm9/nib
SEQ1_LEN=/cluster/data/mm9/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/cluster/bluearc/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/cluster/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/mm9/bed/blastzCavPor3.2008-04-10
TMPDIR=/scratch/tmp
_EOF_
    # << this line keeps emacs coloring happy

    # NOTE: be sure to ls the data in above script on workhorse machine before starting script
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -syntenicNet > do.log 2>&1 
    # <ctl-A><ctl-d>  ps -ef | grep blastzCavPor3
    # real    1764m55.155s
    tail do.log
    #  *** All done!
    # *** Make sure that goldenPath/mm9/vsCavPor3/README.txt is accurate.
    # *** Add {chain,net}CavPor3 tracks to trackDb.ra if necessary.
    # blastz: ranOk: 52984
    cat fb.mm9.chainCavPor3Link.txt
    #    757283793 bases of 2620346127 (28.900%) in intersection

    cd /cluster/data/mm9/bed/blastzCavPor3.2008-04-10
    ######### Change locations in DEF due to Hiram's new methods    
    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: Mouse Mm9
SEQ1_DIR=/scratch/data/mm9/nib
SEQ1_LEN=/scratch/data/mm9/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/scratch/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/mm9/bed/blastzCavPor3.2008-04-10
TMPDIR=/scratch/tmp
_EOF_

    mkdir /cluster/data/cavPor3/bed/blastz.mm9.swap
    cd /cluster/data/cavPor3/bed/blastz.mm9.swap
    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/mm9/bed/blastzCavPor3.2008-04-10/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -swap -syntenicNet > do.log 2>&1 &
    # real   166m53.671s
    # Exit 25   time nice -n +19 doBlastzChainNet.pl /cluster/data/mm9/bed/blastzCavPor3.2008-04-10/DEF -bigC
    # Can't open chain/scaffold_795.chain to append: Too many open files
    # gzip: stdout: Broken pipe
    # Command failed:
    # ssh -x kolossus nice /cluster/data/cavPor3/bed/blastz.mm9.swap/axtChain/netSynteny.csh

    #	broken down during netSynteny.csh due to too many open files on
    #	a chainSplit
    #	However, there is no need to split when we have scaffolds.
    #   Kate fixes doBlastzChainNet.pl and retry:

    cd /cluster/data/cavPor3/bed/blastz.mm9.swap
    time nice -n +19 ~kate/kent/src/hg/utils/automation/doBlastzChainNet.pl \
	/cluster/data/mm9/bed/blastzCavPor3.2008-04-10/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -swap -continue=syntenicNet -syntenicNet > syn.log 2>&1 &
    # real    24m37.561s
    # *** All done!
    # *** Make sure that goldenPath/cavPor3/vsMm9/README.txt is accurate.
    # *** Add {chain,net}Mm9 tracks to trackDb.ra if necessary.
    cat fb.cavPor3.chainMm9Link.txt
    # 781173609 bases of 2663369733 (29.330%) in intersection


#########################################################################
# BLASTZ/CHAIN/NET galGal3-cavPor3 (START - 2008-04-10; DONE: 2008-04-15 - Tim)
    ssh kkstore03 # store6->kkstore03-10
    screen # use a screen to manage this multi-day job
    mkdir /cluster/data/galGal3/bed/blastzCavPor3.2008-04-10
    cd /cluster/data/galGal3/bed/
    ln -s blastzCavPor3.2008-04-10 blastz.cavPor3
    cd blastzCavPor3.2008-04-10

    cat << \_EOF_ > DEF
BLASTZ_M=50
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Chicken galGal3
SEQ1_DIR=/cluster/bluearc/scratch/data/galGal3/nib
SEQ1_LEN=/cluster/data/galGal3/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/cluster/bluearc/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/cluster/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/galGal3/bed/blastzCavPor3.2008-04-10
TMPDIR=/scratch/tmp
_EOF_
    # << this line keeps emacs coloring happy

    # NOTE: be sure to ls the data in above script on workhorse machine before starting script
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
	    -syntenicNet > do.log 2>&1 
    # <ctl-A><ctl-d>  ps -ef | grep blastzCavPor3
    # real    1507m5.132s
    tail do.log
    # *** All done!
    # *** Make sure that goldenPath/galGal3/vsCavPor3/README.txt is accurate.
    # *** Add {chain,net}CavPor3 tracks to trackDb.ra if necessary.

    cat fb.galGal3.chainCavPor3Link.txt
    #   106239838 bases of 1042591351 (10.190%) in intersection

    cd /cluster/data/galGal3/bed/blastzCavPor3.2008-04-10
    ######### Change locations in DEF due to Hiram's new methods    
    cat << \_EOF_ > DEF
BLASTZ_M=50
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Chicken galGal3
SEQ1_DIR=/scratch/data/galGal3/nib
SEQ1_LEN=/scratch/data/galGal3/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/scratch/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/galGal3/bed/blastzCavPor3.2008-04-10
TMPDIR=/scratch/tmp
_EOF_

    mkdir /cluster/data/cavPor3/bed/blastz.galGal3.swap

    cd /cluster/data/cavPor3/bed/blastz.galGal3.swap
    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/galGal3/bed/blastzCavPor3.2008-04-10/DEF \
	-bigClusterHub=pk -chainMinScore=5000 -chainLinearGap=loose \
	    -swap > do.log 2>&1
    # real    23m19.970s
    # *** All done!
    # *** Make sure that goldenPath/cavPor3/vsGalGal3/README.txt is accurate.
    # *** Add {chain,net}GalGal3 tracks to trackDb.ra if necessary.

    cat fb.cavPor3.chainGalGal3Link.txt
    # 144795360 bases of 2663369733 (5.437%) in intersection

#########################################################################
# BLASTZ/CHAIN/NET cavPor3-oryCun1 (RESTART - 2008-04-11 - DONE - 2008-04-22 Tim)
    ssh kkstore05 # store12->kkstore05
    screen # use a screen to manage this multi-day job
    mkdir /cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11
    cd /cluster/data/cavPor3/bed/
    ln -s blastzOryCun1.2008-04-11 blastz.oryCun1
    cd blastzOryCun1.2008-04-11

    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: GuineaPig cavPor3
SEQ1_DIR=/cluster/bluearc/scratch/data/cavPor3/cavPor3.2bit
SEQ1_LEN=/cluster/bluearc/scratch/data/cavPor3/chrom.sizes
SEQ1_LIMIT=300
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rabbit oryCun1
SEQ2_DIR=/cluster/bluearc/scratch/data/oryCun1/oryCun1.2bit
SEQ2_LEN=/cluster/bluearc/scratch/data/oryCun1/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=500
SEQ2_CHUNK=50000000
SEQ2_LAP=0

BASE=/cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11
TMPDIR=/scratch/tmp
_EOF_
    # << this line keeps emacs coloring happy

    # NOTE: be sure to ls the data in above script on workhorse machine before starting script
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	   -stop=partition > do.log 2>&1 & 
    ## used stop, then `wc -l run.blastz/cavPor3.lst` * `wc -l run.blastz/oryCun1.lst`
    ## to find out size based upon DEF SEQn_CHUNK and SEQn_LIMIT then:
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	   -continue=blastz > do.log 2>&1 & 
    # <ctl-A><ctl-d>  ps -ef | grep blastzCavPor3
    #real    2818m19.825s
    tail do.log
    # Loading 22976113 chains into cavPor3.chainOryCun1
    # Can't start query:
    # load data local infile 'link.tab' into table chainOryCun1Link
    # mySQL error 1114: The table 'chainOryCun1Link' is full
    # Command failed:
    # ssh -x hgwdev nice /cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11/axtChain/loadUp.csh

    # mysql> select count(*) from chainOryCun1Link; > 119,306,828   mm9-oryCun1 has ~40m spread out among ~20 chr tbls 
    # featureBits cavPor3 chainOryCun1Link
    # 439282179 bases of 2663369733 (16.493%) in intersection
    # Options:
    # 1) combine small scaffolds into a scaffoldUn 
    #    Have to restart all blastzs  Awkward since not clear dividing line between scaffold/superscaffold 
    # 2) Do not have the raw chains available at all.  Just have reciprocal best
    # 3) Go back to the idea of greater masking.

    ## [tdreszer@hgwdev /cluster/data/cavPor3] textHistogram -col=2 -binSize=40000000 chrom.sizes
    ##   0 ************************************************************ 3126
    ## 40000000  16
    ## 80000000  2   
    ## [tdreszer@hgwdev /cluster/data/cavPor3] textHistogram -col=2 -binSize=10000000 chrom.sizes
    ##  0 ************************************************************ 3079
    ## 10000000 * 26
    ## 20000000  14
    ## 30000000  7
    ## 40000000  5  
    ## 50000000  4
    ## 60000000  5
    ## 70000000  2
    ## 80000000  2
    ##################### Hiram recommends loading all 210 mil anyway with special table create to override row limit

    ###################################################################
    #	this failed during the load because the chainLink table became
    #	too large.  These were loaded manually with an sql statement to
    #	start the table definition, then a load data local infile using
    #	the .tab files left over from the failed load.  Note the extra
    #	definitions on the chainOryCun1Link table
    #	NOTE(review): the trailing '&' runs the DROP in the background;
    #	be sure it has finished before creating the replacement table.
    time nice -n +19 hgsql -e \
      "DROP TABLE chainOryCun1Link;" cavPor3 &

    # Recreated by hand (typed at an hgsql prompt).  The max_rows /
    # avg_row_length options override the default MyISAM row limit that made
    # the automated load fail with "mySQL error 1114: The table ... is full"
    # (see log excerpt above); 220M comfortably covers the ~210M link rows.
    CREATE TABLE chainOryCun1Link (
      bin smallint(5) unsigned NOT NULL default 0,
      tName varchar(255) NOT NULL default '',
      tStart int(10) unsigned NOT NULL default 0,
      tEnd int(10) unsigned NOT NULL default 0,
      qStart int(10) unsigned NOT NULL default 0,
      chainId int(10) unsigned NOT NULL default 0,
      KEY tName (tName(16),bin),
      KEY chainId (chainId)
    ) ENGINE=MyISAM max_rows=220000000 avg_row_length=55 pack_keys=1 CHARSET=latin1;

    # Reload the rows from the link.tab file left behind by the failed
    # loadUp.csh run (in the axtChain directory).
    time nice -n +19 hgsql -e \
      "load data local infile \"link.tab\" into table chainOryCun1Link;" cavPor3
    #	this one took a number of hours
    # real    272m44.943s

    #	finish the nets and load
    # Add gap/repeat stats to the net file using database tables:
    cd /cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11/axtChain
    netClass -verbose=0   -noAr noClass.net cavPor3 oryCun1 cavPor3.oryCun1.net

    # Load nets:
    time nice -n +19 netFilter -minGap=10 cavPor3.oryCun1.net \
      | hgLoadNet -verbose=0 cavPor3 netOryCun1 stdin > netFilter.out 2>&1 &
    # real    6m27.050s

    cd /cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11
    time nice -n +19 featureBits cavPor3 chainOryCun1Link >&fb.cavPor3.chainOryCun1Link.txt &
    #real    61m59.509s

    cat fb.cavPor3.chainOryCun1Link.txt
    #   752079320 bases of 2663369733 (28.238%) in intersection

    ###################################################################
    ### doRecipBest.pl  look in mm9.txt with oryCun1
    #	create reciprocal best chains/nets
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl cavPor3 oryCun1 \
	> rbest.log 2>&1 &
    # failed due to experiments dir being in the way
    cd /cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11/axtChain
    mv experiments old.experiments
    
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl cavPor3 oryCun1 \
	> rbestResume.log 2>&1 &
    # hung for some unknown reason.  Starting again
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl cavPor3 oryCun1 \
	>> rbestAgain.log 2>&1 &
    # real    37m33.221s
    # HgStepManager: executing step 'download' Fri Apr 18 10:33:31 2008.
    # download: output of previous step recipBest, /usr/local/apache/htdocs/goldenPath/cavPor3/vsOryCun1 , is required but does not appear to exist.
    # If it actually does exist, then this error is probably due to network/filesystem delays -- wait a minute and restart with -continue download.
    # If it really doesn't exist, either fix things manually or try -continue recipBest
    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl cavPor3 oryCun1 \
	-continue=recipBest >> rbestAgain.log 2>&1 &

    # Okay, the part that I am missing is that I didn't finish doBlastz with downloads!
    ssh kkstore05
    screen
    cd /cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11/
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	   -continue=download > doResumeAtDownload.log 2>&1 & 
    # real    3m21.480s
    # *** All done!
    # *** Make sure that goldenPath/cavPor3/vsOryCun1/README.txt is accurate.
    # *** Add {chain,net}OryCun1 tracks to trackDb.ra if necessary.

    time nice -n +19 /cluster/bin/scripts/doRecipBest.pl cavPor3 oryCun1 \
	-continue=download >> rbestAgain.log 2>&1 &
    # real    0m0.418s
    # *** All done!
    # *** Steps were performed in /cluster/data/cavPor3/bed/blastz.oryCun1

    #	real    611m17.901s
    cat fb.cavPor3.chainOryCun1Link.txt
    #   752079320 bases of 2663369733 (28.238%) in intersection

    ######### Change locations in DEF due to Hiram's new methods    
    cd /cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11
    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: GuineaPig cavPor3
SEQ1_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ1_LEN=/scratch/data/cavPor3/chrom.sizes
SEQ1_LIMIT=300
SEQ1_CHUNK=20000000
SEQ1_LAP=10000

# QUERY: Rabbit oryCun1
SEQ2_DIR=/scratch/data/oryCun1/oryCun1.2bit
SEQ2_LEN=/scratch/data/oryCun1/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=500
SEQ2_CHUNK=50000000
SEQ2_LAP=0

BASE=/cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11
TMPDIR=/scratch/tmp
_EOF_
    mkdir /cluster/data/oryCun1/bed/blastz.cavPor3.swap
    cd /cluster/data/oryCun1/bed/blastz.cavPor3.swap
    screen
    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -swap > do.log 2>&1
    # <ctl-A><ctl-d>  
    # real    492m11.337s
    # load data local infile 'link.tab' into table chainCavPor3Link
    # mySQL error 1114: The table 'chainCavPor3Link' is full
    # Command failed:
    # ssh -x hgwdev nice /cluster/data/oryCun1/bed/blastz.cavPor3.swap/axtChain/loadUp.csh
    # wc -l link.tab  210745564 

    ###################################################################
    # Manual load again
    ssh hgwdev
    cd /cluster/data/oryCun1/bed/blastz.cavPor3.swap/axtChain
    # Interactive mysql session: drop the half-loaded table and recreate it
    # with a max_rows override, since the default MyISAM limit produced the
    # "mySQL error 1114: table is full" failure above.  212M is just above
    # the 210,745,564 rows in link.tab (see wc -l above).
    hgsql oryCun1
    DROP TABLE chainCavPor3Link;
    CREATE TABLE chainCavPor3Link (
      bin smallint(5) unsigned NOT NULL default 0,
      tName varchar(255) NOT NULL default '',
      tStart int(10) unsigned NOT NULL default 0,
      tEnd int(10) unsigned NOT NULL default 0,
      qStart int(10) unsigned NOT NULL default 0,
      chainId int(10) unsigned NOT NULL default 0,
      KEY tName (tName(16),bin),
      KEY chainId (chainId)
    ) ENGINE=MyISAM max_rows=212000000 avg_row_length=55 pack_keys=1 CHARSET=latin1;
    # exit

    # Reload the leftover link.tab from the failed loadUp.csh run.
    screen
    time nice -n +19 hgsql -e \
      "load data local infile \"link.tab\" into table chainCavPor3Link;" oryCun1
    # real    360m19.577s
    # mysql> select count(*) from chainCavPor3Link;  | 210745564 |
    #	finish the nets and load
    # Still on hgwdev because of sql
    screen
    time nice -n +19 /cluster/data/oryCun1/bed/blastz.cavPor3.swap/axtChain/loadUpResume.csh >> doFinishLoad.log 2>&1
    # real    991m52.532s
    # cat fb.oryCun1.chainCavPor3Link.txt
    # 729628282 bases of 2076044328 (35.145%) in intersection

    # Now continue -swap at download
    ssh kkstore04 # oryCun1 -> store09 -> kkstore04
    cd /cluster/data/oryCun1/bed/blastz.cavPor3.swap
    screen
    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/cavPor3/bed/blastzOryCun1.2008-04-11/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -swap -continue=download >> doResumeAtDownload.log 2>&1
    # real    2m2.757s
    # *** All done!
    # *** Make sure that goldenPath/oryCun1/vsCavPor3/README.txt is accurate.
    # *** Add {chain,net}CavPor3 tracks to trackDb.ra if necessary.

#########################################################################
# BLASTZ/CHAIN/NET rn4-cavPor3 (STARTED - 2008-04-14; DONE 04-15-2008 - Tim)
    ssh kkstore06  # rat on store3->kkstore06
    screen # use a screen to manage this multi-day job
    mkdir /cluster/data/rn4/bed/blastzCavPor3.2008-04-14
    cd /cluster/data/rn4/bed/
    ln -s blastzCavPor3.2008-04-14 blastz.cavPor3
    cd blastzCavPor3.2008-04-14

    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: Rat Rn4
SEQ1_DIR=/cluster/bluearc/scratch/data/rn4/nib
SEQ1_LEN=/cluster/bluearc/scratch/data/rn4/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/cluster/bluearc/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/cluster/bluearc/scratch/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/rn4/bed/blastzCavPor3.2008-04-14
TMPDIR=/scratch/tmp
_EOF_
    # << this line keeps emacs coloring happy

    # NOTE: be sure to ls the data in above script on workhorse machine before starting script
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -syntenicNet > do.log 2>&1 &
    # <ctl-A><ctl-d>  ps -ef | grep blastzCavPor3
    #	real    611m17.901s
    cat fb.rn4.chainCavPor3Link.txt
    #   716379861 bases of 2571531505 (27.858%) in intersection

    ######### Change locations in DEF due to Hiram's new methods    
    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: Rat Rn4
SEQ1_DIR=/scratch/data/rn4/nib
SEQ1_LEN=/scratch/data/rn4/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/scratch/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/rn4/bed/blastzCavPor3.2008-04-14
TMPDIR=/scratch/tmp
_EOF_

    mkdir /cluster/data/cavPor3/bed/blastz.rn4.swap
    cd /cluster/data/cavPor3/bed/blastz.rn4.swap
    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/rn4/bed/blastzCavPor3.2008-04-14/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -swap -syntenicNet > do.log 2>&1
    # real    103m34.523s

    #	broken down during netSynteny.csh due to too many open files on
    #	a chainSplit
    #	However, there is no need to split when we have scaffolds.
    #   Kate fixes doBlastzChainNet.pl and retry:

    cd /cluster/data/cavPor3/bed/blastz.rn4.swap
    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/rn4/bed/blastzCavPor3.2008-04-14/DEF \
	-bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
	    -swap -continue=syntenicNet -syntenicNet > doSyn.log 2>&1 &
    # real    14m49.396s
    # *** All done!
    # *** Make sure that goldenPath/cavPor3/vsRn4/README.txt is accurate.
    # *** Add {chain,net}Rn4 tracks to trackDb.ra if necessary.
    cat fb.cavPor3.chainRn4Link.txt
    #   735147548 bases of 2663369733 (27.602%) in intersection

###########################################################################
# BLASTZ/CHAIN/NET monDom4-cavPor3 (START - 2008-04-11 DONE 2008-04-16 - Tim)
    ssh kkstore04 # monDom on store9 -> kkstore04
    screen # use screen to control this job
    mkdir /cluster/data/monDom4/bed/blastzCavPor3.2008-04-11
    cd /cluster/data/monDom4/bed/
    ln -s blastzCavPor3.2008-04-11 blastz.cavPor3
    cd blastzCavPor3.2008-04-11

    cat << \_EOF_ > DEF
# opossum vs guineaPigs

# Use "mammal-fish" params even though this is mammal-mammal...
# pretty distant, hopefully not too many shared undermasked repeats.
#BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET: Opossum monDom4
SEQ1_DIR=/san/sanvol1/scratch/monDom4/nib
SEQ1_LEN=/san/sanvol1/scratch/monDom4/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/cluster/bluearc/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/cluster/bluearc/scratch/data/cavPor3/chrom.sizes
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/monDom4/bed/blastzCavPor3.2008-04-11

TMPDIR=/scratch/tmp
_EOF_
    # << happy emacs

    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-chainMinScore=5000 -verbose=2 -stop=partition \
	-chainLinearGap=loose -bigClusterHub=pk > do.log 2>&1 &
    ## used stop, then `wc -l run.blastz/cavPor3.lst` * `wc -l run.blastz/monDom4.lst`
    ## to find out size based upon DEF SEQn_CHUNK and SEQn_LIMIT then:
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-chainMinScore=5000 -verbose=2 -continue=blastz \
	-chainLinearGap=loose -bigClusterHub=pk > do.log 2>&1 &
    # <ctl-A><ctl-d>  ps -ef | grep blastzCavPor3
    # real    1715m0.868s
    # tail do.log
    # updated job database on disk
    # Batch failed after 4 tries on /cluster/bin/scripts/blastz-run-ucsc -outFormat psl /san/sanvol1/scratch/monDom4/nib/chr5.nib:chr5:20000000-30010000 qParts/part042.lst ../DEF ../psl/chr5.nib:chr5:20000000-30010000/chr5.nib:chr5:20000000-30010000_part042.lst.psl
    # Command failed:
    # ssh -x pk nice /cluster/data/monDom4/bed/blastzCavPor3.2008-04-11/run.blastz/doClusterRun.csh

    para-eta time
    # Completed: 65513 of 65514 jobs
    # Crashed: 1 jobs
    # CPU time in finished jobs:    9807564s  163459.40m  2724.32h  113.51d  0.311 y
    # IO & Wait Time:                373385s    6223.08m   103.72h    4.32d  0.012 y
    # Average job time:                 155s       2.59m     0.04h    0.00d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:            2413s      40.22m     0.67h    0.03d
    # Submission to last job:        103236s    1720.60m    28.68h    1.19d

    #### One for automation ???????????:
    para problems
    ## start time: Mon Apr 14 14:01:19 2008
    ## return: 9
    ## ../Psl/chr5.nib:chr5:20000000-30010000/chr5.nib:chr5:20000000-30010000_part042.lst.psl does not exist
    ## stderr:
    ## ../psl/chr5.nib:chr5:20000000-30010000/chr5.nib:chr5:20000000-30010000_part042.lst.psl already exists

    ## ls ../Psl/chr5.nib:chr5:20000000-30010000/chr5.nib:chr5:20000000-30010000_part042.lst.psl
    ## ls: ../Psl/chr5.nib:chr5:20000000-30010000/chr5.nib:chr5:20000000-30010000_part042.lst.psl: No such file or directory
    ## ls ../psl/chr5.nib:chr5:20000000-30010000/chr5.nib:chr5:20000000-30010000_part042.lst.psl
    ## ../psl/chr5.nib:chr5:20000000-30010000/chr5.nib:chr5:20000000-30010000_part042.lst.psl
    ########### Case sensitive path name!!! "Psl" vs "psl"

    ssh kkstore04 
    screen # use screen to control this job
    cd /cluster/data/monDom4/bed/blastzCavPor3.2008-04-11
   ######### Change locations in DEF due to Hiram's new methods    
   cat << \_EOF_ > DEF
# opossum vs guineaPigs

# Use "mammal-fish" params even though this is mammal-mammal...
# pretty distant, hopefully not too many shared undermasked repeats.
#BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=6000
BLASTZ_K=2200
BLASTZ_M=50
BLASTZ_Q=/scratch/data/blastz/HoxD55.q

# TARGET: Opossum monDom4
SEQ1_DIR=/san/sanvol1/scratch/monDom4/nib
#SEQ1_DIR=/scratch/data/monDom4/monDom.2bit
SEQ1_LEN=/scratch/data/monDom4/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/scratch/data/cavPor3/chrom.sizes
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/monDom4/bed/blastzCavPor3.2008-04-11

TMPDIR=/scratch/tmp
_EOF_

    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
	-chainMinScore=5000 -verbose=2 -continue=cat \
	-chainLinearGap=loose -bigClusterHub=pk > do.log 2>&1 &
    # <ctl-A><ctl-d>  ps -ef | grep blastzCavPor3
    # real    232m24.348s
    # Done time nice -n +19 doBlastzChainNet.pl `pwd`/DEF -chainMinScore=5000 -verbose=2 -continue=cat -

    cat fb.monDom4.chainCavPor3Link.txt
    #   334067222 bases of 3501643220 (9.540%) in intersection

    mkdir /cluster/data/cavPor3/bed/blastz.monDom4.swap

    cd /cluster/data/cavPor3/bed/blastz.monDom4.swap
    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/monDom4/bed/blastzCavPor3.2008-04-11/DEF \
	-chainMinScore=5000 -verbose=2 -smallClusterHub=memk \
	-swap -chainLinearGap=loose -bigClusterHub=pk > do.log 2>&1 
    # real    238m31.843s
    # Loading 18041915 chains into cavPor3.chainMonDom4
    # Can't start query:
    # load data local infile 'link.tab' into table chainMonDom4Link
    # mySQL error 1114: The table 'chainMonDom4Link' is full
    # select count(*) from chainMonDom4Link; => 119,311,098
    # wc -l link.tab                         => 202,324,406 link.tab

    ###################################################################
    #	this failed during the load because the chainLink table became
    #	too large.  These were loaded manually with an sql statement to
    #	start the table definition, then a load data local infile using
    #	the .tab files left over from the failed load.  Note the extra
    #	definitions on the chainMonDom4Link table
    #	NOTE(review): the trailing '&' runs the DROP in the background;
    #	be sure it has finished before creating the replacement table.
    time nice -n +19 hgsql -e \
      "DROP TABLE chainMonDom4Link;" cavPor3 &

    # Recreated by hand with a max_rows override, as for chainOryCun1Link
    # above; 210M covers the 202,324,406 rows counted in link.tab.
    CREATE TABLE chainMonDom4Link (
      bin smallint(5) unsigned NOT NULL default 0,
      tName varchar(255) NOT NULL default '',
      tStart int(10) unsigned NOT NULL default 0,
      tEnd int(10) unsigned NOT NULL default 0,
      qStart int(10) unsigned NOT NULL default 0,
      chainId int(10) unsigned NOT NULL default 0,
      KEY tName (tName(16),bin),
      KEY chainId (chainId)
    ) ENGINE=MyISAM max_rows=210000000 avg_row_length=55 pack_keys=1 CHARSET=latin1;

    time nice -n +19 hgsql -e \
      "load data local infile \"link.tab\" into table chainMonDom4Link;" cavPor3
    #	this one took a number of hours
    # real    272m44.943s
    # NOTE(review): this timing is identical to the oryCun1 load above --
    # looks copy-pasted; verify against the actual run log.

    #	finish the nets and load
    # Add gap/repeat stats to the net file using database tables:
    cd /cluster/data/cavPor3/bed/blastz.monDom4.swap/axtChain
    # copied loadUp.csh to loadUpResume.csh and commented out lines already done
    time nice -n +19 ./loadUpResume.csh > loadUpResume.out 2>&1
    #real    30m47.809s

    #	real    106m17.385s
    cat fb.cavPor3.chainMonDom4Link.txt
    # 382771802 bases of 2663369733 (14.372%) in intersection

    ssh kkstore05
    screen
    cd /cluster/data/cavPor3/bed/blastz.monDom4.swap

    time nice -n +19 doBlastzChainNet.pl \
	/cluster/data/monDom4/bed/blastzCavPor3.2008-04-11/DEF \
	-chainMinScore=5000 -verbose=2 -smallClusterHub=memk \
	-swap -chainLinearGap=loose -bigClusterHub=pk -continue=download > doDowload.log 2>&1 
    # real    0m36.604s
    # *** All done!
    # *** Make sure that goldenPath/cavPor3/vsMonDom4/README.txt is accurate.
    # *** Add {chain,net}MonDom4 tracks to trackDb.ra if necessary.

############################################################################
# FINAL BLASTZS STEPS:
# Be sure to add chainCavPor3.html,netCavPor3.html to makeDb/trackDb and update makeDb/trackDb/trackDb.ra
# Any orgs with non-standard priority update {org}/priority.ra 
# Any orgs with different BLASTZ_Q=HoxD55.q will need chain{DB}.html and trackDb.ra at assembly level

#Modified (possibly merged) files:
#  trackDb/trackDb.ra

#Files/directories not checked in to CVS that look like source:
#  doc/cavPor3.txt
#  trackDb/chainOryCun1.html
#  trackDb/netOryCun1.html

############################################################################
#  BLATSERVERS ENTRY (DONE - 2008-04-17 - Tim)
#   After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
    VALUES ("cavPor3", "blat7", "17780", "1", "0"); \
    INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
    VALUES ("cavPor3", "blat7", "17781", "0", "1");' \
        hgcentraltest
    #   test it with some sequence
    # Can't open /gbdb/cavPor3/choHof1.2bit to read: No such file or directory
    ## Doh!  I entered in the wrong blat server the first time!

#############################################################################
#  Downloads (DONE - 2008-04-23 - Tim)
#   Let's see if the downloads will work
    ssh hgwdev
    # 'cd' was missing here: the relative ln -s target and the
    # jkStuff/downloads.log path below require cwd = assembly directory.
    cd /cluster/data/cavPor3
#   expecting to find repeat masker .out file here:
    ln -s bed/repeatMasker/cavPor3.fa.out .
    time nice -n +19 /cluster/bin/scripts/makeDownloads.pl \
    -workhorse=hgwdev cavPor3 > jkStuff/downloads.log 2>&1
    # failed due to link to RepeatMasker instead of repeatMasker.  Fixed and restarted.
    # *** All done!
    # *** Please take a look at the downloads for cavPor3 using a web browser.
    # *** Edit each README.txt to resolve any notes marked with "***":
    #     /cluster/data/cavPor3/goldenPath/database/README.txt
    #     /cluster/data/cavPor3/goldenPath/bigZips/README.txt
    #     (The htdocs/goldenPath/cavPor3/*/README.txt "files" are just links to those.)
    # *** If you have to make any edits that would always apply to future
    #     assemblies from the same sequencing center, please edit them into
    #     ~/kent/src/hg/utils/automation/makeDownloads.pl (or ask Angie for help).
    
    # the downloads are located at: http://hgwdev.cse.ucsc.edu/goldenPath/cavPor3/
    mv goldenPath old.goldenPath
    mv jkStuff/downloads.log jkStuff/old.downloads.log
    time nice -n +19 /cluster/home/tdreszer/kent/src/hg/utils/automation/makeDownloads.pl \
    -workhorse=hgwdev cavPor3 >> jkStuff/downloads.log 2>&1
    # real    24m23.427s
    # *** Please take a look at the downloads for cavPor3 using a web browser.
    # *** The downloads directory is: /usr/local/apache/htdocs/goldenPath/cavPor3. 
    # *** Edit each README.txt to resolve any notes marked with "***":
    #      /cluster/data/cavPor3/goldenPath/database/README.txt
    #      /cluster/data/cavPor3/goldenPath/bigZips/README.txt
    #      (The htdocs/goldenPath/cavPor3/*/README.txt "files" are just links to those.)
    #  *** If you have to make any edits that would always apply to future
    #      assemblies from the same sequencing center, please edit them into
    #      ~/kent/src/hg/utils/automation/makeDownloads.pl (or ask Angie for help).


#############################################################################
#  PushQ entries (DONE - 2008-04-24 - Tim)
    ssh hgwdev
    # 'cd' was missing here: pushQ.sql is written via the relative
    # jkStuff/ path, so cwd must be the assembly directory.
    cd /cluster/data/cavPor3
    /cluster/bin/scripts/makePushQSql.pl cavPor3 > jkStuff/pushQ.sql
    # output warnings:
    # hgwdev does not have /usr/local/apache/htdocs/goldenPath/cavPor3/liftOver/cavPor3ToCavPor*
    ### is it worth it? NO: cavPor2 was never released.
    # cavPor3 does not have seq
    # cavPor3 does not have extFile
    ### Not needed because output is from multiz, visiGene, affyProbes, cloneend, stsMarker, nibbImageProbes
    ### Kate suggests updating makePushQSql.pl to require gbSeq and gbExtFile instead, but it already does.
    # 
    #  *** All done!
    #  *** Please edit the output to ensure correctness before using.
    #  *** 1. Resolve any warnings output by this script.
    #  *** 2. Remove any entries which should not be pushed.
    ### All accounted for but: genscanSubopt which is not in pushQ.sql but is in 

    #  *** 3. Add tables associated with the main track table (e.g. *Pep tables
    #         for gene prediction tracks).
    #  *** 4. Add files associated with tracks.  First, look at the results
    #         of this query:
    #           hgsql cavPor3 -e 'select distinct(path) from extFile'
    ### Doesn't exist
    #         Then, look at file(s) named in each of the following wiggle tables:
    #           hgsql cavPor3 -e 'select distinct(file) from gc5Base'
    ### | /gbdb/cavPor3/wib/gc5Base.wib |
    #           hgsql cavPor3 -e 'select distinct(file) from quality'
    ### | /gbdb/cavPor3/wib/qual.wib |
    #         Files go in the second field after tables (it's tables, cgis, files).
    #  *** 5. This script currently does not recognize composite tracks.  If cavPor3
    #         has any composite tracks, you should manually merge the separate
    #         per-table entries into one entry.
    ### Doesn't apply to cavPor3 at this time.
    #  *** 6. Just before executing the sql, note the ID of the most recent entry
    #         in the Main Push Queue.  If the ID (first column) of the last
    #         INSERT statement is not 1 greater than the most recent entry's,
    #         make it so to avoid an ID clash with an existing entry.
    ### Updated to 4283
    #  *** When everything looks complete and correct, use hgsql on the qapushq
    #      machine (currently hgwbeta) to execute the sql file.  (Make sure that
    #      qapushq does not already have a table named cavPor3.)  Then use the Push
    #      Queue web interface to check the contents of all entries.
    ssh hgwbeta
    cd /cluster/data/cavPor3/jkStuff
    hgsql qapushq < pushQ.sql 
    ### All is there
    #  *** If you haven't already, please add cavPor3 to makeDb/schema/all.joiner !
    #      It should be in both $gbd and $chainDest.
    ### Already there
    #  *** When cavPor3 is on the RR (congrats!), please doBlastz -swap if you haven't
    #      already.

###########################################################################
# HUMAN (hg18) PROTEINS TRACK (DONE braney 2008-04-09)
    ssh kkstore05
    # bash  if not using bash shell already

    # Build per-chunk BLAST databases from the unmasked assembly.
    mkdir /cluster/data/cavPor3/blastDb
    cd /cluster/data/cavPor3
    # Scaffolds > 1Mb: split at gaps into ~1Mb pieces, recording a lift file
    # (blastDb.lft) so hits can be lifted back to scaffold coordinates.
    awk '{if ($2 > 1000000) print $1}' chrom.sizes > 1meg.lst
    twoBitToFa -seqList=1meg.lst  cavPor3.unmasked.2bit temp.fa
    faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft
    rm temp.fa 1meg.lst

    # Scaffolds <= 1Mb: pack whole sequences into ~1Mb fasta files
    # (no lift needed since sequences are not cut).
    awk '{if ($2 <= 1000000) print $1}' chrom.sizes > less1meg.lst
    twoBitToFa -seqList=less1meg.lst  cavPor3.unmasked.2bit temp.fa
    faSplit about temp.fa 1000000 blastDb/y 

    cd blastDb
    # formatdb -p F: build nucleotide (not protein) BLAST databases.
    for i in *.fa
    do
	/cluster/bluearc/blast229/formatdb -i $i -p F
    done
    rm *.fa
    # sanity check: count of database chunks
    ls *.nsq | wc -l
# 3401

    mkdir -p /san/sanvol1/scratch/cavPor3/blastDb
    cd /cluster/data/cavPor3/blastDb
    for i in nhr nin nsq; 
    do 
	echo $i
	cp *.$i /san/sanvol1/scratch/cavPor3/blastDb
    done

    mkdir -p /cluster/data/cavPor3/bed/tblastn.hg18KG
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG
    echo  /san/sanvol1/scratch/cavPor3/blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//"  > query.lst
    wc -l query.lst

# 3401 query.lst

   # we want around 350000 jobs
   # lines-per-query-file = total kg psl lines / (target jobs / db chunks)
   calc `wc /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl | awk '{print $1}'`/\(350000/`wc query.lst | awk '{print $1}'`\)

# 36727/(350000/3401) = 356.881506

   # Split the 36727-line kg psl into 357-line pieces (~103 files);
   # 103 query files x 3401 db chunks = 350,303 jobs (matches para time below).
   mkdir -p /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/kgfa
   split -l 357 /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl  /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/kgfa/kg
   ln -s /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/kgfa kgfa
   cd kgfa
   # Convert each pslx piece to protein fasta, then drop the psl piece.
   for i in *; do 
     nice pslxToFa $i $i.fa; 
     rm $i; 
     done
   cd ..
   ls -1S kgfa/*.fa > kg.lst
   # One output directory per query file for the cluster run results.
   mkdir -p /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut
   ln -s /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut
   for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
   tcsh
   cd /cluster/data/cavPor3/bed/tblastn.hg18KG
   # gensub2 template: one blastSome job per (db chunk, kg query file) pair;
   # keep comments out of the #LOOP/#ENDLOOP body itself.
   cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
#ENDLOOP
'_EOF_'

   cat << '_EOF_' > blastSome
#!/bin/sh
# blastSome: run one tblastn cluster job.
#   $1 = BLAST database chunk, $2 = kg protein fasta piece, $3 = output psl
BLASTMAT=/cluster/bluearc/blast229/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
# Retry with progressively stricter e-value cutoffs until blastall succeeds
# -- presumably to work around failures at looser cutoffs (TODO confirm).
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /cluster/bluearc/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
    # Convert blast output to psl, then lift twice:
    #   target side: blastDb chunk coords -> cavPor3 scaffolds (blastDb.lft)
    #   query side (-pslQ): protein piece coords -> full proteins
    #   (per protein.lft from the hg18 kg build -- TODO confirm)
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
	liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/cavPor3/blastDb.lft carry $f.2
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg18/bed/blat.hg18KG/protein.lft warn $f.3
        # Only install the result if it passes protein psl validation.
        if pslCheck -prot $3.tmp
        then
            mv $3.tmp $3
            rm -f $f.1 $f.2 $f.3 $f.4
        fi
        exit 0
    fi
fi
# Failure path: clean up temps and exit non-zero so parasol can retry.
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
exit 1
'_EOF_'
    # << happy emacs
    chmod +x blastSome
    gensub2 query.lst kg.lst blastGsub blastSpec
    exit 
    
    ssh pk
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG
    para create blastSpec
#    para try, check, push, check etc.

    para time

# Completed: 350303 of 350303 jobs
# CPU time in finished jobs:   25825855s  430430.91m  7173.85h  298.91d  0.819 y
# IO & Wait Time:               2396715s   39945.25m   665.75h   27.74d  0.076 y
# Average job time:                  81s       1.34m     0.02h    0.00d
# Longest finished job:             339s       5.65m     0.09h    0.00d
# Submission to last job:         80595s    1343.25m    22.39h    0.93d

    ssh kkstore05
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG
    mkdir chainRun
    cd chainRun
    tcsh
    # gensub2 template: one chainOne job per blastOut/kg?? directory.
    cat << '_EOF_' > chainGsub
#LOOP
chainOne $(path1)
#ENDLOOP
'_EOF_'

    # chainOne: concatenate all per-chunk q.*.psl in one blastOut directory
    # and chain them with simpleChain into a single c.<dir>.psl.
    cat << '_EOF_' > chainOne
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=150000 stdin /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut/c.`basename $1`.psl)
'_EOF_'
    chmod +x chainOne
    ls -1dS /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut/kg?? > chain.lst
    gensub2 chain.lst single chainGsub chainSpec
    # do the cluster run for chaining
    ssh pk
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG/chainRun
    para create chainSpec
    para maxNode 30
    para try, check, push, check etc.

# Completed: 99 of 103 jobs
# Crashed: 4 jobs
# CPU time in finished jobs:      15940s     265.67m     4.43h    0.18d  0.001 y
# IO & Wait Time:                397633s    6627.21m   110.45h    4.60d  0.013 y
# Average job time:                4178s      69.63m     1.16h    0.05d
# Longest finished job:            5502s      91.70m     1.53h    0.06d
# Submission to last job:          7032s     117.20m     1.95h    0.08d

# ran 4 crashed jobs on memk.  
Completed: 4 of 4 jobs
CPU time in finished jobs:       1673s      27.88m     0.46h    0.02d  0.000 y
IO & Wait Time:                   529s       8.81m     0.15h    0.01d  0.000 y
Average job time:                 550s       9.17m     0.15h    0.01d
Longest running job:                0s       0.00m     0.00h    0.00d
Longest finished job:             712s      11.87m     0.20h    0.01d
Submission to last job:          1299s      21.65m     0.36h    0.02d

    ssh kkstore05
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG/blastOut
    # Filter the chained psls (psl columns: $1=matches, $11=qSize,
    # $12=qStart, $13=qEnd):
    #   c60 = alignments spanning >60% of the query ((qEnd-qStart)/qSize)
    #   u   = best-scoring unique alignments from c60
    #   m60 = c60 alignments where matches cover >60% of the query
    for i in kg??
    do
       cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
       sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
       awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
       echo $i
    done
    # Merge, sort by target position ($14=tName, $16=tStart, $17=tEnd),
    # and drop duplicates to build the final track psl.
    sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/cavPor3/bed/tblastn.hg18KG/blastHg18KG.psl
    cd ..
    pslCheck blastHg18KG.psl

    # load table 
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG
    hgLoadPsl cavPor3 blastHg18KG.psl

    # check coverage
    featureBits cavPor3 blastHg18KG 
# 35569442 bases of 2663369733 (1.336%) in intersection

    featureBits cavPor3 all_mrna blastHg18KG  -enrichment
# all_mrna 0.023%, blastHg18KG 1.336%, both 0.015%, cover 66.85%, enrich 50.05x

    ssh kkstore05
    rm -rf /cluster/data/cavPor3/bed/tblastn.hg18KG/blastOut
    rm -rf /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut
#end tblastn
############################### Make editor happy: _EOF_
 
#########################################################################
## 7-Way Multiz (STARTED - 2008-04-25 - Tim)
##

#  From Jim: A minimal set would be human/mouse/rabbit/chicken.  
#  Add rat and possum if you feel like turning the crank a little more.

    #	the all.chain.gz files were split up via kluster jobs on memk
    #	in order to get mafSynNet files.  Example above in ornAna1 blastz
    ssh hgwdev
    mkdir /cluster/data/cavPor3/bed/multiz7way
    cd /cluster/data/cavPor3/bed/multiz7way
    #	take the 30-way tree from mm9 and eliminate genomes not in
    #	this alignment
    #	rearrange to get cavPor3 on the top of the graph
    #	paste this tree into the on-line phyloGif tool:
    #	http://genome.ucsc.edu/cgi-bin/phyloGif
    #	to create the image for the tree diagram

    #	select the 7 organisms from the 30-way recently done on mouse mm9
    /cluster/bin/phast/tree_doctor \
--prune-all-but Human_hg18,Mouse_mm9,Rat_rn4,GuineaPig_cavPor2,Rabbit_oryCun1,Opossum_monDom4,Chicken_galGal3 \
	/cluster/data/mm9/bed/multiz30way/mm9OnTop.fullNames.nh \
	| sed -e "s/cavPor2/cavPor3/g" > 7-way.fullNames.nh

    #	looks something like this:
    # ((((((Mouse_mm9:0.076274,Rat_rn4:0.084383):0.200607,GuineaPig_cavPor3:0.202990):0.034350,Rabbit_oryCun1:0.208548):0.014587
    # ,Human_hg18:0.126901):0.263313,Opossum_monDom4:0.320721):0.207444,Chicken_galGal3:0.488824);

    #	rearrange to get guineaPig at the top:
    # this leaves us with:
    cat << _EOF_ > cavPor3.7-way.nh
(((((GuineaPig_cavPor3:0.202990,(Mouse_mm9:0.076274,Rat_rn4:0.084383):0.200607):0.034350,Rabbit_oryCun1:0.208548):0.014587,Human_hg18:0.126901):0.263313,Opossum_monDom4:0.320721):0.207444,Chicken_galGal3:0.488824);
_EOF_
    #	<< happy emacs
    
    # verify all blastz's exists
    cat << \_EOF_ > listMafs.csh
#!/bin/csh -fe
# For each non-cavPor3 species in species.list, report which flavor of
# net maf exists for its blastz run, preferring reciprocal-best
# (mafRBestNet) over syntenic (mafSynNet) over plain net (mafNet).
cd /cluster/data/cavPor3/bed/multiz7way
foreach db (`grep -v cavPor3 species.list`)
    set bdir = /cluster/data/cavPor3/bed/blastz.$db
    if (-e $bdir/mafRBestNet/cavPor3.$db.rbest.maf.gz) then
	echo "$db mafRBestNet"
    else if (-e $bdir/mafSynNet/cavPor3.$db.syn.maf.gz) then
	echo "$db mafSynNet"
    else if (-e $bdir/mafNet/cavPor3.$db.net.maf.gz) then
	echo "$db mafNet"
    else
	echo "$db mafs not found"
    endif
end
_EOF_
    # << happy emacs
    chmod +x ./listMafs.csh
    #	see what it says:
    ./listMafs.csh
    # galGal3 mafNet
    # hg18 mafNet
    # mm9 mafNet
    # monDom4 mafNet
    # oryCun1 mafRBestNet
    # rn4 mafNet

    /cluster/bin/phast/all_dists cavPor3.7-way.nh > 7way.distances.txt
    #    ERROR: Can't parse distance in tree ("0.014587
    #").
    #[hgwdev:tdreszer multiz7way> m cavPor3.7-way.nh 
    # (((((GuineaPig_cavPor3:0.202990,(Mouse_mm9:0.076274,Rat_rn4:0.084383):0.200607):0.034350,Rabbit_oryCun1:0.208548):0.014587
    # ,Human_hg18:0.126901):0.263313,Opossum_monDom4:0.320721):0.207444,Chicken_galGal3:0.488824);
    # Problem is that tree should not contain 'n' so reecho and retry
    m 7way.distances.txt
    # GuineaPig_cavPor3       Mouse_mm9       0.479871
    # GuineaPig_cavPor3       Rat_rn4 0.487980
    # GuineaPig_cavPor3       Rabbit_oryCun1  0.445888
    # GuineaPig_cavPor3       Human_hg18      0.378828
    # GuineaPig_cavPor3       Opossum_monDom4 0.835961
    # GuineaPig_cavPor3       Chicken_galGal3 1.211508
    # Mouse_mm9       Rat_rn4 0.160657
    # Mouse_mm9       Rabbit_oryCun1  0.519779
    # Mouse_mm9       Human_hg18      0.452719
    # Mouse_mm9       Opossum_monDom4 0.909852
    # Mouse_mm9       Chicken_galGal3 1.285399
    # Rat_rn4 Rabbit_oryCun1  0.527888
    # Rat_rn4 Human_hg18      0.460828
    # Rat_rn4 Opossum_monDom4 0.917961
    # Rat_rn4 Chicken_galGal3 1.293508
    # Rabbit_oryCun1  Human_hg18      0.350036
    # Rabbit_oryCun1  Opossum_monDom4 0.807169
    # Rabbit_oryCun1  Chicken_galGal3 1.182716
    # Human_hg18      Opossum_monDom4 0.710935
    # Human_hg18      Chicken_galGal3 1.086482
    # Opossum_monDom4 Chicken_galGal3 1.016989
    # (total) -       2.228942

    grep -i cavPor 7way.distances.txt | sort -k3,3n
    # GuineaPig_cavPor3       Human_hg18      0.378828
    # GuineaPig_cavPor3       Rabbit_oryCun1  0.445888
    # GuineaPig_cavPor3       Mouse_mm9       0.479871
    # GuineaPig_cavPor3       Rat_rn4         0.487980
    # GuineaPig_cavPor3       Opossum_monDom4 0.835961
    # GuineaPig_cavPor3       Chicken_galGal3 1.211508
    
    # Note that guineaPig is closer to human than to other rodents.
    # This may be reasonable, since the speed of evolution (length of tree limbs) 
    # in rodents is greater than for primates.  For instance mm9 is closer to hg18
    # than it is to either cavPor2 or oryCun1
    
    #	use the calculated
    #	distances in the table below to order the organisms and check
    #	the button order on the browser.
    #	And if you can fill in the table below entirely, you have
    #	succeeded in finishing all the alignments required.
    #
    #                         featureBits chainLink measures
    #                                    chainCavPor3Link   chain   linearGap
    # distance                     on cavPor3    on other   minScore
    # 0.479871 Mouse_mm9           (28.900%)     (29.330%)  3000     medium
    # 0.487980 Rat_rn4             (27.858%)     (27.602%)  3000     medium
    # 0.445888 Rabbit_oryCun1      (35.145%)     (28.238%)  3000     medium
    # 0.378828 Human_hg18          (43.971%)     (48.132%)  3000     medium
    # 0.835961 Opossum_monDom4      (9.540%)     (14.372%)  5000     loose
    # 1.211508 Chicken_galGal3     (10.190%)      (5.437%)  5000     loose
    ### ### ### ### Be sure to use calJac as an example because of scaffolds
    ### ### ### ### will require making maf files by scaffold name

    #	create a coherent set of all the mafs involved in this run
    mkdir mafLinks
    cd mafLinks
    ln -s ../../blastz.hg18/mafNet ./hg18
    ln -s ../../blastz.mm9/mafNet ./mm9
    ln -s ../../blastz.rn4/mafNet ./rn4
    ln -s ../../blastz.monDom4/mafNet ./monDom4
    ln -s ../../blastz.galGal3/mafNet ./galGal3
    ln -s ../../blastz.oryCun1/mafRBestNet ./oryCun1

    #	check data size:
    du -hscL *
    # 100M    galGal3
    # 930M    hg18
    # 577M    mm9
    # 268M    monDom4
    # 495M    oryCun1
    # 543M    rn4
    # 2.9G    total

    #   need to split these things up by Contig number for efficient kluster run
    ssh kkstore06
    mkdir -p /san/sanvol1/scratch/cavPor3/multiz7way/contigMaf
    cd /scratch/tmp
    #   the 16801 is the cavPor3 chrM size from its chrom.sizes
    #   (this comment originally read "16201 ... petMar" -- stale template text)
    echo "chrM 0 16801" > chrM.bed

    # Split each species' pairwise maf into per-contig pieces for the
    # kluster multiz run.
    # NOTE(review): this zcat path has no ${D}/ subdirectory under
    # mafLinks, yet the links created above (mafLinks/hg18 etc.) are
    # directories; the 8-way version of this loop later in this file uses
    # mafLinks/${D}/cavPor3.${D}.*.maf.gz -- confirm which form was run.
    for D in `grep -v cavPor3 /cluster/data/cavPor3/bed/multiz7way/species.list`
do
    echo ${D}
    zcat \
/cluster/data/cavPor3/bed/multiz7way/mafLinks/cavPor3.${D}.*.maf.gz \
    > ${D}.maf
    mkdir /scratch/tmp/${D}
    cd /scratch/tmp/${D}
    # split by target contig, hashed names into a 2-level directory tree
    mafSplit -verbose=2 /dev/null -byTarget -useHashedName=10 Contig \
    ../${D}.maf -outDirDepth=2
    # chrM does not match the Contig naming; carve it out separately
    mafsInRegion ../chrM.bed 0/0/chrM.maf ../${D}.maf
    rsync -a --progress ./ \
    /san/sanvol1/scratch/cavPor3/multiz7way/contigMaf/${D}
    cd /scratch/tmp
    rm -fr ${D} ${D}.maf
done
    
    #   create a run-time list of contigs to operate on, not all contigs
    #   exist in all alignments, but we want all contig names used in any
    #   alignment:
    ssh kkstore05 # cavPor3 -> store12 -> kkstore05
    cd /san/sanvol1/scratch/cavPor3/multiz7way/contigMaf
    for D in *
do
    cd "${D}"
    find . -type f
    cd ..
done | sort -u > /tmp/7-way.contig.list
    wc -l /tmp/7-way.contig.list
    mkdir /cluster/data/cavPor3/bed/multiz7way/splitRun
    cp -p /tmp/7-way.contig.list \
    /cluster/data/cavPor3/bed/multiz7way/splitRun
    #   296 /tmp/7-way.contig.list

    # ready for the multiz run
    ssh pk
    cd /cluster/data/cavPor3/bed/multiz7way/splitRun
    mkdir -p maf run
    cd run
    mkdir penn
    # use latest penn utilities
    P=/cluster/bin/penn/multiz.v11.2007-03-19/multiz-tba
    cp -p $P/{autoMZ,multiz,maf_project} penn

    #   set the db and pairs directories here
    cat > autoMultiz.csh << \_EOF_
#!/bin/csh -ef
# Run penn autoMZ for a single contig.
# Arguments: $1 = hashed subdir (e.g. 0/0), $2 = contig name,
#            $3 = result maf path.
# Collects the pairwise sing.maf for every non-cavPor3 species (writing an
# empty placeholder maf when a species has no alignment for this contig),
# runs autoMZ in node-local /scratch/tmp, then copies the result back.
set db = cavPor3
set subdir = $1
set c = $2
set result = $3
set resultDir = $result:h
set run = `pwd`
set tmp = /scratch/tmp/$db/multiz.$c
set pairs = /san/sanvol1/scratch/$db/multiz7way/contigMaf
rm -fr $tmp
mkdir -p $tmp
mkdir -p $resultDir
cp ../../tree.7.nh ../../species.list $tmp
pushd $tmp
foreach s (`grep -v $db species.list`)
    set in = $pairs/$s/$subdir/$c.maf
    set out = $db.$s.sing.maf
    if (-e $in.gz) then
        zcat $in.gz > $out
    else if (-e $in) then
        cp $in $out
    else
        # no alignment for this species/contig: give autoMZ an empty maf
        echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
set path = ($run/penn $path); rehash
$run/penn/autoMZ + T=$tmp E=$db "`cat tree.7.nh`" $db.*.sing.maf $c.maf
popd
cp $tmp/$c.maf $result
rm -fr $tmp
rmdir --ignore-fail-on-non-empty /scratch/tmp/$db
_EOF_
# << happy emacs
    chmod +x autoMultiz.csh

    cat  << \_EOF_ > template
#LOOP
./autoMultiz.csh $(dir1) $(root1) {check out line+ /cluster/data/cavPor3/bed/multiz7way/splitRun/maf/$(dir1)/$(root1).maf}
#ENDLOOP
_EOF_
    # << emacs

    sed -e "s/^\.\///" ../7-way.contig.list \
    | gensub2 stdin single template jobList

    para create jobList
    para try ... check ... 
    # Checking finished jobs
    # Completed: 10 of 296 jobs
    # CPU time in finished jobs:       4355s      72.59m     1.21h    0.05d  0.000 y
    # IO & Wait Time:                   104s       1.73m     0.03h    0.00d  0.000 y
    # Average job time:                 446s       7.43m     0.12h    0.01d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:            1672s      27.87m     0.46h    0.02d
    # Submission to last job:          1681s      28.02m     0.47h    0.02d
    # Estimated complete:                 0s       0.00m     0.00h    0.00d
    # [pk:tdreszer run> pc
    # 296 jobs in batch
    # 92505 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # unsubmitted jobs: 286
    # ranOk: 10
    # total jobs in batch: 296
    para push ... check ... etc
    # Completed: 296 of 296 jobs
    # CPU time in finished jobs:      53659s     894.32m    14.91h    0.62d  0.002 y
    # IO & Wait Time:                  1675s      27.92m     0.47h    0.02d  0.000 y
    # Average job time:                 187s       3.12m     0.05h    0.00d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:            1969s      32.82m     0.55h    0.02d
    # Submission to last job:          4023s      67.05m     1.12h    0.05d
    # Estimated complete:                 0s       0.00m     0.00h    0.00d
    # [pk:tdreszer run> pc
    # 296 jobs in batch
    # 84971 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # ranOk: 296
    # total jobs in batch: 296
    
    # put the split maf results back together into a single maf file
    #   eliminate duplicate comments
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/multiz7way
    mkdir togetherMaf
    grep "^##maf version" splitRun/maf/0/0/Contig00700.maf \
    | sort -u > togetherMaf/cavPor3.7way.maf
    ##maf version=1 scoring=autoMZ.v1
    ##maf version=1 scoring=maf_project.v12
    ##maf version=1 scoring=multiz

    for F in `find ./splitRun/maf -type f -depth`
do
    grep -h "^#" "${F}" | egrep -v "maf version=1|eof maf" \
    | sed -e "s#/_MZ_[^ ]* # #g; s#__[0-9]##g"
done | sort -u >> togetherMaf/cavPor3.7way.maf
    for F in `find ./splitRun/maf -type f -depth`
do
    grep -v -h "^#" "${F}"
done >> togetherMaf/cavPor3.7way.maf

    grep "^##eof maf" splitRun/maf/0/0/Contig00700.maf \
    | sort -u >> togetherMaf/cavPor3.7way.maf

    # load tables for a look
    ssh hgwdev
    mkdir -p /gbdb/cavPor3/multiz7way/maf
    ln -s /cluster/data/cavPor3/bed/multiz7way/togetherMaf/*.maf \
                /gbdb/cavPor3/multiz7way/maf/multiz7way.maf
    # this generates an immense multiz7way.tab file in the directory
    #   where it is running.  Best to run this over in scratch.
    cd /scratch/tmp
    time nice -n +19 hgLoadMaf \
    -pathPrefix=/gbdb/cavPor3/multiz7way/maf cavPor3 multiz7way
    # Advisory lock created
    # Indexing and tabulating /gbdb/cavPor3/multiz7way/maf/multiz7way.maf
    # Loading multiz7way into database
    # Loaded 6939217 mafs in 1 files from /gbdb/cavPor3/multiz7way/maf
    # Advisory lock has been released
    # 
    # real    7m7.421s
    # user    1m50.625s
    # sys     0m34.191s

### ### ###################################################################
### ### #### Abandoning 7-Way because I am adding Squirrel 2008-05-27 #####
### ### ###################################################################


#########################################################################
## 8-Way Multiz (STARTED - 2008-05-27 - Tim)
## 8-Way Multiz (RESTARTED - 2008-07-02 - Tim Done: 7-11-2008)
##

    #   the all.chain.gz files were split up via kluster jobs on memk
    #   in order to get mafSynNet files.  Example above in ornAna1 blastz
    ssh hgwdev
    mkdir /cluster/data/cavPor3/bed/multiz8way
    cd /cluster/data/cavPor3/bed/multiz8way
    #   take the 30-way tree from mm9 and eliminate genomes not in
    #   this alignment
    #   rearrange to get cavPor3 on the top of the graph
    #   paste this tree into the on-line phyloGif tool:
    #   http://genome.ucsc.edu/cgi-bin/phyloGif
    #   to create the image for the tree diagram
    
    ### Note the ground squirrel (Spermophilus tridecemlineatus) is not in the mm9 30-way
    ### Add it a same distance from other rodents as rabbit

    #   select the 7 organisms from the 30-way recently done on mouse mm9
    /cluster/bin/phast/tree_doctor \
--prune-all-but Human_hg18,Mouse_mm9,Rat_rn4,GuineaPig_cavPor2,Rabbit_oryCun1,Opossum_monDom4,Chicken_galGal3 \
    /cluster/data/mm9/bed/multiz30way/mm9OnTop.fullNames.nh \
    | sed -e "s/cavPor2/cavPor3/g" > 7-way.fullNames.nh

    #   looks something like this:
    # ((((((Mouse_mm9:0.076274,Rat_rn4:0.084383):0.200607,GuineaPig_cavPor3:0.202990):0.034350,Rabbit_oryCun1:0.208548):0.014587
    # ,Human_hg18:0.126901):0.263313,Opossum_monDom4:0.320721):0.207444,Chicken_galGal3:0.488824);

    #   rearrange to get guineaPig at the top:
    # this leaves us with:
    cat << _EOF_ > cavPor3.7-way.nh
(((((GuineaPig_cavPor3:0.202990,(Mouse_mm9:0.076274,Rat_rn4:0.084383):0.200607):0.034350,Rabbit_oryCun1:0.208548):0.014587,Human_hg18:0.126901):0.263313,Opossum_monDom4:0.320721):0.207444,Chicken_galGal3:0.488824);
_EOF_
    #   << happy emacs
    
    # Add squirrel at same distance to guineaPig and Mouse as rabbits:
   # Alternative tree: ((((((GuineaPig_cavPor3:0.202990,(Mouse_mm9:0.076274,Rat_rn4:0.084383):0.200607):0.017175,Squirrel_speTri0:0.208548):0.017175,Rabbit_oryCun1:0.208548):0.014587,Human_hg18:0.126901):0.263313,Opossum_monDom4:0.320721):0.207444,Chicken_galGal3:0.488824);
    cat << _EOF_ > cavPor3.8-way.nh
(((((GuineaPig_cavPor3:0.202990,((Mouse_mm9:0.076274,Rat_rn4:0.084383):0.18602,Squirrel_speTri0:0.208548):0.014587):0.034350,Rabbit_oryCun1:0.208548):0.014587,Human_hg18:0.126901):0.263313,Opossum_monDom4:0.320721):0.207444,Chicken_galGal3:0.488824);
_EOF_
    #   << happy emacs
    
    cd /cluster/data/cavPor3/bed/multiz8way
    cp ../multiz7way/species.list .
    echo speTri0 >> species.list
    
    # verify all blastz's exists
    cat << \_EOF_ > listMafs.csh
#!/bin/csh -fe
# Same check as the 7-way listMafs.csh: report which net maf flavor
# exists per species, preferring mafRBestNet > mafSynNet > mafNet.
cd /cluster/data/cavPor3/bed/multiz8way
foreach db (`grep -v cavPor3 species.list`)
    set bdir = /cluster/data/cavPor3/bed/blastz.$db
    if (-e $bdir/mafRBestNet/cavPor3.$db.rbest.maf.gz) then
    echo "$db mafRBestNet"
    else if (-e $bdir/mafSynNet/cavPor3.$db.syn.maf.gz) then
    echo "$db mafSynNet"
    else if (-e $bdir/mafNet/cavPor3.$db.net.maf.gz) then
    echo "$db mafNet"
    else
    echo "$db mafs not found"
    endif
end
_EOF_
    # << happy emacs
    chmod +x ./listMafs.csh
    #   see what it says:
    ./listMafs.csh
    # galGal3 mafNet
    # hg18 mafNet
    # mm9 mafNet
    # monDom4 mafNet
    # oryCun1 mafRBestNet
    # rn4 mafNet
    # speTri0 mafs not found
    
    ### After some back and forth on my own, Kate finally got the speTri0 Mafs together:
    # galGal3 mafNet
    # hg18 mafNet
    # mm9 mafNet
    # monDom4 mafNet
    # oryCun1 mafRBestNet
    # rn4 mafNet
    # speTri0 mafRBestNet

    /cluster/bin/phast/all_dists cavPor3.8-way.nh > 8way.distances.txt
    # GuineaPig_cavPor3       Mouse_mm9       0.479871
    # GuineaPig_cavPor3       Rat_rn4 0.487980
    # GuineaPig_cavPor3       Squirrel_speTri0        0.426125
    # GuineaPig_cavPor3       Rabbit_oryCun1  0.445888
    # GuineaPig_cavPor3       Human_hg18      0.378828
    # GuineaPig_cavPor3       Opossum_monDom4 0.835961
    # GuineaPig_cavPor3       Chicken_galGal3 1.211508
    # Mouse_mm9       Rat_rn4 0.160657
    # Mouse_mm9       Squirrel_speTri0        0.470842
    # Mouse_mm9       Rabbit_oryCun1  0.519779
    # Mouse_mm9       Human_hg18      0.452719
    # Mouse_mm9       Opossum_monDom4 0.909852
    # Mouse_mm9       Chicken_galGal3 1.285399
    # Rat_rn4 Squirrel_speTri0        0.478951
    # Rat_rn4 Rabbit_oryCun1  0.527888
    # Rat_rn4 Human_hg18      0.460828
    # Rat_rn4 Opossum_monDom4 0.917961
    # Rat_rn4 Chicken_galGal3 1.293508
    # Squirrel_speTri0        Rabbit_oryCun1  0.466033
    # Squirrel_speTri0        Human_hg18      0.398973
    # Squirrel_speTri0        Opossum_monDom4 0.856106
    # Squirrel_speTri0        Chicken_galGal3 1.231653
    # Rabbit_oryCun1  Human_hg18      0.350036
    # Rabbit_oryCun1  Opossum_monDom4 0.807169
    # Rabbit_oryCun1  Chicken_galGal3 1.182716
    # Human_hg18      Opossum_monDom4 0.710935
    # Human_hg18      Chicken_galGal3 1.086482
    # Opossum_monDom4 Chicken_galGal3 1.016989
    # (total) -       2.437490

    grep -i cavPor 8way.distances.txt | sort -k3,3n
    # GuineaPig_cavPor3       Human_hg18      0.378828
    # GuineaPig_cavPor3       Squirrel_speTri0        0.426125
    # GuineaPig_cavPor3       Rabbit_oryCun1  0.445888
    # GuineaPig_cavPor3       Mouse_mm9       0.479871
    # GuineaPig_cavPor3       Rat_rn4 0.487980
    # GuineaPig_cavPor3       Opossum_monDom4 0.835961
    # GuineaPig_cavPor3       Chicken_galGal3 1.211508
    
    # Note that guineaPig is closer to human than to other rodents.
    # This may be reasonable, since the speed of evolution (length of tree limbs) 
    # in rodents is greater than for primates.  For instance mm9 is closer to hg18
    # than it is to either cavPor2 or oryCun1
    
    ## ??? time nice -n +19 featureBits cavPor3 axtChain/cavPor3.speTri0.rbest.chain.psl >& fb.cavPor3.speTri0.rbest.chain.psl.txt &


    #   use the calculated
    #   distances in the table below to order the organisms and check
    #   the button order on the browser.
    #   And if you can fill in the table below entirely, you have
    #   succeeded in finishing all the alignments required.
    #
    #                         featureBits chainLink measures
    #                                    chainCavPor3Link   chain   linearGap
    # distance                     on cavPor3    on other   minScore
    # 0.479871 Mouse_mm9           (28.900%)     (29.330%)  3000     medium
    # 0.487980 Rat_rn4             (27.858%)     (27.602%)  3000     medium
    # 0.479871 Squirrel_speTri0                             3000     medium
    # 0.445888 Rabbit_oryCun1      (35.145%)     (28.238%)  3000     medium
    # 0.378828 Human_hg18          (43.971%)     (48.132%)  3000     medium
    # 0.835961 Opossum_monDom4      (9.540%)     (14.372%)  5000     loose
    # 1.211508 Chicken_galGal3     (10.190%)      (5.437%)  5000     loose
    ### ### ### ### Be sure to use calJac as an example because of scaffolds
    ### ### ### ### will require making maf files by scaffold name

    #   create a coherent set of all the mafs involved in this run
    mkdir mafLinks
    cd mafLinks
    ln -s ../../blastz.hg18/mafNet ./hg18
    ln -s ../../blastz.mm9/mafNet ./mm9
    ln -s ../../blastz.rn4/mafNet ./rn4
    ln -s ../../blastz.monDom4/mafNet ./monDom4
    ln -s ../../blastz.galGal3/mafNet ./galGal3
    ln -s ../../blastz.oryCun1/mafRBestNet ./oryCun1
    ln -s ../../blastz.speTri0/mafRBestNet ./speTri0

    #   check data size:
    du -hscL *
    # 100M    galGal3
    # 930M    hg18
    # 577M    mm9
    # 268M    monDom4
    # 495M    oryCun1
    # 543M    rn4
    # 680M    speTri0
    # 3.6G    total

    #   need to split these things up by Contig number for efficient kluster run
    ssh kkstore05
    mkdir -p /san/sanvol1/scratch/cavPor3/multiz8way/contigMaf
    cd /iscratch/tmp
    #   the 16801 is the cavPor3 chrM size from its chrom.sizes
    #   (this comment originally read "16201 ... petMar" -- stale template text)
    echo "chrM 0 16801" > chrM.bed

    # Split each species' pairwise maf into per-contig pieces for the
    # 8-way kluster run (note: unlike the 7-way loop, the zcat path here
    # correctly includes the ${D}/ subdirectory under mafLinks).
    for D in `grep -v cavPor3 /cluster/data/cavPor3/bed/multiz8way/species.list`
do
    echo ${D}
    zcat \
/cluster/data/cavPor3/bed/multiz8way/mafLinks/${D}/cavPor3.${D}.*.maf.gz \
    > ${D}.maf
    mkdir /iscratch/tmp/${D}
    cd /iscratch/tmp/${D}
    # split by target contig, hashed names into a 2-level directory tree
    mafSplit -verbose=2 /dev/null -byTarget -useHashedName=10 Contig \
    ../${D}.maf -outDirDepth=2
    # chrM does not match the Contig naming; carve it out separately
    mafsInRegion ../chrM.bed 0/0/chrM.maf ../${D}.maf
    rsync -a --progress ./ \
    /san/sanvol1/scratch/cavPor3/multiz8way/contigMaf/${D}
    cd /iscratch/tmp
    rm -fr ${D} ${D}.maf
done

    #   create a run-time list of contigs to operate on, not all contigs
    #   exist in all alignments, but we want all contig names used in any
    #   alignment:
    ssh kkstore05 # cavPor3 -> store12 -> kkstore05
    cd /san/sanvol1/scratch/cavPor3/multiz8way/contigMaf
    for D in *
do
    cd "${D}"
    find . -type f
    cd ..
done | sort -u > /tmp/8-way.contig.list
    wc -l /tmp/8-way.contig.list
    mkdir /cluster/data/cavPor3/bed/multiz8way/splitRun
    cp -p /tmp/8-way.contig.list \
    /cluster/data/cavPor3/bed/multiz8way/splitRun
    # wc -l 8-way.contig.list 
    # 296 8-way.contig.list


    # ready for the multiz run
    ssh pk
    cd /cluster/data/cavPor3/bed/multiz8way/splitRun
    mkdir -p maf run
    cd run
    mkdir penn
    # use latest penn utilities
    P=/cluster/bin/penn/multiz.v11.2007-03-19/multiz-tba
    cp -p $P/{autoMZ,multiz,maf_project} penn

    #   set the db and pairs directories here
    cat > autoMultiz.csh << \_EOF_
#!/bin/csh -ef
# 8-way version of autoMultiz.csh: run penn autoMZ for a single contig.
# Arguments: $1 = hashed subdir (e.g. 0/0), $2 = contig name,
#            $3 = result maf path.
# Collects the pairwise sing.maf for every non-cavPor3 species (writing an
# empty placeholder maf when a species has no alignment for this contig),
# runs autoMZ in node-local /scratch/tmp, then copies the result back.
set db = cavPor3
set subdir = $1
set c = $2
set result = $3
set resultDir = $result:h
set run = `pwd`
set tmp = /scratch/tmp/$db/multiz.$c
set pairs = /san/sanvol1/scratch/$db/multiz8way/contigMaf
rm -fr $tmp
mkdir -p $tmp
mkdir -p $resultDir
cp ../../tree.8.nh ../../species.list $tmp
pushd $tmp
foreach s (`grep -v $db species.list`)
    set in = $pairs/$s/$subdir/$c.maf
    set out = $db.$s.sing.maf
    if (-e $in.gz) then
        zcat $in.gz > $out
    else if (-e $in) then
        cp $in $out
    else
        # no alignment for this species/contig: give autoMZ an empty maf
        echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
set path = ($run/penn $path); rehash
$run/penn/autoMZ + T=$tmp E=$db "`cat tree.8.nh`" $db.*.sing.maf $c.maf
popd
cp $tmp/$c.maf $result
rm -fr $tmp
rmdir --ignore-fail-on-non-empty /scratch/tmp/$db
_EOF_
# << happy emacs
    chmod +x autoMultiz.csh

    cat  << \_EOF_ > template
#LOOP
./autoMultiz.csh $(dir1) $(root1) {check out line+ /cluster/data/cavPor3/bed/multiz8way/splitRun/maf/$(dir1)/$(root1).maf}
#ENDLOOP
_EOF_
    # << emacs

    sed -e "s/^\.\///" ../8-way.contig.list \
    | gensub2 stdin single template jobList

    para create jobList
    para try ... check ... 
    
    ## first 20 failed because of tree.8.nh:
    
    ### Hand edited tree.8.nh from cavPor3.8-way.nh to:
    # (((((cavPor3 ((mm9 rn4) speTri0)) oryCun1) hg18) monDom4) galGal3)
    
    # Next 10 succeeded
    # 296 jobs in batch
    # 15118 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # Completed: 10 of 296 jobs
    # Crashed: 20 jobs
    # CPU time in finished jobs:       5742s      95.69m     1.59h    0.07d  0.000 y
    # IO & Wait Time:                    76s       1.27m     0.02h    0.00d  0.000 y
    # Average job time:                 582s       9.70m     0.16h    0.01d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:            1482s      24.70m     0.41h    0.02d
    # Submission to last job:          2712s      45.20m     0.75h    0.03d
    # Estimated complete:                 0s       0.00m     0.00h    0.00d
    
    para push; para check
    # 296 jobs in batch
    # 13908 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # running: 25
    # ranOk: 271
    # total jobs in batch: 296
    # [pk:tdreszer run> pc
    # 296 jobs in batch
    # 10574 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # .
    # ranOk: 296
    # total jobs in batch: 296
    para -eta time
    # 296 jobs in batch
    # 10542 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # Completed: 296 of 296 jobs
    # CPU time in finished jobs:      81235s    1353.92m    22.57h    0.94d  0.003 y
    # IO & Wait Time:                 11960s     199.33m     3.32h    0.14d  0.000 y
    # Average job time:                 315s       5.25m     0.09h    0.00d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:            3262s      54.37m     0.91h    0.04d
    # Submission to last job:         59090s     984.83m    16.41h    0.68d
    # Estimated complete:                 0s       0.00m     0.00h    0.00d

    # put the split maf results back together into a single maf file
    #   eliminate duplicate comments
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/multiz8way
    mkdir togetherMaf
    grep "^##maf version" splitRun/maf/0/0/Contig00700.maf \
    | sort -u > togetherMaf/cavPor3.8way.maf
    ##maf version=1 scoring=autoMZ.v1
    ##maf version=1 scoring=maf_project.v12
    ##maf version=1 scoring=multiz

    for F in `find ./splitRun/maf -type f -depth`
do
    grep -h "^#" "${F}" | egrep -v "maf version=1|eof maf" \
    | sed -e "s#/_MZ_[^ ]* # #g; s#__[0-9]##g"
done | sort -u >> togetherMaf/cavPor3.8way.maf
    for F in `find ./splitRun/maf -type f -depth`
do
    grep -v -h "^#" "${F}"
done >> togetherMaf/cavPor3.8way.maf

    grep "^##eof maf" splitRun/maf/0/0/Contig00700.maf \
    | sort -u >> togetherMaf/cavPor3.8way.maf

    # load tables for a look
    ssh hgwdev
    mkdir -p /gbdb/cavPor3/multiz8way/maf
    ln -s /cluster/data/cavPor3/bed/multiz8way/togetherMaf/*.maf \
                /gbdb/cavPor3/multiz8way/maf/multiz8way.maf
    # this generates an immense multiz8way.tab file in the directory
    #   where it is running.  Best to run this over in scratch.
    cd /scratch/tmp
    time nice -n +19 hgLoadMaf \
    -pathPrefix=/gbdb/cavPor3/multiz8way/maf cavPor3 multiz8way &
    # Advisory lock created
    # Indexing and tabulating /gbdb/cavPor3/multiz8way/maf/multiz8way.maf
    # Loading multiz8way into database
    # Loaded 8976211 mafs in 1 files from /gbdb/cavPor3/multiz8way/maf
    # Advisory lock has been released
    # real    4m26.559s
    # user    2m35.837s
    # sys     0m47.660s
    
    # load summary table
    time nice -n +19 cat /gbdb/cavPor3/multiz8way/maf/*.maf \
	| hgLoadMafSummary cavPor3 -minSize=30000 -mergeGap=1500 \
	 -maxSize=200000  multiz8waySummary stdin
    # Indexing and tabulating stdin
    # Created 1650940 summary blocks from 26384188 components and 8747391 mafs from stdin
    # Loading into cavPor3 table multiz8waySummary...
    # Loading completeAdvisory lock has been released
    # real    5m31.133s
    # user    4m55.729s
    # sys     0m21.743s
    
    # Gap Annotation
    # prepare bed files with gap info
    ssh kkstore05
    mkdir /cluster/data/cavPor3/bed/multiz8way/anno
    cd /cluster/data/cavPor3/bed/multiz8way/anno
    mkdir maf run

    #	these actually already all exist from previous multiple alignments
    #	remove the echo in front of the twoBitInfo to actually make it work
    # Dry run: the leading "echo" before twoBitInfo prints the command
    # instead of running it (per the note above); existing N.bed files
    # are just listed.
    for DB in `cat ../species.list`
do
    CDIR="/cluster/data/${DB}"
    if [ ! -f ${CDIR}/${DB}.N.bed ]; then
	echo "creating ${DB}.N.bed"
	echo twoBitInfo -nBed ${CDIR}/${DB}.2bit ${CDIR}/${DB}.N.bed
    else
	ls -og ${CDIR}/${DB}.N.bed
    fi
done
    # creating cavPor3.N.bed
    # twoBitInfo -nBed /cluster/data/cavPor3/cavPor3.2bit /cluster/data/cavPor3/cavPor3.N.bed
    # -rw-rw-r--  1 2164385 Jul 18  2006 /cluster/data/galGal3/galGal3.N.bed
    # -rw-rw-r--  1 232970 Feb  6  2006 /cluster/data/hg18/hg18.N.bed
    # -rw-rw-r--  1 27838 Oct 15  2007 /cluster/data/mm9/mm9.N.bed
    # -rw-rw-r--  1 1788138 Feb 28  2006 /cluster/data/monDom4/monDom4.N.bed
    # -rw-rw-r--  1 13782261 Nov 13  2005 /cluster/data/oryCun1/oryCun1.N.bed
    # -rw-rw-r--  1 20910683 Feb 28  2006 /cluster/data/rn4/rn4.N.bed
    # creating speTri0.N.bed
    # twoBitInfo -nBed /cluster/data/speTri0/speTri0.2bit /cluster/data/speTri0/speTri0.N.bed

    cd run
    rm -f nBeds sizes
    # Build the nBeds and sizes list files mafAddIRows needs: per species,
    # symlink its N.bed and chrom.sizes locally and record the filenames.
    for DB in `grep -v cavPor3 ../../species.list`
do
    echo "${DB} "
    ln -s  /cluster/data/${DB}/${DB}.N.bed ${DB}.bed
    echo ${DB}.bed  >> nBeds
    ln -s  /cluster/data/${DB}/chrom.sizes ${DB}.len
    echo ${DB}.len  >> sizes
done
    # galGal3 
    # hg18 
    # mm9 
    # monDom4 
    # oryCun1 
    # rn4 
    # speTri0 

    ssh memk
    #	temporarily copy the cavPor3.8way.maf file onto the memk
    #	nodes /scratch/data/cavPor3/maf/ directory
    for R in 0 1 2 3 4 5 6 7
do
    ssh mkr0u${R} rsync -a --progress \
/cluster/data/cavPor3/bed/multiz8way/togetherMaf/cavPor3.8way.maf \
	/scratch/data/cavPor3/maf/
done
    
    mkdir /cluster/data/cavPor3/bed/multiz8way/anno/splitMaf
    #	need to split up the single maf file into individual
    #	per-scaffold maf files to run annotation on
    cd /cluster/data/cavPor3/bed/multiz8way/anno/splitMaf
    #	create bed files to list approximately 394 scaffolds in
    #	a single list, approximately 8 lists
    cat << \_EOF_ > mkBedLists.pl
#!/usr/bin/env perl
# Partition cavPor3 chrom.sizes into bed files (file_0.bed, file_1.bed,
# ...), ~394 scaffolds each, written as "chrom 0 size chrom" lines for
# use as mafsInRegion region lists.

use strict;
use warnings;

my $bedCount = 0;   # index of the bed file currently open
my $i = 0;          # count of chrom.sizes lines processed

my $bedFile = sprintf("file_%d.bed", $bedCount);

open (BF,">$bedFile") or die "can not write to $bedFile $!";

open (FH,"</cluster/data/cavPor3/chrom.sizes") or
        die "can not read /cluster/data/cavPor3/chrom.sizes $!";
while (my $line = <FH>) {
    chomp $line;
    # On every 394th line: echo the boundary scaffold to stdout and roll
    # over to the next bed file.  Note the boundary line itself is then
    # written to the NEW file, so files hold 393/394-ish entries each.
    if ( (($i + 1) % 394) == 0 )  {
        printf "%s\n", $line;
        close (BF);
        ++$bedCount;
        $bedFile = sprintf("file_%d.bed", $bedCount);
        open (BF,">$bedFile") or die "can not write to $bedFile $!";
    }
    ++$i;
    my ($chr, $size) = split('\s+',$line);
    printf BF "%s\t0\t%d\t%s\n", $chr, $size, $chr;
}
close (FH);
# close (BH);
_EOF_
    # << happy emacs
    chmod +x mkBedLists.pl
    ./mkBedLists.pl
    # -rw-rw-r--  1 tdreszer protein 13805 Jul 10 16:23 file_0.bed
    # -rw-rw-r--  1 tdreszer protein 13602 Jul 10 16:23 file_1.bed
    # -rw-rw-r--  1 tdreszer protein 13756 Jul 10 16:23 file_2.bed
    # -rw-rw-r--  1 tdreszer protein 14166 Jul 10 16:23 file_3.bed
    # -rw-rw-r--  1 tdreszer protein 14090 Jul 10 16:23 file_4.bed
    # -rw-rw-r--  1 tdreszer protein 13790 Jul 10 16:23 file_5.bed
    # -rw-rw-r--  1 tdreszer protein 13790 Jul 10 16:23 file_6.bed
    # -rw-rw-r--  1 tdreszer protein 13545 Jul 10 16:23 file_7.bed

    #	now, run a mafsInRegion on each one of those lists
    cat << \_EOF_ > runOne
#!/bin/csh -fe
# Extract per-scaffold maf files for the scaffolds listed in $1.bed.
# Argument: $1 = result directory name (file_N, matching file_N.bed).
# Works in node-local /scratch/tmp, then rsyncs results back and cleans up.
set runDir = "/cluster/data/cavPor3/bed/multiz8way/anno/splitMaf"
set resultDir = $1
set bedFile = $resultDir.bed
mkdir -p $resultDir
mkdir -p /scratch/tmp/cavPor3/$resultDir
pushd /scratch/tmp/cavPor3/$resultDir
mafsInRegion $runDir/$bedFile -outDir . \
        /scratch/data/cavPor3/maf/cavPor3.8way.maf
popd
rsync -q -a /scratch/tmp/cavPor3/$resultDir/ ./$resultDir/
rm -fr /scratch/tmp/cavPor3/$resultDir
rmdir --ignore-fail-on-non-empty /scratch/tmp/cavPor3
_EOF_
    # << happy emacs
    chmod +x runOne

    cat << \_EOF_ > template
#LOOP
./runOne $(root1)
#ENDLOOP
_EOF_
    # << happy emacs

    ls file*.bed > runList
    gensub2 runList single template jobList
    para create jobList
    para try ... check ... push ... etc
    # 8 jobs in batch
    # 0 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # Completed: 8 of 8 jobs
    # CPU time in finished jobs:       1810s      30.16m     0.50h    0.02d  0.000 y
    # IO & Wait Time:                  1218s      20.30m     0.34h    0.01d  0.000 y
    # Average job time:                 379s       6.31m     0.11h    0.00d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:             614s      10.23m     0.17h    0.01d
    # Submission to last job:           614s      10.23m     0.17h    0.01d
    # Estimated complete:                 0s       0.00m     0.00h    0.00d

    cd /cluster/data/cavPor3/bed/multiz8way/anno/run

    cat << \_EOF_ > doAnno.csh
#!/bin/csh -ef
# Cluster job: annotate one split maf with iRows.
# $1 = input maf, $2 = subdirectory name (file_N), $3 = output maf path
set outDir = ../maf/$2
set result = $3
set input = $1
mkdir -p $outDir
# nBeds lists the per-species N-gap bed files (see speTri0.N.bed fixup
# below); the 2bit supplies the cavPor3 reference sequence
cat $input | \
nice mafAddIRows -nBeds=nBeds stdin /scratch/data/cavPor3/cavPor3.2bit $result
_EOF_
    # << happy emacs
    chmod +x doAnno.csh

    cat << \_EOF_ > template
#LOOP
./doAnno.csh $(path1) $(lastDir1) {check out line+ ../maf/$(lastDir1)/$(root1).maf}
#ENDLOOP
_EOF_
    # << happy emacs

    find ../splitMaf -type f -name "*.maf" > maf.list
    gensub2 maf.list single template jobList
    para create jobList
    para try ... check ... push ... etc.
    # 1849 jobs in batch
    # 0 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # unsubmitted jobs: 1839
    # crashed: 10
    # total jobs in batch: 1849
    # Couldn't open speTri0.bed , No such file or directory
    # lrwxrwxrwx  1 tdreszer protein     35 Jul 10 14:13 speTri0.bed -> /cluster/data/speTri0/speTri0.N.bed
    ### /cluster/data/speTri0/speTri0.N.bed Does not exist...
    twoBitInfo -nBed /cluster/data/speTri0/speTri0.2bit /cluster/data/speTri0/speTri0.N.bed &
    para try; para push...
    # 1849 jobs in batch
    # 0 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # Completed: 1849 of 1849 jobs
    # CPU time in finished jobs:       6313s     105.22m     1.75h    0.07d  0.000 y
    # IO & Wait Time:                  5708s      95.13m     1.59h    0.07d  0.000 y
    # Average job time:                   7s       0.11m     0.00h    0.00d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:             171s       2.85m     0.05h    0.00d
    # Submission to last job:           651s      10.85m     0.18h    0.01d
    # Estimated complete:                 0s       0.00m     0.00h    0.00d
    
    #	put the results back together into a single file
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/multiz8way/anno
    grep "^##maf version" maf/file_0//scaffold_0.maf \
	| sort -u > cavPor3.anno.8way.maf
    ##maf version=1 scoring=autoMZ.v1   
    find ./maf -type f -depth -name "*.maf" | while read F
do
    grep -v -h "^#" "${F}"
done >> cavPor3.anno.8way.maf
    echo "##eof maf" >> cavPor3.anno.8way.maf
    # -rw-rw-r--  1 tdreszer protein 15080110322 Jul 11 14:14 cavPor3.anno.8way.maf

    ssh hgwdev
    cd /cluster/data/cavPor3/bed/multiz8way/anno
    mkdir -p /gbdb/cavPor3/multiz8way/anno
    ln -s `pwd`/cavPor3.anno.8way.maf \
                /gbdb/cavPor3/multiz8way/anno/multiz8way.maf
    #	by loading this into the table multiz8way, it will replace the
    #	previously loaded table with the unannotated mafs
    #	huge temp files are made, do them on local disk
    cd /scratch/tmp
    time nice -n +19 hgLoadMaf -pathPrefix=/gbdb/cavPor3/multiz8way/anno \
                cavPor3 multiz8way
    # Loaded 10486949 mafs in 1 files from /gbdb/cavPor3/multiz8way/anno
    # Advisory lock has been released
    # real    6m19.376s

    #	normally filter this for chrom size > 1,000,000 and only load
    #	those chroms.  But this is a scaffold assembly, load everything:
    time nice -n +19 hgLoadMafSummary cavPor3 -minSize=30000 -mergeGap=1500 \
	-maxSize=200000  multiz8waySummary \
	    /gbdb/cavPor3/multiz8way/anno/multiz8way.maf
    # Created 1650940 summary blocks from 26384188 components
    # and 10209337 mafs from /gbdb/cavPor3/multiz8way/anno/multiz8way.maf
    # real    6m29.261s

    #	by loading this into the table multiz8waySummary, it will replace
    #	the previously loaded table with the unannotated mafs
    #	remove the multiz8way*.tab files in this /scratch/tmp directory
    rm multiz8way*.tab
    #	And, you can remove the previously loaded non-annotated maf file link:
    rm /gbdb/cavPor3/multiz8way/maf/multiz8way.maf
    rmdir /gbdb/cavPor3/multiz8way/maf

# ### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> got this far 
# ### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> got this far 
# ### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> got this far 
# ### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> got this far 
# ### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> got this far
# ### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> got this far

##################
# Effort to move to hive
##################
 
ssh hgwdev
mv /hive/archive/store12/cavPor3 /hive/data/genomes/cavPor3
ln -sf /hive/data/genomes/cavPor3 /cluster/data/cavPor3
find /gbdb/cavPor3 /usr/local/apache/htdocs/goldenPath/cavPor3 -type l -ls | grep /cluster/store


################## EVERYTHING THAT FOLLOWS IS TEMPLATE FOR WHAT NEEDS TO BE DONE ######################
################## EVERYTHING THAT FOLLOWS IS TEMPLATE FOR WHAT NEEDS TO BE DONE ######################
################## EVERYTHING THAT FOLLOWS IS TEMPLATE FOR WHAT NEEDS TO BE DONE ######################
################## EVERYTHING THAT FOLLOWS IS TEMPLATE FOR WHAT NEEDS TO BE DONE ######################


###########################################################################
## Annotate 5-way multiple alignment with gene annotations
##		(START - 2008-07-14 - Tim)
    # Gene frames
    ## given previous survey done for 9-way alignment on Marmoset
    ## and the appearance of new ensGene tables on everything
    #	use knownGene for hg18, mm9
    #	use ensGene for canFam2, ornAna1
    #	and refGene for cavPor3

    ssh hgwdev
    mkdir /cluster/data/cavPor3/bed/multiz8way/frames
    cd /cluster/data/cavPor3/bed/multiz8way/frames
    mkdir genes
    # dump single-coverage gene predictions for each database, selecting
    # the gene table appropriate to it -- one loop over db:table pairs
    # instead of three copy-pasted loops
    # NOTE(review): the db lists here (rn4, oryCun1) disagree with the
    # plan comment above (canFam2) and with the "ls -og genes" output
    # below; this section is template -- confirm the species set before
    # running
    for SPEC in hg18:knownGene mm9:knownGene rn4:knownGene \
        oryCun1:ensGene ornAna1:ensGene cavPor3:refGene
do
    DB=${SPEC%%:*}
    TABLE=${SPEC##*:}
    hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from ${TABLE}" ${DB} \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/${DB}.tmp.gz
    mv /scratch/tmp/${DB}.tmp.gz genes/${DB}.gp.gz
    echo "${DB} done"
done

    ls -og genes
# -rw-rw-r--  1  861210 Mar 18 15:40 cavPor3.gp.gz
# -rw-rw-r--  1 1865308 Mar 18 15:40 canFam2.gp.gz
# -rw-rw-r--  1 2008806 Mar 18 15:39 hg18.gp.gz
# -rw-rw-r--  1 1965274 Mar 18 15:39 mm9.gp.gz
# -rw-rw-r--  1 1347532 Mar 18 15:40 ornAna1.gp.gz

    ssh kkstore06
    cd /cluster/data/cavPor3/bed/multiz5way/frames
    #	anything to annotate is in a pair, e.g.: cavPor3 genes/cavPor3.gp.gz
    time (cat  ../anno/cavPor3.anno.5way.maf | nice -n +19 genePredToMafFrames cavPor3 stdin stdout cavPor3 genes/cavPor3.gp.gz hg18 genes/hg18.gp.gz mm9 genes/mm9.gp.gz canFam2 genes/canFam2.gp.gz ornAna1 genes/ornAna1.gp.gz | gzip > multiz5way.mafFrames.gz) > frames.log 2>&1
    # see what it looks like in terms of number of annotations per DB:
    zcat multiz5way.mafFrames.gz | cut -f4 | sort | uniq -c | sort -n
#  77526 cavPor3
# 110768 ornAna1
# 221524 mm9
# 230010 hg18
# 243396 canFam2

    #	load the resulting file
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/multiz5way/frames
    time nice -n +19 hgLoadMafFrames cavPor3 multiz5wayFrames \
	multiz5way.mafFrames.gz
    #	real    0m21.968s

    #	enable the trackDb entries:
# frames multiz5wayFrames
# irows on
#############################################################################
# phastCons 5-way (DONE - 2008-03-19 - Hiram)

    # split 5way mafs into 10M chunks and generate sufficient statistics 
    # files for phastCons
    ssh memk
    mkdir /cluster/data/cavPor3/bed/multiz5way/msa.split
    cd /cluster/data/cavPor3/bed/multiz5way/msa.split
    mkdir -p /san/sanvol1/scratch/cavPor3/multiz5way/cons/ss

    cat << '_EOF_' > doSplit.csh
#!/bin/csh -ef
# Cluster job: split one annotated maf into 10 Mb SS (sufficient
# statistics) windows for phastCons.
# $1 = result subdirectory (file_N), $2 = chrom/scaffold name
set MAFS = /cluster/data/cavPor3/bed/multiz5way/anno/maf
set WINDOWS = /san/sanvol1/scratch/cavPor3/multiz5way/cons/ss
pushd $WINDOWS
set resultDir = $1
set c = $2
rm -fr $resultDir/$c
mkdir -p $resultDir
# reference sequence fasta for msa_split's -M (missing data) option
twoBitToFa -seq=$c /scratch/data/cavPor3/cavPor3.2bit /scratch/tmp/cavPor3.$c.fa
# need to truncate odd-ball scaffold/chrom names that include dots
# as phastCons utils can't handle them
set TMP = /scratch/tmp/$c.clean.maf.$$
perl -wpe 's/^s ([^.]+\.[^. ]+)\.\S+/s $1/' $MAFS/$resultDir/$c.maf > $TMP
/cluster/bin/phast/$MACHTYPE/msa_split $TMP -i MAF \
    -M /scratch/tmp/cavPor3.$c.fa \
    -o SS -r $resultDir/$c -w 10000000,0 -I 1000 -B 5000
rm -f /scratch/tmp/cavPor3.$c.fa
rm -f $TMP
popd
# marker file back in the batch directory is what parasol "check out"s
mkdir -p $resultDir
date > $resultDir/$c.out
'_EOF_'
    # << happy emacs
    chmod +x doSplit.csh

    cat << '_EOF_' > template
#LOOP
doSplit.csh $(dir1) $(root1) {check out line+ $(dir1)/$(root1).out}
#ENDLOOP
'_EOF_'
    # << happy emacs

    #	create list of maf files:
    (cd ../anno/maf; find . -type f) | sed -e "s#^./##" > maf.list

    gensub2 maf.list single template jobList
    para create jobList
    para try ... check ... etc
# Completed: 2320 of 2320 jobs
# CPU time in finished jobs:       1710s      28.50m     0.47h    0.02d  0.000 y
# IO & Wait Time:                  6951s     115.85m     1.93h    0.08d  0.000 y
# Average job time:                   4s       0.06m     0.00h    0.00d
# Longest finished job:             128s       2.13m     0.04h    0.00d
# Submission to last job:          1048s      17.47m     0.29h    0.01d

    # take the cons and noncons trees from the mouse 30-way

    #	Estimates are not easy to make; the more correct approach is to
    #	take the 30-way .mod file and re-use it here.
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/multiz5way
    cp -p /cluster/data/mm9/bed/multiz30way/mm9.30way.mod .

    # Run phastCons
    #	This job is I/O intensive in its output files, thus it is all
    #	working over in /scratch/tmp/
    ssh memk
    mkdir -p /cluster/data/cavPor3/bed/multiz5way/cons/run.cons
    cd /cluster/data/cavPor3/bed/multiz5way/cons/run.cons

    #	there are going to be several different phastCons runs using
    #	this same script.  They trigger off of the current working directory
    #	$cwd:t which is the "grp" in this script.  It is one of:
    #	all glires placentals
    #	Well, that's what it was when used in the Mm9 30-way,
    #	in this instance, there is only the directory "all"

    cat << '_EOF_' > doPhast.csh
#!/bin/csh -fe
# Cluster job: run phastCons on one SS window.
# $1 = subdirectory (file_N), $2 = window file name (without .ss),
# $3 = --expected-length, $4 = --target-coverage, $5 = --rho
# The species group name ("all" here) is taken from the name of the
# current working directory ($cwd:t).
set PHASTBIN = /cluster/bin/phast.build/cornellCVS/phast/bin
set subDir = $1
set f = $2
set c = $2:r
set len = $3
set cov = $4
set rho = $5
set grp = $cwd:t
set tmp = /scratch/tmp/$f
set cons = /cluster/data/cavPor3/bed/multiz5way/cons
mkdir -p $tmp
set san = /san/sanvol1/scratch/cavPor3/multiz5way/cons
# stage the model (and the optional not-informative species list, when
# present) plus the SS window onto node-local disk
if (-s $cons/$grp/$grp.non-inf) then
  cp -p $cons/$grp/$grp.mod $cons/$grp/$grp.non-inf $tmp
  cp -p $san/ss/$subDir/$f.ss $cons/$grp/$grp.mod $cons/$grp/$grp.non-inf $tmp
else
  cp -p $cons/$grp/$grp.mod $tmp
  cp -p $san/ss/$subDir/$f.ss $cons/$grp/$grp.mod $tmp
endif
pushd $tmp > /dev/null
if (-s $grp.non-inf) then
  $PHASTBIN/phastCons $f.ss $grp.mod \
    --rho $rho --expected-length $len --target-coverage $cov --quiet \
    --not-informative `cat $grp.non-inf` \
    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
else
  $PHASTBIN/phastCons $f.ss $grp.mod \
    --rho $rho --expected-length $len --target-coverage $cov --quiet \
    --seqname $c --idpref $c --most-conserved $f.bed --score > $f.pp
endif
popd > /dev/null
# move the .pp wiggle and most-conserved .bed results out to the san;
# NOTE(review): the sleep/touch pair presumably lets NFS attribute
# caching settle before the mv -- confirm before removing
mkdir -p $san/$grp/pp/$subDir $san/$grp/bed/$subDir
sleep 4
touch $san/$grp/pp/$subDir $san/$grp/bed/$subDir
rm -f $san/$grp/pp/$subDir/$f.pp
rm -f $san/$grp/bed/$subDir/$f.bed
mv $tmp/$f.pp $san/$grp/pp/$subDir
mv $tmp/$f.bed $san/$grp/bed/$subDir
rm -fr $tmp
'_EOF_'
    # << happy emacs
    chmod a+x doPhast.csh

    # Create parasol batch and run it
    pushd /san/sanvol1/scratch/cavPor3/multiz5way/cons
    find ./ss -type f -name "*.ss" | sed -e "s#^./##; s/.ss$//" \
	> /cluster/data/cavPor3/bed/multiz5way/cons/ss.list
    popd

    # run for all species
    cd ..
    mkdir -p all run.cons/all
    cd all
    /cluster/bin/phast.cz/tree_doctor ../../mm9.30way.mod \
--prune-all-but=bosTau3,hg18,mm9,canFam2,ornAna1 \
	| sed -e "s/bosTau3/cavPor3/" > all.mod
    cd ../run.cons/all

    #	root1 == chrom name, file1 == ss file name without .ss suffix
    # Create template file for "all" run
    cat << '_EOF_' > template
#LOOP
../doPhast.csh $(lastDir1) $(file1) 45 .3 .31 {check out line+ /san/sanvol1/scratch/cavPor3/multiz5way/cons/all/pp/$(lastDir1)/$(file1).pp}
#ENDLOOP
'_EOF_'
    # << happy emacs
    gensub2 ../../ss.list single template jobList
    para create jobList
    para try ... check ... push ... etc.
# Completed: 2569 of 2569 jobs
# CPU time in finished jobs:       8636s     143.93m     2.40h    0.10d  0.000 y
# IO & Wait Time:                 17371s     289.52m     4.83h    0.20d  0.001 y
# Average job time:                  10s       0.17m     0.00h    0.00d
# Longest finished job:              44s       0.73m     0.01h    0.00d
# Submission to last job:          1008s      16.80m     0.28h    0.01d

    # create Most Conserved track
    ssh kolossus
    cd /san/sanvol1/scratch/cavPor3/multiz5way/cons/all
    find ./bed -type f -name "chr*.bed" | xargs cat \
	| sort -k1,1 -k2,2n | \
        awk '{printf "%s\t%d\t%d\tlod=%d\t%s\n", $1, $2, $3, $5, $5;}' | \
            /cluster/bin/scripts/lodToBedScore /dev/stdin > mostConserved.bed
    #	~ 3 minutes
    cp -p mostConserved.bed /cluster/data/cavPor3/bed/multiz5way/cons/all

    # load into database
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/multiz5way/cons/all
    time nice -n +19 hgLoadBed cavPor3 phastConsElements5way mostConserved.bed
    #	Loaded 1005876 elements of size 5

    # Try for 5% overall cov, and 70% CDS cov 
    #	We don't have any gene tracks to compare CDS coverage
    #	--rho .31 --expected-length 45 --target-coverage .3
    featureBits cavPor3 phastConsElements5way
    #	132010504 bases of 2731830700 (4.832%) in intersection

    # Create merged posterior probability file and wiggle track data files
    # currently doesn't matter where this is performed, the san is the same
    # network distance from all machines.
    # sort by chromName, chromStart so that items are in numerical order 
    #  for wigEncode
    cd /san/sanvol1/scratch/cavPor3/multiz5way/cons/all
    mkdir -p phastCons5wayScores

# concatenate the per-window .pp files under each pp/file_N directory,
# in genomic order, into one gzipped data file per directory
for D in `ls -1d pp/file* | sort -t_ -k2n`
do
    TOP=`pwd`
    F=${D/pp\/}    # strip the leading "pp/" to get the bare file_N name
    out=${TOP}/phastCons5wayScores/${F}.data.gz
    echo "${D} > ${F}.data.gz"
    cd ${D}
    # the sed/sort/sed dance: temporarily flatten the dots inside
    # chrUn.004.* names and protect the "-" (presumably the window
    # start-end range in the file names -- confirm) so the dot-delimited
    # numeric sort orders windows correctly, then restore the names
    find . -name "*.pp" -type f \
	| sed -e "s#^./##; s/chrUn.004./chrUn_004_/; s/-/.-./" \
	| sort -t '.' -k1,1 -k3.3n \
	| sed -e "s/.-./-/; s/chrUn_004_/chrUn.004./" | xargs cat \
	| gzip > ${out}
    cd "${TOP}"
done

    #	copy those files to the downloads area:
# /cluster/data/cavPor3/bed/multiz5way/downloads/phastCons5way/phastConsScores
    #	for hgdownload downloads

    # Create merged posterior probability file and wiggle track data files
    # currently doesn't matter where this is performed, the san is the same
    # network distance from all machines.
    cd /san/sanvol1/scratch/cavPor3/multiz5way/cons/all
    ls -1 phastCons5wayScores/*.data.gz | sort -t_ -k2n | xargs zcat \
	| wigEncode -noOverlap stdin phastCons5way.wig phastCons5way.wib
    # Converted stdin, upper limit 1.00, lower limit 0.00
    time nice -n +19 cp -p *.wi? /cluster/data/cavPor3/bed/multiz5way/cons/all
    #	real    0m40.875s

    # Load gbdb and database with wiggle.
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/multiz5way/cons/all
    ln -s `pwd`/phastCons5way.wib /gbdb/cavPor3/multiz5way/phastCons5way.wib
    time nice -n +19 hgLoadWiggle -pathPrefix=/gbdb/cavPor3/multiz5way cavPor3 \
	phastCons5way phastCons5way.wig
    #	real    1m5.667s
    # remove garbage
    rm wiggle.tab

    #  Create histogram to get an overview of all the data
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/multiz5way/cons/all
    time nice -n +19 hgWiggle -doHistogram \
	-hBinSize=0.001 -hBinCount=1000 -hMinVal=0.0 -verbose=2 \
	    -db=cavPor3 phastCons5way > histogram.data 2>&1
    #	real    3m37.316s

    #	create plot of histogram:

    cat << '_EOF_' | gnuplot > histo.png
set terminal png small color x000000 xffffff xc000ff x66ff66 xffff00 x00ffff
set size 1.4, 0.8
set key left box
set grid noxtics
set grid ytics
# fixed stale copy-paste from the bosTau4 template: this assembly is
# guinea pig cavPor3, not cow bosTau4
set title " Guinea Pig CavPor3 Histogram phastCons5way track"
set xlabel " phastCons5way score"
set ylabel " Relative Frequency"
set y2label " Cumulative Relative Frequency (CRF)"
set y2range [0:1]
set y2tics
set yrange [0:0.02]

plot "histogram.data" using 2:5 title " RelFreq" with impulses, \
        "histogram.data" using 2:7 axes x1y2 title " CRF" with lines
'_EOF_'
    #	<< happy emacs

    display histo.png &

    #	These trackDb entries turn on the wiggle phastCons data track:
    #	type wigMaf 0.0 1.0
    #	maxHeightPixels 100:40:11
    #	wiggle phastCons5way
    #	spanList 1
    #	autoScale Off
    #	windowingFunction mean
    #	pairwiseHeight 12
    #	yLineOnOff Off

#############################################################################
#  Downloads (DONE - 2008-01-11 - Hiram)
    #	Let's see if the downloads will work
    ssh hgwdev
    cd /cluster/data/cavPor3
    #	expecting to find repeat masker .out file here:
    ln -s bed/RepeatMasker/cavPor3.fa.out .
    time nice -n +19 /cluster/bin/scripts/makeDownloads.pl \
	-workhorse=hgwdev cavPor3 > jkStuff/downloads.log 2>&1
    #	real    24m3.210s
    #	failed making upstream sequences:
    #	featureBits cavPor3 mgcGenes:upstream:1000 -fa=stdout
    #	setpriority: Permission denied.
    #	the 'nice' from my bash shell causes trouble inside the csh
    #	script which uses nice.  Finish off the install step manually
    #	with the mgcGenes upstreams ...

#############################################################################
#  PushQ entries (DONE - 2008-01-11 - Hiram)
    ssh hgwdev
    cd /cluster/data/cavPor3
    /cluster/bin/scripts/makePushQSql.pl cavPor3 > jkStuff/pushQ.sql
    #	output warnings:
# cavPor3 does not have seq
# cavPor3 does not have gbMiscDiff
# Could not tell (from trackDb, all.joiner and hardcoded lists of supporting
# and genbank tables) which tracks to assign these tables to:
#	genscanPep

#############################################################################
#  create download files (DONE - 2008-03-19 - Hiram)
    ssh hgwdev
    cd /cluster/data/cavPor3
    ln -s /cluster/data/cavPor3/bed/repeatMasker/cavPor3.fa.out .
    makeDownloads.pl cavPor3 > makeDownloads.log 2>&1

    #	*EDIT* the README files and ensure they are correct

#############################################################################
#  PushQ entries (DONE - 2008-03-19 - Hiram)
    ssh hgwdev
    cd /cluster/data/cavPor3
    /cluster/bin/scripts/makePushQSql.pl cavPor3 > jkStuff/pushQ.sql
    #	output warnings:
# hgwdev does not have /usr/local/apache/htdocs/goldenPath/cavPor3/liftOver/cavPor3ToBosTau*
# cavPor3 does not have seq

# Could not tell (from trackDb, all.joiner and hardcoded lists of supporting
# and genbank tables) which tracks to assign these tables to:
#	genscanPep

    #	looks like there should be a bosTau3 to cavPor3 liftOver run

###########################################################################
# HUMAN (hg18) PROTEINS TRACK (DONE braney 2008-03-28)
    ssh kkstore06
    # bash  if not using bash shell already

    mkdir /cluster/data/cavPor3/blastDb
    cd /cluster/data/cavPor3
    grep -v chrUn chrom.sizes | awk '{print $1}' > chr.lst
    for i in `cat chr.lst`; 
    do twoBitToFa  cavPor3.unmasked.2bit -seq=$i stdout; 
    done > temp.fa
    faSplit gap temp.fa 1000000 blastDb/x -lift=blastDb.lft

    grep chrUn chrom.sizes | awk '{print $1}' > chr.lst
    for i in `cat chr.lst`; 
    do twoBitToFa  cavPor3.unmasked.2bit -seq=$i stdout; 
    done > temp.fa
    faSplit sequence temp.fa 150 blastDb/y
    rm temp.fa chr.lst

    cd blastDb
    for i in *.fa
    do
	/cluster/bluearc/blast229/formatdb -i $i -p F
    done
    rm *.fa
    ls *.nsq | wc -l
#   3440

    mkdir -p /san/sanvol1/scratch/cavPor3/blastDb
    cd /cluster/data/cavPor3/blastDb
    for i in nhr nin nsq; 
    do 
	echo $i
	cp *.$i /san/sanvol1/scratch/cavPor3/blastDb
    done

    mkdir -p /cluster/data/cavPor3/bed/tblastn.hg18KG
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG
    echo  /san/sanvol1/scratch/cavPor3/blastDb/*.nsq | xargs ls -S | sed "s/\.nsq//"  > query.lst
    wc -l query.lst
# 3440 query.lst

   # we want around 350000 jobs
   calc `wc /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl | awk '{print $1}'`/\(350000/`wc query.lst | awk '{print $1}'`\)

# 36727/(350000/3440) = 360.973943

   mkdir -p /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/kgfa
   split -l 361 /cluster/data/hg18/bed/blat.hg18KG/hg18KG.psl  /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/kgfa/kg
   ln -s /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/kgfa kgfa
   cd kgfa
   for i in *; do 
     nice pslxToFa $i $i.fa; 
     rm $i; 
     done
   cd ..
   ls -1S kgfa/*.fa > kg.lst
   mkdir -p /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut
   ln -s /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut
   for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
   tcsh
   cd /cluster/data/cavPor3/bed/tblastn.hg18KG
   cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl }
#ENDLOOP
'_EOF_'

   cat << '_EOF_' > blastSome
#!/bin/sh
# Cluster job: tblastn one protein query file against one blastDb chunk.
# $1 = blast database chunk, $2 = kg protein fasta, $3 = output psl.
# Exits 0 on success (or on a lift that fails pslCheck -- the .tmp is
# then left behind), 1 if blastall never succeeded.
BLASTMAT=/cluster/bluearc/blast229/data
export BLASTMAT
g=`basename $2`
f=/tmp/`basename $3`.$g
# try successively stricter e-values; keep the first run that succeeds
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /cluster/bluearc/blast229/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
    if /cluster/bin/i386/blastToPsl $f.1 $f.2
    then
        # two-stage lift: chunk coords -> cavPor3 target coords, then
        # query coords -> hg18 protein coords
	liftUp -nosort -type=".psl" -nohead $f.3 /cluster/data/cavPor3/blastDb.lft carry $f.2
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg18/bed/blat.hg18KG/protein.lft warn $f.3

        if pslCheck -prot $3.tmp
        then
            mv $3.tmp $3
            rm -f $f.1 $f.2 $f.3 $f.4
        fi
        exit 0
    fi
fi
# failure path: remove all intermediates so a rerun starts clean
rm -f $f.1 $f.2 $3.tmp $f.8 $f.3 $f.4
exit 1
'_EOF_'
    # << happy emacs
    chmod +x blastSome
    gensub2 query.lst kg.lst blastGsub blastSpec
    exit 
    
    ssh pk
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG
    para create blastSpec
#    para try, check, push, check etc.

    para time

Completed: 350880 of 350880 jobs
CPU time in finished jobs:   27082816s  451380.27m  7523.00h  313.46d  0.859 y
IO & Wait Time:               2334990s   38916.50m   648.61h   27.03d  0.074 y
Average job time:                  84s       1.40m     0.02h    0.00d
Longest finished job:             578s       9.63m     0.16h    0.01d
Submission to last job:         96125s    1602.08m    26.70h    1.11d


    ssh kkstore06
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG
    mkdir chainRun
    cd chainRun
    tcsh
    cat << '_EOF_' > chainGsub
#LOOP
chainOne $(path1)
#ENDLOOP
'_EOF_'

    cat << '_EOF_' > chainOne
# chain all per-chunk q.*.psl results for one query directory ($1) into
# a single c.<name>.psl in blastOut
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=150000 stdin /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut/c.`basename $1`.psl)
'_EOF_'
    chmod +x chainOne
    ls -1dS /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut/kg?? > chain.lst
    gensub2 chain.lst single chainGsub chainSpec
    # do the cluster run for chaining
    ssh pk
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG/chainRun
    para create chainSpec
    para maxNode 30
    para try, check, push, check etc.

# Completed: 99 of 102 jobs
# Crashed: 3 jobs
# CPU time in finished jobs:     113248s    1887.47m    31.46h    1.31d  0.004 y
# IO & Wait Time:                 86043s    1434.04m    23.90h    1.00d  0.003 y
# Average job time:                2013s      33.55m     0.56h    0.02d
# Longest finished job:            6139s     102.32m     1.71h    0.07d
# Submission to last job:         10416s     173.60m     2.89h    0.12d

# ran three crashed jobs on kolossus

    ssh kkstore06
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG/blastOut
    for i in kg??
    do
       cat c.$i.psl | awk "(\$13 - \$12)/\$11 > 0.6 {print}" > c60.$i.psl
       sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
       awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
       echo $i
    done
    sort -T /tmp -k 14,14 -k 16,16n -k 17,17n u.*.psl m60* | uniq > /cluster/data/cavPor3/bed/tblastn.hg18KG/blastHg18KG.psl
    cd ..
    pslCheck blastHg18KG.psl

    # load table 
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/tblastn.hg18KG
    hgLoadPsl cavPor3 blastHg18KG.psl

    # check coverage
    featureBits cavPor3 blastHg18KG 
# 40254923 bases of 2731830700 (1.474%) in intersection

    featureBits cavPor3 refGene:cds blastHg18KG  -enrichment
# refGene:cds 0.429%, blastHg18KG 1.474%, both 0.379%, cover 88.39%, enrich 59.98x

    ssh kkstore06
    rm -rf /cluster/data/cavPor3/bed/tblastn.hg18KG/blastOut
    rm -rf /cluster/bluearc/cavPor3/bed/tblastn.hg18KG/blastOut
#end tblastn

##########################################################################
#  Create 5-way downloads (DONE - 2008-03-28 - Hiram)
    ssh hgwdev
    mkdir -p /cluster/data/cavPor3/bed/multiz5way/downloads/phastCons5way
    cd /cluster/data/cavPor3/bed/multiz5way/downloads/phastCons5way
    cp -p \
/san/sanvol1/scratch/cavPor3/multiz5way/cons/all/phastCons5wayScores/* .
    ln -s ../../cons/all/all.mod ./5way.mod
    cp /cluster/data/calJac1/bed/multiz9way/downloads/phastCons9way/README.txt .
    # edit that README.txt to be correct for this 5-way alignment
    cd ..
    mkdir multiz5way
    cd multiz5way
    cp -p /cluster/data/calJac1/bed/multiz9way/downloads/multiz9way/README.txt .
    # edit that README.txt to be correct for this 5-way alignment
    ssh kkstore06
    cd /cluster/data/cavPor3/bed/multiz5way/downloads/multiz5way
    ln -s ../../cavPor3.5-way.nh 5way.nh

    time gzip -c ../../anno/cavPor3.anno.5way.maf > cavPor3.5way.maf.gz
    #	real    34m59.295s

    ssh hgwdev
    cd /cluster/data/cavPor3/bed/multiz5way/downloads/multiz5way
    #	creating upstream files from refGene, bash script:
    cat << '_EOF_' > mkUpstream.sh
#!/bin/bash
# Build the upstreamN.maf.gz download files (N = 1000, 2000, 5000):
# take refGene upstream regions and extract the corresponding
# multiz5way alignment fragments.
DB=cavPor3
GENE=refGene
NWAY=multiz5way
export DB GENE

for SIZE in 1000 2000 5000
do
    echo "making upstream${SIZE}.maf"
    # featureBits emits one bed item per upstream region; the perl
    # substitution trims the _up... suffix from the name field
    # (presumably leaving the accession plus a 0 score -- confirm),
    # and mafFrags pulls the alignment for each sorted region
    featureBits ${DB} ${GENE}:upstream:${SIZE} -fa=/dev/null -bed=stdout \
        | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \
        | $HOME/kent/src/hg/ratStuff/mafFrags/mafFrags ${DB} ${NWAY} \
                stdin stdout \
                -orgs=/cluster/data/${DB}/bed/${NWAY}/species.list \
        | gzip -c > upstream${SIZE}.maf.gz
    echo "done upstream${SIZE}.maf.gz"
done
'_EOF_'
    # << happy emacs
    chmod +x ./mkUpstream.sh
    time nice -n +19 ./mkUpstream.sh
-rw-rw-r--  1    9883443 Mar 28 13:02 upstream1000.maf.gz
-rw-rw-r--  1   17938570 Mar 28 13:06 upstream2000.maf.gz
-rw-rw-r--  1   40384656 Mar 28 13:10 upstream5000.maf.gz
    #
    #	check the names in these upstream files to ensure sanity:
    zcat upstream1000.maf.gz | grep "^s " | awk '{print $2}' \
	| sort | uniq -c | sort -rn | less
    #	should be a list of the other 4 species with a high count,
    #	then refGene names, e.g.:
    #	8806 ornAna1
    #	8806 mm9
    #	8806 hg18
    #	8806 canFam2
    #      7 NM_001077006
    #      3 NM_001113231
    #      3 NM_001105381
    #      3 NM_001102527
    #      3 NM_001102322
    #	...

    ssh kkstore06
    cd /cluster/data/cavPor3/bed/multiz5way/downloads/multiz5way
    md5sum *.maf.gz > md5sum.txt
    cd ../phastCons5way
    md5sum *.data.gz *.mod > md5sum.txt

    ssh hgwdev
    mkdir /usr/local/apache/htdocs/goldenPath/cavPor3/multiz5way
    mkdir /usr/local/apache/htdocs/goldenPath/cavPor3/phastCons5way
    cd /cluster/data/cavPor3/bed/multiz5way/downloads/multiz5way
    ln -s `pwd`/* /usr/local/apache/htdocs/goldenPath/cavPor3/multiz5way
    cd ../phastCons5way
    ln -s `pwd`/* /usr/local/apache/htdocs/goldenPath/cavPor3/phastCons5way
    #	if your ln -s `pwd`/* made extra links to files you don't want there,
    #	check the goldenPath locations and remove those extra links

#############################################################################
# BLASTZ/CHAIN/NET 2X Ground squirrel: speTri0 (In progress 2008-05-16 kate)

    ssh kkstore05
    cd /cluster/data/cavPor3/bed
    mkdir blastzSpeTri0.2008-05-16
    cd blastzSpeTri0.2008-05-16

    cat << '_EOF_' > DEF
# Guinea pig vs. Ground squirrel

BLASTZ_M=50

# TARGET: Guinea pig cavPor3
SEQ1_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ1_LEN=/cluster/data/cavPor3/chrom.sizes 
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: Ground squirrel speTri0
SEQ2_DIR=/scratch/data/speTri0/speTri0.2bit
SEQ2_LEN=/cluster/data/speTri0/chrom.sizes
SEQ2_CHUNK=30000000
SEQ2_LIMIT=500
SEQ2_LAP=0

BASE=/cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16
TMPDIR=/scratch/tmp
'_EOF_'
    # << happy emacs

    doBlastzChainNet.pl `pwd`/DEF -bigClusterHub=pk  \
      -chainMinScore=3000 -chainLinearGap=medium >& do.log &

    # got here
    ln -s blastzSpeTri0.2008-05-16 /cluster/data/cavPor3/bed/blastzSpeTri0

    # failures... have to rescue manually
    # make axt's.  Note -- too many scaffolds to do in a single pass
    # (exhausts mem) so we split
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/blastz.speTri0/axtChain
    netSplit noClass.net noClass
    ~kate/bin/x86_64/netSplit noClass.net noClass -lump=100
    # can't just split to chains (exhausts open files)
    chainSplit chain cavPor3.speTri0.all.chain.gz -lump=100
    cd noClass
    mkdir ../../axtNetChunks
cat > netToAxt.csh << 'EOF'
    # For each per-chunk net (from netSplit -lump=100), pair it with the
    # matching split chain file and write an axt chunk; $i:r strips the
    # .net extension to get the chunk's root name.
    foreach i (*.net)
        set p = $i:r
        echo $i
	netToAxt $p.net ../chain/$p.chain /cluster/data/cavPor3/cavPor3.2bit /cluster/data/speTri0/speTri0.2bit ../../axtNetChunks/$i:r.axt
    end
'EOF'
    csh netToAxt.csh >&! netToAxt.log &

    # create unfiltered mafNet to get better squirrel coverage (2008-11-3 kate)
    cd ../../axtNetChunks
    cat *.axt | axtSort stdin stdout > ../axtChain/cavPor3.speTri0.net.axt
    mkdir ../mafNet
    axtToMaf -tPrefix=cavPor3. -qPrefix=speTri0. ../axtChain/cavPor3.speTri0.net.axt \
        /cluster/data/cavPor3/chrom.sizes /cluster/data/speTri0/chrom.sizes \
            ../mafNet/cavPor3.speTri0.net.maf
    gzip ../mafNet/cavPor3.speTri0.net.maf

    # low cov genome, so use reciprocal best for multiple alignment
    # need to generate liftover chains, which failed in automation
cat > over.csh << 'EOF'
    # Build liftOver chains per chunk: subset each chain file to the parts
    # used by its net, re-stitch chain IDs split by the subsetting, then
    # concatenate all chunks into a single over.chain file.
    foreach i (*.net)
        set p = $i:r
        echo $i
	netChainSubset -verbose=0 $p.net ../chain/$p.chain stdout | \
            chainStitchId stdin $i.over.chain
    end
    cat *.over.chain | gzip -c > cavPor3.speTri0.over.chain.gz
'EOF'
    csh over.csh >&! over.log &

    ssh hgwdev
    cd /cluster/data/cavPor3/bed/blastz.speTri0
    mkdir /usr/local/apache/htdocs/goldenPath/cavPor3/vsSpeTri0
    /cluster/bin/scripts/doRecipBest.pl cavPor3 speTri0 >&! rbest.log &

    # TODO: 
    # load chains in database and check coverage
    # NOTE: exhausts mem on hgwdev -- need to try this on kolossus ?
    cd axtChain
    cat > loadChains.csh << 'EOF'
# Load the speTri0 chains into the cavPor3 database (with target index),
# then record and display genome coverage of the chain alignments.
hgLoadChain -tIndex cavPor3 chainSpeTri0 cavPor3.speTri0.all.chain.gz
featureBits cavPor3 chainSpeTri0Link > fb.cavPor3.chainSpeTri0Link.txt
cat fb.cavPor3.chainSpeTri0Link.txt
'EOF'
    csh loadChains.csh >&! loadChains.log &
############################################################################
# SWAP SpeTri0  Blastz (2008-11-01 kate)

mkdir -p /cluster/data/speTri0/bed
cd /cluster/data/speTri0/bed
mkdir blastz.cavPor3.swap
cd blastz.cavPor3.swap

cd /cluster/data/cavPor3/bed/blastz.speTri0/axtChain/chain
mkdir ../chain.swap
cat > swap.csh << 'EOF'
# Swap target/query in each split chain file (cavPor3->speTri0 becomes
# speTri0->cavPor3), writing the results into ../chain.swap.
foreach c (*.chain)
    echo $c
    chainSwap $c ../chain.swap/$c
end
'EOF'
csh swap.csh >&! swap.log &

doBlastzChainNet.pl -swap -chainMinScore=3000 -chainLinearGap=medium \
    /cluster/data/cavPor3/bed/blastz.speTri0/DEF >& swap.log  &

############################################################################
# TRANSMAP vertebrate.2008-05-20 build  (2008-05-24 markd)

vertebrate-wide transMap alignments were built.  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-05-20

see doc/builds.txt for specific details.
#############################################################################
# N-SCAN gene predictions (nscanGene) - (2008-04-03 markd)

    # obtained NSCAN predictions from michael brent's group
    # at WUSTL
    cd /cluster/data/cavPor3/bed/nscan/
    wget -nv http://mblab.wustl.edu/predictions/Guinea_pig/cavPor3/cavPor3.gtf
    wget -nv http://mblab.wustl.edu/predictions/Guinea_pig/cavPor3/cavPor3.prot.fa
    wget -nv http://mblab.wustl.edu/predictions/Guinea_pig/cavPor3/readme.html
    bzip2 cavPor3.*
    chmod a-w *

    # load track
    gtfToGenePred -genePredExt cavPor3.gtf.bz2 stdout | hgLoadGenePred -bin -genePredExt cavPor3 nscanGene stdin
    hgPepPred cavPor3 generic nscanPep  cavPor3.prot.fa.bz2
    rm *.tab

    # update trackDb; need a cavPor3-specific page to describe informants
    guineaPig/cavPor3/nscanGene.html   (copy from readme.html)
    guineaPig/cavPor3/trackDb.ra
    # set search regex to
    termRegex scaffold_[0-9]+\.[0-9]+\.[0-9]+
############################################################################

# ### Found this in Kate's unchecked in cavPor3.txt.  I need it to continue the blastz to get the maf for multiz !!
# ### #############################################################################
# ### # BLASTZ/CHAIN/NET 2X Ground squirrel: speTri0 (In progress 2008-05-16 kate)
# ### 
# ###     ssh kkstore05
# ###     cd /cluster/data/cavPor3/bed
# ###     mkdir blastzSpeTri0.2008-05-16
# ###     cd blastzSpeTri0.2008-05-16
# ### 
# ###     cat << '_EOF_' > DEF
# ### # Mouse vs. Ground squirrel
# ### 
# ### BLASTZ_M=50
# ### 
# ### # TARGET: Mouse MM9
# ### SEQ1_DIR=/scratch/data/cavPor3/cavPor3.2bit
# ### SEQ1_LEN=/cluster/data/cavPor3/chrom.sizes 
# ### SEQ1_CHUNK=10000000
# ### SEQ1_LAP=10000
# ### 
# ### # QUERY: Ground squirrel speTri0
# ### SEQ2_DIR=/scratch/data/speTri0/speTri0.2bit
# ### SEQ2_LEN=/cluster/data/speTri0/chrom.sizes
# ### SEQ2_CHUNK=30000000
# ### SEQ2_LIMIT=500
# ### SEQ2_LAP=0
# ### 
# ### BASE=/cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16
# ### TMPDIR=/scratch/tmp
# ### '_EOF_'
# ###     # << happy emacs
# ### 
# ###     doBlastzChainNet.pl `pwd`/DEF -bigClusterHub=pk  \
# ###       -chainMinScore=3000 -chainLinearGap=medium >& do.log &
# ### 
# ###     # got here
# ###     ln -s blastzSpeTri0.2008-05-16 /cluster/data/cavPor3/bed/blastzSpeTri0
ln -s blastzSpeTri0.2008-05-16 /cluster/data/cavPor3/bed/blastz.speTri0

    # ### 1 crashed job in axtChain:
    # ssh mkr0u7
zcat ../../pslParts/cavPor3.2bit\:scaffold_7\:*.psl.gz \
 | axtChain -psl -verbose=0  -minScore=3000 -linearGap=medium stdin \
     /scratch/data/cavPor3/cavPor3.2bit \
     /scratch/data/speTri0/speTri0.2bit \
     tmp.chain &
    # invalid unsigned number: "2"
    ### divide and conquer
     ll ../../pslParts/cavPor3.2bit\:scaffold_7\:*.psl.gz -rw-rw-r--  1 kate protein 25560954 May 18 08:12 ../../pslParts/cavPor3.2bit:scaffold_7:0-10010000.psl.gz
-rw-rw-r--  1 kate protein 28831900 May 18 08:13 ../../pslParts/cavPor3.2bit:scaffold_7:10000000-20010000.psl.gz
-rw-rw-r--  1 kate protein 25466248 May 18 08:13 ../../pslParts/cavPor3.2bit:scaffold_7:20000000-30010000.psl.gz
-rw-rw-r--  1 kate protein 26433148 May 18 08:13 ../../pslParts/cavPor3.2bit:scaffold_7:30000000-40010000.psl.gz
-rw-rw-r--  1 kate protein 38048425 May 18 08:13 ../../pslParts/cavPor3.2bit:scaffold_7:40000000-50010000.psl.gz
-rw-rw-r--  1 kate protein 24145256 May 18 08:13 ../../pslParts/cavPor3.2bit:scaffold_7:50000000-60010000.psl.gz
-rw-rw-r--  1 kate protein  7480033 May 18 08:13 ../../pslParts/cavPor3.2bit:scaffold_7:60000000-61004451.psl.gz
zcat ../../pslParts/cavPor3.2bit\:scaffold_7\:0-*.psl.gz \
 | axtChain -psl -verbose=0  -minScore=3000 -linearGap=medium stdin \
     /scratch/data/cavPor3/cavPor3.2bit \
     /scratch/data/speTri0/speTri0.2bit \
     tmp.chain &
     # Error reading 4 bytes: Success
zcat ../../pslParts/cavPor3.2bit\:scaffold_7\:40*.psl.gz \
 | axtChain -psl -verbose=0  -minScore=3000 -linearGap=medium stdin \
     /scratch/data/cavPor3/cavPor3.2bit \
     /scratch/data/speTri0/speTri0.2bit \
     tmp.chain &
     # invalid unsigned number: "2"
     mv ../../pslParts/cavPor3.2bit\:scaffold_7\:40000000-50010000.psl.gz ../../pslParts/cavPor3.2bit\:scaffold_7\:40000000-50010000.psl.gz.bad
zcat ../../pslParts/cavPor3.2bit\:scaffold_7\:*.psl.gz \
 | axtChain -psl -verbose=0  -minScore=3000 -linearGap=medium stdin \
     /scratch/data/cavPor3/cavPor3.2bit \
     /scratch/data/speTri0/speTri0.2bit \
     tmp.chain &
     #Error reading 4 bytes: Success
     
     ssh pk; para shove -retries=6
    # 233 jobs in batch
    # 503 jobs (including everybody's) in Parasol queue.
    # Checking finished jobs
    # Completed: 233 of 233 jobs
    # CPU time in finished jobs:     116206s    1936.77m    32.28h    1.34d  0.004 y
    # IO & Wait Time:                  3377s      56.28m     0.94h    0.04d  0.000 y
    # Average job time:                 513s       8.55m     0.14h    0.01d
    # Longest running job:                0s       0.00m     0.00h    0.00d
    # Longest finished job:            8500s     141.67m     2.36h    0.10d
    # Submission to last job:        967976s   16132.93m   268.88h   11.20d
    # Estimated complete:                 0s       0.00m     0.00h    0.00d

    ### ############## Now need to restart the blastz:
    tail do.log
    # Command failed:
    # ssh -x kki nice /cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16/axtChain/run/doChainRun.csh
 
    ssh kkstore05 # store12->kkstore05
    cd /cluster/data/cavPor3/bed/blastz.speTri0
    screen 

    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF -bigClusterHub=pk  \
       -chainMinScore=3000 -chainLinearGap=medium -continue chainMerge >> do.log 2>&1 
    # real    220m10.699s
    # tail do.log
    # writing /dev/null
    # memory usage 2419843072, utime 5416 s/100, stime 389
    # netChainSubset -verbose=0 noClass.net cavPor3.speTri0.all.chain.gz stdout
    # chainStitchId stdin stdout
    # gzip -c
    # Killed
    # 
    # gzip: stdout: Broken pipe
    # Command failed:
    # ssh -x kolossus nice /cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16/axtChain/netChains.csh
    
    cp netChains.csh netChains.restart.csh
    # edit to restart in correct place
    ./netChains.restart.csh
    
    ### After some searching I was able to figure out that   = octal "\031"
    # Now I can find the original problem file and try to rebuild it:
    grep -n -P "\031" cavPor3.2bit\:scaffold_7\:40000000-50010000_part007.lst.psl 
    # 4253:62 40      0       0       1       1       1       6       +       scaffold_6393.1-308827  308827  149765  149868  scaffold_'      61004451   49604348        49604456        3       2,38,35,       149765,149794,149833,   49604348,49604383,49604421,

    # For comparison:
    tail -55 cavPor3.2bit\:scaffold_7\:40000000-50010000_part007.lst.psl | head -5  
    # 91      37      0       0       1       8       3       16      +       scaffold_6393.1-308827  308827  244021  244157  scaffold_7      61004451   48951564        48951708        5       18,25,14,15,56, 244021,244039,244072,244086,244101,     48951564,48951583,48951608,48951629,48951652,
    # 62      32      0       0       0       0       0       0       +       scaffold_6393.1-08827  308827  54194   54288   scaffold_7      61004451   49114670        49114764        1       94,     54194,  49114670,
    # 62      40      0       0       1       1       1       6       +       scaffold_6393.1-308827  308827  149765  149868  scaffold_'      61004451   49604348        49604456        3       2,38,35,       149765,149794,149833,   49604348,49604383,49604421,
    # 127     63      0       0       1       18      2       5       +       scaffold_6393.1-308827  308827  159033  159241  scaffold_7      61004451   49613715        49613910        4       59,84,20,27,    159033,159092,159176,159214,    49613715,49613775,49613863,49613883,
    # 29      8       0       0       0       0       0       0       +       scaffold_6393.1-308827  308827  160121  160158  scaffold_7      61004451   49620825        49620862        1       37,     160121, 49620825,
    ### ### ### Yikes!  the prior line has a different strange char:  = octal "\023"

    ## Run on same machine:
    ssh kkr10u47
    /cluster/bin/scripts/blastz-run-ucsc -outFormat psl ../psl/cavPor3.2bit:scaffold_7:40000000-50010000 qParts/part007.lst ../DEF ../psl/cavPor3.2bit:scaffold_7:40000000-50010000/cavPor3.2bit:scaffold_7:40000000-50010000_part007.lst.new.psl
    ll ../psl/cavPor3.2bit\:scaffold_7\:40000000-50010000/cavPor3.2bit\:scaffold_7\:40000000-50010000_part007*
    # -rw-rw-r--  1 tdreszer protein 785511 Jun  3 13:11 ../psl/cavPor3.2bit:scaffold_7:40000000-50010000/cavPor3.2bit:scaffold_7:40000000-50010000_part007.lst.new.psl
    # -rw-rw-r--  1 kate     protein 785511 May 16 19:40 ../psl/cavPor3.2bit:scaffold_7:40000000-50010000/cavPor3.2bit:scaffold_7:40000000-50010000_part007.lst.psl
    grep -n -P "\031" ../psl/cavPor3.2bit\:scaffold_7\:40000000-50010000/cavPor3.2bit\:scaffold_7\:40000000-50010000_part007.lst.new.psl 
    tail -55 ../psl/cavPor3.2bit\:scaffold_7\:40000000-50010000/cavPor3.2bit\:scaffold_7\:40000000-50010000_part007.lst.new.psl | head -5
    # 91      37      0       0       1       8       3       16      +       scaffold_6393.1-308827  308827  244021  244157  scaffold_7      61004451        48951564        48951708       518,25,14,15,56, 244021,244039,244072,244086,244101,     48951564,48951583,48951608,48951629,48951652,
    # 62      32      0       0       0       0       0       0       +       scaffold_6393.1-308827  308827  54194   54288   scaffold_7      61004451        49114670        49114764       194,     54194,  49114670,
    # 62      40      0       0       1       1       1       6       +       scaffold_6393.1-308827  308827  149765  149868  scaffold_7      61004451        49604348        49604456       329,38,35,       149765,149794,149833,   49604348,49604383,49604421,
    # 127     63      0       0       1       18      2       5       +       scaffold_6393.1-308827  308827  159033  159241  scaffold_7      61004451        49613715        49613910       459,84,20,27,    159033,159092,159176,159214,    49613715,49613775,49613863,49613883,
    # 29      8       0       0       0       0       0       0       +       scaffold_6393.1-308827  308827  160121  160158  scaffold_7      61004451        49620825        49620862       137,     160121, 49620825,
    mv ../psl/cavPor3.2bit\:scaffold_7\:40000000-50010000/cavPor3.2bit\:scaffold_7\:40000000-50010000_part007.lst.psl ../psl/cavPor3.2bit\:scaffold_7\:40000000-50010000/cavPor3.2bit\:scaffold_7\:40000000-50010000_part007.lst.psl.old
    mv ../psl/cavPor3.2bit\:scaffold_7\:40000000-50010000/cavPor3.2bit\:scaffold_7\:40000000-50010000_part007.lst.new.psl ../psl/cavPor3.2bit\:scaffold_7\:40000000-50010000/cavPor3.2bit\:scaffold_7\:40000000-50010000_part007.lst.psl
    
    # Okay, we are NOW done with run.blastz so it is on to run.cat
    ssh kkstore05
    cd ../run.cat
    ./cat.csh cavPor3.2bit:scaffold_7:40000000-50010000 cavPor3.2bit:scaffold_7:40000000-50010000.psl.new.gz 
    ll cavPor3.2bit\:scaffold_7\:40000000-50010000.psl.*
    # -rw-rw-r--  1 tdreszer protein 38048414 Jun  3 13:28 cavPor3.2bit:scaffold_7:40000000-50010000.psl.new.gz
    # -rw-rw-r--  1 tdreszer protein 38048475 May 18 08:13 cavPor3.2bit:scaffold_7:40000000-50010000.psl.old.gz
    mv cavPor3.2bit\:scaffold_7\:40000000-50010000.psl.new.gz cavPor3.2bit\:scaffold_7\:40000000-50010000.psl.gz
    mv cavPor3.2bit\:scaffold_7\:40000000-50010000.psl.old.gz cavPor3.2bit\:scaffold_7\:40000000-50010000.psl.gz.old
    mv cavPor3.2bit\:scaffold_7\:40000000-50010000.psl.gz* ../pslParts/
    
    # Okay, we are now done with run.cat.  What can be done for axtChain?
    # run on same cluster machine
    ssh kkr4u00
    cd /cluster/data/cavPor3/bed/blastz.speTri0/axtChain/run
    ./chain.csh cavPor3.2bit:scaffold_7: chain/cavPor3.2bit:scaffold_7:.chain
    ll chain/cavPor3.2bit\:scaffold_7\:.chain 
    # -rw-rw-r--  1 tdreszer protein 304948646 Jun  3 15:05 chain/cavPor3.2bit:scaffold_7:.chain

    # Okay, NOW we are ready to consider restarting doBlastzChainNet.pl
    # Though I assume that some of what was already done, will be in the way.
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF -bigClusterHub=pk  \
       -chainMinScore=3000 -chainLinearGap=medium -continue chainMerge >> do.log 2>&1
    # postProcessChains: looks like this was run successfully already (cavPor3.speTri0.all.chain.gz exists). 
    # Either run with -continue net or some later stage, or move aside/remove /cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16/axtChain/cavPor3.speTri0.all.chain.gz and run again.
    mv axtChain/cavPor3.speTri0.all.chain.gz axtChain/cavPor3.speTri0.all.chain.gz.old
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF -bigClusterHub=pk  \
       -chainMinScore=3000 -chainLinearGap=medium -continue chainMerge >> do.log 2>&1
    # real    54m1.762s
    tail do.log
    # HgStepManager: executing step 'net' Thu Jun  5 12:46:09 2008.
    # netChains: looks like we are not starting with a clean slate.  Please move aside or remove /cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16/axtChain/noClass.net and run again.
    mv /cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16/axtChain/noClass.net /cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16/axtChain/noClass.net.old

    screen
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF -bigClusterHub=pk  \
       -chainMinScore=3000 -chainLinearGap=medium -continue net >> do.log 2>&1
    # real    204m20.325s
    
    tail do.log 
    # writing /dev/null
    # memory usage 2427514880, utime 5453 s/100, stime 405
    # netChainSubset -verbose=0 noClass.net cavPor3.speTri0.all.chain.gz stdout
    # chainStitchId stdin stdout
    # gzip -c
    # 
    # gzip: stdout: Broken pipe
    # Killed  
    # Command failed:
    # ssh -x kolossus nice /cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16/axtChain/netChains.csh
    
    ### Can't seem to get ahead!!!

    ## Doing each step in netChains.csh manually...
    
    cd /cluster/data/cavPor3/bed/blastzSpeTri0.2008-05-16/axtChain

    # Make nets ("noClass", i.e. without rmsk/class stats which are added later):
    chainPreNet cavPor3.speTri0.all.chain.gz /cluster/data/cavPor3/chrom.sizes  /cluster/data/speTri0/chrom.sizes stdout \
     | chainNet stdin -minSpace=1 /cluster/data/cavPor3/chrom.sizes  /cluster/data/speTri0/chrom.sizes stdout /dev/null \
     | netSyntenic stdin noClass.net
    # Completed successfully

    # Make liftOver chains:
    netChainSubset -verbose=0 noClass.net cavPor3.speTri0.all.chain.gz netChainSubset.out
    ll netChainSubset.out 
    # -rw-rw-r--  1 tdreszer protein 73728 Jun  6 17:12 netChainSubset.out

    chainStitchId netChainSubset.out chainStitchId.out
    ll chainStitchId.out
    # -rw-rw-r--  1 tdreszer protein 0 Jun  9 09:13 chainStitchId.out
    # gzip -c chainStitchId.out > cavPor3.speTri0.over.chain.gz
    # Duh: gzip: stdout: Broken pipe

    # Okay, continuing with next step, ignoring the empty cavPor3.speTri0.over.chain.gz
    # Make axtNet for download: one .axt for all of cavPor3.
    mkdir ../axtNet
    netToAxt -verbose=0 noClass.net cavPor3.speTri0.all.chain.gz \
      /scratch/data/cavPor3/cavPor3.2bit /scratch/data/speTri0/speTri0.2bit stdout \
      | axtSort stdin stdout \
      | gzip -c > ../axtNet/cavPor3.speTri0.net.axt.gz

    ### Okay Kate rescued this by adjusting file sizes to handle memory
    
    ssh kkstore05 
    cd /cluster/data/cavPor3/bed/blastz.speTri0 
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF -bigClusterHub=pk  \
       -chainMinScore=3000 -chainLinearGap=medium -continue net >> doAferChain.log 2>&1 


     

    # NOTE: be sure to ls the data in above script on workhorse machine before starting script
    time nice -n +19 doBlastzChainNet.pl `pwd`/DEF \
    -bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
        -syntenicNet > do.log 2>&1 
    # <ctl-A><ctl-d>  ps -ef | grep blastzCavPor3
    # real    1764m55.155s
    tail do.log
    #  *** All done!
    # *** Make sure that goldenPath/mm9/vsCavPor3/README.txt is accurate.
    # *** Add {chain,net}CavPor3 tracks to trackDb.ra if necessary.
    # blastz: ranOk: 52984
    cat fb.mm9.chainCavPor3Link.txt
    #    757283793 bases of 2620346127 (28.900%) in intersection

    cd /cluster/data/mm9/bed/blastzCavPor3.2008-04-10
    ######### Change locations in DEF due to Hiram's new methods    
    cat << \_EOF_ > DEF
BLASTZ_M=50

# TARGET: Mouse Mm9
SEQ1_DIR=/scratch/data/mm9/nib
SEQ1_LEN=/scratch/data/mm9/chrom.sizes
SEQ1_CHUNK=10000000
SEQ1_LAP=10000

# QUERY: GuineaPig cavPor3
SEQ2_DIR=/scratch/data/cavPor3/cavPor3.2bit
SEQ2_LEN=/scratch/data/cavPor3/chrom.sizes
# Maximum number of scaffolds that can be lumped together
SEQ2_LIMIT=300
SEQ2_CHUNK=20000000
SEQ2_LAP=0

BASE=/cluster/data/mm9/bed/blastzCavPor3.2008-04-10
TMPDIR=/scratch/tmp
_EOF_

    mkdir /cluster/data/cavPor3/bed/blastz.mm9.swap
    cd /cluster/data/cavPor3/bed/blastz.mm9.swap
    time nice -n +19 doBlastzChainNet.pl \
    /cluster/data/mm9/bed/blastzCavPor3.2008-04-10/DEF \
    -bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
        -swap -syntenicNet > do.log 2>&1 &
    # real   166m53.671s
    # Exit 25   time nice -n +19 doBlastzChainNet.pl /cluster/data/mm9/bed/blastzCavPor3.2008-04-10/DEF -bigC
    # Can't open chain/scaffold_795.chain to append: Too many open files
    # gzip: stdout: Broken pipe
    # Command failed:
    # ssh -x kolossus nice /cluster/data/cavPor3/bed/blastz.mm9.swap/axtChain/netSynteny.csh

    #   broken down during netSynteny.csh due to too many open files on
    #   a chainSplit      
    #   However, there is no need to split when we have scaffolds.
    #   Kate fixes doBlastzChainNet.pl and retry:

    cd /cluster/data/cavPor3/bed/blastz.mm9.swap
    time nice -n +19 ~kate/kent/src/hg/utils/automation/doBlastzChainNet.pl \
    /cluster/data/mm9/bed/blastzCavPor3.2008-04-10/DEF \
    -bigClusterHub=pk -chainMinScore=3000 -chainLinearGap=medium \
        -swap -continue=syntenicNet -syntenicNet > syn.log 2>&1 &
    # real    24m37.561s
    # *** All done!
    # *** Make sure that goldenPath/cavPor3/vsMm9/README.txt is accurate.
    # *** Add {chain,net}Mm9 tracks to trackDb.ra if necessary.
    cat fb.cavPor3.chainMm9Link.txt
    # 781173609 bases of 2663369733 (29.330%) in intersection

############################################################################
# TRANSMAP vertebrate.2008-06-07 build  (2008-06-30 markd)

vertebrate-wide transMap alignments were built.  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2008-06-30

see doc/builds.txt for specific details.
############################################################################
#  3way Rodent Multiz (special for Jurgens (Schmitz & co.) in Muenster
#  Previously, Robert Baertsch handled these.  I committed to these
#  many months ago...
# 2008-07-26 kate
# Redo with unfiltered net mafs to maximize squirrel sequence (2008-11-08 kate)

    ssh kkstore05
    cd /cluster/data/cavPor3/bed
    mkdir multiz3way
    cd multiz3way
    mkdir -p /san/sanvol1/scratch/cavPor3/multiz3way/
    cd /san/sanvol1/scratch/cavPor3/multiz3way

    # copy mafs to cluster-friendly disk
    # mm9 - high quality mammalian genome, so use syntenic net
    mafSplit /dev/null -byTarget mm9/ /cluster/data/cavPor3/bed/blastz.mm9/axtChain/cavPor3.mm9.synNet.maf.gz
    cd mm9
    # rename to reflect chrom name
    foreach f (*.maf)
        set c = `head -3 $f | grep cavPor3 | awk '{print $2}' | sed 's/cavPor3.//'`
        echo $c
        mv $f $c.maf
    end

    # speTri0 - low quality mammalian genome, so use reciprocal best net
    # mafSplit /dev/null -byTarget speTri0/ /cluster/data/cavPor3/bed/blastz.speTri0/mafRBestNet/cavPor3.speTri0.rbest.maf.gz
    # redo with full net, to get more sequence
    mafSplit /dev/null -byTarget speTri0/ /cluster/data/cavPor3/bed/blastz.speTri0/mafNet/cavPor3.speTri0.net.maf.gz
    cd speTri0
    # rename to reflect chrom name
    foreach f (*.maf)
        set c = `head -3 $f | grep cavPor3 | awk '{print $2}' | sed 's/cavPor3.//'`
        echo $c
        mv $f $c.maf
    end

    cd /san/sanvol1/scratch/cavPor3/multiz3way

    # get latest PSU utilities
    mkdir penn
    set p=/cluster/bin/penn/multiz.v11.2007-03-19/multiz-tba
    cp -p $p/{autoMZ,multiz,maf_project} penn

    # the autoMultiz cluster run
    ssh pk
    cd /cluster/data/cavPor3/bed/multiz3way

    # create species list and stripped down tree for autoMZ
    cat > tree.nh << 'EOF'
((cavPor3 mm9) speTri0)
'EOF'
    cat > species.lst << 'EOF'
cavPor3 mm9 speTri0
'EOF'
    mkdir run maf
    cd run

    cat > autoMultiz << '_EOF_'
#!/bin/csh -ef
# Per-chromosome multiz cluster job: build the 3-way alignment for
# chrom $1 and write the resulting maf to $2.  Work happens in a
# node-local scratch directory that is removed on completion.
set db = cavPor3
set c = $1
set maf = $2
set binDir = /san/sanvol1/scratch/$db/multiz3way/penn
set tmp = /scratch/tmp/$db/multiz.$c
set pairs = /san/sanvol1/scratch/$db/multiz3way
rm -fr $tmp
mkdir -p $tmp
cp ../{tree.nh,species.lst} $tmp
pushd $tmp
# Stage each pairwise maf as $db.$s.sing.maf; skip the reference itself.
# If a species has no maf for this chrom, write a bare maf header so
# autoMZ still gets an input file.
foreach s (`cat species.lst`)
    set in = $pairs/$s/$c.maf
    set out = $db.$s.sing.maf
    if ($s == $db) then
	continue
    endif
    if (-e $in.gz) then
	zcat $in.gz > $out
    else if (-e $in) then
	cp $in $out
    else
	echo "##maf version=1 scoring=autoMZ" > $out
    endif
end
# Put the PSU multiz binaries on the path for autoMZ's sub-commands.
set path = ($binDir $path); rehash
$binDir/autoMZ + T=$tmp E=$db "`cat tree.nh`" $db.*.sing.maf $c.maf
popd
# Deliver the finished chrom maf to the requested output, then clean up.
cp $tmp/$c.maf $maf
rm -fr $tmp
'_EOF_'
    # << happy emacs
    chmod +x autoMultiz

# gensub2 template: one job per chromosome; $(root1) comes from chrom.lst,
# and the {check out line+ ...} clause lets parasol verify the output maf.
cat  << '_EOF_' > template
#LOOP
./autoMultiz $(root1) {check out line+ /cluster/data/cavPor3/bed/multiz3way/maf/$(root1).maf}
#ENDLOOP
'_EOF_'
    # << happy emacs

    mkdir /cluster/data/cavPor3/bed/multiz3way/maf/
    # one job per target sequence in the assembly
    awk '{print $1}' /cluster/data/cavPor3/chrom.sizes > chrom.lst
    gensub2 chrom.lst single template jobList
    para create jobList
    # ~3000 jobs
    para try
    para check
    # ~2 hours of run-time

    # remove empty alignment files and package up
    ssh kkstore05
    cd /cluster/data/cavPor3/bed/multiz3way/maf
    mkdir empty
# finish.csh: a maf with no 's cavPor3' line in its header produced no
# alignment; grep -q sets status 1 on no-match, and those files are moved
# aside into empty/.  Saved as a script so the .full run below can reuse it.
cat > finish.csh << 'EOF'
    foreach f (*.maf)
        head -11 $f | grep -q 's cavPor3'
        if ($status == 1) then
            echo "$f empty"
            mv $f empty
        else
            echo "   $f NOT empty"
        endif
    end
'EOF'
    # oops, forgot to mkdir, so 'empty' files were lost
    ls *.maf | wc -l 
    # 1491
    tar cvfz rodent3way.tar.gz *.maf


    #########
    # Also do a multiz with non-filtered mouse mafs:
    # mm9 syntenic net mafs are sparse (cover only ~300 scaffolds), so check net
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/blastz.mm9/axtChain
    # load the syntenic net into a browser table to count covered scaffolds
    netFilter -minGap=10 cavPor3.mm9.syn.net.gz | hgLoadNet -verbose=0 cavPor3 synNetMm9 stdin
    # select distinct(tName) shows 302 rows

    mkdir -p /san/sanvol1/scratch/cavPor3/multiz3way.full/
    cd /san/sanvol1/scratch/cavPor3/multiz3way.full
    # NOTE(review): doubled slash in the path below is harmless to the
    # filesystem; this is the full (unfiltered) net maf, not the syntenic one
    mafSplit /dev/null -byTarget mm9/ /cluster/data/cavPor3/bed/blastz.mm9//cavPor3.mm9.net.maf.gz
    cd mm9
    # rename to reflect chrom name
    foreach f (*.maf)
        set c = `head -3 $f | grep cavPor3 | awk '{print $2}' | sed 's/cavPor3.//'`
        echo $c
        mv $f $c.maf
    end

    # copy and link various files from first run
    # speTri0 mafs and penn binaries are unchanged, so symlink; the small
    # control files are copied
    ln -s ../multiz3way/{speTri0,penn} .
    cp ../multiz3way/{species.lst,tree.nh} .

    mkdir run maf
    cp ../multiz3way/run/chrom.lst run
    # retarget the job script and template at the .full directories
    sed 's/multiz3way/multiz3way.full/' ../multiz3way/run/autoMultiz > run/autoMultiz
    sed 's/multiz3way/multiz3way.full/' ../multiz3way/run/template > run/template
    # check it
    chmod +x run/autoMultiz

    mkdir -p /cluster/data/cavPor3/bed/multiz3way.full/maf

    ssh pk
    cd /san/sanvol1/scratch/cavPor3/multiz3way.full/run
    gensub2 chrom.lst single template jobList
    para create jobList
    para try
    para check
    # ~2 hours of run-time

    ssh kkstore05
    cd /cluster/data/cavPor3/bed/multiz3way.full/maf
    mkdir empty
    # reuse the empty-maf cleanup script saved during the first run
    # (this time the empty/ dir exists before running it)
    csh ../../multiz3way/maf/finish.csh >&! finish.log &

    # post to hgwdev downloads area
    ssh hgwdev
    cd /cluster/data/cavPor3/bed/multiz3way
    mv *.gz /usr/local/apache/htdocs/goldenPath/cavPor3/multizRodent3way/syntenic
    cd /cluster/data/cavPor3/bed/multiz3way.full
    mv *.gz /usr/local/apache/htdocs/goldenPath/cavPor3/multizRodent3way/full


################################################
# AUTOMATE UPSTREAM FILE CREATION (2008-10-15 markd)
add the following settings to genbank.conf:
cavPor3.upstreamGeneTbl = xenoRefGene
cavPor3.upstreamMaf = multiz8way /hive/data/genomes/cavPor3/bed/multiz8way/species.list

############################################################################
#  cavPor3 - Guinea Pig - Ensembl Genes version 51  (DONE - 2008-12-02 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/cavPor3
    # config for the automated Ensembl gene-track build; nameTranslation
    # rewrites Ensembl's "MT" chromosome name to UCSC's "chrM"
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.51
    # sanity check: genomic coverage of the loaded track
    featureBits cavPor3 ensGene
    # 30699872 bases of 2663369733 (1.153%) in intersection

 *** All done!  (through the 'makeDoc' step)
 *** Steps were performed in /hive/data/genomes/cavPor3/bed/ensGene.51

############################################################################

############################################################################
# TRANSMAP vertebrate.2009-07-01 build  (2009-07-21 markd)

vertebrate-wide transMap alignments were built.  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-07-01

see doc/builds.txt for specific details.
############################################################################
############################################################################
# TRANSMAP vertebrate.2009-09-13 build  (2009-09-20 markd)

vertebrate-wide transMap alignments were built.  Tracks are created and loaded
by a single Makefile. This is available from:
   svn+ssh://hgwdev.cse.ucsc.edu/projects/compbio/usr/markd/svn/projs/transMap/tags/vertebrate.2009-09-13

see doc/builds.txt for specific details.

############################################################################
# lastz Rabbit oryCun2 swap (DONE - 2010-01-22 - Hiram)
    #	original alignment
    cd /hive/data/genomes/oryCun2/bed/lastzCavPor3.2010-01-21
    cat fb.oryCun2.chainCavPor3Link.txt 
    #	964546600 bases of 2604023284 (37.041%) in intersection

    #	and for the swap
    mkdir /hive/data/genomes/cavPor3/bed/blastz.oryCun2.swap
    cd /hive/data/genomes/cavPor3/bed/blastz.oryCun2.swap
    time doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/oryCun2/bed/lastzCavPor3.2010-01-21/DEF \
	-swap -noLoadChainSplit -workhorse=hgwdev -bigClusterHub=swarm \
	-chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #	real    186m1.649s
    cat fb.cavPor3.chainOryCun2Link.txt 
    #	1003499831 bases of 2663369733 (37.678%) in intersection

############################################################################
# enable native RefSeq Genes track  (2010-06-15)

   # in etc/genbank.conf:
      cavPor3.refseq.mrna.native.load = yes

    ssh genbank
    cd /cluster/data/genbank
    # initial alignment pass for native refseq mRNAs
   ./bin/gbAlignStep -orgCat=native -initial -srcDb=refseq cavPor3
    ssh hgwdev
    cd /cluster/data/genbank
    # force-reload the refseq tables into the browser database
   ./bin/gbDbLoadStep -byPassGbLoaded -reload -srcDb=refseq cavPor3

############################################################################
# liftOver for hg19 (DONE - 2011-07-14 - Hiram)
    cd /hive/data/genomes/cavPor3/bed/blastz.hg19.swap/axtChain
    # publish the over.chain via /gbdb and register it for the liftOver CGI
    ln -s `pwd`/cavPor3.hg19.over.chain.gz \
	/gbdb/cavPor3/liftOver/cavPor3ToHg19.over.chain.gz
    hgAddLiftOverChain -minMatch=0.1 -multiple \
	-path=/gbdb/cavPor3/liftOver/cavPor3ToHg19.over.chain.gz \
	cavPor3 hg19
    # also expose it on the download server, with checksum
     ln -s `pwd`/cavPor3.hg19.over.chain.gz \
	/usr/local/apache/htdocs-hgdownload/goldenPath/cavPor3/liftOver/cavPor3ToHg19.over.chain.gz
    cd /usr/local/apache/htdocs-hgdownload/goldenPath/cavPor3/liftOver
    md5sum cavPor3ToHg19.over.chain.gz >> md5sum.txt

############################################################################
