# for emacs: -*- mode: sh; -*-

#	Creating the assembly for Macropus eugenii
#	Tammar wallaby
#       http://www.hgsc.bcm.tmc.edu/projects/wallaby/
#       
#       This isn't a full browser.  Just a 2x coverage genome
#       for use in comparative genomics.

#########################################################################
# DOWNLOAD SEQUENCE, ASSEMBLE, MAKE DB (2008-12-04 Andy)
#   Fetches the Baylor Meug20071125 contig and linear-scaffold files, derives
#   macEug1.agp and macEug1.contigs.fa.gz from the contig files, and writes
#   macEug1.config.ra in /hive/data/genomes/macEug1.
       ssh hgwdev
       mkdir -p /hive/data/genomes/macEug1/baylor/chroms
       cd /hive/data/genomes/macEug1/baylor
       # The URL is quoted, so the '*' reaches wget unexpanded by the local
       # shell (wget handles FTP wildcards itself).
       wget --timestamping "ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Meugenii/fasta/Meug20071125/contigs/Meug20071125-contigs*"
       cd chroms/
       # The linear scaffolds are stored in chroms/; nothing else in this
       # section reads them.
       wget --timestamping "ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Meugenii/fasta/Meug20071125/linearScaffolds/*"
       cd ../
       # Build contigs.lst from the FASTA header lines: drop the leading '>',
       # turn every '|' into a tab, then print awk fields 2 and 1 (in that
       # order) separated by a tab.
       zcat Meug20071125-contigs.fa.gz | grep "^>" | \
           sed 's/^>//' | tr '|' '\t' | awk 'BEGIN{OFS="\t"}{print $2, $1;}' > contigs.lst
       # presumably rewrites column 6 of the AGP via the contigs.lst mapping
       # -- TODO confirm tabGrep -replace semantics
       tabGrep -replace contigs.lst 6 Meug20071125-contigs.agp.gz > macEug1.agp
       # Reduce every header line that starts with ">Contig<digits>" to just
       # that ">Contig<digits>" identifier; other lines pass through unchanged.
       zcat Meug20071125-contigs.fa.gz | sed 's/^>\(Contig[[:digit:]]\+\).*/>\1/' \
           | gzip -c > macEug1.contigs.fa.gz
       # Take column 6 of the new AGP as a name list and filter the contig
       # FASTA with it (faSomeRecords presumably keeps only the listed
       # records -- confirm), then replace the FASTA with the filtered copy.
       cut -f6 macEug1.agp > contigs.agp.lst
       faSomeRecords macEug1.contigs.fa.gz contigs.agp.lst stdout | gzip -c > tmp.fa.gz
       mv tmp.fa.gz macEug1.contigs.fa.gz
       cd ../
       # Write the build configuration into /hive/data/genomes/macEug1.
       # NOTE(review): the heredoc terminator below is written with its quotes
       # ('_EOF_'), which appears to be the csh/tcsh form; under sh/bash the
       # terminator line would be the bare word _EOF_. Confirm which shell
       # this was pasted into before re-running.
       cat << '_EOF_' > macEug1.config.ra
db macEug1
clade mammal
scientificName Macropus eugenii
assemblyDate Nov. 2007
assemblyLabel Baylor Release 1.0
orderKey 278
dbDbSpeciesDir wallaby
mitoAcc none
agpFiles /hive/data/genomes/macEug1/baylor/macEug1.agp
fastaFiles /hive/data/genomes/macEug1/baylor/macEug1.contigs.fa.gz
commonName Wallaby
'_EOF_'
# <<   emacs
       # Build the genome database under tcsh, from /hive/data/genomes/macEug1
       # (the directory the preceding "cd ../" left us in, where
       # macEug1.config.ra was just written).
       tcsh
       # makeGenomeDb.pl takes the config.ra file as its positional argument;
       # the command as originally recorded omitted it.  "-stop db" presumably
       # ends the run after the script's "db" step -- confirm against the
       # script's usage message.
       ~/kent/src/hg/utils/automation/makeGenomeDb.pl -stop db -workhorse=hgwdev macEug1.config.ra

###########################################################################
# WINDOWMASKER (DONE 2008-12-05 Andy)
#   Runs doWindowMasker.pl in bed/WindowMasker, then exposes its
#   .wmsk.sdust.2bit output in the genome directory as macEug1.wmsk.2bit.
    ssh hgwdev
    mkdir /hive/data/genomes/macEug1/bed/WindowMasker
    # Work inside a named screen session; the run itself is launched from
    # kolossus with hgwdev given as the workhorse.
    screen -S macEug1_WindowMasker
    ssh kolossus
    cd /hive/data/genomes/macEug1/bed/WindowMasker
    tcsh
    # tcsh syntax: the backticks pass the current directory as the build
    # directory, and '>&' sends both stdout and stderr to wm.log.
    ~/kent/src/hg/utils/automation/doWindowMasker.pl macEug1 -buildDir=`pwd` -workhorse=hgwdev >& wm.log
    # Remove macEug1.wmsk.2bit from the build directory; the same file name
    # is reused below, two levels up, for a symlink to the .sdust.2bit file.
    rm macEug1.wmsk.2bit
    cd ../../
    # Relative symlink created in /hive/data/genomes/macEug1.
    ln -s bed/WindowMasker/macEug1.wmsk.sdust.2bit macEug1.wmsk.2bit

#########################################################################
## SIMPLE REPEATS TRF (DONE 2008-11-26 - Andy)
##   Runs doSimpleRepeat.pl, then combines its trfMask.bed with the
##   WindowMasker 2bit to produce macEug1.2bit and links that into /gbdb.
##   NOTE(review): the date above precedes both the 2008-12-04 download
##   section and the 2008-12-05 WindowMasker section, although the
##   twoBitMask step below reads macEug1.wmsk.2bit -- the date may be
##   mis-recorded; confirm.
    ssh hgwdev
    screen -S macEug1_trf
    mkdir /hive/data/genomes/macEug1/bed/simpleRepeat
    cd /hive/data/genomes/macEug1/bed/simpleRepeat
    tcsh
    # Only stdout is captured in do.log; the current directory is passed as
    # the build directory.
    time $HOME/kent/src/hg/utils/automation/doSimpleRepeat.pl -buildDir=`pwd` macEug1 > do.log
# Recorded output and timing of that run:
# *** All done!
# *** Steps were performed in /hive/data/genomes/macEug1/bed/simpleRepeat
#0.559u 0.401s 6:56:42.34 0.0%   0+0k 0+0io 2pf+0w
    cat fb.simpleRepeat
# Recorded content of fb.simpleRepeat:
# 38751844 bases of 2541767339 (1.525%) in intersection
    cd ../../
    # From the genome directory: read the WindowMasker-masked 2bit, apply
    # trfMask.bed with -add (presumably keeping the existing masking --
    # confirm), and write the result as macEug1.2bit.
    twoBitMask macEug1.wmsk.2bit -add bed/simpleRepeat/trfMask.bed macEug1.2bit
    ##  can safely ignore warning about >=13 fields in bed file
    # Convert the final 2bit back to FASTA on the fly and save the faSize
    # summary next to it.
    twoBitToFa macEug1.2bit stdout | faSize stdin > macEug1.2bit.faSize.txt
# Recorded masking summary:
# %31.28 masked total, %36.24 masked real
    ##   link to gbdb
    # Absolute-path symlink; the link target is built from the current
    # directory via backticks.
    ln -s `pwd`/macEug1.2bit /gbdb/macEug1

#########################################################################
## Genbank gene run (DONE - 2009-09-25 - Hiram)
##   Builds the blat 11-mer ooc file, stages the genome files, registers the
##   organism in the genbank source, then runs the initial alignment and the
##   initial database load.
    cd /hive/data/genomes/macEug1
    # MAKE 11.OOC FILE FOR BLAT
    # Both the query and the output are /dev/null: this blat run exists only
    # to write the -makeOoc file.
    blat macEug1.2bit \
	 /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/macEug1.11.ooc \
	-repMatch=1024
    #	Wrote 24249 overused 11-mers to jkStuff/macEug1.11.ooc

    # Stage the 2bit, chrom.sizes and the ooc file, preserving timestamps and
    # modes (cp -p).
    mkdir /hive/data/staging/data/macEug1
    cp -p macEug1.2bit chrom.sizes jkStuff/macEug1.11.ooc \
	/hive/data/staging/data/macEug1

    #	add the following two lines to src/lib/gbGenome.c
    #	static char *macEugNames[] = {"Macropus eugenii", NULL};
    #	{"macEug", macEugNames},

    # NOTE(review): no "cd" into a source checkout is recorded before the two
    # commands below, and src/lib/gbGenome.c is a relative path; presumably
    # they were run from ~/kent/src/hg/makeDb/genbank (the directory used at
    # the end of this section) -- confirm.
    cvs ci -m "Adding Wallaby Macropus eugenii" src/lib/gbGenome.c
    make install-server

    ssh genbank
    screen  # control this business with a screen since it takes a while
    cd /cluster/data/genbank

    # Initial alignment, started in the background at the lowest scheduling
    # priority.
    time nice -n +19 bin/gbAlignStep -initial macEug1 &
    #	logFile: var/build/logs/2009.09.25-14:54:03.macEug1.initalign.log
    #	real    2613m8.027s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad macEug1
    #	logFile:  var/dbload/hgwdev/logs/2009.09.28-09:23:52.dbload.log
    #	real    15m28.552s

    # Enable daily alignment and update of hgwdev for macEug1.
    cd ~/kent/src/hg/makeDb/genbank
    cvsup
    # Manual step: edit the two database lists below and add macEug1 to each.
    # They are files to edit, not commands, so they are kept as comments to
    # stop a copy/paste of this section from trying to execute them.
    #     etc/align.dbs
    #     etc/hgwdev.dbs
    # Commit both edited lists, then install the updated etc files.
    cvs ci -m "Added macEug1 - Macropus eugenii - Wallaby" etc/align.dbs etc/hgwdev.dbs
    make etc-update
    #	done - 2009-09-28 - Hiram
