# for emacs: -*- mode: sh; -*-

# Pika "Ochotona princeps"
#########################################################################
# DOWNLOAD SEQUENCE (DONE braney 2008-07-11)
    # Fetch the Broad OchPri2.0 assembly files into
    # /cluster/data/ochPri2/broad and build the lifted quality file.
    ssh kkstore05
    # The data lives on store12; the symlink makes it reachable as
    # /cluster/data/ochPri2.
    mkdir /cluster/store12/ochPri2
    ln -s /cluster/store12/ochPri2 /cluster/data
    mkdir /cluster/data/ochPri2/broad
    cd /cluster/data/ochPri2/broad

    wget --timestamping \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/pika/OchPri2.0/assembly.agp \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/pika/OchPri2.0/assembly.bases.gz \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/pika/OchPri2.0/assembly.quals.gz
    # List the downloaded files explicitly: a glob such as ass* would also
    # match assembly.md5sum itself if this step is ever re-run.
    md5sum assembly.agp assembly.bases.gz assembly.quals.gz > assembly.md5sum

    # Pipe the qaToQac output through qacAgpLift (using assembly.agp) to
    # produce ochPri2.qual.qac, the qualFiles entry of the config.ra.
    qaToQac assembly.quals.gz stdout | qacAgpLift assembly.agp stdin ochPri2.qual.qac

    # Not run (see note below). Every line of the command is commented out,
    # because a trailing backslash does not continue a '#' comment:
    # wget --timestamping \
    # ftp://ftp.broad.mit.edu/pub/assemblies/mammals/pika/OchPri2.0/BasicStats.out

#### No BasicStats.out

    # Count distinct names in column 1 of the AGP (assumes rows for the same
    # scaffold are adjacent, since uniq only collapses neighbouring lines).
    cut -f 1 assembly.agp | uniq | wc -l
    # Number of scaffolds: 187558

#########################################################################
# Create .ra file and run makeGenomeDb.pl
    # Write the makeGenomeDb.pl configuration and launch the build.
    ssh kkstore05
    cd /cluster/data/ochPri2
# Everything between the two _EOF_ markers is written verbatim to
# ochPri2.config.ra; the '#' lines inside it are part of that file.
cat << _EOF_ >ochPri2.config.ra
# Config parameters for makeGenomeDb.pl:
db ochPri2
clade mammal
genomeCladePriority 35
scientificName Ochotona princeps
commonName Pika
assemblyDate Jul. 2008
assemblyLabel Broad Institute ochPri2 
orderKey 236.5
#mitoAcc AJ222767
mitoAcc none
fastaFiles /cluster/data/ochPri2/broad/assembly.bases.gz
agpFiles /cluster/data/ochPri2/broad/assembly.agp
qualFiles /cluster/data/ochPri2/broad/ochPri2.qual.qac
dbDbSpeciesDir pika
_EOF_

# Run this inside 'screen', and make sure you are on kkstore05.
# The job is backgrounded; stdout and stderr both go to makeGenomeDb.out.
    makeGenomeDb.pl -verbose=2 ochPri2.config.ra > makeGenomeDb.out 2>&1 &

# 'ctl-a ctl-d' detaches from screen and returns to the previous shell.
# Summary statistics over column 2 of chrom.sizes (recorded output follows):
cut -f 2 chrom.sizes | ave stdin
# Q1 1249.000000
# median 4932.000000
# Q3 9905.250000
# average 18371.833534
# min 600.000000
# max 1977105.000000
# count 187558
# total 3445784354.000000
# standard deviation 52187.432301


#########################################################################
# REPEATMASKER (DONE braney 2008-07-29)
    # Run RepeatMasker on ochPri2 from its own build directory.
    ssh kkstore05
    screen # keep this long-running job under screen
    rmskDir=/cluster/data/ochPri2/bed/repeatMasker
    mkdir "$rmskDir"
    cd "$rmskDir"
    # Backgrounded; stdout and stderr are both captured in do.log.
    doRepeatMasker.pl -buildDir="$rmskDir" ochPri2 > do.log 2>&1 &

    # Note: simpleRepeats can be run at the same time as this job.
    #### Once RepeatMasker has finished, collect the cluster timing:
    ssh pk
    para time

# Completed: 8024 of 8024 jobs
# CPU time in finished jobs:   20582841s  343047.36m  5717.46h  238.23d  0.653 y
# IO & Wait Time:                172016s    2866.93m    47.78h    1.99d  0.005 y
# Average job time:                2587s      43.11m     0.72h    0.03d
# Longest finished job:            8109s     135.15m     2.25h    0.09d
# Submission to last job:        121861s    2031.02m    33.85h    1.41d

    # Coverage of the rmsk table, run at the lowest scheduling priority.
    time nice -n 19 featureBits ochPri2 rmsk > fb.ochPri2.rmsk.txt 2>&1 &
    # 219204536 bases of 1923624051 (11.395%) in intersection

    # RepeatMasker and library versions, as reported in do.log:
    #    Jun 13 2008 (open-3-2-5) version of RepeatMasker
    # CC   RELEASE 20080611;


#########################################################################
# SIMPLE REPEATS TRF (DONE braney 2008-07-29)
    # Run the simple-repeat (TRF) pipeline on ochPri2 from its own build
    # directory.
    ssh kkstore05
    screen # keep this long-running job under screen
    trfDir=/cluster/data/ochPri2/bed/simpleRepeat
    mkdir "$trfDir"
    cd "$trfDir"
    # Backgrounded; stdout and stderr are both captured in do.log.
    doSimpleRepeat.pl -buildDir="$trfDir" ochPri2 > do.log 2>&1 &

    #### Once the job has finished, collect the cluster timing:
    ssh pk
    para time

# Completed: 70 of 70 jobs
# CPU time in finished jobs:      51330s     855.49m    14.26h    0.59d  0.002 y
# IO & Wait Time:                   191s       3.19m     0.05h    0.00d  0.000 y
# Average job time:                 736s      12.27m     0.20h    0.01d
# Longest finished job:            7673s     127.88m     2.13h    0.09d
# Submission to last job:          7905s     131.75m     2.20h    0.09d

    # Coverage of the simpleRepeat table:
    featureBits ochPri2 simpleRepeat
    # 181386495 bases of 1923624051 (9.429%) in intersection

    # After the RepeatMasker run is done, add the simple-repeat mask
    # (bed/simpleRepeat/trfMask.bed) on top of ochPri2.rmsk.2bit to produce
    # the final ochPri2.2bit:
    cd /cluster/data/ochPri2
    twoBitMask ochPri2.rmsk.2bit -add bed/simpleRepeat/trfMask.bed ochPri2.2bit

    # Masking statistics for the combined result:
    twoBitToFa ochPri2.2bit stdout | faSize stdin

# 3445784354 bases (1522160303 N's 1923624051 real 1703533517 upper 220090534
# lower) in 187558 sequences in 1 files
# Total size: mean 18371.8 sd 52187.6 min 600 (scaffold_187557) max 1977105
# (scaffold_0) median 4932
# N count: mean 8115.7 sd 20418.0
# U count: mean 9082.7 sd 30990.6
# L count: mean 1173.5 sd 4397.6
# %6.39 masked total, %11.44 masked real

    # Same statistics for the RepeatMasker-only file, for comparison:
    twoBitToFa ochPri2.rmsk.2bit stdout | faSize stdin
# 3445784354 bases (1522160303 N's 1923624051 real 1704790340 upper 218833711
# lower) in 187558 sequences in 1 files
# Total size: mean 18371.8 sd 52187.6 min 600 (scaffold_187557) max 1977105
# (scaffold_0) median 4932
# N count: mean 8115.7 sd 20418.0
# U count: mean 9089.4 sd 31003.0
# L count: mean 1166.8 sd 4384.0
# %6.35 masked total, %11.38 masked real

    # Link to it from /gbdb
    ln -s /cluster/data/ochPri2/ochPri2.2bit /gbdb/ochPri2/ochPri2.2bit

    # Copy the sequence and sizes to san scratch. Create the directory first
    # (-p makes this a no-op if it already exists): without it the first cp
    # would create a plain file named ochPri2 and the second would overwrite
    # it. The trailing slash makes cp fail instead if the target is not a
    # directory.
    mkdir -p /san/sanvol1/scratch/ochPri2
    cp /cluster/data/ochPri2/ochPri2.2bit /san/sanvol1/scratch/ochPri2/
    cp /cluster/data/ochPri2/chrom.sizes /san/sanvol1/scratch/ochPri2/

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    # Load Ensembl genes version 51 for ochPri2 via doEnsGeneUpdate.pl.
    ssh kolossus
    cd /hive/data/genomes/ochPri2
    # Write the config passed to doEnsGeneUpdate.pl below.
    # NOTE(review): the here-doc opens with << '_EOF_' and is closed by a
    # line that still carries the quotes. csh/tcsh compare the terminator
    # literally, while POSIX sh/bash would expect a bare _EOF_ -- confirm
    # which shell this is pasted into before changing either line.
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#	names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 10995: ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 ochPri2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ochPri2/bed/ensGene.51
    # Coverage of the loaded ensGene table:
    featureBits ochPri2 ensGene
    # 25200422 bases of 1923624051 (1.310%) in intersection

    # Recorded status output (presumably from doEnsGeneUpdate.pl), kept as
    # comments so it is never interpreted as commands:
    #  *** All done!  (through the 'makeDoc' step)
    #  *** Steps were performed in /hive/data/genomes/ochPri2/bed/ensGene.51

############################################################################
