# for emacs: -*- mode: sh; -*-

#  This file is a record of building the Ensembl gene track for all UCSC
#	genome browsers.  The end of this file has a historical record of
#	Robert's experiments with an automated process.
#
############################################################################
# ensembl 64 update (WORKING - 2011-10-12 - Hiram)

     #### when complete, reset dateReferences:
     # Run these two statements only after all of the version 64 tracks
     # recorded below have been loaded.  Both act on the trackVersion table
     # in the hgFixed database and match on the version column only, so
     # every row carrying that version string is changed.
     # 1. rows for the previous release (63) get the dated label "jun2011"
     hgsql -e \
'update trackVersion set dateReference="jun2011" where version="63";' hgFixed
     # 2. rows for the new release (64) get the label "current"
     hgsql -e \
'update trackVersion set dateReference="current" where version="64";' hgFixed


############################################################################
#  bosTau6 - Cow - Ensembl Genes version 64  (DONE - 2011-10-12 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/bosTau6
    cat << '_EOF_' > bosTau6.ensGene.ra
# required db variable
db bosTau6
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9X][0-9]*\)/chr\1/; s/^MT/chrM/; s/^GJ\([0-9]*\).1/chrUn_GJ\1/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=64 bosTau6.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/bosTau6/bed/ensGene.64
    featureBits bosTau6 ensGene
    # 42264149 bases of 2649682029 (1.595%) in intersection

############################################################################
#  myoLuc2 - Microbat - Ensembl Genes version 64  (DONE - 2011-10-12 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/myoLuc2
    cat << '_EOF_' > myoLuc2.ensGene.ra
# required db variable
db myoLuc2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=64 myoLuc2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/myoLuc2/bed/ensGene.64
    featureBits myoLuc2 ensGene
    # 32782563 bases of 1966419868 (1.667%) in intersection

############################################################################
#  gorGor3 - Gorilla - Ensembl Genes version 64  (DONE - 2011-10-12 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/gorGor3
    cat << '_EOF_' > gorGor3.ensGene.ra
# required db variable
db gorGor3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# delete commands take out genes that are only in patch sequence
nameTranslation '/^cutchr/d; /^unplaced/d; s/^\([0-9X][0-9ab]*\)/chr\1/; s/^MT/chrM/;'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=64 gorGor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/gorGor3/bed/ensGene.64
    featureBits gorGor3 ensGene
    # 50017329 bases of 3026913193 (1.652%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 64  (DONE - 2011-10-12 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=64 gasAcu1.ensGene.ra

    # gasAcu1 requires extra attention after the all-database 'for' loop
    # attempt: the gene predictions are lifted by hand, in two passes,
    # before the load step is run.
    cd /hive/data/genomes/gasAcu1/bed/ensGene.64/process
    # keep the gene predictions produced so far under a "beforeLift" name
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    # pass 1: lift with contigsToScaffolds.lft, reading the saved file from
    # stdin and writing gasAcu1.scaffolds.gp
    # NOTE(review): per the liftUp usage message, "carry" passes items not
    # named in the lift file through unchanged -- confirm
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    # pass 2: lift the pass 1 result with UCSC.chromToScaffoldSansGaps.lft,
    # recreating gasAcu1.allGenes.gp
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    # compress both results; the .gz name of allGenes.gp is the one that
    # was moved aside above
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29245 failed: 0

    #	then continue with the load
    # resume the automated procedure at its "load" step; stdout and stderr
    # are captured in ens.64.load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=64 gasAcu1.ensGene.ra \
	> ens.64.load 2>&1

    # measure the loaded ensGene table, result recorded below
    featureBits gasAcu1 ensGene
    # 36792090 bases of 446627861 (8.238%) in intersection

############################################################################
############################################################################
############################################################################
# ensembl 63 update (DONE - 2011-08-15 - Hiram)
# Select the lines of EnsGeneAutomate.pm that contain both "_63" and "=>",
#	print their first whitespace-separated field, strip single quotes
#	from it, and sort the result.
# NOTE(review): presumably this lists the entries configured for the
#	release 63 update; it must be run in the directory that holds
#	EnsGeneAutomate.pm -- confirm which directory that is.
grep "_63" EnsGeneAutomate.pm  | grep "=>" | awk '{print $1}' \
	| sed -e "s/'//g" | sort

     #### when complete, reset dateReferences:
     hgsql -e \
'update trackVersion set dateReference="apr2011" where version="62";' hgFixed
     hgsql -e \
'update trackVersion set dateReference="current" where version="63";' hgFixed

############################################################################
#  canFam2 - Dog - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    # The update failed the peptides-to-genes coverage check with:
# ERROR: percent coverage of peptides to genes: 93
# ERROR: should be greater than 95
    # The Ensembl gene set now includes non-coding pseudogenes, which is the
    # explanation noted for the coverage falling below the limit.
    # The remaining work was therefore finished off manually:
    cd /hive/data/genomes/canFam2/bed/ensGene.63

    # Record this build by hand as a new row in hgFixed.trackVersion: the
    # db and track name, who ran it, the Ensembl version, the time of the
    # insert (now()), a comment naming the peptide file, the URL of the
    # source GTF file, and the dateReference label.
    # NOTE(review): dateReference is entered directly as "jun2011", not
    # "current" -- confirm this was the intended label at the time.
    hgsql -e 'INSERT INTO trackVersion \
    (db, name, who, version, updateTime, comment, source, dateReference) \
    VALUES("canFam2", "ensGene", "hiram", "63", now(), \
        "with peptides Canis_familiaris.BROADD2.63.pep.all.fa.gz", \
        "ftp://ftp.ensembl.org/pub/release-63/gtf/canis_familiaris/Canis_familiaris.BROADD2.63.gtf.gz", \
        "jun2011" );' hgFixed

    # measure the loaded ensGene table, result recorded below
    featureBits canFam2 ensGene
    # 32393282 bases of 2384996543 (1.358%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/canFam2/bed/ensGene.63


############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 gasAcu1.ensGene.ra

    # gasAcu1 requires extra attention after the all-database 'for' loop
    # attempt: the gene predictions are lifted by hand, in two passes,
    # before the load step is run.
    cd /hive/data/genomes/gasAcu1/bed/ensGene.63/process
    # keep the gene predictions produced so far under a "beforeLift" name
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    # pass 1: lift with contigsToScaffolds.lft, reading the saved file from
    # stdin and writing gasAcu1.scaffolds.gp
    # NOTE(review): per the liftUp usage message, "carry" passes items not
    # named in the lift file through unchanged -- confirm
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    # pass 2: lift the pass 1 result with UCSC.chromToScaffoldSansGaps.lft,
    # recreating gasAcu1.allGenes.gp
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    # compress both results; the .gz name of allGenes.gp is the one that
    # was moved aside above
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29245 failed: 0

    #	then continue with the load
    # resume the automated procedure at its "load" step; stdout and stderr
    # are captured in ens.63.load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=63 gasAcu1.ensGene.ra \
	> ens.63.load 2>&1

    # measure the loaded ensGene table, result recorded below
    featureBits gasAcu1 ensGene
    # 36792090 bases of 446627861 (8.238%) in intersection

############################################################################
# bosTau4 was broken - finished manually (DONE - 2011-08-15 - Hiram)
    ssh hgwdev
    cd /hive/data/genomes/bosTau4
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    # first attempt with the automated procedure
    doEnsGeneUpdate.pl  -ensVersion=63 bosTau4.ensGene.ra
    #	broken during processing, fix doProcess.csh to eliminate AAFC03011182
    # The commands below redo the processing by hand with AAFC03011182
    # filtered out, then resume the automated procedure at the load step.
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.63/process
    # set aside the GTF file left by the failed run
    mv allGenes.gtf.gz allGenes.gtf.gz.0
    # rebuild allGenes.gtf.gz from the downloaded Ensembl GTF: apply the
    # same sed name translation given in bosTau4.ensGene.ra, and drop every
    # line that mentions AAFC03011182
    zcat ../download/Bos_taurus.Btau_4.0.63.gtf.gz \
        | sed -e "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/" \
        | grep -v AAFC03011182 | gzip > allGenes.gtf.gz
    # convert the GTF to extended genePred format, also writing infoOut.txt
    gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
	| gzip > bosTau4.allGenes.gp.gz
    # derive ensGtp.tab from the infoOut.txt written by gtfToGenePred
    /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
    # verify the gene predictions against the bosTau4 database
    genePredCheck -db=bosTau4 bosTau4.allGenes.gp.gz
    #	checked: 31598 failed: 0
    # resume the automated procedure at its "load" step; stdout and stderr
    # are captured in ens.63.load
    cd /hive/data/genomes/bosTau4
    doEnsGeneUpdate.pl  -ensVersion=63 -continue=load bosTau4.ensGene.ra \
	> ens.63.load  2>&1
    # measure the loaded ensGene table, result recorded below
    featureBits bosTau4 ensGene
    #  42306082 bases of 2731830700 (1.549%) in intersection

############################################################################
#  ailMel1 - Panda - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ailMel1
    cat << '_EOF_' > ailMel1.ensGene.ra
# required db variable
db ailMel1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 57
# liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 ailMel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ailMel1/bed/ensGene.63
    featureBits ailMel1 ensGene
    # 31990632 bases of 2245312831 (1.425%) in intersection

############################################################################
#  anoCar2 - Lizard - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/anoCar2
    cat << '_EOF_' > anoCar2.ensGene.ra
# required db variable
db anoCar2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation 's/^\([0-9L]\)/chr\1/; s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/; s/^A/chrUn_A/'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 anoCar2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/anoCar2/bed/ensGene.63
    featureBits anoCar2 ensGene
    # 27905138 bases of 1701353770 (1.640%) in intersection

############################################################################
#  calJac3 - Marmoset - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/calJac3
    cat << '_EOF_' > calJac3.ensGene.ra
# required db variable
db calJac3
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/;
# s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/;"
# name translation in Ensembl v62
liftUp /hive/data/genomes/calJac3/jkStuff/ens.62.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 calJac3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/calJac3/bed/ensGene.63
    featureBits calJac3 ensGene
    # 51944550 bases of 2752505800 (1.887%) in intersection

############################################################################
#  cavPor3 - Guinea pig - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cavPor3
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.63
    featureBits cavPor3 ensGene
    # 30971317 bases of 2663369733 (1.163%) in intersection

############################################################################
#  ce10 - C. elegans - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ce10
    cat << '_EOF_' > ce10.ensGene.ra
# required db variable
db ce10
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([IVX]\)/chr\1/; s/^MtDNA/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 ce10.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ce10/bed/ensGene.63
    featureBits ce10 ensGene
    # 31167360 bases of 100286070 (31.078%) in intersection

############################################################################
#  choHof1 - Sloth - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/choHof1
    cat << '_EOF_' > choHof1.ensGene.ra
# required db variable
db choHof1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 choHof1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/choHof1/bed/ensGene.63
    featureBits choHof1 ensGene
    # 18278941 bases of 2060419685 (0.887%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.63
    featureBits ci2 ensGene
    # 20114967 bases of 141233565 (14.242%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.63
    featureBits cioSav2 ensGene
    # 16572478 bases of 173749524 (9.538%) in intersection

############################################################################
#  danRer7 - Zebrafish - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/danRer7
    cat << '_EOF_' > danRer7.ensGene.ra
# required db variable
db danRer7
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 danRer7.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer7/bed/ensGene.63
    featureBits danRer7 ensGene
    # 63141009 bases of 1409770109 (4.479%) in intersection

############################################################################
#  dasNov2 - Armadillo - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/dasNov2
    cat << '_EOF_' > dasNov2.ensGene.ra
# required db variable
db dasNov2
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       31903: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 dasNov2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dasNov2/bed/ensGene.63
    featureBits dasNov2 ensGene
    # 21968539 bases of 2371493872 (0.926%) in intersection

############################################################################
#  dipOrd1 - Kangaroo rat - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1
    cat << '_EOF_' > dipOrd1.ensGene.ra
# required db variable
db dipOrd1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 dipOrd1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1/bed/ensGene.63
    featureBits dipOrd1 ensGene
    # 25324919 bases of 1844961421 (1.373%) in intersection

############################################################################
#  dm3 - D. melanogaster - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/dm3
    cat << '_EOF_' > dm3.ensGene.ra
# required db variable
db dm3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XYU][0-9]*\)/chr\1/; s/^dmel_mitochondrion_genome/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 dm3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dm3/bed/ensGene.63
    featureBits dm3 ensGene
    # 30276468 bases of 162367812 (18.647%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has invalid structures from Ensembl:
# 47425: ENSETET00000018714 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 echTel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/echTel1/bed/ensGene.63
    featureBits echTel1 ensGene
    # 25771306 bases of 2111581369 (1.220%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.63
    featureBits equCab2 ensGene
    # 39563318 bases of 2428790173 (1.629%) in intersection

############################################################################
#  eriEur1 - Hedgehog - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/eriEur1
    cat << '_EOF_' > eriEur1.ensGene.ra
# required db variable
db eriEur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 eriEur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/eriEur1/bed/ensGene.63
    featureBits eriEur1 ensGene
    # 22556849 bases of 2133134836 (1.057%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 5705: ENSFCAT00000006929 no exonFrame on CDS exon 16
# 28228: ENSFCAT00000009384 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 felCat3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/felCat3/bed/ensGene.63
    featureBits felCat3 ensGene
    # 22300874 bases of 1642698377 (1.358%) in intersection

############################################################################
#  fr2 - Fugu - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.63
    featureBits fr2 ensGene
    # 34568537 bases of 393312790 (8.789%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.63
    featureBits galGal3 ensGene
    # 30741650 bases of 1042591351 (2.949%) in intersection

############################################################################
#  hg19 - Human - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/hg19
    cat << '_EOF_' > hg19.ensGene.ra
# required db variable
db hg19
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# delete commands take out genes that are only in patch sequence
nameTranslation 's/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; /^GL3.*/d; /^HSCHR[1-5]/d; /^HSCHR[7-9]/d; /^HG/d'
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# Ensembl 62 has new sequence names for some of the random bits
liftUp /hive/data/genomes/hg19/jkStuff/ens.62.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 hg19.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/hg19/bed/ensGene.63
    featureBits hg19 ensGene
    # 111073277 bases of 2897316137 (3.834%) in intersection

############################################################################
#  loxAfr3 - Elephant - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3
    cat << '_EOF_' > loxAfr3.ensGene.ra
# required db variable
db loxAfr3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 loxAfr3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3/bed/ensGene.63
    featureBits loxAfr3 ensGene
    # 32151456 bases of 3118565340 (1.031%) in intersection

############################################################################
#  macEug1 - Wallaby - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/macEug1
    cat << '_EOF_' > macEug1.ensGene.ra
# required db variable
db macEug1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 macEug1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/macEug1/bed/ensGene.63
    featureBits macEug1 ensGene
    # 23393650 bases of 2541767339 (0.920%) in intersection

############################################################################
#  melGal1 - Turkey - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/melGal1
    cat << '_EOF_' > melGal1.ensGene.ra
# required db variable
db melGal1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to protect in perl:
nameTranslation '/^718000.*/d; s/^\([0-9WZ][0-9]*\)/chr\1/; s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/;'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 melGal1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/melGal1/bed/ensGene.63
    featureBits melGal1 ensGene
    # 25080242 bases of 935922386 (2.680%) in intersection

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 micMur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/micMur1/bed/ensGene.63
    featureBits micMur1 ensGene
    # 25688755 bases of 1852394361 (1.387%) in intersection

############################################################################
#  mm9 - Mouse - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 mm9.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/mm9/bed/ensGene.63
    featureBits mm9 ensGene
    # 86577373 bases of 2620346127 (3.304%) in intersection

############################################################################
#  monDom5 - Opossum - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/monDom5
    cat << '_EOF_' > monDom5.ensGene.ra
# required db variable
db monDom5
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 monDom5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/monDom5/bed/ensGene.63
    featureBits monDom5 ensGene
    # 32982595 bases of 3501660299 (0.942%) in intersection

############################################################################
#  nomLeu1 - Gibbon - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/nomLeu1
    cat << '_EOF_' > nomLeu1.ensGene.ra
# required db variable
db nomLeu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With single quotes to protect
#       everything in perl
nameTranslation 's/^GL\([0-9][0-9]*\).1/GL\1/; s/^ADFV\([0-9][0-9]*\).1/ADFV\1/;'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 nomLeu1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/nomLeu1/bed/ensGene.63
    featureBits nomLeu1 ensGene
    # 45293349 bases of 2756591777 (1.643%) in intersection

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ochPri2
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 13270: ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 ochPri2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ochPri2/bed/ensGene.63
    featureBits ochPri2 ensGene
    # 25447459 bases of 1923624051 (1.323%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly, 824 items, 31,254 are OK
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 ornAna1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ornAna1/bed/ensGene.63
    featureBits ornAna1 ensGene
    # 24466297 bases of 1842236818 (1.328%) in intersection

############################################################################
#  oryCun2 - Rabbit - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryCun2
    cat << '_EOF_' > oryCun2.ensGene.ra
# required db variable
db oryCun2
# the lift file will change the chrom names, no nameTranslation needed
# nameTranslation "s/^/chr/;"
# ensembl v62 has new naming scheme based on NCBI release:
liftUp /hive/data/genomes/oryCun2/jkStuff/ens.62.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 oryCun2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun2/bed/ensGene.63
    featureBits oryCun2 ensGene
    # 31797207 bases of 2604023284 (1.221%) in intersection

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.63
    featureBits oryLat2 ensGene
    # 32313511 bases of 700386597 (4.614%) in intersection

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/otoGar1
    # Two-stage setup: geneScaffolds is enabled and a liftUp file is then
    #   applied to rename chroms.  Note the lift file is referenced through
    #   /cluster/data while the work directory is under /hive/data/genomes.
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 otoGar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/otoGar1/bed/ensGene.63
    featureBits otoGar1 ensGene
    # 23692789 bases of 1969052059 (1.203%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/panTro2
    # nameTranslation: names starting with a digit, X or Y get a chr prefix;
    #   a leading MT becomes chrM; a leading Un becomes chrUn.
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.63
    featureBits panTro2 ensGene
    # 50004270 bases of 2909485072 (1.719%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2
    # Only db and nameTranslation are active.  The haplotypeLift line is
    #   commented out and points at an hg18 path.
    #   NOTE(review): that looks like a leftover from a human template -
    #   confirm before ever enabling it for ponAbe2.
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.63
    featureBits ponAbe2 ensGene
    # 38120849 bases of 3093572278 (1.232%) in intersection

############################################################################
#  proCap1 - Rock hyrax - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/proCap1
    # Only geneScaffolds is enabled in this .ra; no nameTranslation or
    #   liftUp is configured.
    cat << '_EOF_' > proCap1.ensGene.ra
# required db variable
db proCap1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 proCap1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/proCap1/bed/ensGene.63
    featureBits proCap1 ensGene
    # 25344792 bases of 2407847681 (1.053%) in intersection

############################################################################
#  pteVam1 - Megabat - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/pteVam1
    # geneScaffolds and skipInvalid are both enabled.  The last two comment
    #   lines in the .ra record the one transcript reported as invalid
    #   (ENSPVAT00000010661, no exonFrame on CDS exon 0).
    cat << '_EOF_' > pteVam1.ensGene.ra
# required db variable
db pteVam1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has invalid structures from Ensembl:
#       13027: ENSPVAT00000010661 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 pteVam1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/pteVam1/bed/ensGene.63
    featureBits pteVam1 ensGene
    # 28967283 bases of 1839436660 (1.575%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rheMac2
    # nameTranslation here deletes before it renames: lines whose name
    #   starts with 109 and lines whose name starts with MT are dropped,
    #   then names starting with a digit, X or Y get a chr prefix.
    #   Unlike most other entries, MT is removed rather than mapped to chrM.
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.63
    featureBits rheMac2 ensGene
    # 44562701 bases of 2646704109 (1.684%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rn4
    # nameTranslation: digit/X/Y names get a chr prefix, MT becomes chrM.
    #   knownToEnsembl is switched on, which per the .ra comment updates
    #   the knownToEnsembl table after ensGene is updated.
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.63
    featureBits rn4 ensGene
    # 46705616 bases of 2571531505 (1.816%) in intersection

############################################################################
#  sorAra1 - Shrew - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/sorAra1
    # Only geneScaffolds is enabled in this .ra; no nameTranslation or
    #   liftUp is configured.
    cat << '_EOF_' > sorAra1.ensGene.ra
# required db variable
db sorAra1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 sorAra1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sorAra1/bed/ensGene.63
    featureBits sorAra1 ensGene
    # 19690470 bases of 1832864697 (1.074%) in intersection

############################################################################
#  speTri1 - Squirrel - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/speTri1
    # Only geneScaffolds is enabled in this .ra; no nameTranslation or
    #   liftUp is configured.
    cat << '_EOF_' > speTri1.ensGene.ra
# required db variable
db speTri1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 speTri1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/speTri1/bed/ensGene.63
    featureBits speTri1 ensGene
    # 21595750 bases of 1913367893 (1.129%) in intersection

############################################################################
#  susScr1 - Pig - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/susScr1
    # nameTranslation: digit/X/Y names get a chr prefix, MT becomes chrM.
    #   The susScr2 liftOver entry for version 63 reads files out of this
    #   build's bed/ensGene.63 directory.
    cat << '_EOF_' > susScr1.ensGene.ra
# required db variable
db susScr1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 susScr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/susScr1/bed/ensGene.63
    featureBits susScr1 ensGene
    # 28758401 bases of 2231332019 (1.289%) in intersection

############################################################################
#  susScr2 - Pig - lifted susScr1 v63 genes to susScr2 (DONE - 2011-08-17 - Hiram)
    # susScr2 gets its ensGene track by lifting the susScr1 v63 gene
    #   predictions through the susScr1ToSusScr2 liftOver chain; the
    #   peptide, ensGtp, ensemblSource and ensemblToGeneName data are
    #   taken from the susScr1 v63 build directory.
    mkdir /hive/data/genomes/susScr2/bed/ensGene.63
    cd /hive/data/genomes/susScr2/bed/ensGene.63
    # link in the susScr1 genePred and the chain file, and uncompress
    #   both to plain files for liftOver
    ln -s ../../../susScr1/bed/ensGene.63/process/susScr1.allGenes.gp.gz .
    zcat susScr1.allGenes.gp.gz > susScr1.allGenes.genePred
    ln -s ../../../susScr1/bed/liftOver/susScr1ToSusScr2.over.chain.gz
    zcat susScr1ToSusScr2.over.chain.gz > susScr1ToSusScr2.over.chain
    # lift the gene predictions; anything that does not map is written to
    #   susScr1.liftOver.unMapped.txt
    liftOver -genePred susScr1.allGenes.genePred \
        susScr1ToSusScr2.over.chain \
        susScr2.allGenes.gp susScr1.liftOver.unMapped.txt
    gzip susScr2.allGenes.gp
    genePredCheck -db=susScr2 susScr2.allGenes.gp.gz
    #	checked: 22017 failed: 0

    # load the lifted genes; loader output and errors are kept in
    #   loadGenePred.errors.txt
    hgLoadGenePred  -genePredExt susScr2 \
	ensGene susScr2.allGenes.gp.gz > loadGenePred.errors.txt 2>&1

    # peptides: the first sed rewrites each fasta header down to the text
    #   after " transcript:" and drops a trailing " CCDS..." annotation
    zcat \
../../../susScr1/bed/ensGene.63/download/Sus_scrofa.Sscrofa9.63.pep.all.fa.gz \
        | sed -e 's/^>.* transcript:/>/; s/ CCDS.*$//;' \
	| gzip > ensPep.txt.gz
    # convert to tab format, drop empty lines and a trailing "*" on each
    #   line, then sort for loading
    zcat ensPep.txt.gz \
	| ~/kent/src/utils/faToTab/faToTab.pl /dev/null /dev/stdin \
	     | sed -e '/^$/d; s/*$//' | sort > ensPep.susScr2.fa.tab
    hgPepPred susScr2 tab ensPep ensPep.susScr2.fa.tab
    ln -s ../../../susScr1/bed/ensGene.63/process/ensGtp.tab .
    hgLoadSqlTab susScr2 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.tab
    ln -s ../../../susScr1/bed/ensGene.63/process/ensembl*.* .
    hgLoadSqlTab susScr2 ensemblSource ensemblSource.sql ensemblSource.tab
    hgLoadSqlTab susScr2 ensemblToGeneName ensemblToGeneName.sql \
	ensemblToGeneName.tab

    # register this version in hgFixed.trackVersion
    #   (comment text corrected from "dentical" to "identical")
    hgsql -e 'INSERT INTO trackVersion \
	(db, name, who, version, updateTime, comment, source, dateReference) \
	VALUES("susScr2", "ensGene", "hiram", "63", now(), \
        "identical to previous version 60", \
        "lifted susScr1 to susScr2 ftp://ftp.ensembl.org/pub/release-63/gtf/sus_scrofa/Sus_scrofa.Sscrofa9.63.gtf.gz", \
        "current" );' hgFixed

    featureBits susScr2 ensGene
    #	28702434 bases of 2231298548 (1.286%) in intersection

############################################################################
#  taeGut1 - Zebra finch - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/taeGut1
    # nameTranslation: names starting with a digit, L, X, Y or Z (followed
    #   by any run of digits, A, B or G) get a chr prefix; a leading Un
    #   becomes chrUn.  geneScaffolds and skipInvalid are commented out.
    #   NOTE(review): the ENSDNOT* transcript ids in the trailing .ra
    #   comments also appear in the dasNov2 entry of this file; they look
    #   like copy/paste leftovers rather than taeGut1 transcripts - confirm.
    cat << '_EOF_' > taeGut1.ensGene.ra
# required db variable
db taeGut1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
# geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
# skipInvalid yes
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 taeGut1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/taeGut1/bed/ensGene.63
    featureBits taeGut1 ensGene
    # 25441417 bases of 1222864691 (2.080%) in intersection

############################################################################
#  tarSyr1 - Tarsier - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1
    # Only geneScaffolds is enabled in this .ra; no nameTranslation or
    #   liftUp is configured.
    cat << '_EOF_' > tarSyr1.ensGene.ra
# required db variable
db tarSyr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 tarSyr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1/bed/ensGene.63
    featureBits tarSyr1 ensGene
    # 21327630 bases of 2768536343 (0.770%) in intersection

############################################################################
#  tetNig2 - Tetraodon - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tetNig2
    # nameTranslation: names starting with a digit, X or Y get a chr prefix;
    #   a leading MT becomes chrM; a leading Un becomes chrUn.
    cat << '_EOF_' > tetNig2.ensGene.ra
# required db variable
db tetNig2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 tetNig2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tetNig2/bed/ensGene.63
    featureBits tetNig2 ensGene
    # 31642974 bases of 302314788 (10.467%) in intersection

############################################################################
#  tupBel1 - Tree shrew - Ensembl Genes version 63  (DONE - 2011-08-15 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/tupBel1
    # Two-stage setup: geneScaffolds is enabled and a liftUp file is then
    #   applied to rename chroms.  The lift file is referenced through
    #   /cluster/data while the work directory is under /hive/data/genomes.
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 tupBel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tupBel1/bed/ensGene.63
    featureBits tupBel1 ensGene
    # 22849040 bases of 2137225476 (1.069%) in intersection

############################################################################
#  turTru1 - Dolphin - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/turTru1
    # Only geneScaffolds is enabled in this .ra; no nameTranslation or
    #   liftUp is configured.
    cat << '_EOF_' > turTru1.ensGene.ra
# required db variable
db turTru1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 turTru1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/turTru1/bed/ensGene.63
    featureBits turTru1 ensGene
    # 28614121 bases of 2298444090 (1.245%) in intersection

############################################################################
#  vicPac1 - Alpaca - Ensembl Genes version 63  (DONE - 2011-08-15 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/vicPac1
    # geneScaffolds and skipInvalid are both enabled.  The last two comment
    #   lines in the .ra record the one transcript reported as invalid
    #   (ENSVPAT00000009076, no exonFrame on CDS exon 0).
    cat << '_EOF_' > vicPac1.ensGene.ra
# required db variable
db vicPac1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that does not translate properly to UCSC coordinates
#       5017: ENSVPAT00000009076 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=63 vicPac1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/vicPac1/bed/ensGene.63
    featureBits vicPac1 ensGene
    # 17891814 bases of 1922910435 (0.930%) in intersection

############################################################################
############################################################################
# ensembl 62 update (WORKING - 2011-04-19 - Hiram)
# to construct a list of genomes to run:
#   lines of EnsGeneAutomate.pm containing both _62 and => are selected,
#   awk keeps the first whitespace-delimited field, sed strips the single
#   quotes from it, and the names are sorted
grep "_62" EnsGeneAutomate.pm  | grep "=>" | awk '{print $1}' \
	| sed -e "s/'//g" | sort

     #### when complete, reset dateReferences:
     # these two statements have no db filter: every version 61 row becomes
     # feb2011 and every version 62 row becomes current
     hgsql -e \
'update trackVersion set dateReference="feb2011" where version="61";' hgFixed
     hgsql -e \
'update trackVersion set dateReference="current" where version="62";' hgFixed

############################################################################
#  hg19 - Human - Ensembl Genes version 62  (DONE - 2011-04-22 - hiram)
    # This human gene set needed a lot of work to get the name translation
    #	to work again.  The contig names have changed in Ensembl for this
    #	version and they defined genes on patch sequence that UCSC does not
    #	include
    ssh hgwdev
    cd /hive/data/genomes/hg19
    # The nameTranslation sed script below renames digit/X/Y names to chrN
    #   and MT to chrM, then deletes lines whose name starts with GL3,
    #   with HSCHR followed by 1-5 or 7-9, or with HG.  Names starting
    #   with HSCHR6 are not deleted; presumably those are the haplotypes
    #   handled by haplotypeLift - TODO confirm.
    #   haplotypeLift, liftUp and knownToEnsembl are all enabled.
    cat << '_EOF_' > hg19.ensGene.ra
# required db variable
db hg19
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# delete commands take out genes that are only in patch sequence
nameTranslation 's/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; /^GL3.*/d; /^HSCHR[1-5]/d; /^HSCHR[7-9]/d; /^HG/d'
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# Ensembl 62 has new sequence names for some of the random bits
liftUp /hive/data/genomes/hg19/jkStuff/ens.62.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 hg19.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/hg19/bed/ensGene.62
    featureBits hg19 ensGene
    # 109947258 bases of 2897316137 (3.795%) in intersection

    # mark only the hg19 version 62 row as the current one
    hgsql -e \
'update trackVersion set dateReference="current" where db="hg19" AND version=62;' hgFixed

############################################################################
#  susScr2 - Pig - lifted susScr1 v62 genes to susScr2 (DONE - 2011-04-21 - Hiram)
    # susScr2 gets its ensGene track by lifting the susScr1 v62 gene
    #   predictions through the susScr1ToSusScr2 liftOver chain; the
    #   peptide and ensGtp data are taken from the susScr1 v62 build
    #   directory.
    mkdir /hive/data/genomes/susScr2/bed/ensGene.62
    cd /hive/data/genomes/susScr2/bed/ensGene.62
    # link in the susScr1 genePred and the chain file, and uncompress
    #   both to plain files for liftOver
    ln -s ../../../susScr1/bed/ensGene.62/process/susScr1.allGenes.gp.gz .
    zcat susScr1.allGenes.gp.gz > susScr1.allGenes.genePred
    ln -s ../../../susScr1/bed/liftOver/susScr1ToSusScr2.over.chain.gz
    zcat susScr1ToSusScr2.over.chain.gz > susScr1ToSusScr2.over.chain
    # lift the gene predictions; anything that does not map is written to
    #   susScr1.liftOver.unMapped.txt
    liftOver -genePred susScr1.allGenes.genePred \
        susScr1ToSusScr2.over.chain \
        susScr2.allGenes.gp susScr1.liftOver.unMapped.txt
    gzip susScr2.allGenes.gp
    genePredCheck -db=susScr2 susScr2.allGenes.gp.gz
    #	checked: 22017 failed: 0

    # load the lifted genes; loader output and errors are kept in
    #   loadGenePred.errors.txt
    hgLoadGenePred  -genePredExt susScr2 \
	ensGene susScr2.allGenes.gp.gz > loadGenePred.errors.txt 2>&1

    # peptides: the first sed rewrites each fasta header down to the text
    #   after " transcript:" and drops a trailing " CCDS..." annotation
    zcat \
../../../susScr1/bed/ensGene.62/download/Sus_scrofa.Sscrofa9.62.pep.all.fa.gz \
        | sed -e 's/^>.* transcript:/>/; s/ CCDS.*$//;' \
	| gzip > ensPep.txt.gz
    # convert to tab format, drop empty lines and a trailing "*" on each
    #   line, then sort for loading
    zcat ensPep.txt.gz \
	| ~/kent/src/utils/faToTab/faToTab.pl /dev/null /dev/stdin \
	     | sed -e '/^$/d; s/*$//' | sort > ensPep.susScr2.fa.tab
    hgPepPred susScr2 tab ensPep ensPep.susScr2.fa.tab
    ln -s ../../../susScr1/bed/ensGene.62/process/ensGtp.tab .
    hgLoadSqlTab susScr2 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.tab
    # register this version in hgFixed.trackVersion
    #   (comment text corrected from "dentical" to "identical")
    hgsql -e 'INSERT INTO trackVersion \
	(db, name, who, version, updateTime, comment, source, dateReference) \
	VALUES("susScr2", "ensGene", "hiram", "62", now(), \
        "identical to previous version 60", \
        "lifted susScr1 to susScr2 ftp://ftp.ensembl.org/pub/release-62/gtf/sus_scrofa/Sus_scrofa.Sscrofa9.62.gtf.gz", \
        "current" );' hgFixed

    featureBits susScr2 ensGene
    #	28702434 bases of 2231298548 (1.286%) in intersection

#########################################################################
#  anoCar2 - Lizard - Ensembl Genes version 62 (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/anoCar2
    # nameTranslation, applied in order: names starting with a digit or L
    #   get a chr prefix; GL<digits> followed by any one character and a 1
    #   becomes chrUn_GL<digits> (the suffix is dropped); a leading A
    #   becomes chrUn_A.
    cat << '_EOF_' > anoCar2.ensGene.ra
# required db variable
db anoCar2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation 's/^\([0-9L]\)/chr\1/; s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/; s/^A/chrUn_A/'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 anoCar2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/anoCar2/bed/ensGene.62
    featureBits anoCar2 ensGene
    # 27905138 bases of 1701353770 (1.640%) in intersection

    # per-database dateReference fix-up: version 62 becomes current and
    #   version 61 is labelled feb2011, for anoCar2 rows only
    hgsql -e \
'update trackVersion set dateReference="current" where db="anoCar2" AND version=62;' hgFixed

    hgsql -e \
'update trackVersion set dateReference="feb2011" where db="anoCar2" AND version=61;' hgFixed

############################################################################
#  nomLeu1 - Gibbon - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/nomLeu1
    cat << '_EOF_' > nomLeu1.ensGene.ra
# required db variable
db nomLeu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With single quotes to protect
#       everything in perl
nameTranslation 's/^GL\([0-9][0-9]*\).1/GL\1/; s/^ADFV\([0-9][0-9]*\).1/ADFV\1/;'   
'_EOF_'
#  << happy emacs
    
    # run the update with the nomLeu1 .ra written above; the featureBits
    #   result is recorded in the comment following the command
    doEnsGeneUpdate.pl  -ensVersion=62 nomLeu1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/nomLeu1/bed/ensGene.62
    featureBits nomLeu1 ensGene
    # 45293349 bases of 2756591777 (1.643%) in intersection

    # NOTE(review): unlike the hg19 and anoCar2 updates in this file, this
    #   WHERE clause has no version filter, so every nomLeu1 row in
    #   trackVersion is set to current - confirm that nomLeu1 had no
    #   earlier Ensembl version when this was run.
    hgsql -e \
'update trackVersion set dateReference="current" where db="nomLeu1";' hgFixed

############################################################################
#  ailMel1 - Panda - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ailMel1
    # Only the db line is active in this .ra.  Every optional setting is
    #   commented out, and the commented haplotypeLift / liftUp paths point
    #   at hg19 files.
    #   NOTE(review): those look like leftovers from a human template -
    #   confirm before ever enabling them for ailMel1.
    cat << '_EOF_' > ailMel1.ensGene.ra
# required db variable
db ailMel1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 57
# liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 ailMel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ailMel1/bed/ensGene.62
    featureBits ailMel1 ensGene
    # 31990632 bases of 2245312831 (1.425%) in intersection

############################################################################
#  anoCar2 - Lizard - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    # NOTE(review): this entry repeats the anoCar2 version 62 entry
    #   recorded earlier in this file (same .ra contents, same featureBits
    #   result) but without the trackVersion updates; it appears to be a
    #   duplicate record rather than a second run - confirm.
    ssh hgwdev
    cd /hive/data/genomes/anoCar2
    cat << '_EOF_' > anoCar2.ensGene.ra
# required db variable
db anoCar2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation 's/^\([0-9L]\)/chr\1/; s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/; s/^A/chrUn_A/'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 anoCar2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/anoCar2/bed/ensGene.62
    featureBits anoCar2 ensGene
    # 27905138 bases of 1701353770 (1.640%) in intersection

############################################################################
#  bosTau4 - Cow - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/bosTau4
    # nameTranslation: names starting with a digit, U or X (followed by any
    #   run of digits or the letter n) get a chr prefix, so Un... names
    #   become chrUn...; a leading MT becomes chrM.  geneScaffolds is
    #   commented out.
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 bosTau4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.62
    featureBits bosTau4 ensGene
    # 42306082 bases of 2731830700 (1.549%) in intersection

############################################################################
#  calJac3 - Marmoset - Ensembl Genes version 62  (DONE - 2011-04-21 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/calJac3
    # The earlier sed-based nameTranslation is kept only as a two-line
    #   comment in the .ra; name translation for version 62 is done with
    #   the ens.62.lft lift file instead.
    cat << '_EOF_' > calJac3.ensGene.ra
# required db variable
db calJac3
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/;
# s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/;"
# name translation in Ensembl v62
liftUp /hive/data/genomes/calJac3/jkStuff/ens.62.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 calJac3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/calJac3/bed/ensGene.62
    featureBits calJac3 ensGene
    # 51944550 bases of 2752505800 (1.887%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/canFam2
    # nameTranslation: names starting with a digit, X or Y get a chr prefix;
    #   a leading MT becomes chrM; a leading Un becomes chrUn.
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 canFam2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/canFam2/bed/ensGene.62
    featureBits canFam2 ensGene
    # 34693517 bases of 2384996543 (1.455%) in intersection

############################################################################
#  cavPor3 - Guinea pig - Ensembl Genes version 62  (DONE - 2011-04-19 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cavPor3
    # geneScaffolds is commented out; the only translation applied is
    #   renaming a leading MT to chrM.
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.62
    featureBits cavPor3 ensGene
    # 30971317 bases of 2663369733 (1.163%) in intersection

############################################################################
#  choHof1 - Sloth - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/choHof1
    # Only geneScaffolds is enabled.
    #   NOTE(review): the last comment line in the .ra mentions skipping
    #   invalid genes, but no skipInvalid setting follows it, so that
    #   option is not in effect here.
    cat << '_EOF_' > choHof1.ensGene.ra
# required db variable
db choHof1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 choHof1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/choHof1/bed/ensGene.62
    featureBits choHof1 ensGene
    # 18278941 bases of 2060419685 (0.887%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 62  (DONE - 2011-04-19 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/ci2
    # nameTranslation zero-pads the chromosome arm names: one digit plus
    #   p or q gets the prefix chr0 (1p becomes chr01p), two digits plus
    #   p or q gets the prefix chr (10q becomes chr10q).
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.62
    featureBits ci2 ensGene
    # 20114967 bases of 141233565 (14.242%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 62  (DONE - 2011-04-19 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cioSav2
    # Only the db line is active in this .ra; nameTranslation and
    #   haplotypeLift are both commented out (the latter points at an
    #   hg18 path).
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.62
    featureBits cioSav2 ensGene
    # 16572478 bases of 173749524 (9.538%) in intersection

############################################################################
#  danRer7 - Zebrafish - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/danRer7
    # nameTranslation: names starting with a digit, X or Y get a chr prefix;
    #   a leading MT becomes chrM.
    cat << '_EOF_' > danRer7.ensGene.ra
# required db variable
db danRer7
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 danRer7.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer7/bed/ensGene.62
    featureBits danRer7 ensGene
    # 62432695 bases of 1409770109 (4.429%) in intersection

############################################################################
#  dasNov2 - Armadillo - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/dasNov2
    # geneScaffolds and skipInvalid are both enabled.  The last comment
    #   line in the .ra records the transcript reported as invalid
    #   (ENSDNOT00000003424, no exonFrame on CDS exon 3).
    cat << '_EOF_' > dasNov2.ensGene.ra
# required db variable
db dasNov2
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       31903: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 dasNov2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dasNov2/bed/ensGene.62
    featureBits dasNov2 ensGene
    # 21968539 bases of 2371493872 (0.926%) in intersection

############################################################################
#  dipOrd1 - Kangaroo rat - Ensembl Genes version 62  (DONE - 2011-04-19 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1
    # Only geneScaffolds is enabled in this .ra; no nameTranslation or
    #   liftUp is configured.
    cat << '_EOF_' > dipOrd1.ensGene.ra
# required db variable
db dipOrd1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 dipOrd1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1/bed/ensGene.62
    featureBits dipOrd1 ensGene
    # 25324919 bases of 1844961421 (1.373%) in intersection

############################################################################
#  dm3 - D. melanogaster - Ensembl Genes version 62  (DONE - 2011-04-19 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/dm3
    # nameTranslation: names starting with a digit, X, Y or U get a chr
    #   prefix, and dmel_mitochondrion_genome is renamed to chrM.
    #   knownToEnsembl is commented out.
    cat << '_EOF_' > dm3.ensGene.ra
# required db variable
db dm3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XYU][0-9]*\)/chr\1/; s/^dmel_mitochondrion_genome/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 dm3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dm3/bed/ensGene.62
    featureBits dm3 ensGene
    # 30276468 bases of 162367812 (18.647%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/echTel1
    # geneScaffolds and skipInvalid are both enabled.  The last two comment
    #   lines in the .ra record the one transcript reported as invalid
    #   (ENSETET00000018714, no exonFrame on CDS exon 1).
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has invalid structures from Ensembl:
# 47425: ENSETET00000018714 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 echTel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/echTel1/bed/ensGene.62
    featureBits echTel1 ensGene
    # 25771306 bases of 2111581369 (1.220%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/equCab2
    # nameTranslation renames digit/X/Y names to chrN, MT to chrM and a
    #   leading Un to chrUn; per the .ra comment the liftUp file then
    #   translates the resulting chrUnNNNN names to chrUn coordinates.
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.62
    featureBits equCab2 ensGene
    # 39563318 bases of 2428790173 (1.629%) in intersection

############################################################################
#  eriEur1 - Hedgehog - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/eriEur1
    # Only geneScaffolds is enabled in this .ra; no nameTranslation or
    #   liftUp is configured.
    cat << '_EOF_' > eriEur1.ensGene.ra
# required db variable
db eriEur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 eriEur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/eriEur1/bed/ensGene.62
    featureBits eriEur1 ensGene
    # 22556849 bases of 2133134836 (1.057%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/felCat3
    # geneScaffolds and skipInvalid are both enabled.  The last three
    #   comment lines in the .ra record the two transcripts reported as
    #   invalid (ENSFCAT00000006929 and ENSFCAT00000009384).
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 5705: ENSFCAT00000006929 no exonFrame on CDS exon 16
# 28228: ENSFCAT00000009384 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 felCat3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/felCat3/bed/ensGene.62
    featureBits felCat3 ensGene
    # 22300874 bases of 1642698377 (1.358%) in intersection

############################################################################
#  fr2 - Fugu - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.62
    featureBits fr2 ensGene
    # 34568537 bases of 393312790 (8.789%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.62
    featureBits galGal3 ensGene
    # 30741650 bases of 1042591351 (2.949%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 62  (DONE - 2011-04-21 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 gasAcu1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1/bed/ensGene.62
    featureBits gasAcu1 ensGene
    # 36792090 bases of 446627861 (8.238%) in intersection

############################################################################
#  loxAfr3 - Elephant - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3
    cat << '_EOF_' > loxAfr3.ensGene.ra
# required db variable
db loxAfr3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 loxAfr3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3/bed/ensGene.62
    featureBits loxAfr3 ensGene
    # 32151456 bases of 3118565340 (1.031%) in intersection

############################################################################
#  macEug1 - Wallaby - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/macEug1
    cat << '_EOF_' > macEug1.ensGene.ra
# required db variable
db macEug1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 macEug1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/macEug1/bed/ensGene.62
    featureBits macEug1 ensGene
    # 23393650 bases of 2541767339 (0.920%) in intersection

############################################################################
#  melGal1 - Turkey - Ensembl Genes version 62  (DONE - 2011-04-21 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/melGal1
    cat << '_EOF_' > melGal1.ensGene.ra
# required db variable
db melGal1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to protect in perl:
nameTranslation '/^718000.*/d; s/^\([0-9WZ][0-9]*\)/chr\1/; s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/;'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 melGal1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/melGal1/bed/ensGene.62
    featureBits melGal1 ensGene
    # 25080242 bases of 935922386 (2.680%) in intersection

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 62  (DONE - 2011-04-20 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 micMur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/micMur1/bed/ensGene.62
    featureBits micMur1 ensGene
    # 25688755 bases of 1852394361 (1.387%) in intersection

############################################################################
#  mm9 - Mouse - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 mm9.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/mm9/bed/ensGene.62
    featureBits mm9 ensGene
    # 86577373 bases of 2620346127 (3.304%) in intersection

############################################################################
#  monDom5 - Opossum - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/monDom5
    cat << '_EOF_' > monDom5.ensGene.ra
# required db variable
db monDom5
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 monDom5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/monDom5/bed/ensGene.62
    featureBits monDom5 ensGene
    # 32982595 bases of 3501660299 (0.942%) in intersection

############################################################################
#  myoLuc1 - Microbat - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1
    cat << '_EOF_' > myoLuc1.ensGene.ra
# required db variable
db myoLuc1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 myoLuc1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1/bed/ensGene.62
    featureBits myoLuc1 ensGene
    # 24710174 bases of 1673855868 (1.476%) in intersection

############################################################################
#  nomLeu1 - Gibbon - Ensembl Genes version 62  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/nomLeu1
    cat << '_EOF_' > nomLeu1.ensGene.ra
# required db variable
db nomLeu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With single quotes to protect
#       everything in perl
nameTranslation 's/^GL\([0-9][0-9]*\).1/GL\1/; s/^ADFV\([0-9][0-9]*\).1/ADFV\1/;'
# nameTranslation 's/^\([0-9L]\)/chr\1/; s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/;
# s/^A/chrUn_A/'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 nomLeu1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/nomLeu1/bed/ensGene.62
    featureBits nomLeu1 ensGene
    # 45293349 bases of 2756591777 (1.643%) in intersection

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ochPri2
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 13270: ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 ochPri2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ochPri2/bed/ensGene.62
    featureBits ochPri2 ensGene
    # 25447459 bases of 1923624051 (1.323%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly, 824 items, 31,254 are OK
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 ornAna1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ornAna1/bed/ensGene.62
    featureBits ornAna1 ensGene
    # 24466297 bases of 1842236818 (1.328%) in intersection

############################################################################
#  oryCun2 - Rabbit - Ensembl Genes version 62  (DONE - 2011-04-21 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryCun2
    cat << '_EOF_' > oryCun2.ensGene.ra
# required db variable
db oryCun2
# the lift file will change the chrom names, no nameTranslation needed
# nameTranslation "s/^/chr/;"
# ensembl v62 has new naming scheme based on NCBI release:
liftUp /hive/data/genomes/oryCun2/jkStuff/ens.62.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 oryCun2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun2/bed/ensGene.62
    featureBits oryCun2 ensGene
    # 31797207 bases of 2604023284 (1.221%) in intersection

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.62
    featureBits oryLat2 ensGene
    # 32313511 bases of 700386597 (4.614%) in intersection

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 otoGar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/otoGar1/bed/ensGene.62
    featureBits otoGar1 ensGene
    # 23692789 bases of 1969052059 (1.203%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.62
    featureBits panTro2 ensGene
    # 50004270 bases of 2909485072 (1.719%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.62
    featureBits ponAbe2 ensGene
    # 38120849 bases of 3093572278 (1.232%) in intersection

############################################################################
#  proCap1 - Rock hyrax - Ensembl Genes version 62  (DONE - 2011-04-20 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/proCap1
    cat << '_EOF_' > proCap1.ensGene.ra
# required db variable
db proCap1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 proCap1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/proCap1/bed/ensGene.62
    featureBits proCap1 ensGene
    # 25344792 bases of 2407847681 (1.053%) in intersection

############################################################################
#  pteVam1 - Megabat - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/pteVam1
    cat << '_EOF_' > pteVam1.ensGene.ra
# required db variable
db pteVam1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has invalid structures from Ensembl:
#       13027: ENSPVAT00000010661 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 pteVam1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/pteVam1/bed/ensGene.62
    featureBits pteVam1 ensGene
    # 28967283 bases of 1839436660 (1.575%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.62
    featureBits rheMac2 ensGene
    # 44562701 bases of 2646704109 (1.684%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.62
    featureBits rn4 ensGene
    # 46705616 bases of 2571531505 (1.816%) in intersection

############################################################################
#  sacCer2 - S. cerevisiae - Ensembl Genes version 62  (DONE - 2011-04-20 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/sacCer2
    cat << '_EOF_' > sacCer2.ensGene.ra
# required db variable
db sacCer2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chrVIII/; s/^VII/chrVII/; s/^VI/chrVI/; s/^V/chrV/; s/^XIII/chrXIII/; s/^XII/chrXII/; s/^XIV/chrXIV/; s/^XI/chrXI/; s/^XVI/chrXVI/; s/^XV/chrXV/; s/^X/chrX/; s/^III/chrIII/; s/^IV/chrIV/; s/^II/chrII/; s/^IX/chrIX/; s/^I/chrI/; s/^Mito/chrM/; s/2-micron/2micron/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 sacCer2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sacCer2/bed/ensGene.62
    featureBits sacCer2 ensGene
    # 8917060 bases of 12162995 (73.313%) in intersection

############################################################################
#  sorAra1 - Shrew - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/sorAra1
    cat << '_EOF_' > sorAra1.ensGene.ra
# required db variable
db sorAra1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 sorAra1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sorAra1/bed/ensGene.62
    featureBits sorAra1 ensGene
    # 19690470 bases of 1832864697 (1.074%) in intersection

############################################################################
#  speTri1 - Squirrel - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/speTri1
    cat << '_EOF_' > speTri1.ensGene.ra
# required db variable
db speTri1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 speTri1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/speTri1/bed/ensGene.62
    featureBits speTri1 ensGene
    # 21595750 bases of 1913367893 (1.129%) in intersection

############################################################################
#  susScr1 - Pig - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/susScr1
    cat << '_EOF_' > susScr1.ensGene.ra
# required db variable
db susScr1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 susScr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/susScr1/bed/ensGene.62
    featureBits susScr1 ensGene
    # 28758401 bases of 2231332019 (1.289%) in intersection

############################################################################
#  taeGut1 - Zebra finch - Ensembl Genes version 62  (DONE - 2011-04-20 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/taeGut1
    cat << '_EOF_' > taeGut1.ensGene.ra
# required db variable
db taeGut1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
# geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
# skipInvalid yes
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 taeGut1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/taeGut1/bed/ensGene.62
    featureBits taeGut1 ensGene
    # 25441417 bases of 1222864691 (2.080%) in intersection

############################################################################
#  tarSyr1 - Tarsier - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1
    cat << '_EOF_' > tarSyr1.ensGene.ra
# required db variable
db tarSyr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 tarSyr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1/bed/ensGene.62
    featureBits tarSyr1 ensGene
    # 21327630 bases of 2768536343 (0.770%) in intersection

############################################################################
#  tetNig2 - Tetraodon - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tetNig2
    cat << '_EOF_' > tetNig2.ensGene.ra
# required db variable
db tetNig2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 tetNig2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tetNig2/bed/ensGene.62
    featureBits tetNig2 ensGene
    # 31642974 bases of 302314788 (10.467%) in intersection

############################################################################
#  tupBel1 - Tree shrew - Ensembl Genes version 62  (DONE - 2011-04-20 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/tupBel1
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 tupBel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tupBel1/bed/ensGene.62
    featureBits tupBel1 ensGene
    # 22849040 bases of 2137225476 (1.069%) in intersection

############################################################################
#  turTru1 - Dolphin - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/turTru1
    cat << '_EOF_' > turTru1.ensGene.ra
# required db variable
db turTru1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 turTru1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/turTru1/bed/ensGene.62
    featureBits turTru1 ensGene
    # 28614121 bases of 2298444090 (1.245%) in intersection

############################################################################
#  vicPac1 - Alpaca - Ensembl Genes version 62  (DONE - 2011-04-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/vicPac1
    cat << '_EOF_' > vicPac1.ensGene.ra
# required db variable
db vicPac1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that does not translate properly to UCSC coordinates
#       5017: ENSVPAT00000009076 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=62 vicPac1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/vicPac1/bed/ensGene.62
    featureBits vicPac1 ensGene
    # 17891814 bases of 1922910435 (0.930%) in intersection

############################################################################
############################################################################
############################################################################
#  anoCar2 - Lizard - Ensembl Genes version 61  (DONE - 2011-04-19 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/anoCar2
    cat << '_EOF_' > anoCar2.ensGene.ra
# required db variable
db anoCar2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation 's/^\([0-9L]\)/chr\1/; s/^GL\([0-9][0-9]*\).1/chrUn_GL\1/; s/^A/chrUn_A/'
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 anoCar2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/anoCar2/bed/ensGene.61
    featureBits anoCar2 ensGene
    # 27905138 bases of 1701353770 (1.640%) in intersection

    hgsql -e \
'update trackVersion set dateReference="current" where db="anoCar2";' hgFixed

############################################################################

############################################################################
# ensembl 61 update (DONE - 2011-02-04 - Hiram)

# to finish off the v61 update after all were complete:
     hgsql -e \
'update trackVersion set dateReference="nov2010" where version="60";' hgFixed
     hgsql -e \
'update trackVersion set dateReference="current" where version="61";' hgFixed

############################################################################
#  hg19 - Human - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/hg19
    cat << '_EOF_' > hg19.ensGene.ra
# required db variable
db hg19
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 57
liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 hg19.ensGene.ra
#    XXX same problems as before
    #	Manual repair of the gene pred produced by the run above; the load
    #	step is resumed separately with -continue=load once this checks out.
    cd /hive/data/genomes/hg19/bed/ensGene.61/process
    gunzip hg19.allGenes.gp.gz
    #	rebuild the .gz without these three transcripts
    #	NOTE(review): presumably these are the ones that fail genePredCheck
    #	- confirm against the earlier hg19 Ensembl updates
    egrep -v "ENST00000436611|ENST00000436232|ENST00000436870" \
        hg19.allGenes.gp | gzip -c > hg19.allGenes.gp.gz
    genePredCheck -db=hg19 hg19.allGenes.gp.gz
    #	checked: 167071 failed: 0
    #	keep the unfiltered, uncompressed file under a .broken name
    mv hg19.allGenes.gp hg19.allGenes.gp.broken

    cd /hive/data/genomes/hg19
    #	and finish it off:
    doEnsGeneUpdate.pl -ensVersion=61 -continue=load \
	hg19.ensGene.ra > ens.61.load 2>&1
    featureBits hg19 ensGene
    # 109107310 bases of 2897316137 (3.766%) in intersection

############################################################################
# bosTau4 was broken - finished manually (DONE - 2011-02-04 - Hiram)
    ssh hgwdev
    cd /hive/data/genomes/bosTau4
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    #	Initial automated attempt; the steps that follow redo the GTF
    #	processing by hand and then resume the script at the load step.
    doEnsGeneUpdate.pl  -ensVersion=61 bosTau4.ensGene.ra
    #	broken during processing, fix doProcess.csh to eliminate AAFC03011182
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.61/process
    #	set the GTF from the failed run aside
    mv allGenes.gtf.gz allGenes.gtf.gz.0
    #	rebuild allGenes.gtf.gz from the download: apply the same sed
    #	expression as nameTranslation in bosTau4.ensGene.ra, and drop every
    #	line that mentions AAFC03011182
    zcat ../download/Bos_taurus.Btau_4.0.61.gtf.gz \
        | sed -e "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/" \
        | grep -v AAFC03011182 | gzip > allGenes.gtf.gz
    #	convert the GTF to an extended gene pred; infoOut.txt feeds the
    #	ensGtp.tab extraction on the next command
    gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
	| gzip > bosTau4.allGenes.gp.gz
    /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
    #	verify the result before loading
    genePredCheck -db=bosTau4 bosTau4.allGenes.gp.gz
    #	checked: 31598 failed: 0
    #	resume the script at the load step, output logged to ens.61.load
    cd /hive/data/genomes/bosTau4
    doEnsGeneUpdate.pl  -ensVersion=61 -continue=load bosTau4.ensGene.ra \
	> ens.61.load  2>&1
    featureBits bosTau4 ensGene
    #  42306082 bases of 2731830700 (1.549%) in intersection

#############################################################################
#  calJac3 - Marmoset - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
#	broken bits due to contigs we do not have

    cd /hive/data/genomes/calJac3
    cat << '_EOF_' > calJac3.ensGene.ra
# required db variable
db calJac3
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# changing names randoms in Ensembl v59
liftUp /hive/data/genomes/calJac3/jkStuff/ens.59.lft
'_EOF_'
#  << happy emacs

    #	Initial automated attempt; the processing step is completed by hand
    #	below and the script is then resumed at the load step.
    doEnsGeneUpdate.pl  -ensVersion=61 calJac3.ensGene.ra
    cd /hive/data/genomes/calJac3/bed/ensGene.61/process
    #	keep the GTF from the automated run under a different name
    mv allGenes.gtf.gz allGenes.gtf.badNames.gz
    # eliminate odd bits that we don't have here
    #	(every line mentioning ACFV is dropped)
    zcat allGenes.gtf.badNames.gz | grep -v ACFV | gzip > allGenes.gtf.gz
    #	now finishing the processing step
    cd /hive/data/genomes/calJac3/bed/ensGene.61/process
    #	convert the filtered GTF to an extended gene pred; infoOut.txt feeds
    #	the ensGtp.tab extraction
    gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
	| gzip > calJac3.allGenes.gp.gz
    /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
    #	apply the same lift file named by liftUp in calJac3.ensGene.ra;
    #	the pre-lift gene pred is kept as *.beforeLiftUp.gp.gz
    mv calJac3.allGenes.gp.gz calJac3.allGenes.beforeLiftUp.gp.gz
    liftUp -extGenePred -type=.gp calJac3.allGenes.gp \
	/hive/data/genomes/calJac3/jkStuff/ens.59.lft carry \
	calJac3.allGenes.beforeLiftUp.gp.gz
    gzip calJac3.allGenes.gp
    #	verify the result before loading
    genePredCheck -db=calJac3 calJac3.allGenes.gp.gz
    #	checked: 53989 failed: 0

    #	resume the script at the load step, output logged to ens.61.load
    cd /hive/data/genomes/calJac3
    doEnsGeneUpdate.pl -ensVersion=61 -continue=load calJac3.ensGene.ra \
	> ens.61.load 2>&1
    ssh hgwdev
    cd /hive/data/genomes/calJac3/bed/ensGene.61
    featureBits calJac3 ensGene
    # 51397664 bases of 2752505800 (1.867%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    #	Initial automated attempt; the gene pred it produced is lifted by
    #	hand below and the script is then resumed at the load step.
    doEnsGeneUpdate.pl  -ensVersion=61 gasAcu1.ensGene.ra

    # this assembly needed extra manual work after the attempt to run all
    #	databases in a single for loop
    cd /hive/data/genomes/gasAcu1/bed/ensGene.61/process
    #	keep the unlifted gene pred under a different name
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    #	two liftUp passes; judging by the lift file names the first goes
    #	contigs -> scaffolds and the second scaffolds -> UCSC chroms
    #	(NOTE(review): confirm against the jkStuff lift files)
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    #	compress both the intermediate and the final gene pred
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29245 failed: 0

    #	then continue with the load, output logged to ens.61.load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=61 gasAcu1.ensGene.ra \
	> ens.61.load 2>&1

    featureBits gasAcu1 ensGene
    # 36792090 bases of 446627861 (8.238%) in intersection

############################################################################
#  ailMel1 - Panda - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ailMel1
    cat << '_EOF_' > ailMel1.ensGene.ra
# required db variable
db ailMel1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 57
# liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 ailMel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ailMel1/bed/ensGene.61
    featureBits ailMel1 ensGene
    # 31990632 bases of 2245312831 (1.425%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/canFam2
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 canFam2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/canFam2/bed/ensGene.61
    featureBits canFam2 ensGene
    # 34693517 bases of 2384996543 (1.455%) in intersection

############################################################################
#  cavPor3 - Guinea pig - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cavPor3
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.61
    featureBits cavPor3 ensGene
    # 30971317 bases of 2663369733 (1.163%) in intersection

############################################################################
#  choHof1 - Sloth - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/choHof1
    cat << '_EOF_' > choHof1.ensGene.ra
# required db variable
db choHof1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
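#       during the loading of the gene pred, skip all invalid genes
#       (NOTE: no 'skipInvalid yes' line follows this comment here, unlike
#       the dasNov2 entry - confirm whether that is intended)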
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 choHof1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/choHof1/bed/ensGene.61
    featureBits choHof1 ensGene
    # 18278941 bases of 2060419685 (0.887%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.61
    featureBits ci2 ensGene
    # 20114967 bases of 141233565 (14.242%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.61
    featureBits cioSav2 ensGene
    # 16572478 bases of 173749524 (9.538%) in intersection

############################################################################
#  danRer7 - Zebrafish - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/danRer7
    cat << '_EOF_' > danRer7.ensGene.ra
# required db variable
db danRer7
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 danRer7.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer7/bed/ensGene.61
    featureBits danRer7 ensGene
    # 62432695 bases of 1409770109 (4.429%) in intersection

############################################################################
#  dasNov2 - Armadillo - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/dasNov2
    cat << '_EOF_' > dasNov2.ensGene.ra
# required db variable
db dasNov2
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       31903: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 dasNov2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dasNov2/bed/ensGene.61
    featureBits dasNov2 ensGene
    # 21968539 bases of 2371493872 (0.926%) in intersection

############################################################################
#  dipOrd1 - Kangaroo rat - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1
    cat << '_EOF_' > dipOrd1.ensGene.ra
# required db variable
db dipOrd1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 dipOrd1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1/bed/ensGene.61
    featureBits dipOrd1 ensGene
    # 25324919 bases of 1844961421 (1.373%) in intersection

############################################################################
#  dm3 - D. melanogaster - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/dm3
    cat << '_EOF_' > dm3.ensGene.ra
# required db variable
db dm3
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XYU][0-9]*\)/chr\1/; s/^dmel_mitochondrion_genome/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 dm3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dm3/bed/ensGene.61
    featureBits dm3 ensGene
    # 30276468 bases of 162367812 (18.647%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has invalid structures from Ensembl:
# 47425: ENSETET00000018714 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 echTel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/echTel1/bed/ensGene.61
    featureBits echTel1 ensGene
    # 25771306 bases of 2111581369 (1.220%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.61
    featureBits equCab2 ensGene
    # 39563318 bases of 2428790173 (1.629%) in intersection

############################################################################
#  eriEur1 - Hedgehog - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/eriEur1
    cat << '_EOF_' > eriEur1.ensGene.ra
# required db variable
db eriEur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 eriEur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/eriEur1/bed/ensGene.61
    featureBits eriEur1 ensGene
    # 22556849 bases of 2133134836 (1.057%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 5705: ENSFCAT00000006929 no exonFrame on CDS exon 16
# 28228: ENSFCAT00000009384 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 felCat3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/felCat3/bed/ensGene.61
    featureBits felCat3 ensGene
    # 22300874 bases of 1642698377 (1.358%) in intersection

############################################################################
#  fr2 - Fugu - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.61
    featureBits fr2 ensGene
    # 34568537 bases of 393312790 (8.789%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.61
    featureBits galGal3 ensGene
    # 30741650 bases of 1042591351 (2.949%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 gasAcu1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1/bed/ensGene.61
    featureBits gasAcu1 ensGene
    # 36792090 bases of 446627861 (8.238%) in intersection

############################################################################
#  loxAfr3 - Elephant - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3
    cat << '_EOF_' > loxAfr3.ensGene.ra
# required db variable
db loxAfr3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 loxAfr3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3/bed/ensGene.61
    featureBits loxAfr3 ensGene
    # 32151456 bases of 3118565340 (1.031%) in intersection

############################################################################
#  macEug1 - Wallaby - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/macEug1
    cat << '_EOF_' > macEug1.ensGene.ra
# required db variable
db macEug1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 macEug1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/macEug1/bed/ensGene.61
    featureBits macEug1 ensGene
    # 23393650 bases of 2541767339 (0.920%) in intersection

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 micMur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/micMur1/bed/ensGene.61
    featureBits micMur1 ensGene
    # 25688755 bases of 1852394361 (1.387%) in intersection

############################################################################
#  mm9 - Mouse - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 mm9.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/mm9/bed/ensGene.61
    featureBits mm9 ensGene
    # 86577373 bases of 2620346127 (3.304%) in intersection

############################################################################
#  monDom5 - Opossum - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/monDom5
    cat << '_EOF_' > monDom5.ensGene.ra
# required db variable
db monDom5
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 monDom5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/monDom5/bed/ensGene.61
    featureBits monDom5 ensGene
    # 32982595 bases of 3501660299 (0.942%) in intersection

############################################################################
#  myoLuc1 - Microbat - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1
    cat << '_EOF_' > myoLuc1.ensGene.ra
# required db variable
db myoLuc1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 myoLuc1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1/bed/ensGene.61
    featureBits myoLuc1 ensGene
    # 24710174 bases of 1673855868 (1.476%) in intersection

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ochPri2
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 13270: ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 ochPri2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ochPri2/bed/ensGene.61
    featureBits ochPri2 ensGene
    # 25447459 bases of 1923624051 (1.323%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly, 823 items
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 ornAna1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ornAna1/bed/ensGene.61
    featureBits ornAna1 ensGene
    # 24466297 bases of 1842236818 (1.328%) in intersection

############################################################################
#  oryCun2 - Rabbit - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryCun2
    cat << '_EOF_' > oryCun2.ensGene.ra
# required db variable
db oryCun2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^/chr/;"
# ensembl appears to still be in scaffolds ? - older versions
# liftUp /hive/data/genomes/oryCun2/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 oryCun2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun2/bed/ensGene.61
    featureBits oryCun2 ensGene
    # 31785271 bases of 2604023284 (1.221%) in intersection

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.61
    featureBits oryLat2 ensGene
    # 32313511 bases of 700386597 (4.614%) in intersection

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 otoGar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/otoGar1/bed/ensGene.61
    featureBits otoGar1 ensGene
    # 23692789 bases of 1969052059 (1.203%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.61
    featureBits panTro2 ensGene
    # 50004270 bases of 2909485072 (1.719%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.61
    featureBits ponAbe2 ensGene
    # 38120849 bases of 3093572278 (1.232%) in intersection

############################################################################
#  proCap1 - Rock hyrax - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/proCap1
    cat << '_EOF_' > proCap1.ensGene.ra
# required db variable
db proCap1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 proCap1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/proCap1/bed/ensGene.61
    featureBits proCap1 ensGene
    # 25344792 bases of 2407847681 (1.053%) in intersection

############################################################################
#  pteVam1 - Megabat - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/pteVam1
    cat << '_EOF_' > pteVam1.ensGene.ra
# required db variable
db pteVam1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has invalid structures from Ensembl:
#       13027: ENSPVAT00000010661 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 pteVam1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/pteVam1/bed/ensGene.61
    featureBits pteVam1 ensGene
    # 28967283 bases of 1839436660 (1.575%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.61
    featureBits rheMac2 ensGene
    # 44562701 bases of 2646704109 (1.684%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.61
    featureBits rn4 ensGene
    # 46705616 bases of 2571531505 (1.816%) in intersection

############################################################################
#  sacCer2 - S. cerevisiae - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/sacCer2
    cat << '_EOF_' > sacCer2.ensGene.ra
# required db variable
db sacCer2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chrVIII/; s/^VII/chrVII/; s/^VI/chrVI/; s/^V/chrV/; s/^XIII/chrXIII/; s/^XII/chrXII/; s/^XIV/chrXIV/; s/^XI/chrXI/; s/^XVI/chrXVI/; s/^XV/chrXV/; s/^X/chrX/; s/^III/chrIII/; s/^IV/chrIV/; s/^II/chrII/; s/^IX/chrIX/; s/^I/chrI/; s/^Mito/chrM/; s/2-micron/2micron/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 sacCer2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sacCer2/bed/ensGene.61
    featureBits sacCer2 ensGene
    # 8917060 bases of 12162995 (73.313%) in intersection

############################################################################
#  sorAra1 - Shrew - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/sorAra1
    cat << '_EOF_' > sorAra1.ensGene.ra
# required db variable
db sorAra1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 sorAra1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sorAra1/bed/ensGene.61
    featureBits sorAra1 ensGene
    # 19690470 bases of 1832864697 (1.074%) in intersection

############################################################################
#  speTri1 - Squirrel - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/speTri1
    cat << '_EOF_' > speTri1.ensGene.ra
# required db variable
db speTri1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 speTri1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/speTri1/bed/ensGene.61
    featureBits speTri1 ensGene
    # 21595750 bases of 1913367893 (1.129%) in intersection

############################################################################
#  susScr2 - Pig - lifted susScr1 v61 genes to susScr2 (DONE - 2011-02-04 - Hiram)
    #	The susScr1 version 61 gene predictions are lifted to susScr2
    #	coordinates and loaded by hand.  Every input below is reached through
    #	relative symlinks into /hive/data/genomes/susScr1/bed/ensGene.61 and
    #	/hive/data/genomes/susScr1/bed/liftOver, so the susScr1 version 61
    #	build (recorded in the section following this one) must already exist.
    #	-p so that re-running this section does not fail on an existing dir
    mkdir -p /hive/data/genomes/susScr2/bed/ensGene.61
    cd /hive/data/genomes/susScr2/bed/ensGene.61

    #	uncompressed copies of the susScr1 gene predictions and the
    #	susScr1 -> susScr2 liftOver chain
    ln -s ../../../susScr1/bed/ensGene.61/process/susScr1.allGenes.gp.gz .
    zcat susScr1.allGenes.gp.gz > susScr1.allGenes.genePred
    ln -s ../../../susScr1/bed/liftOver/susScr1ToSusScr2.over.chain.gz
    zcat susScr1ToSusScr2.over.chain.gz > susScr1ToSusScr2.over.chain

    #	lift the genePred; items that do not map are written to
    #	susScr1.liftOver.unMapped.txt
    liftOver -genePred susScr1.allGenes.genePred \
        susScr1ToSusScr2.over.chain \
        susScr2.allGenes.gp susScr1.liftOver.unMapped.txt
    gzip susScr2.allGenes.gp
    genePredCheck -db=susScr2 susScr2.allGenes.gp.gz
    #	checked: 22017 failed: 0

    #	load the lifted predictions as the ensGene table, all loader output
    #	is captured in loadGenePred.errors.txt
    hgLoadGenePred  -genePredExt susScr2 \
	ensGene susScr2.allGenes.gp.gz > loadGenePred.errors.txt 2>&1

    #	peptides: reduce each fasta header to what follows ' transcript:'
    #	and drop any trailing ' CCDS...' annotation
    zcat \
../../../susScr1/bed/ensGene.61/download/Sus_scrofa.Sscrofa9.61.pep.all.fa.gz \
        | sed -e 's/^>.* transcript:/>/; s/ CCDS.*$//;' \
	| gzip > ensPep.txt.gz
    #	fasta to tab-separated, dropping empty lines and a trailing '*';
    #	the '*' is escaped so it is a literal star in every sed
    zcat ensPep.txt.gz \
	| ~/kent/src/utils/faToTab/faToTab.pl /dev/null /dev/stdin \
	     | sed -e '/^$/d; s/\*$//' | sort > ensPep.susScr2.fa.tab
    hgPepPred susScr2 tab ensPep ensPep.susScr2.fa.tab

    #	the ensGtp table is loaded from the susScr1 ensGtp.tab unchanged
    ln -s ../../../susScr1/bed/ensGene.61/process/ensGtp.tab .
    hgLoadSqlTab susScr2 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.tab

    #	record this load in hgFixed.trackVersion
    #	NOTE(review): the backslash-newline pairs inside the single quotes
    #	look like csh/tcsh quoting - confirm the shell before pasting this
    #	into bash, where the backslashes are passed through literally
    hgsql -e 'INSERT INTO trackVersion \
	(db, name, who, version, updateTime, comment, source, dateReference) \
	VALUES("susScr2", "ensGene", "hiram", "61", now(), \
        "identical to previous version 60", \
        "lifted susScr1 to susScr2 ftp://ftp.ensembl.org/pub/release-61/gtf/sus_scrofa/Sus_scrofa.Sscrofa9.61.gtf.gz", \
        "current" );' hgFixed

    featureBits susScr2 ensGene
    #	28702434 bases of 2231298548 (1.286%) in intersection

############################################################################
#  susScr1 - Pig - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/susScr1
    cat << '_EOF_' > susScr1.ensGene.ra
# required db variable
db susScr1
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 susScr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/susScr1/bed/ensGene.61
    featureBits susScr1 ensGene
    # 28758401 bases of 2231332019 (1.289%) in intersection

############################################################################
#  taeGut1 - Zebra finch - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/taeGut1
    cat << '_EOF_' > taeGut1.ensGene.ra
# required db variable
db taeGut1
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
# geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
# skipInvalid yes
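#       NOTE: the ENSDNOT* entries below appear to be left over from the
#       dasNov2 .ra file (ENSDNOT00000003424 is listed there too); they are
#       probably not taeGut1 transcripts - confirm before relying on them
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3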
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 taeGut1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/taeGut1/bed/ensGene.61
    featureBits taeGut1 ensGene
    # 25441417 bases of 1222864691 (2.080%) in intersection

############################################################################
#  tarSyr1 - Tarsier - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1
    cat << '_EOF_' > tarSyr1.ensGene.ra
# required db variable
db tarSyr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 tarSyr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1/bed/ensGene.61
    featureBits tarSyr1 ensGene
    # 21327630 bases of 2768536343 (0.770%) in intersection

############################################################################
#  tetNig2 - Tetraodon - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tetNig2
    cat << '_EOF_' > tetNig2.ensGene.ra
# required db variable
db tetNig2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=61 tetNig2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tetNig2/bed/ensGene.61
    featureBits tetNig2 ensGene
    # 31642974 bases of 302314788 (10.467%) in intersection

############################################################################
#  tupBel1 - Tree shrew - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/tupBel1
    # settings file for doEnsGeneUpdate.pl: geneScaffold translation is
    #	requested and a lift file is named for the chrom name change
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    # run the Ensembl release 61 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=61 tupBel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tupBel1/bed/ensGene.61
    # coverage of ensGene on tupBel1; the recorded result follows
    featureBits tupBel1 ensGene
    # 22849040 bases of 2137225476 (1.069%) in intersection

############################################################################
#  turTru1 - Dolphin - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/turTru1
    # settings file for doEnsGeneUpdate.pl, requesting geneScaffold translation
    cat << '_EOF_' > turTru1.ensGene.ra
# required db variable
db turTru1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    # run the Ensembl release 61 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=61 turTru1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/turTru1/bed/ensGene.61
    # coverage of ensGene on turTru1; the recorded result follows
    featureBits turTru1 ensGene
    # 28614121 bases of 2298444090 (1.245%) in intersection

############################################################################
#  vicPac1 - Alpaca - Ensembl Genes version 61  (DONE - 2011-02-04 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/vicPac1
    # settings file for doEnsGeneUpdate.pl: geneScaffold translation plus
    #	skipInvalid; the last two lines of the file only document the one
    #	transcript that fails conversion
    cat << '_EOF_' > vicPac1.ensGene.ra
# required db variable
db vicPac1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that does not translate properly to UCSC coordinates
#       5017: ENSVPAT00000009076 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    # run the Ensembl release 61 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=61 vicPac1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/vicPac1/bed/ensGene.61
    # coverage of ensGene on vicPac1; the recorded result follows
    featureBits vicPac1 ensGene
    # 17891814 bases of 1922910435 (0.930%) in intersection

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes version 61  (DONE - 2011-02-04 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/xenTro2
    # minimal settings file for doEnsGeneUpdate.pl: only the db name is set
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    # run the Ensembl release 61 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=61 xenTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/xenTro2/bed/ensGene.61
    # coverage of ensGene on xenTro2; the recorded result follows
    featureBits xenTro2 ensGene
    # 29181688 bases of 1359412157 (2.147%) in intersection

############################################################################
############################################################################
############################################################################
# ensembl 60 update (DONE - 2010-11-16 - Hiram)

# to finish off the v60 update after all were complete:
#	the first statement gives the version 59 rows the date tag "aug2010",
#	the second marks the version 60 rows as "current".
# NOTE(review): both WHERE clauses match on version alone, with no db or
#	name restriction - confirm every trackVersion row carrying these
#	version strings is meant to change.
     hgsql -e \
'update trackVersion set dateReference="aug2010" where version="59";' hgFixed
     hgsql -e \
'update trackVersion set dateReference="current" where version="60";' hgFixed

############################################################################
#  danRer7 - Zebrafish - Ensembl Genes version 60  (DONE - 2010-12-02 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/danRer7
    # settings file for doEnsGeneUpdate.pl; nameTranslation is a sed script
    #	that adds the chr prefix and renames MT to chrM
    cat << '_EOF_' > danRer7.ensGene.ra
# required db variable
db danRer7
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    # run the Ensembl release 60 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=60 danRer7.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer7/bed/ensGene.60
    # coverage of ensGene on danRer7; the recorded result follows
    featureBits danRer7 ensGene
    # 55845257 bases of 1409772743 (3.961%) in intersection
############################################################################

# panda is a new one in version 60:
############################################################################
#  ailMel1 - Panda - Ensembl Genes version 60  (DONE - 2010-11-16 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ailMel1
    # settings file for doEnsGeneUpdate.pl; only "db ailMel1" is active,
    #	every other setting in the file is commented out (the commented
    #	paths refer to hg19)
    cat << '_EOF_' > ailMel1.ensGene.ra
# required db variable
db ailMel1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 57
# liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    # run the Ensembl release 60 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=60 ailMel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ailMel1/bed/ensGene.60
    # coverage of ensGene on ailMel1; the recorded result follows
    featureBits ailMel1 ensGene
    # 31990632 bases of 2245312831 (1.425%) in intersection
############################################################################
# bosTau4 was broken - finished manually (DONE - 2010-11-16 - Hiram)
#	The automatic run stopped in the process step; the GTF was rebuilt by
#	hand without AAFC03011182 and the run resumed at the load step.
    ssh hgwdev
    cd /hive/data/genomes/bosTau4
    # settings file for doEnsGeneUpdate.pl
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=60 bosTau4.ensGene.ra
    #	broken during processing, fix doProcess.csh to eliminate AAFC03011182
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.60/process
    # set the generated GTF aside, then rebuild it from the download using
    #	the same sed translation as the .ra file, dropping AAFC03011182 lines
    mv allGenes.gtf.gz allGenes.gtf.gz.0
    zcat ../download/Bos_taurus.Btau_4.0.60.gtf.gz \
        | sed -e "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/" \
        | grep -v AAFC03011182 | gzip > allGenes.gtf.gz
    # convert the filtered GTF to an extended genePred, keeping the info
    #	output that extractGtf.pl turns into ensGtp.tab
    gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
	| gzip > bosTau4.allGenes.gp.gz
    /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
    # verify the genePred against the bosTau4 database before loading
    genePredCheck -db=bosTau4 bosTau4.allGenes.gp.gz
    #	checked: 31598 failed: 0
    cd /hive/data/genomes/bosTau4
    # resume the automatic procedure at the load step, logging to ens.60.load
    doEnsGeneUpdate.pl  -ensVersion=60 -continue=load bosTau4.ensGene.ra \
	> ens.60.load  2>&1
    featureBits bosTau4 ensGene
    #  42306082 bases of 2731830700 (1.549%) in intersection

#############################################################################
#  calJac3 - Marmoset - Ensembl Genes version 60  (DONE - 2010-11-16 - hiram)
#	broken bits due to contigs we do not have

    cd /hive/data/genomes/calJac3
    # settings file for doEnsGeneUpdate.pl; the lift file named here is the
    #	same one used by the manual liftUp further down
    cat << '_EOF_' > calJac3.ensGene.ra
# required db variable
db calJac3
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# changing names randoms in Ensembl v59
liftUp /hive/data/genomes/calJac3/jkStuff/ens.59.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=60 calJac3.ensGene.ra
    cd /hive/data/genomes/calJac3/bed/ensGene.60/process
    # set the generated GTF aside and rebuild it without the ACFV lines
    mv allGenes.gtf.gz allGenes.gtf.badNames.gz
    # eliminate odd bits that we don't have here
    zcat allGenes.gtf.badNames.gz | grep -v ACFV | gzip > allGenes.gtf.gz
    #	now finishing the processing step
    cd /hive/data/genomes/calJac3/bed/ensGene.60/process
    gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
	| gzip > calJac3.allGenes.gp.gz
    /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
    # keep the un-lifted genePred under a new name, then lift it with
    #	ens.59.lft into calJac3.allGenes.gp and compress the result
    mv calJac3.allGenes.gp.gz calJac3.allGenes.beforeLiftUp.gp.gz
    liftUp -extGenePred -type=.gp calJac3.allGenes.gp \
	/hive/data/genomes/calJac3/jkStuff/ens.59.lft carry \
	calJac3.allGenes.beforeLiftUp.gp.gz
    gzip calJac3.allGenes.gp
    # verify the lifted genePred against the calJac3 database
    genePredCheck -db=calJac3 calJac3.allGenes.gp.gz
    #	checked: 53989 failed: 0

    cd /hive/data/genomes/calJac3
    # resume the automatic procedure at the load step, logging to ens.60.load
    doEnsGeneUpdate.pl -ensVersion=60 -continue=load calJac3.ensGene.ra \
	> ens.60.load 2>&1
    ssh hgwdev
    cd /hive/data/genomes/calJac3/bed/ensGene.60
    featureBits calJac3 ensGene
    # 51397664 bases of 2752505800 (1.867%) in intersection

############################################################################
#  hg19 - Human - Ensembl Genes version 60  (DONE - 2010-11-16 - hiram)
#	The automatic run needs a manual fix: three transcripts are removed
#	from the genePred before the run is resumed at the load step.
    ssh hgwdev
    cd /hive/data/genomes/hg19
    # settings file for doEnsGeneUpdate.pl
    cat << '_EOF_' > hg19.ensGene.ra
# required db variable
db hg19
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 57
liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=60 hg19.ensGene.ra
#    XXX same problems as before
    cd /hive/data/genomes/hg19/bed/ensGene.60/process
    # uncompress, drop the three listed transcripts, and write the filtered
    #	result back under the original compressed name
    gunzip hg19.allGenes.gp.gz
    egrep -v "ENST00000436611|ENST00000436232|ENST00000436870" \
        hg19.allGenes.gp | gzip -c > hg19.allGenes.gp.gz
    # verify the filtered genePred against the hg19 database
    genePredCheck -db=hg19 hg19.allGenes.gp.gz
    #	checked: 157477 failed: 0
    # keep the unfiltered file around under a name that marks it as bad
    mv hg19.allGenes.gp hg19.allGenes.gp.broken

    cd /hive/data/genomes/hg19
    #	and finish it off:
    doEnsGeneUpdate.pl -ensVersion=60 -continue=load \
	hg19.ensGene.ra > ens.60.load 2>&1
    featureBits hg19 ensGene
    # 106318989 bases of 2897316137 (3.670%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 60  (DONE - 2010-11-16 - hiram)
#	The genePred from the automatic run is lifted by hand in two passes
#	before the run is resumed at the load step.
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    # settings file for doEnsGeneUpdate.pl
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=60 gasAcu1.ensGene.ra

    # requires extra attention after the all database for loop attempt
    cd /hive/data/genomes/gasAcu1/bed/ensGene.60/process
    # keep the un-lifted genePred under a new name
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    # pass 1: lift with contigsToScaffolds.lft into gasAcu1.scaffolds.gp
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    # pass 2: lift that result with UCSC.chromToScaffoldSansGaps.lft into
    #	gasAcu1.allGenes.gp
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29245 failed: 0

    #	then continue with the load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=60 gasAcu1.ensGene.ra \
	> ens.60.load 2>&1

    featureBits gasAcu1 ensGene
    # 36792090 bases of 446627861 (8.238%) in intersection

############################################################################
#  dm3 - D. melanogaster - Ensembl Genes version 60  (DONE - 2010-11-16 - hiram)
    #	problem with length of names in third column of ensGtp.tab;
    #	ensGtp is loaded with the local ensGtp.sql written below, which
    #	declares protein as char(26), to get it to go.
    ssh hgwdev
    cd /hive/data/genomes/dm3
    # settings file for doEnsGeneUpdate.pl
    cat << '_EOF_' > dm3.ensGene.ra
# required db variable
db dm3
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XYU][0-9]*\)/chr\1/; s/^dmel_mitochondrion_genome/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=60 dm3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dm3/bed/ensGene.60/process
    # local table definition for ensGtp, written into the process directory
    cat << '_EOF_' > ensGtp.sql
# This creates the table holding the relationship between
# ensemble genes, transcripts, and peptides.
CREATE TABLE ensGtp (
  gene char(12) NOT NULL,
  transcript char(12) NOT NULL,
  protein char(26) NOT NULL,
# INDICES
  INDEX(gene(11)),
  UNIQUE(transcript(11)),
  INDEX(protein(26))
) 
'_EOF_'
    # << happy emacs

    # finish off the loading
    cd /hive/data/genomes/dm3/bed/ensGene.60
    # fixup statement in doLoad.csh
    #	hgLoadSqlTab dm3 ensGtp process/ensGtp.sql process/ensGtp.tab
    # then
    ./doLoad.csh
    cd /hive/data/genomes/dm3
    # resume the automatic procedure at the cleanup step, logging to
    #	ens.60.cleanup.txt
    doEnsGeneUpdate.pl -continue=cleanup -ensVersion=60 dm3.ensGene.ra \
	> ens.60.cleanup.txt 2>&1

    cd /hive/data/genomes/dm3/bed/ensGene.60
    featureBits dm3 ensGene
    # 30276468 bases of 162367812 (18.647%) in intersection

############################################################################
#  susScr2 - Pig - lifted susScr1 v60 genes to susScr2 (DONE - 2010-11-16 - Hiram)
#	No doEnsGeneUpdate.pl run here: the susScr1 v60 genePred is carried to
#	susScr2 with liftOver, and the tables are loaded by hand.
    mkdir /hive/data/genomes/susScr2/bed/ensGene.60
    cd /hive/data/genomes/susScr2/bed/ensGene.60
    # link in the susScr1 genePred and the susScr1-to-susScr2 chain, and make
    #	uncompressed copies of both for liftOver
    ln -s ../../../susScr1/bed/ensGene.60/process/susScr1.allGenes.gp.gz .
    zcat susScr1.allGenes.gp.gz > susScr1.allGenes.genePred
    ln -s ../../../susScr1/bed/liftOver/susScr1ToSusScr2.over.chain.gz
    zcat susScr1ToSusScr2.over.chain.gz > susScr1ToSusScr2.over.chain
    # lift the genes; items that fail go to susScr1.liftOver.unMapped.txt
    liftOver -genePred susScr1.allGenes.genePred \
        susScr1ToSusScr2.over.chain \
        susScr2.allGenes.gp susScr1.liftOver.unMapped.txt
    gzip susScr2.allGenes.gp
    genePredCheck -db=susScr2 susScr2.allGenes.gp.gz
    #	checked: 22017 failed: 0

    # load the lifted genes as table ensGene; messages go to
    #	loadGenePred.errors.txt
    hgLoadGenePred  -genePredExt susScr2 \
	ensGene susScr2.allGenes.gp.gz > loadGenePred.errors.txt 2>&1

    # peptides: cut each FASTA header down to the text after "transcript:"
    #	and drop any trailing " CCDS..." text
    zcat \
../../../susScr1/bed/ensGene.60/download/Sus_scrofa.Sscrofa9.60.pep.all.fa.gz \
        | sed -e 's/^>.* transcript:/>/; s/ CCDS.*$//;' \
	| gzip > ensPep.txt.gz
    # convert to tab format, removing empty lines and a trailing "*", sorted
    zcat ensPep.txt.gz \
	| ~/kent/src/utils/faToTab/faToTab.pl /dev/null /dev/stdin \
	     | sed -e '/^$/d; s/*$//' | sort > ensPep.susScr2.fa.tab
    hgPepPred susScr2 tab ensPep ensPep.susScr2.fa.tab
    # ensGtp is taken unchanged from the susScr1 run
    ln -s ../../../susScr1/bed/ensGene.60/process/ensGtp.tab .
    hgLoadSqlTab susScr2 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.tab
    # record this load in hgFixed.trackVersion
    hgsql -e 'INSERT INTO trackVersion \
	(db, name, who, version, updateTime, comment, source, dateReference) \
	VALUES("susScr2", "ensGene", "hiram", "60", now(), \
        "with peptides Sus_scrofa.Sscrofa9.60.pep.all.fa.gz", \
        "lifted susScr1 to susScr2 ftp://ftp.ensembl.org/pub/release-60/gtf/sus_scrofa/Sus_scrofa.Sscrofa9.60.gtf.gz", \
        "current" );' hgFixed

    featureBits susScr2 ensGene
    #	28702434 bases of 2231298548 (1.286%) in intersection

############################################################################
#  oryCun2 - Rabbit - Ensembl Genes version 60  (DONE - 2010-11-16 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryCun2
    # settings file for doEnsGeneUpdate.pl; the nameTranslation sed script
    #	prefixes every line with chr, and the liftUp setting is commented out
    cat << '_EOF_' > oryCun2.ensGene.ra
# required db variable
db oryCun2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^/chr/;"
# ensembl appears to still be in scaffolds ? - older versions
# liftUp /hive/data/genomes/oryCun2/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    # run the Ensembl release 60 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=60 oryCun2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun2/bed/ensGene.60
    # coverage of ensGene on oryCun2; the recorded result follows
    featureBits oryCun2 ensGene
    # 31785271 bases of 2604023284 (1.221%) in intersection

############################################################################
############################################################################
# ensembl 59 update (DONE - 2010-08-09 - Hiram)
############################################################################
############################################################################

# to finish off the v59 update after all were complete:
#	the first statement gives the version 58 rows the date tag "may2010",
#	the second marks the version 59 rows as "current".
# NOTE(review): both WHERE clauses match on version alone, with no db or
#	name restriction - confirm every trackVersion row carrying these
#	version strings is meant to change.
     hgsql -e \
'update trackVersion set dateReference="may2010" where version="58";' hgFixed
     hgsql -e \
'update trackVersion set dateReference="current" where version="59";' hgFixed

############################################################################
#  hg19 - Human - Ensembl Genes version 59  (DONE - 2010-08-10 - hiram)
#	The automatic run needs a manual fix: the LRG_ entries and three
#	transcripts are removed from the genePred before the run is resumed
#	at the load step.
    ssh hgwdev
    cd /hive/data/genomes/hg19
    # settings file for doEnsGeneUpdate.pl
    cat << '_EOF_' > hg19.ensGene.ra
# required db variable
db hg19
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 57
liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 hg19.ensGene.ra
#    XXX same problems as before
    cd /hive/data/genomes/hg19/bed/ensGene.59/process
    # uncompress, drop lines matching LRG_ or the three listed transcripts,
    #	and write the filtered result back under the original compressed name
    gunzip hg19.allGenes.gp.gz
    egrep -v "LRG_|ENST00000436611|ENST00000436232|ENST00000436870" \
        hg19.allGenes.gp | gzip -c > hg19.allGenes.gp.gz
    # verify the filtered genePred against the hg19 database
    genePredCheck -db=hg19 hg19.allGenes.gp.gz
    #	checked: 151222 failed: 0
    # keep the unfiltered file around under a name that marks it as bad
    mv hg19.allGenes.gp hg19.allGenes.gp.broken

    cd /hive/data/genomes/hg19
    #	and finish it off:
    doEnsGeneUpdate.pl -ensVersion=59 -continue=load \
	hg19.ensGene.ra > ens.59.load 2>&1
    featureBits hg19 ensGene
    # 104402177 bases of 2899183193 (3.601%) in intersection

############################################################################
#  mm9 - Mouse - Ensembl Genes version 59  (DONE - 2010-08-10 - hiram)
#	The first run failed at the ensGtp table load; the load script was
#	rerun by hand and the run resumed at the cleanup step.
    cd /hive/data/genomes/mm9
    # settings file for doEnsGeneUpdate.pl
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=59 mm9.ensGene.ra

    cd /hive/data/genomes/mm9/bed/ensGene.59
    #	ran into trouble with the ensGtp table load, the names of
    #	the proteins have gotten longer and the standard sql definition
    #	was no longer adequate.  So, after the load failed, increase
    #	protein char and index size to 23 in the source tree and rerun:

    #	Then, running the rest of the load script, with this line fixed up:
    #	hgLoadSqlTab mm9 ensGtp ensGtp.sql process/ensGtp.tab
    cd /hive/data/genomes/mm9/bed/ensGene.59
    ./doLoad.csh

    cd /hive/data/genomes/mm9
    # resume the automatic procedure at the cleanup step, logging to
    #	ens.59.cleanup
    doEnsGeneUpdate.pl -ensVersion=59 -verbose=2 -continue=cleanup \
	mm9.ensGene.ra > ens.59.cleanup 2>&1

    featureBits mm9 ensGene
    #	81438727 bases of 2620346127 (3.108%) in intersection

############################################################################
# bosTau4 was broken - finished manually (DONE - 2010-08-10 - Hiram)
#	The automatic run stopped in the process step; the GTF was rebuilt by
#	hand without AAFC03011182 and the run resumed at the load step.
    ssh hgwdev
    cd /hive/data/genomes/bosTau4
    # settings file for doEnsGeneUpdate.pl
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 bosTau4.ensGene.ra
    #	broken during processing, fix doProcess.csh to eliminate AAFC03011182
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.59/process
    # set the generated GTF aside, then rebuild it from the download using
    #	the same sed translation as the .ra file, dropping AAFC03011182 lines
    mv allGenes.gtf.gz allGenes.gtf.gz.0
    zcat ../download/Bos_taurus.Btau_4.0.59.gtf.gz \
        | sed -e "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/" \
        | grep -v AAFC03011182 | gzip > allGenes.gtf.gz
    # convert the filtered GTF to an extended genePred, keeping the info
    #	output that extractGtf.pl turns into ensGtp.tab
    gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
	| gzip > bosTau4.allGenes.gp.gz
    /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
    # NOTE(review): no genePredCheck result was recorded for this release
    genePredCheck -db=bosTau4 bosTau4.allGenes.gp.gz
    cd /hive/data/genomes/bosTau4
    # resume the automatic procedure at the load step, logging to ens.59.load
    doEnsGeneUpdate.pl  -ensVersion=59 -continue=load bosTau4.ensGene.ra \
	> ens.59.load  2>&1
    featureBits bosTau4 ensGene
    # 42306082 bases of 2731830700 (1.549%) in intersection

#############################################################################
#  calJac3 - Marmoset - Ensembl Genes version 59  (DONE - 2010-08-10 - hiram)
    # version 59 Ensembl changed their chrom names.  Made up a new lift file
    #	after it failed the first time:
    cd /hive/data/genomes/calJac3/bed/ensGene.59/download
    # collect the distinct first-column names used in the Ensembl GTF
    zcat Callithrix_jacchus.C_jacchus3.2.1.59.gtf.gz | awk '{print $1}' \
	| sort -u > ens59chrom.names
# Build ens.59.lft (this loop is Bourne/bash syntax).  For every Ensembl name
#	containing GL: remove ".1" to get B, look B up in ucsc.chrom.names to
#	get the UCSC name T, take T's size S from chrom.sizes, and print one
#	lift line: 0, Ensembl name, size, UCSC name, size.
# The FAIL message goes to stderr so that an unmatched name shows up on the
#	terminal instead of being written into the lift file.
# NOTE(review): the creation of ucsc.chrom.names is not recorded here -
#	presumably a list of calJac3 chrom names; confirm before rerunning.
for N in `grep GL ens59chrom.names`
do
    B=${N/.1/}
    T=`grep "${B}" ucsc.chrom.names`
    if [ "x${T}y" = "xy" ]; then
        echo FAIL: grep "${B}" ucsc.chrom.names 1>&2
    else
        S=`grep "${T}" ../../../chrom.sizes | awk '{print $2}'`
        echo $N $T $S | awk '{printf "0\t%s\t%d\t%s\t%s\n", $1, $3, $2, $3}'
    fi
done > ../../../jkStuff/ens.59.lft

    cd /hive/data/genomes/calJac3
    # settings file for doEnsGeneUpdate.pl, naming the lift file built above
    cat << '_EOF_' > calJac3.ensGene.ra
# required db variable
db calJac3
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# changing names randoms in Ensembl v59
liftUp /hive/data/genomes/calJac3/jkStuff/ens.59.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 calJac3.ensGene.ra
    cd /hive/data/genomes/calJac3/bed/ensGene.59/process
    # set the generated GTF aside and rebuild it without the ACFV lines
    mv allGenes.gtf.gz allGenes.gtf.badNames.gz
    # eliminate odd bits that we don't have here
    zcat allGenes.gtf.badNames.gz | grep -v ACFV | gzip > allGenes.gtf.gz
    #	now finishing the processing step
    cd /hive/data/genomes/calJac3/bed/ensGene.59/process
    gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
	| gzip > calJac3.allGenes.gp.gz
    /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
    # keep the un-lifted genePred under a new name, then lift it with
    #	ens.59.lft into calJac3.allGenes.gp and compress the result
    mv calJac3.allGenes.gp.gz calJac3.allGenes.beforeLiftUp.gp.gz
    liftUp -extGenePred -type=.gp calJac3.allGenes.gp \
	/hive/data/genomes/calJac3/jkStuff/ens.59.lft carry \
	calJac3.allGenes.beforeLiftUp.gp.gz
    gzip calJac3.allGenes.gp
    # verify the lifted genePred against the calJac3 database
    genePredCheck -db=calJac3 calJac3.allGenes.gp.gz
    #	checked: 53989 failed: 0

    # back to the directory holding calJac3.ensGene.ra, since the .ra file
    #	is given as a relative path; then resume at the load step
    cd /hive/data/genomes/calJac3
    doEnsGeneUpdate.pl -ensVersion=59 -continue=load calJac3.ensGene.ra \
	> ens.59.load 2>&1
    ssh hgwdev
    cd /hive/data/genomes/calJac3/bed/ensGene.59
    featureBits calJac3 ensGene
    # 51397664 bases of 2752505800 (1.867%) in intersection

############################################################################
#  sacCer2 - S. cerevisiae - Ensembl Genes version 59  (DONE - 2010-08-10 - hiram)
    #	version 59 changed the name of MT to Mito
    #	needed to change the sacCer2.ensGene.ra file for the nameTranslation
    ssh hgwdev
    cd /hive/data/genomes/sacCer2
    # settings file for doEnsGeneUpdate.pl; the nameTranslation sed script
    #	adds the chr prefix to the Roman-numeral names, maps Mito to chrM and
    #	2-micron to 2micron
    cat << '_EOF_' > sacCer2.ensGene.ra
# required db variable
db sacCer2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chrVIII/; s/^VII/chrVII/; s/^VI/chrVI/; s/^V/chrV/; s/^XIII/chrXIII/; s/^XII/chrXII/; s/^XIV/chrXIV/; s/^XI/chrXI/; s/^XVI/chrXVI/; s/^XV/chrXV/; s/^X/chrX/; s/^III/chrIII/; s/^IV/chrIV/; s/^II/chrII/; s/^IX/chrIX/; s/^I/chrI/; s/^Mito/chrM/; s/2-micron/2micron/"
'_EOF_'
#  << happy emacs

    # run the Ensembl release 59 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=59 sacCer2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sacCer2/bed/ensGene.59
    # coverage of ensGene on sacCer2; the recorded result follows
    featureBits sacCer2 ensGene
    # 8917060 bases of 12162995 (73.313%) in intersection
    #####################################################################
    # the sacCer2 sequence at Ensembl is a different version.  A liftOver
    # chain file was constructed, the genes lifted and reloaded.
    # Genes on chrX and chrXIV moved a single base after the single difference
    # on each chromosome.  Note the scripts used in:
# /hive/data/genomes/sacCer2/ensembl.versions.sequence/blat.2010-08-11/*.sh

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 59  (DONE - 2010-08-10 - hiram)
#	The genePred from the automatic run is lifted by hand in two passes
#	before the run is resumed at the load step.
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    # settings file for doEnsGeneUpdate.pl
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 gasAcu1.ensGene.ra

    # requires extra attention after the all database for loop attempt
    cd /hive/data/genomes/gasAcu1/bed/ensGene.59/process
    # keep the un-lifted genePred under a new name
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    # pass 1: lift with contigsToScaffolds.lft into gasAcu1.scaffolds.gp
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    # pass 2: lift that result with UCSC.chromToScaffoldSansGaps.lft into
    #	gasAcu1.allGenes.gp
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29245 failed: 0

    #	then continue with the load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=59 gasAcu1.ensGene.ra \
	> ens.59.load 2>&1

    featureBits gasAcu1 ensGene
    # 36792090 bases of 446627861 (8.238%) in intersection

############################################################################
#  macEug1 - Wallaby - Ensembl Genes version 59  (DONE - 2010-08-10 - hiram)
    #	needed to fix ensGeneScaffolds.pl to make this work
    #	Ensembl has a different naming convention for scaffold names in this
    #	beast
    ssh hgwdev
    cd /hive/data/genomes/macEug1
    # settings file for doEnsGeneUpdate.pl: geneScaffold translation plus
    #	skipInvalid; the trailing lines of the file list the transcripts
    #	reported with "no exonFrame"
    cat << '_EOF_' > macEug1.ensGene.ra
# required db variable
db macEug1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#	during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#	ENSMEUT00000014761 no exonFrame on CDS exon 6
#	ENSMEUT00000012082 no exonFrame on CDS exon 1
#	ENSMEUT00000013746 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    # run the Ensembl release 59 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=59 macEug1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/macEug1/bed/ensGene.59
    # coverage of ensGene on macEug1; the recorded result follows
    featureBits macEug1 ensGene
    # 23392744 bases of 2541767339 (0.920%) in intersection

############################################################################
#  dm3 - D. melanogaster - Ensembl Genes version 59  (DONE - 2010-08-10 - hiram)
    #	 for version 59, fixed up the chrom name translations to get it to
    #	go.
    ssh hgwdev
    cd /hive/data/genomes/dm3
    # settings file for doEnsGeneUpdate.pl; the nameTranslation sed script
    #	adds the chr prefix and maps dmel_mitochondrion_genome to chrM
    cat << '_EOF_' > dm3.ensGene.ra
# required db variable
db dm3
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XYU][0-9]*\)/chr\1/; s/^dmel_mitochondrion_genome/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
# knownToEnsembl yes
'_EOF_'
#  << happy emacs

    # run the Ensembl release 59 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=59 dm3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dm3/bed/ensGene.59
    # coverage of ensGene on dm3; the recorded result follows
    featureBits dm3 ensGene
    # 30276468 bases of 162367812 (18.647%) in intersection

############################################################################
#  ce9 - C. elegans - Ensembl Genes version 59  (DONE - 2010-08-10 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ce9
    # settings file for doEnsGeneUpdate.pl; the nameTranslation sed script
    #	adds the chr prefix to names starting with I, V or X and maps MtDNA
    #	to chrM
    cat << '_EOF_' > ce9.ensGene.ra
# required db variable
db ce9
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([IVX]\)/chr\1/; s/^MtDNA/chrM/"
'_EOF_'
#  << happy emacs

    # run the Ensembl release 59 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=59 ce9.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ce9/bed/ensGene.59
    # coverage of ensGene on ce9; the recorded result follows
    featureBits ce9 ensGene
    # 30249205 bases of 100286004 (30.163%) in intersection

############################################################################
#  susScr2 - Pig - lifted susScr1 v59 genes to susScr2 (DONE - 2010-08-11 - Hiram)
#	No doEnsGeneUpdate.pl run here: the susScr1 v59 genePred is carried to
#	susScr2 with liftOver, and the tables are loaded by hand.
    mkdir /hive/data/genomes/susScr2/bed/ensGene.59
    cd /hive/data/genomes/susScr2/bed/ensGene.59
    # link in the susScr1 genePred and the susScr1-to-susScr2 chain, and make
    #	uncompressed copies of both for liftOver
    ln -s ../../../susScr1/bed/ensGene.59/process/susScr1.allGenes.gp.gz .
    zcat susScr1.allGenes.gp.gz > susScr1.allGenes.genePred
    ln -s ../../../susScr1/bed/liftOver/susScr1ToSusScr2.over.chain.gz
    zcat susScr1ToSusScr2.over.chain.gz > susScr1ToSusScr2.over.chain
    # lift the genes; items that fail go to susScr1.liftOver.unMapped.txt
    liftOver -genePred susScr1.allGenes.genePred \
        susScr1ToSusScr2.over.chain \
        susScr2.allGenes.gp susScr1.liftOver.unMapped.txt
    gzip susScr2.allGenes.gp
    # load the lifted genes as table ensGene; messages go to
    #	loadGenePred.errors.txt
    hgLoadGenePred  -genePredExt susScr2 \
	ensGene susScr2.allGenes.gp.gz > loadGenePred.errors.txt 2>&1

    # peptides: cut each FASTA header down to the text after "transcript:"
    #	and drop any trailing " CCDS..." text
    zcat \
../../../susScr1/bed/ensGene.59/download/Sus_scrofa.Sscrofa9.59.pep.all.fa.gz \
        | sed -e 's/^>.* transcript:/>/; s/ CCDS.*$//;' \
	| gzip > ensPep.txt.gz
    # convert to tab format, removing empty lines and a trailing "*", sorted
    zcat ensPep.txt.gz \
	| ~/kent/src/utils/faToTab/faToTab.pl /dev/null /dev/stdin \
	     | sed -e '/^$/d; s/*$//' | sort > ensPep.susScr2.fa.tab
    hgPepPred susScr2 tab ensPep ensPep.susScr2.fa.tab
    # ensGtp is taken unchanged from the susScr1 run
    ln -s ../../../susScr1/bed/ensGene.59/process/ensGtp.tab .
    hgLoadSqlTab susScr2 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.tab
    # record this load in hgFixed.trackVersion
    hgsql -e 'INSERT INTO trackVersion \
	(db, name, who, version, updateTime, comment, source, dateReference) \
	VALUES("susScr2", "ensGene", "hiram", "59", now(), \
        "with peptides Sus_scrofa.Sscrofa9.59.pep.all.fa.gz", \
        "lifted susScr1 to susScr2 ftp://ftp.ensembl.org/pub/release-59/gtf/sus_scrofa/Sus_scrofa.Sscrofa9.59.gtf.gz", \
        "current" );' hgFixed


############################################################################
#  anoCar1 - Lizard - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/anoCar1
    # minimal settings file for doEnsGeneUpdate.pl: only the db name is set
    cat << '_EOF_' > anoCar1.ensGene.ra
# required db variable
db anoCar1
'_EOF_'
#  << happy emacs

    # run the Ensembl release 59 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=59 anoCar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/anoCar1/bed/ensGene.59
    # coverage of ensGene on anoCar1; the recorded result follows
    featureBits anoCar1 ensGene
    # 26974393 bases of 1741478929 (1.549%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/canFam2
    # settings file for doEnsGeneUpdate.pl; nameTranslation is a sed script
    #	that adds the chr prefix and renames MT and Un
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    # run the Ensembl release 59 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=59 canFam2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/canFam2/bed/ensGene.59
    # coverage of ensGene on canFam2; the recorded result follows
    featureBits canFam2 ensGene
    # 34693517 bases of 2384996543 (1.455%) in intersection

############################################################################
#  cavPor3 - Guinea pig - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cavPor3
    # settings file for doEnsGeneUpdate.pl; geneScaffolds is commented out,
    #	the only translation is MT to chrM
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    # run the Ensembl release 59 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=59 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.59
    # coverage of ensGene on cavPor3; the recorded result follows
    featureBits cavPor3 ensGene
    # 30971317 bases of 2663369733 (1.163%) in intersection

############################################################################
#  choHof1 - Sloth - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/choHof1
    # settings file for doEnsGeneUpdate.pl: geneScaffold translation plus
    #	skipInvalid; the last line of the file notes the transcript reported
    #	with "no exonFrame"
    cat << '_EOF_' > choHof1.ensGene.ra
# required db variable
db choHof1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       18938: ENSCHOT00000005046 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    # run the Ensembl release 59 update with that settings file
    doEnsGeneUpdate.pl  -ensVersion=59 choHof1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/choHof1/bed/ensGene.59
    # coverage of ensGene on choHof1; the recorded result follows
    featureBits choHof1 ensGene
    # 18277719 bases of 2060419685 (0.887%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.59
    featureBits ci2 ensGene
    # 20114967 bases of 141233565 (14.242%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.59
    featureBits cioSav2 ensGene
    # 16572478 bases of 173749524 (9.538%) in intersection

############################################################################
#  danRer6 - Zebrafish - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/danRer6
    cat << '_EOF_' > danRer6.ensGene.ra
# required db variable
db danRer6
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 danRer6.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer6/bed/ensGene.59
    featureBits danRer6 ensGene
    # 44621280 bases of 1506896106 (2.961%) in intersection

############################################################################
#  dasNov2 - Armadillo - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/dasNov2
    cat << '_EOF_' > dasNov2.ensGene.ra
# required db variable
db dasNov2
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 dasNov2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dasNov2/bed/ensGene.59
    featureBits dasNov2 ensGene
    # 21968073 bases of 2371493872 (0.926%) in intersection

############################################################################
#  dipOrd1 - Kangaroo rat - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1
    cat << '_EOF_' > dipOrd1.ensGene.ra
# required db variable
db dipOrd1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 11275: ENSDORT00000004734 no exonFrame on CDS exon 12
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 dipOrd1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1/bed/ensGene.59
    featureBits dipOrd1 ensGene
    # 25324520 bases of 1844961421 (1.373%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 29277: ENSETET00000011172 no exonFrame on CDS exon 14
# 44942: ENSETET00000018714 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 echTel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/echTel1/bed/ensGene.59
    featureBits echTel1 ensGene
    # 25769845 bases of 2111581369 (1.220%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.59
    featureBits equCab2 ensGene
    # 39563318 bases of 2428790173 (1.629%) in intersection

############################################################################
#  eriEur1 - Hedgehog - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/eriEur1
    cat << '_EOF_' > eriEur1.ensGene.ra
# required db variable
db eriEur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 4691: ENSEEUT00000004188 no exonFrame on CDS exon 7
# 35795: ENSEEUT00000003156 no exonFrame on CDS exon 4
# 40908: ENSEEUT00000001064 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 eriEur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/eriEur1/bed/ensGene.59
    featureBits eriEur1 ensGene
    # 22555285 bases of 2133134836 (1.057%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 2100: ENSFCAT00000006929 no exonFrame on CDS exon 16
# 14578: ENSFCAT00000010965 no exonFrame on CDS exon 1
# 26634: ENSFCAT00000009384 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 felCat3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/felCat3/bed/ensGene.59
    featureBits felCat3 ensGene
    # 22299269 bases of 1642698377 (1.357%) in intersection

############################################################################
#  fr2 - Fugu - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.59
    featureBits fr2 ensGene
    # 34568537 bases of 393312790 (8.789%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.59
    featureBits galGal3 ensGene
    # 30741650 bases of 1042591351 (2.949%) in intersection

############################################################################
#  loxAfr3 - Elephant - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3
    cat << '_EOF_' > loxAfr3.ensGene.ra
# required db variable
db loxAfr3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 loxAfr3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3/bed/ensGene.59
    featureBits loxAfr3 ensGene
    # 32151456 bases of 3118565340 (1.031%) in intersection

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 micMur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/micMur1/bed/ensGene.59
    featureBits micMur1 ensGene
    # 25688755 bases of 1852394361 (1.387%) in intersection

############################################################################
#  monDom5 - Opossum - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/monDom5
    cat << '_EOF_' > monDom5.ensGene.ra
# required db variable
db monDom5
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 monDom5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/monDom5/bed/ensGene.59
    featureBits monDom5 ensGene
    # 32982595 bases of 3501660299 (0.942%) in intersection

############################################################################
#  myoLuc1 - Microbat - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1
    cat << '_EOF_' > myoLuc1.ensGene.ra
# required db variable
db myoLuc1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 1265: ENSMLUT00000004658 no exonFrame on CDS exon 1
# 17770: ENSMLUT00000003427 no exonFrame on CDS exon 10
# 32743: ENSMLUT00000009601 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 myoLuc1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1/bed/ensGene.59
    featureBits myoLuc1 ensGene
    # 24707371 bases of 1673855868 (1.476%) in intersection

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ochPri2
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 10995: ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 ochPri2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ochPri2/bed/ensGene.59
    featureBits ochPri2 ensGene
    # 25447459 bases of 1923624051 (1.323%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly, 365 items
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 ornAna1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ornAna1/bed/ensGene.59
    featureBits ornAna1 ensGene
    # 24466297 bases of 1842236818 (1.328%) in intersection

############################################################################
#  oryCun2 - Rabbit - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryCun2
    cat << '_EOF_' > oryCun2.ensGene.ra
# required db variable
db oryCun2
# ensembl appears to still be in scaffolds ?
liftUp /hive/data/genomes/oryCun2/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 oryCun2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun2/bed/ensGene.59
    featureBits oryCun2 ensGene
    # 31785271 bases of 2604023284 (1.221%) in intersection

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.59
    featureBits oryLat2 ensGene
    # 32313511 bases of 700386597 (4.614%) in intersection

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 otoGar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/otoGar1/bed/ensGene.59
    featureBits otoGar1 ensGene
    # 23692789 bases of 1969052059 (1.203%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.59
    featureBits panTro2 ensGene
    # 50004270 bases of 2909485072 (1.719%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.59
    featureBits ponAbe2 ensGene
    # 38120849 bases of 3093572278 (1.232%) in intersection

############################################################################
#  proCap1 - Rock hyrax - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/proCap1
    cat << '_EOF_' > proCap1.ensGene.ra
# required db variable
db proCap1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 4595: ENSPCAT00000007286 no exonFrame on CDS exon 1
# 28894: ENSPCAT00000000699 no exonFrame on CDS exon 4

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 proCap1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/proCap1/bed/ensGene.59
    featureBits proCap1 ensGene
    # 25344206 bases of 2407847681 (1.053%) in intersection

############################################################################
#  pteVam1 - Megabat - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/pteVam1
    cat << '_EOF_' > pteVam1.ensGene.ra
# required db variable
db pteVam1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
#       6381: ENSPVAT00000012919 no exonFrame on CDS exon 14
#       23522: ENSPVAT00000010661 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 pteVam1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/pteVam1/bed/ensGene.59
    featureBits pteVam1 ensGene
    # 28966764 bases of 1839436660 (1.575%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.59
    featureBits rheMac2 ensGene
    # 44562701 bases of 2646704109 (1.684%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.59
    featureBits rn4 ensGene
    # 46705616 bases of 2571531505 (1.816%) in intersection

############################################################################
#  sorAra1 - Shrew - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/sorAra1
    cat << '_EOF_' > sorAra1.ensGene.ra
# required db variable
db sorAra1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 sorAra1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sorAra1/bed/ensGene.59
    featureBits sorAra1 ensGene
    # 19690470 bases of 1832864697 (1.074%) in intersection

############################################################################
#  speTri1 - Squirrel - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/speTri1
    cat << '_EOF_' > speTri1.ensGene.ra
# required db variable
db speTri1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 1071: ENSSTOT00000007455 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 speTri1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/speTri1/bed/ensGene.59
    featureBits speTri1 ensGene
    # 21594718 bases of 1913367893 (1.129%) in intersection

############################################################################
#  susScr1 - Pig - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/susScr1
    cat << '_EOF_' > susScr1.ensGene.ra
# required db variable
db susScr1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 susScr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/susScr1/bed/ensGene.59
    featureBits susScr1 ensGene
    # 28758401 bases of 2231332019 (1.289%) in intersection

############################################################################
#  taeGut1 - Zebra finch - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/taeGut1
    cat << '_EOF_' > taeGut1.ensGene.ra
# required db variable
db taeGut1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
# geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
# skipInvalid yes
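#       NOTE(review): the three ENSDNOT ids below are identical to the list
#       in the dasNov2 entry and look like template leftovers - confirm
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3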
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 taeGut1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/taeGut1/bed/ensGene.59
    featureBits taeGut1 ensGene
    # 25441417 bases of 1222864691 (2.080%) in intersection

############################################################################
#  tarSyr1 - Tarsier - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1
    cat << '_EOF_' > tarSyr1.ensGene.ra
# required db variable
db tarSyr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 tarSyr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1/bed/ensGene.59
    featureBits tarSyr1 ensGene
    # 21327630 bases of 2768536343 (0.770%) in intersection

############################################################################
#  tetNig2 - Tetraodon - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tetNig2
    cat << '_EOF_' > tetNig2.ensGene.ra
# required db variable
db tetNig2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 tetNig2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tetNig2/bed/ensGene.59
    featureBits tetNig2 ensGene
    # 31642974 bases of 302314788 (10.467%) in intersection

############################################################################
#  tupBel1 - Tree shrew - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/tupBel1
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 2993: ENSTBET00000015831 no exonFrame on CDS exon 11
# 3556: ENSTBET00000013522 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 tupBel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tupBel1/bed/ensGene.59
    featureBits tupBel1 ensGene
    # 22848317 bases of 2137225476 (1.069%) in intersection

############################################################################
#  turTru1 - Dolphin - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/turTru1
    cat << '_EOF_' > turTru1.ensGene.ra
# required db variable
db turTru1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 turTru1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/turTru1/bed/ensGene.59
    featureBits turTru1 ensGene
    # 28614121 bases of 2298444090 (1.245%) in intersection

############################################################################
#  vicPac1 - Alpaca - Ensembl Genes version 59  (DONE - 2010-08-09 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/vicPac1
    cat << '_EOF_' > vicPac1.ensGene.ra
# required db variable
db vicPac1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that does not translate properly to UCSC coordinates
#       4649: ENSVPAT00000009076 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 vicPac1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/vicPac1/bed/ensGene.59
    featureBits vicPac1 ensGene
    # 17891814 bases of 1922910435 (0.930%) in intersection

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes version 59  (DONE - 2010-08-09 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/xenTro2
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=59 xenTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/xenTro2/bed/ensGene.59
    featureBits xenTro2 ensGene
    # 29181688 bases of 1359412157 (2.147%) in intersection

############################################################################
# ensembl 58 update (DONE - 2010-05-27 - Hiram)

#############################################################################
#  calJac3 - Marmoset - Ensembl Genes version 58  (DONE - 2010-07-20 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/calJac3
    cat << '_EOF_' > calJac3.ensGene.ra
# required db variable
db calJac3
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 calJac3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/calJac3/bed/ensGene.58
    featureBits calJac3 ensGene
    # 50565085 bases of 2752505800 (1.837%) in intersection

############################################################################
#  susScr2 - Pig - lifted susScr1 v58 genes to susScr2 (DONE - 2010-05-28 - Hiram)
#	doEnsGeneUpdate.pl is not run for susScr2 here; the susScr1 v58 results
#	are lifted with liftOver and the ensGene, ensPep and ensGtp tables plus
#	the trackVersion row are loaded by hand.
    mkdir /hive/data/genomes/susScr2/bed/ensGene.58
    cd /hive/data/genomes/susScr2/bed/ensGene.58
    #	symlink the susScr1 v58 genePred and the susScr1 -> susScr2 chain,
    #	then make uncompressed copies to hand to liftOver
    ln -s ../../../susScr1/bed/ensGene.58/process/susScr1.allGenes.gp.gz .
    zcat susScr1.allGenes.gp.gz > susScr1.allGenes.genePred
    ln -s ../../../susScr1/bed/liftOver/susScr1ToSusScr2.over.chain.gz
    zcat susScr1ToSusScr2.over.chain.gz > susScr1ToSusScr2.over.chain
    #	lifted genes go to susScr2.allGenes.gp, the remaining liftOver output
    #	to susScr1.liftOver.unMapped.txt
    liftOver -genePred susScr1.allGenes.genePred \
        susScr1ToSusScr2.over.chain \
        susScr2.allGenes.gp susScr1.liftOver.unMapped.txt
    gzip susScr2.allGenes.gp
    #	all loader messages (stdout and stderr) are kept in
    #	loadGenePred.errors.txt
    hgLoadGenePred  -genePredExt susScr2 \
	ensGene susScr2.allGenes.gp.gz > loadGenePred.errors.txt 2>&1

    #	peptides: on each fasta header line drop everything up to and
    #	including " transcript:" and any trailing " CCDS..." text
    zcat \
../../../susScr1/bed/ensGene.58/download/Sus_scrofa.Sscrofa9.58.pep.all.fa.gz \
        | sed -e 's/^>.* transcript:/>/; s/ CCDS.*$//;' \
	| gzip > ensPep.txt.gz
    #	convert to a sorted tab file; the second sed deletes empty lines and
    #	strips a trailing '*' (a leading * in a basic regex is a literal)
    zcat ensPep.txt.gz \
	| ~/kent/src/utils/faToTab/faToTab.pl /dev/null /dev/stdin \
	     | sed -e '/^$/d; s/*$//' | sort > ensPep.susScr2.fa.tab
    hgPepPred susScr2 tab ensPep ensPep.susScr2.fa.tab
    #	ensGtp.tab is used as-is from the susScr1 build
    ln -s ../../../susScr1/bed/ensGene.58/process/ensGtp.tab .
    hgLoadSqlTab susScr2 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.tab
    #	record the lifted track in hgFixed.trackVersion by hand
    hgsql -e 'INSERT INTO trackVersion \
	(db, name, who, version, updateTime, comment, source, dateReference) \
	VALUES("susScr2", "ensGene", "hiram", "58", now(), \
        "with peptides Sus_scrofa.Sscrofa9.58.pep.all.fa.gz", \
        "lifted susScr1 to susScr2 ftp://ftp.ensembl.org/pub/release-58/gtf/sus_scrofa/Sus_scrofa.Sscrofa9.58.gtf.gz", \
        "current" );' hgFixed

############################################################################
#  mm9 - Mouse - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#	The ensGtp table load failed with the standard sql definition, so the
#	load was finished by hand with a local ensGtp.sql and the pipeline was
#	then resumed with -continue=cleanup.
    cd /hive/data/genomes/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=58 mm9.ensGene.ra

    cd /hive/data/genomes/mm9/bed/ensGene.58
    #	ran into trouble with the ensGtp table load, the names of
    #	the proteins have gotten longer and the standard sql definition
    #	was no longer adequate.  So, after the load failed, increase
    #	protein char and index size to 23:
    cat << '_EOF_' > ensGtp.sql
# This creates the table holding the relationship between
# ensemble genes, transcripts, and peptides.
CREATE TABLE ensGtp (
  gene char(20) NOT NULL,
  transcript char(20) NOT NULL,
  protein char(23) NOT NULL,
# INDICES
  INDEX(gene(19)),
  UNIQUE(transcript(19)),
  INDEX(protein(23))
) 
'_EOF_'
    # << happy emacs

    #	Then, running the rest of the load script, with this line fixed up:
    #	hgLoadSqlTab mm9 ensGtp ensGtp.sql process/ensGtp.tab
    #	(finiLoad.csh output, stdout and stderr, is kept in finiLoad.log)
    ./finiLoad.csh > finiLoad.log 2>&1

    #	back in the top-level directory, resume the pipeline at the cleanup
    #	step; verbose output is kept in ens.58.cleanup
    cd /hive/data/genomes/mm9
    doEnsGeneUpdate.pl -ensVersion=58 -verbose=2 -continue=cleanup \
	mm9.ensGene.ra > ens.58.cleanup 2>&1

    featureBits mm9 ensGene
    #	79466978 bases of 2620346127 (3.033%) in intersection

############################################################################
#  hg19 - Human - Ensembl Genes version 58  (DONE - 2010-04-06 - hiram)
#  The first doEnsGeneUpdate.pl run needed manual repair: LRG entries and
#  three named transcripts are filtered out of the genePred file, the result
#  is checked, and the pipeline is resumed at the load step.
    ssh hgwdev
    cd /hive/data/genomes/hg19
    # quoted '_EOF_' delimiter: the .ra text is written literally, so the
    # backslashes in the sed expression reach the file unchanged
    cat << '_EOF_' > hg19.ensGene.ra
# required db variable
db hg19
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 58
liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 hg19.ensGene.ra
    #	New problems in v58: LRG genome sequence, see also:
    #	http://www.lrg-sequence.org/
    #	and as was in v57, four of their genes cross the boundaries of
    #	the haplotypes into the chromosomes.
    #	We can't manage these, so, fixup doProcess.csh:
    #	(done here by filtering the offending entries out of the genePred)
    cd /hive/data/genomes/hg19/bed/ensGene.58/process
    #	gunzip removes the .gz, so the filtered output can reuse that name
    gunzip hg19.allGenes.gp.gz
    egrep -v "LRG_|ENST00000436611|ENST00000436232|ENST00000436870" \
        hg19.allGenes.gp | gzip -c > hg19.allGenes.gp.gz
    genePredCheck -db=hg19 hg19.allGenes.gp.gz
    #	checked: 151222 failed: 0
    #	keep the unfiltered file for reference under a .broken name
    mv hg19.allGenes.gp hg19.allGenes.gp.broken

    cd /hive/data/genomes/hg19
    #	and finish it off:
    doEnsGeneUpdate.pl -ensVersion=58 -continue=load \
	hg19.ensGene.ra > ens.58.load 2>&1
    featureBits hg19 ensGene
    # 104402177 bases of 2897316137 (3.603%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Steps: write rn4.ensGene.ra, run doEnsGeneUpdate.pl with -ensVersion=58,
#  then record the featureBits coverage of the ensGene table.
    ssh hgwdev
    cd /hive/data/genomes/rn4
    # quoted '_EOF_' delimiter: the .ra text is written literally
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.58
    # recorded output follows the command
    featureBits rn4 ensGene
    # 46705616 bases of 2571531505 (1.816%) in intersection

############################################################################
# bosTau4 was broken - finished manually (DONE - 2010-05-27 - Hiram)
#  The automated run broke during processing; the GTF is re-filtered by hand
#  to drop AAFC03011182, the genePred and ensGtp.tab files are rebuilt, and
#  the pipeline is resumed at the load step.
    ssh hgwdev
    cd /hive/data/genomes/bosTau4
    # quoted '_EOF_' delimiter: the .ra text is written literally
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 bosTau4.ensGene.ra
    #	broken during processing, fix doProcess.csh to eliminate AAFC03011182
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.58/process
    #	keep the original result aside before regenerating it
    mv allGenes.gtf.gz allGenes.gtf.gz.0
    #	same sed expression as nameTranslation in the .ra file above, plus a
    #	grep -v that removes every line mentioning AAFC03011182
    zcat ../download/Bos_taurus.Btau_4.0.58.gtf.gz \
        | sed -e "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/" \
        | grep -v AAFC03011182 | gzip > allGenes.gtf.gz
    gtfToGenePred -infoOut=infoOut.txt -genePredExt allGenes.gtf.gz stdout \
	| gzip > bosTau4.allGenes.gp.gz
    #	ensGtp.tab is derived from the infoOut.txt written by gtfToGenePred
    /cluster/bin/scripts/extractGtf.pl infoOut.txt > ensGtp.tab
    genePredCheck -db=bosTau4 bosTau4.allGenes.gp.gz
    cd /hive/data/genomes/bosTau4
    doEnsGeneUpdate.pl  -ensVersion=58 -continue=load bosTau4.ensGene.ra \
	> ens.58.load  2>&1
    featureBits bosTau4 ensGene
    # 42306079 bases of 2731830700 (1.549%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  After the initial run the genePred is lifted by hand in two passes
#  (contigsToScaffolds.lft, then UCSC.chromToScaffoldSansGaps.lft), checked,
#  and the pipeline is resumed at the load step.
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    # quoted '_EOF_' delimiter: the .ra text is written literally
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 gasAcu1.ensGene.ra

    # this database needs extra manual attention after the attempt to run
    # all databases in a for loop
    cd /hive/data/genomes/gasAcu1/bed/ensGene.58/process
    # keep the un-lifted genePred under a different name
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    # first pass: output gasAcu1.scaffolds.gp, reading the genePred on stdin
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    # second pass: output gasAcu1.allGenes.gp from gasAcu1.scaffolds.gp
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29245 failed: 0

    #	then continue with the load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=58 gasAcu1.ensGene.ra \
	> ens.58.load 2>&1

    featureBits gasAcu1 ensGene
    # 36792090 bases of 446627861 (8.238%) in intersection

############################################################################
#  anoCar1 - Lizard - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Simplest case: the .ra file sets only the db name, no optional settings.
    ssh hgwdev
    cd /hive/data/genomes/anoCar1
    cat << '_EOF_' > anoCar1.ensGene.ra
# required db variable
db anoCar1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 anoCar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/anoCar1/bed/ensGene.58
    # recorded output follows the command
    featureBits anoCar1 ensGene
    # 26974393 bases of 1741478929 (1.549%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Steps: write canFam2.ensGene.ra, run doEnsGeneUpdate.pl with
#  -ensVersion=58, then record the featureBits coverage of ensGene.
    ssh hgwdev
    cd /hive/data/genomes/canFam2
    # the sed expression prefixes "chr" to names starting with a digit, X or
    # Y, and rewrites a leading MT to chrM and a leading Un to chrUn
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 canFam2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/canFam2/bed/ensGene.58
    featureBits canFam2 ensGene
    # 34693517 bases of 2384996543 (1.455%) in intersection

############################################################################
#  cavPor3 - Guinea pig - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
#  geneScaffolds is left commented out in the .ra text; the only active
#  settings are db and a nameTranslation that rewrites a leading MT to chrM.
    ssh hgwdev
    cd /hive/data/genomes/cavPor3
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.58
    featureBits cavPor3 ensGene
    # 30971317 bases of 2663369733 (1.163%) in intersection

############################################################################
#  choHof1 - Sloth - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the last commented line of the .ra
#  text records the one transcript reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/choHof1
    cat << '_EOF_' > choHof1.ensGene.ra
# required db variable
db choHof1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       18938: ENSCHOT00000005046 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 choHof1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/choHof1/bed/ensGene.58
    featureBits choHof1 ensGene
    # 18277677 bases of 2060419685 (0.887%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
#  The nameTranslation gives one-digit arm names (e.g. 1p) a "chr0" prefix
#  and two-digit arm names (e.g. 10q) a "chr" prefix.
    ssh hgwdev
    cd /hive/data/genomes/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.58
    featureBits ci2 ensGene
    # 20114967 bases of 141233565 (14.242%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
#  Every optional setting in the .ra text is commented out, so the only
#  active line is "db cioSav2".
    ssh hgwdev
    cd /hive/data/genomes/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.58
    featureBits cioSav2 ensGene
    # 16572478 bases of 173749524 (9.538%) in intersection

############################################################################
#  danRer6 - Zebrafish - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Steps: write danRer6.ensGene.ra, run doEnsGeneUpdate.pl with
#  -ensVersion=58, then record the featureBits coverage of ensGene.
    ssh hgwdev
    cd /hive/data/genomes/danRer6
    # quoted '_EOF_' delimiter: the .ra text is written literally
    cat << '_EOF_' > danRer6.ensGene.ra
# required db variable
db danRer6
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 danRer6.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer6/bed/ensGene.58
    featureBits danRer6 ensGene
    # 44621280 bases of 1506896106 (2.961%) in intersection

############################################################################
#  dasNov2 - Armadillo - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the trailing commented lines of the
#  .ra text record three transcripts reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/dasNov2
    cat << '_EOF_' > dasNov2.ensGene.ra
# required db variable
db dasNov2
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 dasNov2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dasNov2/bed/ensGene.58
    featureBits dasNov2 ensGene
    # 21968025 bases of 2371493872 (0.926%) in intersection

############################################################################
#  dipOrd1 - Kangaroo rat - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the single
#  transcript reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1
    cat << '_EOF_' > dipOrd1.ensGene.ra
# required db variable
db dipOrd1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that have invalid structures from Ensembl:
# 11275: ENSDORT00000004734 no exonFrame on CDS exon 12
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 dipOrd1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1/bed/ensGene.58
    featureBits dipOrd1 ensGene
    # 25324463 bases of 1844961421 (1.373%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the two
#  transcripts reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 29277: ENSETET00000011172 no exonFrame on CDS exon 14
# 44942: ENSETET00000018714 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 echTel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/echTel1/bed/ensGene.58
    featureBits echTel1 ensGene
    # 25769836 bases of 2111581369 (1.220%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Combines a nameTranslation with a liftUp file for the chrUn names.
#  NOTE(review): the liftUp path is under /cluster/data/equCab2 while the
#  work directory is /hive/data/genomes/equCab2 - presumably the same tree,
#  confirm.
    ssh hgwdev
    cd /hive/data/genomes/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.58
    featureBits equCab2 ensGene
    # 39563285 bases of 2428790173 (1.629%) in intersection

############################################################################
#  eriEur1 - Hedgehog - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the three
#  transcripts reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/eriEur1
    cat << '_EOF_' > eriEur1.ensGene.ra
# required db variable
db eriEur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 4691: ENSEEUT00000004188 no exonFrame on CDS exon 7
# 35795: ENSEEUT00000003156 no exonFrame on CDS exon 4
# 40908: ENSEEUT00000001064 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 eriEur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/eriEur1/bed/ensGene.58
    featureBits eriEur1 ensGene
    # 22555252 bases of 2133134836 (1.057%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the three
#  transcripts reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 2100: ENSFCAT00000006929 no exonFrame on CDS exon 16
# 14578: ENSFCAT00000010965 no exonFrame on CDS exon 1
# 26634: ENSFCAT00000009384 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 felCat3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/felCat3/bed/ensGene.58
    featureBits felCat3 ensGene
    # 22299251 bases of 1642698377 (1.357%) in intersection

############################################################################
#  fr2 - Fugu - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Rewrites a leading MT to chrM and supplies a liftUp file for scaffolds.
#  NOTE(review): the liftUp path is under /cluster/data/fr2 while the work
#  directory is /hive/data/genomes/fr2 - presumably the same tree, confirm.
    ssh hgwdev
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.58
    featureBits fr2 ensGene
    # 34568537 bases of 393312790 (8.789%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  The nameTranslation character class also accepts names starting with
#  E, W or Z, in addition to digits, X and Y.
    ssh hgwdev
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.58
    featureBits galGal3 ensGene
    # 30741650 bases of 1042591351 (2.949%) in intersection

############################################################################
#  loxAfr3 - Elephant - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Simplest case: the .ra file sets only the db name, no optional settings.
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3
    cat << '_EOF_' > loxAfr3.ensGene.ra
# required db variable
db loxAfr3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 loxAfr3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3/bed/ensGene.58
    # recorded output follows the command
    featureBits loxAfr3 ensGene
    # 32151456 bases of 3118565340 (1.031%) in intersection

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
#  The .ra file enables geneScaffolds and nothing else beyond the db name.
    ssh hgwdev
    cd /hive/data/genomes/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 micMur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/micMur1/bed/ensGene.58
    featureBits micMur1 ensGene
    # 25688743 bases of 1852394361 (1.387%) in intersection

############################################################################
#  monDom5 - Opossum - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Steps: write monDom5.ensGene.ra, run doEnsGeneUpdate.pl with
#  -ensVersion=58, then record the featureBits coverage of ensGene.
    ssh hgwdev
    cd /hive/data/genomes/monDom5
    # quoted '_EOF_' delimiter: the .ra text is written literally
    cat << '_EOF_' > monDom5.ensGene.ra
# required db variable
db monDom5
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 monDom5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/monDom5/bed/ensGene.58
    featureBits monDom5 ensGene
    # 32982595 bases of 3501660299 (0.942%) in intersection

############################################################################
#  myoLuc1 - Microbat - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the three
#  transcripts reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1
    cat << '_EOF_' > myoLuc1.ensGene.ra
# required db variable
db myoLuc1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 1265: ENSMLUT00000004658 no exonFrame on CDS exon 1
# 17770: ENSMLUT00000003427 no exonFrame on CDS exon 10
# 32743: ENSMLUT00000009601 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 myoLuc1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1/bed/ensGene.58
    featureBits myoLuc1 ensGene
    # 24707365 bases of 1673855868 (1.476%) in intersection

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the single
#  transcript reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/ochPri2
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 10995: ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 ochPri2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ochPri2/bed/ensGene.58
    featureBits ochPri2 ensGene
    # 25447435 bases of 1923624051 (1.323%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  The nameTranslation handles numeric names and X followed by a digit
#  (e.g. X1) separately; skipInvalid is enabled.
    ssh hgwdev
    cd /hive/data/genomes/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly, 365 items
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 ornAna1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ornAna1/bed/ensGene.58
    featureBits ornAna1 ensGene
    # 24466294 bases of 1842236818 (1.328%) in intersection

############################################################################
#  oryCun2 - Rabbit - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  No nameTranslation here; the only optional setting is a liftUp file.
    ssh hgwdev
    cd /hive/data/genomes/oryCun2
    cat << '_EOF_' > oryCun2.ensGene.ra
# required db variable
db oryCun2
# ensembl appears to still be in scaffolds ?
liftUp /hive/data/genomes/oryCun2/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 oryCun2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun2/bed/ensGene.58
    featureBits oryCun2 ensGene
    # 31785271 bases of 2604023284 (1.221%) in intersection

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  skipInvalid is left commented out in the .ra text, so only db and
#  nameTranslation are active for this run.
    ssh hgwdev
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.58
    featureBits oryLat2 ensGene
    # 32313511 bases of 700386597 (4.614%) in intersection

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Enables geneScaffolds and supplies a liftUp file for the name change.
#  NOTE(review): the liftUp path is under /cluster/data/otoGar1 while the
#  work directory is /hive/data/genomes/otoGar1 - presumably the same tree,
#  confirm.
    ssh hgwdev
    cd /hive/data/genomes/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 otoGar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/otoGar1/bed/ensGene.58
    featureBits otoGar1 ensGene
    # 23692750 bases of 1969052059 (1.203%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Steps: write panTro2.ensGene.ra, run doEnsGeneUpdate.pl with
#  -ensVersion=58, then record the featureBits coverage of ensGene.
    ssh hgwdev
    cd /hive/data/genomes/panTro2
    # quoted '_EOF_' delimiter: the .ra text is written literally
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.58
    featureBits panTro2 ensGene
    # 50004270 bases of 2909485072 (1.719%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  The haplotypeLift line in the .ra text is commented out (and names an
#  hg18 path), so only db and nameTranslation are active.
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.58
    featureBits ponAbe2 ensGene
    # 38120801 bases of 3093572278 (1.232%) in intersection

############################################################################
#  proCap1 - Rock hyrax - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the two
#  transcripts reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/proCap1
    cat << '_EOF_' > proCap1.ensGene.ra
# required db variable
db proCap1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 4595: ENSPCAT00000007286 no exonFrame on CDS exon 1
# 28894: ENSPCAT00000000699 no exonFrame on CDS exon 4

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 proCap1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/proCap1/bed/ensGene.58
    featureBits proCap1 ensGene
    # 25344155 bases of 2407847681 (1.053%) in intersection

############################################################################
#  pteVam1 - Megabat - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the two
#  transcripts reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/pteVam1
    cat << '_EOF_' > pteVam1.ensGene.ra
# required db variable
db pteVam1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
#       6381: ENSPVAT00000012919 no exonFrame on CDS exon 14
#       23522: ENSPVAT00000010661 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 pteVam1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/pteVam1/bed/ensGene.58
    featureBits pteVam1 ensGene
    # 28966701 bases of 1839436660 (1.575%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Unlike the other entries, this nameTranslation deletes input lines: the
#  sed "d" commands drop lines starting with 109... or MT before the
#  remaining names get their "chr" prefix.
    ssh hgwdev
    cd /hive/data/genomes/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.58
    featureBits rheMac2 ensGene
    # 44562701 bases of 2646704109 (1.684%) in intersection

############################################################################
#  sacCer2 - S. cerevisiae - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
#  The nameTranslation lists one substitution per roman-numeral chromosome.
#  Order matters: longer numerals come before their own prefixes (VIII
#  before VII before VI before V), and once a rule has fired the line starts
#  with "chr", so the later ^-anchored rules no longer match it.
    ssh hgwdev
    cd /hive/data/genomes/sacCer2
    cat << '_EOF_' > sacCer2.ensGene.ra
# required db variable
db sacCer2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chrVIII/; s/^VII/chrVII/; s/^VI/chrVI/; s/^V/chrV/; s/^XIII/chrXIII/; s/^XII/chrXII/; s/^XIV/chrXIV/; s/^XI/chrXI/; s/^XVI/chrXVI/; s/^XV/chrXV/; s/^X/chrX/; s/^III/chrIII/; s/^IV/chrIV/; s/^II/chrII/; s/^IX/chrIX/; s/^I/chrI/; s/^MT/chrM/; s/2-micron/2micron/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 sacCer2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sacCer2/bed/ensGene.58
    featureBits sacCer2 ensGene
    # 8912793 bases of 12162995 (73.278%) in intersection

############################################################################
#  sorAra1 - Shrew - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  The .ra file enables geneScaffolds and nothing else beyond the db name.
    ssh hgwdev
    cd /hive/data/genomes/sorAra1
    cat << '_EOF_' > sorAra1.ensGene.ra
# required db variable
db sorAra1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 sorAra1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sorAra1/bed/ensGene.58
    featureBits sorAra1 ensGene
    # 19690431 bases of 1832864697 (1.074%) in intersection

############################################################################
#  speTri1 - Squirrel - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Uses geneScaffolds and skipInvalid; the .ra text records the single
#  transcript reported with a missing exonFrame.
    ssh hgwdev
    cd /hive/data/genomes/speTri1
    cat << '_EOF_' > speTri1.ensGene.ra
# required db variable
db speTri1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 1071: ENSSTOT00000007455 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 speTri1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/speTri1/bed/ensGene.58
    featureBits speTri1 ensGene
    # 21594682 bases of 1913367893 (1.129%) in intersection

############################################################################
#  susScr1 - Pig - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
#  Steps: write susScr1.ensGene.ra, run doEnsGeneUpdate.pl with
#  -ensVersion=58, then record the featureBits coverage of ensGene.
    ssh hgwdev
    cd /hive/data/genomes/susScr1
    # quoted '_EOF_' delimiter: the .ra text is written literally
    cat << '_EOF_' > susScr1.ensGene.ra
# required db variable
db susScr1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 susScr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/susScr1/bed/ensGene.58
    featureBits susScr1 ensGene
    # 28758401 bases of 2231332019 (1.289%) in intersection

############################################################################
#  taeGut1 - Zebra finch - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
#  geneScaffolds and skipInvalid are both commented out in the .ra text, so
#  only db and nameTranslation are active.
#  NOTE(review): the commented ENSDNOT... transcript lines in the .ra text
#  are identical to those in the dasNov2 entry, so they look like copy/paste
#  leftovers rather than taeGut1 results - confirm.
    ssh hgwdev
    cd /hive/data/genomes/taeGut1
    cat << '_EOF_' > taeGut1.ensGene.ra
# required db variable
db taeGut1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
# geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
# skipInvalid yes
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 taeGut1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/taeGut1/bed/ensGene.58
    featureBits taeGut1 ensGene
    # 25441417 bases of 1222864691 (2.080%) in intersection

############################################################################
#  tarSyr1 - Tarsier - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1
    cat << '_EOF_' > tarSyr1.ensGene.ra
# required db variable
db tarSyr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 tarSyr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1/bed/ensGene.58
    featureBits tarSyr1 ensGene
    # 21327582 bases of 2768536343 (0.770%) in intersection

############################################################################
#  tetNig2 - Tetraodon - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tetNig2
    cat << '_EOF_' > tetNig2.ensGene.ra
# required db variable
db tetNig2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 tetNig2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tetNig2/bed/ensGene.58
    featureBits tetNig2 ensGene
    # 31642974 bases of 302314788 (10.467%) in intersection

############################################################################
#  tupBel1 - Tree shrew - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/tupBel1
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 2993: ENSTBET00000015831 no exonFrame on CDS exon 11
# 3556: ENSTBET00000013522 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 tupBel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tupBel1/bed/ensGene.58
    featureBits tupBel1 ensGene
    # 22848284 bases of 2137225476 (1.069%) in intersection

############################################################################
#  turTru1 - Dolphin - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/turTru1
    cat << '_EOF_' > turTru1.ensGene.ra
# required db variable
db turTru1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 turTru1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/turTru1/bed/ensGene.58
    featureBits turTru1 ensGene
    # 28614079 bases of 2298444090 (1.245%) in intersection

############################################################################
#  vicPac1 - Alpaca - Ensembl Genes version 58  (DONE - 2010-05-27 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/vicPac1
    cat << '_EOF_' > vicPac1.ensGene.ra
# required db variable
db vicPac1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that does not translate properly to UCSC coordinates
#       4649: ENSVPAT00000009076 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 vicPac1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/vicPac1/bed/ensGene.58
    featureBits vicPac1 ensGene
    # 17891769 bases of 1922910435 (0.930%) in intersection

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes version 58  (DONE - 2010-05-27 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/xenTro2
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=58 xenTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/xenTro2/bed/ensGene.58
    featureBits xenTro2 ensGene
    # 29181688 bases of 1359412157 (2.147%) in intersection

############################################################################
############################################################################
# ensembl 57 update (DONE - 2010-04-02 - Hiram)

############################################################################
#  susScr1 - Pig - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/susScr1
    cat << '_EOF_' > susScr1.ensGene.ra
# required db variable
db susScr1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 susScr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/susScr1/bed/ensGene.57
    featureBits susScr1 ensGene
    # 28707614 bases of 2231332019 (1.287%) in intersection

############################################################################
#  susScr2 - Pig - lifted susScr1 v57 genes to susScr2 (DONE - 2010-04-06 - Hiram)
    # work in a fresh susScr2 directory; the inputs are symlinked from the
    #	susScr1 v57 build and uncompressed for use by liftOver
    mkdir /hive/data/genomes/susScr2/bed/ensGene.57
    cd /hive/data/genomes/susScr2/bed/ensGene.57
    ln -s ../../../susScr1/bed/ensGene.57/process/susScr1.allGenes.gp.gz .
    zcat susScr1.allGenes.gp.gz > susScr1.allGenes.genePred
    ln -s ../../../susScr1/bed/liftOver/susScr1ToSusScr2.over.chain.gz
    zcat susScr1ToSusScr2.over.chain.gz > susScr1ToSusScr2.over.chain
    # lift the susScr1 gene predictions through the susScr1ToSusScr2 chain,
    #	items that do not map go to susScr1.liftOver.unMapped.txt
    liftOver -genePred susScr1.allGenes.genePred \
        susScr1ToSusScr2.over.chain \
        susScr2.allGenes.gp susScr1.liftOver.unMapped.txt
    # liftOver writes an uncompressed file, compress it so that the
    #	susScr2.allGenes.gp.gz file named in the load command exists
    gzip susScr2.allGenes.gp
    # load into the susScr2 ensGene table, stdout and stderr from the
    #	loader are both captured in loadGenePred.errors.txt
    hgLoadGenePred  -genePredExt susScr2 \
	ensGene susScr2.allGenes.gp.gz >& loadGenePred.errors.txt
    zcat \
../../../susScr1/bed/ensGene.57/download/Sus_scrofa.Sscrofa9.57.pep.all.fa.gz \
        | sed -e 's/^>.* transcript:/>/; s/ CCDS.*$//;' \
	| gzip > ensPep.txt.gz
    zcat ensPep.txt.gz \
	| ~/kent/src/utils/faToTab/faToTab.pl /dev/null /dev/stdin \
	     | sed -e '/^$/d; s/*$//' | sort > ensPep.susScr2.fa.tab
    hgPepPred susScr2 tab ensPep ensPep.susScr2.fa.tab
    ln -s ../../../susScr1/bed/ensGene.57/process/ensGtp.tab .
    hgLoadSqlTab susScr2 ensGtp ~/kent/src/hg/lib/ensGtp.sql ensGtp.tab
    hgsql -e 'INSERT INTO trackVersion \
	(db, name, who, version, updateTime, comment, source, dateReference) \
	VALUES("susScr2", "ensGene", "hiram", "57", now(), \
        "with peptides Sus_scrofa.Sscrofa9.57.pep.all.fa.gz", \
        "lifted susScr1 to susScr2 ftp://ftp.ensembl.org/pub/release-57/gtf/sus_scrofa/Sus_scrofa.Sscrofa9.57.gtf.gz", \
        "mar2010" );' hgFixed

############################################################################
#  loxAfr3 - Elephant - Ensembl Genes version 57  (DONE - 2010-04-01 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3
    cat << '_EOF_' > loxAfr3.ensGene.ra
# required db variable
db loxAfr3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 loxAfr3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/loxAfr3/bed/ensGene.57
    featureBits loxAfr3 ensGene
    # 32110794 bases of 3118565340 (1.030%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 57  (DONE - 2010-04-06 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 gasAcu1.ensGene.ra

    # requires extra attention after the all-database for-loop attempt
    cd /hive/data/genomes/gasAcu1/bed/ensGene.57/process
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29109 failed: 0

    #	then continue with the load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=57 gasAcu1.ensGene.ra \
	> ens.57.load 2>&1

    featureBits gasAcu1 ensGene
    # 36789271 bases of 446627861 (8.237%) in intersection

############################################################################
#  anoCar1 - Lizard - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/anoCar1
    cat << '_EOF_' > anoCar1.ensGene.ra
# required db variable
db anoCar1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 anoCar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/anoCar1/bed/ensGene.57
    featureBits anoCar1 ensGene
    # 26956669 bases of 1741478929 (1.548%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/canFam2
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 canFam2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/canFam2/bed/ensGene.57
    featureBits canFam2 ensGene
    # 34634472 bases of 2384996543 (1.452%) in intersection

############################################################################
#  cavPor3 - Guinea pig - Ensembl Genes version 57  (DONE - 2010-04-05 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cavPor3
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.57
    featureBits cavPor3 ensGene

############################################################################
#  ce7 - C. elegans - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ce7
    cat << '_EOF_' > ce7.ensGene.ra
# required db variable
db ce7
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([IVX]\)/chr\1/; s/^MtDNA/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 ce7.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ce7/bed/ensGene.57
    featureBits ce7 ensGene
    # 29594668 bases of 100286002 (29.510%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 57  (DONE - 2010-04-05 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.57
    featureBits ci2 ensGene
    # 20113161 bases of 141233565 (14.241%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 57  (DONE - 2010-04-05 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.57
    featureBits cioSav2 ensGene
    # 16616680 bases of 173749524 (9.564%) in intersection

############################################################################
#  danRer6 - Zebrafish - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/danRer6
    cat << '_EOF_' > danRer6.ensGene.ra
# required db variable
db danRer6
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 danRer6.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer6/bed/ensGene.57
    featureBits danRer6 ensGene
    # 44586206 bases of 1506896106 (2.959%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.57
    featureBits equCab2 ensGene
    # 39506745 bases of 2428790173 (1.627%) in intersection

############################################################################
#  fr2 - Fugu - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.57
    featureBits fr2 ensGene
    # 34560383 bases of 393312790 (8.787%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.57
    featureBits galGal3 ensGene
    # 30733557 bases of 1042591351 (2.948%) in intersection

############################################################################
#  monDom5 - Opossum - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/monDom5
    cat << '_EOF_' > monDom5.ensGene.ra
# required db variable
db monDom5
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 monDom5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/monDom5/bed/ensGene.57
    featureBits monDom5 ensGene
    # 32999268 bases of 3501660299 (0.942%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly, 365 items
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 ornAna1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ornAna1/bed/ensGene.57
    featureBits ornAna1 ensGene
    # 24537221 bases of 1842236818 (1.332%) in intersection

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.57
    featureBits oryLat2 ensGene
    # 32301732 bases of 700386597 (4.612%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.57
    featureBits panTro2 ensGene
    # 49983145 bases of 2909485072 (1.718%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.57
    featureBits ponAbe2 ensGene
    # 38087987 bases of 3093572278 (1.231%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.57
    featureBits rheMac2 ensGene
    # 44519581 bases of 2646704109 (1.682%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.57
    featureBits rn4 ensGene
    # 46518438 bases of 2571531505 (1.809%) in intersection

############################################################################
#  sacCer2 - S. cerevisiae - Ensembl Genes version 57  (DONE - 2010-04-05 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/sacCer2
    cat << '_EOF_' > sacCer2.ensGene.ra
# required db variable
db sacCer2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chrVIII/; s/^VII/chrVII/; s/^VI/chrVI/; s/^V/chrV/; s/^XIII/chrXIII/; s/^XII/chrXII/; s/^XIV/chrXIV/; s/^XI/chrXI/; s/^XVI/chrXVI/; s/^XV/chrXV/; s/^X/chrX/; s/^III/chrIII/; s/^IV/chrIV/; s/^II/chrII/; s/^IX/chrIX/; s/^I/chrI/; s/^MT/chrM/; s/2-micron/2micron/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 sacCer2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sacCer2/bed/ensGene.57
    featureBits sacCer2 ensGene
    # 8912793 bases of 12162995 (73.278%) in intersection

############################################################################
#  taeGut1 - Zebra finch - Ensembl Genes version 57  (DONE - 2010-04-05 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/taeGut1
    cat << '_EOF_' > taeGut1.ensGene.ra
# required db variable
db taeGut1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
# geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
# skipInvalid yes
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 taeGut1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/taeGut1/bed/ensGene.57
    featureBits taeGut1 ensGene
    # 25428670 bases of 1222864691 (2.079%) in intersection

############################################################################
#  tetNig2 - Tetraodon - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/tetNig2
    cat << '_EOF_' > tetNig2.ensGene.ra
# required db variable
db tetNig2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 tetNig2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tetNig2/bed/ensGene.57
    featureBits tetNig2 ensGene
    # 31637658 bases of 302314788 (10.465%) in intersection

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes version 57  (DONE - 2010-04-05 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/xenTro2
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 xenTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/xenTro2/bed/ensGene.57
    featureBits xenTro2 ensGene
    # 29158032 bases of 1359412157 (2.145%) in intersection

############################################################################
# bosTau4 was broken - finished manually (DONE - 2010-04-05 - Hiram)
    ssh hgwdev
    cd /hive/data/genomes/bosTau4
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 bosTau4.ensGene.ra
    #	broken during processing, fix doProcess.csh to eliminate AAFC03011182
    #	the ../download path used here is relative to the process directory
    cd /hive/data/genomes/bosTau4/bed/ensGene.57/process
    #	same name translation as in bosTau4.ensGene.ra, plus removal of
    #	every GTF line that mentions AAFC03011182
    zcat ../download/Bos_taurus.Btau_4.0.57.gtf.gz \
        | sed -e "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/" \
        | grep -v AAFC03011182 | gzip > allGenes.gtf.gz
    #	then continue with the load, keeping stdout and stderr in the log
    ssh hgwdev
    cd /hive/data/genomes/bosTau4
    doEnsGeneUpdate.pl  -ensVersion=57 -continue=load bosTau4.ensGene.ra \
	> ens.57.load 2>&1
    cd /hive/data/genomes/bosTau4/bed/ensGene.57
    featureBits bosTau4 ensGene
    # 42207115 bases of 2731830700 (1.545%) in intersection


############################################################################
#  oryCun2 - Rabbit - Ensembl Genes version 57  (DONE - 2010-04-02 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/oryCun2
    cat << '_EOF_' > oryCun2.ensGene.ra
# required db variable
db oryCun2
# ensembl appears to still be in scaffolds ?
liftUp /hive/data/genomes/oryCun2/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 oryCun2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun2/bed/ensGene.57
    featureBits oryCun2 ensGene
    # 31748363 bases of 2604023284 (1.219%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 57  (DONE - 2010-04-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 2100: ENSFCAT00000006929 no exonFrame on CDS exon 16
# 14578: ENSFCAT00000010965 no exonFrame on CDS exon 1
# 26634: ENSFCAT00000009384 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 felCat3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/felCat3/bed/ensGene.57
    featureBits felCat3 ensGene
    # 22220711 bases of 1642698377 (1.353%) in intersection

############################################################################
#  mm9 - Mouse - Ensembl Genes version 57  (DONE - 2010-04-06 - hiram)
    cd /hive/data/genomes/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=57 mm9.ensGene.ra

    cd /hive/data/genomes/mm9/bed/ensGene.57
    #	ran into trouble with the ensGtp table load, the names of
    #	the proteins have gotten longer and the standard sql definition
    #	was no longer adequate.  So, after the load failed, increase
    #	protein char and index size to 23:
    cat << '_EOF_' > ensGtp.sql
# This creates the table holding the relationship between
# ensemble genes, transcripts, and peptides.
CREATE TABLE ensGtp (
  gene char(20) NOT NULL,
  transcript char(20) NOT NULL,
  protein char(23) NOT NULL,
# INDICES
  INDEX(gene(19)),
  UNIQUE(transcript(19)),
  INDEX(protein(23))
) 
'_EOF_'
    # << happy emacs

    #	Then, running the rest of the load script, with this line fixed up:
    #	hgLoadSqlTab mm9 ensGtp ensGtp.sql process/ensGtp.tab
    ./finiLoad.csh > finiLoad.log 2>&1

    cd /hive/data/genomes/mm9
    doEnsGeneUpdate.pl -ensVersion=57 -verbose=2 -continue=cleanup \
	mm9.ensGene.ra > ens.57.cleanup

    featureBits mm9 ensGene
    #	79248889 bases of 2620346127 (3.024%) in intersection

############################################################################
#  hg19 - Human - Ensembl Genes version 57  (DONE - 2010-04-06 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/hg19
    cat << '_EOF_' > hg19.ensGene.ra
# required db variable
db hg19
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# changing names for the odd bits in Ensembl 57
liftUp /hive/data/genomes/hg19/jkStuff/ens.57.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl  -ensVersion=57 hg19.ensGene.ra
    #	four of their genes cross the boundaries of the haplotypes into
    #	the chromosomes.  We can't do that, so, fixup doProcess.csh:
    cd /hive/data/genomes/hg19/bed/ensGene.57/process
    #	uncompress first so the filtered result can be written back to
    #	the same .gz name that the load step reads
    gunzip hg19.allGenes.gp.gz
    #	drop the four offending transcripts by their ENST identifiers
    egrep -v "ENST00000436611|ENST00000383191|ENST00000436232|ENST00000436870" \
        hg19.allGenes.gp | gzip -c > hg19.allGenes.gp.gz
    #	verify OK
    genePredCheck -db=hg19 hg19.allGenes.gp.gz
    #	checked: 143123 failed: 0

    cd /hive/data/genomes/hg19
    #	and finish it off:
    doEnsGeneUpdate.pl -ensVersion=57 -continue=load \
	hg19.ensGene.ra > ens.57.load 2>&1
    featureBits hg19 ensGene
    # 101913378 bases of 2897316137 (3.518%) in intersection

############################################################################
# ensembl 56 update (DONE - 2009-10-27 - Hiram) only did Rat as a one-off

############################################################################
#  rn4 - Rat - Ensembl Genes version 56  (DONE - 2009-10-27 - hiram)
    ssh kkr14u08
    cd /hive/data/genomes/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=56 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.56
    featureBits rn4 ensGene
    # 46518438 bases of 2571531505 (1.809%) in intersection

############################################################################
# ensembl 54 update (DONE - 2009-08-05 - Hiram)
    # hg18 needs to get to its last version
############################################################################
#  hg18 - Human - Ensembl Genes version 54  (DONE - 2009-08-05 - hiram)
    ssh hgwdev
    cd /hive/data/genomes/hg18
    cat << '_EOF_' > hg18.ensGene.ra
# required db variable
db hg18
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg18/jkStuff/ensGene.haplotype.lift
liftUp /hive/data/genomes/hg18/jkStuff/liftContigs.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=54 hg18.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/hg18/bed/ensGene.54
    featureBits hg18 ensGene
    # 70647021 bases of 2881515245 (2.452%) in intersection

############################################################################
# To finish off the update to 55 and 54, run the following:
     hgsql -e \
'update trackVersion set dateReference="dec2008" where version="52";' hgFixed
     hgsql -e \
'update trackVersion set dateReference="may2009" where version="54";' hgFixed
     hgsql -e \
'update trackVersion set dateReference="current" where version="55";' hgFixed

############################################################################

############################################################################
# ensembl 55 updates (WORKING - 2009-07-14 - Hiram)
    # see also: more notes about how this is done in the "ensembl 50 updates"
    #	section below  (and in /hive/users/hiram/ensGene/)
############################################################################
#  macEug1 - Wallaby - (BROKEN - 2009-07-20 - Hiram)
    # cannot get this one to work: either a broken GeneScaffold lift
    #	or a different set of scaffold names; needs investigation
############################################################################
#  danRer6 - Zebrafish - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u05
    cd /hive/data/genomes/danRer6
    cat << '_EOF_' > danRer6.ensGene.ra
# required db variable
db danRer6
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 danRer6.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer6/bed/ensGene.55
    featureBits danRer6 ensGene
    # 44586206 bases of 1506896106 (2.959%) in intersection
############################################################################
#  hg19 - Human - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u08
    cd /hive/data/genomes/hg19
    cat << '_EOF_' > hg19.ensGene.ra
# required db variable
db hg19
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg19/jkStuff/ensGene.haplotype.lift
# liftUp /hive/data/genomes/hg19/jkStuff/liftContigs.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 hg19.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/hg19/bed/ensGene.55
    featureBits hg19 ensGene
    # 85295627 bases of 2897316137 (2.944%) in intersection

############################################################################
#  anoCar1 - Lizard - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/anoCar1
    cat << '_EOF_' > anoCar1.ensGene.ra
# required db variable
db anoCar1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 anoCar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/anoCar1/bed/ensGene.55
    featureBits anoCar1 ensGene
    # 26956669 bases of 1741478929 (1.548%) in intersection
############################################################################
#  choHof1 - Sloth - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u03
    cd /hive/data/genomes/choHof1
    cat << '_EOF_' > choHof1.ensGene.ra
# required db variable
db choHof1
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       18938: ENSCHOT00000005046 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 choHof1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/choHof1/bed/ensGene.55
    featureBits choHof1 ensGene
    # 18231244 bases of 2060419685 (0.885%) in intersection
############################################################################
#  dasNov2 - Armadillo - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u02
    cd /hive/data/genomes/dasNov2
    cat << '_EOF_' > dasNov2.ensGene.ra
# required db variable
db dasNov2
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 dasNov2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dasNov2/bed/ensGene.55
    featureBits dasNov2 ensGene
    # 21864229 bases of 2371493872 (0.922%) in intersection
############################################################################
#  loxAfr2 - Elephant - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh swarm
    cd /hive/data/genomes/loxAfr2
    cat << '_EOF_' > loxAfr2.ensGene.ra
# required db variable
db loxAfr2
# need to translate Ensembl GeneScaffold coordinates to UCSC scaffolds
geneScaffolds yes
#       during the loading of the gene pred, skip all invalid genes
skipInvalid yes
#        ENSLAFT00000000586 no exonFrame on CDS exon 4
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 loxAfr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/loxAfr2/bed/ensGene.55
    featureBits loxAfr2 ensGene
    # 23586871 bases of 2444975542 (0.965%) in intersection

############################################################################
#  bosTau4 - Cow - Ensembl Genes version 55  (DONE - 2009-07-15 - hiram)
    ssh kkr14u02
    cd /hive/data/genomes/bosTau4
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 bosTau4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.55
    featureBits bosTau4 ensGene
    # 42207115 bases of 2731830700 (1.545%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/canFam2
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 canFam2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/canFam2/bed/ensGene.55
    featureBits canFam2 ensGene
    # 34634472 bases of 2384996543 (1.452%) in intersection

############################################################################
#  cavPor3 - Guinea Pig - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u01
    cd /hive/data/genomes/cavPor3
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.55
    featureBits cavPor3 ensGene
    # 30852014 bases of 2663369733 (1.158%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u08
    cd /hive/data/genomes/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.55
    featureBits ci2 ensGene
    # 20113161 bases of 141233565 (14.241%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u08
    cd /hive/data/genomes/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.55
    featureBits cioSav2 ensGene
    # 16616680 bases of 173749524 (9.564%) in intersection

############################################################################
#  dipOrd1 - Kangaroo rat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh swarm
    cd /hive/data/genomes/dipOrd1
    cat << '_EOF_' > dipOrd1.ensGene.ra
# required db variable
db dipOrd1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that have invalid structures from Ensembl:
# 11275: ENSDORT00000004734 no exonFrame on CDS exon 12
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 dipOrd1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1/bed/ensGene.55
    featureBits dipOrd1 ensGene
    # 25275613 bases of 1844961421 (1.370%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u03
    cd /hive/data/genomes/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 29277: ENSETET00000011172 no exonFrame on CDS exon 14
# 44942: ENSETET00000018714 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 echTel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/echTel1/bed/ensGene.55
    featureBits echTel1 ensGene
    # 25563184 bases of 2111581369 (1.211%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u08
    cd /hive/data/genomes/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.55
    featureBits equCab2 ensGene
    # 39506745 bases of 2428790173 (1.627%) in intersection

############################################################################
#  eriEur1 - Hedgehog - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u01
    cd /hive/data/genomes/eriEur1
    cat << '_EOF_' > eriEur1.ensGene.ra
# required db variable
db eriEur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 4691: ENSEEUT00000004188 no exonFrame on CDS exon 7
# 35795: ENSEEUT00000003156 no exonFrame on CDS exon 4
# 40908: ENSEEUT00000001064 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 eriEur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/eriEur1/bed/ensGene.55
    featureBits eriEur1 ensGene
    # 22480171 bases of 2133134836 (1.054%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u05
    cd /hive/data/genomes/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 2100: ENSFCAT00000006929 no exonFrame on CDS exon 16
# 14578: ENSFCAT00000010965 no exonFrame on CDS exon 1
# 26634: ENSFCAT00000009384 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 felCat3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/felCat3/bed/ensGene.55
    featureBits felCat3 ensGene
    # 22220711 bases of 1642698377 (1.353%) in intersection

############################################################################
#  fr2 - Fugu - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u06
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.55
    featureBits fr2 ensGene
    # 34560383 bases of 393312790 (8.787%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.55
    featureBits galGal3 ensGene
    # 30733557 bases of 1042591351 (2.948%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 55  (DONE - 2009-07-15 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 gasAcu1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1/bed/ensGene.55
    featureBits gasAcu1 ensGene
    # 36789271 bases of 446627861 (8.237%) in intersection

############################################################################
#  gorGor1 - Gorilla - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u04
    cd /hive/data/genomes/gorGor1
    cat << '_EOF_' > gorGor1.ensGene.ra
# required db variable
db gorGor1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC
# names
liftUp /hive/data/genomes/gorGor1/jkStuff/ensemblLiftToUcsc.lift
# ignore the single gene that has an invalid structure from Ensembl:
skipInvalid yes
# 8939: ENSGGOT00000010340 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 gorGor1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/gorGor1/bed/ensGene.55
    featureBits gorGor1 ensGene
    # 23242041 bases of 2075548667 (1.120%) in intersection

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u08
    cd /hive/data/genomes/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 micMur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/micMur1/bed/ensGene.55
    featureBits micMur1 ensGene
    # 25659397 bases of 1852394361 (1.385%) in intersection

############################################################################
#  mm9 - Mouse - Ensembl Genes version 55  (DONE - 2009-07-15 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 mm9.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/mm9/bed/ensGene.55
    featureBits mm9 ensGene
    # 63272128 bases of 2620346127 (2.415%) in intersection

############################################################################
#  monDom5 - Opossum - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u03
    cd /hive/data/genomes/monDom5
    cat << '_EOF_' > monDom5.ensGene.ra
# required db variable
db monDom5
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 monDom5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/monDom5/bed/ensGene.55
    featureBits monDom5 ensGene
    # 32999268 bases of 3501660299 (0.942%) in intersection

############################################################################
#  myoLuc1 - Microbat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh swarm
    cd /hive/data/genomes/myoLuc1
    cat << '_EOF_' > myoLuc1.ensGene.ra
# required db variable
db myoLuc1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 1265: ENSMLUT00000004658 no exonFrame on CDS exon 1
# 17770: ENSMLUT00000003427 no exonFrame on CDS exon 10
# 32743: ENSMLUT00000009601 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 myoLuc1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1/bed/ensGene.55
    featureBits myoLuc1 ensGene
    # 24630744 bases of 1673855868 (1.471%) in intersection

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/ochPri2
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 10995: ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 ochPri2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ochPri2/bed/ensGene.55
    featureBits ochPri2 ensGene
    # 25342444 bases of 1923624051 (1.317%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u02
    cd /hive/data/genomes/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly, 365 items
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 ornAna1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ornAna1/bed/ensGene.55
    featureBits ornAna1 ensGene
    # 24537221 bases of 1842236818 (1.332%) in intersection

############################################################################
#  oryCun1 - Rabbit - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh swarm
    cd /hive/data/genomes/oryCun1
    cat << '_EOF_' > oryCun1.ensGene.ra
# required db variable
db oryCun1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 24994: ENSOCUT00000009485 no exonFrame on CDS exon 9
# 26897: ENSOCUT00000004627 no exonFrame on CDS exon 3
# 32794: ENSOCUT00000014840 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 oryCun1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun1/bed/ensGene.55
    featureBits oryCun1 ensGene
    # 22839824 bases of 2076044328 (1.100%) in intersection

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh swarm
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.55
    featureBits oryLat2 ensGene
    # 32301732 bases of 700386597 (4.612%) in intersection

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kolossus
    cd /hive/data/genomes/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 otoGar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/otoGar1/bed/ensGene.55
    featureBits otoGar1 ensGene
    # 23597902 bases of 1969052059 (1.198%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u08
    cd /hive/data/genomes/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.55
    featureBits panTro2 ensGene
    # 49983145 bases of 2909485072 (1.718%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u03
    cd /hive/data/genomes/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.55
    featureBits ponAbe2 ensGene
    # 38087987 bases of 3093572278 (1.231%) in intersection

############################################################################
#  proCap1 - Rock hyrax - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u03
    cd /hive/data/genomes/proCap1
    cat << '_EOF_' > proCap1.ensGene.ra
# required db variable
db proCap1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 4595: ENSPCAT00000007286 no exonFrame on CDS exon 1
# 28894: ENSPCAT00000000699 no exonFrame on CDS exon 4

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 proCap1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/proCap1/bed/ensGene.55
    featureBits proCap1 ensGene
    # 25296156 bases of 2407847681 (1.051%) in intersection

############################################################################
#  pteVam1 - Megabat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u08
    cd /hive/data/genomes/pteVam1
    cat << '_EOF_' > pteVam1.ensGene.ra
# required db variable
db pteVam1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
#       6381: ENSPVAT00000012919 no exonFrame on CDS exon 14
#       23522: ENSPVAT00000010661 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 pteVam1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/pteVam1/bed/ensGene.55
    featureBits pteVam1 ensGene
    # 28914790 bases of 1839436660 (1.572%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kolossus
    cd /hive/data/genomes/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.55
    featureBits rheMac2 ensGene
    # 44519581 bases of 2646704109 (1.682%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh swarm
    cd /hive/data/genomes/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.55
    featureBits rn4 ensGene
    # 43758167 bases of 2571531505 (1.702%) in intersection

############################################################################
#  sacCer2 - S. cerevisiae - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u02
    cd /hive/data/genomes/sacCer2
    cat << '_EOF_' > sacCer2.ensGene.ra
# required db variable
db sacCer2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chrVIII/; s/^VII/chrVII/; s/^VI/chrVI/; s/^V/chrV/; s/^XIII/chrXIII/; s/^XII/chrXII/; s/^XIV/chrXIV/; s/^XI/chrXI/; s/^XVI/chrXVI/; s/^XV/chrXV/; s/^X/chrX/; s/^III/chrIII/; s/^IV/chrIV/; s/^II/chrII/; s/^IX/chrIX/; s/^I/chrI/; s/^MT/chrM/; s/2-micron/2micron/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 sacCer2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sacCer2/bed/ensGene.55
    featureBits sacCer2 ensGene
    # 8912793 bases of 12162995 (73.278%) in intersection

############################################################################
#  sorAra1 - Shrew - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u04
    cd /hive/data/genomes/sorAra1
    cat << '_EOF_' > sorAra1.ensGene.ra
# required db variable
db sorAra1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 sorAra1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sorAra1/bed/ensGene.55
    featureBits sorAra1 ensGene
    # 19509213 bases of 1832864697 (1.064%) in intersection

############################################################################
#  speTri1 - Squirrel - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kolossus
    cd /hive/data/genomes/speTri1
    cat << '_EOF_' > speTri1.ensGene.ra
# required db variable
db speTri1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 1071: ENSSTOT00000007455 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 speTri1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/speTri1/bed/ensGene.55
    featureBits speTri1 ensGene
    # 21590338 bases of 1913367893 (1.128%) in intersection

############################################################################
#  taeGut1 - Zebra finch - Ensembl Genes version 55  (DONE - 2009-07-15 - hiram)
    ssh kkr14u03
    cd /hive/data/genomes/taeGut1
    cat << '_EOF_' > taeGut1.ensGene.ra
# required db variable
db taeGut1
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9LXYZ][0-9ABG]*\)/chr\1/; s/^Un/chrUn/"
# (disabled) translate Ensembl GeneScaffold coordinates to UCSC scaffolds
# geneScaffolds yes
# (disabled) during the loading of the gene pred, skip all invalid genes
# skipInvalid yes
#       NOTE: the ENSDNOT ids below are armadillo (dasNov1) transcripts, not
#       zebra finch; they appear to be left over from a copied template
#       13843: ENSDNOT00000025033 no exonFrame on CDS exon 5
#       23044: ENSDNOT00000004471 no exonFrame on CDS exon 1
#       30976: ENSDNOT00000003424 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 taeGut1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/taeGut1/bed/ensGene.55
    featureBits taeGut1 ensGene
    #   25428670 bases of 1222864691 (2.079%) in intersection

############################################################################
#  tarSyr1 - Tarsier - Ensembl Genes version 55  (DONE - 2009-07-20 - hiram)
    ssh kkr14u01
    cd /hive/data/genomes/tarSyr1
    cat << '_EOF_' > tarSyr1.ensGene.ra
# required db variable
db tarSyr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 tarSyr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1/bed/ensGene.55
    featureBits tarSyr1 ensGene
    # 21282560 bases of 2768536343 (0.769%) in intersection

############################################################################
#  tupBel1 - TreeShrew - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u05
    cd /hive/data/genomes/tupBel1
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 2993: ENSTBET00000015831 no exonFrame on CDS exon 11
# 3556: ENSTBET00000013522 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 tupBel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tupBel1/bed/ensGene.55
    featureBits tupBel1 ensGene
    # 22808448 bases of 2137225476 (1.067%) in intersection

############################################################################
#  turTru1 - Dolphin - Ensembl Genes version 55  (DONE - 2009-07-20 - hiram)
    ssh kkr14u06
    cd /hive/data/genomes/turTru1
    cat << '_EOF_' > turTru1.ensGene.ra
# required db variable
db turTru1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 turTru1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/turTru1/bed/ensGene.55
    featureBits turTru1 ensGene
    # 28534327 bases of 2298444090 (1.241%) in intersection

############################################################################
#  vicPac1 - Alpaca - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kkr14u06
    cd /hive/data/genomes/vicPac1
    cat << '_EOF_' > vicPac1.ensGene.ra
# required db variable
db vicPac1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the 53 genes that do not translate properly to UCSC coordinates
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 vicPac1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/vicPac1/bed/ensGene.55
    featureBits vicPac1 ensGene
    # 17833823 bases of 1922910435 (0.927%) in intersection

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes version 55  (DONE - 2009-07-14 - hiram)
    ssh kolossus
    cd /hive/data/genomes/xenTro2
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=55 xenTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/xenTro2/bed/ensGene.55
    featureBits xenTro2 ensGene
    # 29158032 bases of 1359412157 (2.145%) in intersection

############################################################################

############################################################################
# ensembl 52 updates (DONE - 2009-01-21,22 - Hiram)
    # see also: more notes about how this is done in the "ensembl 50 updates"
    #	section below
############################################################################
#  bosTau4 - Cow - Ensembl Genes version 52  (DONE - 2009-01-22 - hiram)
    ssh swarm
    cd /hive/data/genomes/bosTau4
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; s/^MT/chrM/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 bosTau4.ensGene.ra \
	> ensGene.52.log 2>&1

    # v52 introduced the chrM genes as MT genes, so the nameTranslation
    #	sed command had to be altered to include s/^MT/chrM/
    #	Even with that change, the process step fails because of a single
    #	stray gene definition (the lines mentioning AAFC03011182).
    #	To fix it, filter that gene out of the gene pred file:
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.52/process
    mv bosTau4.allGenes.gp.gz bosTau4.allGenes.gp.gz.broken
    zcat bosTau4.allGenes.gp.gz.broken | grep -v AAFC03011182 \
	| gzip -c > bosTau4.allGenes.gp.gz
    #	verify that:
    genePredCheck -db=bosTau4 bosTau4.allGenes.gp.gz
    #	checked: 29802 failed: 0
    #	continuing:
    cd /hive/data/genomes/bosTau4
    $HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl -verbose=2 \
	-ensVersion=52 -continue=load bosTau4.ensGene.ra >> ensGene.52.log 2>&1

    cd /hive/data/genomes/bosTau4/bed/ensGene.52
    featureBits bosTau4 ensGene
    # 41476954 bases of 2731830700 (1.518%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/canFam2
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 canFam2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/canFam2/bed/ensGene.52
    featureBits canFam2 ensGene
    # 34634856 bases of 2384996543 (1.452%) in intersection

############################################################################
#  cavPor3 - Guinea Pig - Ensembl Genes version 52  (DONE - 2009-01-21 -
#  hiram)
    ssh hgwdev
    cd /hive/data/genomes/cavPor3
    cat << '_EOF_' > cavPor3.ensGene.ra
# required db variable
db cavPor3
# do we need to translate geneScaffold coordinates
# geneScaffolds yes
nameTranslation "s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 cavPor3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cavPor3/bed/ensGene.52
    featureBits cavPor3 ensGene
    # 30852014 bases of 2663369733 (1.158%) in intersection

############################################################################
#  ce6 - C. elegans - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/ce6
    cat << '_EOF_' > ce6.ensGene.ra
# required db variable
db ce6
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([IVX]\)/chr\1/; s/^MtDNA/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 ce6.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ce6/bed/ensGene.52
    featureBits ce6 ensGene
    # 29421784 bases of 100281426 (29.339%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 52  (DONE - 2009-01-21 -
#  hiram)
    ssh swarm
    cd /hive/data/genomes/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.52
    featureBits ci2 ensGene
    # 20113161 bases of 141233565 (14.241%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 52  (DONE - 2009-01-21 -
#  hiram)
    ssh kolossus
    cd /hive/data/genomes/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.52
    featureBits cioSav2 ensGene
    # 16616680 bases of 173749524 (9.564%) in intersection

############################################################################
#  danRer5 - Zebrafish - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/danRer5
    cat << '_EOF_' > danRer5.ensGene.ra
# required db variable
db danRer5
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 danRer5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer5/bed/ensGene.52
    featureBits danRer5 ensGene
    # 36854235 bases of 1435609608 (2.567%) in intersection

############################################################################
#  dasNov1 - Armadillo - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/dasNov1
    cat << '_EOF_' > dasNov1.ensGene.ra
# required db variable
db dasNov1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 18192: ENSDNOT00000004471 no exonFrame on CDS exon 7
# 35247: ENSDNOT00000007696 no exonFrame on CDS exon 8
# 38952: ENSDNOT00000019234 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 dasNov1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dasNov1/bed/ensGene.52
    featureBits dasNov1 ensGene
    # 22721709 bases of 2146362222 (1.059%) in intersection

############################################################################
#  dipOrd1 - Kangaroo rat - Ensembl Genes version 52  (DONE - 2009-01-21 -
#  hiram)
    ssh swarm
    cd /hive/data/genomes/dipOrd1
    cat << '_EOF_' > dipOrd1.ensGene.ra
# required db variable
db dipOrd1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 11275: ENSDORT00000004734 no exonFrame on CDS exon 12
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 dipOrd1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/dipOrd1/bed/ensGene.52
    featureBits dipOrd1 ensGene
    # 25213442 bases of 1844961421 (1.367%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 29277: ENSETET00000011172 no exonFrame on CDS exon 14
# 44942: ENSETET00000018714 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 echTel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/echTel1/bed/ensGene.52
    featureBits echTel1 ensGene
    # 25563184 bases of 2111581369 (1.211%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.52
    featureBits equCab2 ensGene
    # 39198296 bases of 2428790173 (1.614%) in intersection

############################################################################
#  eriEur1 - Hedgehog - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/eriEur1
    cat << '_EOF_' > eriEur1.ensGene.ra
# required db variable
db eriEur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 4691: ENSEEUT00000004188 no exonFrame on CDS exon 7
# 35795: ENSEEUT00000003156 no exonFrame on CDS exon 4
# 40908: ENSEEUT00000001064 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 eriEur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/eriEur1/bed/ensGene.52
    featureBits eriEur1 ensGene
    # 22480171 bases of 2133134836 (1.054%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 2100: ENSFCAT00000006929 no exonFrame on CDS exon 16
# 14578: ENSFCAT00000010965 no exonFrame on CDS exon 1
# 26634: ENSFCAT00000009384 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 felCat3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/felCat3/bed/ensGene.52
    featureBits felCat3 ensGene

############################################################################
#  fr2 - Fugu - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.52
    featureBits fr2 ensGene
    # 34560383 bases of 393312790 (8.787%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh swarm
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.52
    featureBits galGal3 ensGene
    # 30733654 bases of 1042591351 (2.948%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 52  (DONE - 2009-01-22 - hiram)
    ssh swarm
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 gasAcu1.ensGene.ra

    # requires extra attention after the all database for loop attempt
    cd /hive/data/genomes/gasAcu1/bed/ensGene.52/process
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29109 failed: 0

    #	then continue with the load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=52 gasAcu1.ensGene.ra \
	>> ensGene.52.load.log 2>&1

    featureBits gasAcu1 ensGene
    # 36789458 bases of 446627861 (8.237%) in intersection

############################################################################
#  gorGor1 - Gorilla - Ensembl Genes version 52  (DONE - 2008-12-19 - hiram)
    ssh kolossus
    cd /hive/data/genomes/gorGor1
    cat << '_EOF_' > gorGor1.ensGene.ra
# required db variable
db gorGor1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC
# names
liftUp /hive/data/genomes/gorGor1/jkStuff/ensemblLiftToUcsc.lift
# ignore the single gene that has an invalid structure from Ensembl:
skipInvalid yes
# 8939: ENSGGOT00000010340 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 gorGor1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/gorGor1/bed/ensGene.52
    featureBits gorGor1 ensGene
    # 23242041 bases of 2075548667 (1.120%) in intersection

############################################################################
#  hg18 - Human - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    #	v52 introduced genes on contig coordinates, needed to add
    #	the liftUp liftContigs.lft specification to the .ra file:
    ssh kolossus
    cd /hive/data/genomes/hg18
    cat << '_EOF_' > hg18.ensGene.ra
# required db variable
db hg18
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /hive/data/genomes/hg18/jkStuff/ensGene.haplotype.lift
liftUp /hive/data/genomes/hg18/jkStuff/liftContigs.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 hg18.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/hg18/bed/ensGene.52
    featureBits hg18 ensGene
    # 70647021 bases of 2881515245 (2.452%) in intersection

############################################################################
#  loxAfr1 - Elephant - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh swarm
    cd /hive/data/genomes/loxAfr1
    cat << '_EOF_' > loxAfr1.ensGene.ra
# required db variable
db loxAfr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the one gene that has an invalid structure from Ensembl:
# 13002: ENSLAFT00000000586 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 loxAfr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/loxAfr1/bed/ensGene.52
    featureBits loxAfr1 ensGene
    # 21706063 bases of 2295548473 (0.946%) in intersection

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 52  (DONE - 2009-01-21 -
#  hiram)
    ssh kolossus
    cd /hive/data/genomes/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 micMur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/micMur1/bed/ensGene.52
    featureBits micMur1 ensGene
    # 25659397 bases of 1852394361 (1.385%) in intersection

############################################################################
#  mm9 - Mouse - Ensembl Genes version 52  (DONE - 2009-01-22 - hiram)
     ssh swarm
    cd /hive/data/genomes/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 mm9.ensGene.ra

    #	ran into trouble with the ensGtp table load, the names of
    #	the proteins have gotten longer and the standard sql definition
    #	was no longer adequate.  So, after the load failed, increase
    #	protein char and index size to 23:
    cat << '_EOF_' > ensGtp.sql
# This creates the table holding the relationship between
# Ensembl genes, transcripts, and peptides.
CREATE TABLE ensGtp (
  gene char(20) NOT NULL,
  transcript char(20) NOT NULL,
  protein char(23) NOT NULL,
# INDICES
  INDEX(gene(19)),
  UNIQUE(transcript(19)),
  INDEX(protein(23))
) 
'_EOF_'
    # << happy emacs

    #	Then, running the rest of the load script:
    ssh hgwdev
    cd /hive/data/genomes/mm9/bed/ensGene.52
    ./finiLoad.csh > ensGene.52.log 2>&1

    cd /hive/data/genomes/mm9
    doEnsGeneUpdate.pl -ensVersion=52 -verbose=2 -continue=cleanup \
	mm9.ensGene.ra

    ssh hgwdev
    cd /hive/data/genomes/mm9/bed/ensGene.52
    featureBits mm9 ensGene
    # 63272128 bases of 2620346127 (2.415%) in intersection

############################################################################
#  monDom5 - Opossum - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/monDom5
    cat << '_EOF_' > monDom5.ensGene.ra
# required db variable
db monDom5
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 monDom5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/monDom5/bed/ensGene.52
    featureBits monDom5 ensGene
    # 33008124 bases of 3501660299 (0.943%) in intersection

############################################################################
#  myoLuc1 - Microbat - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh swarm
    cd /hive/data/genomes/myoLuc1
    cat << '_EOF_' > myoLuc1.ensGene.ra
# required db variable
db myoLuc1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 1265: ENSMLUT00000004658 no exonFrame on CDS exon 1
# 17770: ENSMLUT00000003427 no exonFrame on CDS exon 10
# 32743: ENSMLUT00000009601 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 myoLuc1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/myoLuc1/bed/ensGene.52
    featureBits myoLuc1 ensGene
    # 24630744 bases of 1673855868 (1.471%) in intersection

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh swarm
    cd /hive/data/genomes/ochPri2
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 10995: ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 ochPri2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ochPri2/bed/ensGene.52
    featureBits ochPri2 ensGene
    # 25342444 bases of 1923624051 (1.317%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly, 365 items
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 ornAna1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ornAna1/bed/ensGene.52
    featureBits ornAna1 ensGene
    # 24537443 bases of 1842236818 (1.332%) in intersection

############################################################################
#  oryCun1 - Rabbit - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kkr14u03
    cd /hive/data/genomes/oryCun1
    cat << '_EOF_' > oryCun1.ensGene.ra
# required db variable
db oryCun1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# 24994: ENSOCUT00000009485 no exonFrame on CDS exon 9
# 26897: ENSOCUT00000004627 no exonFrame on CDS exon 3
# 32794: ENSOCUT00000014840 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 oryCun1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryCun1/bed/ensGene.52
    featureBits oryCun1 ensGene
    # 22839824 bases of 2076044328 (1.100%) in intersection

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.52
    featureBits oryLat2 ensGene
    # 32301878 bases of 700386597 (4.612%) in intersection

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 otoGar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/otoGar1/bed/ensGene.52
    featureBits otoGar1 ensGene
    # 23597902 bases of 1969052059 (1.198%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.52
    featureBits panTro2 ensGene
    # 49983145 bases of 2909485072 (1.718%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh swarm
    cd /hive/data/genomes/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensembl names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.52
    featureBits ponAbe2 ensGene

############################################################################
#  proCap1 - Rock hyrax - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/proCap1
    cat << '_EOF_' > proCap1.ensGene.ra
# required db variable
db proCap1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 4595: ENSPCAT00000007286 no exonFrame on CDS exon 1
# 28894: ENSPCAT00000000699 no exonFrame on CDS exon 4

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 proCap1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/proCap1/bed/ensGene.52
    featureBits proCap1 ensGene
    # 25234470 bases of 2407847681 (1.048%) in intersection

############################################################################
#  pteVam1 - Megabat - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh swarm
    cd /hive/data/genomes/pteVam1
    cat << '_EOF_' > pteVam1.ensGene.ra
# required db variable
db pteVam1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
#       6381: ENSPVAT00000012919 no exonFrame on CDS exon 14
#       23522: ENSPVAT00000010661 no exonFrame on CDS exon 0

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 pteVam1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/pteVam1/bed/ensGene.52
    featureBits pteVam1 ensGene
    # 28832895 bases of 1839436660 (1.567%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.52
    featureBits rheMac2 ensGene
    # 44519581 bases of 2646704109 (1.682%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.52
    featureBits rn4 ensGene
    # 43758167 bases of 2571531505 (1.702%) in intersection

############################################################################
#  sacCer1 - S. cerevisiae - Ensembl Genes version 52  (DONE - 2009-01-21 -
#  hiram)
    ssh swarm
    cd /hive/data/genomes/sacCer1
    cat << '_EOF_' > sacCer1.ensGene.ra
# required db variable
db sacCer1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chr8/; s/^VII/chr7/; s/^VI/chr6/; s/^V/chr5/; s/^XIII/chr13/; s/^XII/chr12/; s/^XIV/chr14/; s/^XI/chr11/; s/^XVI/chr16/; s/^XV/chr15/; s/^X/chr10/; s/^III/chr3/; s/^IV/chr4/; s/^II/chr2/; s/^IX/chr9/; s/^I/chr1/; s/^MT/chrM/; /2-micron/d"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 sacCer1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sacCer1/bed/ensGene.52
    featureBits sacCer1 ensGene
    #	8908962 bases of 12156302 (73.287%) in intersection
    #	this complains about table 'gap' missing

############################################################################
#  sorAra1 - Shrew - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh swarm
    cd /hive/data/genomes/sorAra1
    cat << '_EOF_' > sorAra1.ensGene.ra
# required db variable
db sorAra1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 sorAra1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sorAra1/bed/ensGene.52
    featureBits sorAra1 ensGene
    # 19509213 bases of 1832864697 (1.064%) in intersection

############################################################################
#  speTri1 - Squirrel - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/speTri1
    cat << '_EOF_' > speTri1.ensGene.ra
# required db variable
db speTri1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# 1071: ENSSTOT00000007455 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 speTri1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/speTri1/bed/ensGene.52
    featureBits speTri1 ensGene
    # 21590338 bases of 1913367893 (1.128%) in intersection

############################################################################
#  tarSyr1 - Tarsier - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/tarSyr1
    cat << '_EOF_' > tarSyr1.ensGene.ra
# required db variable
db tarSyr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the 2,819 genes that do not translate properly to UCSC # coordinates
#       out of 43,529 genes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 tarSyr1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tarSyr1/bed/ensGene.52
    featureBits tarSyr1 ensGene
    # 20286934 bases of 2768536343 (0.733%) in intersection

############################################################################
#  tupBel1 - TreeShrew - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    ssh kolossus
    cd /hive/data/genomes/tupBel1
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# 2993: ENSTBET00000015831 no exonFrame on CDS exon 11
# 3556: ENSTBET00000013522 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 tupBel1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/tupBel1/bed/ensGene.52
    featureBits tupBel1 ensGene
    # 22808448 bases of 2137225476 (1.067%) in intersection

############################################################################
#  turTru1 - Dolphin - Ensembl Genes version 52  (DONE - 2009-01-21 - hiram)
    #	this one does not work currently.  Something is fishy with
    #	the protein vs. peptide names

############################################################################
#  vicPac1 - Alpaca - Ensembl Genes version 52  (DONE - 2009-01-22 - hiram)
    ssh kolossus
    cd /hive/data/genomes/vicPac1
    cat << '_EOF_' > vicPac1.ensGene.ra
# required db variable
db vicPac1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the 53 genes that do not translate properly to UCSC coordinates
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 vicPac1.ensGene.ra
    #	ran into a little problem with this one: one of the files at
    #	the Ensembl FTP site was not gzipped, and we were looking for
    #	and expecting to find the .gz file.  So, after the download failed,
    #	fixed up the doDownload.csh script to fetch the unzipped gtf file
    #	and gzip it for the rest of the sequence, then continued:
    doEnsGeneUpdate.pl -ensVersion=52 -verbose=2 -continue=process \
	vicPac1.ensGene.ra >>  ensGene.52.log 2>&1

    ssh hgwdev
    cd /hive/data/genomes/vicPac1/bed/ensGene.52
    featureBits vicPac1 ensGene
    #	17768749 bases of 1922910435 (0.924%) in intersection

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes version 52  (DONE - 2009-01-21 -
#  hiram)
    ssh swarm
    cd /hive/data/genomes/xenTro2
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=52 xenTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/xenTro2/bed/ensGene.52
    featureBits xenTro2 ensGene
    # 29158101 bases of 1359412157 (2.145%) in intersection

#############################################################################
#############################################################################
# ensembl 51 updates (DONE - 2008-12-05 - Hiram)
    # see also: more notes about how this is done in the "ensembl 50 updates"
    #	section below

############################################################################
#  canFam2 - Dog - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/canFam2
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 canFam2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/canFam2/bed/ensGene.51
    featureBits canFam2 ensGene
    # 34551623 bases of 2384996543 (1.449%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/canFam2/bed/ensGene.51

############################################################################
#  ce6 - C. elegans - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/ce6
    cat << '_EOF_' > ce6.ensGene.ra
# required db variable
db ce6
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([IVX]\)/chr\1/; s/^MtDNA/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 ce6.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ce6/bed/ensGene.51
    featureBits ce6 ensGene
    # 29421784 bases of 100281426 (29.339%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/ce6/bed/ensGene.51

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 ci2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ci2/bed/ensGene.51
    featureBits ci2 ensGene
    # 20106805 bases of 141233565 (14.237%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/ci2/bed/ensGene.51

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#	to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 cioSav2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/cioSav2/bed/ensGene.51
    featureBits cioSav2 ensGene
    # 16601350 bases of 173749524 (9.555%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/cioSav2/bed/ensGene.51

############################################################################
#  danRer5 - Zebrafish - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/danRer5
    cat << '_EOF_' > danRer5.ensGene.ra
# required db variable
db danRer5
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 danRer5.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/danRer5/bed/ensGene.51
    featureBits danRer5 ensGene
    # 36864148 bases of 1435609608 (2.568%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/danRer5/bed/ensGene.51

############################################################################
#  equCab2 - Horse - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#	translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 equCab2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/equCab2/bed/ensGene.51
    featureBits equCab2 ensGene
    # 39169781 bases of 2428790173 (1.613%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/equCab2/bed/ensGene.51

############################################################################
#  fr2 - Fugu - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 fr2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/fr2/bed/ensGene.51
    featureBits fr2 ensGene
    # 34554303 bases of 393312790 (8.785%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/fr2/bed/ensGene.51

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 galGal3.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/galGal3/bed/ensGene.51
    featureBits galGal3 ensGene
    # 30700613 bases of 1042591351 (2.945%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/galGal3/bed/ensGene.51

############################################################################
#  hg18 - Human - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/hg18
    cat << '_EOF_' > hg18.ensGene.ra
# required db variable
db hg18
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#	to UCSC simple haplotype coordinates
haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 hg18.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/hg18/bed/ensGene.51
    featureBits hg18 ensGene
    # 69963186 bases of 2881515245 (2.428%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/hg18/bed/ensGene.51

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 micMur1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/micMur1/bed/ensGene.51
    featureBits micMur1 ensGene
    # 25425991 bases of 1852394361 (1.373%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/micMur1/bed/ensGene.51

############################################################################
#  mm9 - Mouse - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 mm9.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/mm9/bed/ensGene.51
    featureBits mm9 ensGene
    # 60671138 bases of 2620346127 (2.315%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/mm9/bed/ensGene.51

############################################################################
#  oryLat2 - Medaka - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/oryLat2
    cat << '_EOF_' > oryLat2.ensGene.ra
# required db variable
db oryLat2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
# skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 oryLat2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/oryLat2/bed/ensGene.51
    featureBits oryLat2 ensGene
    # 32293719 bases of 700386597 (4.611%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/oryLat2/bed/ensGene.51

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 otoGar1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/otoGar1/bed/ensGene.51
    featureBits otoGar1 ensGene
    # 23463097 bases of 1969052059 (1.192%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/otoGar1/bed/ensGene.51

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 panTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/panTro2/bed/ensGene.51
    featureBits panTro2 ensGene
    # 49736660 bases of 2909485072 (1.709%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/panTro2/bed/ensGene.51

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#	to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/ponAbe2/bed/ensGene.51
    featureBits ponAbe2 ensGene
    # 37839545 bases of 3093572278 (1.223%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/ponAbe2/bed/ensGene.51

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 rheMac2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rheMac2/bed/ensGene.51
    featureBits rheMac2 ensGene
    # 44297247 bases of 2646704109 (1.674%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/rheMac2/bed/ensGene.51

############################################################################
#  rn4 - Rat - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 rn4.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/rn4/bed/ensGene.51
    featureBits rn4 ensGene
    # 43712046 bases of 2571531505 (1.700%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/rn4/bed/ensGene.51

############################################################################
#  sacCer1 - S. cerevisiae - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/sacCer1
    cat << '_EOF_' > sacCer1.ensGene.ra
# required db variable
db sacCer1
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chr8/; s/^VII/chr7/; s/^VI/chr6/; s/^V/chr5/; s/^XIII/chr13/; s/^XII/chr12/; s/^XIV/chr14/; s/^XI/chr11/; s/^XVI/chr16/; s/^XV/chr15/; s/^X/chr10/; s/^III/chr3/; s/^IV/chr4/; s/^II/chr2/; s/^IX/chr9/; s/^I/chr1/; s/^MT/chrM/; /2-micron/d"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 sacCer1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/sacCer1/bed/ensGene.51
    featureBits sacCer1 ensGene
    # table gap doesn't exist
    #	8908962 bases of 12156302 (73.287%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/sacCer1/bed/ensGene.51

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes version 51  (DONE - 2008-12-03 - hiram)
    ssh kkr14u07
    cd /hive/data/genomes/xenTro2
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 xenTro2.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/xenTro2/bed/ensGene.51
    featureBits xenTro2 ensGene
    # 29135083 bases of 1359412157 (2.143%) in intersection

    # *** All done!  (through the 'makeDoc' step)
    # *** Steps were performed in /hive/data/genomes/xenTro2/bed/ensGene.51

############################################################################
#############################################################################
# ensembl 50 updates (DONE - 2008-08-07 - Hiram)
    #	When a new Ensembl release is available.  Some manual work needs
    #	to be done to src/hg/utils/automation/EnsGeneAutomate.pm
    #	to specify the files for each release.
    #	Fixups:
    #	version correspondence with their date strings:
    #	add $verToDate[##] = "monYYYY"; 
    #	add new ## to @versionList
    #	create lists for FtpFileNames_##, FtpPeptideFileNames_## and
    #	FtpMySqlFileNames_##
    #	and associated reference pointers to those specific new lists
    #	These lists relate our database names with their file names.
    #	This could perhaps be done in an automated fashion by scanning
    #	their FTP directories and relating their scientific names to
    #	our names.  However, there is a difficulty involved even if that
    #	could be done.  And that is, the exact correspondence between
    #	which genome version they are annotating vs. the genome version
    #	we have here.  I prefer to do this manually once at the beginning.
    #	It can be argued this could be error prone since it is manually
    #	done.  But, for now it is good enough for me.  If someone else wants
    #	to take on this task, and it would actually be interesting, be my guest.
    #	new Dbs at Ensembl for build 50:
    #	bosTau4, ce6, tetNig2 (ucsc is tetNig1)
    #	new Dbs here for build 50
    #	micMur1 myoLuc1 ochPri2 speTri1
    #	any new Dbs listed above will need to be run manually the first time
    #	to debug their <db>.ensGene.ra configuration file in /cluster/data/<db>
    #	no Db here:
    #	aedAeg0 dm5 monDom5 sorAra0 tetNig2

    #	The following three shell scripts can be run to make up the
    #	three different lists.  All that needs to be filled in are the
    #	UCSC database names.  For the gtf file names:

# Write the ftp command script: anonymous login, change to the release-50
# gtf directory, request a recursive long listing with gtfFiles.fl as the
# second argument of ls (the file awk reads below), then quit.
printf '%s\n' \
    "user anonymous qa@ucsc" \
    "cd pub/release-50/gtf" \
    "ls -lR gtfFiles.fl" \
    "bye" > gtf50.rsp

# Feed the command script to ftp (no auto-login, verbose, no prompting).
ftp -n -v -i ftp.ensembl.org < gtf50.rsp

# Remember the most recent line starting with "./" and, for every line
# starting with "-", print that remembered line, a slash, and field 9.
# The sed pass then reshapes each path into a perl hash entry with the
# placeholder key 'abcDef0', to be replaced by hand with the UCSC db name.
awk '
/^\.\// { dir = $0 }
/^-/ { print dir "/" $9 }
' gtfFiles.fl \
    | sed -e "s#:/#/#; s#^./#'abcDef0' => '#; s/$/',/"

    #	for the peptide names:

# Write the ftp command script: anonymous login, change to the release-50
# fasta directory, request a recursive long listing with pepFiles.fl as
# the second argument of ls (the file awk reads below), then quit.
printf '%s\n' \
    "user anonymous qa@ucsc" \
    "cd pub/release-50/fasta" \
    "ls -lR pepFiles.fl" \
    "bye" > pep50.rsp

# Feed the command script to ftp (no auto-login, verbose, no prompting).
ftp -n -v -i ftp.ensembl.org < pep50.rsp

# Remember the most recent line starting with "./" and, for every line
# starting with "-", print that remembered line, a slash, and field 9.
# sed reshapes each path into a perl hash entry with the placeholder key
# 'abcDef0'; grep keeps only the lines matching pep.all.fa.gz.
awk '
/^\.\// { dir = $0 }
/^-/ { print dir "/" $9 }
' pepFiles.fl \
    | sed -e "s#:/#/#; s#^./#'abcDef0' => '#; s/$/',/" \
    | grep pep.all.fa.gz

    #	for the MySQL directory names:
# Write the ftp command script: anonymous login, change to the release-50
# mysql directory, request a recursive long listing with mysqlFiles.fl as
# the second argument of ls (the file awk reads below), then quit.
printf '%s\n' \
    "user anonymous qa@ucsc" \
    "cd pub/release-50/mysql" \
    "ls -lR mysqlFiles.fl" \
    "bye" > mysql50.rsp

# Feed the command script to ftp (no auto-login, verbose, no prompting).
ftp -n -v -i ftp.ensembl.org < mysql50.rsp

# Remember the most recent line starting with "./" and print it once for
# every line that mentions seq_region.txt.  sed strips the colons and
# reshapes the name into a perl hash entry with the placeholder key
# 'abcDef0'; grep keeps only the lines containing _core_.
awk '
/^\.\// { dir = $0 }
/seq_region.txt/ { print dir }
' mysqlFiles.fl \
    | sed -e "s#:##g; s#^./#'abcDef0' => '#; s/$/',/" \
    | grep _core_

    #	quick verification that all the UCSC database names were entered
    #	correctly, the counts on each name should all be == 3
    # Count how often each first-column name appears on lines of
    # EnsGeneAutomate.pm that mention "_50_" or ".50.".
    # grep -E replaces the obsolescent egrep spelling; same pattern.
    grep -E "_50_|\.50\." EnsGeneAutomate.pm | awk '{print $1}' | sort | uniq -c

    #	to see which DBs can be run:
    cd ~/kent/src/hg/utils/automation
    ./ensVersions 50 | grep -v "NOT FOUND" | sort

    #	manually ran the following to verify functionality and get
    #	ensGene.ra files established for new organisms:
    #	mm9 bosTau4 ce6 micMur1 myoLuc1 ochPri2 speTri1 hg18

    #	Then, running the following loop:
    # Run the Ensembl 50 update for each listed database in turn.  Each
    # run's output is captured in a per-database log, copied into the
    # combined log opened in /scratch/tmp, and then filed in that
    # database's bed/ensGene.50/ directory.
    cd /scratch/tmp
for D in canFam2 ci2 cioSav2 danRer5 dasNov1 echTel1 \
	equCab2 felCat3 fr2 galGal3 gasAcu1 loxAfr1 ornAna1 \
	oryCun1 oryLat1 otoGar1 panTro2 ponAbe2 rheMac2 rn4 sacCer1 \
	tupBel1 xenTro2
do
    # Skip this database if its directory is missing; otherwise the update,
    # its log and the mv below would all act on the previous directory.
    cd "/cluster/data/${D}" || {
	echo "ERROR: can not cd to /cluster/data/${D}, skipping" 1>&2
	continue
    }
    "$HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl" -verbose=2 \
	-ensVersion=50 "${D}.ensGene.ra" > "${D}.ensGene.update.50.log" 2>&1
    cat "${D}.ensGene.update.50.log"
    # Trailing slash on the destination: if the run failed before creating
    # bed/ensGene.50, mv reports an error instead of renaming the log to a
    # plain file called ensGene.50.
    mv "${D}.ensGene.update.50.log" bed/ensGene.50/
done > ensGene.update.50.log
    #	individual makeDoc entries are attached below
    #	Failed: gasAcu1 oryLat1
    #	OK: canFam2 ci2 cioSav2 danRer5 dasNov1 echTel1 equCab2 felCat3
    #	OK: fr2 galGal3 loxAfr1 ornAna1 oryCun1 otoGar1 panTro2 ponAbe2
    #	OK: rheMac2 rn4 sacCer1 (no gap table ?) tupBel1 xenTro2
    #	the ones that used to have skipInvalid need to be verified to
    #	see if that really still applies:
    #   skipInvalid: dasNov1 echTel1 felCat3 loxAfr1 ornAna1 oryCun1 oryLat1
    #	skipInvalid: tupBel1
    #	To check these skipInvalid:
    cd /cluster/data/<db>/bed/ensGene.50/process
    zcat <db>.allGenes.gp.gz | genePredCheck -db=<db> stdin
    #	you should see errors on the specific items mentioned in their
    #	<db>.ensGene.ra file
    #	the above were verified to have the same few errors as before.
    #	ornAna1 is a bit weird.  Ensembl seems to have Contigs that we do not.
    #	gasAcu1 and oryLat1 needed special lifts

    #	To finalize the build, update the "current" status in the trackVersion
    #	table in hgFixed
     hgsql -e \
'update trackVersion set dateReference="mar2008" where version="49";' hgFixed
     hgsql -e \
'update trackVersion set dateReference="current" where version="50";' hgFixed


    #	to establish a new ensGene.ra file, start out with simply:
# required db variable
db newDb
# do we need to translate geneScaffold coordinates
geneScaffolds yes
    #	It is probably a geneScaffold assembly, if not, leave this out
    #	run the script with -stop=process to see if it reports any
    #	errors at that point.  You will then know what type of
    #	nameTranslation sed string you need to add, or any other
    #	special lifts, and if there are bugs in some of the Ensembl
    #	predictions.  Add whatever new arguments you need to, then
    #	continue the script with --continue.  Remove the process
    #	directory to re-run the process step again.

    #	manually running mm9 to see if it works:
    cd /cluster/data/mm9
    $HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl \
            -verbose=2 -ensVersion=50 mm9.ensGene.ra > ensGene50.log 2>&1

    #	This run reports completion as follows:
############################################################################
#  mm9 - Mouse - Ensembl Genes version 50 (DONE - 2008-08-07 - hiram)
    ssh kkstore06
    cd /cluster/data/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 mm9.ensGene.ra
    ssh hgwdev
    cd /cluster/data/mm9/bed/ensGene.50
    featureBits mm9 ensGene
    # 60671138 bases of 2620346127 (2.315%) in intersection

############################################################################
#  bosTau4 - Cow - Ensembl Genes version 51  (DONE - 2008-12-03,04 - hiram)
    ssh kkr14u04
    cd /hive/data/genomes/bosTau4
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 bosTau4.ensGene.ra
    # broke down during process step, fix it:
    cd /hive/data/genomes/bosTau4/bed/ensGene.51/process
    mv bosTau4.allGenes.gp.gz bosTau4.allGenes.broken.gp.gz
    zcat bosTau4.allGenes.broken.gp.gz | grep -v AAFC03011182 | \
	gzip -c > bosTau4.allGenes.gp.gz
    # check it, from last line in doProcess.csh
    genePredCheck -db=bosTau4 bosTau4.allGenes.gp.gz
    #	checked: 29516 failed: 0
    #	now, continuing

    cd /hive/data/genomes/bosTau4
    doEnsGeneUpdate.pl -verbose=2 -ensVersion=51 -continue=load \
	bosTau4.ensGene.ra > ensGene51.load.log 2>&1
    ssh hgwdev
    cd /hive/data/genomes/bosTau4/bed/ensGene.51
    featureBits bosTau4 ensGene
    # 41425444 bases of 2731830700 (1.516%) in intersection

# *** All done!  (through the 'makeDoc' step)
# *** Steps were performed in /hive/data/genomes/bosTau4/bed/ensGene.51

############################################################################
#  bosTau4 - Cow - Ensembl V50 Genes (DONE - 2008-04-22 - hiram)
#	This one had to be done manually.  They have a mistake
#	in their gtf file.  A single line has a gene name in the
#	chromosome column.  The lifting business for chrUn seems to
#	be unnecessary as it was done for bosTau3.  It looks like
#	they properly have their chrUn names the same as ours
    ssh hgwdev
    cd /cluster/data/bosTau4

    ssh kkstore05
    cd /cluster/data/bosTau4
    cat << '_EOF_' > bosTau4.ensGene.ra
# required db variable
db bosTau4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs
    $HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl \
	-verbose=2 -ensVersion=50 -stop=process bosTau4.ensGene.ra \
	> ensGene50.process.log 2>&1
    #	the load step is broken due to the single bogus name.
    #	strip it out by fixing the load script to grep it out

    #	the fixed step:
zcat process/bosTau4.allGenes.gp.gz | grep -v AAFC03011182 \
    | hgLoadGenePred  -genePredExt bosTau4 \
    ensGene stdin >& loadGenePred.errors.txt


    $HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl \
	-continue=load -ensVersion=50 bosTau4.ensGene.ra \
	> ensGene50.load.log 2>&1

    $HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl \
	-continue=cleanup -ensVersion=50 bosTau4.ensGene.ra \
	> ensGene50.cleanup.log 2>&1

    ssh hgwdev
    cd /cluster/data/bosTau4/bed/ensGene.50
    featureBits bosTau4 ensGene
    # 41259085 bases of 2731830700 (1.510%) in intersection

############################################################################
#  ce6 - C. elegans - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore06
    cd /cluster/data/ce6
    cat << '_EOF_' > ce6.ensGene.ra
# required db variable
db ce6
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([IVX]\)/chr\1/; s/^MtDNA/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 ce6.ensGene.ra
    ssh hgwdev
    cd /cluster/data/ce6/bed/ensGene.50
    featureBits ce6 ensGene
    # 29421784 bases of 100281426 (29.339%) in intersection

############################################################################
#  micMur1 - Mouse lemur - Ensembl Genes version 50  (DONE - 2008-08-08 -
#  hiram)
    ssh kkstore05
    cd /cluster/data/micMur1
    cat << '_EOF_' > micMur1.ensGene.ra
# required db variable
db micMur1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 micMur1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/micMur1/bed/ensGene.50
    featureBits micMur1 ensGene
    # 25425991 bases of 1852394361 (1.373%) in intersection

############################################################################
#  myoLuc1 - Microbat - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore05
    cd /cluster/data/myoLuc1
    cat << '_EOF_' > myoLuc1.ensGene.ra
# required db variable
db myoLuc1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSMLUT00000004658 no exonFrame on CDS exon 1
# ENSMLUT00000003427 no exonFrame on CDS exon 10
# ENSMLUT00000009601 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 myoLuc1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/myoLuc1/bed/ensGene.50
    featureBits myoLuc1 ensGene
    # 24559555 bases of 1673855868 (1.467%) in intersection

############################################################################
#  ochPri2 - Pika - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore05
    cd /cluster/data/ochPri2
    cat << '_EOF_' > ochPri2.ensGene.ra
# required db variable
db ochPri2
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
#  ENSOPRT00000002716 no exonFrame on CDS exon 2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 ochPri2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/ochPri2/bed/ensGene.50
    featureBits ochPri2 ensGene
    # 25069963 bases of 1923624051 (1.303%) in intersection

############################################################################
#  speTri1 - Squirrel - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore05
    cd /cluster/data/speTri1
    cat << '_EOF_' > speTri1.ensGene.ra
# required db variable
db speTri1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the single gene that has an invalid structure from Ensembl:
# ENSSTOT00000007455 no exonFrame on CDS exon
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 speTri1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/speTri1/bed/ensGene.50
    featureBits speTri1 ensGene
    # 21525994 bases of 1913367893 (1.125%) in intersection

############################################################################
#  hg18 - Human - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore02
    cd /cluster/data/hg18
    cat << '_EOF_' > hg18.ensGene.ra
# required db variable
db hg18
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 hg18.ensGene.ra
    ssh hgwdev
    cd /cluster/data/hg18/bed/ensGene.50
    featureBits hg18 ensGene
    # 69928854 bases of 2881515245 (2.427%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore04
    cd /cluster/data/canFam2
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 canFam2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/canFam2/bed/ensGene.50
    featureBits canFam2 ensGene
    # 34550366 bases of 2384996543 (1.449%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes version 50  (DONE - 2008-08-08 -
#  hiram)
    ssh kkstore02
    cd /cluster/data/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 ci2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/ci2/bed/ensGene.50
    featureBits ci2 ensGene
    # 20124353 bases of 141233565 (14.249%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes version 50  (DONE - 2008-08-08 -
#  hiram)
    ssh kkstore02
    cd /cluster/data/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 cioSav2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/cioSav2/bed/ensGene.50
    featureBits cioSav2 ensGene
    # 16601350 bases of 173749524 (9.555%) in intersection

############################################################################
#  danRer5 - Zebrafish - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore06
    cd /cluster/data/danRer5
    cat << '_EOF_' > danRer5.ensGene.ra
# required db variable
db danRer5
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 danRer5.ensGene.ra
    ssh hgwdev
    cd /cluster/data/danRer5/bed/ensGene.50
    featureBits danRer5 ensGene
    # 36864148 bases of 1435609608 (2.568%) in intersection

############################################################################
#  dasNov1 - Armadillo - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore04
    cd /cluster/data/dasNov1
    cat << '_EOF_' > dasNov1.ensGene.ra
# required db variable
db dasNov1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSDNOT00000004471 no exonFrame on CDS exon 7
# ENSDNOT00000007696 no exonFrame on CDS exon 8
# ENSDNOT00000019234 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 dasNov1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/dasNov1/bed/ensGene.50
    featureBits dasNov1 ensGene
    # 22658142 bases of 2146362222 (1.056%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore02
    cd /cluster/data/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# ENSETET00000011172 no exonFrame on CDS exon 14
# ENSETET00000018714 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 echTel1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/echTel1/bed/ensGene.50
    featureBits echTel1 ensGene
    # 25441754 bases of 2111581369 (1.205%) in intersection

############################################################################
#  equCab2 - Horse - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore05
    cd /cluster/data/equCab2
    cat << '_EOF_' > equCab2.ensGene.ra
# required db variable
db equCab2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
#       translate Ensembl chrUnNNNN names to chrUn coordinates
liftUp /cluster/data/equCab2/jkStuff/chrUn.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 equCab2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/equCab2/bed/ensGene.50
    featureBits equCab2 ensGene
    # 39169781 bases of 2454424288 (1.596%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore05
    cd /cluster/data/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSFCAT00000006929 no exonFrame on CDS exon 15
# ENSFCAT00000009384 no exonFrame on CDS exon 0
# ENSFCAT00000010965 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 felCat3.ensGene.ra
    ssh hgwdev
    cd /cluster/data/felCat3/bed/ensGene.50
    featureBits felCat3 ensGene
    # 22020647 bases of 1642698377 (1.341%) in intersection

############################################################################
#  fr2 - Fugu - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore02
    cd /cluster/data/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 fr2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/fr2/bed/ensGene.50
    featureBits fr2 ensGene
    # 34554303 bases of 393312790 (8.785%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore03
    cd /cluster/data/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 galGal3.ensGene.ra
    ssh hgwdev
    cd /cluster/data/galGal3/bed/ensGene.50
    featureBits galGal3 ensGene
    # 30853095 bases of 1042591351 (2.959%) in intersection

############################################################################
#  loxAfr1 - Elephant - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore04
    cd /cluster/data/loxAfr1
    cat << '_EOF_' > loxAfr1.ensGene.ra
# required db variable
db loxAfr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the one gene that has an invalid structure from Ensembl:
# ENSLAFT00000000586 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 loxAfr1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/loxAfr1/bed/ensGene.50
    featureBits loxAfr1 ensGene
    # 23295034 bases of 2295548473 (1.015%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore05
    cd /cluster/data/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
'_EOF_'
#  << happy emacs

    #	There are 362 items that have invalid chrom messages

    doEnsGeneUpdate.pl -ensVersion=50 ornAna1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/ornAna1/bed/ensGene.50
    featureBits ornAna1 ensGene
    # 24505297 bases of 1842236818 (1.330%) in intersection

############################################################################
#  oryCun1 - Rabbit - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore04
    cd /cluster/data/oryCun1
    cat << '_EOF_' > oryCun1.ensGene.ra
# required db variable
db oryCun1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSOCUT00000004627 no exonFrame on CDS exon 3
# ENSOCUT00000009485 no exonFrame on CDS exon 9
# ENSOCUT00000014840 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 oryCun1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/oryCun1/bed/ensGene.50
    featureBits oryCun1 ensGene
    # 22733387 bases of 2076044328 (1.095%) in intersection

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore05
    cd /cluster/data/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 otoGar1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/otoGar1/bed/ensGene.50
    featureBits otoGar1 ensGene
    # 23463097 bases of 1969052059 (1.192%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore04
    cd /cluster/data/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 panTro2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/panTro2/bed/ensGene.50
    featureBits panTro2 ensGene
    # 49736660 bases of 2909485072 (1.709%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore02
    cd /cluster/data/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/ponAbe2/bed/ensGene.50
    featureBits ponAbe2 ensGene
    # 37737277 bases of 3093572278 (1.220%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore01
    cd /cluster/data/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 rheMac2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/rheMac2/bed/ensGene.50
    featureBits rheMac2 ensGene
    # 44297247 bases of 2646704109 (1.674%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore06
    cd /cluster/data/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 rn4.ensGene.ra
    ssh hgwdev
    cd /cluster/data/rn4/bed/ensGene.50
    featureBits rn4 ensGene
    # 43712046 bases of 2571531505 (1.700%) in intersection

############################################################################
#  sacCer1 - S. cerevisiae - Ensembl Genes version 50  (DONE - 2008-08-08 -
#  hiram)
    ssh kkstore03
    cd /cluster/data/sacCer1
    cat << '_EOF_' > sacCer1.ensGene.ra
# required db variable
db sacCer1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chr8/; s/^VII/chr7/; s/^VI/chr6/; s/^V/chr5/; s/^XIII/chr13/; s/^XII/chr12/; s/^XIV/chr14/; s/^XI/chr11/; s/^XVI/chr16/; s/^XV/chr15/; s/^X/chr10/; s/^III/chr3/; s/^IV/chr4/; s/^II/chr2/; s/^IX/chr9/; s/^I/chr1/; s/^MT/chrM/; /2-micron/d"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 sacCer1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/sacCer1/bed/ensGene.50
    featureBits sacCer1 ensGene
    #	8908962 bases of 12156302 (73.287%) in intersection

############################################################################
#  tupBel1 - TreeShrew - Ensembl Genes version 50  (DONE - 2008-08-08 - hiram)
    ssh kkstore05
    cd /cluster/data/tupBel1
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# ENSTBET00000015831 no exonFrame on CDS exon 11
# ENSTBET00000013522 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 tupBel1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/tupBel1/bed/ensGene.50
    featureBits tupBel1 ensGene
    # 22746299 bases of 2137225476 (1.064%) in intersection

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes version 50  (DONE - 2008-08-08 -
#  hiram)
    ssh kkstore04
    cd /cluster/data/xenTro2
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 xenTro2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/xenTro2/bed/ensGene.50
    featureBits xenTro2 ensGene
    # 29150618 bases of 1359412157 (2.144%) in intersection

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 51  (DONE - 2008-12-04 - hiram)
    # requires extra attention after the all database for loop attempt
    cd /hive/data/genomes/gasAcu1/bed/ensGene.51/process
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp
    #	verify OK
    genePredCheck -db=gasAcu1 gasAcu1.allGenes.gp.gz
    #	checked: 29096 failed: 0

    #	then continue with the load
    cd /hive/data/genomes/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=51 gasAcu1.ensGene.ra \
	> ensGene.51.load.log 2>&1

    #	it responds with the following make doc output:
########
#  gasAcu1 - Stickleback - Ensembl Genes version 51  (DONE - 2008-12-04 hiram)
    ssh kkr14u07
    cd /hive/data/genomes/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=51 gasAcu1.ensGene.ra
    ssh hgwdev
    cd /hive/data/genomes/gasAcu1/bed/ensGene.51
    featureBits gasAcu1 ensGene
    # 36787204 bases of 446627861 (8.237%) in intersection

# *** All done!  (through the 'makeDoc' step)
# *** Steps were performed in /hive/data/genomes/gasAcu1/bed/ensGene.51

############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes version 50  (DONE - 2008-08-11 - hiram)
    # requires extra attention after the all database for loop attempt
    # Re-lift the processed gene predictions with two liftUp passes (the
    # contigsToScaffolds and UCSC.chromToScaffoldSansGaps lift files) and
    # leave the result as gasAcu1.allGenes.gp.gz for the load step.
    cd /cluster/data/gasAcu1/bed/ensGene.50/process
    # set the un-lifted file aside under a new name
    # (this line was recorded with a doubled "mv mv", which fails)
    mv gasAcu1.allGenes.gp.gz gasAcu1.allGenes.gp.beforeLift.gz
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin
    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    gzip gasAcu1.scaffolds.gp
    gzip gasAcu1.allGenes.gp

    #	then continue with the load
    cd /cluster/data/gasAcu1
    doEnsGeneUpdate.pl -continue=load -ensVersion=50 gasAcu1.ensGene.ra \
	> bed/ensGene.50/load.log 2>&1

    #	it responds with the following make doc output:

#  gasAcu1 - Stickleback - Ensembl Genes version 50  (DONE - 2008-08-11 -
#  hiram)
    ssh kkstore05
    cd /cluster/data/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=50 gasAcu1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/gasAcu1/bed/ensGene.50
    featureBits gasAcu1 ensGene
    # 36956798 bases of 446627861 (8.275%) in intersection

############################################################################
#  oryLat1 - Medaka - Ensembl Genes version 50 (DONE - 2008-08-11 - hiram)
    #	need to eliminate the bad lifts due to our chrUn error at this time
    cd  /cluster/data/oryLat1/bed/ensGene.50
    grep "invalid chrom" *.errors.txt | cut -d\  -f2 | sort -u > badLifts.name
    hgsql -N -e "select transcript from ensGtp;" oryLat1 \
	| sort > ensGtp.transcript
    comm -13 ensGene.name ensGtp.transcript | sort > ensGtp.not.ensGene.name
    comm -13 ensGene.name ensPep.name | sort > ensPep.not.ensGene.name
    #	do the badLifts account for all the missing peptides:
    comm -12 badLifts.name ensPep.not.ensGene.name | wc -l
    #	2608 -> yes, this is the count of missing peptides.
    #	So, remove this set of business from ensPep table
    # Delete every name listed in badLifts.name from ensPep.  Names are read
    # one per line on fd 3, so they are never word-split or glob-expanded
    # and hgsql's stdin is left untouched.
    while read -r N <&3
    do
	[ -n "$N" ] || continue
	hgsql -e "delete from ensPep where name=\"$N\";" oryLat1
	echo "$N"
    done 3< badLifts.name
    #	And from the ensGtp table
    while read -r N <&3
    do
	[ -n "$N" ] || continue
	hgsql -e "delete from ensGtp where transcript=\"$N\";" oryLat1
	echo "$N"
    done 3< badLifts.name
    #
    genePredCheck -db=oryLat1 ensGene
    #	checked: 22463 failed: 0

#############################################################################
#############################################################################
# ensembl 49 updates (DONE - 2008-03-31 - Hiram)

#	hg18 and loxAfr1 were done manually to verify operations, then,
#	all at once, except for those two:
    ssh hgwdev
    cd /scratch/tmp
    hgsql -N -e "select db from trackVersion where version=48;" hgFixed \
	| sort -u | egrep -v "hg18|loxAfr1" | while read DB
do
    echo $DB
    cd /cluster/data/${DB} && \
	$HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl \
	    -verbose=2 -ensVersion=49 ${DB}.ensGene.ra
done > ensGene.49.update.log 2>&1

    #	bosTau3 failed due to the peptides to gene ratio check
    #	oryLat1 failed due to the peptides to gene ratio check
    #	fr2 failed due to chrMT naming problem - chrMT is new to their release
    #	gasAcu1 failed due to attempted genePredCheck from kkr1u00 ?
    #	actually gasAcu1 needs special processing, it has an extra couple
    #	of lifts to be done.
    #	mm9 failed, says "download" was already done
    #	sacCer1 failed, due to incorrect peptide file name
    #	felCat3, ornAna1, otoGar1, tupBel1 failed due to wget failures
    # Retry the v49 update for the databases whose download step failed:
    # remove the partial download directory, then run the update again.
    for DB in felCat3 ornAna1 otoGar1 tupBel1
do
    echo $DB
    rm -fr /cluster/data/${DB}/bed/ensGene.49/download
    # the subshell keeps this cd from changing the directory of the main
    # shell, so later relative paths still resolve where the loop started
    ( cd /cluster/data/${DB} && \
	$HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl \
	    -verbose=2 -ensVersion=49 ${DB}.ensGene.ra )
done > ensGene.49.secondTry.log 2>&1

    #	Interestingly, when the chooseFileServer discovers the file server
    #	is busy, it goes to chooseWorkhorse and gets one of the kki nodes
    #	which can not wget.  So, trying a couple of these again, and
    #	the new to v49 genome ponAbe2:
    # Third attempt: remove any existing download directory and run the v49
    # update again for these databases.
    for DB in felCat3 mm9 ponAbe2
do
    echo $DB
    rm -fr /cluster/data/${DB}/bed/ensGene.49/download
    # the subshell keeps this cd from changing the directory of the main
    # shell for whatever is run after the loop
    ( cd /cluster/data/${DB} && \
	$HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl \
	    -verbose=2 -ensVersion=49 ${DB}.ensGene.ra )
done > ensGene.49.thirdTry.log 2>&1
    #	that worked just fine.

############################################################################
#  The version 49 update individual entries follow:
############################################################################
#  bosTau3 - Cow - Ensembl Genes (DONE - 2008-04-22 - hiram)
#	This one had to be done manually.  There was a chrUn lift file that
#	needed to be made to turn the Ensembl chrUn coordinates into
#	the UCSC chrUn.003.N contig coordinates
#	It was run with a geneScaffolds yes to fetch the MySQL tables,
#	Then a script was run:
    ssh hgwdev
    cd /cluster/data/bosTau3
    cat << '_EOF_' > jkStuff/chrUnLiftAcross.pl
#!/usr/bin/env perl
#
# Read chrUn.seq_region.txt from the current directory (whitespace separated
# columns: region_id, name, type, size) and print to stdout one tab-separated
# line per region, placing the regions end to end on chrUn:
#	chrUn  start  end  name  0  size  +
# Consecutive regions are separated by a fixed gap.

use strict;
use warnings;

my $regionFile = "chrUn.seq_region.txt";
# three-argument open with a lexical handle; report the system error on failure
open (my $fh, "<", $regionFile) or die "can not read $regionFile: $!";

my $start = 0;
my $end = 0;
my $gap = 10000;	# bases skipped between one region and the next

while (my $line = <$fh>) {
    chomp $line;
    my ($region_id, $name, $type, $size) = split('\s+', $line);
    $end = $start + $size;
    printf "chrUn\t%d\t%d\t%s\t%d\t%d\t+\n", $start, $end, $name, 0, $size;
    $start += $size + $gap;
}

close ($fh);
'_EOF_'
    # << happy emacs
    chmod +x jkStuff/chrUnLiftAcross.pl
    cd bed/ensGene.49/download
    ../../../jkStuff/chrUnLiftAcross.pl > ../../../jkStuff/chrUn.liftAcross.txt
    #	then use that chrUn.liftAcross.txt in the process script procedure
    #	after that, comment out the gene Scaffolds and -continue=load
    ssh kkstore05
    cd /cluster/data/bosTau3
    cat << '_EOF_' > bosTau3.ensGene.ra
# required db variable
db bosTau3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9UX][0-9n]*\)/chr\1/; /^MT/d"
# cause SQL tables to be fetched to see if chrUn can be fixed up
# geneScaffolds yes
'_EOF_'
#  << happy emacs
    $HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl \
	-ensVersion=49 -stop=process bosTau3.ensGene.ra
    #	do the manual fixups as described above

    doEnsGeneUpdate.pl -continue=load -ensVersion=49 bosTau3.ensGene.ra
    ssh hgwdev
    cd /cluster/data/bosTau3/bed/ensGene.49
    featureBits bosTau3 ensGene
    # 39278215 bases of 2731807384 (1.438%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore04
    cd /cluster/data/canFam2
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 canFam2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/canFam2/bed/ensGene.49
    featureBits canFam2 ensGene
    # 34551622 bases of 2384996543 (1.449%) in intersection
############################################################################
#  ci2 - C. intestinalis - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore02
    cd /cluster/data/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 ci2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/ci2/bed/ensGene.49
    featureBits ci2 ensGene
    # 20121618 bases of 141233565 (14.247%) in intersection

############################################################################
#  cioSav2 - C. savignyi - Ensembl Genes (DONE - 2008-04-03 - hiram)
    ssh kkstore02
    cd /cluster/data/cioSav2
    cat << '_EOF_' > cioSav2.ensGene.ra
# required db variable
db cioSav2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
# nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 cioSav2.ensGene.ra
    #	the names in this assembly exceeded our usual size of 18 characters
    #	have to do the load manually with a customized ensGtp.sql
    #	to set the index sizes to 19
    ssh hgwdev
    cd /cluster/data/cioSav2/bed/ensGene.49
    featureBits cioSav2 ensGene
    # 16603725 bases of 173749524 (9.556%) in intersection
############################################################################
#  danRer5 - Zebrafish - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore06
    cd /cluster/data/danRer5
    cat << '_EOF_' > danRer5.ensGene.ra
# required db variable
db danRer5
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 danRer5.ensGene.ra
    ssh hgwdev
    cd /cluster/data/danRer5/bed/ensGene.49
    featureBits danRer5 ensGene
    # 36884539 bases of 1435609608 (2.569%) in intersection
 
############################################################################
#  dasNov1 - Armadillo - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore04
    cd /cluster/data/dasNov1
    cat << '_EOF_' > dasNov1.ensGene.ra
# required db variable
db dasNov1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSDNOT00000004471 no exonFrame on CDS exon 7
# ENSDNOT00000007696 no exonFrame on CDS exon 8
# ENSDNOT00000019234 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 dasNov1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/dasNov1/bed/ensGene.49
    featureBits dasNov1 ensGene
    # 22682674 bases of 2146362222 (1.057%) in intersection

############################################################################
#  echTel1 - Tenrec - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore02
    cd /cluster/data/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# ENSETET00000011172 no exonFrame on CDS exon 14
# ENSETET00000018714 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 echTel1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/echTel1/bed/ensGene.49
    featureBits echTel1 ensGene
    # 25450282 bases of 2111581369 (1.205%) in intersection

############################################################################
#  felCat3 - Cat - Ensembl Genes (DONE - 2008-04-01 - hiram)
    ssh kkstore05
    cd /cluster/data/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSFCAT00000006929 no exonFrame on CDS exon 15
# ENSFCAT00000009384 no exonFrame on CDS exon 0
# ENSFCAT00000010965 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 felCat3.ensGene.ra
    ssh hgwdev
    cd /cluster/data/felCat3/bed/ensGene.49
    featureBits felCat3 ensGene
    # 20984470 bases of 1642698377 (1.277%) in intersection

############################################################################
#  galGal3 - Chicken - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore03
    cd /cluster/data/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 galGal3.ensGene.ra
    ssh hgwdev
    cd /cluster/data/galGal3/bed/ensGene.49
    featureBits galGal3 ensGene
    # 30853095 bases of 1042591351 (2.959%) in intersection

############################################################################

#  gasAcu1 - Stickleback - Ensembl Genes (DONE - 2008-04-02 - hiram)
    ssh kkstore05
    cd /cluster/data/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 gasAcu1.ensGene.ra

    #	to fix the processing failures, in the process directory:
    zcat gasAcu1.allGenes.gp.beforeLift.gz \
	| liftUp -extGenePred -type=.gp gasAcu1.scaffolds.gp \
	    ../../../jkStuff/contigsToScaffolds.lft carry stdin

    liftUp -extGenePred gasAcu1.allGenes.gp \
	../../../jkStuff/UCSC.chromToScaffoldSansGaps.lft carry \
	    gasAcu1.scaffolds.gp
    gzip gasAcu1.scaffolds.gp
    #	then continue with the load
    doEnsGeneUpdate.pl -continue=load -ensVersion=49 gasAcu1.ensGene.ra

    #	check coverage of the loaded ensGene table
    ssh hgwdev
    cd /cluster/data/gasAcu1/bed/ensGene.49
    featureBits gasAcu1 ensGene
    # 36957312 bases of 446627861 (8.275%) in intersection
############################################################################
#  fr2 - Fugu - Ensembl Genes (DONE - 2008-04-02 - hiram)
    #	fixed the fr2.ensGene.ra file to translate MT into chrM
    ssh kkstore02
    cd /cluster/data/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
nameTranslation "s/^MT/chrM/;"
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 fr2.ensGene.ra
    #	check coverage of the loaded ensGene table
    ssh hgwdev
    cd /cluster/data/fr2/bed/ensGene.49
    featureBits fr2 ensGene
    # 34552659 bases of 393312790 (8.785%) in intersection

############################################################################
#  mm9 - Mouse - Ensembl Genes (DONE - 2008-04-01 - hiram)
    ssh kkstore06
    cd /cluster/data/mm9
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 mm9.ensGene.ra
    ssh hgwdev
    cd /cluster/data/mm9/bed/ensGene.49
    featureBits mm9 ensGene
    # 60655001 bases of 2620346127 (2.315%) in intersection

############################################################################
#  ornAna1 - Platypus - Ensembl Genes (DONE - 2008-04-03 - hiram)
    ssh kkstore05
    cd /cluster/data/ornAna1
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 ornAna1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/ornAna1/bed/ensGene.49
    featureBits ornAna1 ensGene
    # 24473045 bases of 1842236818 (1.328%) in intersection

############################################################################
#  oryCun1 - Rabbit - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore04
    cd /cluster/data/oryCun1
    cat << '_EOF_' > oryCun1.ensGene.ra
# required db variable
db oryCun1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSOCUT00000004627 no exonFrame on CDS exon 3
# ENSOCUT00000009485 no exonFrame on CDS exon 9
# ENSOCUT00000014840 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 oryCun1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/oryCun1/bed/ensGene.49
    featureBits oryCun1 ensGene
    # 22761080 bases of 2076044328 (1.096%) in intersection

############################################################################
#  oryLat1 - Medaka - Ensembl Genes (DONE - 2008-03-31 - 04-15 - hiram)
    ssh kkstore04
    cd /cluster/data/oryLat1
    cat << '_EOF_' > oryLat1.ensGene.ra
# required db variable
db oryLat1
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 2,687 genes that haven't lifted properly yet
skipInvalid yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 oryLat1.ensGene.ra
    featureBits oryLat1 ensGene
    #	29680340 bases of 700386597 (4.238%) in intersection

    #	version 49 update had a big change in the Medaka genes.
    #	Many of the new genes are in the Ensembl Gene Scaffold
    #	(actually here ultracontigs) coordinate space that will not
    #	translate to our chrUn assembly.  So, some fixups are
    #	necessary to get joinerCheck to be a bit more happy.
    ssh hgwdev
    cd ~/kent/src/hg/makeDb/schema
    joinerCheck -keys -database=oryLat1 -identifier=ensemblTranscriptId \
	all.joiner
# Checking keys on database oryLat1
# oryLat1.ensGtp.transcript - hits 22447 of 25134
# Error: 2687 of 25134 elements of oryLat1.ensGtp.transcript are not in key ensGene.name line 1726 of all.joiner
# Example miss: ENSORLT00000022895
# oryLat1.ensPep.name - hits 22053 of 24661
# Error: 2608 of 24661 elements of oryLat1.ensPep.name are not in key ensGene.name line 1728 of all.joiner
# Example miss: ENSORLT00000022872
    cd  /cluster/data/oryLat1/bed/ensGene.49
    #	second field of every "invalid chrom" error line, unique and sorted
    grep "invalid chrom" *.errors.txt | cut -d\  -f2 | sort -u > badLifts.name
    #	sorted name lists from each table, comm needs sorted input.
    #	NOTE: the original record did not show how ensGene.name and
    #	ensPep.name were made; presumably in the same manner as
    #	ensGtp.transcript - the two commands below are a reconstruction.
    hgsql -N -e "select name from ensGene;" oryLat1 \
	| sort > ensGene.name
    hgsql -N -e "select name from ensPep;" oryLat1 \
	| sort > ensPep.name
    hgsql -N -e "select transcript from ensGtp;" oryLat1 \
	| sort > ensGtp.transcript
    #	names present in ensGtp or ensPep but absent from ensGene
    comm -13 ensGene.name ensGtp.transcript | sort > ensGtp.not.ensGene.name
    comm -13 ensGene.name ensPep.name | sort > ensPep.not.ensGene.name
    #	do the badLifts account for all the missing peptides:
    comm -12 badLifts.name ensPep.not.ensGene.name | wc -l
    #	2608 -> yes, this is the count of missing peptides.
    #	So, remove this set of business from ensPep table
    for N in `cat badLifts.name`
do
    hgsql -e "delete from ensPep where name=\"$N\";" oryLat1
    echo $N
done
    #	And from the ensGtp table
    for N in `cat badLifts.name`
do
    hgsql -e "delete from ensGtp where transcript=\"$N\";" oryLat1
    echo $N
done

############################################################################

############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes (DONE - 2008-04-03 - hiram)
    ssh kkstore05
    cd /cluster/data/otoGar1
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 otoGar1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/otoGar1/bed/ensGene.49
    featureBits otoGar1 ensGene
    # 23497004 bases of 1969052059 (1.193%) in intersection

############################################################################
#  panTro2 - Chimp - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore04
    cd /cluster/data/panTro2
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 panTro2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/panTro2/bed/ensGene.49
    featureBits panTro2 ensGene
    # 51932042 bases of 2909485072 (1.785%) in intersection

############################################################################
#  ponAbe2 - Orangutan - Ensembl Genes (DONE - 2008-04-01 - hiram)
    ssh kkstore02
    cd /cluster/data/ponAbe2
    cat << '_EOF_' > ponAbe2.ensGene.ra
# required db variable
db ponAbe2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optional haplotype lift-down from Ensembl full chrom coordinates
#       to UCSC simple haplotype coordinates
# haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 ponAbe2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/ponAbe2/bed/ensGene.49
    featureBits ponAbe2 ensGene
    # 37382766 bases of 3093572278 (1.208%) in intersection

############################################################################
#  rheMac2 - Rhesus - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore01
    cd /cluster/data/rheMac2
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 rheMac2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/rheMac2/bed/ensGene.49
    featureBits rheMac2 ensGene
    # 44288934 bases of 2646704109 (1.673%) in intersection

############################################################################
#  rn4 - Rat - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore06
    cd /cluster/data/rn4
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 rn4.ensGene.ra
    ssh hgwdev
    cd /cluster/data/rn4/bed/ensGene.49
    featureBits rn4 ensGene
    # 43706532 bases of 2571531505 (1.700%) in intersection

############################################################################
#   sacCer1 - S. cerevisiae - Ensembl Genes (DONE - 2008-04-03 - hiram)
    ssh kkstore03
    cd /cluster/data/sacCer1
    cat << '_EOF_' > sacCer1.ensGene.ra
# required db variable
db sacCer1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chr8/; s/^VII/chr7/; s/^VI/chr6/; s/^V/chr5/; s/^XIII/chr13/; s/^XII/chr12/; s/^XIV/chr14/; s/^XI/chr11/; s/^XVI/chr16/; s/^XV/chr15/; s/^X/chr10/; s/^III/chr3/; s/^IV/chr4/; s/^II/chr2/; s/^IX/chr9/; s/^I/chr1/; s/^MT/chrM/; /2-micron/d"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 sacCer1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/sacCer1/bed/ensGene.49
    featureBits sacCer1 ensGene
    # 8908962 bases of 12156302 (73.287%) in intersection
    #	this genome has trouble with featureBits: table gap doesn't exist

############################################################################
#  tetNig1 - Tetraodon - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore03
    cd /cluster/data/tetNig1
    cat << '_EOF_' > tetNig1.ensGene.ra
# required db variable
db tetNig1
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 tetNig1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/tetNig1/bed/ensGene.49
    featureBits tetNig1 ensGene
    # 37844709 bases of 342403326 (11.053%) in intersection

############################################################################
#  tupBel1 - TreeShrew - Ensembl Genes (DONE - 2008-04-03 - hiram)
    ssh kkstore05
    cd /cluster/data/tupBel1
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
# ignore genes that do not properly convert to a gene pred, and contig
#       names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# ENSTBET00000015831 no exonFrame on CDS exon 11
# ENSTBET00000013522 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 tupBel1.ensGene.ra
    ssh hgwdev
    cd /cluster/data/tupBel1/bed/ensGene.49
    featureBits tupBel1 ensGene
    # 22740204 bases of 2137225476 (1.064%) in intersection

############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes (DONE - 2008-03-31 - hiram)
    ssh kkstore04
    cd /cluster/data/xenTro2
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=49 xenTro2.ensGene.ra
    ssh hgwdev
    cd /cluster/data/xenTro2/bed/ensGene.49
    featureBits xenTro2 ensGene
    # 29161645 bases of 1359412157 (2.145%) in intersection

############################################################################

############################################################################
# ensembl 48 updates
############################################################################
#	The records below were generated in the following manner:
    ssh hgwdev
    cd /tmp
#	resume each genome's update at the makeDoc step; the output of this
#	loop is the set of records that follow.  The && keeps the update
#	from being run in the wrong directory when the cd fails.
for D in bosTau3 canFam2 ci2 danRer5 dasNov1 echTel1 felCat3 fr2 galGal3 \
	gasAcu1 hg18 loxAfr1 mm9 ornAna1 oryCun1 oryLat1 otoGar1 panTro2 \
	rheMac2 rn4 sacCer1 tetNig1 tupBel1 xenTro2
do
    cd "/cluster/data/${D}" && \
    "$HOME/kent/src/hg/utils/automation/doEnsGeneUpdate.pl" \
        -verbose=0 -ensVersion=48 "${D}.ensGene.ra" -continue=makeDoc
done

    #	The database names were found by the following:
    cd /cluster/data
    ls -d */*.ensGene.ra | sed -e "s#/.*##"

    #  The following is the output of the above for loop:

############################################################################
#  bosTau3 - Cow - Ensembl Genes (DONE - 2008-02-29 - hiram)
    ssh kkstore05
    cd /cluster/data/bosTau3
    cat << '_EOF_' > bosTau3.ensGene.ra
# required db variable
db bosTau3
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^Un/d; s/^\([0-9X][0-9]*\)/chr\1/; /^MT/d"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 bosTau3.ensGene.ra
    featureBits bosTau3 ensGene
    # 35387571 bases of 2731807384 (1.295%) in intersection

############################################################################
#  canFam2 - Dog - Ensembl Genes (DONE - 2008-02-29 - hiram)
    ssh kkstore04
    cd /cluster/data/canFam2
    cat << '_EOF_' > canFam2.ensGene.ra
# required db variable
db canFam2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 canFam2.ensGene.ra
    featureBits canFam2 ensGene
    # 34551622 bases of 2384996543 (1.449%) in intersection

############################################################################
#  ci2 - C. intestinalis - Ensembl Genes (DONE - 2008-02-29 - hiram)
    ssh kkstore02
    cd /cluster/data/ci2
    cat << '_EOF_' > ci2.ensGene.ra
# required db variable
db ci2
# optional nameTranslation, the sed command that will transform
#       Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][pq]\)/chr0\1/; s/^\([0-9][0-9][pq]\)/chr\1/; "
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 ci2.ensGene.ra
    featureBits ci2 ensGene
    # 20121618 bases of 141233565 (14.247%) in intersection

############################################################################
#  danRer5 - Zebrafish - Ensembl Genes (DONE - 2008-03-03 - hiram)
    ssh kkstore06
    cd /cluster/data/danRer5
    cat << '_EOF_' > danRer5.ensGene.ra
# required db variable
db danRer5
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 danRer5.ensGene.ra
    featureBits danRer5 ensGene
    # 36884539 bases of 1435609608 (2.569%) in intersection
############################################################################
############################################################################
#  dasNov1 - Armadillo - Ensembl Genes (DONE - 2008-02-29 - hiram)
    ssh kkstore04
    cd /cluster/data/dasNov1
    cat << '_EOF_' > dasNov1.ensGene.ra
# required db variable
db dasNov1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#	names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSDNOT00000004471 no exonFrame on CDS exon 7
# ENSDNOT00000007696 no exonFrame on CDS exon 8
# ENSDNOT00000019234 no exonFrame on CDS exon 0
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 dasNov1.ensGene.ra
    featureBits dasNov1 ensGene
    # 22684492 bases of 2146362222 (1.057%) in intersection
############################################################################
############################################################################
#  echTel1 - Tenrec - Ensembl Genes (DONE - 2008-02-29 - hiram)
    ssh kkstore02
    cd /cluster/data/echTel1
    cat << '_EOF_' > echTel1.ensGene.ra
# required db variable
db echTel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#	names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# ENSETET00000011172 no exonFrame on CDS exon 14
# ENSETET00000018714 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 echTel1.ensGene.ra
    featureBits echTel1 ensGene
    # 25450282 bases of 2111581369 (1.205%) in intersection
############################################################################
############################################################################
#  felCat3 - Cat - Ensembl Genes (DONE - 2008-02-29 - hiram)
    ssh kkstore05
    cd /cluster/data/felCat3
    cat << '_EOF_' > felCat3.ensGene.ra
# required db variable
db felCat3
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#	names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSFCAT00000006929 no exonFrame on CDS exon 15
# ENSFCAT00000009384 no exonFrame on CDS exon 0
# ENSFCAT00000010965 no exonFrame on CDS exon 1

'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 felCat3.ensGene.ra
    featureBits felCat3 ensGene
    # 20984470 bases of 1642698377 (1.277%) in intersection
############################################################################
############################################################################
#  fr2 - Fugu - Ensembl Genes (DONE - 2008-03-03 - hiram)
    ssh kkstore02
    cd /cluster/data/fr2
    cat << '_EOF_' > fr2.ensGene.ra
# required db variable
db fr2
# lift Ensembl scaffolds to UCSC chrUn coordinates
liftUp /cluster/data/fr2/jkStuff/liftAll.lft
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 fr2.ensGene.ra
    featureBits fr2 ensGene
    # 32309856 bases of 393312790 (8.215%) in intersection
############################################################################
############################################################################
#  galGal3 - Chicken - Ensembl Genes (DONE - 2008-03-03 - hiram)
    ssh kkstore03
    cd /cluster/data/galGal3
    cat << '_EOF_' > galGal3.ensGene.ra
# required db variable
db galGal3
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9EWXYZ][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 galGal3.ensGene.ra
    featureBits galGal3 ensGene
    # 30853095 bases of 1042591351 (2.959%) in intersection
############################################################################
############################################################################
#  gasAcu1 - Stickleback - Ensembl Genes (DONE - 2008-03-03 - hiram)
    ssh kkstore05
    cd /cluster/data/gasAcu1
    cat << '_EOF_' > gasAcu1.ensGene.ra
# required db variable
db gasAcu1
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^group\([IUVX]\)/chr\1/; s/^MT/chrM/;"
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 gasAcu1.ensGene.ra
    featureBits gasAcu1 ensGene
    # 36960585 bases of 446627861 (8.275%) in intersection
############################################################################
############################################################################
#  hg18 - Human - Ensembl Genes (DONE - 2008-02-27 - hiram)
    ssh kkstore02
    cd /cluster/data/hg18
    cat << '_EOF_' > hg18.ensGene.ra
# required db variable
db hg18
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
# optional haplotype lift-down from Ensembl full chrom coordinates
#	to UCSC simple haplotype coordinates
haplotypeLift /cluster/data/hg18/jkStuff/ensGene.haplotype.lift
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 hg18.ensGene.ra
    featureBits hg18 ensGene
    # 66667439 bases of 2881515245 (2.314%) in intersection
############################################################################
############################################################################
#  loxAfr1 - Elephant - Ensembl Genes (DONE - 2008-02-29 - hiram)
    ssh kkstore04
    cd /cluster/data/loxAfr1
    cat << '_EOF_' > loxAfr1.ensGene.ra
# required db variable
db loxAfr1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#	names that are not in the UCSC assembly
skipInvalid yes
# ignore the one gene that has an invalid structure from Ensembl:
# ENSLAFT00000000586 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 loxAfr1.ensGene.ra
    featureBits loxAfr1 ensGene
    # 23294087 bases of 2295548473 (1.015%) in intersection
############################################################################
############################################################################
#  mm9 - Mouse - Ensembl Genes (DONE - 2008-02-29 - hiram)
#  Record of the mm9 update: write mm9.ensGene.ra in /cluster/data/mm9, run
#  doEnsGeneUpdate.pl on it with -ensVersion=48, then run featureBits on the
#  ensGene table (output kept as the last comment).
    ssh kkstore06
    cd /cluster/data/mm9
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution; the backslashes in the sed
    # expression reach the file unchanged.
    # NOTE(review): the closing line repeats the quotes, which looks like the
    # csh/tcsh here-document form - confirm before running under sh or bash.
    cat << '_EOF_' > mm9.ensGene.ra
# required db variable
db mm9
# optional liftRandoms yes/no or absent
liftRandoms yes
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    # nameTranslation above: a leading digit, X or Y (plus any digits that
    # follow) gets a chr prefix, and a leading MT becomes chrM.
    doEnsGeneUpdate.pl -ensVersion=48 mm9.ensGene.ra
    featureBits mm9 ensGene
    # 62429979 bases of 2620346127 (2.383%) in intersection
############################################################################
############################################################################
#  ornAna1 - Platypus - Ensembl Genes (DONE - 2008-02-26 - hiram)
#  Record of the ornAna1 update: write ornAna1.ensGene.ra in
#  /cluster/data/ornAna1, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore05
    cd /cluster/data/ornAna1
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > ornAna1.ensGene.ra
# required db variable
db ornAna1
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^\(X[0-9]\)/chr\1/; s/^MT/chrM/"
# ignore genes that do not properly convert to a gene pred, and contig
#	names that are not in the UCSC assembly
skipInvalid yes
'_EOF_'
#  << happy emacs

    # nameTranslation above: a leading run of digits gets a chr prefix, a
    # leading X is prefixed only when a digit follows it, and a leading MT
    # becomes chrM.
    doEnsGeneUpdate.pl -ensVersion=48 ornAna1.ensGene.ra
    featureBits ornAna1 ensGene
    # 24477086 bases of 1842236818 (1.329%) in intersection
############################################################################
############################################################################
#  oryCun1 - Rabbit - Ensembl Genes (DONE - 2008-02-29 - hiram)
#  Record of the oryCun1 update: write oryCun1.ensGene.ra in
#  /cluster/data/oryCun1, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore04
    cd /cluster/data/oryCun1
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > oryCun1.ensGene.ra
# required db variable
db oryCun1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# ignore genes that do not properly convert to a gene pred, and contig
#	names that are not in the UCSC assembly
skipInvalid yes
# ignore the three genes that have invalid structures from Ensembl:
# ENSOCUT00000004627 no exonFrame on CDS exon 3
# ENSOCUT00000009485 no exonFrame on CDS exon 9
# ENSOCUT00000014840 no exonFrame on CDS exon 3
'_EOF_'
#  << happy emacs

    # No nameTranslation is set in this .ra file; only geneScaffolds and
    # skipInvalid are enabled.
    doEnsGeneUpdate.pl -ensVersion=48 oryCun1.ensGene.ra
    featureBits oryCun1 ensGene
    # 22763228 bases of 2076044328 (1.096%) in intersection
############################################################################
############################################################################
#  oryLat1 - Medaka - Ensembl Genes (DONE - 2008-03-03 - hiram)
#  Record of the oryLat1 update: write oryLat1.ensGene.ra in
#  /cluster/data/oryLat1, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore04
    cd /cluster/data/oryLat1
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > oryLat1.ensGene.ra
# required db variable
db oryLat1
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9][0-9]*\)/chr\1/; s/^MT/chrM/"
# ignore 66 genes that haven't lifted properly yet
skipInvalid yes
'_EOF_'
#  << happy emacs

    # nameTranslation above: a leading run of digits gets a chr prefix and a
    # leading MT becomes chrM; there is no rule for X or Y names here.
    doEnsGeneUpdate.pl -ensVersion=48 oryLat1.ensGene.ra
    featureBits oryLat1 ensGene
    # 31757387 bases of 700386597 (4.534%) in intersection
############################################################################
############################################################################
#  otoGar1 - Bushbaby - Ensembl Genes (DONE - 2008-02-29 - hiram)
#  Record of the otoGar1 update: write otoGar1.ensGene.ra in
#  /cluster/data/otoGar1, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore05
    cd /cluster/data/otoGar1
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > otoGar1.ensGene.ra
# required db variable
db otoGar1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/otoGar1/jkStuff/ensGene.lft
'_EOF_'
#  << happy emacs

    # NOTE(review): the liftUp file jkStuff/ensGene.lft is referenced above
    # but its creation is not recorded in this section - confirm it exists
    # before re-running.
    doEnsGeneUpdate.pl -ensVersion=48 otoGar1.ensGene.ra
    featureBits otoGar1 ensGene
    # 23498471 bases of 1969052059 (1.193%) in intersection
############################################################################
############################################################################
#  panTro2 - Chimp - Ensembl Genes (DONE - 2008-02-26 - hiram)
#  Record of the panTro2 update: write panTro2.ensGene.ra in
#  /cluster/data/panTro2, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore04
    cd /cluster/data/panTro2
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > panTro2.ensGene.ra
# required db variable
db panTro2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    # nameTranslation above: a leading digit, X or Y (plus any digits that
    # follow) gets a chr prefix, a leading MT becomes chrM, and a leading Un
    # becomes chrUn.
    doEnsGeneUpdate.pl -ensVersion=48 panTro2.ensGene.ra
    featureBits panTro2 ensGene
    # 51969416 bases of 2909485072 (1.786%) in intersection
############################################################################
############################################################################
#  rheMac2 - Rhesus - Ensembl Genes (DONE - 2008-02-28 - hiram)
#  Record of the rheMac2 update: write rheMac2.ensGene.ra in
#  /cluster/data/rheMac2, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore01
    cd /cluster/data/rheMac2
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > rheMac2.ensGene.ra
# required db variable
db rheMac2
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "/^109[0-9]*/d; /^MT/d; s/^\([0-9XY][0-9]*\)/chr\1/;"
'_EOF_'
#  << happy emacs

    # nameTranslation above: the two delete rules come first, so lines that
    # start with 109 or with MT are removed rather than renamed; whatever
    # remains gets a chr prefix on a leading digit, X or Y.
    # NOTE(review): the reason those names are dropped is not recorded here.
    doEnsGeneUpdate.pl -ensVersion=48 rheMac2.ensGene.ra
    featureBits rheMac2 ensGene
    # 44305902 bases of 2646704109 (1.674%) in intersection
############################################################################
############################################################################
#  rn4 - Rat - Ensembl Genes (DONE - 2008-02-29 - hiram)
#  Record of the rn4 update: write rn4.ensGene.ra in /cluster/data/rn4, run
#  doEnsGeneUpdate.pl on it with -ensVersion=48, then run featureBits on the
#  ensGene table (output kept as the last comment).
    ssh kkstore06
    cd /cluster/data/rn4
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > rn4.ensGene.ra
# required db variable
db rn4
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/"
# optionally update the knownToEnsembl table after ensGene updated
knownToEnsembl yes
'_EOF_'
#  << happy emacs

    # nameTranslation above: a leading digit, X or Y (plus any digits that
    # follow) gets a chr prefix, and a leading MT becomes chrM.
    doEnsGeneUpdate.pl -ensVersion=48 rn4.ensGene.ra
    featureBits rn4 ensGene
    # 44218002 bases of 2571531505 (1.720%) in intersection
############################################################################
############################################################################
#  sacCer1 - S. cerevisiae - Ensembl Genes (DONE - 2008-03-03 - hiram)
#  Record of the sacCer1 update: write sacCer1.ensGene.ra in
#  /cluster/data/sacCer1, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as comments below).
    ssh kkstore03
    cd /cluster/data/sacCer1
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > sacCer1.ensGene.ra
# required db variable
db sacCer1
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^VIII/chr8/; s/^VII/chr7/; s/^VI/chr6/; s/^V/chr5/; s/^XIII/chr13/; s/^XII/chr12/; s/^XIV/chr14/; s/^XI/chr11/; s/^XVI/chr16/; s/^XV/chr15/; s/^X/chr10/; s/^III/chr3/; s/^IV/chr4/; s/^II/chr2/; s/^IX/chr9/; s/^I/chr1/; s/^MT/chrM/; /2-micron/d"
'_EOF_'
#  << happy emacs

    # nameTranslation above: leading Roman numerals are rewritten to chr1
    # through chr16 and a leading MT to chrM; lines containing 2-micron are
    # deleted.  Longer numerals are listed before the shorter numerals they
    # begin with (VIII, VII, VI, V), and a rewritten name starts with chr, so
    # no later rule can match it again.
    doEnsGeneUpdate.pl -ensVersion=48 sacCer1.ensGene.ra
    featureBits sacCer1 ensGene
    # recorded featureBits output follows; every line is kept as a comment so
    # the apostrophes and parentheses in it are not read as shell syntax
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # table gap doesn't exist
    # 8908962 bases of 12156302 (73.287%) in intersection
############################################################################
############################################################################
#  tetNig1 - Tetraodon - Ensembl Genes (DONE - 2008-03-03 - hiram)
#  Record of the tetNig1 update: write tetNig1.ensGene.ra in
#  /cluster/data/tetNig1, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore03
    cd /cluster/data/tetNig1
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > tetNig1.ensGene.ra
# required db variable
db tetNig1
# optional nameTranslation, the sed command that will transform
#	Ensemble names to UCSC names.  With quotes just to make sure.
nameTranslation "s/^\([0-9XY][0-9]*\)/chr\1/; s/^MT/chrM/; s/^Un/chrUn/"
'_EOF_'
#  << happy emacs

    # nameTranslation above: a leading digit, X or Y (plus any digits that
    # follow) gets a chr prefix, a leading MT becomes chrM, and a leading Un
    # becomes chrUn.
    doEnsGeneUpdate.pl -ensVersion=48 tetNig1.ensGene.ra
    featureBits tetNig1 ensGene
    # 37871392 bases of 342403326 (11.060%) in intersection
############################################################################
############################################################################
#  tupBel1 - TreeShrew - Ensembl Genes (DONE - 2008-02-29 - hiram)
#  Record of the tupBel1 update: write tupBel1.ensGene.ra in
#  /cluster/data/tupBel1, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore05
    cd /cluster/data/tupBel1
    # The here-document word is quoted, so the body is copied to the .ra file
    # without variable or command substitution.
    cat << '_EOF_' > tupBel1.ensGene.ra
# required db variable
db tupBel1
# do we need to translate geneScaffold coordinates
geneScaffolds yes
# after geneScaffold conversions, change Ensembl chrom names to UCSC names
liftUp /cluster/data/tupBel1/jkStuff/ensGene.lft
# ignore genes that do not properly convert to a gene pred, and contig
#	names that are not in the UCSC assembly
skipInvalid yes
# ignore the two genes that have invalid structures from Ensembl:
# ENSTBET00000015831 no exonFrame on CDS exon 11
# ENSTBET00000013522 no exonFrame on CDS exon 1
'_EOF_'
#  << happy emacs

    # NOTE(review): the liftUp file jkStuff/ensGene.lft is referenced above
    # but its creation is not recorded in this section - confirm it exists
    # before re-running.
    doEnsGeneUpdate.pl -ensVersion=48 tupBel1.ensGene.ra
    featureBits tupBel1 ensGene
    # 22740204 bases of 2137225476 (1.064%) in intersection
############################################################################
############################################################################
#  xenTro2 - X. tropicalis - Ensembl Genes (DONE - 2008-03-03 - hiram)
#  Record of the xenTro2 update: write xenTro2.ensGene.ra in
#  /cluster/data/xenTro2, run doEnsGeneUpdate.pl on it with -ensVersion=48,
#  then run featureBits on the ensGene table (output kept as the last comment).
    ssh kkstore04
    cd /cluster/data/xenTro2
    # The .ra file written here carries only the db setting; none of the
    # optional settings used by other sections of this file are present.
    cat << '_EOF_' > xenTro2.ensGene.ra
# required db variable
db xenTro2
'_EOF_'
#  << happy emacs

    doEnsGeneUpdate.pl -ensVersion=48 xenTro2.ensGene.ra
    featureBits xenTro2 ensGene
    # 29163250 bases of 1359412157 (2.145%) in intersection
############################################################################
############################################################################


############################################################################
#  Archive of Robert's procedure to update the Ensembl gene tracks
############################################################################
#load ensembl gene predictions (build 43) and related tables by downloading from ensembl ftp site (Robert Mar 14,2007)
#following tables are loaded
#ensGene ensGeneChk ensGeneChkDetails ensGeneXref ensGtp ensInfo ensPseudo superfamily sfDescription knownToEnsembl
#scripts used:
#hgLoadEnsembl - main driver script that calls ensemblDownload, ensemblDbImport and loadEnsembl
#ensemblDownload - downloads data from ensembl ftp site to directory, creates tables.tmp containing list of ensembl tables to be loaded
#ensemblDbImport - loads tables into temporary database using the native ensembl mysql table structure 
#exportEnsembl - creates genePred files from the ensembl temporary database 
#geneCheckAndLoad - runs gene-check on a genePred and loads the two details tables into the database.
#ensemblSuperfamily -  load superfamily track using Ensembl cross reference
#loadEnsembl - loads data created by exportEnsembl into ensGene, creates and loads ensInfo table with attributes
#ensemblGetAll - generates script to load all ensembl builds, requires manual editing to add ucsc database
#ensGeneToGenePred - awk script called by loadEnsembl that converts dump of ensembl exons to genePred format
#mkRandomNTLift - read ctgPos table and make lift file
cd ~/kent/src/hg/makeDb/outside/ensembl
make
mkdir -p /cluster/store8/ensembl/run.build43
cd /cluster/store8/ensembl/run.build43
mkRandomNTLift hg18 > lift.hg18
mkRandomNTLift mm8 > lift.mm8
hgLoadEnsembl -l /cluster/store8/ensembl/run.build43/lift.hg18 homo_sapiens core_43_36e /cluster/store8/ensembl/homo_sapiens_43_36e hg18
hgLoadEnsembl -l /cluster/store8/ensembl/run.build43/lift.mm8 mus_musculus core_43_36d /cluster/store8/ensembl/mus_musculus_43_36d mm8
hgLoadEnsembl rattus_norvegicus core_43_34m /cluster/store8/ensembl/rattus_norvegicus_43_34m rn4
hgLoadEnsembl pan_troglodytes core_43_21b /cluster/store8/ensembl/pan_troglodytes_43_21b panTro2
hgLoadEnsembl canis_familiaris core_43_2a /cluster/store8/ensembl/canis_familiaris_43_2a canFam2
#hgLoadEnsembl danio_rerio core_43_6d /cluster/store8/ensembl/danio_rerio_43_6d danRer4
