# for emacs: -*- mode: sh; -*-


# Macaca mulatta (rheMac1)
# Mmul_0.1 (January 10, 2005)
# contigs and scaffolds, no chromosomes
# Mmul_0.1 is a preliminary assembly of the rhesus monkey, Macaca
# mulatta, using whole genome shotgun (WGS) reads from small and medium
# insert clones.
# ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/
# anonymous ftp, no login/password needed

# DOWNLOAD SEQUENCE (DONE March 9, 2005 Robert)
# Fetch the contig, linear-scaffold and repeat-read file sets from the
# Baylor HGSC ftp site into /cluster/data/rheMac1/downloads, one
# subdirectory per file set.  /cluster/data/rheMac1 is a symlink to the
# real storage on store10.
    ssh kksilo
    mkdir /cluster/store10/rheMac1
    ln -s /cluster/store10/rheMac1 /cluster/data/rheMac1
    mkdir /cluster/data/rheMac1/downloads
    cd /cluster/data/rheMac1/downloads

    mkdir contigs
    (cd contigs; wget \
        ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/contigs/Mmul20041001.agp \
        ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/contigs/Mmul20041001.fa.qual.gz \
        ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/contigs/Mmul20041001.fa.gz)

    mkdir linearScaffolds
    (cd linearScaffolds; wget \
        ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/linearScaffolds/Rmac20041001-freeze-assembly.fa.gz \
        ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/linearScaffolds/Rmac20041001-freeze-assembly.fa.qual.gz)

    mkdir repeat-reads
    (cd repeat-reads; wget \
        ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/repeats/Mmul20041001.reptigs.fa.gz \
        ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/repeats/Mmul20041001.reptigs.fa.qual.gz)

# GUNZIP
# run from the downloads directory; expands the archives in every subdirectory
  gunzip */*.gz

# CHECK FILES

# Check that IDs in .fa match IDs in .qual

  ssh kksilo
  cd /cluster/data/rheMac1/downloads
  # foreach Bin0 contigs linearScaffolds repeat-reads
  grep ">" *.fa > id.fa
  grep ">" *.qual > id.qual
  diff id.fa id.qual

  /cluster/bin/i386/checkAgpAndFa contigs/Mmul20041001.agp linearScaffolds/Rmac20041001-freeze-assembly.fa >& check.out

# SPLIT SCAFFOLDS
# Write one fasta file per scaffold under scaffolds/ (faSplit byname with
# -outDirDepth=3), then record the resulting files in scaffolds.list.

  ssh kksilo
  cd /cluster/data/rheMac1
  mkdir scaffolds
  nice /cluster/bin/i386/faSplit byname downloads/linearScaffolds/Rmac20041001-freeze-assembly.fa scaffolds/ -outDirDepth=3
  # generate list: scaffolds.list.0 is the raw find output (directories
  # included), scaffolds.list keeps only the lines that mention "fa"
  find scaffolds -print | tee scaffolds.list.0 | grep fa > scaffolds.list

# MAKE 2BIT NIB FILE
# Pack the unmasked scaffold fasta into a single .2bit file, convert it
# back to fasta to compare against the input, and link it under /gbdb.

  ssh kksilo
  cd /cluster/data/rheMac1/downloads/linearScaffolds
  nice /cluster/bin/i386/faToTwoBit Rmac20041001-freeze-assembly.fa rheMac1.2bit
  # round trip back to fasta so the result can be compared with the input
  nice /cluster/bin/i386/twoBitToFa rheMac1.2bit check.fa
  # ../.. is /cluster/data/rheMac1, where the 2bit is kept from here on
  mv rheMac1.2bit ../..
  diff -q Rmac20041001-freeze-assembly.fa check.fa
  # could also use cmp
  # note: size match
  ssh hgwdev
  mkdir /gbdb/rheMac1
  mkdir /gbdb/rheMac1/nib
  # could get rid of the nib in this path, 2bit isn't a nib (4bit)
  ln -s /cluster/data/rheMac1/rheMac1.2bit /gbdb/rheMac1/nib

# CREATE DATABASE (DONE March 9, 2005 Robert)
# Create the rheMac1 database and seed its grp table with a copy of
# hg17's, adding a primary key on NAME.

  ssh hgwdev
  hgsql hg17 -e 'create database rheMac1;'
  hgsql rheMac1 -e 'create table grp (PRIMARY KEY(NAME)) select * from hg17.grp;'
  # add rows to hgcentraltest on hgwbeta in dbDb and defaultDb
  # set orderKey past dog, before chimp

# CREATE GAP TABLE
# Create the gap table from gap2.sql and fill it with the "fragment"
# lines of the contig AGP file.

  ssh hgwdev
  cd kent/src/hg/lib
  # remove bin column for now
  hgsql rheMac1 < gap2.sql
  cd /cluster/data/rheMac1/downloads/contigs
  grep fragment Mmul20041001.agp > fragment.txt
  # run the load as a complete, terminated statement; typed into an
  # interactive hgsql session without the trailing ';' it never executes
  hgsql rheMac1 -e "load data local infile 'fragment.txt' into table gap;"
  # create trackDb/monkey/rheMac1/gap.html

# CREATE CONTIG TABLE
# NOTE: this step was left unfinished -- ctgPos2.sql is loaded and the
# Contig lines are extracted, but the conversion and load commands at the
# end are still commented out.

  ssh hgwdev
  cd kent/src/hg/lib
  # this doesn't include strand or contig start/end
  hgsql rheMac1 < ctgPos2.sql
  cd /cluster/data/rheMac1/downloads/contigs
  grep Contig Mmul20041001.agp > Contig.out
  ssh kksilo
  cd /cluster/data/rheMac1/downloads/contigs
# TODO: getContig.pl could not be located, so the remaining steps were not run:
#  getContig.pl < Contig.out > ctgPos2
#  ssh hgwdev
#  load data local infile 'ctgPos2' into table ctgPos2

# CREATE CHROMINFO TABLE
# Loads scaffold coordinates into scaffoldCoords and scaffold sizes into
# chromInfo, then points every chromInfo row at the single 2bit file.
# an improvement here would be to have getMaxCoord output the 2bit file name
  ssh hgwdev
  cd /cluster/data/rheMac1
  # get coordinates from agp and put in database table
  getcoords.pl < downloads/contigs/Mmul20041001.agp > scaffolds.coords
  # table definition is borrowed from the bosTau1 build
  hgsql rheMac1 < /cluster/data/bosTau1/coords.sql
  hgsql rheMac1 -e "load data local infile 'scaffolds.coords' into table scaffoldCoords;"
  faSize downloads/linearScaffolds/Rmac20041001-freeze-assembly.fa -detailed=on > chrom.sizes

  # calculate maximum coords; check that start coord is always 1
##  /cluster/home/heather/bin/i386/GetMaxCoord > getMaxCoord.rheMac1

  # chrom.sizes carries no fileName value (it is filled in by the update
  # below), so the table definition must allow fileName to be null:
  # take a local copy of chromInfo.sql and edit it by hand if it does not.
  cp ~baertsch/kent/src/hg/lib/chromInfo.sql .
  hgsql rheMac1 < chromInfo.sql
  hgsql rheMac1 -e "load data local infile 'chrom.sizes' into table chromInfo;"
  # all sequence lives in one 2bit file, so every row gets the same
  # fileName -- the missing WHERE clause is intentional
  hgsql rheMac1 -e 'update chromInfo set fileName = "/gbdb/rheMac1/nib/rheMac1.2bit";'

# LOAD GAP & GOLD TABLES FROM AGP
# NOTE(review): the CREATE GAP TABLE step earlier in this file also fills a
# gap table from this same AGP file -- confirm which load is meant to be kept.
    ssh hgwdev
    hgGoldGapGl -noGl rheMac1 /cluster/data/rheMac1/downloads/contigs/Mmul20041001.agp 

# REPEATMASKER
# using the split scaffold fa files generated earlier

# note the version of RepeatMasker in use; will want this for download README
  cat /cluster/bluearc/RepeatMasker/Libraries/version
  # RepBase Update 9.04, RM database version 20040702

# do a trial run
  ssh hgwdev
  cd /cluster/data/rheMac1/scaffolds/0/0/0
  /cluster/bluearc/RepeatMasker/RepeatMasker -ali -s -spec macaca SCAFFOLD1000.fa
# configure
  cd /cluster/data/rheMac1
  mkdir jkStuff
  cp /cluster/data/anoGam1/jkStuff/RMAnopheles jkStuff/RMRhesus
  # change references anoGam1 --> rheMac1
  mkdir RMRun

  # /bin/csh makeJoblist-RM.csh
  # (csh) Walk the three-level scaffolds/<i>/<j>/<k> tree and write one
  # RMRhesus job line per scaffold fa file; each line carries a
  # {check out line+ ...} clause naming the expected .out file.
  # The loop appends to the job list, so remove any list left over from an
  # earlier attempt first -- otherwise every job would be listed twice.
  rm -f /cluster/data/rheMac1/RMRun/RMJobs.2

  cd scaffolds
  foreach i (0 1 2 3 4 5 6 7 8 9)
    cd $i
    foreach j (0 1 2 3 4 5 6 7 8 9)
      cd $j
      foreach k (0 1 2 3 4 5 6 7 8 9)
        cd $k
        foreach f (*.fa)
          echo /cluster/data/rheMac1/jkStuff/RMRhesus \
               /cluster/data/rheMac1/scaffolds/$i/$j/$k $f \
              '{'check out line+ /cluster/data/rheMac1/scaffolds/$i/$j/$k/$f.out'}' \
            >> /cluster/data/rheMac1/RMRun/RMJobs.2
        end
        # back up to scaffolds/$i/$j
        cd ..
      end
      # back up to scaffolds/$i
      cd ..
    end
    # back up to scaffolds
    cd ..
  end

  # do the run  (DONE March 11, 2005 Robert)
  ssh kk
  cd /cluster/data/rheMac1/RMRun
  para create RMJobs.2
  para try
  para push

  # concatenate into one output file; took about 90 minutes
  ssh kksilo
  cd /cluster/data/rheMac1
  mkdir repeats
  /bin/tcsh concatRM.csh
  cd repeats
  # keep only the lines that mention a SCAFFOLD, which drops the header
  # that each per-scaffold file contributed;
  # then, add back in an initial header
  # this is so hgLoadOut is happy
  # (repeats.header is not created anywhere in this doc -- it has to be
  # prepared by hand)
  grep SCAFFOLD repeats.all > repeats.clean
  cat repeats.header repeats.clean > repeats.final
  # set aside the "nothing found" messages and load everything else
  grep "There were no repetitive sequences found" repeats.final > repeats.notfound
  grep -v "There were no repetitive sequences found" repeats.final > repeats.out
  ssh hgwdev
  # the new login starts in the home directory; go back to the output
  cd /cluster/data/rheMac1/repeats
  hgLoadOut rheMac1 repeats.out
  # Strange perc. fields (output of the original run):
  # Processing repeats.out
  # Strange perc. field -0.1 line 647418 of repeats.out
  # Strange perc. field -20.5 line 972234 of repeats.out
  # Strange perc. field -42.4 line 972234 of repeats.out
  # Strange perc. field -0.6 line 972234 of repeats.out
  # Strange perc. field -6.1 line 1121046 of repeats.out
  # Strange perc. field -1.2 line 1349524 of repeats.out
  # Strange perc. field -4.5 line 1418655 of repeats.out
  # Strange perc. field -0.5 line 1527820 of repeats.out
  # Strange perc. field -4.6 line 1826806 of repeats.out
  # Strange perc. field -1.2 line 1826806 of repeats.out
  # Strange perc. field -0.3 line 1992385 of repeats.out
  # Strange perc. field -5.6 line 2168548 of repeats.out
  # Strange perc. field -89.9 line 2783366 of repeats.out
  # Strange perc. field -18.2 line 2783366 of repeats.out
  # Strange perc. field -72.8 line 2783366 of repeats.out
  # Strange perc. field -1.0 line 2892969 of repeats.out
  # Strange perc. field -2.7 line 2892969 of repeats.out
  # Strange perc. field -5.5 line 2963499 of repeats.out
  # Strange perc. field -28.4 line 3041043 of repeats.out
  # Strange perc. field -9.1 line 3041043 of repeats.out
  # Strange perc. field -0.4 line 3167171 of repeats.out
  # Strange perc. field -0.2 line 3167171 of repeats.out
  # Strange perc. field -0.5 line 3467859 of repeats.out
  # Strange perc. field -0.1 line 3487882 of repeats.out
  # Strange perc. field -4.6 line 3637051 of repeats.out
  # Strange perc. field -1.2 line 3637051 of repeats.out
  # Strange perc. field -2.1 line 4155697 of repeats.out
  # Strange perc. field -2.4 line 4155697 of repeats.out
  # Strange perc. field -3.5 line 4423839 of repeats.out
  # Strange perc. field -0.6 line 4781433 of repeats.out
  # note: 883 records dropped due to repStart > repEnd

  # the load left the table named repeats_rmsk; give it the standard name
  hgsql rheMac1 -e 'rename table repeats_rmsk to rmsk;'

  # select count(*) from rmsk;
  # select count(*) from rmsk where repName like "Bov%";

# SIMPLE REPEATS
# put the results throughout the scaffold 0/0/0 directories,
# same as RepeatMasker, to avoid too many files in the same directory
  ssh kksilo
  cd /cluster/data/rheMac1
  mkdir bed
  cd bed
  mkdir simpleRepeat
  cd simpleRepeat

  /bin/csh makeJoblist-trf.csh
  # tcsh: >&! sends stdout+stderr to trf.log, overwriting any old log;
  # the operator must be written without embedded spaces
  tcsh trf-run.csh >&! trf.log &

  # trf-run.csh runs in the background: wait for it to finish before
  # concatenating
  # concatenate into one output file; took about an hour
  /bin/tcsh concatTRF.csh

  # load
  # (-sqlTable=file must be a single word; with spaces around '=' the
  # extra words are taken as additional input files)
  /cluster/bin/i386/hgLoadBed rheMac1 simpleRepeat trf.bed \
    -sqlTable=/cluster/home/heather/kent/src/hg/lib/simpleRepeat.sql

#Reading trf.bed
#Loaded 628275 elements of size 16
#Sorted
#Saving bed.tab
#Loading rheMac1

  # extra indexes on (chrom, chromStart) and (chrom, chromEnd)
  hgsql rheMac1 -e 'create index chrom_2 on simpleRepeat (chrom(16), chromStart);'
  hgsql rheMac1 -e 'create index chrom_3 on simpleRepeat (chrom(16), chromEnd);'


# CREATE MASKED FA USING REPEATMASKER AND FILTERED TRF FILES
# Soft-mask the scaffold fasta with the cleaned RepeatMasker output that
# the REPEATMASKER step wrote to repeats/repeats.out.
# The maskOutFa warnings from the original run are pasted after the command.

  ssh kksilo
  cd /cluster/data/rheMac1
  /cluster/bin/i386/maskOutFa -soft downloads/linearScaffolds/Rmac20041001-freeze-assembly.fa repeats/repeats.out rheMac1.softmask.fa
WARNING: negative rEnd: -364 SCAFFOLD25002:50185-50335 L1MCc
WARNING: negative rEnd: -17 SCAFFOLD65002:22962-23303 L1MCc
WARNING: negative rEnd: -168 SCAFFOLD15011:216786-217104 L1MEd
WARNING: negative rEnd: -63 SCAFFOLD15011:218103-218302 L1MEd
WARNING: negative rEnd: -58 SCAFFOLD30017:67006-67483 L1ME3A
WARNING: negative rEnd: -184 SCAFFOLD70018:124976-125294 L1MDa
WARNING: negative rEnd: -243 SCAFFOLD30019:12475-12710 L1MC
WARNING: negative rEnd: -559 SCAFFOLD90026:6083-6297 L1M3e
WARNING: negative rEnd: -772 SCAFFOLD65027:182872-183028 L1ME2
WARNING: negative rEnd: -38 SCAFFOLD50029:73156-74338 L1MEd
WARNING: negative rEnd: -206 SCAFFOLD125032:291298-291342 L1PB2
WARNING: negative rEnd: -426 SCAFFOLD80032:171209-171503 L1M2c
WARNING: negative rEnd: -221 SCAFFOLD65033:142196-142540 L1MCc
WARNING: negative rEnd: -189 SCAFFOLD80035:127722-128125 L1MCc
WARNING: negative rEnd: -37 SCAFFOLD91036:35051-35595 L1M4b
WARNING: negative rEnd: -1042 SCAFFOLD61039:14634-14836 L1MEb
WARNING: negative rEnd: -610 SCAFFOLD100041:45441-45539 L1P4d
WARNING: negative rEnd: -12 SCAFFOLD61046:8669-8902 L1M4b
WARNING: negative rEnd: -3 SCAFFOLD54:56518-56566 FRAM
WARNING: negative rEnd: -161 SCAFFOLD100056:7086-7696 L1M2a
WARNING: negative rEnd: -1175 SCAFFOLD75057:117403-117585 L1MEc
WARNING: negative rEnd: -195 SCAFFOLD75057:118016-118986 L1MEc
WARNING: negative rEnd: -95 SCAFFOLD70059:42627-42663 L1M3b
WARNING: negative rEnd: -143 SCAFFOLD80068:10427-10703 L1MCc
WARNING: negative rEnd: -32 SCAFFOLD15069:109411-109499 L1PA10
WARNING: negative rEnd: -465 SCAFFOLD95070:93652-93843 L1PBa
WARNING: negative rEnd: -149 SCAFFOLD50071:46224-46441 L1MCc
WARNING: negative rEnd: -9 SCAFFOLD120072:35714-35752 AluJ/FLAM
WARNING: negative rEnd: -121 SCAFFOLD40075:175596-175802 L1M4b
WARNING: negative rEnd: -3 SCAFFOLD105090:247522-247572 FRAM
WARNING: negative rEnd: -751 SCAFFOLD55093:83484-83644 L1MD2
WARNING: negative rEnd: -448 SCAFFOLD96093:5787-6077 L1M2
WARNING: negative rEnd: -259 SCAFFOLD80094:21315-21842 L1MA4A
WARNING: negative rEnd: -17 SCAFFOLD45096:62501-62604 L1MCb
WARNING: negative rEnd: -465 SCAFFOLD110103:97060-97263 L1PA15-16
WARNING: negative rEnd: -241 SCAFFOLD45103:66136-66966 L1M4b
WARNING: negative rEnd: -50 SCAFFOLD45103:67253-67435 L1M4b
WARNING: negative rEnd: -1116 SCAFFOLD100106:180437-180562 L1MEb
WARNING: negative rEnd: -144 SCAFFOLD60110:170536-170650 L1MCc
WARNING: negative rEnd: -421 SCAFFOLD25112:16796-16975 L1ME1
WARNING: negative rEnd: -273 SCAFFOLD45112:69878-69957 L1M3de
WARNING: negative rEnd: -112 SCAFFOLD45112:70274-70337 L1M3de
WARNING: negative rEnd: -161 SCAFFOLD45112:70642-70730 L1M3de
WARNING: negative rEnd: -1056 SCAFFOLD90113:3554-3834 L1MEc
WARNING: negative rEnd: -277 SCAFFOLD65123:11579-12006 L1ME3A
WARNING: negative rEnd: -49 SCAFFOLD25127:16121-16503 L1MDa
WARNING: negative rEnd: -27 SCAFFOLD25127:16811-16842 L1MDa
WARNING: negative rEnd: -182 SCAFFOLD25140:786-1026 L1MEc
WARNING: negative rEnd: -134 SCAFFOLD11150:14559-14793 L1MEd
WARNING: negative rEnd: -198 SCAFFOLD5150:89989-90045 L1M4
WARNING: negative rEnd: -131 SCAFFOLD5150:90409-90468 L1M4
WARNING: negative rEnd: -3 SCAFFOLD5150:91247-91362 L1M4
WARNING: negative rEnd: -11 SCAFFOLD40153:105943-106073 MLT2B4
WARNING: negative rEnd: -31 SCAFFOLD85154:75958-75978 FRAM
WARNING: negative rEnd: -728 SCAFFOLD56155:5878-6313 L1MEb
WARNING: negative rEnd: -639 SCAFFOLD56155:6492-6680 L1MEb
WARNING: negative rEnd: -26 SCAFFOLD100160:1-587 L1MB8
WARNING: negative rEnd: -202 SCAFFOLD105165:51915-52062 L1M3de
WARNING: negative rEnd: -111 SCAFFOLD6165:3223-3363 L1M4
WARNING: negative rEnd: -760 SCAFFOLD40168:10976-11421 L1MEa
WARNING: negative rEnd: -698 SCAFFOLD40168:11433-11589 L1MEa
WARNING: negative rEnd: -14 SCAFFOLD75169:54846-54883 FRAM
WARNING: negative rEnd: -92 SCAFFOLD105171:55570-55642 MLT2B4
WARNING: negative rEnd: -274 SCAFFOLD50189:22507-22737 L1MEb
WARNING: negative rEnd: -231 SCAFFOLD40196:85003-85368 L1MCc
WARNING: negative rEnd: -136 SCAFFOLD40196:85393-85520 L1MCc
WARNING: negative rEnd: -12 SCAFFOLD70199:80341-80456 L1ME3A
WARNING: negative rEnd: -677 SCAFFOLD100203:35059-35219 L1MCc
WARNING: negative rEnd: -170 SCAFFOLD35203:54829-55008 L1MB3
WARNING: negative rEnd: -1 SCAFFOLD50207:100221-100236 Alu
WARNING: negative rEnd: -343 SCAFFOLD126214:34627-34745 L1P4d
WARNING: negative rEnd: -1956 SCAFFOLD40225:109594-109802 L1MB8
WARNING: negative rEnd: -1682 SCAFFOLD40225:109813-110031 L1MB8
WARNING: negative rEnd: -419 SCAFFOLD20230:16903-17089 L1MCc
WARNING: negative rEnd: -97 SCAFFOLD75231:22726-23168 L1MCc
WARNING: negative rEnd: -302 SCAFFOLD120233:107608-107705 L1M1
WARNING: negative rEnd: -124 SCAFFOLD100239:46223-46431 L1MCc
WARNING: negative rEnd: -63 SCAFFOLD41244:21184-21368 L1ME1
WARNING: negative rEnd: -153 SCAFFOLD80248:45221-45315 L1M4b
WARNING: negative rEnd: -9 SCAFFOLD110249:100831-100895 L1M3de
WARNING: negative rEnd: -57 SCAFFOLD10251:25507-25787 L1MB8
WARNING: negative rEnd: -643 SCAFFOLD77259:860-1045 L1MCc
WARNING: negative rEnd: -498 SCAFFOLD76268:14604-14797 L1MEb
WARNING: negative rEnd: -258 SCAFFOLD76268:14809-14996 L1MEb
WARNING: negative rEnd: -4 SCAFFOLD90269:54122-54199 MLT2B4
WARNING: negative rEnd: -365 SCAFFOLD7273:15-191 L1MCc
WARNING: negative rEnd: -228 SCAFFOLD50275:11636-12058 L1PBa
WARNING: negative rEnd: -751 SCAFFOLD20279:104338-104383 L1M4b
WARNING: negative rEnd: -226 SCAFFOLD30287:7366-7781 L1PBa
WARNING: negative rEnd: -1039 SCAFFOLD5293:72196-72400 L1MEb
WARNING: negative rEnd: -603 SCAFFOLD10304:114127-114271 L1M5
WARNING: negative rEnd: -160 SCAFFOLD10304:114363-114560 L1M5
WARNING: negative rEnd: -92 SCAFFOLD10304:114575-114740 L1M5
WARNING: negative rEnd: -901 SCAFFOLD10327:91532-91805 L1MEd
WARNING: negative rEnd: -564 SCAFFOLD10327:92262-92572 L1MEd
WARNING: negative rEnd: -308 SCAFFOLD55348:27043-27419 L1M4b
WARNING: negative rEnd: -1930 SCAFFOLD100349:55726-55876 L1M4b
WARNING: negative rEnd: -175 SCAFFOLD100349:56278-57031 L1M4b
WARNING: negative rEnd: -100 SCAFFOLD100349:63947-64039 L1M4b
WARNING: negative rEnd: -266 SCAFFOLD80349:48410-48682 L1MEd
WARNING: negative rEnd: -145 SCAFFOLD121350:10964-11015 L1MDa
WARNING: negative rEnd: -53 SCAFFOLD75350:47481-47572 L1ME1
WARNING: negative rEnd: -636 SCAFFOLD10375:46541-46860 L1M4b
WARNING: negative rEnd: -87 SCAFFOLD40382:8244-8802 L1MEe
WARNING: negative rEnd: -28 SCAFFOLD45384:49442-49558 MLT2B4
WARNING: negative rEnd: -552 SCAFFOLD80392:104844-105116 L1MEd
WARNING: negative rEnd: -427 SCAFFOLD80392:109481-109601 L1MEd
WARNING: negative rEnd: -939 SCAFFOLD60398:64940-65310 L1M4
WARNING: negative rEnd: -7 SCAFFOLD76399:14698-15345 L1M4c
WARNING: negative rEnd: -309 SCAFFOLD100411:46163-46198 L1MEd
WARNING: negative rEnd: -152 SCAFFOLD100411:46508-46659 L1MEd
WARNING: negative rEnd: -116 SCAFFOLD11422:8740-9284 L1M3c
WARNING: negative rEnd: -147 SCAFFOLD80423:74087-74325 L1MEd
WARNING: negative rEnd: -606 SCAFFOLD50470:9112-9341 L1P4b
WARNING: negative rEnd: -420 SCAFFOLD25477:86802-87608 L1MEc
WARNING: negative rEnd: -2 SCAFFOLD25477:87814-88211 L1MEc
WARNING: negative rEnd: -24 SCAFFOLD50494:29845-29965 MLT2B4
WARNING: negative rEnd: -1030 SCAFFOLD15495:103193-103375 L1ME3A
WARNING: negative rEnd: -392 SCAFFOLD15495:104580-104749 L1ME3A
WARNING: negative rEnd: -148 SCAFFOLD15495:105065-105297 L1ME3A
WARNING: negative rEnd: -46 SCAFFOLD110523:36392-36574 HAL1
WARNING: negative rEnd: -378 SCAFFOLD110523:37954-38837 HAL1
WARNING: negative rEnd: -184 SCAFFOLD40529:9254-9330 L1M4
WARNING: negative rEnd: -9 SCAFFOLD95544:42116-42338 L1M4
WARNING: negative rEnd: -1997 SCAFFOLD30557:64701-64869 L1M4b
WARNING: negative rEnd: -739 SCAFFOLD30557:65181-66324 L1M4b
WARNING: negative rEnd: -681 SCAFFOLD30557:66636-66693 L1M4b
WARNING: negative rEnd: -6 SCAFFOLD105562:14442-14491 FRAM
WARNING: negative rEnd: -9 SCAFFOLD96566:4841-4880 FRAM
WARNING: negative rEnd: -250 SCAFFOLD65573:63410-63539 AluJo_short_
WARNING: negative rEnd: -201 SCAFFOLD576:16208-16578 L1M4b
WARNING: negative rEnd: -6 SCAFFOLD75578:54511-54560 FRAM
WARNING: negative rEnd: -104 SCAFFOLD50626:58371-58412 MLT2B4
WARNING: negative rEnd: -517 SCAFFOLD124648:98-302 L1M3e
WARNING: negative rEnd: -278 SCAFFOLD20658:33043-33220 L1P4b
WARNING: negative rEnd: -420 SCAFFOLD660:27740-27925 L1MCc
WARNING: negative rEnd: -444 SCAFFOLD660:28264-28359 L1MCc
WARNING: negative rEnd: -72 SCAFFOLD20676:75256-75325 MLT2B4
WARNING: negative rEnd: -165 SCAFFOLD100679:57845-58265 L1PA13
WARNING: negative rEnd: -281 SCAFFOLD10695:66679-66826 L1MDa
WARNING: negative rEnd: -147 SCAFFOLD50701:26891-27410 L1M3c
WARNING: negative rEnd: -335 SCAFFOLD60709:63984-64229 L1MCc
WARNING: negative rEnd: -46 SCAFFOLD80722:29210-29650 L1M4
WARNING: negative rEnd: -1477 SCAFFOLD25737:40798-40921 L1MD2
WARNING: negative rEnd: -629 SCAFFOLD25737:40904-41337 L1MD2
WARNING: negative rEnd: -271 SCAFFOLD25737:47745-47952 L1MD2
WARNING: negative rEnd: -38 SCAFFOLD80773:51525-52241 L1MEd
WARNING: negative rEnd: -751 SCAFFOLD80773:52278-52545 L1MEd
WARNING: negative rEnd: -112 SCAFFOLD87802:479-871 L1MCc
WARNING: negative rEnd: -15 SCAFFOLD71803:28509-28547 MLT2B4
WARNING: negative rEnd: -259 SCAFFOLD60807:87176-87306 L1MCc
WARNING: negative rEnd: -24 SCAFFOLD60839:5191-5420 L1M4b
WARNING: negative rEnd: -115 SCAFFOLD75849:14545-14683 L1M4
WARNING: negative rEnd: -491 SCAFFOLD5875:11189-11235 L1MEb
WARNING: negative rEnd: -321 SCAFFOLD110959:5414-5484 L1MCc
  # matches select count(*) from rmsk where repEnd < 0;

  # add the simple repeats (trf.bed) on top of the RepeatMasker soft mask
  /cluster/bin/i386/maskOutFa -softAdd rheMac1.softmask.fa bed/simpleRepeat/trf.bed rheMac1.softmask2.fa

  # hard masking (Ns instead of lower case) for download files
  # split for use by genscan
  /cluster/bin/i386/maskOutFa rheMac1.softmask2.fa hard rheMac1.hardmask.fa
  mkdir hardmask-split
  # trailing slash matters: faSplit's last argument is an output-name
  # prefix, so without it the pieces are written next to hardmask-split
  # (hardmask-splitNN.fa) instead of inside the directory
  /cluster/bin/i386/faSplit about rheMac1.hardmask.fa 2000000 hardmask-split/

# DOWNLOAD FILES
# Publish the three masked fasta files in the bigZips download directory
# and compress them there.

  ssh hgwdev
  cd /cluster/data/rheMac1
  cp rheMac1.softmask.fa rheMac1.softmask2.fa rheMac1.hardmask.fa \
     /usr/local/apache/htdocs/goldenPath/rheMac1/bigZips

  cd /usr/local/apache/htdocs/goldenPath/rheMac1/bigZips
  nice gzip rheMac1.softmask.fa rheMac1.softmask2.fa rheMac1.hardmask.fa

# REGENERATE 2BIT NIB
# Rebuild the 2bit from rheMac1.softmask2.fa, convert it back to fasta for
# comparison, then swap it in under the name rheMac1.2bit; the previous
# file is kept as rheMac1.2bit.unmasked.
  ssh kksilo
  cd /cluster/data/rheMac1
  /cluster/bin/i386/faToTwoBit rheMac1.softmask2.fa rheMac1.softmask.2bit
  /cluster/bin/i386/twoBitToFa rheMac1.softmask.2bit check.softmask.fa
  diff -q rheMac1.softmask2.fa check.softmask.fa
  mv rheMac1.2bit rheMac1.2bit.unmasked
  mv rheMac1.softmask.2bit rheMac1.2bit
  # note: size match

# GENERATE split masked fas for blastz
# Only rheMac1.2bit is copied to /iscratch here; the faSplit alternative
# below was left commented out.
  ssh kkr1u00
  mkdir -p /iscratch/i/rheMac1/nib
  cd /cluster/data/rheMac1
  cp /cluster/data/rheMac1/rheMac1.2bit /iscratch/i/rheMac1/nib/

  # NOTE(review): the two disabled lines disagree on the directory name
  # (splitFas vs splitDir) -- reconcile before re-enabling them
  #mkdir -p /iscratch/i/rheMac1/splitFas
  #/cluster/bin/i386/faSplit sequence /cluster/data/rheMac1/rheMac1.softmask2.fa 350 /iscratch/i/rheMac1/splitDir/
  cd /iscratch/i/rheMac1
  # check that I got all scaffolds
  #grep SCAFFOLD *.fa > SCAFFOLD.list
  # sync
  /cluster/bin/scripts/iSync

ssh kksilo
cd /cluster/data/hg17/bed/blastz.rheMac1
# alignments (DONE - March 16 - Robert)
# edit doBlastzChainNet.pl to add lines for hg17 glitch
# in function getDbFromPath
#  if ($db eq "hg") {
#      $db = "hg17";
#  }
# runs in the background; progress is written to do.log
doBlastzChainNet.pl DEF -blastzOutRoot /panasas/store/rheMacOut/ >& do.log &
# make blat ooc (DONE - March 17 - Robert)
  ssh kkr1u00
  mkdir /cluster/data/rheMac1/bed/ooc
  cd /cluster/data/rheMac1/bed/ooc
  # blat takes a list file; here it names just the one 2bit
  ls -1 /cluster/data/rheMac1/rheMac1.2bit > nib.lst
  /cluster/bin/i386/blat nib.lst /dev/null /dev/null -tileSize=11 \
    -makeOoc=11.ooc -repMatch=800
  # output of the original run:
  # Writing 11.ooc
  # Wrote 38031 overused 11-mers to 11.ooc
  # Done making 11.ooc
  cp -p 11.ooc /iscratch/i/rheMac1/
  # same script as in the previous iscratch step, given by full path so
  # the step does not depend on PATH
  /cluster/bin/scripts/iSync

# SWAP CHAINS FROM HG17, BUILD NETS ETC.   (DONE  3/31/2005  Robert)
# Re-use the hg17-vs-rheMac1 blastz run (its DEF file) in -swap mode so the
# work is written under rheMac1's blastz.hg17.swap directory.
    ssh eieio
    mkdir /cluster/data/rheMac1/bed/blastz.hg17.swap
    cd /cluster/data/rheMac1/bed/blastz.hg17.swap
# use special version of doBlastzChainNet.pl to handle hg17 hack
# (presumably the copy edited for the hg17 glitch in the alignment step -- confirm)
# must run on kolossus , due to memory requirements.
    nohup /cluster/data/hg17/bed/blastz.rheMac1/doBlastzChainNet.pl -swap /cluster/data/hg17/bed/blastz.rheMac1/DEF   \
       -workhorse=kolossus >& do.log &
    # the job is in the background; follow its progress in the log
    tail -f do.log
    # Add {chain,net}hg17 to trackDb.ra if necessary.
    # Add /usr/local/apache/htdocs/goldenPath/rheMac1/vsHg17/README.txt

# hgLoadChain ran out of memory , split chains
# (sh/bash syntax for the loops below)
cd /cluster/data/rheMac1/bed/blastz.hg17.swap/axtChain
mkdir split
# run in the foreground: the filter loop below reads split/ and must not
# start until chainSplit has finished writing every file
chainSplit split rheMac1.hg17.all.chain.gz -q
# filter chains score < 5000
mkdir splitFilter
for f in split/* ; do
  name=`basename $f`
  echo $name
  chainFilter $f -minScore=5000 > splitFilter/$name
done
rm -rf split
#assumes empty chain tables exist
for f in splitFilter/* ; do
  echo `basename $f`
  hgLoadChain -oldTable -tIndex rheMac1 chainHg17 $f
done
# Add gap/repeat stats to the net file using database tables:
cd /cluster/data/rheMac1/bed/blastz.hg17.swap/axtChain
netClass -verbose=0 -noAr noClass.net rheMac1 hg17 rheMac1.hg17.net

# Load nets:
netFilter -minGap=10 rheMac1.hg17.net \
| hgLoadNet -verbose=0 rheMac1 netHg17 stdin


# make liftOver chains
# Uncompress the hg17 -> rheMac1 liftOver chain and link it under /gbdb/hg17.
ssh eieio
gunzip /cluster/data/hg17/bed/liftOver/hg17.rheMac1.over.chain.gz
ssh hgwdev
ln /cluster/data/hg17/bed/liftOver/hg17.rheMac1.over.chain /gbdb/hg17/liftOver/hg17TorheMac1.over.chain -s
    # NOTE(review): the next two lines name bosTau1, not rheMac1 -- they look
    # like leftovers from the cow build.  Nothing in this doc creates the
    # /gbdb/rheMac1/liftOver/rheMac1.hg17.over.chain file that the insert
    # statement below refers to; confirm the intended rheMac1 paths.
    cd /usr/local/apache/htdocs/goldenPath/bosTau1/liftOver
    cp /cluster/data/bosTau1/bed/bedOver/bosTau1.hg17.over.chain.gz .

    # hgsql hgcentraltest
# insert into liftOverChain
#      (fromDb, toDb, path, minMatch, minSizeT, minSizeQ, multiple, minBlocks, fudgeThick) values ("rheMac1", "hg17", "/gbdb/rheMac1/liftOver/rheMac1.hg17.over.chain", 0.95, 0, 0, "Y", 1, "N");
# insert into liftOverChain
#      (fromDb, toDb, path, minMatch, minSizeT, minSizeQ, multiple, minBlocks, fudgeThick) values ("hg17", "rheMac1", "/gbdb/hg17/liftOver/hg17TorheMac1.over.chain", 0.95, 0, 0, "Y", 1, "N");


# GC5BASE (DONE - 2005-04-01 - Galt)
# GC percent in 5-base windows, encoded as a wiggle track and loaded as
# table gc5Base.
    ssh hgwdev
    mkdir -p /cluster/data/rheMac1/bed/gc5Base
    cd /cluster/data/rheMac1/bed/gc5Base
    #   runs for about 1 hour
    hgGcPercent -wigOut -doGaps -file=stdout -win=5 rheMac1 /cluster/data/rheMac1 \
      | wigEncode stdin gc5Base.wig gc5Base.wib
    #       Calculating gcPercent with window size 5
    #       Using twoBit: /cluster/data/rheMac1/rheMac1.2bit
    #       File stdout created
    #   Converted stdin, upper limit 100.00, lower limit 0.00

    # make the .wib visible under /gbdb, then load the .wig into the database
    mkdir /gbdb/rheMac1/wib
    ln -s `pwd`/gc5Base.wib /gbdb/rheMac1/wib
    hgLoadWiggle -pathPrefix=/gbdb/rheMac1/wib rheMac1 gc5Base gc5Base.wig

    #   analyze table to update the Cardinality column, so it's not NULL
    #   (Hiram is going to add this to hgLoadWiggle so it is done
    #   automatically), then verify the index: the Cardinality column
    #   should show good numbers, not NULL
    hgsql rheMac1 -e "analyze table gc5Base; show index from gc5Base;"

# load qual scores (DONE - 2005-06-09  - Robert)
# Convert the scaffold quality scores to a wiggle track and load it as
# table "quality".

    ssh kkstore01-10
    # the qual directory is not created anywhere earlier in this doc
    mkdir -p /cluster/data/rheMac1/qual
    cd /cluster/data/rheMac1/qual
    wget ftp://ftp.hgsc.bcm.tmc.edu/pub/data/Rmacaque/fasta/linearScaffolds/Rmac20041001-freeze-assembly.fa.qual.gz
    gunzip *.gz
    qaToQac Rmac20041001-freeze-assembly.fa.qual Rmac20041001-freeze-assembly.fa.qac
    qacToWig -fixed Rmac20041001-freeze-assembly.fa.qac rheMac1.wig
    wigEncode rheMac1.wig rheMac1.qual.wig rheMac1.qual.wib
    # the link must carry the same file name as the .wib written above
    # (rheMac1.qual.wib), otherwise the loaded table points at a file that
    # does not exist under /gbdb/rheMac1/wib
    ln -s /cluster/data/rheMac1/qual/rheMac1.qual.wib /gbdb/rheMac1/wib/rheMac1.qual.wib
    ssh hgwdev
    cd /cluster/data/rheMac1/qual
    hgLoadWiggle rheMac1 quality rheMac1.qual.wig
