#!/bin/csh -f
# "Make doc" for creating AGP files for ENCODE ortholog regions based on
#  coordinates in bed files in the "edited" directory

# TODO: contigAcc files should go in $outDir (neater this way)

# All relative paths used below (../edited, the per-assembly output
# directories, *.contig.tab) are resolved from this working directory.
cd /cluster/data/encode/synteny/agp

# Create genomeAgps.csh, a helper script that runs the AGP-maker (regionAgp)
# on one assembly.
#   usage: csh genomeAgps.csh <db> <org> <outdir>
#     <db>     assembly name; selects /cluster/data/<db> as the build dir
#              and ../edited/<db>.bed as the region bed file
#     <org>    organism name; used for the "<org>_" name prefix and to look
#              for an optional <org>.contig.tab in the current directory
#              (passed to regionAgp as -contigFile only when it exists)
#     <outdir> output directory (created if needed), passed to regionAgp -dir
# If /cluster/data/<db>/chrom.lst exists, the chrN.agp and chrN_random.agp
# files for each chromosome it lists are fed to regionAgp on stdin; otherwise
# every *.agp under the one- and two-character subdirectories of the build
# dir is used.
# The here-document word is quoted, so csh performs no substitution on the
# script text: $1, $db, etc. are written to the file literally.
cat > genomeAgps.csh << 'EOF'
# create AGP's for a genome assembly

if ($#argv != 3) then
    echo "usage: $0 <db> <org> <outdir>"
    exit 1
endif
set bedDir = ../edited
set db = $1
set org = $2
set outDir = $3
mkdir -p $outDir
set buildDir = /cluster/data/$db
set bedFile = $bedDir/$db.bed

if (-f $org.contig.tab) then
    set contigArg = "-contigFile=$org.contig.tab"
else
    set contigArg = ""
endif

if (-f $buildDir/chrom.lst) then
    cat $buildDir/chrom.lst | \
            xargs -iX cat $buildDir/X/chr{X,X_random}.agp | \
                regionAgp  $contigArg \
                  -namePrefix=${org}_ $bedFile stdin -dir $outDir
else
    cat $buildDir/?{,?}/*.agp | regionAgp $contigArg \
                  -namePrefix=${org}_ $bedFile stdin -dir $outDir
endif
'EOF'
    # << this line makes emacs coloring happy

# MOUSE

# Build the mouse region AGP files straight into mm3/
# (no mouse.contig.tab is generated in this section)
csh genomeAgps.csh mm3 mouse mm3

# Write the packing list next to the AGP files
/cluster/data/encode/bin/scripts/encodeRegionPackingList ../edited/mm3.bed \
    mm3 mouse "Mus musculus" 10090 C57BL/6J FEB-2003 mm3 "NCBI Build 30" \
    > mm3/mm3.packing.list

# Publish the packing list and a tarball of the AGP files.
# The tarball is built from inside mm3/ so member names carry no directory.
set DOWNLOAD_DIR = /usr/local/apache/htdocs/encode/downloads/EncodeAgps
cd mm3
cp mm3.packing.list $DOWNLOAD_DIR
tar -czvf mouse.agp.tar.gz *.agp
mv mouse.agp.tar.gz $DOWNLOAD_DIR
cd ..

# CHICKEN

# Get contig to accession mapping (documented in makeGalGal2.doc).
# This has to exist BEFORE genomeAgps.csh runs: that script only passes
# -contigFile to regionAgp when chicken.contig.tab is already present in
# the current directory.
hgsql galGal2 -s -e "select * from contigAcc" > chicken.contig.tab

# Create AGP's in a scratch directory, then move it into place.
# NOTE: if galGal2 already exists, mv nests galGal2.new inside it instead of
# replacing it -- remove or rename the old directory before re-running.
csh genomeAgps.csh galGal2 chicken galGal2.new
mv galGal2.new galGal2

# Create packing list
/cluster/data/encode/bin/scripts/encodeRegionPackingList ../edited/galGal2.bed galGal2  chicken "Gallus gallus" 9031 N/A FEB-2004 galGal2 "CGSC Feb. 2004" > galGal2/galGal2.packing.list

# Copy to downloads area
# (tarball is built from inside galGal2/ so member names carry no directory)
cd galGal2
set DOWNLOAD_DIR = /usr/local/apache/htdocs/encode/downloads/EncodeAgps
cp galGal2.packing.list $DOWNLOAD_DIR
tar cvfz chicken.agp.tar.gz *.agp
mv chicken.agp.tar.gz $DOWNLOAD_DIR
cd ..

# RAT

# Get contig to accession map (must precede genomeAgps.csh, which picks up
# rat.contig.tab from the current directory if it exists)
cd /cluster/data/encode/synteny/agp
hgsql rn3 -s -e "select * from contigAcc" > rat.contig.tab

# Create AGP's in a scratch directory, then move it into place.
# NOTE: if rn3 already exists, mv nests rn3.new inside it instead of
# replacing it -- remove or rename the old directory before re-running.
csh genomeAgps.csh rn3 rat rn3.new
mv rn3.new rn3

# Create packing list
/cluster/data/encode/bin/scripts/encodeRegionPackingList ../edited/rn3.bed rn3  rat "Rattus norvegicus" 10116  BN/SsNHsdMCW JUN-2003 rn3 "Baylor HGSC v3.1" > rn3/rn3.packing.list

# Copy to downloads area
cd rn3
set DOWNLOAD_DIR = /usr/local/apache/htdocs/encode/downloads/EncodeAgps
cp rn3.packing.list $DOWNLOAD_DIR
tar cvfz rat.agp.tar.gz *.agp
mv rat.agp.tar.gz $DOWNLOAD_DIR

# Sanity check: number of files in the tarball should equal the number of
# Region lines in the packing list.
# Name the tarball explicitly; with a glob, any additional match would be
# handed to tar as a member name rather than as an archive.
cd $DOWNLOAD_DIR
tar tvfz rat.agp.tar.gz | wc -l
    # 51
grep -c Region rn3.packing.list
    # 51

# Return to the working directory ("cd .." from $DOWNLOAD_DIR would land in
# the downloads tree, not here)
cd /cluster/data/encode/synteny/agp


# CHIMP

# Get contig to accession map
cd /cluster/data/encode/synteny/agp
hgsql panTro1 -s -e "select * from contigAcc" > chimp.contig.tab

# Create AGP's
# Chimp does not go through genomeAgps.csh: the chromosome AGPs are first
# combined with assembly.agp by chimpChromContigAgp, and regionAgp is then
# run on the resulting chimpChromContigs.agp.
# NOTE: next time, put chimpChromContigs.agp into $outDir
cat /cluster/data/panTro1/?{,?}/*.agp | \
   chimpChromContigAgp stdin /cluster/data/panTro1/assembly.agp chimpChromContigs.agp
mkdir -p panTro1.new
regionAgp -contigFile=chimp.contig.tab \
      -namePrefix=chimp_ ../edited/panTro1.bed chimpChromContigs.agp -dir panTro1.new
# NOTE: if panTro1 already exists, mv nests panTro1.new inside it instead of
# replacing it -- remove or rename the old directory before re-running.
mv panTro1.new panTro1

# Create packing list
/cluster/data/encode/bin/scripts/encodeRegionPackingList ../edited/panTro1.bed panTro1  chimp "Pan troglodytes" 9598 N/A NOV-2003 panTro1 "NCBI Build 1 v1" > panTro1/panTro1.packing.list

# Copy to downloads area
cd panTro1
set DOWNLOAD_DIR = /usr/local/apache/htdocs/encode/downloads/EncodeAgps
cp panTro1.packing.list $DOWNLOAD_DIR
tar cvfz chimp.agp.tar.gz *.agp
mv chimp.agp.tar.gz $DOWNLOAD_DIR

# Sanity check: number of files in the tarball should equal the number of
# Region lines in the packing list.
# Name the tarball explicitly; with a glob, any additional match would be
# handed to tar as a member name rather than as an archive.
cd $DOWNLOAD_DIR
tar tvfz chimp.agp.tar.gz | wc -l
    # 55
grep -c Region panTro1.packing.list
    # 55

# Return to the working directory ("cd .." from $DOWNLOAD_DIR would land in
# the downloads tree, and whatever runs next would operate there)
cd /cluster/data/encode/synteny/agp

# TESTS ---------------------------------------------------------

# Everything below uses relative paths (tests/*.bed, testMouse, testChicken,
# chicken.contig.tab) and starts with "rm -fr", so pin the working directory
# explicitly instead of relying on where earlier commands left the shell.
cd /cluster/data/encode/synteny/agp

# Mouse: regions from tests/test.bed against mm3 chromosomes 6 and 11,
# without a contig file
rm -fr testMouse
mkdir -p testMouse
cat /cluster/data/mm3/{6,11}/*.agp | regionAgp tests/test.bed stdin -namePrefix=mouse_ -dir testMouse
/cluster/data/encode/bin/scripts/encodeRegionPackingList tests/test.bed testMouse mouse "Mus musculus" 10090 C57BL/6J FEB-2003 mm3 "NCBI Build 30"

# Chicken: regions from tests/test.chicken.bed against galGal2 chromosomes
# 1 and 13, with the contig to accession map regenerated first
rm -fr testChicken
mkdir -p testChicken
hgsql galGal2 -s -e "select * from contigAcc" > chicken.contig.tab
cat /cluster/data/galGal2/{1,13}/*.agp | regionAgp tests/test.chicken.bed stdin -namePrefix=chicken_ -dir testChicken -contigFile=chicken.contig.tab

/cluster/data/encode/bin/scripts/encodeRegionPackingList tests/test.chicken.bed testChicken chicken "Gallus gallus" 9031 N/A FEB-2004 galGal2 "CGSC Feb. 2004"



