#!/usr/bin/env bash
#
# Provenance script: fetches Drosophila genome assemblies from UCSC and NCBI
# into the current directory, writing one FASTA per genome named <genome>.fa
# with every sequence header prefixed by "<genome>.".
#
# Usage: run from the (empty) directory that should receive the FASTA files.
# Requires: wget, curl, gunzip, tar, perl.

# Stop at the first failed step so a failed download is never post-processed.
set -euo pipefail

readonly UCSC_BASE='http://hgdownload.cse.ucsc.edu/goldenPath'
readonly NCBI_WGS_BASE='ftp://anonymous@ftp.ncbi.nih.gov/genbank/wgs'

#######################################
# Prefix every FASTA header line in a file with "<genome>.", editing in place.
# Arguments: $1 - genome name used as the prefix
#            $2 - FASTA file to rewrite
#######################################
prefix_fasta_headers() {
  local genome=$1
  local fasta=$2
  # The name is passed through the environment rather than spliced into the
  # Perl source, so it can never be interpreted as Perl/regex syntax.
  GENOME_PREFIX="$genome" \
    perl -i -ple 's/^>/>$ENV{GENOME_PREFIX}./' -- "$fasta"
}

#######################################
# Replace NCBI "gi|...|...|" style headers in a file with "><genome>.<token>",
# where <token> is the whitespace-delimited word directly followed by a comma
# in the description (presumably the contig/scaffold id - confirm against the
# downloaded headers). Lines that do not match are left untouched.
# Arguments: $1 - genome name used as the prefix
#            $2 - FASTA file to rewrite
#######################################
rewrite_ncbi_headers() {
  local genome=$1
  local fasta=$2
  GENOME_PREFIX="$genome" \
    perl -i -ple 's/^>gi\|\d+\|\S+?\|.*?(\S+),.*/>$ENV{GENOME_PREFIX}.$1/' \
    -- "$fasta"
}

####################
# 12 Flies downloaded from UCSC hgdownload on 14 December 2011.
download_ucsc_flies() {
  local g

  # Assemblies fetched as a single <genome>.fa.gz file.
  # Fixes: the directory component now follows the genome (it was hard-coded
  # to droAna3 for every assembly), and "dreEre2" is corrected to "droEre2".
  for g in droAna3 droEre2 droGri2 droMoj3 dp4 droVir3 droWil1; do
    wget "${UCSC_BASE}/${g}/bigZips/${g}.fa.gz"
    gunzip -- "${g}.fa.gz"
    prefix_fasta_headers "$g" "${g}.fa"
  done

  # Assemblies fetched as a tarball of per-chromosome chr*.fa files.
  for g in dm3 droSim1; do
    wget "${UCSC_BASE}/${g}/bigZips/chromFa.tar.gz"
    tar -xvzf chromFa.tar.gz
    cat chr*.fa > "${g}.fa"
    prefix_fasta_headers "$g" "${g}.fa"
    rm chromFa.tar.gz chr*.fa
  done

  # Assemblies fetched as a single scaffoldFa.gz file.
  for g in droPer1 droSec1; do
    wget "${UCSC_BASE}/${g}/bigZips/scaffoldFa.gz"
    gunzip scaffoldFa.gz
    mv scaffoldFa "${g}.fa"
    prefix_fasta_headers "$g" "${g}.fa"
  done

  # droYak2: the tarball is read from per-chromosome directories, which are
  # concatenated and then removed.
  wget "${UCSC_BASE}/droYak2/bigZips/chromFa.tar.gz"
  tar -xvzf chromFa.tar.gz
  cat 2*/* 3*/* 4/* M/* U*/* X*/* Yh/* > droYak2.fa
  prefix_fasta_headers droYak2 droYak2.fa
  rm -rf 2*/ 3*/ 4/ M/ U*/ X*/ Yh/ chromFa.tar.gz
}

####################
# 8 Flies downloaded from NCBI on 25 January 2012.
download_ncbi_flies() {
  local f g

  # curl expands the {...} list itself; the URL is quoted so the shell does not.
  curl -O "${NCBI_WGS_BASE}/wgs.{AFFD,AFFE,AFFF,AFFG,AFFH,AFFI,AFPQ,AFPP}.1.fsa_nt.gz"
  curl -O "${NCBI_WGS_BASE}/wgs.AFPP.2.fsa_nt.gz"

  for f in *.fsa_nt.gz; do
    gunzip -- "$f"
  done

  # Where the mapping of WGS project codes to species names is:
  mv wgs.AFFD.1.fsa_nt droBia.fa
  mv wgs.AFFE.1.fsa_nt droBip.fa
  mv wgs.AFFF.1.fsa_nt droEle.fa
  mv wgs.AFFG.1.fsa_nt droFic.fa
  mv wgs.AFFH.1.fsa_nt droKik.fa
  mv wgs.AFFI.1.fsa_nt droTak.fa
  mv wgs.AFPP.1.fsa_nt droRho.1.fa
  mv wgs.AFPP.2.fsa_nt droRho.2.fa
  mv wgs.AFPQ.1.fsa_nt droEug.fa

  # AFPP ships as two parts; merge them into a single droRho.fa.
  cat droRho.1.fa droRho.2.fa > droRho.fa
  rm droRho.1.fa droRho.2.fa

  for g in droBia droBip droEle droFic droKik droTak droRho droEug; do
    rewrite_ncbi_headers "$g" "${g}.fa"
  done
}

main() {
  download_ucsc_flies
  download_ncbi_flies
}

main "$@"

####################
# Repeat masking was performed using RepeatMasker with the Drosophila
# melanogaster repeat library, along with TRF to find simple repeats
# (this is the UCSC standard pipeline).