Genome_zhushi - duan12345678/Sympatric-speciation GitHub Wiki

####### BUSCO: assess the genome assembly
# Runs BUSCO on the chromosome-level assembly with the mammalia_odb10 lineage
# in genome mode, using 50 CPUs; results are written to busco_1216.out.
# NOTE(review): the protein-mode BUSCO run later on this page reads the lineage
# from /data/01/user194/bio-software/mammalia_odb10 — confirm which path is
# the current one.
busco -i Galili.Chr.v3.fasta -o busco_1216.out --cpu 50 \
  -l /data/01/bio-software/mammalia_odb10 -m geno -f

######Genome annotation

##### Repeat sequence annotation
# The relative symlinks below imply that each sub-step runs in its own working
# directory (01.repeatmodeler, 01.repeatmodeler.fix, 02.repeatmakser,
# 03.repeatproteinmask, 04.trf); the `cd` commands are not recorded here.

### RepeatModeler: build a de novo repeat library and mask with it
/data/00/software/Repeatmask/RepeatModeler-2.0/BuildDatabase -name Spalax_galili Galili.Chr.v3.fasta
/data/00/software/Repeatmask/RepeatModeler-2.0/RepeatModeler -pa 30 -database Spalax_galili
ln -s RM*/consensi.fa.classified custom.lib
/data/00/software/Repeatmask/RepeatMasker/RepeatMasker -pa 30 -lib ./custom.lib --gff Galili.Chr.v3.fasta

### RepeatModeler + LTR: extend the library with LTR_retriever output and re-mask
perl ConvertRepeatMasker2gff.pl Galili.Chr.v3.fasta.out Denovo.gff Denovo
perl LTR.InsertTime.pl Galili.Chr.v3.fasta LTRretriever no 30
sh LTRretriever/LTRretriever.retriever.sh
ln -s LTRretriever/03.LTR_retriever/LTRretriever.genome.fna.mod.LTRlib.redundant.fa .
ln -s ../01.repeatmodeler/custom.lib .
# Combined library = de novo consensi + LTR library + the stock RepeatMasker library.
cat custom.lib \
  LTRretriever.genome.fna.mod.LTRlib.redundant.fa \
  /data/00/software/Repeatmask/RepeatMasker/Libraries/RepeatMasker.lib \
  > custom.lib.fix.fa
/data/00/software/Repeatmask/RepeatMasker/RepeatMasker -pa 30 -lib ./custom.lib.fix.fa --gff Galili.Chr.v3.fasta
perl ConvertRepeatMasker2gff.pl Galili.Chr.v3.fasta.out Denovo.gff Denovo

### RepeatMasker: library-based masking
# The original command passed "-pa 30" twice; the duplicate is dropped.
# NOTE(review): "Mesangiospermae" is a flowering-plant clade, while the BUSCO
# lineage used for this assembly is mammalia_odb10 — confirm the intended
# -species value before re-running.
/data/00/software/Repeatmask/RepeatMasker/RepeatMasker -pa 30 -nolow -norna -no_is -gff \
  -species Mesangiospermae Galili.Chr.v3.fasta
perl /data/01/user192/qijiao/zhushi_0301/utils/ConvertRepeatMasker2gff.pl Galili.Chr.v3.fasta.out TE.gff TE

## RepeatProteinMask: run once per split sequence, in parallel
split_fasta.pl Galili.Chr.v3.fasta split_by_scaffold
# Write one RepeatProteinMask command per split file, then run 30 at a time.
for i in split_by_scaffold/*; do
  echo /data/00/software/Repeatmask/RepeatMasker/RepeatProteinMask -noLowSimple -pvalue 1e-04 "$i"
done > 02.split.running.sh
parallel -j 30 < 02.split.running.sh
cat split_by_scaffold/*annot > Galili.Chr.v3.fasta.repeatproteinmasker.annot
rm -r split_by_scaffold
perl ConvertRepeatMasker2gff.pl Galili.Chr.v3.fasta.repeatproteinmasker.annot TP.gff TP

### TRF: tandem repeats
trf Galili.Chr.v3.fasta 2 7 7 80 10 50 2000 -d -h
perl ConvertTrf2Gff.pl Galili.Chr.v3.fasta.2.7.7.80.10.50.2000.dat Galili.Chr.v3.fasta.trf.gff

### Merge all repeat evidence
ln -s ../01.repeatmodeler.fix/Denovo.gff ./Denovo.gff
# NOTE(review): "02.repeatmakser" is kept exactly as written; it may be the
# real (misspelled) directory name — verify on disk.
ln -s ../02.repeatmakser/TE.gff ./TE.gff
ln -s ../03.repeatproteinmask/TP.gff ./TP.gff
# The original linked ../04.trf/Fmechowii.fasta.trf.gff, but the TRF step above
# writes Galili.Chr.v3.fasta.trf.gff; the link now points at that file.
ln -s ../04.trf/Galili.Chr.v3.fasta.trf.gff ./TRF.gff

# Collapse every repeat interval into one sorted, merged BED file.
cat Denovo.gff TE.gff TP.gff TRF.gff \
  | grep -v -P '^#' \
  | cut -f 1,4,5 \
  | sort -k1,1 -k2,2n -k3,3n \
  > All.repeat.bed
bedtools merge -i All.repeat.bed > All.repeat.merge.bed

# Hard-masked and soft-masked copies of the genome.
bedtools maskfasta -fi Galili.Chr.v3.fasta -bed All.repeat.merge.bed -fo Galili.Chr.v3.fasta.mask
bedtools maskfasta -fi Galili.Chr.v3.fasta -bed All.repeat.merge.bed -fo Galili.Chr.v3.fasta.mask_soft -soft

# Keep only classified repeats for the summary tables.
grep -v 'Class=Unknown;' Denovo.gff > Denovo.gff.known
grep -v 'Class=Unknown;' TE.gff > TE.gff.known
grep -v 'Class=Unknown;' TP.gff > TP.gff.known
grep -v 'Class=Unknown;' TRF.gff > TRF.gff.known

# Compare each non-de-novo track against the de novo track.
# (Presumably this removes the overlap, given the ".noDenovo" output names —
# confirm in bed_intersect.pl.)
perl bed_intersect.pl TRF.gff.known Denovo.gff.known TRF.gff.known.noDenovo
perl bed_intersect.pl TP.gff.known Denovo.gff.known TP.gff.known.noDenovo
perl bed_intersect.pl TE.gff.known Denovo.gff.known TE.gff.known.noDenovo

perl gff_rep_summary.pl final_rep_dir Denovo.gff.known TE.gff.known.noDenovo TRF.gff.known.noDenovo TP.gff.known.noDenovo
perl lenbed.pl All.repeat.merge.bed > All.repeat.merge.bed.len
cat final_rep_dir/99.SUMMARY2 final_rep_dir/99.SUMMARY3 All.repeat.merge.bed.len > SUMMARY.FINAL.txt

##### RNA-seq evidence
# Command templates; the caller must set these shell variables first:
#   pre         HISAT2 index prefix passed to -x
#   fq_1, fq_2  paired-end read files (mate 1 / mate 2)
#   sam, bam    alignment output (SAM) and its sorted BAM
#   gtf, list   StringTie --merge output and its input list
#   fasta       genome FASTA used by samtools/gffread
#   merge       prefix of the GTF handed to gffread ("${merge}.gtf")

# Build the HISAT2 index (the original had a literal "\n" after the redirect).
/data/00/software/hisat/hisat2-2.1.0/hisat2-build Galili.Chr.v3.fasta Spalax_galili 1> hisat2-index.log 2>&1

# Align one paired-end library. The original "-1 -$fq_1" had a stray dash that
# would have turned the mate-1 file name into an option.
/data/00/software/hisat/hisat2-2.1.0/hisat2 --new-summary -p 2 -x "$pre" -1 "$fq_1" -2 "$fq_2" -S "$sam"

# Sort the alignments with 6 threads (the original "@ 6" was missing the dash).
samtools sort -@ 6 -o "$bam" "$sam"

# De novo transcript assembly. Uses "--seqType" and adds the missing space
# after "--left".
singularity exec -B RNA_SEQ trinityrnaseq.v2.15.1.simg \
  Trinity --seqType fq --left "$fq_1" --right "$fq_2" --CPU 6 --max_memory 20G

# Merge per-sample assemblies. $list is left unquoted on purpose in case it
# holds several GTF paths — TODO confirm whether it is a single list file.
/data/00/software/stringtie/stringtie-2.0.4.Linux_x86_64/stringtie --merge -p 15 -o "$gtf" $list

# Extract transcript sequences and convert the merged annotation to GFF3.
samtools faidx "$fasta"
gffread -w transcript.fa -g "$fasta" "${merge}.gtf"
gffread merged.gtf -o- > merged.gff3

# Project TransDecoder ORFs from transcript to genome coordinates.
# NOTE(review): "softwore" is kept as written; verify the install path.
/data/01/softwore/TransDecoder/bin/cdna_alignment_orf_to_genome_orf.pl \
  transcripts.fasta.transdecoder.gff transcripts.gff3 transcripts.fasta \
  > transcripts.fasta.transdecoder.genome.gff3

# PASA: clean transcripts, align/assemble, then extract a training set.
singularity exec -B 03.gene_predict_1213 pasapipeline_2.5.3.sif seqclean transcript.fa -v UniVec.fasta
/data/00/software/singularity/singularity_built/bin/singularity exec -B 06.pasa pasapipeline_2.5.3.sif \
  /usr/local/src/PASApipeline/Launch_PASA_pipeline.pl \
  -c alignAssembly.config -C -R -g Galili.Chr.v3.fasta -t transcript.fa --ALIGNERS blat,gmap --CPU 20
singularity exec -B 06.pasa pasapipeline_2.5.3.sif \
  /usr/local/src/PASApipeline/scripts/pasa_asmbls_to_training_set.dbi \
  --pasa_transcripts_fasta Spalax_galili.sqlite.assemblies.fasta \
  --pasa_transcripts_gff3 Spalax_galili.sqlite.pasa_assemblies.gff3

#### GeMoMa: homology-based gene prediction
# Run once per reference species. $ref is the prefix of that reference's
# genome FASTA (${ref}.genome.fa) and annotation (${ref}.genomic.gff).
# The original outdir was ".../GeMoMa/B amboo.results": the stray space split
# the argument, and the name did not match the "$ref.results" directory read
# by the conversion step. Both commands now use ${ref}.results.
java -jar GeMoMa-1.7.1.jar CLI GeMoMaPipeline threads=30 \
  t=Galili.Chr.v3.fasta s=own \
  g="${ref}.genome.fa" a="${ref}.genomic.gff" \
  outdir="03.gene_predict/02.homologs/GeMoMa/${ref}.results" \
  AnnotationFinalizer.r=NO tblastn=false
perl ConvertFormat_GeMoMa.pl "03.gene_predict/02.homologs/GeMoMa/${ref}.results/final_annotation.gff"

####Ab initio prediction

# Per-chromosome command templates: $i is the chromosome index and $soft_mask
# is the soft-masked genome FASTA. The loop that sets $i is not recorded here.

### AUGUSTUS (human model, soft-masked input)
# The soft-masked genome is split and then deleted; only the pieces are kept.
perl split.pl "$soft_mask" && rm -- "$soft_mask"
# The original input path was "/01.genescan/soft_fa/..." (filesystem root); it
# is now "./01.genescan/soft_fa/...", matching the GlimmerHMM input below.
/data/00/software/augustus/augustus-3.3.3/bin/augustus --softmasking=1 --species=human \
  "./01.genescan/soft_fa/Chr${i}.fa" --UTR=off \
  | perl ConvertFormat_augustus.pl - "./augustus/Chr${i}.soft.gff"

### GENSCAN (hard-masked input, split into pieces)
# NOTE(review): the split writes to "genscan_split" but genscan reads from
# "genscan_temp" — confirm which directory name is correct.
perl split_fasta.pl Galili.Chr.v3.fasta.hardmask genscan_split 3000000
# The original paths contained stray spaces ("01.gene scan", "0 1.genescan")
# that split them into separate arguments; they are removed here.
/data/00/software/genscan/genscan /data/00/software/genscan/HumanIso.smat \
  "/data/01/user192/qijiao/zhushi/03.gene_predict/01.abinitio/01.genescan/genscan_temp/Chr${i}-0.fa" \
  | /data/01/user192/qijiao/zhushi/00.scprit/utils/ConvertFormat_genscan.pl - \
  > "/data/01/user192/qijiao/zhushi/03.gene_predict/01.abinitio/01.genescan/genscan_temp/Chr${i}-0.fa.gff"

#### GlimmerHMM (human training set)
# Stray spaces removed from the original "/data /00/software/..." training
# directory and from the "Chr$i.sort. gff" output name.
/data/00/software/GlimmerHMM/bin/glimmerhmm_linux_x86_64 "./01.genescan/soft_fa/Chr${i}.fa" \
  -d /data/00/software/GlimmerHMM/trained_dir/human/Train0-43 -g \
  > "./02.glimmer/glimmer_result/Chr${i}.sort.gff"

#### EVidenceModeler: combine ab initio, homology and RNA evidence
# NOTE(review): the helper scripts are referenced under three different roots
# on this page (/data/01/user192/zhushi, /data/01/zhushi,
# /data/01/user192/qijiao/zhushi) — confirm the correct one.
mkdir -p 04.evm
EVM.prepare.pl /data/00/software/EVM/EVM_V1.1.1/EVidenceModeler-1.1.1 \
  01.abinitio GeMoMa 03.rna ./00.data/Galili.Chr.v3.fasta-split 04.evm
EVM.run.cmd.pl /data/00/software/EVM/EVM_V1.1.1/EVidenceModeler-1.1.1 \
  ./00.data/Galili.Chr.v3.fasta-split 03.gene_predict/04.evm /data/01/user192/zhushi/00.scprit/utils \
  > 03.gene_predict/04.evm/01.split_prepare.sh
parallel -j 30 < 03.gene_predict/04.evm/01.split_prepare.sh

# Gather the per-chromosome scripts. The original read "evm_for_each_chr//...":
# the "*" between the slashes was most likely lost in rendering (the evm.out.gff
# line below still has it), so the glob is restored.
cat 03.gene_predict/04.evm/evm_for_each_chr/*/split_evm_running.sh > 03.gene_predict/04.evm/02.split_run.sh
parallel -j 30 < 03.gene_predict/04.evm/02.split_run.sh
cat 03.gene_predict/04.evm/evm_for_each_chr/*/commands.list > 03.gene_predict/04.evm/03.running.sh
parallel -j 30 < 03.gene_predict/04.evm/03.running.sh

# Merge the per-chromosome EVM results.
perl /data/01/user192/zhushi/00.scprit/utils/EVM.merge.cmd.pl \
  03.gene_predict/04.evm/evm_for_each_chr /data/00/software/EVM/EVM_V1.1.1/EVidenceModeler-1.1.1 \
  > 03.gene_predict/04.evm/04.merge.sh
parallel -j 30 < 03.gene_predict/04.evm/04.merge.sh
cat 03.gene_predict/04.evm/evm_for_each_chr/*/evm.out.gff > 03.gene_predict/04.evm/merge.out.gff

# Protein sequences, renamed gene models, then CDS/peptide FASTA.
perl /data/00/software/EVM/EVM_V1.1.1/EVidenceModeler-1.1.1/EvmUtils/gff3_file_to_proteins.pl \
  03.gene_predict/04.evm/merge.out.gff Galili.Chr.v3.fasta prot \
  > 03.gene_predict/04.evm/merge.out.gff.pep
perl /data/01/zhushi/00.scprit/utils/gff.clean_name2spe.pl \
  03.gene_predict/04.evm/merge.out.gff Spalax_galili 03.gene_predict/04.evm/merge.out.gff.fix
# The original had a stray "a" after the genome FASTA; it is removed.
gffread 03.gene_predict/04.evm/merge.out.gff.fix -g Galili.Chr.v3.fasta \
  -x 03.gene_predict/04.evm/merge.out.gff.fix.cds \
  -y 03.gene_predict/04.evm/merge.out.gff.fix.pep

#### Assessment: BUSCO on the predicted proteins
# NOTE(review): the genome-mode BUSCO run earlier on this page reads the lineage
# from /data/01/bio-software/mammalia_odb10 — confirm which path is current.
busco -i merge.out.gff.fix.pep -o busco_1217_v2.out --cpu 50 \
  -l /data/01/user194/bio-software/mammalia_odb10 -m proteins -f

#### Mitochondrial genome assembly
# $i is the sample name. Braces are required: "$i_R1", "$i_R2" and "$i_out"
# would expand the unrelated variables i_R1, i_R2 and i_out.
get_organelle_from_reads.py \
  -1 "../00.rawdata/${i}_R1.fq.gz" -2 "../00.rawdata/${i}_R2.fq.gz" \
  -R 10 -k 21,45,65,85,105 -F animal_mt -o "animal_mt_${i}_out"