M2 Lab3 - areed37/Andrew_Reed_EPP_531 GitHub Wiki
### Genome Assessment
# --- BUSCO completeness check, then chloroplast scaffold filtering ---
# Make a working directory for the BUSCO run and enter it.
# -p keeps this step re-runnable when the directory already exists; && avoids
# changing directory if it could not be created.
mkdir -p busco && cd busco
# Need to copy the FASTA here instead of linking to it.
# NOTE(review): the source path has no directory component, so from inside the
# freshly created busco/ it will not resolve -- confirm the real location.
cp Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic.hap2.p_ctg.noseq.fasta .
# Check that BUSCO runs inside the container ($PWD is bind-mounted with -B;
# quoted so a working directory containing spaces is passed as one argument).
singularity exec -B "$PWD" /sphinx_local/images/ezlabgva-busco-v5.6.1_cv1.img busco --help
# Run BUSCO in genome mode against the embryophyta lineage on 5 CPUs
# (done by another group member).
# NOTE(review): Genome.fasta does not match the file copied above -- presumably
# a placeholder for the assembly FASTA; confirm before re-running.
singularity exec -B "$PWD" /sphinx_local/images/ezlabgva-busco-v5.6.1_cv1.img busco -i Genome.fasta -m genome -l embryophyta -c 5 -o busco_results
# Load pandas and Biopython from Spack (pinned by hash); presumably required by
# the Python helper scripts below -- confirm.
spack load py-pandas/wbl4hvd
spack load py-biopython/2kcwn4f
# Chloroplast alignments: the chloroplast sequence is the minimap2 target and
# the assembly is the query (5 threads, asm5 preset); PAF goes to stdout.
minimap2 -t 5 -x asm5 Sassafras_albidum_chloroplast.fa Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic_hap2.p_ctg.fa > ChloAlignment.paf
# Find the list of scaffolds that map to the chloroplast.
python3 find_scaffolds_by_paf_coverage.py ChloAlignment.paf > ChloAlignment_list.txt
# Remove the mapping scaffolds from the assembly (python3, matching the
# interpreter used for the helper script above).
python3 remove_contigs_by_name.py ChloAlignment_list.txt Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic_hap2.p_ctg.fa > assembly-CP_filtered.fa
# BUSCO version is: 5.6.1
# The lineage dataset is: embryophyta_odb10 (Creation date: 2024-01-08, number of genomes: 50, number of BUSCOs: 1614)
# Summarized benchmarking in BUSCO notation for file /pickett_sphinx/projects/EPP531_AGA/oksuz/BUSCO/Sassafras_V1.0_wit$
# BUSCO was run in mode: euk_genome_met
# Gene predictor used: metaeuk
***** Results: *****
C:99.1%[S:5.0%,D:94.1%],F:0.5%,M:0.4%,n:1614
1598 Complete BUSCOs (C)
80 Complete and single-copy BUSCOs (S)
1518 Complete and duplicated BUSCOs (D)
8 Fragmented BUSCOs (F)
8 Missing BUSCOs (M)
1614 Total BUSCO groups searched
Assembly Statistics:
Dependencies and versions:
hmmsearch: 3.1
bbtools: 39.01
metaeuk: 6.a5d39d9
busco: 5.6.1
# --- Mitochondrial scaffold filtering ---
# Mitochondria alignments: the Cinnamomum camphora mitochondrion sequence is the
# minimap2 target and the assembly is the query (5 threads, asm5 preset).
minimap2 -t 5 -x asm5 Cinnamomum_camphora_mitochondrion.fa Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic_hap2.p_ctg.fa > MiAlignment.paf
# Find the list of scaffolds that map to the mitochondrion.
python3 find_scaffolds_by_paf_coverage.py MiAlignment.paf > MiAlignment_list.txt
# Remove the mapping scaffolds (python3, matching the interpreter used for the
# helper script above).
# NOTE(review): the input is the original assembly, not assembly-CP_filtered.fa,
# so this output is independent of the chloroplast filtering -- confirm that
# two separate filtered files (rather than one with both removed) are intended.
python3 remove_contigs_by_name.py MiAlignment_list.txt Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic_hap2.p_ctg.fa > assembly-MI_filtered.fa