M2 Lab3 - areed37/Andrew_Reed_EPP_531 GitHub Wiki

### Genome Assessment

#make a directory for busco stuff and move into it
#-p keeps a re-run from failing when busco/ already exists; && skips the cd if the directory could not be created
mkdir -p busco && cd busco

#need to copy the fasta into this directory instead of linking to it.
#NOTE(review): presumably a symlink would dangle inside the container because the singularity commands bind only $PWD - confirm
#NOTE(review): the source path is relative to the current directory; give the full path if the fasta lives elsewhere
cp Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic.hap2.p_ctg.noseq.fasta .

#checking that busco runs
#"$PWD" is quoted so the bind mount still works when the path contains spaces
singularity exec -B "$PWD" /sphinx_local/images/ezlabgva-busco-v5.6.1_cv1.img busco --help

#running busco (done by another group member)
#-i input fasta, -m genome mode, -l lineage dataset, -c 5 CPUs, -o name of the output folder
#NOTE(review): Genome.fasta looks like a placeholder for the assembly fasta copied into this directory - confirm the real file name
singularity exec -B "$PWD" /sphinx_local/images/ezlabgva-busco-v5.6.1_cv1.img busco -i Genome.fasta -m genome -l embryophyta -c 5 -o busco_results

#load pandas and biopython from spack, one spec (name/hash) at a time
#NOTE(review): presumably these are required by the python helper scripts used for organelle filtering - confirm
for spec in py-pandas/wbl4hvd py-biopython/2kcwn4f; do
  spack load "$spec"
done

#chloroplast alignments
#minimap2 takes the target (chloroplast reference) first and the query (assembly) second,
#so every PAF record has an assembly scaffold as its query; -t 5 threads, -x asm5 assembly-to-reference preset
minimap2 -t 5 -x asm5 Sassafras_albidum_chloroplast.fa Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic_hap2.p_ctg.fa > ChloAlignment.paf

#find the list of mapping scaffolds
python3 find_scaffolds_by_paf_coverage.py ChloAlignment.paf > ChloAlignment_list.txt

#remove the mapping scaffolds
#python3 here as well, so both helper scripts run under the same interpreter
python3 remove_contigs_by_name.py ChloAlignment_list.txt Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic_hap2.p_ctg.fa > assembly-CP_filtered.fa

#BUSCO short summary (output of the group member's BUSCO run)
# BUSCO version is: 5.6.1
# The lineage dataset is: embryophyta_odb10 (Creation date: 2024-01-08, number of genomes: 50, number of BUSCOs: 1614)
# Summarized benchmarking in BUSCO notation for file /pickett_sphinx/projects/EPP531_AGA/oksuz/BUSCO/Sassafras_V1.0_wit$
# BUSCO was run in mode: euk_genome_met
# Gene predictor used: metaeuk
    ***** Results: *****
    C:99.1%[S:5.0%,D:94.1%],F:0.5%,M:0.4%,n:1614
    1598  Complete BUSCOs (C)
    80   Complete and single-copy BUSCOs (S)
    1518  Complete and duplicated BUSCOs (D)
    8    Fragmented BUSCOs (F)
    8    Missing BUSCOs (M)
    1614  Total BUSCO groups searched
Assembly Statistics:
Dependencies and versions:
    hmmsearch: 3.1
    bbtools: 39.01
    metaeuk: 6.a5d39d9
    busco: 5.6.1

#mitochondria alignments
#target (mitochondrial reference) first, query (assembly) second; -t 5 threads, -x asm5 assembly-to-reference preset
#NOTE(review): the reference is from a different genus (Cinnamomum) and asm5 is tuned for very low divergence; asm10/asm20 may recover more hits - confirm
minimap2 -t 5 -x asm5 Cinnamomum_camphora_mitochondrion.fa Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic_hap2.p_ctg.fa > MiAlignment.paf

#find the list of mapping scaffolds
python3 find_scaffolds_by_paf_coverage.py MiAlignment.paf > MiAlignment_list.txt

#remove the mapping scaffolds
#python3 here as well, so both helper scripts run under the same interpreter
#NOTE(review): this filters the original assembly, not assembly-CP_filtered.fa, so assembly-MI_filtered.fa still holds any chloroplast scaffolds - confirm that is intended
python3 remove_contigs_by_name.py MiAlignment_list.txt Sassafras_V1.0_with_Hi-C_1700_0.45_group.hic_hap2.p_ctg.fa > assembly-MI_filtered.fa