07_ANNOTATION - eolesin/AMOR_Indiv_Assembly_Protocol GitHub Wiki

Run HMMs for single-copy core genes on each sample's contig set.

# Run anvi-run-hmms on the contigs database of every sample named in
# AMOR_2020_Good (sample names are the whitespace-separated words of the file).
for sample in $(cat AMOR_2020_Good); do
  anvi-run-hmms \
    -c "03_INDIV_ASSEMBLY/${sample}/${sample}.prefixed.contigs.db" \
    -T 50
done

Run CAT on contig sets to get taxonomic assignments of contigs

# Locations of the CAT database and taxonomy directories passed to -d and -t.
DB_PATH='/export/dahlefs/work/Databases/CAT_prepare_20210107/2021-01-07_CAT_database'
TAX_PATH='/export/dahlefs/work/Databases/CAT_prepare_20210107/2021-01-07_taxonomy'

# Run "CAT contigs" (under nice) on the prefixed contig FASTA of every sample
# named in AMOR_2020_Good; outputs are written with the prefix <sample>_CATout.
for sample in $(cat AMOR_2020_Good); do
  nice CAT contigs \
    -c "${sample}/${sample}.prefixed.fa" \
    -d "${DB_PATH}" \
    -t "${TAX_PATH}" \
    -n 16 \
    -o "${sample}_CATout" \
    --top 11 \
    --I_know_what_Im_doing
done

Add verbose names to the CAT output files

# Translate the assignments in each sample's ORF2LCA table into verbose names
# with "CAT add_names", writing <sample>_CAT_fullnames.txt.
TAX_PATH='/export/dahlefs/work/Databases/CAT_prepare_20210107/2021-01-07_taxonomy'
for sample in $(cat AMOR_2020_Good); do
  CAT add_names \
    -i "${sample}_CATout.ORF2LCA.txt" \
    -o "${sample}_CAT_fullnames.txt" \
    -t "${TAX_PATH}" \
    --only_official
done

Use BAT on the dereplicated genome set

# Directory holding the dereplicated genome (MAG) FASTA files.
genomes='/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98/dereplicated_genomes_AM'
# CAT database and taxonomy directories (same locations as used for the contig runs).
DB_PATH='/export/dahlefs/work/Databases/CAT_prepare_20210107/2021-01-07_CAT_database'
TAX_PATH='/export/dahlefs/work/Databases/CAT_prepare_20210107/2021-01-07_taxonomy'

This command runs BAT on the MAGs.

# Requires genomes, DB_PATH and TAX_PATH to be set in the current shell.
CAT bins \
  -b "${genomes}" \
  -d "${DB_PATH}" \
  -t "${TAX_PATH}" \
  --bin_suffix .fa

Gene annotation (on kjempefuru)

# Export the amino-acid sequences of the gene calls in each sample's contigs
# database to <sample>/<sample>.prefixed.aminoacidseqs.fa.
# NOTE: a line-continuation backslash must be the very last character on its
# line. A space after it ends the command early, and the following line
# (here the -o option) is then executed as a separate command.
cd 03_INDIV_ASSEMBLY/
for SET in $(cat /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/AMOR_2020_Good); do
  anvi-get-sequences-for-gene-calls \
    -c "${SET}/${SET}.prefixed.contigs.db" \
    --get-aa-sequences \
    -o "${SET}/${SET}.prefixed.aminoacidseqs.fa"
done

# Pfam annotation: go back up one directory, then run anvi-run-pfams on every
# sample's contigs database using the local Pfam data directory.
cd ..
for sample in $(cat AMOR_2020_Good); do
  anvi-run-pfams \
    -c "03_INDIV_ASSEMBLY/${sample}/${sample}.prefixed.contigs.db" \
    --pfam-data-dir /export/dahlefs/work/Databases/2021-06-15_Pfams-DB/ \
    -T 60
done

# KEGG KOfam annotation. Run with the dev version of Anvi'o from Achim's
# anvio-dev conda env. Standard output of every run is appended to one log file.
for sample in $(cat /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/AMOR_2020_Good); do
  anvi-run-kegg-kofams \
    -c "03_INDIV_ASSEMBLY/${sample}/${sample}.prefixed.contigs.db" \
    --kegg-data-dir /export/dahlefs/work/Databases/2021-04-22_KOFAM-DB \
    -T 40 \
    >> /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/06_ANNOTATION/2021-05-26_anvi-run-kegg-kofams.log
done

# Estimate metabolism 

Track down info about where contigs binned to if you have a gene of interest on a contig.

# Print the rows of the GTDB-Tk bacterial summary table that mention the MAG.
cd 2021-08-26_ALL_P70_ANI98_gtdbtk/
grep 's_12ROV10_HD34C_MAG_00026' gtdbtk.bac120.summary.tsv