11_MAG_MAPPING - eolesin/AMOR_Indiv_Assembly_Protocol GitHub Wiki
Mapping MAGs back to the read data. Most commands are taken from the anvi'o documentation for the TARA Oceans dataset analysis: https://merenlab.org/data/tara-oceans-mags/
- Import to anvio
# WITHIN THE 11_MAP_MAGS folder on kjempefuru
# Concatenate all the contigs from all the "winning" MAGs from dRep:
cat dereplicated_genomes/*fa > NON-REDUNDANT-MAGS.fa
# Runar's bins did not have clean deflines, which anvi'o complains about.
# Replace dashes and parentheses with underscores. The edit is restricted to
# header lines (those starting with '>') so sequence lines are never rewritten,
# and it targets the file written by the cat above (hyphenated name).
sed -i '/^>/ s/[-()]/_/g' NON-REDUNDANT-MAGS.fa
# Then import the cleaned FASTA into anvi'o to make the contigs database
anvi-gen-contigs-database -f NON-REDUNDANT-MAGS.fa -o NON-REDUNDANT-MAGs-CONTIGS.db
Achim did some magic to make the contigs.db file
* First need to remove all of the renamed bins that are not in the dereplicated set:
# List the files that exist in only one of the two directories and keep just
# the file names. The list must be written to the .txt name that the rm step reads.
diff -q dereplicated_genomes/ Runar_renamed_bins/ | grep Only | awk -F": " '{print $2}' > removed_bins_Runar.txt
# Run from inside the copy of the bin directory, so the list sits one level up.
xargs rm < ../removed_bins_Runar.txt
achimm@kjempefuru /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98 $ cp Runar_renamed_bins/* Runar_renamed_bins_AM/
achimm@kjempefuru /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98/Runar_renamed_bins_AM $ xargs rm < ../removed_bins_Runar.txt
achimm@kjempefuru /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98 $ cp dereplicated_genomes/s_* dereplicated_genomes_AM/
achimm@kjempefuru /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98 $ cp Runar_renamed_bins_AM/* dereplicated_genomes_AM/
* concatenate:
/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98/dereplicated_genomes_AM $ cat *fa > ../NON-REDUNDANT-MAGS.fa
* Make contigs.db from the fasta
# Switch to the anvi'o environment, then build the contigs database
# from the concatenated FASTA using 30 threads.
conda activate anvio-dev
anvi-gen-contigs-database \
  --contigs-fasta NON-REDUNDANT-MAGS.fa \
  --output-db-path NON-REDUNDANT-MAGs-CONTIGS.db \
  --num-threads 30
05. aug. 2021
-------------
* anvi-gen contigs ran:
Contigs with at least one gene call ..........: 522929 of 523015 (100.0%)
Contigs database .............................: A new database, NON-REDUNDANT-MAGs-CONTIGS.db, has been created.
Number of contigs ............................: 523,015
Number of splits .............................: 568,494
Total number of nucleotides ..................: 4,912,733,430
Gene calling step skipped ....................: False
Splits broke genes (non-mindful mode) ........: False
Desired split length (what the user wanted) ..: 20,000
Average split length (what anvi'o gave back) .: 22,549
* Check how the split names look:
# Dump every split name from the contigs database into splitnames.txt.
# sqlite3 already prints one value per line, so no shell loop is needed
# (and nothing gets word-split, globbed, or backslash-interpreted on the way).
sqlite3 /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98/NON-REDUNDANT-MAGs-CONTIGS.db 'select split from splits_basic_info' > splitnames.txt
Save bin information in tab table:
for split_name in `sqlite3 /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98/NON-REDUNDANT-MAGs-CONTIGS.db 'select split from splits_basic_info'`
do
The table has to look like this:
s_GS19_ROV14_BS02_Bin_00113_contig_001242_split_00001 s_GS19_ROV14_BS02_Bin_00113
s_10_17ROV19_HD25_MAG_00093_contig_000921_split_00001 s_10_17ROV19_HD25_MAG_00093
s_CGB10_2_bin_39_000000000043_split_00001 s_CGB10_2_bin_39
s_CH2_B1_bin_57_000000000272_split_00001 s_CH2_B1_bin_57
s_10ROV5WB_metabat_100_000000000014_split_00001 s_10ROV5WB_metabat_100
s_CGB_Flange_2011_3_36_000000000132_split_00001 s_CGB_Flange_2011_3_36
s_CGB9_3_Binning1_14_000000000005_split_00001 s_CGB9_3_Binning1_14
→ Our samples: the MAG name is the first 6 fields, but it varies for Runar's samples.
→ All our samples contain 'contig', so I could do: if the split name contains 'contig', take the split name minus the last 4 fields; if it doesn't, take the split name without the last 3 fields.
# Build the split -> MAG table (tab-separated, one split per line).

# collection_line SPLIT_NAME
# Prints "<split name><TAB><MAG name>" for one split.
#   - names containing "contig" (our MAGs): MAG = split name minus its last 4 '_' fields
#   - all other names (Runar's bins):       MAG = split name minus its last 3 '_' fields
# Uses shell parameter expansion instead of rev|cut|rev so that no external
# process is started per split (there are several hundred thousand splits).
collection_line() {
  local split_name=$1
  local mag
  # [[ ]] is required here: single-bracket [ ] cannot do glob matching.
  if [[ $split_name == *"contig"* ]]; then
    mag=${split_name%_*_*_*_*}
  else
    mag=${split_name%_*_*_*}
  fi
  printf '%s\t%s\n' "$split_name" "$mag"
}

# NOTE: this appends; delete an old NON-REDUNDANT-MAGs-COLLECTION.txt before re-running.
sqlite3 /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/11_MAP_MAGS/ALL_withRunar_drep_COMP70_ANI98/NON-REDUNDANT-MAGS.db 'select split from splits_basic_info' |
  while IFS= read -r split_name; do
    collection_line "$split_name"
  done >> NON-REDUNDANT-MAGs-COLLECTION.txt
→ Square brackets apparently don't get copied from Zim! The test in the loop above must be written as: if [[ $split_name == *"contig"* ]]
- Map reads back
# Read directories per project. The trailing slash matters: file paths below
# are built by plain concatenation of directory + sample ID + suffix.
PATH_2019="/export/dahlefs/work/Shotgun/Metagenomes_chimneys_2019/01_QC/"
PATH_2020="/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/02_HUMAN_Decontam/"
PATH_Runar="/export/dahlefs/work/Runar_AMOR_metagenomes/"

# Samples listed in AMOR_only (reads under PATH_2020).
for sample in $(cat AMOR_only); do
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_2020}${sample}-cleanR1.fq" \
    -2 "${PATH_2020}${sample}-cleanR2.fq" \
    --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam"
done

# Samples listed in Iron_mats_Good (reads under PATH_2020).
for sample in $(cat Iron_mats_Good); do
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_2020}${sample}-cleanR1.fq" \
    -2 "${PATH_2020}${sample}-cleanR2.fq" \
    --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam"
done

# Samples listed in AMOR_2019 (reads under PATH_2019).
for sample in $(cat AMOR_2019); do
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_2019}${sample}-QUALITY_PASSED_R1.fastq" \
    -2 "${PATH_2019}${sample}-QUALITY_PASSED_R2.fastq" \
    --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam"
done

# Samples listed in sample_list_noNP (reads under PATH_Runar).
for sample in $(cat sample_list_noNP); do
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_Runar}${sample}_S1_L001_R1_001.fastq" \
    -2 "${PATH_Runar}${sample}_S1_L001_R2_001.fastq" \
    --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam"
done
-
Convert .sam to .bam. Index with samtools and remove temp files.
-
Emily and Achim put MAG names in Runar's bin FASTA contig headers. On SAGA.
#!/usr/bin/bash
# SLURM batch job: run anvi-script-reformat-fasta --simplify-names on every
# *.fa file in the current working directory, using "s_<sanitized file stem>"
# as the contig-name prefix, and write the results to Renamed_MAGs_Runar/.

# every job must be accounted for
#SBATCH --account=nn9836k
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
# every job requires some specification of the memory (RAM) it needs
#SBATCH --mem-per-cpu=5G
# every job requires a runtime limit
#SBATCH --time=48:00:00
module load anvio/6.1-intel-2019b-Python-3.7.4

# rename_bins
# Reformats each *.fa in the current directory into Renamed_MAGs_Runar/<same file name>.
# The prefix is the file name without ".fa", with every '.' and '-' replaced by '_'.
rename_bins() {
  local fasta stem
  # The output directory must exist before the first file is written.
  mkdir -p Renamed_MAGs_Runar
  for fasta in *.fa; do
    # With no matching files the glob stays literal ("*.fa"); skip it.
    [[ -e $fasta ]] || continue
    stem=${fasta%.fa}
    anvi-script-reformat-fasta "$fasta" --simplify-names \
      --prefix "s_${stem//[.-]/_}" -o "Renamed_MAGs_Runar/${fasta}"
  done
}

rename_bins
-
Concatenated fasta of renamed winning dereplicated genomes imported into Anvio and created contig.db file of them
-
Rerun the mapping using the new contig.db
# in /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/11_MAP_MAGS/ALL_withRunar_drep_COMP70_ANI98
screen
# Read directories per project. The trailing slash matters: file paths are
# built by plain concatenation of directory + sample ID + suffix.
PATH_2019="/export/dahlefs/work/Shotgun/Metagenomes_chimneys_2019/01_QC/"
PATH_2020="/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/02_HUMAN_Decontam/"
PATH_Runar="/export/dahlefs/work/Runar_AMOR_metagenomes/"
conda activate bowtie2
bowtie2-build --threads 40 NON-REDUNDANT-MAGS.fa NON-REDUNDANT-MAGS

# map_samples LIST_FILE READ_DIR R1_SUFFIX R2_SUFFIX
# Maps every sample ID in LIST_FILE against the NON-REDUNDANT-MAGS index.
#   reads : READ_DIR + <sample> + R1_SUFFIX / R2_SUFFIX
#   output: Bowtie/<sample>-in-NRMAGS.sam (aligned reads only, --no-unal)
#   log   : <sample>.log, holding bowtie2's stderr. This time a log file is kept
#           for each sample so we can actually see how much of each sample maps
#           to the MAGs without crawling through BAMs or something.
map_samples() {
  local list=$1
  local read_dir=$2
  local r1_suffix=$3
  local r2_suffix=$4
  local sample
  mkdir -p Bowtie
  # The list is deliberately word-split: sample IDs may be separated by any whitespace.
  for sample in $(<"$list"); do
    bowtie2 --threads 40 -x NON-REDUNDANT-MAGS \
      -1 "${read_dir}${sample}${r1_suffix}" -2 "${read_dir}${sample}${r2_suffix}" \
      --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam" 2> "${sample}.log"
  done
}

map_samples AMOR_only        "$PATH_2020"  -cleanR1.fq               -cleanR2.fq
map_samples Iron_mats_Good   "$PATH_2020"  -cleanR1.fq               -cleanR2.fq
map_samples AMOR_2019        "$PATH_2019"  -QUALITY_PASSED_R1.fastq  -QUALITY_PASSED_R2.fastq
map_samples sample_list_noNP "$PATH_Runar" _S1_L001_R1_001.fastq     _S1_L001_R2_001.fastq
Convert sam to bam and do sorting and indexing
# sam_to_bam SAMPLE
# Converts Bowtie/<SAMPLE>-in-NRMAGS.sam into a sorted, indexed BAM.
# Every intermediate and final file lives in Bowtie/, so each step reads what
# the previous one wrote and the profiling step finds Bowtie/<SAMPLE>-in-NRMAGS.bam.
# The steps are chained with && so a failed step is not followed by work on a
# missing or partial file.
sam_to_bam() {
  local sample=$1
  local stem="Bowtie/${sample}-in-NRMAGS"
  # -F 4 excludes reads flagged as unmapped
  samtools view -F 4 -bS "${stem}.sam" > "${stem}-RAW.bam" &&
    samtools sort "${stem}-RAW.bam" -o "${stem}.bam" &&
    samtools index "${stem}.bam"
}

# The list is deliberately word-split: sample IDs may be separated by any whitespace.
for sample in $(<all_samples_all_projects); do
  sam_to_bam "$sample"
done
Perform the profiling in Anvio. Merge the profiles
# Profile each sample's BAM against the contigs database (SNV profiling is
# skipped); every sample gets its own <sample>-in-NRMAGS output directory.
for sample in $(cat all_samples_all_projects); do
  anvi-profile \
    -c NON-REDUNDANT-MAGS.db \
    -i "Bowtie/${sample}-in-NRMAGS.bam" \
    --skip-SNV-profiling \
    --num-threads 20 \
    -o "${sample}-in-NRMAGS"
done

# Merge the resulting per-sample profiles into a single anvi'o merged profile.
anvi-merge *-in-NRMAGS/PROFILE.db -c NON-REDUNDANT-MAGS.db -o NON-REDUNDANT-MAGS-MERGED