FRCBam - NBISweden/workshop-genome_assembly GitHub Wiki
FRC: Misassembly detection
Notes:
- Include `--mp-sam "${MP_ALIGNMENT}"` in the FRC command to also include mate-pair data.
- The combined output is in `${PREFIX}_FRC.txt`.
- Dependencies: FRC, GNUplot
Command:
#!/usr/bin/env bash
# Job configuration: selects one BAM file per SLURM array task.
# Environment read: SLURM_NPROCS, SLURM_ARRAY_TASK_ID.

# Append the FRC install directory so the FRC executable can be found.
PATH="$PATH:/path/to/FRC"

# Number of allocated CPUs; falls back to 1 when SLURM_NPROCS is unset/empty.
CPUS="${SLURM_NPROCS:-1}"

# Index into FILES for this task. Bash arrays are zero-based, so the first
# BAM file corresponds to task id 0. When the script is not run as an array
# job the variable is unset; default explicitly to 0 (same fallback style as
# CPUS) instead of leaving the array subscript empty.
JOB="${SLURM_ARRAY_TASK_ID:-0}"

# Passed to FRC as its --genome-size value.
ESTIMATED_GENOME_SIZE=100000000

# Directory holding the BAM alignments to process.
DATA_DIR=/path/to/BAMs

# Quote the directory part so a path containing whitespace or glob characters
# is not word-split; the *.bam glob itself must stay outside the quotes.
FILES=( "$DATA_DIR"/*.bam )
#######################################
# Run FRC on one paired-end alignment.
# Arguments:
#   $1 - path to the paired-end BAM alignment file
#   $2 - estimated genome size, forwarded to FRC as --genome-size
# Outputs:
#   Whatever FRC writes; its --output prefix is the BAM file name without
#   the directory and without the .bam suffix.
# Returns:
#   2 when fewer than two arguments are given, otherwise FRC's exit status
#   (or basename's, if deriving the prefix fails).
#######################################
apply_FRC () {
  if (( $# < 2 )); then
    printf 'Usage: apply_FRC <paired-end.bam> <genome-size>\n' >&2
    return 2
  fi

  # Keep the working variables function-local so repeated calls do not
  # overwrite same-named variables in the calling script.
  local PE_ALIGNMENT="$1"
  local GENOME_SIZE="$2"

  # Declare separately from the assignment so a basename failure is not
  # masked by the exit status of `local`.
  local PREFIX
  PREFIX=$( basename -- "${PE_ALIGNMENT}" .bam ) || return

  FRC --pe-sam "${PE_ALIGNMENT}" --genome-size "${GENOME_SIZE}" --output "${PREFIX}"
}
# Pick the BAM file that belongs to this array task.
BAM="${FILES[$JOB]}"

# Stop early when there is nothing to process: the task id may be past the
# end of FILES (empty value), or the *.bam glob may have matched nothing and
# left the literal pattern in the array (not an existing file).
if [[ ! -f "$BAM" ]]; then
  printf 'No BAM file found for array task "%s" in %s\n' "$JOB" "$DATA_DIR" >&2
  exit 1
fi

apply_FRC "$BAM" "$ESTIMATED_GENOME_SIZE"
GNUplot can be used to plot the files:
# Plot every matching FRC result file found below the current directory into
# a single PNG (FRC_Curve_all_assemblies.png).
# The heredoc delimiter is quoted so the shell passes the gnuplot script
# through verbatim, without attempting parameter or command expansion.
gnuplot << 'EOF'
set terminal png size 800,600
set output 'FRC_Curve_all_assemblies.png'
set title "FRC Curve" font ",14"
set key right bottom font ",8"
set autoscale
set ylabel "Approximate Coverage (%)"
set xlabel "Feature Threshold"
# Collect the result files. An explicit "." start directory is given because
# only GNU find accepts a missing path; sorting keeps the legend order stable
# between runs.
# NOTE(review): the pattern only matches files whose name ends in
# "alignment_FRC.txt" - presumably the BAM files are named "*alignment.bam";
# confirm against the actual file names.
files = system('find . -name "*alignment_FRC.txt" | sort')
# One curve per file: column 1 on the x axis, column 2 on the y axis.
plot for [data in files] data using 1:2 with lines title data
EOF