Fastp - NBISweden/workshop-genome_assembly GitHub Wiki

Fastp: A general purpose trimming tool.

Website: https://github.com/OpenGene/fastp

Notes:

  • Adapters are auto-detected (although they can also be specified explicitly)
  • Quality filtering is enabled by default
  • Length filtering is enabled by default

Command:

#!/usr/bin/env bash

CPUS="${SLURM_NPROCS:-8}"
JOB=$SLURM_ARRAY_TASK_ID

DATA_DIR=/path/to/reads
FILES=( $DATA_DIR/*_R1.fastq.gz )

apply_fastp () {
	READ1="$1"      # Read 1 of the read pair to be screened
	READ2="$2"      # Read 2 of the read pair to be screened
	if [ "$READ1" == "$READ2" ]; then
		>&2 echo "READ1 and READ2 are the same file. R2 Pattern replacement failed. Please check string substitution pattern lower down"
		exit 2
	fi
	PREFIX=$(basename "${READ1%_R1*}")
	fastp -w ${CPUS} -i "${READ1}" -I "${READ2}" -o "${PREFIX}_fastp-trimmed_R1.fastq.gz" -O "${PREFIX}_fastp-trimmmed_R2.fastq.gz"
}

FASTQ="${FILES[$JOB]}"
apply_fastp "$FASTQ" "${FASTQ/_R1./_R2.}"