M3 Lab2 - areed37/Andrew_Reed_EPP_531 GitHub Wiki
RepeatModeler and RepeatMasker
Mapping for annotation
# Create the accession list that NCBI.sh reads: one RNA-seq accession per line
# (the four accessions listed right below are the contents typed into this file).
# NOTE(review): the file name says "SRR", but the entries are ERX/SRX accessions —
# confirm that prefetch/fasterq-dump resolve them to the intended runs.
nano SRR-accession_list.txt
ERX651074
SRX950759
SRX950758
SRX339687
# Load the SRA Toolkit with Spack; NCBI.sh calls its prefetch and fasterq-dump tools
spack load sratoolkit
# Open a new script file, NCBI.sh, and enter the download loop shown below
nano NCBI.sh
# NCBI.sh - download every accession listed in SRR-accession_list.txt and
# convert it to FASTQ.
#
# Input:  SRR-accession_list.txt in the current directory, one accession per line.
#         Blank lines and Windows line endings (trailing CR) are tolerated.
# Output: whatever prefetch and fasterq-dump write into the current directory.
#
# The list is opened on file descriptor 3 so that prefetch/fasterq-dump cannot
# consume the remaining accessions by reading from stdin. The trailing
# `|| [[ -n ... ]]` keeps the last line even if the file lacks a final newline.
while read -r accession <&3 || [[ -n "$accession" ]]; do
  accession=${accession%$'\r'}
  [[ -n "$accession" ]] || continue
  # fasterq-dump only runs when prefetch succeeded; a failure is reported on
  # stderr and the loop moves on to the next accession.
  if ! { prefetch "$accession" && fasterq-dump "$accession"; }; then
    printf 'warning: could not download/convert %s\n' "$accession" >&2
  fi
done 3< SRR-accession_list.txt
# Run the download script from the directory that contains SRR-accession_list.txt
# (NCBI.sh opens the list by a relative path)
bash NCBI.sh
# Merge the single-end FASTQ files into one file, then gzip the merged file
cat file1.fastq file2.fastq file3.fastq > single_ends.fastq
gzip single_ends.fastq
# The paired-end run stays as two separate files (_1 and _2); gzip each on its own
for mate in 1 2; do
  gzip "ERR706845_${mate}.fastq"
done
# Copy the masked Hap2 genome FASTA into the current directory.
# "pathto/" is a placeholder for the directory that holds the file
# (presumably the RepeatMasker output directory — adjust as needed).
# The trailing "." is the destination; cp fails without it.
cp pathto/Redbud_Genome_Hap2.fasta.masked .
# Load STAR with Spack so the STAR commands in star.sh and starMapping.sh are available
spack load star
# Build the STAR genome index from the masked assembly (script file star.sh).
#
# NOTE(review): the index directory is named Hap1 while the FASTA is the Hap2
# assembly — confirm which haplotype is intended. The names are left unchanged
# because the mapping step reads the index from Hap1.
#
# The STAR manual asks for the --genomeDir directory to be created before the
# run, so make sure it exists (a no-op when it is already there).
mkdir -p Hap1
# --genomeSAindexNbases 13: presumably the STAR manual's setting for smaller
#   genomes, min(14, log2(GenomeLength)/2 - 1) — TODO confirm against assembly size.
# --runThreadN 3: number of threads used for index generation.
STAR \
  --runMode genomeGenerate \
  --genomeDir Hap1 \
  --genomeSAindexNbases 13 \
  --genomeFastaFiles Redbud_Genome_Hap2.fasta.masked \
  --runThreadN 3
# Map the paired-end RNA-seq reads to the STAR index in Hap1 (script file starMapping.sh).
# NOTE(review): output names say "Hap1"/"hap1" — confirm they match the indexed assembly.
star_mapping_args=(
  # index directory written by the genomeGenerate step
  --genomeDir Hap1
  # the two gzipped read files of the pair, decompressed on the fly with zcat
  --readFilesIn ERR706845_1.fastq.gz ERR706845_2.fastq.gz
  --readFilesCommand zcat
  # every output file name starts with this prefix
  --outFileNamePrefix Redbud_Hap1-rna_
  # write a coordinate-sorted BAM
  --outSAMtype BAM SortedByCoordinate
  --outSAMstrandField intronMotif
  # 107374182400 bytes = 100 GiB allowed for BAM sorting
  --limitBAMsortRAM 107374182400
  --runThreadN 10
)
# stdout and stderr both go to star_hap1.out
STAR "${star_mapping_args[@]}" > star_hap1.out 2>&1