M3 Lab2 - areed37/Andrew_Reed_EPP_531 GitHub Wiki

RepeatModeler and RepeatMasker

Mapping for annotation

#make a list of the RNA-seq accessions we want to download (one accession per line)
nano SRR-accession_list.txt
   ERX651074
   SRX950759
   SRX950758
   SRX339687

#load sratoolkit
spack load sratoolkit
#make file for script
nano NCBI.sh
   # Download every accession listed in SRR-accession_list.txt (one per line)
   # and convert it to FASTQ. Reading the list with `while read` avoids the
   # word splitting and glob expansion that `for i in $(cat ...)` performs.
   while read -r acc || [[ -n "$acc" ]]; do
       # skip blank lines so prefetch is never called with an empty argument
       if [[ -z "$acc" ]]; then
           continue
       fi
       # fasterq-dump only runs when prefetch succeeded; a failure is reported
       # on stderr and the loop carries on with the remaining accessions
       if ! { prefetch "$acc" && fasterq-dump "$acc"; }; then
           printf 'failed to download/convert %s\n' "$acc" >&2
       fi
   done < SRR-accession_list.txt
#run script
bash NCBI.sh

#put all the single ends together into one FASTQ file
#NOTE(review): file1.fastq, file2.fastq and file3.fastq look like placeholder names - substitute the real single-end FASTQ files
cat file1.fastq file2.fastq file3.fastq > single_ends.fastq
#then compress the combined file
gzip single_ends.fastq

#compress the two paired-end mate files individually (they are kept apart from the single-end reads)
gzip ERR706845_1.fastq
gzip ERR706845_2.fastq

#copy the masked genome into the current directory (cp needs a destination; "." is this directory)
cp pathto/Redbud_Genome_Hap2.fasta.masked .
#load star
spack load star
#use star to index the genome (script file star.sh)
#the index is written into the --genomeDir directory; the STAR manual requires it to exist before the run, so create it first
mkdir -p Hap1
#NOTE(review): the index directory is named Hap1 but the FASTA is Redbud_Genome_Hap2 - confirm which haplotype is intended
#NOTE(review): --genomeSAindexNbases 13 is below STAR's default of 14, presumably scaled down for the genome size - confirm
STAR \
        --runMode genomeGenerate \
        --genomeDir Hap1 \
        --genomeSAindexNbases 13 \
        --genomeFastaFiles Redbud_Genome_Hap2.fasta.masked \
        --runThreadN 3

#use star to map the paired-end reads to the indexed genome (script file starMapping.sh)
#the reads are gzipped, so --readFilesCommand zcat decompresses them on the fly
#--limitBAMsortRAM 107374182400 is 100 GiB (100 * 1024^3 bytes) for sorting the BAM
#NOTE(review): index dir and output prefix say Hap1 while the indexed FASTA is Redbud_Genome_Hap2 - confirm the naming
#stdout and stderr both go to star_hap1.out; "> file 2>&1" works in any POSIX shell, unlike ">& file"
STAR \
        --genomeDir Hap1 \
        --readFilesIn ERR706845_1.fastq.gz ERR706845_2.fastq.gz \
        --readFilesCommand zcat \
        --outFileNamePrefix Redbud_Hap1-rna_ \
        --outSAMtype BAM SortedByCoordinate \
        --outSAMstrandField intronMotif \
        --limitBAMsortRAM 107374182400 \
        --runThreadN 10 \
        > star_hap1.out 2>&1