SHARE
TWEET

Untitled

a guest Dec 27th, 2016 71 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2. set -euo pipefail
  3. IFS=$'nt'
  4.  
  5. # Check for missing metadata file input
  6. if [ -z ${1+x} ]; then
  7.     echo "ERROR: missing input file; aborting."
  8.     exit 1
  9. fi
  10.  
  11. INPUT=$1
  12.  
  13. # Create / touch lists of series under processing / finished processing
  14. touch processing.txt finished.txt
  15.  
  16. # Find GSE and SRR column numbers in metadata file
  17. GSECOL=$(head -1 $INPUT | tr 't' 'n' | nl | grep GSE | cut -f 1 | tr -d ' ')
  18. SRRCOL=$(head -1 $INPUT | tr 't' 'n' | nl | grep SRR | cut -f 1 | tr -d ' ')
  19.  
  20. # Main loop: for every unique GSE in metadata file
  21. UNDER_PROCESSING=$(wc -l processing.txt | cut -d ' ' -f 1)
  22. tail -n +2 $INPUT | cut -f $GSECOL | sort | uniq | while read GSE; do
  23.  
  24.     # Get all SRRs for current GSE
  25.     RUNS=$(grep -w $GSE $INPUT | cut -f $SRRCOL)
  26.  
  27.     # Working directory
  28.     WORKDIR=data/$CELL/$GSE
  29.  
  30.     # Skip if GSE is already finished or is currently being processed
  31.     set +e
  32.     if grep -wq $GSE processing.txt || grep -wq $GSE finished.txt; then
  33.         continue
  34.     fi
  35.     set -e
  36.  
  37.     # Create directory structure
  38.     mkdir -p $WORKDIR/01_fastq $WORKDIR/02_alignment $WORKDIR/03_expression
  39.  
  40.     # Start a maximum of 10 different series at once, then exit
  41.     if [ $UNDER_PROCESSING -eq 10 ]; then
  42.         echo "Started 10 different GSE series; stopping."
  43.         echo ""
  44.         exit 1
  45.     else
  46.         UNDER_PROCESSING=$(( $UNDER_PROCESSING + 1 ))
  47.     fi
  48.  
  49.     # Queue downloads
  50.     for SRR in $RUNS; do
  51.  
  52.         # Queue download
  53.         sbatch --job-name $SRR.download
  54.             --error $WORKDIR/01_fastq/slurm.download.fastq.$SRR.err
  55.             --output $WORKDIR/01_fastq/slurm.download.fastq.$SRR.out
  56.             --mail-type=NONE
  57.             scripts/01_download_fastq.sh $GSE $SRR
  58.                 >> sbatch.download.ids 2>&1
  59.     done
  60.  
  61.     # Queue alignment
  62.     sbatch --job-name $GSE.alignment
  63.         --error $WORKDIR/02_alignment/slurm.alignment.err
  64.         --output $WORKDIR/02_alignment/slurm.alignment.out
  65.         --mail-type=NONE
  66.         --dependency=afterok:$(cat sbatch.download.ids | cut -d ' ' -f 4 | xargs | tr ' ' ':')
  67.         scripts/02_alignment.sh $GSE
  68.             > sbatch.alignment.id 2>&1
  69.  
  70.     rm sbatch.download.ids
  71.  
  72.     # Queue expression estimation
  73.     sbatch --job-name $GSE.expression
  74.         --error $WORKDIR/03_expression/slurm.expression.err
  75.         --output $WORKDIR/03_expression/slurm.expression.out
  76.         --mail-type=NONE
  77.         --dependency=afterok:$(cat sbatch.alignment.id | cut -d ' ' -f 4)
  78.         scripts/03_estimate_expression.sh $GSE
  79.             > /dev/null
  80.  
  81.     rm sbatch.alignment.id
  82.  
  83.     # Add GSE to [processing.txt] file
  84.     echo "$GSE" >> processing.txt
  85.     echo "Queued pipeline for $GSE ($CELL); $UNDER_PROCESSING in queue."
  86. done
  87.    
  88. #!/bin/bash -l
  89. #SBATCH --account XXXXXXXX
  90. #SBATCH --partition core
  91. #SBATCH --ntasks 1
  92. #SBATCH --job-name download.fastq
  93. #SBATCH --time 15:00:00
  94. #SBATCH --mail-user XXXXXXXXXXX
  95. #SBATCH --mail-type=FAIL
  96. #SBATCH --error log.download.fastq.err
  97. #SBATCH --output log.download.fastq.out
  98.  
  99. # Get GSE and SRR IDs from input argument 1
  100. GSE=$1
  101. SRR=$2
  102.  
  103. # Working directory
  104. WORKDIR=data/$CELL/$GSE
  105.  
  106. # Modules
  107. module load bioinfo-tools sratools/2.8.0
  108.  
  109. # Download FASTQ files
  110. fastq-dump --outdir $WORKDIR/01_fastq --gzip --skip-technical --split-files --readids --clip -v $SRR
  111.     > $WORKDIR/01_fastq/log.download.fastq.$SRR.txt 2>&1
  112.  
  113. # Delete SRA file
  114. if [ -f /proj/ncbi/sra/$SRR.sra ]; then
  115.     rm /proj/ncbi/sra/$SRR.sra
  116. fi
RAW Paste Data
Top