#!/bin/bash
# Updated on Jan-25-2022, by CX Qiu
#
# This scRNA-seq pipeline accepts an input folder and then uses the default
# parameters for the data processing and analysis.

# ---- Run identity -----------------------------------------------------------
run_ID='run_17_E10_20220118'

my_folder='/net/shendure/vol2/projects/cxqiu/JAX_rna'

# Task number, copied from SGE_TASK_ID (presumably the Grid Engine array-task
# index; it is empty when that variable is not set). It selects the per-task
# output folder and sample ID list below.
num="${SGE_TASK_ID}"

# ---- Input / output locations -----------------------------------------------
# Folder holding all fastq files
fastq_folder="${my_folder}/${run_ID}/nobackup/fastq_1"

# Per-task working folder -- do NOT end this path with a slash
work_folder="${my_folder}/${run_ID}/nobackup/run_1/output_${num}"

# Text file listing the PCR group IDs (after demultiplexing) for this task
sample_ID="${my_folder}/${run_ID}/sample_ID_${num}.txt"

# ---- Processing parameters --------------------------------------------------
# Number of cores used for parallel processing
core=10
core_sam=3

# Cutoff on the number of unique reads used when splitting single cells
cutoff=200

# Mismatch rate used when removing duplicates
mismatch=1

# ---- Reference data (mouse genome) ------------------------------------------
index='/net/shendure/vol10/nobackup/genome/STAR/mm10'
gtf_file='/net/shendure/vol10/nobackup/genome/GTF/gencode.vM12.chr_patch_hapl_scaff.annotation.gtf.gz'

# ---- Interpreters -----------------------------------------------------------
# bin folder of python (python V2.7)
python_path='/net/shendure/vol12/projects/sciRNAseq_script/anaconda2/bin/'

# bin folder of R (R V3.6.3)
R_path='/net/gs/vol1/home/cxqiu/R/bin/'

# ---- Pipeline scripts and barcode resources ---------------------------------
# Folder containing the pipeline scripts
script_path='/net/gs/vol1/home/cxqiu/work/scripts/JAX_rna/Jun_pipeline'
# Ligation barcodes
ligation_barcode="${script_path}/lig_384_bc.pickle2"
# RT barcodes
RT_barcode="${script_path}/RT_384_bc.pickle2"
# Combined RT and ligation barcodes
barcodes="${script_path}/combined_384_bc.txt"
# R script used for multi-core processing
Jun_R_script="${script_path}/sci3_bash_input_ID_output_core.R"

# Record and print the start time of this run.
now="$(date)"
printf 'Current time : %s\n' "$now"

# Initialise the environment-modules system, then load the pinned tool
# versions one at a time.
module load modules modules-init modules-gs
for tool_module in samtools/1.9 bedtools/2.27.1 STAR/2.6.1d; do
    module load "$tool_module"
done
unset -v tool_module

# Per-step output folders; every one is a direct child of ${work_folder}.
UMI_attach_folder="${work_folder}/UMI_attach"
trimmed_fastq_folder="${work_folder}/trimmed_fastq"
STAR_alignment_folder="${work_folder}/STAR_alignment"
filtered_sam_folder="${work_folder}/filtered_sam"
rmdup_sam_folder="${work_folder}/rmdup_sam"
sam_splitted_folder="${work_folder}/sam_splitted"
report_folder="${work_folder}/report"

##########################
### Step 1: UMI attach ###
##########################
### ignoring I1 and I2 (PCR well information)
### adding R1 (barcode information) to R2 (mRNA)
###
### This step (requires bash):
###   1. renames each sample's R1/R2 fastq file to <sample>.R1.fastq.gz /
###      <sample>.R2.fastq.gz inside $fastq_folder;
###   2. runs UMI_barcode_attach_gzipped_with_dic.py, writing into
###      $UMI_attach_folder.
### Any failure aborts the script with a non-zero exit status so that later
### steps never run on incomplete input.

# Fail early when the sample list cannot be read; this also catches an empty
# task number, which would make the path end in "sample_ID_.txt".
if [[ ! -r "$sample_ID" ]]; then
    echo "ERROR: cannot read sample ID file '$sample_ID' (is SGE_TASK_ID set?)" >&2
    exit 1
fi

#######################################
# Rename the fastq file of one sample and read to <sample>.<read>.fastq.gz.
# Globals:   fastq_folder (read)
# Arguments: $1 - sample ID
#            $2 - read tag (R1 or R2)
# Returns:   0 if the file was renamed or already has the target name;
#            1 if no file matches, several files match, or the target name
#            is already taken while another candidate exists.
#######################################
rename_fastq() {
    local sample_name=$1 read_tag=$2
    local target="${fastq_folder}/${sample_name}.${read_tag}.fastq.gz"
    local -a matches=()
    local candidate

    for candidate in "$fastq_folder"/*"$sample_name"*"$read_tag"*.gz; do
        # Skip the literal pattern left behind by an unmatched glob, and skip
        # the target itself: the pattern also matches an already-renamed file.
        if [[ -e "$candidate" && "$candidate" != "$target" ]]; then
            matches+=("$candidate")
        fi
    done

    if (( ${#matches[@]} == 0 )); then
        if [[ -e "$target" ]]; then
            # Already renamed by a previous run; nothing to do.
            return 0
        fi
        echo "ERROR: no $read_tag fastq file found for sample '$sample_name' in $fastq_folder" >&2
        return 1
    fi

    if (( ${#matches[@]} > 1 )) || [[ -e "$target" ]]; then
        echo "ERROR: ambiguous $read_tag fastq files for sample '$sample_name'; refusing to rename:" >&2
        printf '  %s\n' "${matches[@]}" >&2
        return 1
    fi

    mv -- "${matches[0]}" "$target"
}

# Read whitespace-separated sample IDs without subjecting them to globbing.
# The "|| (( ... ))" part keeps a final line that lacks a trailing newline.
sample_list=()
while read -r -a sample_fields || (( ${#sample_fields[@]} )); do
    sample_list+=("${sample_fields[@]}")
done < "$sample_ID"

echo "changing the name of the fastq files..."
for sample in "${sample_list[@]}"; do
    echo "changing name $sample"
    rename_fastq "$sample" R1 || exit 1
    rename_fastq "$sample" R2 || exit 1
done

echo "Attaching barcode and UMI...."
if ! mkdir -p -- "$UMI_attach_folder"; then
    echo "ERROR: cannot create folder '$UMI_attach_folder'" >&2
    exit 1
fi

# ${python_path%/} drops a trailing slash so the command path has no "//".
if ! "${python_path%/}/python" \
    "$script_path/UMI_barcode_attach_gzipped_with_dic.py" \
    "$fastq_folder" \
    "$sample_ID" \
    "$UMI_attach_folder" \
    "$ligation_barcode" \
    "$RT_barcode" \
    "$core"; then
    echo "ERROR: Step 1 (UMI attach) failed" >&2
    exit 1
fi

echo ">>>Step 1 (UMI attach) has been done."



