-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExmAln.4.LocalRealignment.sh
129 lines (119 loc) · 4.59 KB
/
ExmAln.4.LocalRealignment.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/bash
#$ -cwd
ChaIn="no"
AllowMisencoded="false"
while getopts i:b:s:l:c:A opt; do
case "$opt" in
i) BamFil="$OPTARG";;
b) BamLst="$OPTARG";;
s) Settings="$OPTARG";;
l) LogFil="$OPTARG";;
c) ChaIn="$OPTARG";;
A) AllowMisencoded="true";;
esac
done
#load settings file
. $Settings
#Set local Variables
Chr=$SGE_TASK_ID
Chr=${Chr/23/X}
Chr=${Chr/24/Y}
if [[ "$BUILD" = "hg19" ]]; then
Chr=chr$Chr
fi
TmpLog=$LogFil.LocReal.$Chr.log
JobNm=${JOB_NAME#*.}
TmpDir=$BamFil.$Chr.Realignjavdir #java temp directory
mkdir -p $TmpDir
RalDir=realign.$BamFil #directory to collect individual chromosome realignments
mkdir -p $RalDir
StatDir=$JOB_ID.LocReal.stat
mkdir -p $StatDir
StatFil=$StatDir/$Chr.$JOB_ID.LocReal.stat #Status file to check if all chromosome are complete
TgtFil=$RalDir/$BamFil.$Chr.target_intervals.list
#Start Log
uname -a >> $TmpLog
echo "Start Local Realignment around InDels on Chromosome $Chr - $0:`date`" >> $TmpLog
echo " Job name: "$JOB_NAME >> $TmpLog
echo " Job ID: "$JOB_ID >> $TmpLog
echo " Chromosome: "$Chr >> $TmpLog
echo "----------------------------------------------------------------" >> $TmpLog
#Run jobs
#Generate target file
echo "- Create target interval file using GATK RealignerTargetCreator `date`..." >> $TmpLog
cmd="$JAVA7BIN -Xmx7G -Djava.io.tmpdir=$TmpDir -jar $GATKJAR -T RealignerTargetCreator -R $REF -I $BamLst -L $Chr -known $INDEL -known $INDEL1KG -o $TgtFil -nt $NumCores"
if [[ $AllowMisencoded="true" ]]; then cmd=$cmd" -allowPotentiallyMisencodedQuals"; fi
echo " "$cmd >> $TmpLog
$cmd
if [[ $? == 1 ]]; then
echo "----------------------------------------------------------------" >> $TmpLog
echo "Create target interval file using GATK RealignerTargetCreator failed `date`" >> $TmpLog
qstat -j $JOB_ID | grep -E "usage *$SGE_TASK_ID:" >> $TmpLog
cat $TmpLog >> $LogFil
exit 1
fi
#Realign InDels
realignedFile=$RalDir/realigned.$BamFil.Chr_$Chr.bam
echo "- Realign InDels file using GATK IndelRealigner `date`..." >> $TmpLog
cmd="$JAVA7BIN -Xmx7G -Djava.io.tmpdir=$TmpDir -jar $GATKJAR -T IndelRealigner -R $REF -I $BamLst -targetIntervals $TgtFil -L $Chr -known $INDEL -known $INDEL1KG -o $realignedFile -allowPotentiallyMisencodedQuals"
echo " "$cmd >> $TmpLog
if [[ $AllowMisencoded="true" ]]; then cmd=$cmd" -allowPotentiallyMisencodedQuals"; fi
$cmd
if [[ $? == 1 ]]; then
echo "----------------------------------------------------------------" >> $TmpLog
echo "Realign InDels file using GATK IndelRealigner failed `date`" >> $TmpLog
qstat -j $JOB_ID | grep -E "usage *$SGE_TASK_ID:" >> $TmpLog
cat $TmpLog >> $LogFil
exit 1
fi
echo "----------------------------------------------------------------" >> $TmpLog
#Call next job if chain
if [[ $ChaIn = "chain" ]]; then
#calculate an amount of time to wait based on the chromosome and the current time past the hour
#ensures that even if all the jobs finish at the same time they will each execute the next bit of code at 10 second intervals rather than all at once
Sekunds=`date +%-S`
Minnits=`date +%-M`
Minnits=$((Minnits%4))
Minnits=$((Minnits*60))
Sekunds=$((Sekunds+Minnits))
CHR=$SGE_TASK_ID
GoTime=$((CHR-1))
GoTime=$((GoTime*10))
WaitTime=$((GoTime-Sekunds))
if [[ $WaitTime -lt 0 ]]; then
WaitTime=$((240+WaitTime))
fi
echo "- Check if all realigns are complete..." >> $TmpLog
echo " Min:Sec past hour " `date +%M:%S` >> $TmpLog
echo " Sleeping for "$WaitTime" seconds..." >> $TmpLog
sleep $WaitTime
echo `date` $CHR > $StatFil #send marker to status file
#count markers in status file and if equals 24 call merge job
ralfin=$(ls $StatDir | wc -l)
if [ $ralfin -eq 24 ]; then
echo " All realigns complete at `date`" >> $TmpLog
echo "- Call Base Quality Score Recalibration with GATK `date`:" >> $TmpLog
cmd="qsub -pe smp $NumCores -l $GenBQSRAlloc -N GnBQSR.$JobNm -o stdostde/ -e stdostde/ $EXOMSCR/ExmAln.5.GenerateBQSRTable.sh -i $BamFil -s $Settings -d $RalDir -l $LogFil -c chain"
echo " "$cmd >> $TmpLog
$cmd
else
echo " Realigns Finished at `date`: $ralfin" >> $TmpLog
echo " Exiting..."
grep -vE "^/ifs" $TmpLog > $TmpLog.2
cat $TmpLog.2 > $TmpLog
rm $TmpLog.2
fi
echo "----------------------------------------------------------------" >> $LogFil
fi
#End Log
echo "End Local Realignment of Chromosome $Chr $0:`date`" >> $TmpLog
qstat -j $JOB_ID | grep -E "usage *$SGE_TASK_ID:" >> $TmpLog
echo "===========================================================================================" >> $TmpLog
echo "" >> $TmpLog
cat $TmpLog >> $LogFil
#remove temporary files
if [ $ralfin -eq 24 ]; then
echo HELLO
#rm -r $StatDir
fi
rm -r $TmpLog $TmpDir $TgtFil