forked from agoga/nreshk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpipeline.py
330 lines (261 loc) · 12 KB
/
pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#This function should not be run every time the pipeline is run.
#It only needs to be run when you have added new flat files to the data
#A two dimensional dictionary is created for quick lookup of flat file
#names by their site and MJD's and stored in a pickle file to be loaded
#in every time the pipeline is run
def create_flat_dict_file(flatDir,fileName):
import pickle
import pprint
import os
import astropy.io.fits
#find every flat file and put into dictionary
flatDict = dict()
flatDict.update({'cpt':dict()})
flatDict.update({'tlv':dict()})
flatDict.update({'lsc':dict()})
flatDict.update({'elp':dict()})
#this loops through all the flat_....fits files in the flats directory
#and creates the 2 dimensional dictionary
for path, subdirs, files in os.walk(flatDir):
for name in files:
if name.startswith("flat_") and name.endswith(".fits"):
flHDu1 = astropy.io.fits.open(os.path.join(path,name))
header = flHDu1[0].header
mjd = float(header['MJD-OBS'])
site = header['SITEID']
siteDict = flatDict[site]
siteDict.update({mjd:os.path.join(path,name)})
f = open(flatDir+fileName,"wb")
pickle.dump(flatDict,f)
f.close()
#This is the NRES specific function goes through the current folder(which should be one observation)
#and loads, from fits files, the wavelength grid, spectra, header information
#This function is only needed when there is both old and new data from NRES and we don't know which
#is which. When all the data is in the new format then half this function can be removed.
def load_folder_for_pipeline(path,dirs,curFiles):
import os
import astropy.io.fits
waveGrid = []
spec = []
header = []
fileName = ''#spec file name
oldFormat = False
#see if the folder contains a -noflat or -wave file
#There may be a faster way to determine if this folder is old or new but I don't see a safer way
for file in curFiles:
if file.endswith("-noflat.fits") or file.endswith("-wave.fits"):
oldFormat = True
break
#we've now looped through all the files if any are old then we try old format
if oldFormat:
#print('old format')
#load the data in the old format
for file in curFiles:
if file.endswith("-noflat.fits"):
fileName = file
specHDu = astropy.io.fits.open(os.path.join(path,dirs,file))
elif file.endswith("-wave.fits"):
wvHDu = astropy.io.fits.open(os.path.join(path,dirs,file))
spec = specHDu[0].data[0]
header = specHDu[0].header
#need the length here because sometimes the wave grid is in the first element
#of this array but most of the time it's not
if len(wvHDu)>1 and type(wvHDu[1]) == astropy.io.fits.hdu.table.BinTableHDU:
#print('wavehdu')
#print(wvHDu.info())
#print(wvHDu[1].data.dtype)
#print(type(wvHDu[1].data))
#print('done')
#print('stuff ' + str(i))
#print((wvHDu[1].data)[0])
#there are two different names for this data, it's mostly in the first name but
#sometimes in the second
try:
waveGrid = wvHDu[1].data['WavelenStar'][0]
except:
waveGrid=wvHDu[1].data['Wavelength'][0]
elif len(wvHDu)>1:
waveGrid = (wvHDu[1].data)[0]
else:
#print(wvHDu[0].data.dtype)
waveGrid = (wvHDu[0].data)[0]
specHDu.close()
wvHDu.close()
else: #ELSE IT'S NEW FORMAT and super easy
for file in curFiles:
if file.endswith(".fits"):
fileName = file
oHDu = astropy.io.fits.open(os.path.join(path,dirs,file))
waveGrid = oHDu[7].data
spec = oHDu[1].data
header = oHDu[0].header
oHDu.close()
#end big if
if len(waveGrid) == 0 or len(spec) == 0 or len(header) == 0:
print('BIG PROBLEM IN load_folder_for_pipeline')
return waveGrid, spec, header, fileName, oldFormat
#Takes in the data array with reference spectra and set name, reference spectra and setname used
#for printing.
#This function goes through the data array for each observation and finds any other obs on that day
#It will combined these observations into one targOLap and run the rest of the pipeline on the
#averaged data to give a better signal to noise for these observations.
#the output is a longer data array to be used later for printing the final
#This is how the data structure is created and maintained:
#data[0].append(mjd)
#data[1].append(shk)
#data[2].append(tuple((header,False)))
#data[3].append(tuple((lamGrid,correlation[0],targOlapf)))
#E
#correlation[0] is the delta lamda to place spectra in lab frame and lamGrid is the lab frame
def sum_daily_data(setName,data,labSpec):
from scipy import interpolate
import numpy as np
from calc_shk import calc_shk
from helpers import mkdir_p
from astropy.time import Time
from helpers import pdf_from_data
import helpers as h
rv = 0
done = []#array to hold which MJD are done
#make a clean copy
dailyData = np.asarray(data)
#IF YOU DONT INCLUDE [:] THEN THIS IS A REFERENCE AND LIFE WILL STINK WHEN SORTING
sortedList = data[0][:]
datesList = dailyData[0]
for i in range(len(datesList)):
curD = dailyData[:,i]
header = curD[2][0]
site = header['SITEID']
#dont double up if we've alrady DONE this day
curMjd = curD[0]
if(curMjd in done):
continue
#find the closest MJD's to the current
sortedList.sort(key=lambda x: abs(x - curMjd))
#uncomment this if you don't want nights with only 1 obs to have circles
#if there are no observations on this day(using index 1 because 0 is the same obs)
#if abs(curMjd - sortedList[1]) >= 1:
#continue
#grab the current day's values
curTuple = curD[3]
curLamGrid=curTuple[0]
curTargOlapf=curTuple[2]
curDLam=curTuple[1]
#need to interpolate because all the observations have slightly different lamda grids
#put them all onto the referece spectra's grid held in curLamGrid
interpfunc = interpolate.interp1d(curLamGrid-curDLam, curTargOlapf, kind='linear',fill_value='extrapolate')
curTarg=interpfunc(curLamGrid)
combinedTarg = curTarg
done.append(curMjd)
for i in range(len(sortedList)):
same = sortedList[i]
#don't do the current date
if same == curMjd:
continue
#the list is sorted so once we reach a point where the obs is more than
#a day apart we're done
if abs(curMjd - same) >= 1:
break
#print(str(curMjd) + " also has " + str(same))
#create an array of all the spectra that have been done, i..e these two are now done
done.append(same)
#grab the same day's data(will happen for multiple obs
sameI = np.argmin(abs(datesList-same))
sameTuple = dailyData[:,sameI][3]
sameLamGrid=sameTuple[0]
sameTargOlapf=sameTuple[2]
sameDLam=sameTuple[1]
#The issue is the two targetOlaps are on different grids.
#Interp them on the same grid then add
interpfunc = interpolate.interp1d(sameLamGrid-sameDLam, sameTargOlapf, kind='linear',fill_value='extrapolate')
sameTarg=interpfunc(curLamGrid)
#this is the big deal, increases signal to noise for each days spectra
combinedTarg = combinedTarg+sameTarg
#end combining loop
#printing stuff
first = str(curMjd).split('.')[0]
#lookup this star's teff
tempEff = h.tEffLookup[setName.strip('/')]
#find SHK with new offset to lamda grid
shkRet = calc_shk(curLamGrid, combinedTarg, rv, teff=tempEff)
shk = shkRet[0]
windows = shkRet[1]
mkdir_p("output/"+setName+"/"+first+'/')
np.savez("output/"+setName+"/"+first+"/combined_data", targOlapf=combinedTarg,flatOlap=combinedTarg, lamGrid=curLamGrid, adjLamGrid=curLamGrid,windows=windows)
decimalYr = Time(curMjd,format='mjd')
decimalYr.format = 'decimalyear'
title = 'NRES spectra, ' + site +', '+header['DATE-OBS']+' ('+ '{:.6}'.format(decimalYr.value) +'), S='+'{:.4}'.format(shk)
pdf_from_data(curLamGrid, labSpec,curLamGrid, combinedTarg,windows,title, "output/"+setName+"/"+first+"/","combined",width=.3)
#print("c " + str(type(curMjd))+ "s " +str(type(shk)))
#print("adding " + str(curMjd) + " with shk " + str(shk))
data[0].append(curMjd)
data[1].append(shk)
data[2].append(tuple((header,True)))
data[3].append(tuple((curLamGrid,0,combinedTarg)))
return data
#this function plots the final time series of SHK values for each star
#and is the reason for the lame format of the data array.
#data format
#first array is list of mjds
#data[0].append(mjd)
#second array is list of shks
#data[1].append(shk)
#third array is header data and bool saying whether it is a single observation(false) or average
#data[2].append(tuple((header,False)))
#fourth array is data for the nightly observation function: grid, grid offset, and spectra
#data[3].append(tuple((lamGrid,correlation[0],targOlapf)))
def plot_daily_data_timeseries(data,setName,bad):
import numpy as np
import matplotlib.patches as mpatches
from matplotlib import pyplot as plt
from astropy.time import Time
mjdArray = np.asarray(data[0])
shkValArray = np.asarray(data[1])
headerArray = np.asarray(data[2])
fig, ax = plt.subplots(figsize=(12,6))
pltStr = []
for h in headerArray:
#h[0] is header
#h[1] is bool saying wheather it's a average observation or single
s = h[0]['SITEID']
tStr =''
if s == 'lsc':
tStr +='b'
elif s == 'cpt':
tStr +='g'
elif s== 'elp':
tStr +='r'
elif s=='tlv':
tStr +='k'
if h[1] == True:
tStr += 'o'
else:
tStr += '^'
pltStr.append(tStr)
t= Time(mjdArray, format='mjd')
t.format = 'decimalyear'
for i in range(len(pltStr)):
mark = pltStr[i][1]
col = pltStr[i][0]
size = 150
opac = .3
if mark == 'o':
size = 100
opac = 1
if mjdArray[i] not in bad:
plt.scatter(t[i].value,shkValArray[i],marker=mark,c=col, s=size, alpha = opac)
else:
plt.scatter(t[i].value,shkValArray[i],marker='x',c=col, s=100, alpha = opac)
rl = mpatches.Patch(color='red', label='McDonald Obs\'')
bl = mpatches.Patch(color='blue', label='Cerro Tololo Interamerican Obs\'')
gl = mpatches.Patch(color='green', label='South African Astro Obs\'')
kl = mpatches.Patch(color='black', label='Wise Obs\'')
plt.legend(handles=[rl,bl,gl,kl],prop={'size': 10}, loc=4)
plt.style.use('classic')
ax.ticklabel_format(useOffset=False)
plt.title('HD '+setName+' magnetic activity time series')
plt.xlabel('Time(years)')
plt.ylabel('Unadjusted S-index')
plt.savefig('output/'+setName+'/'+setName+'_shk_time_series.pdf')
plt.show()
plt.close()