-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVALDLinelist.py
455 lines (385 loc) · 23.2 KB
/
VALDLinelist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
"""
This script contains some functions for downloading and manipulating Vienna Atomic Line Database (VALD) formatted
line lists.
This code was meant for use with the VALD Database as of 2/15/21 (http://vald.astro.uu.se/~vald/php/vald.php)
and for "vald3line-BPz-freeformat" as of 3/28/19 (https://www.lupm.in2p3.fr/users/plez/).
Functions:
VALDForm: Fills out the Extract All form on the VALD website ("http://vald.astro.uu.se/"). The VALD website
might not give the full wavelength range of data. For large wavelength ranges, one must fill out multiple forms or
use VALDDownload.
VALDEmail: Uses the Gmail API to grab the newest email from VALD and returns the message along with the
received date. This function requires a credentials.JSON file and a token.pickle file that gives this function
permission to view your Gmail account.
VALDDownload: Fills out the Extract All form on the VALD website ("http://vald.astro.uu.se/") as many
times as needed to extract the desired wavelength range, accesses your Gmail emails from VALD to click the
download links, and saves the un-zipped versions of the files to the desired output folder.
VALDFormat: Updates the 4th element lines within a single line list so that they will work with
Turbospectrum's "vald3line-BPz-freeformat". Use VALDCombineFormat if you also want to combine VALD files.
VALDCombineNoFormat: Appends multiple VALD line lists together while NOT updating the 4th element lines.
VALDCombineFormat: Appends multiple VALD line lists together while also updating the 4th element lines so
that they will work with Turbospectrum's "vald3line-BPz-freeformat".
VALDSplit: Splits a VALD line list file into multiple files of the desired file size. Used for input
into Turbospectrum's "vald3line-BPz-freeformat" which has an input file size limit (~100 mb).
VALDToSpectrum: Converts a VALD formatted line list into a SPECTRUM (https://www.appstate.edu/~grayro/spectrum/spectrum.html)
formatted line list.
"""
import os
import time
import gzip
import datetime
import base64
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pickle
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
def VALDForm(wave_start, wave_end, extraction_format="long", data_retrieval="ftp", linelist_config="custom", comment="",
             email="", web_driver="", server="uppsala", show_browser=False):
    """ Fills out and submits the Extract All form on the VALD website ("http://vald.astro.uu.se/"). The VALD website
    might not give the full wavelength range of data. For large wavelength ranges, one must fill out multiple forms or
    use VALDDownload.
    This function requires the use of Selenium and a chrome driver
    (https://sites.google.com/a/chromium.org/chromedriver/downloads).
    The browser is always shut down when the function exits, even if one of the form steps raises.

    :param wave_start: (float) The desired starting wavelength value in angstroms.
    :param wave_end: (float) The desired ending wavelength value in angstroms.
    :param extraction_format: (str) The desired extraction formatting; Long or Short (default is "long").
        The "long" radio button is only clicked for "long"; any other value leaves the form's own default.
    :param data_retrieval: (str) The desired data retrieval method; Email or FTP (default is "ftp").
        The "ftp" option is only clicked for "ftp"; any other value leaves the form's own default.
    :param linelist_config: (str) The desired line list configuration; Default or Custom (default is custom).
        The "custom" option is only clicked for "custom"; any other value leaves the form's own default.
    :param comment: (str) An optional comment that can be added to the request (default is "").
    :param email: (str) Your registered VALD email address.
    :param web_driver: (str) The path to your Chrome driver.
    :param server: (str) The desired server; moscow, montpellier, or uppsala (default is "uppsala").
        Any value other than "moscow"/"montpellier" stays on the landing page that was opened first.
    :param show_browser: (bool) Opens up your web browser while running (default is False).
    """
    # Find the Chrome webdriver
    if show_browser:
        driver = webdriver.Chrome(web_driver)
    else:
        option = webdriver.ChromeOptions()
        option.add_argument("headless")
        driver = webdriver.Chrome(web_driver, options=option)

    try:
        # Go to VALD Interface
        driver.get("http://vald.astro.uu.se/")

        # Choose the server. The type check has to happen before .lower() to be of any use.
        server_dict = {"moscow": "VALD3 Mirror Moscow", "montpellier": "VALD3 Mirror Montpellier"}
        mirror = server.lower() if isinstance(server, str) else None
        if mirror in server_dict:
            driver.find_element_by_link_text(server_dict[mirror]).click()

        # Log in with a registered email address
        login = driver.find_element_by_name("user")
        login.send_keys(email)
        login.send_keys(Keys.RETURN)

        # Select the Extract All option and fill out the form.
        # The inputs are addressed by their row position in the form table.
        driver.find_element_by_xpath("//input[@value='Extract All']").send_keys(Keys.RETURN)
        driver.find_element_by_xpath("//table/tbody/tr[2]/td[2]/input").send_keys(str(wave_start))
        driver.find_element_by_xpath("//table/tbody/tr[3]/td[2]/input").send_keys(str(wave_end))
        if extraction_format.lower() == "long":
            driver.find_element_by_xpath("//table/tbody/tr[5]/td[2]/input").click()
        if data_retrieval.lower() == "ftp":
            driver.find_element_by_xpath("//table/tbody/tr[7]/td[2]/input").click()
        if linelist_config.lower() == "custom":
            driver.find_element_by_xpath("//table/tbody/tr[18]/td[2]/input").click()
        if comment != "" and isinstance(comment, str):
            driver.find_element_by_xpath("//table/tbody/tr[22]/td[2]/input").send_keys(comment)
        # Submit the request
        driver.find_element_by_xpath("//table/tbody/tr[24]/td[1]/input").send_keys(Keys.RETURN)
    finally:
        # Quits the driver no matter how the form filling ended, so no browser process is left behind
        driver.quit()
def VALDEmail(VALDserver="uppsala", display_message=True):
    """ Uses the Gmail API to grab the newest email from VALD and returns the message along with the received date.
    This function requires a credentials.JSON file and a token.pickle file that gives this function permission
    to view your Gmail account.
    Used this link to start working with the Gmail API:
    https://developers.google.com/gmail/api/quickstart/python

    :param VALDserver: (str) The server that the email will be coming from; uppsala, montpellier, or moscow
                             (default is "uppsala"). Any other name raises KeyError.
    :param display_message: (bool) Prints the email message to the console (default is True).
    :return: (str/NoneType) The received date on the first line followed by the decoded text of the first message
                            part that contains "http://". Returns None if the inbox search finds no message, the
                            message has no parts, or no part contains a link.
    """
    server = {"uppsala": "vald@physics.uu.se", "montpellier": "vald@vald.lupm.univ-montp2.fr",
              "moscow": "vald3@inasan.ru"}

    # Variable creds will store the user access token.
    # If no valid token found, we will create one.
    creds = None

    # The file token.pickle contains the user access token.
    if os.path.exists('token.pickle'):
        # NOTE(security): unpickling runs arbitrary code from the file, so only ever use a token.pickle that was
        # written by this function on this machine.
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)

    # If credentials are not available or are invalid, ask the user to log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json',
                                                             ['https://www.googleapis.com/auth/gmail.readonly'])
            creds = flow.run_local_server(port=0)
        # Save the access token in token.pickle file for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    # Connect to the Gmail API
    service = build('gmail', 'v1', credentials=creds)

    # Request the VALD messages; the first entry of the result is used as the newest one
    result = service.users().messages().list(userId='me', labelIds=['INBOX'], q=server[VALDserver.lower()]).execute()
    messages = result.get('messages', [])
    if not messages:
        # Nothing from this server in the inbox (indexing [0] here used to raise IndexError)
        return None

    txt = service.users().messages().get(userId='me', id=messages[0]['id']).execute()
    payload = txt['payload']
    # NOTE(review): assumes the second header is a "Received"-style header whose value ends in "; <date>" - confirm
    date_line = payload['headers'][1]['value']
    date = date_line[date_line.index(';') + 1:].lstrip()

    # Grab the correct email from an email chain. A message without parts yields no candidates.
    for part in payload.get('parts') or []:
        try:
            # Gmail delivers the body as URL-safe base64 ("-" and "_" instead of "+" and "/")
            decoded_data = date + '\n' + str(base64.urlsafe_b64decode(part['body']['data']), 'utf-8')
        except (KeyError, TypeError, ValueError):
            # This part has no inline body or is not decodable text; try the next part
            continue
        if "http://" in decoded_data:
            if display_message:
                print(decoded_data)
            return decoded_data
    return None
def VALDDownload(output_folder, wave_start, wave_end, extraction_format="long", data_retrieval="ftp",
                 linelist_config="custom", email="", server="uppsala", web_driver="", download_folder="", silent=False,
                 show_browser=False):
    """ Fills out the Extract All form on the VALD website ("http://vald.astro.uu.se/") as many times as needed to
    extract the desired wavelength range, accesses your Gmail emails from VALD to click the download links, and saves
    the un-zipped versions of the files to the desired output folder.
    Used this link to start working with the Gmail API:
    https://developers.google.com/gmail/api/quickstart/python

    Each pass requests [wave_start, wave_end + 5], waits up to 30 minutes (polling once a minute) for an email that
    arrived after the request was sent, downloads and unzips the linked file, and then restarts from the last
    wavelength found in that file. The function stops once that last wavelength reaches wave_end, or gives up
    when an email does not arrive in time.

    :param output_folder: (str) The desired output folder for the unzipped VALD files.
    :param wave_start: (float) The desired starting wavelength value in angstroms.
    :param wave_end: (float) The desired ending wavelength value in angstroms.
    :param extraction_format: (str) The desired extraction formatting; Long or Short (default is "long").
    :param data_retrieval: (str) The desired data retrieval method; Email or FTP (default is "ftp").
    :param linelist_config: (str) The desired line list configuration; Default or Custom (default is custom).
    :param email: (str) Your registered VALD email address.
    :param server: (str) The desired server; moscow, montpellier, or uppsala (default is "uppsala").
    :param web_driver: (str) The path to your Chrome driver.
    :param download_folder: (str) The path to your download folder.
    :param silent: (bool) Will not print lines to the console (default is False).
    :param show_browser: (bool) Opens up your web browser while running (default is False).
    """
    month_dict = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6, "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10,
                  "Nov": 11, "Dec": 12}
    last_wavelength, count = 0, 1
    while last_wavelength < wave_end:
        # Fills out the Extract All form on VALD website
        VALDForm(wave_start, wave_end+5, extraction_format=extraction_format, data_retrieval=data_retrieval,
                 linelist_config=linelist_config, comment="Range " + str(count), email=email, server=server,
                 web_driver=web_driver, show_browser=show_browser)
        start_date = datetime.datetime.now()

        # Finds the VALD download link in my email and downloads the file
        wait_time = 0
        if not silent:
            print("\n---------------------------------------------------------------------\n",
                  "Waiting for an email(" + str(count) + ") from VALD...")
        while True:
            message = VALDEmail(server, display_message=False)
            if message is not None:
                # First line of the message is the received date, e.g. "Mon, 15 Feb 2021 12:34:56 ..."
                date = message.split("\n")[0].split()
                year, month, day = int(date[3]), month_dict[date[2]], int(date[1])
                # int() reads zero-padded fields directly. Stripping the zeros first turned "00" into "" and
                # crashed on any email received at a full hour/minute.
                clock = date[4]
                hour, minute, second = int(clock[:2]), int(clock[3:5]), int(clock[6:8])
                received_date = datetime.datetime(year, month, day, hour, minute, second)
                # Only accept an email that arrived after this request was submitted
                if received_date > start_date:
                    break
            time.sleep(60)
            wait_time += 60
            if wait_time > 1800:
                if not silent:
                    print("\nEmail was not received in time.\nTry a different server.")
                break
            else:
                if not silent:
                    print("Still waiting for email(" + str(count) + ")...")
        if wait_time > 1800:
            # Timed out: abandon the whole download
            break
        if not silent:
            print("Found email(" + str(count) + ") from VALD: \n", message, "\n")

        # The link runs from the first "http" to the first "gz"; its 5th dot-separated piece is the request number
        link = message[message.index("http"): message.index("gz")+2]
        request_num = link.split(".")[4]
        if show_browser:
            driver = webdriver.Chrome(web_driver)
        else:
            option = webdriver.ChromeOptions()
            option.add_argument("headless")
            driver = webdriver.Chrome(web_driver, options=option)
        try:
            driver.get(link)

            # Unzips the downloaded VALD file
            file_not_downloaded = True
            if not silent:
                print("Waiting for VALD file(" + str(count) + ") to download...")
            while file_not_downloaded:
                # To make sure that the file is fully downloaded
                time.sleep(5)
                for file in os.listdir(download_folder):
                    # Chrome keeps a ".crdownload" suffix on files that are still being written
                    if request_num in file and "crdownload" not in file:
                        input_name = os.path.join(download_folder, file)
                        output_name = os.path.join(output_folder,
                                                   os.path.splitext(file)[0] + "_" + str(count) + ".lst")
                        with gzip.open(input_name, mode="rt") as fin:
                            out = fin.read()
                        with open(output_name, "w") as fout:
                            fout.write(out)
                        os.remove(input_name)
                        file_not_downloaded = False
                        break
                if file_not_downloaded and not silent:
                    print("Still waiting for file(" + str(count) + ")...")
        finally:
            # One browser is started per request; always close it so they do not pile up
            driver.quit()
        if not silent:
            print("VALD file(" + str(count) + ") downloaded.\n")

        # Finds the last wavelength value: it sits 5 lines above the " References:" line, third token,
        # with its trailing character (a comma) removed. The next request starts from there.
        with open(output_name, "r") as f:
            lines = f.readlines()
        for line in reversed(lines):
            if " References:" in line:
                last_wavelength = wave_start = float(lines[lines.index(line) - 5].split()[2][:-1])
                break
        if not silent:
            print("Last wavelength value for file(" + str(count) + "): " + str(last_wavelength))
        count += 1
def VALDFormat(input_file, output_file, silent=True):
    """ Rewrites the 4th element lines of a single VALD line list so that they will work with Turbospectrum's
    "vald3line-BPz-freeformat", appending the result to the output file. Use VALDCombineFormat if you also want
    to combine VALD files.

    Only lines starting with a single quote are copied. Reading stops at the first line that contains
    "* oscillator strengths". A quoted line containing ":" is treated as a 4th element line: it is replaced by
    "'_", 29 spaces, the trailing element symbol text (with any parenthesised atomic masses removed),
    5 spaces and a closing quote. All other quoted lines are copied unchanged.

    :param input_file: (str) The input file name.
    :param output_file: (str) The output file name (opened in append mode).
    :param silent: (bool) Will not print lines to the console (default is True).
    """
    stop_string = "* oscillator strengths"
    with open(input_file, "r") as fin, open(output_file, "a") as fout:
        for line in fin:
            if stop_string in line:
                break
            if line[0] != "'":
                # Not a line list entry (header text etc.)
                continue

            if ":" not in line:
                file_line = line
            else:
                # Element symbol string: from the last space up to (not including) the closing quote and newline.
                chars = line[line.rfind(" "):-2]
                # Positions of the parentheses that wrap atomic masses inside the symbol string.
                closes = [pos for pos, ch in enumerate(chars) if ch == ")"]
                opens = [pos for pos, ch in enumerate(chars) if ch == "("]
                if opens:
                    # Keep only the text that follows each closing parenthesis: up to the next opening one,
                    # or to the end of the string for the last group.
                    pieces = [chars[closes[k] + 1:opens[k + 1]] for k in range(len(opens) - 1)]
                    pieces.append(chars[closes[-1] + 1:])
                    chars = "".join(pieces)
                file_line = "'_" + " " * 29 + chars + " " * 5 + "'\n"

            if not silent:
                print(file_line)
            fout.write(file_line)
def VALDCombineNoFormat(input_folder, output_file, silent=True):
    """ Appends multiple VALD line lists together while NOT updating the 4th element lines.

    Every regular file in the input folder is read (sub-folders are skipped) in the order os.listdir returns them.
    From each file, the lines starting with a single quote are appended to the output file until a line
    containing "* oscillator strengths" is reached. The output file is opened in append mode, so existing
    content is kept.

    :param input_folder: (str) The input folder; Must only contain VALD line list files!
    :param output_file: (str) The desired output file name.
    :param silent: (bool) Will not print lines to the console (default is True).
    """
    stop_string = "* oscillator strengths"
    for input_file in os.listdir(input_folder):
        # os.path.join instead of a hard-coded "\\" so the function also works outside Windows
        input_path = os.path.join(input_folder, input_file)
        if not os.path.isfile(input_path):
            continue
        with open(input_path, "r") as fin, open(output_file, "a") as fout:
            for line in fin:
                if stop_string in line:
                    break
                if line[0] == "'":
                    fout.write(line)
                    if not silent:
                        print(line)
def VALDCombineFormat(input_folder, output_file, silent=True):
    """ Appends multiple VALD line lists together while also updating the 4th element lines so that they will work
    with Turbospectrum's "vald3line-BPz-freeformat".

    An existing output file is deleted first. Then every regular file in the input folder (sub-folders are
    skipped, order as returned by os.listdir) is passed through VALDFormat, which appends to the output file.

    :param input_folder: (str) The input folder; Must only contain VALD line list files!
    :param output_file: (str) The desired output file name.
    :param silent: (bool) Will not print lines to the console (default is True).
    """
    # Start from a clean file because VALDFormat appends
    if os.path.exists(output_file):
        os.remove(output_file)
    for input_file in os.listdir(input_folder):
        # os.path.join instead of a hard-coded "\\" so the function also works outside Windows
        input_path = os.path.join(input_folder, input_file)
        if os.path.isfile(input_path):
            VALDFormat(input_path, output_file, silent=silent)
def VALDSplit(input_file, max_size=(100, "mb"), silent=True):
    """ Splits a VALD line list file into multiple files of roughly the desired file size.
    Used for input into Turbospectrum's "vald3line-BPz-freeformat" which has an input file size limit (~100 mb).

    The pieces are written next to the input file as "<input name without extension>_<n>.cut", n = 1, 2, ...
    A piece is closed at the first line whose second character is "_" (the last line of a line list entry) once
    the running size is within 500 characters of the limit, so entries are never cut in half. Because the cut
    waits for such a line, a piece can end up slightly above the requested size.
    The size is counted in characters, with one extra character per line.

    :param input_file: (str) The name of the VALD line list.
    :param max_size: (tuple(int, str)) The desired maximum size of the output files with units
                     of "b", "kb", "mb", or "gb"; (the value, the unit) (default is (100, "mb")).
    :param silent: (bool) Will not print lines to the console; recommended for larger files (default is True).
    """
    byte_dict = {"b": 1, "kb": 1e3, "mb": 1e6, "gb": 1e9}
    max_size = max_size[0]*byte_dict[max_size[1].lower()]
    with open(input_file, "r") as fin:
        lines, file_num = fin.readlines(), 1
    base_name = os.path.splitext(input_file)[0]
    while lines:
        with open(base_name + "_" + str(file_num) + ".cut", "w", encoding="utf-8") as fout:
            length = 0
            for index, line in enumerate(lines, 1):
                length += len(line) + 1  # number of characters per line plus one extra
                fout.write(line)
                if not silent:
                    print(line.rstrip("\n"))
                # line[1:2] instead of line[1] so a one-character line cannot raise IndexError
                if max_size - length < 500 and line[1:2] == "_":
                    new_start = index
                    break
            else:
                # Ran out of lines without reaching the limit: everything left went into this piece.
                # (Without this, a file that fits in one piece hit an undefined new_start.)
                new_start = len(lines)
        lines = lines[new_start:]
        file_num += 1
def VALDToSpectrum(input_file, output_file, eV=True, isotope=False, silent=False):
    """ Converts a VALD formatted line list into a SPECTRUM (https://www.appstate.edu/~grayro/spectrum/spectrum.html)
    formatted line list.
    SPECTRUM line list format:
    wavelength, species_code, mass_number (for isotope mode), e_low, e_high, loggf, fudge_factor, transition, source

    Only lines with at least 13 comma-separated fields are converted; the species is read from field 1 ('Fe 1'),
    the wavelength from field 2, log gf from field 3 and the lower/upper energies from fields 4 and 6.
    Conversion stops at the first other line that starts with "*" (the notes that follow the line data).
    The fudge factor, transition and source columns are always written as "1.000", "99" and "VALD", and the
    mass number (isotope mode) is always written as "0".

    :param input_file: (str) The VALD formatted file.
    :param output_file: (str) The name of the SPECTRUM formatted output file.
    :param eV: (bool) True if the energy unit is eV, False for cm**-1 (default is True).
    :param isotope: (bool) True if you want to add the mass_number column (default is False).
    :param silent: (bool) Will not print lines to the console (default is False).
    """
    elem_dict = {'H': '1', 'He': '2', 'Li': '3', 'Be': '4', 'B': '5', 'C': '6', 'N': '7', 'O': '8', 'F': '9',
                 'Ne': '10', 'Na': '11', 'Mg': '12', 'Al': '13', 'Si': '14', 'P': '15', 'S': '16', 'Cl': '17',
                 'Ar': '18', 'K': '19', 'Ca': '20', 'Sc': '21', 'Ti': '22', 'V': '23', 'Cr': '24', 'Mn': '25',
                 'Fe': '26', 'Co': '27', 'Ni': '28', 'Cu': '29', 'Zn': '30', 'Ga': '31', 'Ge': '32', 'As': '33',
                 'Se': '34', 'Br': '35', 'Kr': '36', 'Rb': '37', 'Sr': '38', 'Y': '39', 'Zr': '40', 'Nb': '41',
                 'Mo': '42', 'Tc': '43', 'Ru': '44', 'Rh': '45', 'Pd': '46', 'Ag': '47', 'Cd': '48', 'In': '49',
                 'Sn': '50', 'Sb': '51', 'Te': '52', 'I': '53', 'Xe': '54', 'Cs': '55', 'Ba': '56', 'La': '57',
                 'Ce': '58', 'Pr': '59', 'Nd': '60', 'Pm': '61', 'Sm': '62', 'Eu': '63', 'Gd': '64', 'Tb': '65',
                 'Dy': '66', 'Ho': '67', 'Er': '68', 'Tm': '69', 'Yb': '70', 'Lu': '71', 'Hf': '72', 'Ta': '73',
                 'W': '74', 'Re': '75', 'Os': '76', 'Ir': '77', 'Pt': '78', 'Au': '79', 'Hg': '80', 'Tl': '81',
                 'Pb': '82', 'Bi': '83', 'Po': '84', 'At': '85', 'Rn': '86', 'Fr': '87', 'Ra': '88', 'Ac': '89',
                 'Th': '90', 'Pa': '91', 'U': '92'}
    # VALD ionization stage -> decimal suffix appended to the atomic number (Fe 1 -> 26.0, Fe 6 -> 26.5).
    # Every suffix starts with "."; a leading "0" would turn Fe 6 into 260.5.
    ion_dict = {'1': '.0', '2': '.1', '3': '.2', '4': '.3', '5': '.4', '6': '.5', '7': '.6', '8': '.7',
                '9': '.8', '10': '.9'}
    # Molecule codes are the two atomic numbers joined together (lighter atom first) plus ".0".
    # OH is therefore 108.0; 608.0 would be CO.
    mol_dict = {'MgH': '112.0', 'TiO': '822.0', 'C2': '606.0', 'H2': '101.0', 'CN': '607.0', 'SiO': '814.0',
                'CH': '106.0', 'OH': '108.0', 'SiH': '114.0'}
    # Divisor that turns an energy in eV into cm**-1
    ev_per_wavenumber = 1.23981e-4
    with open(input_file, "r") as fin:
        with open(output_file, "w") as fout:
            for raw_line in fin:
                line = raw_line.split(",")
                if len(line) >= 13:
                    # Field 1 looks like 'Fe 1': symbol before the space, ionization stage after it
                    species = line[0]
                    space = species.index(" ")
                    symbol = species[1:space]
                    element = elem_dict.get(symbol)
                    if element is None:
                        code = mol_dict.get(symbol)
                    else:
                        code = element + ion_dict.get(species[space+1:-1])
                    wavelength = "{:<12}".format(line[1].strip())
                    species_code = "{:<8}".format(code)
                    if isotope:
                        mass_number = "{:<4}".format("0")  # for isotope mode
                    else:
                        mass_number = ""
                    if eV:
                        e_low = "{:<12}".format(int(float(line[3].strip())/ev_per_wavenumber))
                        e_high = "{:<12}".format(int(float(line[5].strip())/ev_per_wavenumber))
                    else:
                        # Go through float first: the energies are written with decimals, which int() rejects
                        e_low = "{:<12}".format(int(float(line[3].strip())))
                        e_high = "{:<12}".format(int(float(line[5].strip())))
                    loggf = "{:<12}".format(line[2])
                    fudge = "{:<10}".format("1.000")
                    transition = "{:<4}".format("99")
                    source = "{:<8}".format("VALD")
                    out = wavelength + species_code + mass_number + e_low + e_high + loggf + fudge + transition + source
                    fout.write(out + "\n")
                    if not silent:
                        print(out)
                elif line[0].startswith("*"):
                    # Start of the notes after the line data. line[0] is a whole text field here, so comparing
                    # it to "*" with == never matched.
                    break