-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcif2data.py
185 lines (179 loc) · 7.89 KB
/
cif2data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/python3
import seekpath
import glob
import pymatgen
from pymatgen.analysis.structure_matcher import StructureMatcher
import numpy
import re
import seekpath.hpkot
import CifFile
import sys
def main():
#
args = sys.argv
with open(str(args[1]), "r") as f:
files = f.readlines()
matcher = StructureMatcher(scale=False)
#
# Read All files specified as command-line arguments
#
for input_file in files:
input_file = input_file.strip("\n")
print("Reading "+input_file+" ... ", end="")
#
# PyMatGen structure from CIF file
#
try:
structure = pymatgen.core.Structure.from_file(input_file)
except ValueError:
print("Invalid structure.")
continue
except AssertionError:
print("Invalid data.")
continue
#
# Remove oxidation state. Excepting "D" (deuterium)
#
try:
structure.remove_oxidation_states()
except ValueError:
print("Invalid element.")
continue
#
# Treatment for implicit hydrogen for CIF file
#
pressure = 100.0
temperature = 300.0
#
if input_file.split(".")[-1] == "cif":
cf = CifFile.ReadCif(input_file)
attached_hydrogens = cf.get_all("_atom_site_attached_hydrogens")
if attached_hydrogens != ['0'] and attached_hydrogens != [] and attached_hydrogens != ['.']:
print("Implicit hydrogen.", attached_hydrogens)
continue
try:
formula_input = str(cf.get_all("_chemical_formula_sum")[0]).split(" ")
except IndexError:
print("No chemical formula.")
continue
formula_data = str(structure.composition.hill_formula).split(" ")
try:
formula_input_dict = {re.sub("[^a-zA-Z]", "", chem):
1.0 if re.sub("[a-zA-Z]", "", chem) == "" else float(re.sub("[a-zA-Z]", "", chem))
for chem in formula_input}
except ValueError:
print("Invalid chemical formula string.")
continue
formula_data_dict = {re.sub("[^a-zA-Z]", "", chem):
1.0 if re.sub("[a-zA-Z]", "", chem) == "" else float(re.sub("[a-zA-Z]", "", chem))
for chem in formula_data}
formula_input = re.sub(" ", "", str(cf.get_all("_chemical_formula_sum")[0]))
formula_data = re.sub(" ", "", str(structure.composition.hill_formula))
if len(formula_data_dict) != len(formula_input_dict):
print("Number of elements are invalid. ", formula_input, formula_data)
continue
else:
elem_switch = -1
ratio0 = 0.0
for elem in formula_input_dict:
if elem in formula_data_dict:
ratio = formula_input_dict[elem] / formula_data_dict[elem]
if elem_switch == -1:
ratio0 = ratio
elem_switch = 0
else:
if abs(ratio - ratio0) > 0.001:
elem_switch = 1
break
else:
elem_switch = 2
break
if elem_switch == 1:
print("Chemical formula does not match.", formula_input, formula_data)
continue
elif elem_switch == 2:
print("Elements does not match", formula_input, formula_data)
continue
#
# Temperature and pressure
#
cell_measurement_pressure = cf.get_all("_cell_measurement_pressure")
diffrn_ambient_pressure = cf.get_all("_diffrn_ambient_pressure")
cell_measurement_temperature = cf.get_all("_cell_measurement_temperature")
diffrn_ambient_temperature = cf.get_all("_diffrn_ambient_temperature")
if cell_measurement_pressure:
pressure = float(cell_measurement_pressure[0].split("(")[0])
if diffrn_ambient_pressure:
pressure = float(diffrn_ambient_pressure[0].split("(")[0])
if cell_measurement_temperature:
temperature = float(cell_measurement_temperature[0].split("(")[0])
if diffrn_ambient_temperature:
temperature = float(diffrn_ambient_temperature[0].split("(")[0])
#
# Refine 3-folded Wyckoff position
#
frac_coord2 = numpy.array(structure.frac_coords)
for ipos in range(len(frac_coord2)):
for iaxis in range(3):
coord3 = frac_coord2[ipos, iaxis] * 6.0
if abs(round(coord3) - coord3) < 0.001:
frac_coord2[ipos, iaxis] = float(round(coord3)) / 6.0
#
# Disordered structure raises AttributeError. So it is skipped.
#
try:
skp = seekpath.get_explicit_k_path((structure.lattice.matrix, frac_coord2,
[pymatgen.core.Element(str(spc)).number for spc in structure.species]))
structure2 = pymatgen.core.Structure(skp["primitive_lattice"],
skp["primitive_types"], skp["primitive_positions"])
if len(skp["primitive_types"]) != 1:
structure2.merge_sites(tol=0.01, mode="average")
except AttributeError:
print("Fractional occupancy, may be disordered.")
continue
except seekpath.hpkot.SymmetryDetectionError:
print("Except seekpath.hpkot.SymmetryDetection: Spglib could not detect the symmetry of the system")
continue
except ValueError:
print("Problem creating primitive cell, I found the following group of atoms with len != 1: (0, 5)")
continue
#
output_file = re.sub(" ", "", str(structure2.composition.alphabetical_formula))
#
# This structure is the same or not as the known structures
#
known = False
print(glob.glob(output_file + "-*.xsf"), " ... ", end="")
for known_file in glob.glob(output_file + "-*.xsf"):
known_structure = pymatgen.core.Structure.from_file(known_file)
if matcher.fit(structure2, known_structure):
print("Same as " + known_file)
known = True
break
#
# If it is new structure, save it as XSF
#
if not known:
#
if input_file.split(".")[-1] == "xsf":
output_file += "-" + re.sub("[^0-9]", "", input_file.split("-")[-1]) + ".xsf"
else:
output_file += "-" + re.sub("[^0-9]", "", input_file.split("/")[-1])+".xsf"
print("Write to "+output_file)
structure2.to(fmt="xsf", filename=output_file)
with open(output_file, 'w') as f:
print("CRYSTAL", file=f)
print("# temperature = %f" % temperature, file=f)
print("# pressure = %f" % pressure, file=f)
print("PRIMVEC", file=f)
for i in range(3):
print("%.14f %.14f %.14f" % (structure.lattice.matrix[i][0],
structure.lattice.matrix[i][1],
structure.lattice.matrix[i][2]), file=f)
print("PRIMCOORD", file=f)
print("%d 1" % len(structure.cart_coords), file=f)
for site, coord in zip(structure, structure.cart_coords):
sp = site.specie.Z
x, y, z = coord
print("%d %20.14f %20.14f %20.14f" % (sp, x, y, z), file=f)
main()