-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmajka2wls.py
53 lines (41 loc) · 1.2 KB
/
majka2wls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Generate cstenten.wls from cstenten.out (majka output)
import argparse
def has_forbidden_character(string):
    """Tell whether ``string`` cannot be represented in ISO-8859-2.

    Returns True if encoding the string as ISO-8859-2 fails because of at
    least one unencodable character, and False if the whole string encodes
    cleanly (the empty string included).
    """
    encodable = True
    try:
        # Only the success/failure of the encoding matters; the bytes
        # themselves are discarded.
        string.encode('iso-8859-2')
    except UnicodeEncodeError:
        encodable = False
    return not encodable
def parse_line(text: str) -> str:
    """Parse one line of majka output into a word.

    Trailing whitespace (including the line terminator) is stripped first.
    Two line shapes are then accepted:

    * ``+word`` -- everything after the leading ``+`` is the word.
    * a line starting with ``@``, ``!`` or ``$`` -- the word is the second
      tab-separated field.  If that field contains ``#``, only the part
      before the first ``#`` is kept, with trailing whitespace removed.

    Args:
        text: One raw input line, with or without its line terminator.

    Returns:
        The extracted word.

    Raises:
        ValueError: If the line is empty, starts with an unrecognised
            character, or lacks the tab-separated word field.
    """
    text = text.rstrip()
    # startswith() is safe on an empty string, whereas text[0] would raise
    # IndexError for a blank line.
    if text.startswith("+"):
        return text[1:]
    if text.startswith(("@", "!", "$")):
        fields = text.split("\t")
        # A marker line without a second field is malformed input; fall
        # through to the ValueError below instead of raising IndexError.
        if len(fields) > 1:
            word = fields[1]
            if "#" in word:
                return word.split("#")[0].rstrip()
            return word
    raise ValueError("Invalid input: " + text)
def main() -> None:
    """Convert a majka output file into a sorted, de-duplicated word list.

    Command line: ``<outf> <inpf>`` -- note that the output file comes
    first.  Every line of ``inpf`` is passed through ``parse_line``; the
    distinct resulting words are written to ``outf`` in sorted order, one
    per line.

    Raises:
        SystemExit: With the message ``no words found`` if the input
            yields no words.
        ValueError: Propagated from ``parse_line`` for a malformed line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('outf', type=str,
                        help='wls output file')
    parser.add_argument('inpf', type=str,
                        help='majka input file')
    args = parser.parse_args()

    # NOTE(review): both files are opened with the platform default
    # encoding, as before; confirm which encoding majka output uses and
    # pass it explicitly if the script must be locale-independent.
    with open(args.inpf) as inpf:
        words = {parse_line(line) for line in inpf}

    # Kept from the original: drop a None entry should parse_line ever
    # yield one; discard() is a no-op when it is absent.
    words.discard(None)

    # Explicit check instead of `assert`, which is stripped under
    # `python -O` and would let an empty word list be written silently.
    if not words:
        raise SystemExit("no words found")

    with open(args.outf, "w") as outf:
        outf.writelines(word + "\n" for word in sorted(words))


# Guarded so that importing this module (e.g. to reuse parse_line) does not
# parse sys.argv or touch any files.
if __name__ == "__main__":
    main()