-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathvislcg3-evaluate.py
131 lines (122 loc) · 3.53 KB
/
vislcg3-evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Score constraint-grammar disambiguation output against a reference.

Usage: vislcg3-evaluate.py <src> <ref> <tst>

All three files are read as cohort streams: a wordform line looks like
'"<form>"' and each reading line below it starts with a tab (optionally
preceded by ';') followed by a quoted base form.

- <src> supplies every known reading of each token.
- <ref> marks the readings that count as correct.
- <tst> is the output under test; readings starting with ';' are treated as
  removed and are expected to end in REMOVE:/SELECT: rule tags.

Todo:
- Subreadings
- Per rule statistics
"""
import re
import sys

r_wordform = re.compile('^"<[^>]+>"$')
r_baseform = re.compile(';?\t"[^$]+" ')
r_rule = re.compile('(REMOVE|SELECT):[^$]+$')


def _wordform(line):
    """Return the text between '"<' and '>"' on a wordform line."""
    # Drop the newline explicitly so an unterminated last line is not cut short.
    return line.rstrip('\n')[2:-2]


def _safe_div(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _check_alignment(tokens, idx, line):
    """Exit with status -1 unless token ``idx`` exists with this wordform."""
    if idx not in tokens or tokens[idx][0] != _wordform(line):
        print('Alignment broken,\n', idx, line)
        sys.exit(-1)


def _read_source(lines):
    """Collect every token and its readings from the source stream.

    Returns ``(tokens, n_analyses)`` where ``tokens`` maps a 1-based token
    index to ``(wordform, {reading: False})``.
    """
    tokens = {}
    idx = 0
    n_analyses = 0
    for line in lines:
        if r_wordform.match(line):
            idx += 1
            tokens[idx] = (_wordform(line), {})
        elif r_baseform.match(line):
            n_analyses += 1
            tokens[idx][1][line.strip()] = False
    return tokens, n_analyses


def _read_reference(lines, tokens):
    """Flag the readings present in the reference stream as correct.

    Mutates ``tokens`` in place (reading -> True) and returns the number of
    reference readings. Exits if the wordforms do not line up with the source.
    """
    idx = 0
    n_analyses = 0
    for line in lines:
        if r_wordform.match(line):
            idx += 1
            _check_alignment(tokens, idx, line)
        elif r_baseform.match(line):
            n_analyses += 1
            tokens[idx][1][line.strip()] = True
    return n_analyses


def _score_output(lines, tokens):
    """Classify every reading of the tested stream against ``tokens``.

    Returns ``(n_analyses, counts, decisions, errors)``:
    ``n_analyses`` is the number of readings that were kept, ``counts`` holds
    the global 'tp'/'fp'/'tn'/'fn' totals, ``decisions`` maps a rule tag to
    ``[n_tp, n_fp, n_tn, n_fn]`` and ``errors`` lists
    ``(idx, reading, known_readings, rules)`` for each wrong decision.
    """
    counts = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}
    decisions = {}
    errors = []
    n_analyses = 0
    idx = 0
    for line in lines:
        if r_wordform.match(line):
            idx += 1
            _check_alignment(tokens, idx, line)
            continue
        if not r_baseform.match(line):
            continue

        known = tokens[idx][1]
        analysis = line.strip()
        if line[0] == ';':
            # Removed reading: peel off the leading ';' and the rule trace.
            rule_match = r_rule.search(analysis)
            if rule_match is None:
                # No REMOVE:/SELECT: tag to attribute the removal to.
                rules = ''
                analysis = analysis[1:].strip()
            else:
                rules = rule_match.group().strip()
                analysis = analysis[1:rule_match.start()].strip()
            rule_names = rules.split()
            for rule in rule_names:
                decisions.setdefault(rule, [0, 0, 0, 0])  # n_tp, n_fp, n_tn, n_fn
            if analysis in known:
                if known[analysis]:
                    # A reading found in the reference was deleted.
                    counts['fp'] += 1
                    errors.append((idx, analysis, known, rules))
                    column = 1
                else:
                    # A reading absent from the reference was deleted.
                    counts['tp'] += 1
                    column = 0
                for rule in rule_names:
                    decisions[rule][column] += 1
        else:
            # NOTE(review): kept readings are looked up verbatim, so a kept
            # reading that carries trace tags will not be found among the known
            # readings - confirm whether traces should be stripped here too.
            n_analyses += 1
            if analysis in known:
                if known[analysis]:
                    # The reading from the reference was left in.
                    counts['tn'] += 1
                else:
                    # A reading absent from the reference was left in.
                    counts['fn'] += 1
                    errors.append((idx, analysis, known, ''))
    return n_analyses, counts, decisions, errors


def _report(tokens, errors, decisions, n_src, n_ref, n_tst, counts):
    """Print the token dump, error list, per-rule table and summary scores."""
    print('Tokens:')
    print()
    # NOTE(review): the dump goes to stderr while its heading goes to stdout;
    # kept as found - confirm this split is intentional.
    for token in tokens:
        print(token, '|', tokens[token], file=sys.stderr)
    print()
    print('Errors:')
    print()
    for error in errors:
        print(error[0], '|', error[1], '|', error[2])
    print()
    print('Decisions:')
    print()
    print('\t\tTP\tFP\tTN\tFN')
    for rule in decisions:
        print(rule, '\t' + '\t'.join([str(i) for i in decisions[rule]]))
    print()
    print('--------------------------------------------------------------------------------')
    print('Input analyses:', n_src)
    print('Reference analyses:', n_ref)
    print('Output analyses:', n_tst)
    print()
    n_tokens = len(tokens)
    print('Input ambiguity:', _safe_div(n_src, n_tokens))
    print('Reference ambiguity:', _safe_div(n_ref, n_tokens))
    print('Output ambiguity:', _safe_div(n_tst, n_tokens))
    print()
    print('False positives:', counts['fp'])
    print('False negatives:', counts['fn'])
    print('True positives:', counts['tp'])
    print('True negatives:', counts['tn'])
    print()
    # Undefined ratios (nothing removed, nothing to find) are reported as 0.0
    # instead of aborting the report with ZeroDivisionError.
    precision = _safe_div(counts['tp'], counts['tp'] + counts['fp'])
    recall = _safe_div(counts['tp'], counts['tp'] + counts['fn'])
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1: %.4f' % (2 * _safe_div(precision * recall, precision + recall)))
    print()


def main(argv=None):
    """Run the evaluation for ``argv`` (defaults to ``sys.argv``).

    ``argv[1:4]`` are the <src>, <ref> and <tst> paths. Exits with status -1
    when fewer than three paths are given or the streams are misaligned.
    """
    if argv is None:
        argv = sys.argv
    # Three paths plus the program name are required.
    if len(argv) < 4:
        print('vislcg3-evaluate.py <src> <ref> <tst>')
        sys.exit(-1)

    # Open all three up front so a missing file fails before any work is done,
    # and let the context manager close them again.
    with open(argv[1]) as f_src, open(argv[2]) as f_ref, open(argv[3]) as f_tst:
        tokens, n_analyses_src = _read_source(f_src)
        n_analyses_ref = _read_reference(f_ref, tokens)
        n_analyses_tst, counts, decisions, errors = _score_output(f_tst, tokens)

    _report(tokens, errors, decisions,
            n_analyses_src, n_analyses_ref, n_analyses_tst, counts)


if __name__ == '__main__':
    main()