-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvariable_unfolding.py
154 lines (118 loc) · 5.21 KB
/
variable_unfolding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import ROOT
import numpy as np
from qunfold.root2numpy import TH2_to_numpy, TH1_to_numpy, TVector_to_numpy
from qunfold.utils import normalize_response, compute_chi2
from qunfold import QPlotter
from utils import *
import pylab as plt
from unfolding_functions import roounfolder, qunfolder
##################CONFIGS#####################
# Pick a variable (ex. c_thetap or ttbar_mass)
variable = "ttbar_mass"
y_axis_min=0
y_axis_max=0.14
#######################################
# Load RooUnfold library
if ROOT.gSystem.Load("libRooUnfold"):
raise ImportError("RooUnfold was not loaded successfully!")
# Open input ROOT file
root_file = ROOT.TFile.Open("ttbar_qunfold.root")
# Get binning for histograms
overflow = True
binning = get_binning(root_file=root_file, variable=variable, overflow=overflow)
# Read particle-level(=measured) histograms
dir = "particle"
th1_measured = root_file.Get(f"{dir}/{variable}")
th1_fake = root_file.Get(f"{dir}/{variable}_fake")
d = TH1_to_numpy(th1_measured, overflow=overflow)
fake = TH1_to_numpy(th1_fake, overflow=overflow)
# Read parton-level(=truth) histograms
dir = "parton"
th1_truth = root_file.Get(f"{dir}/{variable}_parton")
th1_miss = root_file.Get(f"{dir}/{variable}_miss")
t = TH1_to_numpy(th1_truth, overflow=overflow)
miss = TH1_to_numpy(th1_miss, overflow=overflow)
# Read Monte Carlo migration matrix (measured X truth)
dir = "migration"
th2_migration = root_file.Get(f"{dir}/{variable}_migration")
M = TH2_to_numpy(th2_migration, overflow=overflow)
# Get Monte Carlo reco and truth histograms
th1_proj_reco_mc = th2_migration.ProjectionX()
th1_reco_mc = th1_proj_reco_mc.Clone("th1_reco_mc")
th1_reco_mc.Add(th1_fake)
th1_proj_truth_mc = th2_migration.ProjectionY()
th1_truth_mc = th1_proj_truth_mc.Clone("th1_truth_mc")
th1_truth_mc.Add(th1_miss)
mu = TH1_to_numpy(th1_reco_mc, overflow=overflow)
x = TH1_to_numpy(th1_truth_mc, overflow=overflow)
# Normalize migration matrix in numpy
proj_truth_mc = TH1_to_numpy(th1_proj_truth_mc)
M = normalize_response(response=M, truth_mc=proj_truth_mc)
# Create response matrix object in RooUnfold
roo_response = ROOT.RooUnfoldResponse(th1_reco_mc, th1_truth_mc, th2_migration)
# Estimate purity from Monte Carlo data
purity = estimate_purity(reco=mu, fake=fake)
roo_purity = TVector_to_numpy(roo_response.Vpurity())
# Estimate efficiency from Monte Carlo data
efficiency = estimate_efficiency(truth=x, miss=miss)
roo_efficiency = TVector_to_numpy(roo_response.Vefficiency())
# Sanity checks
assert np.allclose(np.sum(M, axis=0), 1)
# assert np.allclose(purity[1:-1], roo_purity)
# assert np.allclose(efficiency[1:-1], roo_efficiency)
assert np.allclose(mu, M @ (x - miss) + fake)
assert np.allclose(mu, M @ (x * efficiency) / purity)
# Run Matrix Inversion unfolding in numpy
#np_unfolded = (1 / efficiency) * (np.linalg.inv(M) @ (d * purity))
#Passing from migration to response and correcting fake events
resp = M * efficiency
meas = d - fake
#run RooUnfold and QUnfold algorithm
sol_mi, cov_mi = roounfolder("MI", roo_response, th1_measured, th1_fake)
sol_ibu, cov_ibu = roounfolder("IBU", roo_response, th1_measured, th1_fake)
sol_grb, cov_grb = qunfolder("GRB", resp, d, fake, binning)
#sol_sa, cov_sa = qunfolder("SA", resp, d, fake, binning, num_reads = 100)
#calculating error and chi square
err_mi = np.sqrt(np.diag(cov_mi))
chi2_mi = compute_chi2(observed=sol_mi, expected=t)
err_ibu= np.sqrt(np.diag(cov_ibu))
chi2_ibu = compute_chi2(observed=sol_ibu, expected=t)
err_grb= np.sqrt(np.diag(cov_grb))[1:-1]
chi2_grb = compute_chi2(observed=sol_grb[1:-1], expected=t[1:-1])
#err_sa = np.sqrt(np.diag(cov_sa))
#chi2_sa= compute_chi2(observed=sol_sa, expected=t)
#Plot just one method and save the response
qplotter = QPlotter(
response=M,
measured=d,
truth=t,
unfolded=sol_grb,
covariance=cov_grb,
binning=binning,
method="GRB",
norm=True,
)
qplotter.save_response(f"{variable}_response.pdf")
#qplotter.show_histograms()
#plotting and comparing the results
fig = plt.figure(figsize=(12, 9))
gs = fig.add_gridspec(nrows=2, ncols=1, height_ratios=[3, 1], hspace=0)
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1], sharex=ax1)
QPlotter.histogram_plot(ax=ax1, xedges=binning[1:-1], hist=t[1:-1], label="Truth", norm=True)
QPlotter.histogram_plot(ax=ax1, xedges=binning[1:-1], hist=d[1:-1], label="Measured", norm=True)
binning = binning[1:-1]
xlims = (binning[1], binning[-2])
widths = np.diff(binning)
xmid = binning[:-1] + 0.5 * widths
ax1.set_ybound(y_axis_min,y_axis_max)
#Add a QPlotter.errorbar_plot for each unfolding method
QPlotter.errorbar_plot(ax=ax1, xmid=xmid, hist=sol_grb[1:-1], err=err_grb, xlims=xlims, label="GRB", chi2=chi2_grb, norm=True)
QPlotter.errorbar_plot(ax=ax1, xmid=xmid, hist=sol_mi[1:-1], err=err_mi, xlims=xlims, label="MI", chi2=chi2_mi, norm=True)
QPlotter.errorbar_plot(ax=ax1, xmid=xmid, hist=sol_ibu[1:-1], err=err_ibu, xlims=xlims, label="IBU", chi2=chi2_ibu, norm=True)
#QPlotter.errorbar_plot(ax=ax1, xmid=xmid, hist=sol_sa[1:-1], err=err_sa, xlims=xlims, label="SA", chi2=chi2_sa, norm=True)
#Add a ratio plot
sol_ratio = sol_grb[1:-1] / t[1:-1]
err_ratio = err_grb / t[1:-1]
QPlotter.ratio_plot(ax=ax2, xmid=xmid, ratio=sol_ratio, err=err_ratio, label="GRB", xticks=binning)
fig.savefig(f"{variable}_unfolding.pdf")