Skip to content

Commit 0307576

Browse files
committed
Fix feeddown folding
1 parent a6c5cce commit 0307576

File tree

3 files changed

+32
-15
lines changed

3 files changed

+32
-15
lines changed

machine_learning_hep/analysis/analyzer_jets.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ def calculate_efficiencies(self):
226226
# gen-level efficiency for feeddown estimation
227227
h_eff_gen = h_genmatch[cat].Clone()
228228
h_eff_gen.Divide(h_gen[cat])
229-
self._save_hist(h_eff_gen, f'eff/h_effgen_{cat}.png')
229+
self._save_hist(h_eff_gen, f'eff/h_effgen_{cat}.png', 'text')
230230
self.hcandeff_gen[cat] = h_eff_gen
231231

232232
# matching loss
@@ -947,18 +947,18 @@ def estimate_feeddown(self):
947947
f';p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}',
948948
bins_ptjet, self.bins_candpt, bins_obs[var])
949949
fill_hist_fast(h3_fd_gen_orig, df[['pt_jet', 'pt_cand', f'{colname}']])
950-
self._save_hist(project_hist(h3_fd_gen_orig, [0, 2], {}), f'fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png')
951950

952951
# new method
953952
h3_fd_gen = h3_fd_gen_orig.Clone()
954953
ensure_sumw2(h3_fd_gen)
955954
self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen.png')
956955
# apply np efficiency
957956
for ipt in range(get_nbins(h3_fd_gen, 1)):
958-
eff_np = self.hcandeff_gen['np'].GetBinContent(ipt+1)
959-
for iptjet, ishape in itertools.product(
960-
range(get_nbins(h3_fd_gen, 0)), range(get_nbins(h3_fd_gen, 2))):
961-
scale_bin(h3_fd_gen, eff_np, iptjet+1, ipt+1, ishape+1)
957+
for iptjet in range(get_nbins(h3_fd_gen, 0)):
958+
eff_np = self.hcandeff_gen['np'].GetBinContent(iptjet + 1, ipt + 1)
959+
print(f'scaling with {eff_np=} for {iptjet=}, {ipt=}', flush=True)
960+
for ishape in range(get_nbins(h3_fd_gen, 2)):
961+
scale_bin(h3_fd_gen, eff_np, iptjet+1, ipt+1, ishape+1)
962962
self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen_geneff.png')
963963

964964
# 3d folding incl. kinematic efficiencies
@@ -970,6 +970,8 @@ def estimate_feeddown(self):
970970
rfile.Get(f'h_effkine_fd_det_nocuts_{var}'),
971971
rfile.Get(f'h_effkine_fd_det_cut_{var}'))
972972
h_response = rfile.Get(f'h_response_fd_{var}')
973+
h_response.Print('v')
974+
print(f'fd folding for {var=}')
973975
h_response_norm = norm_response(h_response, 3)
974976
h3_fd_gen.Multiply(h_effkine_gen)
975977
self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen_genkine.png')
@@ -987,14 +989,15 @@ def estimate_feeddown(self):
987989
for iptjet, ishape in itertools.product(
988990
range(get_nbins(h3_fd_det, 0)), range(get_nbins(h3_fd_det, 2))):
989991
scale_bin(h3_fd_det, 1./eff_pr, iptjet+1, ipt+1, ishape+1)
990-
self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_det_deteff.png')
991992

992993
# project to 2d (ptjet-shape)
993994
h_fd_det = project_hist(h3_fd_det, [0, 2], {})
995+
self._save_hist(h_fd_det, f'fd/h_ptjet-{var}_fdnew_det_deteff.png')
994996

995997
# old method
996998
h3_fd_gen = h3_fd_gen_orig.Clone()
997999
ensure_sumw2(h3_fd_gen)
1000+
self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png')
9981001
for ipt in range(get_nbins(h3_fd_gen, 1)):
9991002
eff_pr = self.hcandeff['pr'].GetBinContent(ipt+1)
10001003
eff_np = self.hcandeff['np'].GetBinContent(ipt+1)

machine_learning_hep/processer_jet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ def _prepare_response_fd(self, dfi, h_effkine, h_response, var):
429429
(df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
430430
fill_hist(h_effkine[('det', 'cut', var)], df[['fJetPt', 'fPt', var]])
431431

432-
fill_hist(h_response[var], df[['fJetPt', 'fPt', f'{var}', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']])
432+
fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']])
433433

434434
df = dfi
435435
df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &

machine_learning_hep/utils/hist.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -293,13 +293,19 @@ def norm_response(response, dim_out):
293293
for bin_in in itertools.product(*(range(1, get_nbins(response_norm, iaxis) + 1)
294294
for iaxis in range(dim_out, get_dim(response_norm)))):
295295
for iaxis, val in enumerate(bin_in, dim_out):
296-
get_axis(response_norm, iaxis).SetRange(val, val)
297-
norm = response_norm.Projection(0).Integral()
296+
get_axis(response, iaxis).SetRange(val, val)
297+
# norm = response.Projection(0).Integral()
298+
norm = 0.
299+
for bin_out in itertools.product(*(range(1, get_nbins(response, i) + 1) for i in range(dim_out))):
300+
norm += get_bin_val(response, bin_out + bin_in)
298301
if np.isclose(norm, 0.):
299302
continue
300-
for bin_out in itertools.product(*(range(1, get_nbins(response_norm, i)+1) for i in range(dim_out))):
301-
set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response_norm, bin_out + bin_in) / norm)
302-
set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response_norm, bin_out + bin_in) / norm)
303+
total = 0.
304+
for bin_out in itertools.product(*(range(1, get_nbins(response_norm, i) + 1) for i in range(dim_out))):
305+
set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response, bin_out + bin_in) / norm)
306+
set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response, bin_out + bin_in) / norm)
307+
total += get_bin_val(response_norm, bin_out + bin_in)
308+
print(f'distributed {bin_in=} to {total=} counts')
303309
return response_norm
304310

305311

@@ -309,10 +315,18 @@ def fold_hist(hist, response):
309315
dim_out = get_dim(response) - get_dim(hist)
310316
axes_spec = list(np.array(get_axis(response, i).GetXbins(), 'd') for i in range(dim_out))
311317
hfold = create_hist('test', 'test', *axes_spec)
312-
for bin_out in itertools.product(*(range(1, get_nbins(hfold, i)+1) for i in range(get_dim(hfold)))):
318+
# TODO: setup axes
319+
for bin_in in itertools.product(*(range(1, get_nbins(hist, i) + 1) for i in range(get_dim(hist)))):
320+
total = 0.
321+
for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))):
322+
total += get_bin_val(response, bin_out + bin_in)
323+
print(f'redistributed {bin_in=} to {total=} counts')
324+
325+
for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))):
313326
val = 0.
314327
err = 0.
315-
for bin_in in itertools.product(*(range(1, get_nbins(hist, i)+1) for i in range(get_dim(hist)))):
328+
for bin_in in itertools.product(*(range(1, get_nbins(hist, i) + 1) for i in range(get_dim(hist)))):
329+
print(f'{bin_out=} collecting {bin_in=} with weight {get_bin_val(response, bin_out + bin_in)}')
316330
val += get_bin_val(hist, bin_in) * get_bin_val(response, bin_out + bin_in)
317331
err += get_bin_err(hist, bin_in)**2 * get_bin_val(response, bin_out + bin_in)**2
318332
set_bin_val(hfold, bin_out, val)

0 commit comments

Comments
 (0)