Skip to content

Commit d245e7f

Browse files
committed
Update scripts to reflect published version of the paper
1 parent 5a757e7 commit d245e7f

File tree

6 files changed

+330
-174
lines changed

6 files changed

+330
-174
lines changed
Lines changed: 84 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,38 +1,44 @@
1-
from collections import OrderedDict
2-
31
import numpy as np
42
from transformers import pipeline
53

6-
from experiments.benchmark import DifferenceRecognitionResult
74
from experiments.utils import load_document_length_benchmarks
8-
from recognizers import DiffAlign
9-
5+
from recognizers import DiffAlign, DiffDel, DiffMask
106

117
benchmarks = load_document_length_benchmarks("validation")
128
device = 0
13-
num_seeds = 10
149

1510
seeds = [22417, 26186, 28852, 39168, 43002, 73246, 75213, 75370, 92253, 96301]
11+
num_seeds = len(seeds)
1612

17-
recognizers = []
18-
for seed in seeds[:num_seeds]:
19-
recognizers.append(DiffAlign(
13+
diffalign_recognizers = [
14+
DiffAlign(
15+
pipeline=pipeline(
16+
model=f"../SimCSE/models/simcse-xlm-roberta-base-mean-pooling-seed{seed}",
17+
task="feature-extraction",
18+
),
19+
)
20+
for seed in seeds[:num_seeds]
21+
]
22+
diffdel_recognizers = [
23+
DiffDel(
2024
pipeline=pipeline(
2125
model=f"../SimCSE/models/simcse-xlm-roberta-base-mean-pooling-seed{seed}",
2226
task="feature-extraction",
2327
),
24-
batch_size=4,
25-
))
26-
recognizers.append(DiffAlign(
27-
pipeline=pipeline(
28-
model="sentence-transformers/paraphrase-xlm-r-multilingual-v1",
29-
task="feature-extraction",
30-
),
31-
batch_size=4,
32-
))
28+
)
29+
for seed in seeds[:num_seeds]
30+
]
31+
diffmask_recognizers = [
32+
DiffMask(
33+
pipeline=pipeline(
34+
model="xlm-roberta-base",
35+
task="fill-mask",
36+
),
37+
)
38+
]
3339

34-
results = OrderedDict()
35-
for i, recognizer in enumerate(recognizers):
40+
diffalign_results = np.zeros((len(diffalign_recognizers), len(benchmarks)))
41+
for i, recognizer in enumerate(diffalign_recognizers):
3642
print(recognizer)
3743
recognizer.pipeline.device = device
3844
recognizer.device = device
@@ -43,17 +49,48 @@
4349
result = benchmark.evaluate(recognizer)
4450
print(result)
4551
recognizer_results.append(result)
46-
results[str(recognizer)] = recognizer_results
47-
recognizers[i] = None
52+
diffalign_results[i, :] = [result.spearman for result in recognizer_results]
53+
diffalign_recognizers[i] = None
4854
del recognizer
55+
diffalign_results = np.mean(diffalign_results, axis=0)
56+
assert len(diffalign_results) == len(benchmarks)
4957

50-
simcse_spearmans = [[] for _ in range(len(benchmarks))]
51-
for recognizer_name, recognizer_results in list(results.items()):
52-
if "simcse" in recognizer_name.lower():
53-
for i, result in enumerate(recognizer_results):
54-
simcse_spearmans[i].append(result.spearman)
55-
del results[recognizer_name]
56-
results["\\xlmr{} + SimCSE (unsupervised)"] = [DifferenceRecognitionResult(spearman=np.mean(spearmans)) for spearmans in simcse_spearmans]
58+
diffdel_results = np.zeros((len(diffdel_recognizers), len(benchmarks)))
59+
for i, recognizer in enumerate(diffdel_recognizers):
60+
print(recognizer)
61+
recognizer.pipeline.device = device
62+
recognizer.device = device
63+
recognizer.pipeline.model = recognizer.pipeline.model.to(device)
64+
recognizer_results = []
65+
for benchmark in benchmarks:
66+
print(benchmark)
67+
result = benchmark.evaluate(recognizer)
68+
print(result)
69+
recognizer_results.append(result)
70+
diffdel_results[i, :] = [result.spearman for result in recognizer_results]
71+
diffdel_recognizers[i] = None
72+
del recognizer
73+
diffdel_results = np.mean(diffdel_results, axis=0)
74+
assert len(diffdel_results) == len(benchmarks)
75+
76+
diffmask_results = np.zeros((len(diffmask_recognizers), len(benchmarks)))
77+
for i, recognizer in enumerate(diffmask_recognizers):
78+
print(recognizer)
79+
recognizer.pipeline.device = device
80+
recognizer.device = device
81+
recognizer.pipeline.model = recognizer.pipeline.model.to(device)
82+
recognizer_results = []
83+
for benchmark in benchmarks:
84+
print(benchmark)
85+
if benchmark.num_sentences_per_document > 7:
86+
break # Exceeds max seq length
87+
result = benchmark.evaluate(recognizer)
88+
print(result)
89+
recognizer_results.append(result)
90+
diffmask_results[i, :7] = [result.spearman for result in recognizer_results]
91+
diffmask_recognizers[i] = None
92+
del recognizer
93+
diffmask_results = np.mean(diffmask_results, axis=0)
5794

5895
template = """\
5996
\\begin{tikzpicture}
@@ -64,35 +101,46 @@
64101
ymin=0, ymax=100,
65102
xtick={2,4,6,8,10,12,14,16},
66103
ytick={0,20,40,60,80,100},
67-
legend pos=south west,
104+
legend pos=north east,
68105
legend cell align={left},
69106
ymajorgrids=true,
70107
grid style=dashed,
71108
]
72109
73110
\\addplot[
74-
color=blue,
111+
color=blue,
112+
style=solid,
113+
]
114+
coordinates {
115+
(0,0.00)(1,0.00)
116+
};
117+
\\addlegendentry{\\diffalign{}}
118+
\\addplot[
119+
color=red,
120+
style=dashed,
75121
]
76122
coordinates {
77123
(0,0.00)(1,0.00)
78124
};
79-
\\addlegendentry{\\xlmr{} + SimCSE (unsupervised)}
125+
\\addlegendentry{\\diffdel{}}
80126
\\addplot[
81-
color=gray,
127+
color=black,
128+
style=dotted,
129+
line width=1.5pt,
82130
]
83131
coordinates {
84132
(0,0.00)(1,0.00)
85133
};
86-
\\addlegendentry{\\xlmr{} trained on paraphrases}
134+
\\addlegendentry{\\diffmask{}}
87135
\\end{axis}
88136
\\end{tikzpicture}
89137
"""
90138

91139
document_lengths = list(range(1, 17))
92-
for recognizer_results in reversed(list(results.values())):
140+
for recognizer_results in [diffalign_results, diffdel_results, diffmask_results]:
93141
assert len(recognizer_results) == len(document_lengths)
94142
line: str = ""
95143
for document_length, result in zip(document_lengths, recognizer_results):
96-
line += f"({document_length},{result.spearman*100:.1f})"
144+
line += f"({document_length},{result * 100:.1f})"
97145
template = template.replace("(0,0.00)(1,0.00)", line, 1)
98146
print(template)
Lines changed: 78 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,36 +1,44 @@
1-
from collections import OrderedDict
2-
31
import numpy as np
42
from transformers import pipeline
53

6-
from experiments.benchmark import DifferenceRecognitionResult
74
from experiments.utils import load_language_pairs_benchmarks
8-
from recognizers import DiffAlign
9-
5+
from recognizers import DiffAlign, DiffDel, DiffMask
106

117
benchmarks = load_language_pairs_benchmarks("validation")
128
device = 0
13-
num_seeds = 10
149

1510
seeds = [22417, 26186, 28852, 39168, 43002, 73246, 75213, 75370, 92253, 96301]
11+
num_seeds = len(seeds)
1612

17-
recognizers = []
18-
for seed in seeds[:num_seeds]:
19-
recognizers.append(DiffAlign(
13+
diffalign_recognizers = [
14+
DiffAlign(
15+
pipeline=pipeline(
16+
model=f"../SimCSE/models/simcse-xlm-roberta-base-mean-pooling-seed{seed}",
17+
task="feature-extraction",
18+
),
19+
)
20+
for seed in seeds[:num_seeds]
21+
]
22+
diffdel_recognizers = [
23+
DiffDel(
2024
pipeline=pipeline(
2125
model=f"../SimCSE/models/simcse-xlm-roberta-base-mean-pooling-seed{seed}",
2226
task="feature-extraction",
2327
),
24-
))
25-
recognizers.append(DiffAlign(
26-
pipeline=pipeline(
27-
model="sentence-transformers/paraphrase-xlm-r-multilingual-v1",
28-
task="feature-extraction",
29-
),
30-
))
28+
)
29+
for seed in seeds[:num_seeds]
30+
]
31+
diffmask_recognizers = [
32+
DiffMask(
33+
pipeline=pipeline(
34+
model="xlm-roberta-base",
35+
task="fill-mask",
36+
),
37+
)
38+
]
3139

32-
results = OrderedDict()
33-
for i, recognizer in enumerate(recognizers):
40+
diffalign_results = np.zeros((len(diffalign_recognizers), len(benchmarks)))
41+
for i, recognizer in enumerate(diffalign_recognizers):
3442
print(recognizer)
3543
recognizer.pipeline.device = device
3644
recognizer.device = device
@@ -41,17 +49,47 @@
4149
result = benchmark.evaluate(recognizer)
4250
print(result)
4351
recognizer_results.append(result)
44-
results[str(recognizer)] = recognizer_results
45-
recognizers[i] = None
52+
diffalign_results[i, :] = [result.spearman for result in recognizer_results]
53+
diffalign_recognizers[i] = None
4654
del recognizer
55+
diffalign_results = np.mean(diffalign_results, axis=0)
56+
assert len(diffalign_results) == len(benchmarks)
4757

48-
simcse_spearmans = [[] for _ in range(len(benchmarks))]
49-
for recognizer_name, recognizer_results in list(results.items()):
50-
if "simcse" in recognizer_name.lower():
51-
for i, result in enumerate(recognizer_results):
52-
simcse_spearmans[i].append(result.spearman)
53-
del results[recognizer_name]
54-
results["\\xlmr{} + SimCSE (unsupervised)"] = [DifferenceRecognitionResult(spearman=np.mean(spearmans)) for spearmans in simcse_spearmans]
58+
diffdel_results = np.zeros((len(diffdel_recognizers), len(benchmarks)))
59+
for i, recognizer in enumerate(diffdel_recognizers):
60+
print(recognizer)
61+
recognizer.pipeline.device = device
62+
recognizer.device = device
63+
recognizer.pipeline.model = recognizer.pipeline.model.to(device)
64+
recognizer_results = []
65+
for benchmark in benchmarks:
66+
print(benchmark)
67+
result = benchmark.evaluate(recognizer)
68+
print(result)
69+
recognizer_results.append(result)
70+
diffdel_results[i, :] = [result.spearman for result in recognizer_results]
71+
diffdel_recognizers[i] = None
72+
del recognizer
73+
diffdel_results = np.mean(diffdel_results, axis=0)
74+
assert len(diffdel_results) == len(benchmarks)
75+
76+
diffmask_results = np.zeros((len(diffmask_recognizers), len(benchmarks)))
77+
for i, recognizer in enumerate(diffmask_recognizers):
78+
print(recognizer)
79+
recognizer.pipeline.device = device
80+
recognizer.device = device
81+
recognizer.pipeline.model = recognizer.pipeline.model.to(device)
82+
recognizer_results = []
83+
for benchmark in benchmarks:
84+
print(benchmark)
85+
result = benchmark.evaluate(recognizer)
86+
print(result)
87+
recognizer_results.append(result)
88+
diffmask_results[i, :] = [result.spearman for result in recognizer_results]
89+
diffmask_recognizers[i] = None
90+
del recognizer
91+
diffmask_results = np.mean(diffmask_results, axis=0)
92+
assert len(diffmask_results) == len(benchmarks)
5593

5694
template = """\
5795
\\begin{tikzpicture}
@@ -61,7 +99,7 @@
6199
ylabel={Spearman correlation},
62100
enlargelimits=0.05,
63101
ymin=0, ymax=100,
64-
legend pos=north west,
102+
legend pos=north east,
65103
legend cell align={left},
66104
ybar interval=0.7,
67105
]
@@ -71,21 +109,27 @@
71109
]
72110
coordinates {(0,0.1) (1,0.2) (2,0.3) (3,0.4) (4,0.5) (5,0.6) (6,0.7)};
73111
74-
\\addlegendentry{\\xlmr{} + SimCSE (unsupervised)}
112+
\\addlegendentry{\\footnotesize{\\diffalign{}}}
113+
\\addplot[
114+
color=red,
115+
fill=red!40,
116+
]
117+
coordinates {(0,0.1) (1,0.2) (2,0.3) (3,0.4) (4,0.5) (5,0.6) (6,0.7)};
118+
\\addlegendentry{\\footnotesize{\\diffdel{}}}
75119
\\addplot[
76-
color=gray,
77-
fill=gray!40,
120+
color=black,
121+
fill=black!40,
78122
]
79123
coordinates {(0,0.1) (1,0.2) (2,0.3) (3,0.4) (4,0.5) (5,0.6) (6,0.7)};
80-
\\addlegendentry{\\xlmr{} trained on paraphrases}
124+
\\addlegendentry{\\footnotesize{\\diffmask{}}}
81125
\\end{axis}
82126
\\end{tikzpicture}
83127
"""
84128

85-
for recognizer_results in reversed(list(results.values())):
129+
for recognizer_results in [diffalign_results, diffdel_results, diffmask_results]:
86130
bars = ""
87131
for i, result in enumerate(recognizer_results):
88-
bars += f"({i},{result.spearman * 100:.1f}) "
132+
bars += f"({i},{result * 100:.1f}) "
89133
bars += "(7, 0)" # Dummy
90134
template = template.replace("(0,0.1) (1,0.2) (2,0.3) (3,0.4) (4,0.5) (5,0.6) (6,0.7)", bars, 1)
91135
print(template)

0 commit comments

Comments (0)