|
1 | | -from collections import OrderedDict |
2 | | - |
3 | 1 | import numpy as np |
4 | 2 | from transformers import pipeline |
5 | 3 |
|
6 | | -from experiments.benchmark import DifferenceRecognitionResult |
7 | 4 | from experiments.utils import load_language_pairs_benchmarks |
8 | | -from recognizers import DiffAlign |
9 | | - |
| 5 | +from recognizers import DiffAlign, DiffDel, DiffMask |
10 | 6 |
|
11 | 7 | benchmarks = load_language_pairs_benchmarks("validation") |
12 | 8 | device = 0 |
13 | | -num_seeds = 10 |
14 | 9 |
|
15 | 10 | seeds = [22417, 26186, 28852, 39168, 43002, 73246, 75213, 75370, 92253, 96301] |
| 11 | +num_seeds = len(seeds) |
16 | 12 |
|
17 | | -recognizers = [] |
18 | | -for seed in seeds[:num_seeds]: |
19 | | - recognizers.append(DiffAlign( |
| 13 | +diffalign_recognizers = [ |
| 14 | + DiffAlign( |
| 15 | + pipeline=pipeline( |
| 16 | + model=f"../SimCSE/models/simcse-xlm-roberta-base-mean-pooling-seed{seed}", |
| 17 | + task="feature-extraction", |
| 18 | + ), |
| 19 | + ) |
| 20 | + for seed in seeds[:num_seeds] |
| 21 | +] |
| 22 | +diffdel_recognizers = [ |
| 23 | + DiffDel( |
20 | 24 | pipeline=pipeline( |
21 | 25 | model=f"../SimCSE/models/simcse-xlm-roberta-base-mean-pooling-seed{seed}", |
22 | 26 | task="feature-extraction", |
23 | 27 | ), |
24 | | - )) |
25 | | -recognizers.append(DiffAlign( |
26 | | - pipeline=pipeline( |
27 | | - model="sentence-transformers/paraphrase-xlm-r-multilingual-v1", |
28 | | - task="feature-extraction", |
29 | | - ), |
30 | | -)) |
| 28 | + ) |
| 29 | + for seed in seeds[:num_seeds] |
| 30 | +] |
| 31 | +diffmask_recognizers = [ |
| 32 | + DiffMask( |
| 33 | + pipeline=pipeline( |
| 34 | + model="xlm-roberta-base", |
| 35 | + task="fill-mask", |
| 36 | + ), |
| 37 | + ) |
| 38 | +] |
31 | 39 |
|
32 | | -results = OrderedDict() |
33 | | -for i, recognizer in enumerate(recognizers): |
| 40 | +diffalign_results = np.zeros((len(diffalign_recognizers), len(benchmarks))) |
| 41 | +for i, recognizer in enumerate(diffalign_recognizers): |
34 | 42 | print(recognizer) |
35 | 43 | recognizer.pipeline.device = device |
36 | 44 | recognizer.device = device |
|
41 | 49 | result = benchmark.evaluate(recognizer) |
42 | 50 | print(result) |
43 | 51 | recognizer_results.append(result) |
44 | | - results[str(recognizer)] = recognizer_results |
45 | | - recognizers[i] = None |
| 52 | + diffalign_results[i, :] = [result.spearman for result in recognizer_results] |
| 53 | + diffalign_recognizers[i] = None |
46 | 54 | del recognizer |
| 55 | +diffalign_results = np.mean(diffalign_results, axis=0) |
| 56 | +assert len(diffalign_results) == len(benchmarks) |
47 | 57 |
|
48 | | -simcse_spearmans = [[] for _ in range(len(benchmarks))] |
49 | | -for recognizer_name, recognizer_results in list(results.items()): |
50 | | - if "simcse" in recognizer_name.lower(): |
51 | | - for i, result in enumerate(recognizer_results): |
52 | | - simcse_spearmans[i].append(result.spearman) |
53 | | - del results[recognizer_name] |
54 | | -results["\\xlmr{} + SimCSE (unsupervised)"] = [DifferenceRecognitionResult(spearman=np.mean(spearmans)) for spearmans in simcse_spearmans] |
| 58 | +diffdel_results = np.zeros((len(diffdel_recognizers), len(benchmarks))) |
| 59 | +for i, recognizer in enumerate(diffdel_recognizers): |
| 60 | + print(recognizer) |
| 61 | + recognizer.pipeline.device = device |
| 62 | + recognizer.device = device |
| 63 | + recognizer.pipeline.model = recognizer.pipeline.model.to(device) |
| 64 | + recognizer_results = [] |
| 65 | + for benchmark in benchmarks: |
| 66 | + print(benchmark) |
| 67 | + result = benchmark.evaluate(recognizer) |
| 68 | + print(result) |
| 69 | + recognizer_results.append(result) |
| 70 | + diffdel_results[i, :] = [result.spearman for result in recognizer_results] |
| 71 | + diffdel_recognizers[i] = None |
| 72 | + del recognizer |
| 73 | +diffdel_results = np.mean(diffdel_results, axis=0) |
| 74 | +assert len(diffdel_results) == len(benchmarks) |
| 75 | + |
| 76 | +diffmask_results = np.zeros((len(diffmask_recognizers), len(benchmarks))) |
| 77 | +for i, recognizer in enumerate(diffmask_recognizers): |
| 78 | + print(recognizer) |
| 79 | + recognizer.pipeline.device = device |
| 80 | + recognizer.device = device |
| 81 | + recognizer.pipeline.model = recognizer.pipeline.model.to(device) |
| 82 | + recognizer_results = [] |
| 83 | + for benchmark in benchmarks: |
| 84 | + print(benchmark) |
| 85 | + result = benchmark.evaluate(recognizer) |
| 86 | + print(result) |
| 87 | + recognizer_results.append(result) |
| 88 | + diffmask_results[i, :] = [result.spearman for result in recognizer_results] |
| 89 | + diffmask_recognizers[i] = None |
| 90 | + del recognizer |
| 91 | +diffmask_results = np.mean(diffmask_results, axis=0) |
| 92 | +assert len(diffmask_results) == len(benchmarks) |
55 | 93 |
|
56 | 94 | template = """\ |
57 | 95 | \\begin{tikzpicture} |
|
61 | 99 | ylabel={Spearman correlation}, |
62 | 100 | enlargelimits=0.05, |
63 | 101 | ymin=0, ymax=100, |
64 | | - legend pos=north west, |
| 102 | + legend pos=north east, |
65 | 103 | legend cell align={left}, |
66 | 104 | ybar interval=0.7, |
67 | 105 | ] |
|
71 | 109 | ] |
72 | 110 | coordinates {(0,0.1) (1,0.2) (2,0.3) (3,0.4) (4,0.5) (5,0.6) (6,0.7)}; |
73 | 111 |
|
74 | | - \\addlegendentry{\\xlmr{} + SimCSE (unsupervised)} |
| 112 | + \\addlegendentry{\\footnotesize{\\diffalign{}}} |
| 113 | +\\addplot[ |
| 114 | + color=red, |
| 115 | + fill=red!40, |
| 116 | + ] |
| 117 | + coordinates {(0,0.1) (1,0.2) (2,0.3) (3,0.4) (4,0.5) (5,0.6) (6,0.7)}; |
| 118 | + \\addlegendentry{\\footnotesize{\\diffdel{}}} |
75 | 119 | \\addplot[ |
76 | | - color=gray, |
77 | | - fill=gray!40, |
| 120 | + color=black, |
| 121 | + fill=black!40, |
78 | 122 | ] |
79 | 123 | coordinates {(0,0.1) (1,0.2) (2,0.3) (3,0.4) (4,0.5) (5,0.6) (6,0.7)}; |
80 | | - \\addlegendentry{\\xlmr{} trained on paraphrases} |
| 124 | + \\addlegendentry{\\footnotesize{\\diffmask{}}} |
81 | 125 | \\end{axis} |
82 | 126 | \\end{tikzpicture} |
83 | 127 | """ |
84 | 128 |
|
85 | | -for recognizer_results in reversed(list(results.values())): |
| 129 | +for recognizer_results in [diffalign_results, diffdel_results, diffmask_results]: |
86 | 130 | bars = "" |
87 | 131 | for i, result in enumerate(recognizer_results): |
88 | | - bars += f"({i},{result.spearman * 100:.1f}) " |
| 132 | + bars += f"({i},{result * 100:.1f}) " |
89 | 133 | bars += "(7, 0)" # Dummy |
90 | 134 | template = template.replace("(0,0.1) (1,0.2) (2,0.3) (3,0.4) (4,0.5) (5,0.6) (6,0.7)", bars, 1) |
91 | 135 | print(template) |
0 commit comments