From 8ce5b4a8748d675cdccebdc25cc907af215a5198 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Oct 2025 15:41:32 +0000 Subject: [PATCH 1/2] Initial plan From e128335a26490d83a720c5e43f939088193d747f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Oct 2025 15:49:04 +0000 Subject: [PATCH 2/2] Fix BPE tokenization alignment in aspect extraction predictions Co-authored-by: yangheng95 <51735130+yangheng95@users.noreply.github.com> --- .../AspectTermExtraction/prediction/aspect_extractor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyabsa/tasks/AspectTermExtraction/prediction/aspect_extractor.py b/pyabsa/tasks/AspectTermExtraction/prediction/aspect_extractor.py index c05068e1..968a7bbf 100644 --- a/pyabsa/tasks/AspectTermExtraction/prediction/aspect_extractor.py +++ b/pyabsa/tasks/AspectTermExtraction/prediction/aspect_extractor.py @@ -548,6 +548,7 @@ def _extract(self, examples): ate_logits = torch.argmax(F.log_softmax(ate_logits, dim=2), dim=2) ate_logits = ate_logits.detach().cpu().numpy() label_ids = label_ids.to(DeviceTypeOption.CPU).numpy() + valid_ids = valid_ids.to(DeviceTypeOption.CPU).numpy() for i, i_ate_logits in enumerate(ate_logits): pred_iobs = [] sentence_res.append( @@ -561,7 +562,9 @@ def _extract(self, examples): ): break else: - pred_iobs.append(label_map.get(i_ate_logits[j], "O")) + # Only use predictions for the first BPE token of each original word + if valid_ids[i][j] == 1: + pred_iobs.append(label_map.get(i_ate_logits[j], "O")) ate_result = [] polarity = []