From 0b7a278c4f7bda5b74546badd53f5d114304d372 Mon Sep 17 00:00:00 2001 From: meriam2303 Date: Mon, 1 Sep 2025 16:28:11 +0100 Subject: [PATCH 1/6] french+arabic+maghrebi --- .../java/opennlp/tools/postag/POSTaggerMEIT.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java index d901654a7..6deee6c25 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java @@ -152,6 +152,22 @@ private static Stream provideData() { // "NOUN", "PROPN", "PROPN", "PUNCT" // ok! , ok! , ??? , ??? , ok! , ok! , ok! , ok! , ok! , ok! , ok! , ok! + ok! , // ok! , ??? , ok! , ok! + // via: @meriam2303 , original by Guillaume Musso: + // La jeune fille et la nuit, S.469 + Arguments.of(FRENCH,0, + "Vivre avec elle me faisait souffrir, mais vivre sans elle m'aurait tué.", + new String[]{"VERB","ADP","PRON","PRON","AUX","VERB","PUNCT","CCONJ","VERB","ADP","PRON","PRON","AUX", + "VERB","PUNCT"}) + // via @meriam2303, original by Hind Choueykh Ben Salah + // التجريد في الشّعر العربي , S. 42 + Arguments.of(ARABIC,0, + "عشق أبو نواس جارية تدعى جنان", + new String[]{"VERB","PROPN","NOUN","VERB","PROPN"}) + // via @meriam2303, original by Mohamed Laarousi Elmetoui + // التوت المر , S.7 + Arguments.of(MARGHREBI_ARABIC_FRENCH,0, + "Wassa3 belek ya baba...", + new String[]{"VERB","NOUN","ITNJ","NOUN","PUNCT"}) ); } } From 56ccbbef1cbf035c502628a761379713fd41aa0d Mon Sep 17 00:00:00 2001 From: meriam2303 Date: Mon, 29 Sep 2025 19:07:00 +0200 Subject: [PATCH 2/6] arabic+maghrebi_arabic_french rauskommentiert --- .../java/opennlp/tools/postag/POSTaggerMEIT.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java index 6deee6c25..5221eb5f4 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java @@ -160,14 +160,14 @@ private static Stream provideData() { "VERB","PUNCT"}) // via @meriam2303, original by Hind Choueykh Ben Salah // التجريد في الشّعر العربي , S. 42 - Arguments.of(ARABIC,0, - "عشق أبو نواس جارية تدعى جنان", - new String[]{"VERB","PROPN","NOUN","VERB","PROPN"}) + //Arguments.of(ARABIC,0, + //"عشق أبو نواس جارية تدعى جنان", + //new String[]{"VERB","PROPN","NOUN","VERB","PROPN"}) // via @meriam2303, original by Mohamed Laarousi Elmetoui // التوت المر , S.7 - Arguments.of(MARGHREBI_ARABIC_FRENCH,0, - "Wassa3 belek ya baba...", - new String[]{"VERB","NOUN","ITNJ","NOUN","PUNCT"}) + //Arguments.of(MARGHREBI_ARABIC_FRENCH,0, + //"Wassa3 belek ya baba...", + //new String[]{"VERB","NOUN","ITNJ","NOUN","PUNCT"}) ); } } From b69b920e5fd24263887e03963efec95707d7c3bf Mon Sep 17 00:00:00 2001 From: meriam2303 Date: Tue, 30 Sep 2025 17:17:50 +0200 Subject: [PATCH 3/6] added french constant and fixed the syntax error --- .../src/test/java/opennlp/tools/postag/POSTaggerMEIT.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java index 5221eb5f4..0963f67ec 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java @@ -43,6 +43,7 @@ public class POSTaggerMEIT { private static final String GERMAN = "de"; private static final String POLISH = "pl"; private static final String PORTUGUESE = "pt"; + private static final String FRENCH = "fr"; private static final Map TOKENIZERS = new HashMap<>(); private static final Map TAGGERS = new HashMap<>(); @@ -142,7 +143,7 @@ private static Stream provideData() { "Un gran embossament d'aire fred es comença a despenjar cap al centre d'Europa.", // OpenNLP, different at: idx pos 2, 3, 5, and 13(+14) -> however, only pos 5 is "wrong" (ref) new String[]{"DET", "ADJ", "NOUN", "ADP", "NOUN", "ADJ", "PRON", "VERB", "ADP", "VERB", "NOUN", - "ADP+DET", "NOUN", "ADP", "PROPN", "PUNCT"}) + "ADP+DET", "NOUN", "ADP", "PROPN", "PUNCT"}), // REFERENCE ("gold"): // "DET", "ADJ", "NOUN", "ADP", "NOUN", "ADJ", "PRON", "VERB", "ADP", "VERB", "NOUN", "ADP+DET", // "NOUN", "ADP", "PROPN", "PUNCT"}) @@ -154,7 +155,7 @@ private static Stream provideData() { // ok! , ??? , ok! , ok! // via: @meriam2303 , original by Guillaume Musso: // La jeune fille et la nuit, S.469 - Arguments.of(FRENCH,0, + Arguments.of(FRENCH, 0, "Vivre avec elle me faisait souffrir, mais vivre sans elle m'aurait tué.", new String[]{"VERB","ADP","PRON","PRON","AUX","VERB","PUNCT","CCONJ","VERB","ADP","PRON","PRON","AUX", "VERB","PUNCT"}) From 6fe239aa4ad097d53a7c7ce7ac2ccb7f1bed9019 Mon Sep 17 00:00:00 2001 From: Richard Zowalla Date: Tue, 14 Oct 2025 10:22:40 +0200 Subject: [PATCH 4/6] Fix checkstyle --- .../src/test/java/opennlp/tools/postag/POSTaggerMEIT.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java index 0963f67ec..04578c0ef 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java @@ -156,9 +156,9 @@ private static Stream provideData() { // via: @meriam2303 , original by Guillaume Musso: // La jeune fille et la nuit, S.469 Arguments.of(FRENCH, 0, - "Vivre avec elle me faisait souffrir, mais vivre sans elle m'aurait tué.", - new String[]{"VERB","ADP","PRON","PRON","AUX","VERB","PUNCT","CCONJ","VERB","ADP","PRON","PRON","AUX", - "VERB","PUNCT"}) + "Vivre avec elle me faisait souffrir, mais vivre sans elle m'aurait tué.", + new String[] {"VERB", "ADP", "PRON", "PRON", "AUX", "VERB", "PUNCT", "CCONJ", "VERB", + "ADP", "PRON", "PRON", "AUX", "VERB", "PUNCT"}) // via @meriam2303, original by Hind Choueykh Ben Salah // التجريد في الشّعر العربي , S. 42 //Arguments.of(ARABIC,0, From 72c796579014139faf763d2f5625427266eb98bc Mon Sep 17 00:00:00 2001 From: Richard Zowalla Date: Tue, 14 Oct 2025 10:28:58 +0200 Subject: [PATCH 5/6] Init FRENCH resources for test context --- .../src/test/java/opennlp/tools/postag/POSTaggerMEIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java index 04578c0ef..17d26d44b 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java @@ -52,7 +52,7 @@ public class POSTaggerMEIT { @BeforeAll public static void initResources() throws IOException { - List langs = List.of(CATALAN, ENGLISH, GERMAN, POLISH, PORTUGUESE); + final List langs = List.of(CATALAN, ENGLISH, FRENCH, GERMAN, POLISH, PORTUGUESE); for (String langCode: langs) { TOKENIZERS.put(langCode, new ThreadSafeTokenizerME(langCode)); TAGGERS.put(langCode, new ThreadSafePOSTaggerME(langCode)); From 814ab2da292693b34f991ad55447645e0d2c09d5 Mon Sep 17 00:00:00 2001 From: meriam2303 Date: Sun, 9 Nov 2025 19:53:53 +0100 Subject: [PATCH 6/6] changed faisait to verb --- .../src/test/java/opennlp/tools/postag/POSTaggerMEIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java index 17d26d44b..931e5c6fe 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java @@ -157,7 +157,7 @@ private static Stream provideData() { // La jeune fille et la nuit, S.469 Arguments.of(FRENCH, 0, "Vivre avec elle me faisait souffrir, mais vivre sans elle m'aurait tué.", - new String[] {"VERB", "ADP", "PRON", "PRON", "AUX", "VERB", "PUNCT", "CCONJ", "VERB", + new String[] {"VERB", "ADP", "PRON", "PRON", "VERB", "VERB", "PUNCT", "CCONJ", "VERB", "ADP", "PRON", "PRON", "AUX", "VERB", "PUNCT"}) // via @meriam2303, original by Hind Choueykh Ben Salah // التجريد في الشّعر العربي , S. 42