-
-
Notifications
You must be signed in to change notification settings - Fork 16
Update new language doc #126
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e359163
a109f4e
9fd9e01
72e7c28
95b483d
681b568
bfa7ae6
5aa7ec4
f20350f
9cc0a44
4bb9964
a46c301
df82a8b
8fcfb16
0c50978
9523646
a8a7f2d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,8 @@ NOTE: Take a look at [PR #40](https://github.com/unicode-org/inflection/pull/40) | |
In general, to bootstrap your progress, look for a grammatically similar language that's already supported; e.g., if you are adding Serbian, look at the existing Russian implementation. | ||
This will help you find most of the files you need to add/change and will speed up implementation of the rules and lexicons. | ||
|
||
Before you add new language support, go to the README.md in the inflection subfolder (inflection/inflection/README.md), build the project, and make sure all the tests run on your computer. | ||
|
||
## Mark your language as supported | ||
* UPDATE: inflection/src/inflection/util/LocaleUtils.hpp | ||
* UPDATE: inflection/src/inflection/util/LocaleUtils.cpp | ||
|
@@ -29,13 +31,13 @@ TODO: We need to expand what each of these do. | |
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer.hpp | ||
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer.cpp | ||
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.hpp | ||
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.hpp | ||
* ADD: inflection/src/inflection/grammar/synthesis/*Xx*GrammarSynthesizer_*Xx*DisplayFunction.cpp | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for fixing this documentation. This is a good improvement. |
||
* UPDATE: inflection/src/inflection/grammar/synthesis/GrammarSynthesizerFactory.cpp | ||
* UPDATE: inflection/src/inflection/grammar/synthesis/fwd.hpp | ||
|
||
## Add language specific properties for lists, quantities and related topics | ||
* ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.hpp | ||
* ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.hpp | ||
* ADD: inflection/src/inflection/dialog/language/*Xx*CommonConceptFactory.cpp | ||
* UPDATE: inflection/src/inflection/dialog/language/fwd.hpp | ||
|
||
## Define and create lexicon | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
dictionary_da.lst filter=lfs diff=lfs merge=lfs -text | ||
dictionary_en.lst filter=lfs diff=lfs merge=lfs -text | ||
dictionary_es.lst filter=lfs diff=lfs merge=lfs -text | ||
dictionary_ml.lst filter=lfs diff=lfs merge=lfs -text | ||
inflectional_da.xml filter=lfs diff=lfs merge=lfs -text | ||
inflectional_en.xml filter=lfs diff=lfs merge=lfs -text | ||
inflectional_es.xml filter=lfs diff=lfs merge=lfs -text | ||
inflectional_ml.xml filter=lfs diff=lfs merge=lfs -text | ||
inflectional_sv.xml filter=lfs diff=lfs merge=lfs -text | ||
dictionary_sv.lst filter=lfs diff=lfs merge=lfs -text |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
എനിക്ക്,first,singular,dative | ||
ഞാൻ,first,singular,nominative | ||
എന്നെ,first,singular,accusative | ||
എൻ്റെ,first,singular,genitive,dependency=dependent | ||
എൻ്റെത്,first,singular,genitive,dependency=independent | ||
നമുക്ക്,first,plural,dative | ||
ഞങ്ങൾ,first,plural,nominative | ||
ഞങ്ങളെ,first,plural,accusative | ||
ഞങ്ങളുടെ,first,plural,genitive,dependency=dependent | ||
ഞങ്ങളുടേതു്,first,plural,genitive,dependency=independent | ||
നമ്മുടെ,first,plural,genitive,dependency=dependent | ||
നമ്മുടേതു്,first,plural,genitive,dependency=independent | ||
നിനക്ക്,second,singular,dative,dependency=nonhonorific | ||
നീ,second,singular,nominative,dependency=nonhonorific | ||
നിനെ,second,singular,accusative,dependency=nonhonorific | ||
നിന്റെ,second,singular,genitive,dependency=dependent,dependency=nonhonorific | ||
നിന്റേതു്,second,singular,genitive,dependency=independent,dependency=nonhonorific | ||
Comment on lines
+13
to
+17
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is confusing. Typically gender, number, or animacy would be used for dependency. The dependency is typically being used for the word being used in combination to the pronoun. For example, if I were to say "mio" or "mia" in a language, and the gender depended on the gender of the object being possessed instead of the gender of the person being referenced, then I'd use dependency. |
||
നിങ്ങൾക്ക്,second,plural,dative,dependency=honorific | ||
നിങ്ങൾ,second,plural,nominative,dependency=honorific | ||
നിങ്ങളെ,second,plural,accusative,dependency=honorific | ||
നിങ്ങളുടെ,second,plural,genitive,dependency=dependent,dependency=honorific | ||
നിങ്ങളുടേതു്,second,plural,genitive,dependency=independent,dependency=honorific | ||
അവൻ,third,singular,nominative,masculine | ||
അവനെ,third,singular,accusative,masculine | ||
അവൻ്റെ,third,singular,genitive,dependency=dependent,masculine | ||
അവൻ്റെത്,third,singular,genitive,dependency=independent,masculine | ||
അവൾ,third,singular,nominative,feminine | ||
അവളെ,third,singular,accusative,feminine | ||
അവളുടെ,third,singular,genitive,dependency=dependent,feminine | ||
അവളുടേതു്,third,singular,genitive,dependency=independent,feminine | ||
അത്,third,singular,nominative,neuter | ||
അതിനെ,third,singular,accusative,neuter | ||
അതിന്റെ,third,singular,genitive,dependency=dependent,neuter | ||
അതിന്റേതു്,third,singular,genitive,dependency=independent,neuter | ||
അവർ,third,plural,nominative | ||
അവരെ,third,plural,accusative | ||
അവരുടെ,third,plural,genitive,dependency=dependent | ||
അവരുടേതു്,third,plural,genitive,dependency=independent | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# | ||
# Copyright 2025 Unicode Incorporated and others. All rights reserved. | ||
# | ||
tokenizer.implementation.class=DefaultTokenizer | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/* | ||
* Copyright 2025 Apple Inc. All rights reserved. | ||
*/ | ||
#include <inflection/dialog/language/MlCommonConceptFactory.hpp> | ||
|
||
namespace inflection::dialog::language { | ||
|
||
// Constructs the Malayalam concept factory by forwarding the locale to the base-class (super) constructor. | ||
// The body is empty: no language-specific state is set up here. | ||
MlCommonConceptFactory::MlCommonConceptFactory(const ::inflection::util::ULocale& language) | ||
: super(language) | ||
{ | ||
} | ||
|
||
// Destructor with an empty body; this class releases nothing of its own here. | ||
MlCommonConceptFactory::~MlCommonConceptFactory() | ||
{ | ||
} | ||
Comment on lines
+2
to
+15
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In all of the new files, please make sure that the copyright is not Apple, and it's not Unicode. I'm assuming that this class is trying to emulate the English behavior, where a quantity changes by only it being singular or plural for the noun in a quantity. For example, 1 man, 2 men, 1 woman, 2 women, and so forth. Are such quantities affected by grammatical case? If so, then this class likely needs a little more customization for the quantify method. For Slavic languages, the rules can get fairly complicated, and Malayalam seems to have more grammatical cases than Russian. So I'm wondering how this would work. I also see that the number pronunciation doesn't vary like many other European languages. So Malayalam language may be simpler to support. |
||
|
||
} // namespace inflection::dialog::language |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/* | ||
* Copyright 2025 Apple Inc. All rights reserved. | ||
*/ | ||
#pragma once | ||
|
||
#include <inflection/dialog/language/fwd.hpp> | ||
#include <inflection/dialog/CommonConceptFactoryImpl.hpp> | ||
|
||
// Malayalam ("Ml") concept factory. | ||
// It derives from CommonConceptFactoryImpl and declares only a constructor and a destructor, | ||
// so all other behavior comes from the base class. | ||
class inflection::dialog::language::MlCommonConceptFactory | ||
: public CommonConceptFactoryImpl | ||
{ | ||
public: | ||
// Alias for the base class, used by the constructor's initializer list. | ||
typedef CommonConceptFactoryImpl super; | ||
public: | ||
// Builds the factory for the given locale (explicit to prevent implicit ULocale conversions). | ||
explicit MlCommonConceptFactory(const ::inflection::util::ULocale& language); | ||
~MlCommonConceptFactory() override; | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
* Copyright 2025 Apple Inc. All rights reserved. | ||
*/ | ||
#include <inflection/grammar/synthesis/MlGrammarSynthesizer.hpp> | ||
|
||
#include <inflection/dialog/SemanticFeatureModel.hpp> | ||
#include <inflection/grammar/synthesis/MlGrammarSynthesizer_CountLookupFunction.hpp> | ||
#include <inflection/grammar/synthesis/MlGrammarSynthesizer_GenderLookupFunction.hpp> | ||
#include <inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.hpp> | ||
#include <inflection/grammar/synthesis/MlGrammarSynthesizer_MlDisplayFunction.hpp> | ||
#include <inflection/grammar/synthesis/GrammemeConstants.hpp> | ||
|
||
namespace inflection::grammar::synthesis { | ||
|
||
// Registers the Malayalam default feature functions on the given feature model: | ||
// one lookup function each for NUMBER, GENDER and CASE, followed by the default display function. | ||
// NOTE(review): the functions are allocated with `new` and handed to featureModel; presumably the model | ||
// takes ownership of them -- confirm against SemanticFeatureModel. | ||
void MlGrammarSynthesizer::addSemanticFeatures(::inflection::dialog::SemanticFeatureModel& featureModel) | ||
{ | ||
featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::NUMBER, new MlGrammarSynthesizer_CountLookupFunction()); | ||
featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::GENDER, new MlGrammarSynthesizer_GenderLookupFunction()); | ||
featureModel.putDefaultFeatureFunctionByName(GrammemeConstants::CASE, new MlGrammarSynthesizer_CaseLookupFunction()); | ||
|
||
// The display function is constructed with the same feature model it is installed on. | ||
featureModel.setDefaultDisplayFunction(new MlGrammarSynthesizer_MlDisplayFunction(featureModel)); | ||
} | ||
|
||
} // namespace inflection::grammar::synthesis | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/* | ||
* Copyright 2025 Apple Inc. All rights reserved. | ||
*/ | ||
#pragma once | ||
|
||
#include <inflection/dialog/fwd.hpp> | ||
#include <inflection/grammar/synthesis/fwd.hpp> | ||
#include <string> | ||
|
||
// Static-only entry point for Malayalam grammar synthesis setup. | ||
// The class is final and its default constructor is deleted, so it cannot be instantiated or derived from. | ||
class inflection::grammar::synthesis::MlGrammarSynthesizer final | ||
{ | ||
public: | ||
// Installs the Malayalam feature functions and display function on featureModel. | ||
static void addSemanticFeatures(::inflection::dialog::SemanticFeatureModel& featureModel); | ||
private: | ||
MlGrammarSynthesizer() = delete; | ||
}; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
/* | ||
* Copyright 2025 Apple Inc. All rights reserved. | ||
*/ | ||
#include <inflection/grammar/synthesis/MlGrammarSynthesizer_CaseLookupFunction.hpp> | ||
|
||
#include <inflection/grammar/synthesis/GrammemeConstants.hpp> | ||
#include <inflection/dialog/SemanticFeature.hpp> | ||
#include <inflection/dialog/DisplayValue.hpp> | ||
#include <inflection/dialog/SpeakableString.hpp> | ||
#include <inflection/util/LocaleUtils.hpp> | ||
#include <inflection/util/StringViewUtils.hpp> | ||
|
||
namespace inflection::grammar::synthesis { | ||
|
||
// Default constructor: only invokes the base-class default constructor. | ||
MlGrammarSynthesizer_CaseLookupFunction::MlGrammarSynthesizer_CaseLookupFunction() | ||
: super() | ||
{ | ||
// No file needed | ||
} | ||
|
||
// Guesses the grammatical case of a display value from its surface form. | ||
// The display string is lowercased with the Malayalam locale and then checked against a fixed list of | ||
// suffixes; on a match, a newly allocated SpeakableString holding CASE_GENITIVE is returned. | ||
// Returns nullptr when the string is shorter than 3 UTF-16 code units or no suffix matches. | ||
// The constraints parameter is intentionally unused. | ||
// NOTE(review): the result is allocated with `new`; presumably the caller takes ownership -- confirm. | ||
inflection::dialog::SpeakableString* MlGrammarSynthesizer_CaseLookupFunction::getFeatureValue(const ::inflection::dialog::DisplayValue& displayValue, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& /*constraints*/) const | ||
{ | ||
std::u16string displayString; | ||
::inflection::util::StringViewUtils::lowercase(&displayString, displayValue.getDisplayString(), ::inflection::util::LocaleUtils::MALAYALAM()); | ||
|
||
// length() counts UTF-16 code units, not user-perceived characters. | ||
// NOTE(review): the first suffix appears to start with the independent vowel letter, while the second uses | ||
// the dependent vowel sign, so they are different code point sequences -- confirm the first can occur word-finally. | ||
// NOTE(review): the fourth suffix appears to end with the second one, which would make that check redundant -- confirm. | ||
if (displayString.length() >= 3) { | ||
// Genitive-indicative suffixes in Malayalam | ||
if (displayString.ends_with(u"ഉടെ") || // uṭe | ||
displayString.ends_with(u"യുടെ") || // yude (my, your, his, her...) | ||
displayString.ends_with(u"ന്റെ") || // ente (mine), avante, etc. | ||
displayString.ends_with(u"ആയുടെ")) // āyuṭe (fem. 3rd person possessive) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this logic correct? Are these all suffixes? Should the |
||
{ | ||
return new ::inflection::dialog::SpeakableString(GrammemeConstants::CASE_GENITIVE()); | ||
} | ||
} | ||
return nullptr; | ||
} | ||
|
||
} // namespace inflection::grammar::synthesis | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
* Copyright 2025 Apple Inc. All rights reserved. | ||
*/ | ||
#pragma once | ||
|
||
#include <inflection/dialog/fwd.hpp> | ||
#include <inflection/grammar/synthesis/fwd.hpp> | ||
#include <inflection/dialog/DefaultFeatureFunction.hpp> | ||
#include <set> | ||
#include <string> | ||
|
||
// Default feature function that derives a case value for a Malayalam display value. | ||
// It extends DefaultFeatureFunction and overrides getFeatureValue; copying is disabled. | ||
class inflection::grammar::synthesis::MlGrammarSynthesizer_CaseLookupFunction | ||
: public ::inflection::dialog::DefaultFeatureFunction | ||
{ | ||
public: | ||
// Alias for the base class, used by the constructor's initializer list. | ||
typedef ::inflection::dialog::DefaultFeatureFunction super; | ||
|
||
public: | ||
// Returns a pointer to the computed feature value for displayValue, or a null pointer if none is determined. | ||
::inflection::dialog::SpeakableString* getFeatureValue(const ::inflection::dialog::DisplayValue& displayValue, const ::std::map<::inflection::dialog::SemanticFeature, ::std::u16string>& constraints) const override; | ||
|
||
MlGrammarSynthesizer_CaseLookupFunction(); | ||
// Non-copyable: copy construction and copy assignment are deleted. | ||
MlGrammarSynthesizer_CaseLookupFunction(const MlGrammarSynthesizer_CaseLookupFunction&) = delete; | ||
MlGrammarSynthesizer_CaseLookupFunction& operator=(const MlGrammarSynthesizer_CaseLookupFunction&) = delete; | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good suggestion!