diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b3790a9ace..253f6a0307 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -99,6 +99,8 @@ jobs: - name: Test mlibc run: 'meson test -v -C pkg-builds/${{matrix.builds}}' working-directory: build/ + env: + LOCPATH: "${{ github.workspace }}/build/packages/locale-data/usr/lib/locale/" compile-sysdeps: strategy: diff --git a/ci/bootstrap.yml b/ci/bootstrap.yml index f8dcae334e..8897374517 100644 --- a/ci/bootstrap.yml +++ b/ci/bootstrap.yml @@ -21,6 +21,11 @@ sources: tag: 'libdrm-2.4.124' version: '2.4.124' + - name: glibc + git: 'https://sourceware.org/git/glibc.git' + tag: 'glibc-2.42' + version: '2.42' + tools: [] packages: @@ -30,6 +35,7 @@ packages: pkgs_required: - linux-headers - libdrm-headers + - locale-data configure: - args: - 'meson' @@ -65,6 +71,7 @@ packages: pkgs_required: - linux-headers - libdrm-headers + - locale-data configure: - args: - 'meson' @@ -97,6 +104,7 @@ packages: pkgs_required: - linux-headers - libdrm-headers + - locale-data configure: - args: - 'meson' @@ -130,6 +138,7 @@ packages: pkgs_required: - linux-headers - libdrm-headers + - locale-data configure: - args: - 'meson' @@ -226,3 +235,21 @@ packages: libdrm_dep = declare_dependency(include_directories: include_directories('include')) EOF - args: ['cp', '-r', '@THIS_SOURCE_DIR@/include', '@THIS_COLLECT_DIR@/usr/src/libdrm-headers/include'] + + - name: locale-data + architecture: noarch + from_source: 'glibc' + build: + - args: | + ARCH="@OPTION:arch@" + LOCALEDEF_FLAGS="" + if [ "$ARCH" == "m68k" ]; then + LOCALEDEF_FLAGS="--big-endian" + fi + mkdir -p @THIS_COLLECT_DIR@/usr/lib/locale/ + localedef $LOCALEDEF_FLAGS -fUTF-8 -ide_DE --prefix=@THIS_COLLECT_DIR@ --no-archive de_DE.utf8 + localedef $LOCALEDEF_FLAGS -fUTF-8 -ide_DE --prefix=@THIS_COLLECT_DIR@ --no-archive de_DE + localedef $LOCALEDEF_FLAGS -fUTF-8 -iru_RU --prefix=@THIS_COLLECT_DIR@ --no-archive ru_RU.utf8 + localedef $LOCALEDEF_FLAGS 
-fUTF-8 -ien_US --prefix=@THIS_COLLECT_DIR@ --no-archive en_US.utf8 + environ: + I18NPATH: '@THIS_SOURCE_DIR@/localedata/' diff --git a/meson.build b/meson.build index b1a0eb8ce0..6aa69a4320 100644 --- a/meson.build +++ b/meson.build @@ -71,6 +71,14 @@ if not headers_only libc_deps += frigg_dep rtld_deps += frigg_dep + libsmarter_dep = dependency( + 'libsmarter', + default_options: [], + fallback: ['libsmarter', 'libsmarter_dep'], + ) + libc_deps += libsmarter_dep + rtld_deps += libsmarter_dep + add_project_arguments('-Wno-unused-function', '-D__MLIBC_BUILDING_MLIBC', language: ['c', 'cpp']) add_project_arguments('-nostdinc', '-fno-builtin', '-ffreestanding', language: ['c', 'cpp']) add_project_arguments('-Werror=misleading-indentation', language: ['c', 'cpp']) @@ -221,6 +229,7 @@ if host_machine.system() == 'linux' internal_conf.set10('MLIBC_MAP_DSO_SEGMENTS', true) internal_conf.set10('MLIBC_MMAP_ALLOCATE_DSO', true) + internal_conf.set10('MLIBC_MAP_FILE_WINDOWS', true) subdir('sysdeps/linux') elif host_machine.system() == 'aero' rtld_include_dirs += include_directories('sysdeps/aero/include') @@ -371,8 +380,10 @@ configure_file(input: 'mlibc-config.h.in', internal_sources = [ 'options/internal/generic/allocator.cpp', + 'options/internal/generic/c-locale-defaults.cpp', 'options/internal/generic/charcode.cpp', 'options/internal/generic/charset.cpp', + 'options/internal/generic/ctype.cpp', 'options/internal/generic/debug.cpp', 'options/internal/generic/ensure.cpp', 'options/internal/generic/essential.cpp', @@ -381,6 +392,7 @@ internal_sources = [ 'options/internal/generic/global-config.cpp', 'options/internal/generic/inline-emitter.cpp', 'options/internal/generic/locale.cpp', + 'options/internal/generic/locale-defaults.cpp', 'options/internal/generic/sigset.cpp', 'options/internal/generic/strings.cpp', 'options/internal/generic/ubsan.cpp', diff --git a/options/ansi/generic/ctype.cpp b/options/ansi/generic/ctype.cpp index d5a2321376..88b1c28cf9 100644 --- 
a/options/ansi/generic/ctype.cpp +++ b/options/ansi/generic/ctype.cpp @@ -4,6 +4,7 @@ #include #include +#include // -------------------------------------------------------------------------------------- // char ctype functions. @@ -14,7 +15,7 @@ int isalpha(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_alpha(cp); + return mlibc::current_charset()->is_alpha(cp, mlibc::getActiveLocale()); } int isdigit(int nc) { @@ -22,7 +23,7 @@ int isdigit(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_digit(cp); + return mlibc::current_charset()->is_digit(cp, mlibc::getActiveLocale()); } int isxdigit(int nc) { @@ -30,7 +31,7 @@ int isxdigit(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_xdigit(cp); + return mlibc::current_charset()->is_xdigit(cp, mlibc::getActiveLocale()); } int isalnum(int nc) { @@ -38,7 +39,7 @@ int isalnum(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_alnum(cp); + return mlibc::current_charset()->is_alnum(cp, mlibc::getActiveLocale()); } int ispunct(int nc) { @@ -46,7 +47,7 @@ int ispunct(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_punct(cp); + return mlibc::current_charset()->is_punct(cp, mlibc::getActiveLocale()); } int isgraph(int nc) { @@ -54,7 +55,7 @@ int isgraph(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_graph(cp); + return mlibc::current_charset()->is_graph(cp, mlibc::getActiveLocale()); } int isblank(int nc) { @@ -62,7 +63,7 @@ int isblank(int nc) { 
mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_blank(cp); + return mlibc::current_charset()->is_blank(cp, mlibc::getActiveLocale()); } int isspace(int nc) { @@ -70,7 +71,7 @@ int isspace(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_space(cp); + return mlibc::current_charset()->is_space(cp, mlibc::getActiveLocale()); } int isprint(int nc) { @@ -78,7 +79,7 @@ int isprint(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_print(cp); + return mlibc::current_charset()->is_print(cp, mlibc::getActiveLocale()); } int islower(int nc) { @@ -86,7 +87,7 @@ int islower(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_lower(cp); + return mlibc::current_charset()->is_lower(cp, mlibc::getActiveLocale()); } int isupper(int nc) { @@ -94,7 +95,7 @@ int isupper(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_upper(cp); + return mlibc::current_charset()->is_upper(cp, mlibc::getActiveLocale()); } int iscntrl(int nc) { @@ -102,7 +103,7 @@ int iscntrl(int nc) { mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::generic_is_control(cp); + return mlibc::current_charset()->is_cntrl(cp, mlibc::getActiveLocale()); } int isascii(int nc) { @@ -118,190 +119,139 @@ int isascii(int nc) { // -------------------------------------------------------------------------------------- int iswalpha(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) 
return 0; - return mlibc::current_charset()->is_alpha(cp); + return mlibc::current_charset()->is_alpha(cp, l); } int iswdigit(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_digit(cp); + return mlibc::current_charset()->is_digit(cp, l); } int iswxdigit(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_xdigit(cp); + return mlibc::current_charset()->is_xdigit(cp, l); } int iswalnum(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_alnum(cp); + return mlibc::current_charset()->is_alnum(cp, l); } int iswpunct(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_punct(cp); + return mlibc::current_charset()->is_punct(cp, l); } int iswgraph(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_graph(cp); + return mlibc::current_charset()->is_graph(cp, l); } int iswblank(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_blank(cp); + return mlibc::current_charset()->is_blank(cp, l); } int iswspace(wint_t nc) { + auto l = 
mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_space(cp); + return mlibc::current_charset()->is_space(cp, l); } int iswprint(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_print(cp); + return mlibc::current_charset()->is_print(cp, l); } int iswlower(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_lower(cp); + return mlibc::current_charset()->is_lower(cp, l); } int iswupper(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::current_charset()->is_upper(cp); + return mlibc::current_charset()->is_upper(cp, l); } int iswcntrl(wint_t nc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) return 0; - return mlibc::generic_is_control(cp); + return mlibc::current_charset()->is_cntrl(cp, l); } // -------------------------------------------------------------------------------------- // iswctype functions. 
// -------------------------------------------------------------------------------------- -namespace { - enum { - ct_null, - ct_alnum, - ct_alpha, - ct_blank, - ct_cntrl, - ct_digit, - ct_graph, - ct_lower, - ct_print, - ct_punct, - ct_space, - ct_upper, - ct_xdigit, - ct_count - }; -} // namespace - wctype_t wctype(const char *cs) { - frg::string_view s{cs}; - if(s == "alnum") return ct_alnum; - if(s == "alpha") return ct_alpha; - if(s == "blank") return ct_blank; - if(s == "cntrl") return ct_cntrl; - if(s == "digit") return ct_digit; - if(s == "graph") return ct_graph; - if(s == "lower") return ct_lower; - if(s == "print") return ct_print; - if(s == "punct") return ct_punct; - if(s == "space") return ct_space; - if(s == "upper") return ct_upper; - if(s == "xdigit") return ct_xdigit; - mlibc::infoLogger() << "mlibc: wctype(\"" << cs << "\") is not supported" << frg::endlog; - return ct_null; + auto l = mlibc::getActiveLocale(); + return mlibc::current_charset()->wctype({cs}, l); } int iswctype(wint_t wc, wctype_t type) { - switch (type) { - case ct_alnum: - return iswalnum(wc); - case ct_alpha: - return iswalpha(wc); - case ct_blank: - return iswblank(wc); - case ct_cntrl: - return iswcntrl(wc); - case ct_digit: - return iswdigit(wc); - case ct_graph: - return iswgraph(wc); - case ct_lower: - return iswlower(wc); - case ct_print: - return iswprint(wc); - case ct_punct: - return iswpunct(wc); - case ct_space: - return iswspace(wc); - case ct_upper: - return iswupper(wc); - case ct_xdigit: - return iswxdigit(wc); - } - return 0; + auto l = mlibc::getActiveLocale(); + return mlibc::current_charset()->iswctype(wc, type, l); } // -------------------------------------------------------------------------------------- // char conversion functions. 
// -------------------------------------------------------------------------------------- -int tolower(int nc) { - auto cc = mlibc::current_charcode(); - mlibc::codepoint cp; - if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) - return nc; - return mlibc::current_charset()->to_lower(cp); +int tolower(int c) { + auto l = mlibc::getActiveLocale(); + return c >= -128 && c < 256 ? l->ctype.map_tolower()[c + 128] : c; } -int toupper(int nc) { - auto cc = mlibc::current_charcode(); - mlibc::codepoint cp; - if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) - return nc; - return mlibc::current_charset()->to_upper(cp); +int toupper(int c) { + auto l = mlibc::getActiveLocale(); + return c >= -128 && c < 256 ? l->ctype.map_toupper()[c + 128] : c; } // -------------------------------------------------------------------------------------- @@ -309,18 +259,20 @@ int toupper(int nc) { // -------------------------------------------------------------------------------------- wint_t towlower(wint_t wc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(wc, cp); e != mlibc::charcode_error::null) return wc; - return mlibc::current_charset()->to_lower(cp); + return mlibc::current_charset()->to_lower(cp, l); } wint_t towupper(wint_t wc) { + auto l = mlibc::getActiveLocale(); auto cc = mlibc::platform_wide_charcode(); mlibc::codepoint cp; if(auto e = cc->promote(wc, cp); e != mlibc::charcode_error::null) return wc; - return mlibc::current_charset()->to_upper(cp); + return mlibc::current_charset()->to_upper(cp, l); } diff --git a/options/ansi/generic/locale.cpp b/options/ansi/generic/locale.cpp index 1bedbd61e9..cbaab38553 100644 --- a/options/ansi/generic/locale.cpp +++ b/options/ansi/generic/locale.cpp @@ -1,159 +1,126 @@ #include #include +#include #include #include #include +#include #include -namespace { - // Values of the C locale are defined by the C standard. 
- constexpr lconv c_lconv = { - const_cast("."), // decimal_point - const_cast(""), // thousands_sep - const_cast(""), // grouping - const_cast(""), // mon_decimal_point - const_cast(""), // mon_thousands_sep - const_cast(""), // mon_grouping - const_cast(""), // positive_sign - const_cast(""), // negative_sign - const_cast(""), // currency_symbol - CHAR_MAX, // frac_digits - CHAR_MAX, // p_cs_precedes - CHAR_MAX, // n_cs_precedes - CHAR_MAX, // p_sep_by_space - CHAR_MAX, // n_sep_by_space - CHAR_MAX, // p_sign_posn - CHAR_MAX, // n_sign_posn - const_cast(""), // int_curr_symbol - CHAR_MAX, // int_frac_digits - CHAR_MAX, // int_p_cs_precedes - CHAR_MAX, // int_n_cs_precedes - CHAR_MAX, // int_p_sep_by_space - CHAR_MAX, // int_n_sep_by_space - CHAR_MAX, // int_p_sign_posn - CHAR_MAX // int_n_sign_posn - }; -} // namespace - -namespace mlibc { - struct locale_description { - // Identifier of this locale. used in setlocale(). - const char *name; - lconv lc; - }; - - constinit const locale_description c_locale{ - .name = "C", - .lc = c_lconv - }; - - constinit const locale_description posix_locale{ - .name = "POSIX", - .lc = c_lconv - }; - - const locale_description *query_locale_description(const char *name) { - if(!strcmp(name, "C")) - return &c_locale; - if(!strcmp(name, "POSIX")) - return &posix_locale; - return nullptr; - } - - const locale_description *collate_facet; - const locale_description *ctype_facet; - const locale_description *monetary_facet; - const locale_description *numeric_facet; - const locale_description *time_facet; - const locale_description *messages_facet; -} // namespace mlibc - -[[gnu::constructor]] -static void init_locale() { - mlibc::collate_facet = &mlibc::c_locale; - mlibc::ctype_facet = &mlibc::c_locale; - mlibc::monetary_facet = &mlibc::c_locale; - mlibc::numeric_facet = &mlibc::c_locale; - mlibc::time_facet = &mlibc::c_locale; - mlibc::messages_facet = &mlibc::c_locale; -} - char *setlocale(int category, const char *name) { - 
if(category == LC_ALL) { - // ´TODO: Implement correct return value when categories differ. - auto current_desc = mlibc::collate_facet; - __ensure(current_desc == mlibc::ctype_facet); - __ensure(current_desc == mlibc::monetary_facet); - __ensure(current_desc == mlibc::numeric_facet); - __ensure(current_desc == mlibc::time_facet); - __ensure(current_desc == mlibc::messages_facet); - - if(name) { - // Our default C locale is the C locale. - if(!strlen(name)) - name = "C"; - - auto new_desc = mlibc::query_locale_description(name); - if(!new_desc) { - mlibc::infoLogger() << "mlibc: Locale " << name - << " is not supported" << frg::endlog; - return nullptr; + const char *usedName = name; + + if (name && *name == '\0') { + auto lc_all = getenv("LC_ALL"); + + if (lc_all && strlen(lc_all)) { + usedName = lc_all; + } else { + int categoryEnvMask = 0; + + auto applyEnvCategory = [&](const char *categoryName, int val) { + if (auto e = getenv(categoryName); e && strlen(e)) { + if (!mlibc::applyCategory(val, e, mlibc::getGlobalLocale())) { + return false; + } else { + categoryEnvMask |= (1 << val); + } + } + return true; + }; + + auto applyLang = [&](int val, const char *locale) { + if (!((1 << val) & categoryEnvMask)) + if (!mlibc::applyCategory(val, locale, mlibc::getGlobalLocale())) + return false; + + return true; + }; + + if (!applyEnvCategory("LC_CTYPE", LC_CTYPE)) return nullptr; + if (!applyEnvCategory("LC_NUMERIC", LC_NUMERIC)) return nullptr; + if (!applyEnvCategory("LC_TIME", LC_TIME)) return nullptr; + if (!applyEnvCategory("LC_COLLATE", LC_COLLATE)) return nullptr; + if (!applyEnvCategory("LC_MONETARY", LC_MONETARY)) return nullptr; + if (!applyEnvCategory("LC_MESSAGES", LC_MESSAGES)) return nullptr; + if (!applyEnvCategory("LC_PAPER", LC_PAPER)) return nullptr; + if (!applyEnvCategory("LC_NAME", LC_NAME)) return nullptr; + if (!applyEnvCategory("LC_ADDRESS", LC_ADDRESS)) return nullptr; + if (!applyEnvCategory("LC_TELEPHONE", LC_TELEPHONE)) return nullptr; + if 
(!applyEnvCategory("LC_MEASUREMENT", LC_MEASUREMENT)) return nullptr; + if (!applyEnvCategory("LC_IDENTIFICATION", LC_IDENTIFICATION)) return nullptr; + + auto lang = getenv("LANG"); + if (lang && strlen(lang)) { + if (!applyLang(LC_CTYPE, lang)) return nullptr; + if (!applyLang(LC_NUMERIC, lang)) return nullptr; + if (!applyLang(LC_TIME, lang)) return nullptr; + if (!applyLang(LC_COLLATE, lang)) return nullptr; + if (!applyLang(LC_MONETARY, lang)) return nullptr; + if (!applyLang(LC_MESSAGES, lang)) return nullptr; + if (!applyLang(LC_PAPER, lang)) return nullptr; + if (!applyLang(LC_NAME, lang)) return nullptr; + if (!applyLang(LC_ADDRESS, lang)) return nullptr; + if (!applyLang(LC_TELEPHONE, lang)) return nullptr; + if (!applyLang(LC_MEASUREMENT, lang)) return nullptr; + if (!applyLang(LC_IDENTIFICATION, lang)) return nullptr; + } else { + if (!applyLang(LC_CTYPE, "C")) return nullptr; + if (!applyLang(LC_NUMERIC, "C")) return nullptr; + if (!applyLang(LC_TIME, "C")) return nullptr; + if (!applyLang(LC_COLLATE, "C")) return nullptr; + if (!applyLang(LC_MONETARY, "C")) return nullptr; + if (!applyLang(LC_MESSAGES, "C")) return nullptr; + if (!applyLang(LC_PAPER, "C")) return nullptr; + if (!applyLang(LC_NAME, "C")) return nullptr; + if (!applyLang(LC_ADDRESS, "C")) return nullptr; + if (!applyLang(LC_TELEPHONE, "C")) return nullptr; + if (!applyLang(LC_MEASUREMENT, "C")) return nullptr; + if (!applyLang(LC_IDENTIFICATION, "C")) return nullptr; + mlibc::getGlobalLocale()->localeName = {"C", getAllocator()}; } - - mlibc::collate_facet = new_desc; - mlibc::ctype_facet = new_desc; - mlibc::monetary_facet = new_desc; - mlibc::numeric_facet = new_desc; - mlibc::time_facet = new_desc; - mlibc::messages_facet = new_desc; - } - return const_cast(current_desc->name); - }else{ - const mlibc::locale_description **facet_ptr; - switch(category) { - case LC_COLLATE: - facet_ptr = &mlibc::collate_facet; - break; - case LC_CTYPE: - facet_ptr = &mlibc::ctype_facet; - break; - case 
LC_MONETARY: - facet_ptr = &mlibc::monetary_facet; - break; - case LC_NUMERIC: - facet_ptr = &mlibc::numeric_facet; - break; - case LC_TIME: - facet_ptr = &mlibc::time_facet; - break; - case LC_MESSAGES: - facet_ptr = &mlibc::messages_facet; - break; - default: - mlibc::infoLogger() << "mlibc: Unexpected value " << category - << " for category in setlocale()" << frg::endlog; - return nullptr; } - auto current_desc = *facet_ptr; - if(name) { - // Our default C locale is the C locale. - if(!strlen(name)) - name = "C"; - - auto new_desc = mlibc::query_locale_description(name); - if(!new_desc) { - mlibc::infoLogger() << "mlibc: Locale " << name - << " is not supported" << frg::endlog; - return nullptr; - } + return mlibc::getGlobalLocale()->localeName.data(); + } - *facet_ptr = new_desc; - } - return const_cast(current_desc->name); + if(usedName && category == LC_ALL) { + if (!mlibc::applyCategory(LC_CTYPE, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_NUMERIC, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_TIME, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_COLLATE, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_MONETARY, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_MESSAGES, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_PAPER, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_NAME, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_ADDRESS, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_TELEPHONE, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_MEASUREMENT, usedName, mlibc::getGlobalLocale())) + return nullptr; + if (!mlibc::applyCategory(LC_IDENTIFICATION, usedName, 
mlibc::getGlobalLocale())) + return nullptr; + + mlibc::getGlobalLocale()->localeName = {usedName, getAllocator()}; + return mlibc::getGlobalLocale()->localeName.data(); + } else if(usedName) { + if (!mlibc::applyCategory(category, usedName, mlibc::getGlobalLocale())) + return nullptr; + return mlibc::getGlobalLocale()->localeName.data(); + } else { + return mlibc::getGlobalLocale()->localeName.data(); } } @@ -162,35 +129,35 @@ namespace { } // namespace struct lconv *localeconv(void) { + auto cur = mlibc::getActiveLocale(); + // Numeric locale. - const auto &numeric_lc = mlibc::numeric_facet->lc; - effective_lc.decimal_point = numeric_lc.decimal_point; - effective_lc.thousands_sep = numeric_lc.thousands_sep; - effective_lc.grouping = numeric_lc.grouping; + effective_lc.decimal_point = const_cast(cur->numeric.get(DECIMAL_POINT).asString().data()); + effective_lc.thousands_sep = const_cast(cur->numeric.get(THOUSANDS_SEP).asString().data()); + effective_lc.grouping = const_cast(reinterpret_cast(cur->numeric.get(GROUPING).asByteSpan().data())); // Monetary locale. 
- const auto &monetary_lc = mlibc::monetary_facet->lc; - effective_lc.mon_decimal_point = monetary_lc.mon_decimal_point; - effective_lc.mon_thousands_sep = monetary_lc.mon_thousands_sep; - effective_lc.mon_grouping = monetary_lc.mon_grouping; - effective_lc.positive_sign = monetary_lc.positive_sign; - effective_lc.negative_sign = monetary_lc.negative_sign; - effective_lc.currency_symbol = monetary_lc.currency_symbol; - effective_lc.frac_digits = monetary_lc.frac_digits; - effective_lc.p_cs_precedes = monetary_lc.p_cs_precedes; - effective_lc.n_cs_precedes = monetary_lc.n_cs_precedes; - effective_lc.p_sep_by_space = monetary_lc.p_sep_by_space; - effective_lc.n_sep_by_space = monetary_lc.n_sep_by_space; - effective_lc.p_sign_posn = monetary_lc.p_sign_posn; - effective_lc.n_sign_posn = monetary_lc.n_sign_posn; - effective_lc.int_curr_symbol = monetary_lc.int_curr_symbol; - effective_lc.int_frac_digits = monetary_lc.int_frac_digits; - effective_lc.int_p_cs_precedes = monetary_lc.int_p_cs_precedes; - effective_lc.int_n_cs_precedes = monetary_lc.int_n_cs_precedes; - effective_lc.int_p_sep_by_space = monetary_lc.int_p_sep_by_space; - effective_lc.int_n_sep_by_space = monetary_lc.int_n_sep_by_space; - effective_lc.int_p_sign_posn = monetary_lc.int_p_sign_posn; - effective_lc.int_n_sign_posn = monetary_lc.int_n_sign_posn; + effective_lc.mon_decimal_point = const_cast(cur->monetary.get(MON_DECIMAL_POINT).asString().data()); + effective_lc.mon_thousands_sep = const_cast(cur->monetary.get(MON_THOUSANDS_SEP).asString().data()); + effective_lc.mon_grouping = const_cast(reinterpret_cast(cur->monetary.get(MON_GROUPING).asByteSpan().data())); + effective_lc.positive_sign = const_cast(cur->monetary.get(POSITIVE_SIGN).asString().data()); + effective_lc.negative_sign = const_cast(cur->monetary.get(NEGATIVE_SIGN).asString().data()); + effective_lc.currency_symbol = const_cast(cur->monetary.get(CURRENCY_SYMBOL).asString().data()); + effective_lc.frac_digits = 
cur->monetary.get(FRAC_DIGITS).asUint32(); + effective_lc.p_cs_precedes = cur->monetary.get(P_CS_PRECEDES).asUint32(); + effective_lc.n_cs_precedes = cur->monetary.get(N_CS_PRECEDES).asUint32(); + effective_lc.p_sep_by_space = cur->monetary.get(P_SEP_BY_SPACE).asUint32(); + effective_lc.n_sep_by_space = cur->monetary.get(N_SEP_BY_SPACE).asUint32(); + effective_lc.p_sign_posn = cur->monetary.get(P_SIGN_POSN).asUint32(); + effective_lc.n_sign_posn = cur->monetary.get(N_SIGN_POSN).asUint32(); + effective_lc.int_curr_symbol = const_cast(cur->monetary.get(INT_CURR_SYMBOL).asString().data()); + effective_lc.int_frac_digits = cur->monetary.get(INT_FRAC_DIGITS).asUint32(); + effective_lc.int_p_cs_precedes = cur->monetary.get(INT_P_CS_PRECEDES).asUint32(); + effective_lc.int_n_cs_precedes = cur->monetary.get(INT_N_CS_PRECEDES).asUint32(); + effective_lc.int_p_sep_by_space = cur->monetary.get(INT_P_SEP_BY_SPACE).asUint32(); + effective_lc.int_n_sep_by_space = cur->monetary.get(INT_N_SEP_BY_SPACE).asUint32(); + effective_lc.int_p_sign_posn = cur->monetary.get(INT_P_SIGN_POSN).asUint32(); + effective_lc.int_n_sign_posn = cur->monetary.get(INT_N_SIGN_POSN).asUint32(); return &effective_lc; } diff --git a/options/ansi/generic/stdio.cpp b/options/ansi/generic/stdio.cpp index 874eb23a4c..0f1f58c5dd 100644 --- a/options/ansi/generic/stdio.cpp +++ b/options/ansi/generic/stdio.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -26,7 +27,14 @@ template struct PrintfAgent { PrintfAgent(F *formatter, frg::va_struct *vsp) - : _formatter{formatter}, _vsp{vsp} { } + : _formatter{formatter}, _vsp{vsp} { + auto l = mlibc::getActiveLocale(); + locale_opts = frg::locale_options( + l->numeric.get(DECIMAL_POINT).asString().data(), + l->numeric.get(THOUSANDS_SEP).asString().data(), + reinterpret_cast(l->numeric.get(GROUPING).asByteSpan().data()) + ); + } frg::expected operator() (char c) { _formatter->append(c); @@ -57,10 +65,10 @@ struct PrintfAgent { 
frg::do_printf_chars(*_formatter, t, opts, szmod, _vsp); break; case 'd': case 'i': case 'o': case 'x': case 'X': case 'b': case 'B': case 'u': - frg::do_printf_ints(*_formatter, t, opts, szmod, _vsp); + frg::do_printf_ints(*_formatter, t, opts, szmod, _vsp, locale_opts); break; case 'f': case 'F': case 'g': case 'G': case 'e': case 'E': case 'a': case 'A': - frg::do_printf_floats(*_formatter, t, opts, szmod, _vsp); + frg::do_printf_floats(*_formatter, t, opts, szmod, _vsp, locale_opts); break; case 'm': __ensure(!opts.fill_zeros); @@ -119,6 +127,7 @@ struct PrintfAgent { private: F *_formatter; + frg::locale_options locale_opts; frg::va_struct *_vsp; }; diff --git a/options/ansi/generic/stdlib.cpp b/options/ansi/generic/stdlib.cpp index 23e106f709..41ad5143a6 100644 --- a/options/ansi/generic/stdlib.cpp +++ b/options/ansi/generic/stdlib.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -74,13 +73,13 @@ __attribute__((__noreturn__)) void siglongjmp(sigjmp_buf buffer, int value) { } double strtod(const char *__restrict string, char **__restrict end) { - return mlibc::strtofp(string, end); + return mlibc::strtofp(string, end, mlibc::getActiveLocale()); } float strtof(const char *__restrict string, char **__restrict end) { - return mlibc::strtofp(string, end); + return mlibc::strtofp(string, end, mlibc::getActiveLocale()); } long double strtold(const char *__restrict string, char **__restrict end) { - return mlibc::strtofp(string, end); + return mlibc::strtofp(string, end, mlibc::getActiveLocale()); } long strtol(const char *__restrict string, char **__restrict end, int base) { diff --git a/options/internal/generic/c-locale-defaults.cpp b/options/internal/generic/c-locale-defaults.cpp new file mode 100644 index 0000000000..68db63ecf5 --- /dev/null +++ b/options/internal/generic/c-locale-defaults.cpp @@ -0,0 +1,448 @@ +#include + +namespace mlibc { + +alignas(uint16_t) frg::array cLocaleCtypeClass = { + "\000\000" "\000\000" 
"\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\002\000" "\002\000" "\002\000" "\002\000" + "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" "\003\040" + "\002\040" "\002\040" "\002\040" "\002\040" "\002\000" "\002\000" + "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" + "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" "\002\000" + "\002\000" "\002\000" "\002\000" "\002\000" "\001\140" "\004\300" + "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" + "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" + "\004\300" "\004\300" "\010\330" "\010\330" 
"\010\330" "\010\330" + "\010\330" "\010\330" "\010\330" "\010\330" "\010\330" "\010\330" + "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" "\004\300" + "\004\300" "\010\325" "\010\325" "\010\325" "\010\325" "\010\325" + "\010\325" "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" + "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" + "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" "\010\305" + "\010\305" "\010\305" "\010\305" "\004\300" "\004\300" "\004\300" + "\004\300" "\004\300" "\004\300" "\010\326" "\010\326" "\010\326" + "\010\326" "\010\326" "\010\326" "\010\306" "\010\306" "\010\306" + "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" + "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" + "\010\306" "\010\306" "\010\306" "\010\306" "\010\306" "\004\300" + "\004\300" "\004\300" "\004\300" "\002\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + 
"\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" + "\000\000" "\000\000" "\000\000" "\000\000" "\000\000" "\000" /* null implied */ +}; + +alignas(uint32_t) frg::array cLocaleCtypeClass32 = { + "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + "\000\000\003\040" "\000\000\002\040" "\000\000\002\040" + "\000\000\002\040" "\000\000\002\040" "\000\000\002\000" + "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + "\000\000\002\000" "\000\000\002\000" "\000\000\002\000" + "\000\000\002\000" "\000\000\002\000" "\000\000\001\140" + "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + "\000\000\010\330" "\000\000\010\330" "\000\000\010\330" + "\000\000\010\330" "\000\000\010\330" "\000\000\010\330" + "\000\000\010\330" "\000\000\010\330" "\000\000\010\330" + "\000\000\010\330" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\004\300" "\000\000\010\325" + "\000\000\010\325" "\000\000\010\325" "\000\000\010\325" + "\000\000\010\325" "\000\000\010\325" "\000\000\010\305" + "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + "\000\000\010\305" "\000\000\010\305" 
"\000\000\010\305" + "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + "\000\000\010\305" "\000\000\010\305" "\000\000\010\305" + "\000\000\010\305" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\010\326" "\000\000\010\326" + "\000\000\010\326" "\000\000\010\326" "\000\000\010\326" + "\000\000\010\326" "\000\000\010\306" "\000\000\010\306" + "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + "\000\000\010\306" "\000\000\010\306" "\000\000\010\306" + "\000\000\004\300" "\000\000\004\300" "\000\000\004\300" + "\000\000\004\300" "\000\000\002\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" 
"\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000\000" "\000\000\000\000" "\000\000\000\000" + "\000\000\000" /* null implied */ +}; + +frg::array cLocaleCtypeToUpper = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 
0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xffffffff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; + +frg::array cLocaleCtypeToLower = { + 0x80, 0x81, 
0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xffffffff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 
0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; + +frg::array cLocaleClassUpper = { + 7, 1, 6, 1, 1, + 6 * sizeof(uint32_t), + 0, 8 * sizeof(uint32_t), + 0x07fffffe, 0x00000000, +}; + +frg::array cLocaleClassLower = { + 7, 1, 6, 1, 1, + 6 * sizeof(uint32_t), + 0, 8 * sizeof(uint32_t), + 0x00000000, 0x07fffffe, +}; + +frg::array cLocaleClassAlpha = { + 7, 1, 6, 1, 1, + 6 * sizeof(uint32_t), + 0, 8 * sizeof(uint32_t), + 0x07fffffe, 0x07fffffe, +}; + +frg::array cLocaleClassDigit = { + 6, 1, 6, 0, 1, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0x00000000, 0x03ff0000, +}; + +frg::array cLocaleClassXdigit = { + 7, 1, 7, 0, 3, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0x00000000, 0x03ff0000, 0x0000007e, 0x0000007e, +}; + +frg::array cLocaleClassSpace = { + 6, 1, 6, 0, 1, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0x00003e00, 0x00000001, +}; + +frg::array cLocaleClassPrint = { + 7, 1, 7, 0, 3, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0x00000000, 0xffffffff, 0xffffffff, 0x7fffffff, +}; + +frg::array cLocaleClassGraph = { + 7, 1, 7, 0, 3, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0x00000000, 0xfffffffe, 0xffffffff, 0x7fffffff, +}; + +frg::array cLocaleClassBlank = { + 6, 1, 6, 0, 1, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0x00000200, 0x00000001, +}; + +frg::array cLocaleClassCntrl = { + 7, 1, 7, 0, 3, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0xffffffff, 0x00000000, 0x00000000, 0x80000000, +}; + +frg::array cLocaleClassPunct = { + 7, 1, 7, 0, 3, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0x00000000, 0xfc00fffe, 0xf8000001, 0x78000001, +}; + +frg::array cLocaleClassAlnum = 
{ + 7, 1, 7, 0, 3, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + 0x00000000, 0x03ff0000, 0x07fffffe, 0x07fffffe, +}; + +frg::array cLocaleMapToUpper = { + 7, 1, 5, 3, 31, + 6 * sizeof(uint32_t), + 0, 0, 0, 10 * sizeof(uint32_t), + 0x00000000, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +frg::array cLocaleMapToLower = { + 7, 1, 5, 3, 31, + 6 * sizeof(uint32_t), + 0, 0, 10 * sizeof(uint32_t), 0, + 0x00000000, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +frg::array cLocaleCollSeqMb = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', + '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', '\x61', 
'\x62', '\x63', '\x64', '\x65', '\x66', '\x67', + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', + '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff' +}; + +frg::array cLocaleCollSeqWc = { + 8, 1, 8, 0, 0xFF, + 6 * sizeof(uint32_t), + 7 * sizeof(uint32_t), + L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07', + L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f', + L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17', + L'\x18', L'\x19', L'\x1a', L'\x1b', L'\x1c', L'\x1d', L'\x1e', L'\x1f', + L'\x20', L'\x21', L'\x22', L'\x23', L'\x24', L'\x25', L'\x26', L'\x27', + L'\x28', L'\x29', L'\x2a', L'\x2b', L'\x2c', L'\x2d', L'\x2e', L'\x2f', + L'\x30', L'\x31', L'\x32', L'\x33', L'\x34', L'\x35', L'\x36', L'\x37', + L'\x38', L'\x39', L'\x3a', L'\x3b', L'\x3c', L'\x3d', L'\x3e', L'\x3f', 
+ L'\x40', L'\x41', L'\x42', L'\x43', L'\x44', L'\x45', L'\x46', L'\x47', + L'\x48', L'\x49', L'\x4a', L'\x4b', L'\x4c', L'\x4d', L'\x4e', L'\x4f', + L'\x50', L'\x51', L'\x52', L'\x53', L'\x54', L'\x55', L'\x56', L'\x57', + L'\x58', L'\x59', L'\x5a', L'\x5b', L'\x5c', L'\x5d', L'\x5e', L'\x5f', + L'\x60', L'\x61', L'\x62', L'\x63', L'\x64', L'\x65', L'\x66', L'\x67', + L'\x68', L'\x69', L'\x6a', L'\x6b', L'\x6c', L'\x6d', L'\x6e', L'\x6f', + L'\x70', L'\x71', L'\x72', L'\x73', L'\x74', L'\x75', L'\x76', L'\x77', + L'\x78', L'\x79', L'\x7a', L'\x7b', L'\x7c', L'\x7d', L'\x7e', L'\x7f', + L'\x80', L'\x81', L'\x82', L'\x83', L'\x84', L'\x85', L'\x86', L'\x87', + L'\x88', L'\x89', L'\x8a', L'\x8b', L'\x8c', L'\x8d', L'\x8e', L'\x8f', + L'\x90', L'\x91', L'\x92', L'\x93', L'\x94', L'\x95', L'\x96', L'\x97', + L'\x98', L'\x99', L'\x9a', L'\x9b', L'\x9c', L'\x9d', L'\x9e', L'\x9f', + L'\xa0', L'\xa1', L'\xa2', L'\xa3', L'\xa4', L'\xa5', L'\xa6', L'\xa7', + L'\xa8', L'\xa9', L'\xaa', L'\xab', L'\xac', L'\xad', L'\xae', L'\xaf', + L'\xb0', L'\xb1', L'\xb2', L'\xb3', L'\xb4', L'\xb5', L'\xb6', L'\xb7', + L'\xb8', L'\xb9', L'\xba', L'\xbb', L'\xbc', L'\xbd', L'\xbe', L'\xbf', + L'\xc0', L'\xc1', L'\xc2', L'\xc3', L'\xc4', L'\xc5', L'\xc6', L'\xc7', + L'\xc8', L'\xc9', L'\xca', L'\xcb', L'\xcc', L'\xcd', L'\xce', L'\xcf', + L'\xd0', L'\xd1', L'\xd2', L'\xd3', L'\xd4', L'\xd5', L'\xd6', L'\xd7', + L'\xd8', L'\xd9', L'\xda', L'\xdb', L'\xdc', L'\xdd', L'\xde', L'\xdf', + L'\xe0', L'\xe1', L'\xe2', L'\xe3', L'\xe4', L'\xe5', L'\xe6', L'\xe7', + L'\xe8', L'\xe9', L'\xea', L'\xeb', L'\xec', L'\xed', L'\xee', L'\xef', + L'\xf0', L'\xf1', L'\xf2', L'\xf3', L'\xf4', L'\xf5', L'\xf6', L'\xf7', + L'\xf8', L'\xf9', L'\xfa', L'\xfb', L'\xfc', L'\xfd', L'\xfe', L'\xff' +}; + +} // namespace mlibc diff --git a/options/internal/generic/charset.cpp b/options/internal/generic/charset.cpp index c42b4f457e..b09e2aff90 100644 --- a/options/internal/generic/charset.cpp +++ 
b/options/internal/generic/charset.cpp @@ -2,6 +2,92 @@ #include #include #include +#include + +namespace { + +enum class class_bits : unsigned { + upper, + lower, + alpha, + digit, + xdigit, + space, + print, + graph, + blank, + cntrl, + punct, + alnum, +}; + +constexpr int ctype_class_bit(class_bits bit) { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return 1 << std::to_underlying(bit); +#else + return std::to_underlying(bit) < 8 + ? ((1 << std::to_underlying(bit)) << 8) + : ((1 << std::to_underlying(bit)) >> 8); +#endif +} + +int wctype_table_lookup(frg::span table, uint32_t wc) { + uint32_t shift1 = table[0]; + uint32_t index1 = wc >> shift1; + uint32_t bound = table[1]; + if (index1 < bound) { + uint32_t lookup1 = table[5 + index1]; + if (lookup1 != 0) { + uint32_t shift2 = table[2]; + uint32_t mask2 = table[3]; + uint32_t index2 = (wc >> shift2) & mask2; + uint32_t lookup2 = table[(lookup1 / sizeof(uint32_t)) + index2]; + if (lookup2 != 0) { + uint32_t mask3 = table[4]; + uint32_t index3 = (wc >> 5) & mask3; + uint32_t lookup3 = table[(lookup2 / sizeof(uint32_t)) + index3]; + + return (lookup3 >> (wc & 0x1f)) & 1; + } + } + } + return 0; +} + +uint32_t wctrans_table_lookup(frg::span table, uint32_t wc) { + uint32_t shift1 = table[0]; + uint32_t index1 = wc >> shift1; + uint32_t bound = table[1]; + if (index1 < bound) { + uint32_t lookup1 = table[5 + index1]; + if (lookup1 != 0) { + uint32_t shift2 = table[2]; + uint32_t mask2 = table[3]; + uint32_t index2 = (wc >> shift2) & mask2; + uint32_t lookup2 = table[(lookup1 / sizeof(uint32_t)) + index2]; + if (lookup2 != 0) { + uint32_t mask3 = table[4]; + uint32_t index3 = wc & mask3; + int32_t lookup3 = table[(lookup2 / sizeof(uint32_t)) + index3]; + + return wc + lookup3; + } + } + } + return wc; +} + +int ctype_class_check(mlibc::codepoint c, class_bits b, mlibc::localeinfo *l) { + if (c <= 0x7F) + return l->ctype.ctype_class()[c + 128] & ctype_class_bit(b); + + auto index = l->ctype.class_offset() + 
std::to_underlying(b); + auto entry = l->ctype.members[index].asUint32Span(); + + return wctype_table_lookup(entry, c); +} + +} // namespace namespace mlibc { @@ -10,129 +96,92 @@ bool charset::is_ascii_superset() { return true; } -bool charset::is_alpha(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_alpha() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_digit(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return c >= '0' && c <= '9'; - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_digit() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_xdigit(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_xdigit() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_alnum(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_alnum() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_punct(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return c == '!' || c == '"' || c == '#' || c == '$' || c == '%' || c == '&' - || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' - || c == '-' || c == '.' || c == '/' - || c == ':' || c == ';' || c == '<' || c == '=' || c == '>' || c == '?' 
- || c == '@' - || c == '[' || c == '\\' || c == ']' || c == '^' || c == '_' || c == '`' - || c == '{' || c == '|' || c == '}' || c == '~'; - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_punct() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_graph(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return c >= 0x21 && c <= 0x7E; - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_graph() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_blank(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return c == ' ' || c == '\t'; - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_blank() is not implemented" - " for the full Unicode charset " << c << frg::endlog; - return false; -} - -bool charset::is_space(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_space() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_print(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return c >= 0x20 && c <= 0x7E; - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_print() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_lower(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return (c >= 'a' && c <= 'z'); - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_print() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -bool charset::is_upper(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - return (c >= 'A' && c <= 'Z'); - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::is_print() is not implemented" - " for the full Unicode charset" << frg::endlog; - return false; -} - -codepoint 
charset::to_lower(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - if(c >= 'A' && c <= 'Z') - return c - 'A' + 'a'; - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::to_lower() is not implemented" - " for the full Unicode charset" << frg::endlog; - return c; -} - -codepoint charset::to_upper(codepoint c) { - if(c <= 0x7F && is_ascii_superset()) - if(c >= 'a' && c <= 'z') - return c - 'a' + 'A'; - if(c > 0x7F) - mlibc::infoLogger() << "mlibc: charset::to_upper() is not implemented" - " for the full Unicode charset" << frg::endlog; - return c; +bool charset::is_alpha(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::alpha, l); +} + +bool charset::is_digit(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::digit, l); +} + +bool charset::is_xdigit(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::xdigit, l); +} + +bool charset::is_alnum(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::alnum, l); +} + +bool charset::is_cntrl(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::cntrl, l); +} + +bool charset::is_punct(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::punct, l); +} + +bool charset::is_graph(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::graph, l); +} + +bool charset::is_blank(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::blank, l); +} + +bool charset::is_space(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::space, l); +} + +bool charset::is_print(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::print, l); +} + +bool charset::is_lower(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::lower, l); +} + +bool charset::is_upper(codepoint c, mlibc::localeinfo *l) { + return ctype_class_check(c, class_bits::upper, l); +} + 
+codepoint charset::to_lower(codepoint c, mlibc::localeinfo *l) { + auto index = l->ctype.map_offset() + 1; + auto entry = l->ctype.members[index].asUint32Span(); + return wctrans_table_lookup(entry, c); +} + +codepoint charset::to_upper(codepoint c, mlibc::localeinfo *l) { + auto index = l->ctype.map_offset(); + auto entry = l->ctype.members[index].asUint32Span(); + return wctrans_table_lookup(entry, c); +} + +wctype_t charset::wctype(frg::string_view name, mlibc::localeinfo *l) { + size_t offset = 0; + auto class_names = l->ctype.class_names(); + + for (size_t i = 0; offset < class_names.size(); i++) { + auto end = class_names.find_first('\0', offset); + + if (end == size_t(-1) || end == offset) + break; + + if (name == class_names.sub_string(offset, end - offset)) + return l->ctype.class_offset() + i; + + offset = end + 1; + } + + return 0; +} + +bool charset::iswctype(wint_t wc, wctype_t t, mlibc::localeinfo *l) { + if (t == 0 || wc == static_cast(WEOF)) + return 0; + + auto table = l->ctype.get(__NL_ITEM(LC_CTYPE, t)).asUint32Span(); + + return wctype_table_lookup(table, wc); } charset *current_charset() { diff --git a/options/internal/generic/ctype.cpp b/options/internal/generic/ctype.cpp new file mode 100644 index 0000000000..78edd6ca70 --- /dev/null +++ b/options/internal/generic/ctype.cpp @@ -0,0 +1,46 @@ +#include +#include +#include + +namespace mlibc { + +int isalpha_l(int nc, localeinfo *loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_alpha(cp, loc); +} +int isdigit_l(int nc, localeinfo *loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_digit(cp, loc); +} + +int isspace_l(int nc, localeinfo *loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = 
cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_space(cp, loc); +} + +int isxdigit_l(int nc, localeinfo *loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_xdigit(cp, loc); +} + +int tolower_l(int nc, localeinfo *loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->to_lower(cp, loc); +} + +} // namespace mlibc diff --git a/options/internal/generic/locale-defaults.cpp b/options/internal/generic/locale-defaults.cpp new file mode 100644 index 0000000000..05b3821286 --- /dev/null +++ b/options/internal/generic/locale-defaults.cpp @@ -0,0 +1,376 @@ +#include + +namespace mlibc { + +nl_ctype::nl_ctype() { + set(_NL_CTYPE_CLASS, frg::span{cLocaleCtypeClass}); + set(_NL_CTYPE_TOLOWER, frg::span{cLocaleCtypeToLower}); + set(_NL_CTYPE_TOUPPER, frg::span{cLocaleCtypeToUpper}); + set(_NL_CTYPE_CLASS32, frg::span{cLocaleCtypeClass32}); + set(_NL_CTYPE_CLASS_NAMES, {"upper\0" "lower\0" "alpha\0" "digit\0" "xdigit\0" "space\0" "print\0" "graph\0" "blank\0" "cntrl\0" "punct\0" "alnum\0"}); + set(_NL_CTYPE_MAP_NAMES, {"toupper\0" "tolower\0"}); + set(_NL_CTYPE_MB_CUR_MAX, uint32_t{1}); + set(_NL_CTYPE_CODESET_NAME, {"ANSI_X3.4-1968"}); + set(_NL_CTYPE_CLASS_OFFSET, uint32_t{72}); + set(_NL_CTYPE_MAP_OFFSET, uint32_t{84}); + set(_NL_CTYPE_INDIGITS_MB_LEN, uint32_t{1}); + set(_NL_CTYPE_INDIGITS0_MB, {"0"}); + set(_NL_CTYPE_INDIGITS1_MB, {"1"}); + set(_NL_CTYPE_INDIGITS2_MB, {"2"}); + set(_NL_CTYPE_INDIGITS3_MB, {"3"}); + set(_NL_CTYPE_INDIGITS4_MB, {"4"}); + set(_NL_CTYPE_INDIGITS5_MB, {"5"}); + set(_NL_CTYPE_INDIGITS6_MB, {"6"}); + set(_NL_CTYPE_INDIGITS7_MB, {"7"}); + set(_NL_CTYPE_INDIGITS8_MB, {"8"}); + set(_NL_CTYPE_INDIGITS9_MB, {"9"}); + 
set(_NL_CTYPE_INDIGITS_WC_LEN, uint32_t{1}); + set(_NL_CTYPE_INDIGITS0_WC, {L"0"}); + set(_NL_CTYPE_INDIGITS1_WC, {L"1"}); + set(_NL_CTYPE_INDIGITS2_WC, {L"2"}); + set(_NL_CTYPE_INDIGITS3_WC, {L"3"}); + set(_NL_CTYPE_INDIGITS4_WC, {L"4"}); + set(_NL_CTYPE_INDIGITS5_WC, {L"5"}); + set(_NL_CTYPE_INDIGITS6_WC, {L"6"}); + set(_NL_CTYPE_INDIGITS7_WC, {L"7"}); + set(_NL_CTYPE_INDIGITS8_WC, {L"8"}); + set(_NL_CTYPE_INDIGITS9_WC, {L"9"}); + set(_NL_CTYPE_OUTDIGIT0_MB, {"0"}); + set(_NL_CTYPE_OUTDIGIT1_MB, {"1"}); + set(_NL_CTYPE_OUTDIGIT2_MB, {"2"}); + set(_NL_CTYPE_OUTDIGIT3_MB, {"3"}); + set(_NL_CTYPE_OUTDIGIT4_MB, {"4"}); + set(_NL_CTYPE_OUTDIGIT5_MB, {"5"}); + set(_NL_CTYPE_OUTDIGIT6_MB, {"6"}); + set(_NL_CTYPE_OUTDIGIT7_MB, {"7"}); + set(_NL_CTYPE_OUTDIGIT8_MB, {"8"}); + set(_NL_CTYPE_OUTDIGIT9_MB, {"9"}); + set(_NL_CTYPE_OUTDIGIT0_WC, {L'0'}); + set(_NL_CTYPE_OUTDIGIT1_WC, {L'1'}); + set(_NL_CTYPE_OUTDIGIT2_WC, {L'2'}); + set(_NL_CTYPE_OUTDIGIT3_WC, {L'3'}); + set(_NL_CTYPE_OUTDIGIT4_WC, {L'4'}); + set(_NL_CTYPE_OUTDIGIT5_WC, {L'5'}); + set(_NL_CTYPE_OUTDIGIT6_WC, {L'6'}); + set(_NL_CTYPE_OUTDIGIT7_WC, {L'7'}); + set(_NL_CTYPE_OUTDIGIT8_WC, {L'8'}); + set(_NL_CTYPE_OUTDIGIT9_WC, {L'9'}); + set(_NL_CTYPE_EXTRA_MAP_1, frg::span{cLocaleClassUpper}); + set(_NL_CTYPE_EXTRA_MAP_2, frg::span{cLocaleClassLower}); + set(_NL_CTYPE_EXTRA_MAP_3, frg::span{cLocaleClassAlpha}); + set(_NL_CTYPE_EXTRA_MAP_4, frg::span{cLocaleClassDigit}); + set(_NL_CTYPE_EXTRA_MAP_5, frg::span{cLocaleClassXdigit}); + set(_NL_CTYPE_EXTRA_MAP_6, frg::span{cLocaleClassSpace}); + set(_NL_CTYPE_EXTRA_MAP_7, frg::span{cLocaleClassPrint}); + set(_NL_CTYPE_EXTRA_MAP_8, frg::span{cLocaleClassGraph}); + set(_NL_CTYPE_EXTRA_MAP_9, frg::span{cLocaleClassBlank}); + set(_NL_CTYPE_EXTRA_MAP_10, frg::span{cLocaleClassCntrl}); + set(_NL_CTYPE_EXTRA_MAP_11, frg::span{cLocaleClassPunct}); + set(_NL_CTYPE_EXTRA_MAP_12, frg::span{cLocaleClassAlnum}); + set(_NL_CTYPE_EXTRA_MAP_13, frg::span{cLocaleMapToUpper}); + 
set(_NL_CTYPE_EXTRA_MAP_14, frg::span{cLocaleMapToLower}); + set(_NL_CTYPE_EXTRA_MAP_15, frg::span{}); + set(_NL_CTYPE_EXTRA_MAP_16, frg::span{}); +} + +nl_numeric::nl_numeric() { + set(DECIMAL_POINT, {"."}); + set(THOUSEP, {""}); + set(GROUPING, frg::span{reinterpret_cast(""), 1}); + set(_NL_NUMERIC_DECIMAL_POINT_WC, {L'.'}); + set(_NL_NUMERIC_THOUSANDS_SEP_WC, {L'\0'}); + set(_NL_NUMERIC_CODESET, {"ANSI_X3.4-1968"}); +} + +nl_time::nl_time() { + set(ABDAY_1, {"Sun"}); + set(ABDAY_2, {"Mon"}); + set(ABDAY_3, {"Tue"}); + set(ABDAY_4, {"Wed"}); + set(ABDAY_5, {"Thu"}); + set(ABDAY_6, {"Fri"}); + set(ABDAY_7, {"Sat"}); + set(DAY_1, {"Sunday"}); + set(DAY_2, {"Monday"}); + set(DAY_3, {"Tuesday"}); + set(DAY_4, {"Wednesday"}); + set(DAY_5, {"Thursday"}); + set(DAY_6, {"Friday"}); + set(DAY_7, {"Saturday"}); + set(ABMON_1, {"Jan"}); + set(ABMON_2, {"Feb"}); + set(ABMON_3, {"Mar"}); + set(ABMON_4, {"Apr"}); + set(ABMON_5, {"May"}); + set(ABMON_6, {"Jun"}); + set(ABMON_7, {"Jul"}); + set(ABMON_8, {"Aug"}); + set(ABMON_9, {"Sep"}); + set(ABMON_10, {"Oct"}); + set(ABMON_11, {"Nov"}); + set(ABMON_12, {"Dec"}); + set(MON_1, {"January"}); + set(MON_2, {"February"}); + set(MON_3, {"March"}); + set(MON_4, {"April"}); + set(MON_5, {"May"}); + set(MON_6, {"June"}); + set(MON_7, {"July"}); + set(MON_8, {"August"}); + set(MON_9, {"September"}); + set(MON_10, {"October"}); + set(MON_11, {"November"}); + set(MON_12, {"December"}); + set(AM_STR, {"AM"}); + set(PM_STR, {"PM"}); + set(D_T_FMT, {"%a %b %e %H:%M:%S %Y"}); + set(D_FMT, {"%m/%d/%y"}); + set(T_FMT, {"%H:%M:%S"}); + set(T_FMT_AMPM, {"%I:%M:%S %p"}); + set(ERA, {""}); + set(ERA_YEAR, {""}); + set(ERA_D_FMT, {""}); + set(ALT_DIGITS, {""}); + set(ERA_D_T_FMT, {""}); + set(ERA_T_FMT, {""}); + set(_NL_TIME_ERA_NUM_ENTRIES, 0); + set(_NL_TIME_ERA_ENTRIES, {""}); + set(_NL_WABDAY_1, {L"Sun"}); + set(_NL_WABDAY_2, {L"Mon"}); + set(_NL_WABDAY_3, {L"Tue"}); + set(_NL_WABDAY_4, {L"Wed"}); + set(_NL_WABDAY_5, {L"Thu"}); + 
set(_NL_WABDAY_6, {L"Fri"}); + set(_NL_WABDAY_7, {L"Sat"}); + set(_NL_WDAY_1, {L"Sunday"}); + set(_NL_WDAY_2, {L"Monday"}); + set(_NL_WDAY_3, {L"Tuesday"}); + set(_NL_WDAY_4, {L"Wednesday"}); + set(_NL_WDAY_5, {L"Thursday"}); + set(_NL_WDAY_6, {L"Friday"}); + set(_NL_WDAY_7, {L"Saturday"}); + set(_NL_WABMON_1, {L"Jan"}); + set(_NL_WABMON_2, {L"Feb"}); + set(_NL_WABMON_3, {L"Mar"}); + set(_NL_WABMON_4, {L"Apr"}); + set(_NL_WABMON_5, {L"May"}); + set(_NL_WABMON_6, {L"Jun"}); + set(_NL_WABMON_7, {L"Jul"}); + set(_NL_WABMON_8, {L"Aug"}); + set(_NL_WABMON_9, {L"Sep"}); + set(_NL_WABMON_10, {L"Oct"}); + set(_NL_WABMON_11, {L"Nov"}); + set(_NL_WABMON_12, {L"Dec"}); + set(_NL_WMON_1, {L"January"}); + set(_NL_WMON_2, {L"February"}); + set(_NL_WMON_3, {L"March"}); + set(_NL_WMON_4, {L"April"}); + set(_NL_WMON_5, {L"May"}); + set(_NL_WMON_6, {L"June"}); + set(_NL_WMON_7, {L"July"}); + set(_NL_WMON_8, {L"August"}); + set(_NL_WMON_9, {L"September"}); + set(_NL_WMON_10, {L"October"}); + set(_NL_WMON_11, {L"November"}); + set(_NL_WMON_12, {L"December"}); + set(_NL_WAM_STR, {L"AM"}); + set(_NL_WPM_STR, {L"PM"}); + set(_NL_WD_T_FMT, {L"%a %b %e %H:%M:%S %Y"}); + set(_NL_WD_FMT, {L"%m/%d/%y"}); + set(_NL_WT_FMT, {L"%H:%M:%S"}); + set(_NL_WT_FMT_AMPM, {L"%I:%M:%S %p"}); + set(_NL_WERA_YEAR, {L""}); + set(_NL_WERA_D_FMT, {L""}); + set(_NL_WALT_DIGITS, {L""}); + set(_NL_WERA_D_T_FMT, {L""}); + set(_NL_WERA_T_FMT, {L""}); + set(_NL_TIME_WEEK_NDAYS, {"\7"}); + set(_NL_TIME_WEEK_1STDAY, 19971130); + set(_NL_TIME_WEEK_1STWEEK, {"\4"}); + set(_NL_TIME_FIRST_WEEKDAY, {"\1"}); + set(_NL_TIME_FIRST_WORKDAY, {"\2"}); + set(_NL_TIME_CAL_DIRECTION, {"\1"}); + set(_NL_TIME_TIMEZONE, {""}); + set(_DATE_FMT, {"%a %b %e %H:%M:%S %Z %Y"}); + set(_NL_W_DATE_FMT, {L"%a %b %e %H:%M:%S %Z %Y"}); + set(_NL_TIME_CODESET, {"ANSI_X3.4-1968"}); + set(ALTMON_1, {"January"}); + set(ALTMON_2, {"February"}); + set(ALTMON_3, {"March"}); + set(ALTMON_4, {"April"}); + set(ALTMON_5, {"May"}); + set(ALTMON_6, 
{"June"}); + set(ALTMON_7, {"July"}); + set(ALTMON_8, {"August"}); + set(ALTMON_9, {"September"}); + set(ALTMON_10, {"October"}); + set(ALTMON_11, {"November"}); + set(ALTMON_12, {"December"}); + set(_NL_WALTMON_1, {L"January"}); + set(_NL_WALTMON_2, {L"February"}); + set(_NL_WALTMON_3, {L"March"}); + set(_NL_WALTMON_4, {L"April"}); + set(_NL_WALTMON_5, {L"May"}); + set(_NL_WALTMON_6, {L"June"}); + set(_NL_WALTMON_7, {L"July"}); + set(_NL_WALTMON_8, {L"August"}); + set(_NL_WALTMON_9, {L"September"}); + set(_NL_WALTMON_10, {L"October"}); + set(_NL_WALTMON_11, {L"November"}); + set(_NL_WALTMON_12, {L"December"}); + set(_NL_ABALTMON_1, {"Jan"}); + set(_NL_ABALTMON_2, {"Feb"}); + set(_NL_ABALTMON_3, {"Mar"}); + set(_NL_ABALTMON_4, {"Apr"}); + set(_NL_ABALTMON_5, {"May"}); + set(_NL_ABALTMON_6, {"Jun"}); + set(_NL_ABALTMON_7, {"Jul"}); + set(_NL_ABALTMON_8, {"Aug"}); + set(_NL_ABALTMON_9, {"Sep"}); + set(_NL_ABALTMON_10, {"Oct"}); + set(_NL_ABALTMON_11, {"Nov"}); + set(_NL_ABALTMON_12, {"Dec"}); + set(_NL_WABALTMON_1, {L"Jan"}); + set(_NL_WABALTMON_2, {L"Feb"}); + set(_NL_WABALTMON_3, {L"Mar"}); + set(_NL_WABALTMON_4, {L"Apr"}); + set(_NL_WABALTMON_5, {L"May"}); + set(_NL_WABALTMON_6, {L"Jun"}); + set(_NL_WABALTMON_7, {L"Jul"}); + set(_NL_WABALTMON_8, {L"Aug"}); + set(_NL_WABALTMON_9, {L"Sep"}); + set(_NL_WABALTMON_10, {L"Oct"}); + set(_NL_WABALTMON_11, {L"Nov"}); + set(_NL_WABALTMON_12, {L"Dec"}); +} + +extern frg::array cLocaleCollSeqMb; +extern frg::array cLocaleCollSeqWc; + +nl_collate::nl_collate() { + set(_NL_COLLATE_NRULES, 0); + set(_NL_COLLATE_RULESETS, frg::span{}); + set(_NL_COLLATE_TABLEMB, frg::span{}); + set(_NL_COLLATE_WEIGHTMB, frg::span{}); + set(_NL_COLLATE_EXTRAMB, frg::span{}); + set(_NL_COLLATE_INDIRECTMB, frg::span{}); + set(_NL_COLLATE_GAP1, frg::span{}); + set(_NL_COLLATE_GAP2, frg::span{}); + set(_NL_COLLATE_GAP3, frg::span{}); + set(_NL_COLLATE_TABLEWC, frg::span{}); + set(_NL_COLLATE_WEIGHTWC, frg::span{}); + set(_NL_COLLATE_EXTRAWC, 
// Fills every item of the messages category with a fixed built-in value.
// These are the values a default-constructed nl_messages carries.
nl_messages::nl_messages() {
	// Regular expressions matching an affirmative / negative answer:
	// any input that starts with 'y'/'Y' or 'n'/'N' respectively.
	set(YESEXPR, {"^[yY]"});
	set(NOEXPR, {"^[nN]"});
	// The plain yes/no strings are left empty.
	set(YESSTR, {""});
	set(NOSTR, {""});
	// Codeset name recorded for this category; every default category in
	// this file uses the same "ANSI_X3.4-1968" string.
	set(_NL_MESSAGES_CODESET, {"ANSI_X3.4-1968"});
}
// Fills every item of the telephone category with a fixed built-in value.
// These are the values a default-constructed nl_telephone carries.
nl_telephone::nl_telephone() {
	// International number format.
	// NOTE(review): presumably %c = country code, %a = area code,
	// %l = local number, as in glibc locale sources -- confirm.
	set(_NL_TELEPHONE_TEL_INT_FMT, {"+%c %a %l"});
	// No domestic format, international select or international prefix is
	// defined by default; all three are empty strings.
	set(_NL_TELEPHONE_TEL_DOM_FMT, {""});
	set(_NL_TELEPHONE_INT_SELECT, {""});
	set(_NL_TELEPHONE_INT_PREFIX, {""});
	// Codeset name recorded for this category; every default category in
	// this file uses the same "ANSI_X3.4-1968" string.
	set(_NL_TELEPHONE_CODESET, {"ANSI_X3.4-1968"});
}
a/options/internal/generic/locale.cpp +++ b/options/internal/generic/locale.cpp @@ -1,98 +1,627 @@ -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include + +namespace { + +mlibc::localeinfo cLocale{}; +mlibc::localeinfo startingLocale{}; + +thread_local mlibc::localeinfo *current_locale = nullptr; +mlibc::localeinfo *current_global_locale = &startingLocale; + +// handle to the opened locale-archive file, if any +smarter::shared_ptr localeArchive; + +// map of (locale name, mapping) +frg::hash_map< + frg::string, + smarter::shared_ptr, + frg::hash>, + MemoryAllocator +> localeFiles{frg::hash>(), getAllocator()}; + +// value of LOCPATH environment variable, if set +frg::string cachedLocpath{getAllocator()}; +// list of paths to inspect for searching locale (non-archive) files +frg::vector, MemoryAllocator> locpaths{getAllocator()}; + +std::array lcNames{ + "LC_CTYPE", + "LC_NUMERIC", + "LC_TIME", + "LC_COLLATE", + "LC_MONETARY", + "LC_MESSAGES", + "LC_ALL", + "LC_PAPER", + "LC_NAME", + "LC_ADDRESS", + "LC_TELEPHONE", + "LC_MEASUREMENT", + "LC_IDENTIFICATION", +}; + +using DecomposedLocale = std::tuple< + std::optional, // language + std::optional, // territory + std::optional, // codeset + std::optional // modifier +>; + +frg::string normalizeCodeset(frg::string_view codeset) { + bool only_digit = true; + + for (size_t i = 0; i < codeset.size(); i++) { + if (only_digit && !mlibc::isdigit_l(codeset[i], &cLocale)) + only_digit = false; + } + + frg::string normalized{getAllocator()}; + + if (only_digit) + normalized += "iso"; + + for(size_t i = 0; i < codeset.size(); i++) { + if (mlibc::isalpha_l(codeset[i], &cLocale)) + normalized.push_back(tolower_l(codeset[i], &cLocale)); + else if (mlibc::isdigit_l(codeset[i], &cLocale)) + normalized.push_back(codeset[i]); + } + + return normalized; +} + +// given a locale name string, break it into its constituent parts +DecomposedLocale 
parseLocaleName(frg::string_view name) { + std::optional language; + std::optional territory; + std::optional codeset; + std::optional modifier; + + size_t offset = 0; + size_t territoryEnd = size_t(-1); + + auto languageLen = name.find_first_of({"_.@", 3}); + if (languageLen != size_t(-1)) { + language = name.sub_string(0, languageLen); + offset = languageLen + 1; + } else { + language = name; + return {language, territory, codeset, modifier}; + } + + if (name[languageLen] == '_') { + territoryEnd = name.find_first_of({".@", 2}, offset); + if (territoryEnd != size_t(-1)) { + if (territoryEnd > offset) + territory = name.sub_string(offset, territoryEnd - offset); + offset = territoryEnd + 1; + } else { + if (name.size() > offset) + territory = name.sub_string(offset, name.size() - offset); + return {language, territory, codeset, modifier}; + } + } + + if (name[frg::max(languageLen, territoryEnd)] == '.') { + auto codesetEnd = name.find_first('@', offset); + if (codesetEnd != size_t(-1)) { + if (codesetEnd > offset) + codeset = name.sub_string(offset, codesetEnd - offset); + + offset = codesetEnd + 1; + } else { + if (name.size() > offset) + codeset = name.sub_string(offset, name.size() - offset); + + return {language, territory, codeset, modifier}; + } + } + + if (name.size() > offset) + modifier = name.sub_string(offset, name.size() - offset); + + return {language, territory, codeset, modifier}; +} + +// Given a DecomposedLocale, build a prioritized list of locale names to check +frg::vector, MemoryAllocator> buildLocaleList(DecomposedLocale loc) { + auto &[lang, territory, codeset, modifier] = loc; + __ensure(lang); + + auto normCodeset = [&] { + if (codeset) + return normalizeCodeset(*codeset); + else + return frg::string{getAllocator()}; + }(); + frg::vector, MemoryAllocator> ret{getAllocator()}; + + // the ordering of the components is apparently: + // lang > TERRITORY > normcodeset > codeset > modifier + // codeset is skipped if it equals normcodeset + + // 
lang_TERRITORY.codeset@modifier + if (territory && codeset && codeset != normCodeset && modifier) { + frg::string str{*lang, getAllocator()}; + str += '_'; + str += *territory; + str += '.'; + str += *codeset; + str += '@'; + str += *modifier; + ret.push_back(std::move(str)); + } + // lang_TERRITORY.normcodeset@modifier + if (territory && !normCodeset.empty() && modifier) { + frg::string str{*lang, getAllocator()}; + str += '_'; + str += *territory; + str += '.'; + str += normCodeset; + str += '@'; + str += *modifier; + ret.push_back(std::move(str)); + } + // lang_TERRITORY@modifier + if (territory && modifier) { + frg::string str{*lang, getAllocator()}; + str += '_'; + str += *territory; + str += '@'; + str += *modifier; + ret.push_back(std::move(str)); + } + // lang_TERRITORY.codeset + if (territory && codeset && codeset != normCodeset) { + frg::string str{*lang, getAllocator()}; + str += '_'; + str += *territory; + str += '.'; + str += *codeset; + ret.push_back(std::move(str)); + } + // lang_TERRITORY.normcodeset + if (territory && !normCodeset.empty()) { + frg::string str{*lang, getAllocator()}; + str += '_'; + str += *territory; + str += '.'; + str += normCodeset; + ret.push_back(std::move(str)); + } + // lang_TERRITORY + if (territory) { + frg::string str{*lang, getAllocator()}; + str += '_'; + str += *territory; + ret.push_back(std::move(str)); + } + // lang.codeset@modifier + if (modifier && codeset && codeset != normCodeset) { + frg::string str{*lang, getAllocator()}; + str += '.'; + str += *codeset; + str += '@'; + str += *modifier; + ret.push_back(std::move(str)); + } + // lang.normcodeset@modifier + if (modifier && !normCodeset.empty()) { + frg::string str{*lang, getAllocator()}; + str += '.'; + str += normCodeset; + str += '@'; + str += *modifier; + ret.push_back(std::move(str)); + } + // lang@modifier + if (modifier) { + frg::string str{*lang, getAllocator()}; + str += '@'; + str += *modifier; + ret.push_back(std::move(str)); + } + // lang.codeset + 
if (codeset && codeset != normCodeset) { + frg::string str{*lang, getAllocator()}; + str += '.'; + str += *codeset; + ret.push_back(std::move(str)); + } + // lang.normcodeset + if (!normCodeset.empty()) { + frg::string str{*lang, getAllocator()}; + str += '.'; + str += normCodeset; + ret.push_back(std::move(str)); + } + // lang + ret.push_back(frg::string{*lang, getAllocator()}); + + // if (debugLocale) { + // mlibc::infoLogger() << "mlibc: locale name list for '" << ret[0] << "':" << frg::endlog; + // for (auto e : ret) + // mlibc::infoLogger() << frg::fmt("\t'{}'", e) << frg::endlog; + // } + + return ret; +} + +// open the locale-archive database, if it exists +void openLocaleDatabase() { +#if !MLIBC_MAP_FILE_WINDOWS + return; +#endif + + if (locpaths.empty()) { + auto locpath = getenv("LOCPATH"); + if (locpath && strlen(locpath)) { + cachedLocpath = {locpath, getAllocator()}; + size_t off = 0; + size_t end = 0; + while(end != size_t(-1)) { + auto view = frg::string_view{cachedLocpath}.sub_string(off, cachedLocpath.size() - off); + end = view.find_first(':'); + locpaths.push_back( + frg::string{ + view.sub_string(0, (end == size_t(-1)) ? 
view.size() : end), + getAllocator() + } + ); + } + } + locpaths.push_back(frg::string{"/usr/lib/locale", getAllocator()}); + locpaths.push_back(frg::string{"/usr/share/i18n/locales", getAllocator()}); + } + + if (!cachedLocpath.empty()) + return; + + if (localeArchive) + return; + + struct stat info; + if (mlibc::sys_stat(mlibc::fsfd_target::path, -1, "/usr/lib/locale/locale-archive", 0, &info)) + return; + + auto window = smarter::allocate_shared(getAllocator(), "/usr/lib/locale/locale-archive"); + LocaleArchive::Header *header = static_cast(window->get()); + if (header->magic != LocaleArchive::HEADER_MAGIC) + return; + + localeArchive = std::move(window); +} + +frg::optional findLocaleRecord(const char *name) { + __ensure(localeArchive); + + auto header = static_cast(localeArchive->get()); + if (header->namehash_size <= 2) + return frg::null_opt; + auto namehashtab = reinterpret_cast((uint8_t *) localeArchive->get() + header->namehash_offset); + + auto parsedName = parseLocaleName(name); + auto list = buildLocaleList(parsedName); + + for (auto e : list) { + auto hash = nameHashVal(e.data(), e.size()); + auto idx = hash % header->namehash_size; + auto incr = 1 + hash % (header->namehash_size - 2); + + while(true) { + if (namehashtab[idx].name_offset == 0) + break; + + auto current_entry_name = (char *) localeArchive->get() + namehashtab[idx].name_offset; + + if (namehashtab[idx].hashval == hash && !strcmp(e.data(), current_entry_name)) + return namehashtab[idx].locrec_offset ? 
frg::optional{namehashtab[idx].locrec_offset} : frg::null_opt; + + idx += incr; + if (idx >= header->namehash_size) + idx -= header->namehash_size; + } + } + + return frg::null_opt; +} + +bool parseCategoryInfo(int category, frg::span rec, mlibc::localeinfo *out) { + uint32_t magic; + memcpy(&magic, rec.data(), sizeof(magic)); + if (magic != categoryMagic(category)) + return false; + + uint32_t elements; + memcpy(&elements, rec.data() + 4, sizeof(elements)); + frg::span offsets{reinterpret_cast(rec.data() + 8), elements}; + + switch(category) { + case LC_CTYPE: { + parse_category_array(out->ctype.members, ctype_parser, rec, offsets); + break; + } + case LC_NUMERIC: { + parse_category_array(out->numeric.members, numeric_parser, rec, offsets); + break; + } + case LC_TIME: { + parse_category_array(out->time.members, time_parser, rec, offsets); + break; + } + case LC_COLLATE: + parse_category_array(out->collate.members, collate_parser, rec, offsets); + break; + case LC_MONETARY: { + parse_category_array(out->monetary.members, monetary_parser, rec, offsets); + break; + } + case LC_MESSAGES: { + parse_category_array(out->messages.members, messages_parser, rec, offsets); + break; + } + // skip LC_ALL + case LC_PAPER: { + parse_category_array(out->paper.members, paper_parser, rec, offsets); + break; + } + case LC_NAME: { + parse_category_array(out->name.members, name_parser, rec, offsets); + break; + } + case LC_ADDRESS: { + parse_category_array(out->address.members, address_parser, rec, offsets); + break; + } + case LC_TELEPHONE: { + parse_category_array(out->telephone.members, telephone_parser, rec, offsets); + break; + } + case LC_MEASUREMENT: { + parse_category_array(out->measurement.members, measurement_parser, rec, offsets); + break; + } + case LC_IDENTIFICATION: { + parse_category_array(out->identification.members, identification_parser, rec, offsets); + break; + } + default: + mlibc::infoLogger() << "mlibc: category " << lcNames[category] << " unknown!" 
<< frg::endlog; + return false; + } + + return true; +} + +bool findLocaleFileRecord(int category, const char *name, mlibc::localeinfo *out) { + auto parsedName = parseLocaleName(name); + auto list = buildLocaleList(parsedName); + + for (auto lp : locpaths) { + for (auto locale : list) { + frg::string path{lp, getAllocator()}; + path += '/'; + path += locale; + path += '/'; + if (category == LC_MESSAGES) + path += "LC_MESSAGES/SYS_LC_MESSAGES"; + else + path += lcNames[category]; + + auto e = localeFiles.find(path); + if (e != localeFiles.end()) { + auto &[name, window] = *e; + if (parseCategoryInfo(category, {static_cast(window->get()), window->size()}, out)) + return true; + } else { + struct stat info; + if (mlibc::sys_stat(mlibc::fsfd_target::path, -1, path.data(), 0, &info)) + continue; + + auto window = smarter::allocate_shared(getAllocator(), path.data()); + localeFiles.insert(path, window); + + if (parseCategoryInfo(category, {static_cast(window->get()), window->size()}, out)) { + return true; + } + } + } + } + + return false; +} + +bool findLocaleArchiveRecord(int category, const char *name, mlibc::localeinfo *out) { + if (!localeArchive) + return false; + + __ensure(category != LC_ALL); + + auto recordOff = findLocaleRecord(name); + if (!recordOff) { + if (debugLocale) + mlibc::infoLogger() << "mlibc: Locale '" << name + << "' not found in locale database" << frg::endlog; + return false; + } + + auto locrec = reinterpret_cast((uint8_t *) localeArchive->get() + *recordOff); + if (locrec->record[category].offset == 0 || locrec->record[category].len == 0) + return false; + + const uint8_t *record = reinterpret_cast(localeArchive->get()) + locrec->record[category].offset; + frg::span rec{record, locrec->record[category].len}; + + return parseCategoryInfo(category, rec, out); +} + +} // namespace namespace mlibc { -char *nl_langinfo(nl_item item) { - if(item == CODESET) { - return const_cast("UTF-8"); - } else if(item >= ABMON_1 && item <= ABMON_12) { - 
switch(item) { - case ABMON_1: return const_cast("Jan"); - case ABMON_2: return const_cast("Feb"); - case ABMON_3: return const_cast("Mar"); - case ABMON_4: return const_cast("Apr"); - case ABMON_5: return const_cast("May"); - case ABMON_6: return const_cast("Jun"); - case ABMON_7: return const_cast("Jul"); - case ABMON_8: return const_cast("Aug"); - case ABMON_9: return const_cast("Sep"); - case ABMON_10: return const_cast("Oct"); - case ABMON_11: return const_cast("Nov"); - case ABMON_12: return const_cast("Dec"); - default: - __ensure(!"ABMON_* constants don't seem to be contiguous!"); - __builtin_unreachable(); - } - } else if(item >= MON_1 && item <= MON_12) { - switch(item) { - case MON_1: return const_cast("January"); - case MON_2: return const_cast("Feburary"); - case MON_3: return const_cast("March"); - case MON_4: return const_cast("April"); - case MON_5: return const_cast("May"); - case MON_6: return const_cast("June"); - case MON_7: return const_cast("July"); - case MON_8: return const_cast("August"); - case MON_9: return const_cast("September"); - case MON_10: return const_cast("October"); - case MON_11: return const_cast("November"); - case MON_12: return const_cast("December"); +bool applyCategory(int category, const char *name, localeinfo *info) { + if (!strcmp(name, "C") || !strcmp(name, "POSIX") || !strcmp(name, "") + || !strncmp(name, "C.", 2) || !strncmp(name, "POSIX.", 6)) { + switch (category) { + case LC_CTYPE: info->ctype = cLocale.ctype; break; + case LC_NUMERIC: info->numeric = cLocale.numeric; break; + case LC_TIME: info->time = cLocale.time; break; + case LC_COLLATE: info->collate = cLocale.collate; break; + case LC_MONETARY: info->monetary = cLocale.monetary; break; + case LC_MESSAGES: info->messages = cLocale.messages; break; + // skip LC_ALL + case LC_PAPER: info->paper = cLocale.paper; break; + case LC_NAME: info->name = cLocale.name; break; + case LC_ADDRESS: info->address = cLocale.address; break; + case LC_TELEPHONE: 
info->telephone = cLocale.telephone; break; + case LC_MEASUREMENT: info->measurement = cLocale.measurement; break; + case LC_IDENTIFICATION: info->identification = cLocale.identification; break; default: - __ensure(!"MON_* constants don't seem to be contiguous!"); - __builtin_unreachable(); - } - } else if(item == AM_STR) { - return const_cast("AM"); - } else if(item == PM_STR) { - return const_cast("PM"); - } else if(item >= DAY_1 && item <= DAY_7) { - switch(item) { - case DAY_1: return const_cast("Sunday"); - case DAY_2: return const_cast("Monday"); - case DAY_3: return const_cast("Tuesday"); - case DAY_4: return const_cast("Wednesday"); - case DAY_5: return const_cast("Thursday"); - case DAY_6: return const_cast("Friday"); - case DAY_7: return const_cast("Saturday"); - default: - __ensure(!"DAY_* constants don't seem to be contiguous!"); - __builtin_unreachable(); - } - } else if(item >= ABDAY_1 && item <= ABDAY_7) { - switch(item) { - case ABDAY_1: return const_cast("Sun"); - case ABDAY_2: return const_cast("Mon"); - case ABDAY_3: return const_cast("Tue"); - case ABDAY_4: return const_cast("Wed"); - case ABDAY_5: return const_cast("Thu"); - case ABDAY_6: return const_cast("Fri"); - case ABDAY_7: return const_cast("Sat"); - default: - __ensure(!"ABDAY_* constants don't seem to be contiguous!"); - __builtin_unreachable(); - } - }else if(item == D_FMT) { - return const_cast("%m/%d/%y"); - }else if(item == T_FMT) { - return const_cast("%H:%M:%S"); - }else if(item == T_FMT_AMPM) { - return const_cast("%I:%M:%S %p"); - }else if(item == D_T_FMT) { - return const_cast("%a %b %e %T %Y"); - } else if (item == RADIXCHAR) { - return const_cast("."); - } else if (item == THOUSEP) { - return const_cast(""); - }else if(item == YESEXPR) { - return const_cast("^[yY]"); - }else if(item == NOEXPR) { - return const_cast("^[nN]"); - }else{ - mlibc::infoLogger() << "mlibc: nl_langinfo item " - << item << " is not implemented properly" << frg::endlog; - return const_cast(""); + 
mlibc::infoLogger() << "mlibc: unhandled defaults for category " + << lcNames[category] << " in C/POSIX locale" << frg::endlog; + return false; + } + + return true; + } + + openLocaleDatabase(); + + if (findLocaleArchiveRecord(category, name, info)) { + return true; + } else if (findLocaleFileRecord(category, name, info)) { + return true; + } + + return false; +} + +localeinfo *loadLocale(int category_mask, const char *name, localeinfo *base) { + category_mask &= LC_ALL_MASK; + if(!category_mask) + return nullptr; + + if (!strcmp(name, "POSIX") || !strcmp(name, "C") || !strcmp(name, "")) + return frg::construct(getAllocator()); + + openLocaleDatabase(); + + localeinfo *info = base; + + if (!info) + info = frg::construct(getAllocator()); + + bool error = false; + + while(category_mask) { + auto mlibc_cat = __builtin_ctz(category_mask); + __ensure(mlibc_cat != LC_ALL); + + if (!findLocaleArchiveRecord(mlibc_cat, name, info)) { + error |= !findLocaleFileRecord(mlibc_cat, name, info); + } + + if (error) + break; + + category_mask &= ~(1 << mlibc_cat); + } + + if (error) { + if (debugLocale) + mlibc::infoLogger() << "mlibc: Failed to load locale '" << (name ? 
name : "(null)") + << "' for category mask 0x" << frg::hex_fmt{category_mask} << frg::endlog; + + if (!base) + frg::destruct(getAllocator(), info); + return nullptr; + } + + return info; +} + +localeinfo *useThreadLocalLocale(localeinfo *loc) { + localeinfo *old = current_locale; +#if __MLIBC_POSIX_OPTION + if(loc == LC_GLOBAL_LOCALE) + current_locale = nullptr; + else +#endif // __MLIBC_POSIX_OPTION + if (loc) + current_locale = reinterpret_cast(loc); + + return old; +} + +localeinfo *useGlobalLocale(localeinfo *loc) { + localeinfo *old = current_global_locale; + current_global_locale = reinterpret_cast(loc); + return old; +} + +void freeLocale(localeinfo *loc) { + if(loc && loc != &startingLocale) + frg::destruct(getAllocator(), reinterpret_cast(loc)); +} + +localeinfo *getActiveLocale() { + if (current_locale) + return current_locale; + return current_global_locale; +} + +localeinfo *getGlobalLocale() { + return current_global_locale; +} + +char *nl_langinfo(nl_item item) { + return nl_langinfo_l(item, mlibc::getActiveLocale()); +} + +char *nl_langinfo_l(nl_item item, localeinfo * loc) { + __ensure(loc != nullptr); +#ifdef LC_GLOBAL_LOCALE + __ensure(loc != LC_GLOBAL_LOCALE); +#endif + auto l = reinterpret_cast(loc); + + auto category = item >> 16; + + switch(item) { + case CODESET: + return const_cast(l->ctype.get(CODESET).asString().data()); + + case ABDAY_1 ... ALT_DIGITS: + return const_cast(l->time.get(item).asString().data()); + + case DECIMAL_POINT: + return const_cast(l->numeric.get(DECIMAL_POINT).asString().data()); + case THOUSEP: + return const_cast(l->numeric.get(THOUSANDS_SEP).asString().data()); + case YESEXPR ... 
NOSTR: + return const_cast(l->messages.get(item).asString().data()); + + case CRNCYSTR: + return const_cast(l->monetary.get(CRNCYSTR).asString().data()); + case CURRENCY_SYMBOL: + return const_cast(l->monetary.get(CURRENCY_SYMBOL).asString().data()); + + default: { + mlibc::infoLogger() << "mlibc: nl_langinfo item " << frg::hex_fmt{item & 0xFFFF} + << " of category " << lcNames[category] << " is missing" << frg::endlog; + return const_cast(""); + } } } diff --git a/options/internal/generic/strings.cpp b/options/internal/generic/strings.cpp index 323fbf2d4d..ecb8fb1e5d 100644 --- a/options/internal/generic/strings.cpp +++ b/options/internal/generic/strings.cpp @@ -19,4 +19,11 @@ int strncasecmp(const char *a, const char *b, size_t size) { return 0; } +size_t strnlen(const char *s, size_t n) { + size_t len = 0; + while(len < n && s[len]) + ++len; + return len; +} + } // namespace mlibc diff --git a/options/internal/include/mlibc/charset.hpp b/options/internal/include/mlibc/charset.hpp index a068f05ed9..567a7d9e71 100644 --- a/options/internal/include/mlibc/charset.hpp +++ b/options/internal/include/mlibc/charset.hpp @@ -2,6 +2,8 @@ #define MLIBC_CHARSET_HPP #include +#include +#include namespace mlibc { @@ -12,20 +14,24 @@ struct charset { // Returns true iif the meaning of the first 0x7F characters matches ASCII. 
bool is_ascii_superset(); - bool is_alpha(codepoint c); - bool is_digit(codepoint c); - bool is_xdigit(codepoint c); - bool is_alnum(codepoint c); - bool is_punct(codepoint c); - bool is_graph(codepoint c); - bool is_blank(codepoint c); - bool is_space(codepoint c); - bool is_print(codepoint c); - - bool is_lower(codepoint c); - bool is_upper(codepoint c); - codepoint to_lower(codepoint c); - codepoint to_upper(codepoint c); + bool is_alpha(codepoint c, mlibc::localeinfo *l); + bool is_digit(codepoint c, mlibc::localeinfo *l); + bool is_xdigit(codepoint c, mlibc::localeinfo *l); + bool is_alnum(codepoint c, mlibc::localeinfo *l); + bool is_cntrl(codepoint c, mlibc::localeinfo *l); + bool is_punct(codepoint c, mlibc::localeinfo *l); + bool is_graph(codepoint c, mlibc::localeinfo *l); + bool is_blank(codepoint c, mlibc::localeinfo *l); + bool is_space(codepoint c, mlibc::localeinfo *l); + bool is_print(codepoint c, mlibc::localeinfo *l); + + bool is_lower(codepoint c, mlibc::localeinfo *l); + bool is_upper(codepoint c, mlibc::localeinfo *l); + codepoint to_lower(codepoint c, mlibc::localeinfo *l); + codepoint to_upper(codepoint c, mlibc::localeinfo *l); + + wctype_t wctype(frg::string_view name, mlibc::localeinfo *l); + bool iswctype(wint_t wc, wctype_t, mlibc::localeinfo *l); }; charset *current_charset(); diff --git a/options/internal/include/mlibc/ctype.hpp b/options/internal/include/mlibc/ctype.hpp new file mode 100644 index 0000000000..046ea64193 --- /dev/null +++ b/options/internal/include/mlibc/ctype.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace mlibc { + +int isalpha_l(int c, localeinfo *l); +int isdigit_l(int c, localeinfo *l); +int isspace_l(int c, localeinfo *l); +int isxdigit_l(int c, localeinfo *l); + +int tolower_l(int c, localeinfo *l); + +} // namespace mlibc diff --git a/options/internal/include/mlibc/file-window.hpp b/options/internal/include/mlibc/file-window.hpp index 509f0470c5..29f352539f 100644 --- 
a/options/internal/include/mlibc/file-window.hpp +++ b/options/internal/include/mlibc/file-window.hpp @@ -22,27 +22,28 @@ struct file_window { if(mlibc::sys_stat(mlibc::fsfd_target::fd, fd, "", 0, &info)) mlibc::panicLogger() << "mlibc: Error getting stats for " << path << frg::endlog; + size_ = info.st_size; + #if MLIBC_MAP_FILE_WINDOWS - if(mlibc::sys_vm_map(nullptr, (size_t)info.st_size, PROT_READ, MAP_PRIVATE, + if(mlibc::sys_vm_map(nullptr, size_, PROT_READ, MAP_PRIVATE, fd, 0, &_ptr)) mlibc::panicLogger() << "mlibc: Error mapping file_window to " << path << frg::endlog; #else - _ptr = getAllocator().allocate(info.st_size); + _ptr = getAllocator().allocate(size_); __ensure(_ptr); size_t progress = 0; - size_t st_size = static_cast(info.st_size); - while(progress < st_size) { + while(progress < size_) { ssize_t chunk; if(int e = mlibc::sys_read(fd, reinterpret_cast(_ptr) + progress, - st_size - progress, &chunk); e) + size_ - progress, &chunk); e) mlibc::panicLogger() << "mlibc: Read from file_window failed" << frg::endlog; if(!chunk) break; progress += chunk; } - if(progress != st_size) - mlibc::panicLogger() << "stat reports " << info.st_size << " but we only read " + if(progress != size_) + mlibc::panicLogger() << "stat reports " << size_ << " but we only read " << progress << " bytes" << frg::endlog; #endif @@ -50,14 +51,26 @@ struct file_window { mlibc::panicLogger() << "mlibc: Error closing file_window to " << path << frg::endlog; } - // TODO: Write destructor to deallocate/unmap memory. 
+ ~file_window() { +#if MLIBC_MAP_FILE_WINDOWS + if (mlibc::sys_vm_unmap(_ptr, size_)) + mlibc::panicLogger() << "mlibc: Error unmapping file_window" << frg::endlog; +#else + getAllocator().deallocate(_ptr, size_); +#endif + } - void *get() { + void *get() const { return _ptr; } + size_t size() const { + return size_; + } + private: void *_ptr; + size_t size_; }; #endif // MLIBC_FILE_WINDOW diff --git a/options/internal/include/mlibc/locale-archive-parsing.hpp b/options/internal/include/mlibc/locale-archive-parsing.hpp new file mode 100644 index 0000000000..555893ff1e --- /dev/null +++ b/options/internal/include/mlibc/locale-archive-parsing.hpp @@ -0,0 +1,537 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr bool debugLocale = false; + +#pragma mark - locale-archive format description + +namespace LocaleArchive { + +constexpr uint32_t HEADER_MAGIC = 0xde020109; + +struct Header { + uint32_t magic; + /* Serial number. */ + uint32_t serial; + /* Name hash table. */ + uint32_t namehash_offset; + uint32_t namehash_used; + uint32_t namehash_size; + /* String table. */ + uint32_t string_offset; + uint32_t string_used; + uint32_t string_size; + /* Table with locale records. */ + uint32_t locrectab_offset; + uint32_t locrectab_used; + uint32_t locrectab_size; + /* MD5 sum hash table. */ + uint32_t sumhash_offset; + uint32_t sumhash_used; + uint32_t sumhash_size; +}; + +struct NameHashEntry { + /* Hash value of the name. */ + uint32_t hashval; + /* Offset of the name in the string table. */ + uint32_t name_offset; + /* Offset of the LocaleRecord. */ + uint32_t locrec_offset; +}; + +struct LocaleRecord { + /* # of NameHashEntry records that point here */ + uint32_t refs; + struct { + uint32_t offset; + uint32_t len; + } record[LC_IDENTIFICATION + 1]; +}; + +struct SumHashEntry { + /* MD5 sum. */ + char sum[16]; + /* Offset of the file in the archive. 
*/ + uint32_t file_offset; +}; + +} // namespace LocaleArchive + +#pragma mark - hash and magic calculations +template +constexpr T nameHashVal(const void *key, size_t keylen) { + size_t cnt = 0; + T hval = keylen; + + while (cnt < keylen) { + hval = (hval << 9) | (hval >> (sizeof hval * CHAR_BIT - 9)); + hval += (T)((const unsigned char *)key)[cnt++]; + } + return hval != 0 ? hval : ~((T)0); +} + +constexpr uint32_t categoryMagic(int category) { + if (category == LC_COLLATE) + return 0x20051014 ^ category; + else if (category == LC_CTYPE) + return 0x20090720 ^ category; + else + return 0x20031115 ^ category; +} + +#pragma mark - locale definition field parsers +frg::string_view parse_string(frg::span data) { + auto ptr = reinterpret_cast(data.data()); + auto len = mlibc::strnlen(ptr, data.size()); + return frg::string_view{ptr, frg::min(len + 1, data.size())}; +} + +template +frg::string_view parse_stringlist(frg::span data) { + size_t termIndex = 0; + size_t i = 0; + frg::string_view area{reinterpret_cast(data.data()), data.size()}; + + for (; i < Min; i++) { + termIndex = area.find_first('\0', termIndex) + 1; + __ensure(area[termIndex] != '\0'); + } + + for (; i < Max; i++) { + if (area[termIndex] == '\0') + break; + termIndex = area.find_first('\0', termIndex) + 1; + } + + __ensure(area[termIndex] == '\0'); + + return area.sub_string(0, termIndex + 1); +} + +frg::span parse_bytearray(frg::span data) { + auto ptr = reinterpret_cast(data.data()); + auto len = mlibc::strnlen(ptr, data.size()); + return frg::span{data.data(), len}; +} + +template +frg::span parse_uint32array(frg::span data) { + auto ptr = reinterpret_cast(data.data()); + auto len = frg::min(data.size() & ~(sizeof(uint32_t) - 1), N * sizeof(uint32_t)); + return frg::span{ptr, len}; +} + +void parse_ignore(frg::span) { return; } + +char parse_int_elem(frg::span data) { return data[0] == 255 ? 
CHAR_MAX : data[0]; } + +// parse generic types, typically `uint*_t` +template + requires std::is_trivially_copyable_v +T parse(frg::span data) { + T v{}; + memcpy(&v, data.data(), frg::min(sizeof(v), data.size())); + return v; +} + +#pragma mark - parser execution internals +template +struct is_std_optional : std::false_type {}; + +template +struct is_std_optional> : std::true_type {}; + +template +inline constexpr bool is_std_optional_v = is_std_optional::value; + +template +using parser_result_t = std::invoke_result_t>; + +template +auto +apply_parsers(const T &parser, frg::span base, frg::span offsets) { + constexpr size_t Parsers = std::tuple_size_v; + + return [&](std::index_sequence) { + return std::make_tuple((I < offsets.size() ? [&] { + if constexpr (std::is_void_v(parser.parsers))>>) + return std::nullopt; + else { + return std::optional{std::get(parser.parsers)( + base.subspan(offsets[I], frg::min(sizeof(T), base.size() - offsets[I])) + )}; + } + }() + : std::nullopt)...); + }(std::make_index_sequence{}); +} + +template +void assign_if_present(TupleTie &tie, const TupleIn &in, std::index_sequence) { + ( + [](auto &var, const T &opt) { + if constexpr (is_std_optional_v) { + if (opt) { + var = *opt; + + if constexpr (std::is_same_v< + std::remove_cvref_t, + frg::string_view>) + __ensure(var.asString().ends_with("\0")); + } + } + }(std::get(tie), std::get(in)), + ... 
+ ); +} + +template +void parse_category_array( + std::array &opts, + const T &parser, + frg::span base, + frg::span offsets +) { + auto res = apply_parsers(parser, base, offsets); + + constexpr size_t tuple_size = std::tuple_size_v; + constexpr size_t assign_count = std::min(tuple_size, Opts); + + assign_if_present(opts, res, std::make_index_sequence{}); +} + +#pragma mark - locale category parser definitions +template +struct category { + const char *name; + int glibc_val; + std::tuple parsers; +}; + +auto numeric_parser = category{ + .name = "LC_NUMERIC", + .glibc_val = LC_NUMERIC, + .parsers = std::make_tuple( + parse_string, // decimal_point + parse_string, // thousands_sep + parse_bytearray, // grouping + parse, // numeric_decimal_point_wc + parse, // numeric_thousands_sep_wc, + parse_string // codeset + ) +}; + +auto messages_parser = category{ + .name = "LC_MESSAGES", + .glibc_val = LC_MESSAGES, + .parsers = std::make_tuple( + parse_string, // yesexpr + parse_string, // noexpr + parse_string, // yesstr + parse_string, // nostr + parse_string // codeset + ) +}; + +auto monetary_parser = category{ + .name = "LC_MONETARY", + .glibc_val = LC_MONETARY, + .parsers = std::make_tuple( + parse_string, // int_curr_symbol + parse_string, // currency_symbol + parse_string, // mon_decimal_point + parse_string, // mon_thousands_sep + parse_bytearray, // mon_grouping + parse_string, // positive_sign + parse_string, // negative_sign + parse, // int_frac_digits + parse, // frac_digits + parse, // p_cs_precedes + parse, // p_sep_by_space + parse, // n_cs_precedes + parse, // n_sep_by_space + parse, // p_sign_posn + parse, // n_sign_posn + parse_string, // crncystr + parse_int_elem, // int_p_cs_precedes + parse_int_elem, // int_n_cs_precedes + parse_int_elem, // int_p_sep_by_space + parse_int_elem, // int_n_sep_by_space + parse_int_elem, // int_p_sign_posn + parse_int_elem // int_n_sign_posn + ) +}; + +auto time_parser = category{ + .name = "LC_TIME", + .glibc_val = LC_TIME, 
+ .parsers = std::make_tuple( + parse_string, // abday1 + parse_string, // abday2 + parse_string, // abday3 + parse_string, // abday4 + parse_string, // abday5 + parse_string, // abday6 + parse_string, // abday7 + parse_string, // day1 + parse_string, // day2 + parse_string, // day3 + parse_string, // day4 + parse_string, // day5 + parse_string, // day6 + parse_string, // day7 + parse_string, // abmon1 + parse_string, // abmon2 + parse_string, // abmon3 + parse_string, // abmon4 + parse_string, // abmon5 + parse_string, // abmon6 + parse_string, // abmon7 + parse_string, // abmon8 + parse_string, // abmon9 + parse_string, // abmon10 + parse_string, // abmon11 + parse_string, // abmon12 + parse_string, // mon1 + parse_string, // mon2 + parse_string, // mon3 + parse_string, // mon4 + parse_string, // mon5 + parse_string, // mon6 + parse_string, // mon7 + parse_string, // mon8 + parse_string, // mon9 + parse_string, // mon10 + parse_string, // mon11 + parse_string, // mon12 + parse_string, // am + parse_string, // pm + parse_string, // d_t_fmt + parse_string, // d_fmt + parse_string, // t_fmt + parse_string, // t_fmt_ampm + parse_string, // era + parse_string, // era_year + parse_string, // era_d_fmt + parse_string, // alt_digits + parse_string, // era_d_t_fmt + parse_string // era_t_fmt + ) +}; + +auto collate_parser = category{ + .name = "LC_COLLATE", + .glibc_val = LC_COLLATE, + .parsers = std::make_tuple( + parse, // nrules + parse_string, // rulesets + parse_string, // tablemb + parse_string, // weightmb + parse_string, // extramb + parse_string, // indirectmb + parse_string, // tablewc + parse_string, // weightwc + parse_string, // extrawc + parse_string, // indirectwc + parse, // symb-hash-sizemb + parse_string, // symb-tablemb + parse_string, // symb-extramb + parse_string, // collseqmb + parse_string, // collseqwc + parse_string // codeset + ) +}; + +auto ctype_parser = category{ + .name = "LC_CTYPE", + .glibc_val = LC_CTYPE, + .parsers = std::make_tuple( + 
parse_bytearray, // class + parse_uint32array<256 + 128>, // toupper + parse_ignore, // gap1 + parse_uint32array<256 + 128>, // tolower + parse_ignore, // gap2 + parse_bytearray, // class32 + parse_ignore, // gap3 + parse_ignore, // gap4 + parse_ignore, // gap5 + parse_ignore, // gap6 + parse_stringlist<10, 32>, // class_names + parse_stringlist<2, 32>, // map_names + parse_ignore, // TODO: width + parse, // mb_cur_max + parse_string, // codeset_name + parse_ignore, // TODO: toupper32 + parse_ignore, // TODO: tolower32 + parse, // class_offset + parse, // map_offset + parse, // indigits_mb_len + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_ignore, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray, + parse_bytearray + ) +}; + +auto paper_parser = category{ + .name = "LC_PAPER", + .glibc_val = LC_PAPER, + .parsers = std::make_tuple( + parse, // height + parse, // width + parse_string // codeset + ) +}; + +auto name_parser = category{ + .name = "LC_NAME", + .glibc_val = 
LC_NAME, + .parsers = std::make_tuple( + parse_string, // fmt + parse_string, // gen + parse_string, // mr + parse_string, // mrs + parse_string, // miss + parse_string, // ms + parse_string // codeset + ) +}; + +auto address_parser = category{ + .name = "LC_ADDRESS", + .glibc_val = LC_ADDRESS, + .parsers = std::make_tuple( + parse_string, // postal_fmt + parse_string, // country_name + parse_string, // country_post + parse_string, // country_ab2 + parse_string, // country_ab3 + parse_string, // country_car + parse, // country_num + parse_string, // country_isbn + parse_string, // lang_name + parse_string, // lang_ab + parse_string, // lang_term + parse_string, // lang_lib + parse_string // codeset + ) +}; + +auto telephone_parser = category{ + .name = "LC_TELEPHONE", + .glibc_val = LC_TELEPHONE, + .parsers = std::make_tuple( + parse_string, // tel_int_fmt + parse_string, // tel_dom_fmt + parse_string, // int_select + parse_string, // int_prefix + parse_string // codeset + ) +}; + +auto measurement_parser = category{ + .name = "LC_MEASUREMENT", + .glibc_val = LC_MEASUREMENT, + .parsers = std::make_tuple( + parse, // measurement + parse_string // codeset + ) +}; + +auto identification_parser = category{ + .name = "LC_IDENTIFICATION", + .glibc_val = LC_IDENTIFICATION, + .parsers = std::make_tuple( + parse_string, // title + parse_string, // source + parse_string, // address + parse_string, // contact + parse_string, // email + parse_string, // tel + parse_string, // fax + parse_string, // language + parse_string, // territory + parse_string, // audience + parse_string, // application + parse_string, // abbreviation + parse_string, // revision + parse_string, // date + parse_string, // category + parse_string // codeset + ) +}; diff --git a/options/internal/include/mlibc/locale.hpp b/options/internal/include/mlibc/locale.hpp index a46a2c3e5e..8b1a987df2 100644 --- a/options/internal/include/mlibc/locale.hpp +++ b/options/internal/include/mlibc/locale.hpp @@ -1,11 
+1,217 @@ #ifndef MLIBC_LOCALE #define MLIBC_LOCALE +#include #include +#include +#include +#include namespace mlibc { +struct category_item { + using value_types = std::variant< + std::monostate, + uint32_t, + frg::span, + frg::string_view, + frg::basic_string_view + >; + + category_item() = default; + + category_item(uint32_t v) { + value = v; + } + + category_item(frg::span v) { + value = v; + } + + category_item(frg::span v) { + value = frg::span{reinterpret_cast(v.data()), v.size() * sizeof(uint32_t)}; + } + + category_item(frg::string_view v) { + value = v; + } + + category_item(frg::basic_string_view v) { + value = v; + } + + frg::string_view asString() const { + if (std::holds_alternative(value)) + return {""}; + return std::get(value); + } + + uint32_t asUint32() const { + return std::get(value); + } + + frg::span asByteSpan() const { + if (std::holds_alternative(value)) + return {}; + return std::get>(value); + } + + frg::span asUint16Span() const { + if (std::holds_alternative(value)) + return {}; + auto span = std::get>(value); + return frg::span{reinterpret_cast(span.data()), span.size() / sizeof(uint16_t)}; + } + + frg::span asUint32Span() const { + if (std::holds_alternative(value)) + return {}; + auto span = std::get>(value); + return frg::span{reinterpret_cast(span.data()), span.size() / sizeof(uint32_t)}; + } + + value_types value; +}; + +template +struct nl_category { + std::array members; + + const category_item &get(nl_item item) { + __ensure(item >> 16 == Category); + return members[item & 0xFFFF]; + } + +protected: + void set(nl_item item, category_item value) { + __ensure(item >> 16 == Category); + members[item & 0xFFFF] = value; + } +}; + +extern frg::array cLocaleCtypeClass; +extern frg::array cLocaleCtypeClass32; +extern frg::array cLocaleCtypeToUpper; +extern frg::array cLocaleCtypeToLower; +extern frg::array cLocaleClassUpper; +extern frg::array cLocaleClassLower; +extern frg::array cLocaleClassAlpha; +extern frg::array 
cLocaleClassDigit; +extern frg::array cLocaleClassXdigit; +extern frg::array cLocaleClassSpace; +extern frg::array cLocaleClassPrint; +extern frg::array cLocaleClassGraph; +extern frg::array cLocaleClassBlank; +extern frg::array cLocaleClassCntrl; +extern frg::array cLocaleClassPunct; +extern frg::array cLocaleClassAlnum; +extern frg::array cLocaleMapToUpper; +extern frg::array cLocaleMapToLower; + +struct nl_ctype : nl_category { + nl_ctype(); + + frg::span ctype_class() { + return get(_NL_CTYPE_CLASS).asUint16Span(); + } + + frg::span map_tolower() { + return get(_NL_CTYPE_TOLOWER).asUint32Span(); + } + + frg::span map_toupper() { + return get(_NL_CTYPE_TOUPPER).asUint32Span(); + } + + frg::span ctype_class32() { + return get(_NL_CTYPE_CLASS32).asByteSpan(); + } + + frg::string_view class_names() { + return get(_NL_CTYPE_CLASS_NAMES).asString(); + } + + uint32_t class_offset() { + return get(_NL_CTYPE_CLASS_OFFSET).asUint32(); + } + + uint32_t map_offset() { + return get(_NL_CTYPE_MAP_OFFSET).asUint32(); + } +}; + +struct nl_numeric : nl_category { + nl_numeric(); +}; + +struct nl_time : nl_category { + nl_time(); +}; + +struct nl_collate : nl_category { + nl_collate(); +}; + +struct nl_monetary : nl_category { + nl_monetary(); +}; + +struct nl_messages : nl_category { + nl_messages(); +}; + +struct nl_paper : nl_category { + nl_paper(); +}; + +struct nl_name : nl_category { + nl_name(); +}; + +struct nl_address : nl_category { + nl_address(); +}; + +struct nl_telephone : nl_category { + nl_telephone(); +}; + +struct nl_measurement : nl_category { + nl_measurement(); +}; + +struct nl_identification : nl_category { + nl_identification(); +}; + +struct localeinfo { + frg::string localeName = {"C", getAllocator()}; + + mlibc::nl_ctype ctype = {}; + mlibc::nl_numeric numeric = {}; + mlibc::nl_time time = {}; + mlibc::nl_collate collate = {}; + mlibc::nl_monetary monetary = {}; + mlibc::nl_messages messages = {}; + mlibc::nl_paper paper = {}; + mlibc::nl_name name = 
{}; + mlibc::nl_address address = {}; + mlibc::nl_telephone telephone = {}; + mlibc::nl_measurement measurement = {}; + mlibc::nl_identification identification = {}; +}; + +bool applyCategory(int category, const char *name, localeinfo *info); +localeinfo *loadLocale(int category, const char *name, localeinfo *base); +localeinfo *useThreadLocalLocale(localeinfo *loc); +localeinfo *useGlobalLocale(localeinfo *loc); +void freeLocale(localeinfo *loc); + +localeinfo *getActiveLocale(); +localeinfo *getGlobalLocale(); + char *nl_langinfo(nl_item item); +char *nl_langinfo_l(nl_item item, localeinfo *l); } // namespace mlibc diff --git a/options/internal/include/mlibc/strings.hpp b/options/internal/include/mlibc/strings.hpp index 5a93c7c67b..a479695217 100644 --- a/options/internal/include/mlibc/strings.hpp +++ b/options/internal/include/mlibc/strings.hpp @@ -6,6 +6,7 @@ namespace mlibc { int strncasecmp(const char *a, const char *b, size_t size); +size_t strnlen(const char *s, size_t n); } // namespace mlibc diff --git a/options/internal/include/mlibc/strtofp.hpp b/options/internal/include/mlibc/strtofp.hpp index f9c5e20d72..bd8a96398c 100644 --- a/options/internal/include/mlibc/strtofp.hpp +++ b/options/internal/include/mlibc/strtofp.hpp @@ -1,14 +1,19 @@ #ifndef MLIBC_STRTOFP_HPP #define MLIBC_STRTOFP_HPP -#include #include +#include +#include +#include #include namespace mlibc { template -T strtofp(const char *str, char **endptr) { +T strtofp(const char *str, char **endptr, mlibc::localeinfo *l) { + while(isspace_l(*str, l)) + str++; + if (strcmp(str, "INF") == 0 || strcmp(str, "inf") == 0) { if (endptr) *endptr = (char *)str + 3; @@ -19,14 +24,14 @@ T strtofp(const char *str, char **endptr) { else return __builtin_infl(); } else if (strcmp(str, "INFINITY") == 0 || strcmp(str, "infinity") == 0) { - if (endptr) - *endptr = (char *)str + 8; - if constexpr (std::is_same_v) - return __builtin_inff(); - else if constexpr (std::is_same_v) - return __builtin_inf(); - else - 
return __builtin_infl(); + if (endptr) + *endptr = (char *)str + 8; + if constexpr (std::is_same_v) + return __builtin_inff(); + else if constexpr (std::is_same_v) + return __builtin_inf(); + else + return __builtin_infl(); } else if (strncmp(str, "NAN", 3) == 0 || strncmp(str, "nan", 3) == 0) { if (endptr) *endptr = (char *)str + 3; @@ -38,6 +43,8 @@ T strtofp(const char *str, char **endptr) { return __builtin_nanl(""); } + auto decimal = l->numeric.get(DECIMAL_POINT).asString(); + bool negative = *str == '-'; if (*str == '+' || *str == '-') str++; @@ -54,7 +61,7 @@ T strtofp(const char *str, char **endptr) { if (!hex) { while (true) { - if (!isdigit(*tmp)) + if (!isdigit_l(*tmp, l)) break; result *= static_cast(10); result += static_cast(*tmp - '0'); @@ -62,22 +69,22 @@ T strtofp(const char *str, char **endptr) { } } else { while (true) { - if (!isxdigit(*tmp)) + if (!isxdigit_l(*tmp, l)) break; result *= static_cast(16); - result += static_cast(*tmp <= '9' ? (*tmp - '0') : (tolower(*tmp) - 'a' + 10)); + result += static_cast(*tmp <= '9' ? (*tmp - '0') : (tolower_l(*tmp, l) - 'a' + 10)); tmp++; } } - if (*tmp == '.') { - tmp++; + if (!strncmp(tmp, decimal.data(), strnlen(decimal.data(), decimal.size()))) { + tmp += strnlen(decimal.data(), decimal.size()); if (!hex) { T d = static_cast(10); while (true) { - if (!isdigit(*tmp)) + if (!isdigit_l(*tmp, l)) break; result += static_cast(*tmp - '0') / d; d *= static_cast(10); @@ -87,9 +94,9 @@ T strtofp(const char *str, char **endptr) { T d = static_cast(16); while (true) { - if (!isxdigit(*tmp)) + if (!isxdigit_l(*tmp, l)) break; - result += static_cast(*tmp <= '9' ? (*tmp - '0') : (tolower(*tmp) - 'a' + 10)) / d; + result += static_cast(*tmp <= '9' ? 
(*tmp - '0') : (tolower_l(*tmp, l) - 'a' + 10)) / d; d *= static_cast(16); tmp++; } @@ -106,7 +113,7 @@ T strtofp(const char *str, char **endptr) { int exp = 0; while (true) { - if (!isdigit(*tmp)) + if (!isdigit_l(*tmp, l)) break; exp *= 10; exp += *tmp - '0'; @@ -133,7 +140,7 @@ T strtofp(const char *str, char **endptr) { int exp = 0; while (true) { - if (!isdigit(*tmp)) + if (!isdigit_l(*tmp, l)) break; exp *= 10; exp += *tmp - '0'; @@ -160,6 +167,6 @@ T strtofp(const char *str, char **endptr) { return result; } -} +} // namespace mlibc #endif // MLIBC_STRTOFP_HPP diff --git a/options/posix/generic/langinfo.cpp b/options/posix/generic/langinfo.cpp index b239cbdc14..f4ddc7a27e 100644 --- a/options/posix/generic/langinfo.cpp +++ b/options/posix/generic/langinfo.cpp @@ -8,8 +8,7 @@ char *nl_langinfo(nl_item item) { return mlibc::nl_langinfo(item); } -char *nl_langinfo_l(nl_item, locale_t) { - __ensure(!"Not implemented"); - __builtin_unreachable(); +char *nl_langinfo_l(nl_item item, locale_t loc) { + return mlibc::nl_langinfo_l(item, reinterpret_cast(loc)); } diff --git a/options/posix/generic/posix_ctype.cpp b/options/posix/generic/posix_ctype.cpp index 19f129f004..914c3f57e1 100644 --- a/options/posix/generic/posix_ctype.cpp +++ b/options/posix/generic/posix_ctype.cpp @@ -2,135 +2,251 @@ #include #include +#include +#include -int isalnum_l(int c, locale_t) { - return isalnum(c); +int isalnum_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_alnum(cp, static_cast(loc)); } -int isalpha_l(int c, locale_t) { - return isalpha(c); +int isalpha_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_alpha(cp, static_cast(loc)); } -int isblank_l(int c, locale_t) { - 
return isblank(c); +int isblank_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_blank(cp, static_cast(loc)); } -int iscntrl_l(int c, locale_t) { - return iscntrl(c); +int iscntrl_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_cntrl(cp, static_cast(loc)); } -int isdigit_l(int c, locale_t) { - return isdigit(c); +int isdigit_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_digit(cp, static_cast(loc)); } -int isgraph_l(int c, locale_t) { - return isgraph(c); +int isgraph_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_graph(cp, static_cast(loc)); } -int islower_l(int c, locale_t) { - return islower(c); +int islower_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_lower(cp, static_cast(loc)); } -int isprint_l(int c, locale_t) { - return isprint(c); +int isprint_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_print(cp, static_cast(loc)); } -int ispunct_l(int c, locale_t) { - return ispunct(c); +int ispunct_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != 
mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_punct(cp, static_cast(loc)); } -int isspace_l(int c, locale_t) { - return isspace(c); +int isspace_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_space(cp, static_cast(loc)); } -int isupper_l(int c, locale_t) { - return isupper(c); +int isupper_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_upper(cp, static_cast(loc)); } -int isxdigit_l(int c, locale_t) { - return isxdigit(c); +int isxdigit_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_xdigit(cp, static_cast(loc)); } int isascii_l(int c, locale_t) { - return isascii(c); + return isascii(c); } -int tolower_l(int c, locale_t) { - return tolower(c); +int tolower_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->to_lower(cp, static_cast(loc)); } -int toupper_l(int c, locale_t) { - return toupper(c); +int toupper_l(int nc, locale_t loc) { + auto cc = mlibc::current_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->to_upper(cp, static_cast(loc)); } -int iswalnum_l(wint_t c, locale_t) { - return iswalnum(c); +int iswalnum_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return 
mlibc::current_charset()->is_alnum(cp, static_cast(loc)); } -int iswblank_l(wint_t c, locale_t) { - return iswblank(c); +int iswblank_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_blank(cp, static_cast(loc)); } -int iswcntrl_l(wint_t c, locale_t) { - return iswcntrl(c); +int iswcntrl_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_cntrl(cp, static_cast(loc)); } -int iswdigit_l(wint_t c, locale_t) { - return iswdigit(c); +int iswdigit_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_digit(cp, static_cast(loc)); } -int iswgraph_l(wint_t c, locale_t) { - return iswgraph(c); +int iswgraph_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_graph(cp, static_cast(loc)); } -int iswlower_l(wint_t c, locale_t) { - return iswlower(c); +int iswlower_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_lower(cp, static_cast(loc)); } -int iswprint_l(wint_t c, locale_t) { - return iswprint(c); +int iswprint_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_print(cp, static_cast(loc)); } -int 
iswpunct_l(wint_t c, locale_t) { - return iswpunct(c); +int iswpunct_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_punct(cp, static_cast(loc)); } -int iswspace_l(wint_t c, locale_t) { - return iswspace(c); +int iswspace_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_space(cp, static_cast(loc)); } -int iswupper_l(wint_t c, locale_t) { - return iswupper(c); +int iswupper_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_upper(cp, static_cast(loc)); } -int iswxdigit_l(wint_t c, locale_t) { - return iswxdigit(c); +int iswxdigit_l(wint_t nc, locale_t loc) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(nc, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_xdigit(cp, static_cast(loc)); } -int iswalpha_l(wint_t c, locale_t) { - return iswalpha(c); +int iswalpha_l(wint_t c, locale_t l) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(c, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->is_alpha(cp, static_cast(l)); } -wctype_t wctype_l(const char* p, locale_t) { - return wctype(p); +wctype_t wctype_l(const char* p, locale_t loc) { + auto l = static_cast(loc); + return mlibc::current_charset()->wctype({p}, l); } -int iswctype_l(wint_t w, wctype_t t, locale_t) { - return iswctype(w, t); +int iswctype_l(wint_t wc, wctype_t t, locale_t loc) { + auto l = static_cast(loc); + return 
mlibc::current_charset()->iswctype(wc, t, l); } -wint_t towlower_l(wint_t c, locale_t) { - return towlower(c); +wint_t towlower_l(wint_t c, locale_t l) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(c, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->to_lower(cp, static_cast(l)); } -wint_t towupper_l(wint_t c, locale_t) { - return towupper(c); +wint_t towupper_l(wint_t c, locale_t l) { + auto cc = mlibc::platform_wide_charcode(); + mlibc::codepoint cp; + if(auto e = cc->promote(c, cp); e != mlibc::charcode_error::null) + return 0; + return mlibc::current_charset()->to_upper(cp, static_cast(l)); } wctrans_t wctrans_l(const char* c, locale_t) { - return wctrans(c); + return wctrans(c); } wint_t towctrans_l(wint_t c, wctrans_t desc, locale_t) { - return towctrans(c, desc); + return towctrans(c, desc); } diff --git a/options/posix/generic/posix_locale.cpp b/options/posix/generic/posix_locale.cpp index f19e623e46..353b52d7b7 100644 --- a/options/posix/generic/posix_locale.cpp +++ b/options/posix/generic/posix_locale.cpp @@ -1,34 +1,18 @@ #include #include #include +#include -namespace { - -bool newlocale_seen = false; -bool uselocale_seen = false; - -} // namespace - -locale_t newlocale(int, const char *, locale_t) { - // Due to all of the locale functions being stubs, the locale will not be used - if(!newlocale_seen) { - mlibc::infoLogger() << "mlibc: newlocale() is a no-op" << frg::endlog; - newlocale_seen = true; - } - return nullptr; +locale_t newlocale(int category, const char *name, locale_t base) { + return mlibc::loadLocale(category, name, reinterpret_cast(base)); } -void freelocale(locale_t) { - mlibc::infoLogger() << "mlibc: freelocale() is a no-op" << frg::endlog; - return; +void freelocale(locale_t loc) { + mlibc::freeLocale(reinterpret_cast(loc)); } -locale_t uselocale(locale_t) { - if(!uselocale_seen) { - mlibc::infoLogger() << "mlibc: uselocale() is a no-op" << 
frg::endlog; - uselocale_seen = true; - } - return nullptr; +locale_t uselocale(locale_t loc) { + return mlibc::useThreadLocalLocale(reinterpret_cast(loc)); } locale_t duplocale(locale_t) { diff --git a/options/posix/generic/posix_stdlib.cpp b/options/posix/generic/posix_stdlib.cpp index 54c96d8591..2fdc09fd30 100644 --- a/options/posix/generic/posix_stdlib.cpp +++ b/options/posix/generic/posix_stdlib.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -523,19 +525,16 @@ int grantpt(int) { return 0; } -double strtod_l(const char *__restrict__ nptr, char ** __restrict__ endptr, locale_t) { - mlibc::infoLogger() << "mlibc: strtod_l ignores locale!" << frg::endlog; - return strtod(nptr, endptr); +double strtod_l(const char *__restrict__ nptr, char ** __restrict__ endptr, locale_t loc) { + return mlibc::strtofp(nptr, endptr, static_cast(loc)); } -long double strtold_l(const char *__restrict__, char ** __restrict__, locale_t) { - __ensure(!"Not implemented"); - __builtin_unreachable(); +long double strtold_l(const char *__restrict__ nptr, char ** __restrict__ endptr, locale_t loc) { + return mlibc::strtofp(nptr, endptr, static_cast(loc)); } -float strtof_l(const char *__restrict__ nptr, char **__restrict__ endptr, locale_t) { - mlibc::infoLogger() << "mlibc: strtof_l ignores locales" << frg::endlog; - return strtof(nptr, endptr); +float strtof_l(const char *__restrict__ nptr, char **__restrict__ endptr, locale_t loc) { + return mlibc::strtofp(nptr, endptr, static_cast(loc)); } int strcoll_l(const char *, const char *, locale_t) { diff --git a/subprojects/libsmarter.wrap b/subprojects/libsmarter.wrap new file mode 100644 index 0000000000..2ea987bf1c --- /dev/null +++ b/subprojects/libsmarter.wrap @@ -0,0 +1,4 @@ +[wrap-git] +directory = libsmarter +url = https://github.com/managarm/libsmarter.git +revision = 338cce63b22c85557c9274ad8ecfc8423a14024d diff --git a/tests/ansi/locale.c b/tests/ansi/locale.c index 175f68e5b0..224bc3a705 
100644 --- a/tests/ansi/locale.c +++ b/tests/ansi/locale.c @@ -1,18 +1,42 @@ -#include -#include +#include +#include #include #include -#include +#include +#include +#include +#include int main() { - wchar_t c = 0xC9; - unsigned char buf[MB_LEN_MAX] = { 0 }; + char buf[64] = { 0 }; + wint_t c = 0xC9; setlocale(LC_ALL, ""); if (sprintf(buf, "%lc", c) < 0) return -1; - assert(buf[0] == 0xc3 && buf[1] == 0x89 + assert((unsigned char) buf[0] == 0xc3 && (unsigned char) buf[1] == 0x89 && buf[2] == '\0' && buf[3] == '\0'); + setlocale(LC_ALL, "C"); + + snprintf(buf, sizeof(buf), "%2.2f", 12.34); + assert(!strcmp("12.34", buf)); + + setlocale(LC_ALL, "de_DE"); + + snprintf(buf, sizeof(buf), "%2.2f", 12.34); + assert(!strcmp("12,34", buf)); + + char lower = tolower('A'); + assert(lower == 'a'); + char upper = toupper(lower); + assert(upper == 'A'); + + assert(isalpha('z')); + assert(!isalpha('z' + 1)); + + snprintf(buf, sizeof(buf), "%'g", 12345.67); + // assert(!strcmp("12.345,7", buf)); + return 0; } diff --git a/tests/ansi/strtof.c b/tests/ansi/strtof.c index a13af90d5d..7f74668172 100644 --- a/tests/ansi/strtof.c +++ b/tests/ansi/strtof.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #define FLT_RANGE 0.000001f @@ -22,6 +23,8 @@ assert(pEnd == (off == -1 ? 
s + strlen(s) : s + off)); }) int main () { + setlocale(LC_ALL, "C"); + DO_TEST("0", 0.0f, -1, strtof, FLT_RANGE); DO_TEST("0.12", 0.12f, -1, strtof, FLT_RANGE); DO_TEST("12", 12.0f, -1, strtof, FLT_RANGE); diff --git a/tests/meson.build b/tests/meson.build index 6690249ff5..cf0217b475 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -33,6 +33,7 @@ all_test_cases = [ 'ansi/fgetpos', 'ansi/fputs', 'ansi/ftell', + 'ansi/locale', 'bsd/ns_get_put', 'bsd/reallocarray', 'bsd/strl', @@ -107,6 +108,7 @@ all_test_cases = [ 'posix/shm', 'posix/swab', 'posix/select', + 'posix/locale', 'glibc/getopt', 'glibc/ffsl-ffsll', 'glibc/error_message_count', @@ -157,11 +159,20 @@ host_libc_excluded_test_cases = [ 'glibc/error_at_line', # These tests depend on mlibc error messages. 'rtld/search-order', # See rtld/search-order/meson.build. ] + +if target_machine.cpu_family() == 'm68k' + host_libc_excluded_test_cases += [ + 'ansi/locale', + 'posix/locale', + ] +endif + host_libc_noasan_test_cases = [ 'posix/pthread_cancel', 'posix/pthread_attr', # does some stack overflowing to check stack size 'posix/posix_memalign', 'posix/search', # requires tdelete (#351) + 'posix/locale', # encounters memory leaks 'ansi/calloc', # does some overflowing 'linux/pthread_attr', # encounters memory leaks ] diff --git a/tests/posix/locale.c b/tests/posix/locale.c new file mode 100644 index 0000000000..8ee8ece7f2 --- /dev/null +++ b/tests/posix/locale.c @@ -0,0 +1,191 @@ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int main() { + char buf[64] = { 0 }; + wint_t c = 0xC9; + char *locale = setlocale(LC_ALL, ""); + assert(locale && strlen(locale)); + if (snprintf(buf, MB_LEN_MAX, "%lc", c) < 0) + return -1; + + assert((unsigned char) buf[0] == 0xc3 && (unsigned char) buf[1] == 0x89 + && buf[2] == '\0' && buf[3] == '\0'); + + locale_t fake = newlocale(LC_ALL_MASK, "swamp german", 0); + 
assert(fake == 0); + + locale_t german = newlocale(LC_ALL_MASK, "de_DE.utf8", 0); + assert(german != 0); + + locale_t posix = newlocale(LC_ALL_MASK, "POSIX", 0); + assert(posix != 0); + + locale = setlocale(LC_ALL, "C"); + assert(locale && strlen(locale)); + + char *decimal_point = nl_langinfo_l(DECIMAL_POINT, german); + assert(!strcmp(",", decimal_point)); + char *tousands_sep = nl_langinfo_l(THOUSEP, german); + assert(!strcmp(".", tousands_sep)); + char *yesexpr = nl_langinfo_l(YESEXPR, german); + assert(!strcmp("^[+1jJyY]", yesexpr)); + char *noexpr = nl_langinfo_l(NOEXPR, german); + assert(!strcmp("^[-0nN]", noexpr)); + char *currency_symbol = nl_langinfo_l(CURRENCY_SYMBOL, german); + assert(!strcmp("€", currency_symbol)); + char *crncystr = nl_langinfo_l(CRNCYSTR, german); + assert(!strcmp("+€", crncystr)); + char *abday_1 = nl_langinfo_l(ABDAY_1, german); + assert(!strcmp("So", abday_1)); + char *day_1 = nl_langinfo_l(DAY_1, german); + assert(!strcmp("Sonntag", day_1)); + char *day_7 = nl_langinfo_l(DAY_7, german); + assert(!strcmp("Samstag", day_7)); + char *pm = nl_langinfo_l(PM_STR, german); + assert(!strcmp("", pm)); + + char lower = tolower_l('A', german); + assert(lower == 'a'); + char upper = toupper_l(lower, german); + assert(upper == 'A'); + wchar_t wlower = towlower_l(L'Ä', german); + assert(wlower == L'ä'); + + assert(isblank_l(' ', german) == true); + assert(isblank_l('\t', german) == true); + assert(isblank_l('\n', german) == false); + assert(isblank_l('a', german) == false); + assert(isblank_l('1', german) == false); + assert(isblank_l('\v', german) == false); + assert(isblank_l('\r', german) == false); + + assert(isblank_l(' ', posix) == true); + assert(isblank_l('\t', posix) == true); + assert(isblank_l('\n', posix) == false); + assert(isblank_l('a', posix) == false); + assert(isblank_l('1', posix) == false); + assert(isblank_l('\v', posix) == false); + assert(isblank_l('\r', posix) == false); + + uselocale(german); + + assert(iswalpha_l(L'ß', 
german)); + assert(iswlower_l(L'ß', german)); + assert(!iswalpha_l(L'ß', posix)); + + wlower = towlower_l(L'Ä', german); + assert(wlower == L'ä'); + + memset(buf, 0, sizeof(buf)); + snprintf(buf, sizeof(buf), "%2.2f", 12.34); + assert(!strcmp("12,34", buf)); + + assert(strtod_l(" 12,34", NULL, german) == 12.34); + + wctype_t wctype = wctype_l("alpha", german); + assert(wctype); + assert(iswctype_l(L'ß', wctype, german)); + assert(!iswctype_l(L'÷', wctype, german)); + assert(!iswctype_l(L'❔', wctype, german)); + wctype = wctype_l("graph", german); + assert(wctype); + assert(iswctype_l(L'ß', wctype, german)); + assert(iswctype_l(L'÷', wctype, german)); + assert(iswctype_l(L'❔', wctype, german)); + assert(!iswctype_l(L'\x91', wctype, german)); + assert(!iswctype_l(WEOF, wctype, german)); + + uselocale(LC_GLOBAL_LOCALE); + freelocale(german); + + snprintf(buf, sizeof(buf), "%2.2f", 12.34); + assert(!strcmp("12.34", buf)); + + decimal_point = nl_langinfo_l(DECIMAL_POINT, posix); + assert(!strcmp(".", decimal_point)); + tousands_sep = nl_langinfo_l(THOUSEP, posix); + assert(!strcmp("", tousands_sep)); + yesexpr = nl_langinfo_l(YESEXPR, posix); + assert(!strcmp("^[yY]", yesexpr)); + noexpr = nl_langinfo_l(NOEXPR, posix); + assert(!strcmp("^[nN]", noexpr)); + currency_symbol = nl_langinfo_l(CURRENCY_SYMBOL, posix); + assert(!strcmp("", currency_symbol)); + crncystr = nl_langinfo_l(CRNCYSTR, posix); + assert(!strcmp("-", crncystr)); + abday_1 = nl_langinfo_l(ABDAY_1, posix); + assert(!strcmp("Sun", abday_1)); + day_1 = nl_langinfo_l(DAY_1, posix); + assert(!strcmp("Sunday", day_1)); + day_7 = nl_langinfo_l(DAY_7, posix); + assert(!strcmp("Saturday", day_7)); + pm = nl_langinfo_l(PM_STR, posix); + assert(!strcmp("PM", pm)); + + freelocale(posix); + + locale = setlocale(LC_ALL, "de_DE"); + assert(locale && strlen(locale)); + + tousands_sep = nl_langinfo(THOUSEP); + assert(!strcmp(".", tousands_sep)); + decimal_point = nl_langinfo(DECIMAL_POINT); + assert(!strcmp(",", 
decimal_point)); + pm = nl_langinfo(PM_STR); + assert(!strcmp("", pm)); + + lower = tolower('A'); + assert(lower == 'a'); + upper = toupper(lower); + assert(upper == 'A'); + + assert(isalpha('z')); + assert(!isalpha('z' + 1)); + + snprintf(buf, sizeof(buf), "%'g", 12345.67); + // assert(!strcmp("12.345,7", buf)); + + locale = setlocale(LC_NUMERIC, "C"); + assert(locale && strlen(locale)); + + tousands_sep = nl_langinfo(THOUSEP); + assert(!strcmp("", tousands_sep)); + decimal_point = nl_langinfo(DECIMAL_POINT); + assert(!strcmp(".", decimal_point)); + pm = nl_langinfo(PM_STR); + assert(!strcmp("", pm)); + + locale = setlocale(LC_TIME, "ru_RU.utf8"); + assert(locale && strlen(locale)); + + day_7 = nl_langinfo(DAY_7); + assert(!strcmp("Суббота", day_7)); + + locale = setlocale(LC_MONETARY, "en_US.utf8"); + assert(locale && strlen(locale)); + + currency_symbol = nl_langinfo(CURRENCY_SYMBOL); + assert(!strcmp("$", currency_symbol)); + + locale = setlocale(LC_ALL, "C"); + assert(locale && strlen(locale)); + + pm = nl_langinfo(PM_STR); + assert(!strcmp("PM", pm)); + + return 0; +}