diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/draft/FileUtilities.java b/tools/cldr-code/src/main/java/org/unicode/cldr/draft/FileUtilities.java index 30e0de101be..86824e70231 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/draft/FileUtilities.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/draft/FileUtilities.java @@ -22,6 +22,8 @@ import java.util.List; import java.util.Locale; import java.util.regex.Pattern; +import org.unicode.cldr.tool.TablePrinter; +import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.PatternCache; import org.unicode.cldr.util.With; @@ -616,4 +618,12 @@ public static void handleFile(String filename, LineHandler handler) throws IOExc public static Iterable in(File file) { return With.in(new FileLines(openFile(file, StandardCharsets.UTF_8))); } + + public static void makeTsv(String title, TablePrinter tablePrinter) throws IOException { + String fileName = anchorize(title); + final File tsvFile = new File(CLDRPaths.CHART_DIRECTORY + "tsv/", fileName + ".tsv"); + try (final PrintWriter newLsraw = FileUtilities.openUTF8Writer(tsvFile); ) { + tablePrinter.toTsv(newLsraw); + } + } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java index 80f44a23bca..8b1bdc7a895 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java @@ -1103,9 +1103,13 @@ private void writeNonLdmlPlain(Anchors anchors) throws IOException { continue; } File dirOld = new File(PREV_CHART_VERSION_DIRECTORY + "common/" + dir); - System.out.println("\tLast dir: " + dirOld); + System.out.println("\tLast dir:\t" + dirOld); File dir2 = new File(CHART_VERSION_DIRECTORY + "common/" + dir); - System.out.println("\tCurr dir: " + dir2); + boolean isDirectory = dir2.isDirectory(); + 
System.out.println("\tCurr dir:\t" + dir2 + (isDirectory ? "" : "\tNOT DIRECTORY")); + if (!isDirectory) { + continue; + } for (String file : dir2.list()) { if (!file.endsWith(".xml")) { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartUnitConversions.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartUnitConversions.java index 8bb262bef7a..183e9543a82 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartUnitConversions.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartUnitConversions.java @@ -20,20 +20,6 @@ public class ChartUnitConversions extends Chart { private static final boolean DUMP_UNIT_TABLE_TO_STDOUT = false; - public static final String QUANTITY_MSG = - "The units are grouped and ordered by Quantity (which are based on the NIST quantities, see " - + "NIST 811). Note that the quantities are informative."; - public static final String RATIONAL_MSG = - "Each numeric value is an exact rational. (Radians are an exception since the value of π is irrational; a rational approximation is used.)" - + "The format is a terminating decimal where possible; " - + "otherwise a repeating decimal if possible (where ˙ marks the start of the reptend); " - + "otherwise a rational number (of the form numerator/denominator)." - + ""; - public static final String SPEC_GENERAL_MSG = - "The " - + ldmlSpecLink("/tr35-general.html#Contents") - + " should be consulted for more details, such as how to handle complex units (such as foot-per-minute) by converting the elements"; - public static void main(String[] args) { new ChartUnitConversions().writeChart(null); } @@ -53,21 +39,49 @@ public String getExplanation() { return "

Unit Conversions provide conversions for units, such as meter ⟹ foot, " + "so that a source units can be converted into what is needed for localized " + "Unit Preferences. " - + "There are many possible units, and additional units and conversions will be added in future releases.

" - + "" + dataScrapeMessage( diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LanguageInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LanguageInfo.java new file mode 100644 index 00000000000..5ece0845f21 --- /dev/null +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LanguageInfo.java @@ -0,0 +1,2163 @@ +package org.unicode.cldr.tool; + +import com.google.common.base.Joiner; +import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; +import com.google.common.collect.Sets; +import com.google.common.collect.TreeMultimap; +import com.ibm.icu.impl.Relation; +import com.ibm.icu.impl.Row.R2; +import com.ibm.icu.impl.Row.R4; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.Normalizer; +import com.ibm.icu.text.Normalizer2; +import com.ibm.icu.text.NumberFormat; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.ULocale; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; +import org.unicode.cldr.draft.FileUtilities; +import org.unicode.cldr.util.ArrayComparator; +import org.unicode.cldr.util.CLDRConfig; +import org.unicode.cldr.util.CLDRFile; +import org.unicode.cldr.util.CLDRLocale; +import org.unicode.cldr.util.CLDRPaths; +import org.unicode.cldr.util.CLDRURLS; +import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.Factory; +import org.unicode.cldr.util.Iso639Data; +import org.unicode.cldr.util.LanguageTagParser; +import org.unicode.cldr.util.Level; +import 
org.unicode.cldr.util.Log; +import org.unicode.cldr.util.NameGetter; +import org.unicode.cldr.util.NameType; +import org.unicode.cldr.util.Organization; +import org.unicode.cldr.util.StandardCodes; +import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; +import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; +import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; +import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; +import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; +import org.unicode.cldr.util.TransliteratorUtilities; +import org.unicode.cldr.util.XPathParts; + +/** + * TODO This is old code that read supplemental data. Should be replaced by using + * SupplementalDataInfo. https://unicode-org.atlassian.net/browse/CLDR-15673 + */ +class LanguageInfo { + private static final Map> localeAliasInfo = new TreeMap<>(); + + Multimap language_scripts = TreeMultimap.create(); + + Multimap language_territories = TreeMultimap.create(); + + List> deprecatedItems = new ArrayList<>(); + + Multimap territory_languages; + + Multimap script_languages; + + // Map group_contains = new TreeMap(); + + Set aliases = + new TreeSet( + new ArrayComparator( + new Comparator[] {new UTF16.StringComparator(), ShowLanguages.col})); + + Comparator col3 = + new ArrayComparator( + new Comparator[] {ShowLanguages.col, ShowLanguages.col, ShowLanguages.col}); + + Map currency_fractions = new TreeMap(ShowLanguages.col); + + Map currency_territory = new TreeMap(ShowLanguages.col); + + Map territory_currency = new TreeMap(ShowLanguages.col); + + Set territoriesWithCurrencies = new TreeSet<>(); + + Set currenciesWithTerritories = new TreeSet<>(); + + Map>> territoryData = new TreeMap<>(); + + Set territoryTypes = new TreeSet<>(); + + Map> charSubstitutions = + new TreeMap>(ShowLanguages.col); + + String defaultDigits = null; + + Map> territoryLanguageData = new TreeMap<>(); + + private Relation territoriesToModernCurrencies = + 
Relation.of(new TreeMap>(), TreeSet.class, null); + + /** + * Add main for quick checking + * + * @param args + * @throws IOException + */ + public static void main(String[] args) throws IOException { + Factory cldrFactory = + CLDRConfig.getInstance().getCldrFactory(); // .make(CLDRPaths.MAIN_DIRECTORY, ".*"); + LanguageInfo linfo = new LanguageInfo(cldrFactory); + linfo.showCountryLanguageInfo(null); + } + + public LanguageInfo(Factory cldrFactory) throws IOException { + CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); + for (String path : supp) { + String fullPath = supp.getFullXPath(path); + if (fullPath == null) { + supp.getFullXPath(path); + } + XPathParts parts = XPathParts.getFrozenInstance(fullPath); + + // + if (path.indexOf("/zoneItem") >= 0) { + Map attributes = parts.getAttributes(parts.size() - 1); + String type = attributes.get("type"); + // String territory = attributes.get("territory"); + String aliasAttributes = attributes.get("aliases"); + if (aliasAttributes != null) { + String[] aliasesList = aliasAttributes.split("\\s+"); + + for (int i = 0; i < aliasesList.length; ++i) { + String alias = aliasesList[i]; + aliases.add(new String[] {"timezone", alias, type}); + } + } + // TODO territory, multizone + continue; + } + + if (path.indexOf("/currencyData") >= 0) { + if (path.indexOf("/fractions") >= 0) { + // + String element = parts.getElement(parts.size() - 1); + if (!element.equals("info")) + throw new IllegalArgumentException( + "Unexpected fractions element: " + element); + Map attributes = parts.getAttributes(parts.size() - 1); + String iso4217 = attributes.get("iso4217"); + String digits = attributes.get("digits"); + String rounding = attributes.get("rounding"); + digits = digits + (rounding.equals("0") ? 
"" : " (" + rounding + ")"); + if (iso4217.equals("DEFAULT")) defaultDigits = digits; + else currency_fractions.put(getName(NameType.CURRENCY, iso4217, false), digits); + continue; + } + // + // + if (path.indexOf("/region") >= 0) { + Map attributes = parts.getAttributes(parts.size() - 2); + String iso3166 = attributes.get("iso3166"); + attributes = parts.getAttributes(parts.size() - 1); + String iso4217 = attributes.get("iso4217"); + String to = attributes.get("to"); + if (to == null) to = "\u221E"; + String from = attributes.get("from"); + if (from == null) from = "-\u221E"; + String countryName = getName(NameType.TERRITORY, iso3166, false); + String currencyName = getName(NameType.CURRENCY, iso4217, false); + Set info = territory_currency.get(countryName); + if (info == null) territory_currency.put(countryName, info = new TreeSet(col3)); + info.add(new String[] {from, to, currencyName}); + info = currency_territory.get(currencyName); + if (info == null) + currency_territory.put(currencyName, info = new TreeSet(ShowLanguages.col)); + territoriesWithCurrencies.add(iso3166); + currenciesWithTerritories.add(iso4217); + if (to.equals("\u221E") || to.compareTo("2006") > 0) { + territoriesToModernCurrencies.put(iso3166, iso4217); + info.add("" + countryName + ""); + + } else { + info.add("" + countryName + ""); + } + continue; + } + } + + if (path.indexOf("/languageData") >= 0) { + Map attributes = parts.findAttributes("language"); + String language = attributes.get("type"); + String alt = attributes.get("alt"); + ShowLanguages.addTokens(language, attributes.get("scripts"), " ", language_scripts); + // mark the territories + if (alt == null) + ; // nothing + else if ("secondary".equals(alt)) language += "*"; + else language += "*" + alt; + // + ShowLanguages.addTokens( + language, attributes.get("territories"), " ", language_territories); + continue; + } + + if (path.indexOf("/deprecatedItems") >= 0) { + deprecatedItems.add(parts.findAttributes("deprecatedItems")); + 
continue; + } + if (path.indexOf("/calendarPreferenceData/calendarPreference") >= 0) { + Map attributes = parts.findAttributes("calendarPreference"); + if (attributes == null) { + System.err.println( + "Err: on path " + + fullPath + + " , no attributes on 'calendarPreference'. Probably, this tool is out of date."); + } else { + String ordering = attributes.get("ordering"); + String territories = attributes.get("territories"); + if (territories == null) { + System.err.println( + "Err: on path " + + fullPath + + ", missing territories. Probably, this tool is out of date."); + } else if (ordering == null) { + System.err.println( + "Err: on path " + + fullPath + + ", missing ordering. Probably, this tool is out of date."); + } else { + addTerritoryInfo(territories, "Preferred Calendar", ordering); + } + } + } + if (path.indexOf("/weekData") >= 0 || path.indexOf("measurementData") >= 0) { + String element = parts.getElement(parts.size() - 1); + Map attributes = parts.getAttributes(parts.size() - 1); + // later, make this a table + String key = "count"; + String display = "Days in week (min)"; + boolean useTerritory = true; + switch (element) { + case "firstDay": + key = "day"; + display = "First day of week"; + break; + case "weekendStart": + key = "day"; + display = "First day of weekend"; + break; + case "weekendEnd": + key = "day"; + display = "Last day of weekend"; + break; + case "measurementSystem": + // + key = "type"; + display = "Meas. 
system"; + break; + case "paperSize": + key = "type"; + display = "Paper Size"; + break; + case "weekOfPreference": + useTerritory = false; + break; + } + if (useTerritory) { + String type = attributes.get(key); + String territories = attributes.get("territories"); + addTerritoryInfo(territories, display, type); + } + } + if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0) continue; + ShowLanguages.skipped++; + if (ShowLanguages.SHOW_SKIPPED) { + System.out.println("Skipped Element: " + path); + } + } + + for (String territory : + ShowLanguages.supplementalDataInfo.getTerritoriesWithPopulationData()) { + for (String language : + ShowLanguages.supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( + territory)) { + language_territories.put(language, territory); + } + } + territory_languages = Multimaps.invertFrom(language_territories, TreeMultimap.create()); + script_languages = Multimaps.invertFrom(language_scripts, TreeMultimap.create()); + + // now get some metadata + localeAliasInfo.put("language", new TreeMap()); + localeAliasInfo.put("script", new TreeMap()); + localeAliasInfo.put("territory", new TreeMap()); + localeAliasInfo.put("variant", new TreeMap()); + localeAliasInfo.put("zone", new TreeMap()); + localeAliasInfo.put("subdivision", new TreeMap()); + localeAliasInfo.put("unit", new TreeMap()); + localeAliasInfo.put("usage", new TreeMap()); + + // localeAliasInfo.get("language").put("nb", "no"); + localeAliasInfo.get("language").put("zh_CN", "zh_Hans_CN"); + localeAliasInfo.get("language").put("zh_SG", "zh_Hans_SG"); + localeAliasInfo.get("language").put("zh_TW", "zh_Hant_TW"); + localeAliasInfo.get("language").put("zh_MO", "zh_Hant_MO"); + localeAliasInfo.get("language").put("zh_HK", "zh_Hant_HK"); + + // CLDRFile supp2 = cldrFactory.make(CLDRFile.SUPPLEMENTAL_METADATA, false); + Map, String>>> localeAliasInfo2 = + ShowLanguages.supplementalDataInfo.getLocaleAliasInfo(); + for (Entry, String>>> entry1 : + 
localeAliasInfo2.entrySet()) { + String element = entry1.getKey(); + for (Entry, String>> entry2 : entry1.getValue().entrySet()) { + String type = entry2.getKey(); + R2, String> replacementReason = entry2.getValue(); + List replacementList = replacementReason.get0(); + String replacement = + replacementList == null ? null : Joiner.on(" ").join(replacementList); + String reason = replacementReason.get1(); + if (element.equals("timezone")) { + element = "zone"; + } + try { + localeAliasInfo.get(element).put(type, replacement == null ? "?" : replacement); + } catch (Exception e) { + // TODO Auto-generated catch block + throw new IllegalArgumentException( + "Can't find alias data for '" + element + "'", e); + } + + String name = ""; + if (replacement == null) { + name = "(none)"; + } else if (element.equals("language")) { + name = getName(replacement, false); + } else if (element.equals("zone")) { + element = "timezone"; + name = replacement + "*"; + } else { + NameType nameType = NameType.typeNameToCode(element); + if (nameType != NameType.NONE) { + name = getName(nameType, replacement, false); + } else { + name = "*" + replacement; + } + } + if (element.equals("territory")) { + territoryAliases.put(type, name); + aliases.add( + new String[] { + element, getName(NameType.TERRITORY, type, false), name, reason + }); + } else { + aliases.add(new String[] {element, type, name, reason}); + } + continue; + } + } + Log.setLog(CLDRPaths.CHART_DIRECTORY + "supplemental/", "characterLog.txt"); + Log.close(); + } + + public void printLikelySubtags(PrintWriter index) throws IOException { + + PrintWriter pw = + new PrintWriter( + new FormattedFileWriter( + null, + "Likely Subtags", + null, + ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + + TablePrinter tablePrinter = + new TablePrinter() + .addColumn("Source Lang", "class='source'", null, "class='source'", true) + .setSortPriority(1) + .setSpanRows(false) + .addColumn("Source Script", "class='source'", null, "class='source'", true) 
+ .setSortPriority(0) + .setSpanRows(false) + .setBreakSpans(true) + .addColumn("Source Region", "class='source'", null, "class='source'", true) + .setSortPriority(2) + .setSpanRows(false) + .addColumn("Target Lang", "class='target'", null, "class='target'", true) + .setSortPriority(3) + .setBreakSpans(true) + .addColumn("Target Script", "class='target'", null, "class='target'", true) + .setSortPriority(4) + .addColumn("Target Region", "class='target'", null, "class='target'", true) + .setSortPriority(5) + .addColumn( + "Source ID", + "class='source'", + CldrUtility.getDoubleLinkMsg(), + "class='source'", + true) + .addColumn("Target ID", "class='target'", null, "class='target'", true); + Map subtags = ShowLanguages.supplementalDataInfo.getLikelySubtags(); + LanguageTagParser sourceParsed = new LanguageTagParser(); + LanguageTagParser targetParsed = new LanguageTagParser(); + for (String source : subtags.keySet()) { + String target = subtags.get(source); + sourceParsed.set(source); + targetParsed.set(target); + tablePrinter + .addRow() + .addCell(getName(NameType.LANGUAGE, sourceParsed.getLanguage())) + .addCell(getName(NameType.SCRIPT, sourceParsed.getScript())) + .addCell(getName(NameType.TERRITORY, sourceParsed.getRegion())) + .addCell(getName(NameType.LANGUAGE, targetParsed.getLanguage())) + .addCell(getName(NameType.SCRIPT, targetParsed.getScript())) + .addCell(getName(NameType.TERRITORY, targetParsed.getRegion())) + .addCell(source) + .addCell(target) + .finishRow(); + } + pw.println(tablePrinter.toTable()); + pw.close(); + } + + static class LanguageData extends R4 { + public LanguageData(Double a, Double b, Double c, String d) { + super(a, b, c, d); + } + } + + private String getName(final NameType nameType, final String value) { + if (value == null || value.equals("") || value.equals("und")) { + return "\u00A0"; + } + String result = ShowLanguages.englishNameGetter.getNameFromTypeEnumCode(nameType, value); + if (result == null) { + result = value; + } + 
return result; + } + + static final Comparator INVERSE_COMPARABLE = + new Comparator() { + @Override + public int compare(Object o1, Object o2) { + return ((Comparable) o2).compareTo(o1); + } + }; + + // http://www.faqs.org/rfcs/rfc2396.html + // delims = "<" | ">" | "#" | "%" | <"> + // "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" + // Within a query component, the characters ";", "/", "?", ":", "@", + // "&", "=", "+", ",", and "$" are reserved. + static final UnicodeSet ESCAPED_URI_QUERY = + new UnicodeSet( + "[\\u0000-\\u0020\\u007F <>#%\"\\{}|\\\\\\^\\[\\]`;/?:@\\&=+,$\\u0080-\\U0001FFFF]") + .freeze(); + + private static final int MINIMAL_BIG_VENDOR = 8; + + static { + System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement()); + } + + private String urlEncode(String input) { + try { + byte[] utf8 = input.getBytes("utf-8"); + StringBuffer output = new StringBuffer(); + for (int i = 0; i < utf8.length; ++i) { + int b = utf8[i] & 0xFF; + if (ESCAPED_URI_QUERY.contains(b)) { + output.append('%'); + if (b < 0x10) output.append('0'); + output.append(Integer.toString(b, 16)); + } else { + output.append((char) b); + } + } + return output.toString(); + } catch (UnsupportedEncodingException e) { + throw (IllegalArgumentException) new IllegalArgumentException().initCause(e); + } + } + + private String addBug(int bugNumber, String text, String from, String subject, String body) { + return "" + text + ""; + } + + void showLanguageCountryInfo(PrintWriter pw) throws IOException { + FormattedFileWriter ffw = + new FormattedFileWriter( + null, + "Language-Territory Information", + null + // "

The language data is provided for + // localization testing, and is under development for CLDR 1.5. " + // + + // "To add a new territory for a language, see the add new links + // below. " + + // "For more information, see Territory-Language + // Information." + // + + // "

" + , + ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS); + PrintWriter pw21 = new PrintWriter(ffw); + PrintWriter pw2 = pw21; + NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); + nf.setGroupingUsed(true); + // NumberFormat percent = new DecimalFormat("000.0%"); + TablePrinter tablePrinter = + new TablePrinter() + // tablePrinter.setSortPriorities(0,5) + .addColumn("L", "class='source'", null, "class='source'", true) + .setSortPriority(0) + .setBreakSpans(true) + .setRepeatHeader(true) + .setHidden(true) + .addColumn("Language", "class='source'", null, "class='source'", true) + .setSortPriority(0) + .setBreakSpans(true) + .addColumn( + "Code", + "class='source'", + CldrUtility.getDoubleLinkMsg(), + "class='source'", + true) + // .addColumn("Report Bug", "class='target'", null, "class='target'", + // false) + .addColumn("Territory", "class='target'", null, "class='target'", true) + .addColumn( + "Code", + "class='target'", + "{0}", + "class='target'", + true) + .addColumn( + "Language Population", + "class='target'", + "{0,number,#,#@@}", + "class='targetRight'", + true) + .setSortPriority(1) + .setSortAscending(false) + // .addColumn("Territory Population", "class='target'", "{0,number,#,##0}", + // "class='targetRight'", true) + // .addColumn("Language Literacy", "class='target'", "{0,number,00.0}%", + // "class='targetRight'", true) + // .addColumn("Territory Literacy", "class='target'", "{0,number,00.0}%", + // "class='targetRight'", true) + // .addColumn("Territory GDP (PPP)", "class='target'", "{0,number,#,##0}", + // "class='targetRight'", true) + ; + TreeSet languages = new TreeSet<>(); + Collection data = new ArrayList<>(); + String msg = "
Please click on each country code"; + + Collection plainData = new ArrayList<>(); + + for (String territoryCode : + ShowLanguages.supplementalDataInfo.getTerritoriesWithPopulationData()) { + // PopulationData territoryData = + // supplementalDataInfo.getPopulationDataForTerritory(territoryCode); + String territoryName = + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.TERRITORY, territoryCode); + for (String languageCode : + ShowLanguages.supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( + territoryCode)) { + PopulationData languageData = + ShowLanguages.supplementalDataInfo.getLanguageAndTerritoryPopulationData( + languageCode, territoryCode); + languages.add(languageCode); + Comparable[] items = + new Comparable[] { + getFirstPrimaryWeight(getLanguageName(languageCode)), + getLanguageName(languageCode), // + getLanguagePluralMessage(msg, + // languageCode), + languageCode, + // bug, + territoryName + getOfficialStatus(territoryCode, languageCode), + territoryCode, + languageData.getPopulation(), + // population, + // languageliteracy, + // territoryLiteracy, + // gdp + }; + Comparable[] plainItems = + new Comparable[] { + getLanguageName(languageCode), // + getLanguagePluralMessage(msg, + // languageCode), + languageCode, + territoryName, + territoryCode, + getRawOfficialStatus(territoryCode, languageCode), + languageData.getPopulation(), + languageData.getLiteratePopulation() + }; + + data.add(items); + plainData.add(plainItems); + } + } + for (String languageCode : languages) { + Comparable[] items = + new Comparable[] { + getFirstPrimaryWeight(getLanguageName(languageCode)), + getLanguageName( + languageCode), // + getLanguagePluralMessage(msg, languageCode), + languageCode, + // bug, + addBug( + 1217, + "add new", + "", + "Add territory to " + + getLanguageName(languageCode) + + " (" + + languageCode + + ")", + ""), + "", + 0.0d, + // 0.0d, + // 0.0d, + // 0.0d, + // gdp + }; + data.add(items); + } + Comparable[][] 
flattened = data.toArray(new Comparable[data.size()][]); + String value = tablePrinter.addRows(flattened).toTable(); + pw2.println(value); + pw2.close(); + try (PrintWriter pw21plain = + FileUtilities.openUTF8Writer(ffw.getDir(), ffw.getBaseFileName() + ".txt")) { + for (Comparable[] row : plainData) { + pw21plain.println(Joiner.on("\t").join(row)); + } + } + } + + private String getLanguagePluralMessage(String msg, String languageCode) { + String mainLanguageCode = new LanguageTagParser().set(languageCode).getLanguage(); + String messageWithPlurals = + msg + + ", on plurals" + + ", and on likely-subtags"; + return messageWithPlurals; + } + + private String getLanguageName(String languageCode) { + String result = + ShowLanguages.englishNameGetter.getNameFromIdentifierOptAlt( + languageCode, NameGetter.NameOpt.COMPOUND_ONLY, CLDRFile.SHORT_ALTS); + if (!result.equals(languageCode)) return result; + Set names = Iso639Data.getNames(languageCode); + if (names != null && names.size() != 0) { + return names.iterator().next(); + } + return languageCode; + } + + static final Set TC_Vendors = + Sets.union( + Organization.getTCOrgs(), + // This adds the CLDR org at the end of the list + Set.of(Organization.cldr)); + + void showCoverageGoals(PrintWriter pw) throws IOException { + try (PrintWriter pw2 = + new PrintWriter( + new FormattedFileWriter( + null, + "Coverage Goals", + null, + ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + PrintWriter coverageGoalsTsv = + FileUtilities.openUTF8Writer( + CLDRPaths.CHART_DIRECTORY + "tsv/", "coverage_goals.tsv"); ) { + + TablePrinter tablePrinter = + new TablePrinter() + // tablePrinter.setSortPriorities(0,4) + .addColumn("Language", "class='source'", null, "class='source'", false) + .setSortPriority(0) + .setBreakSpans(false) + .addColumn( + "Code", + "class='source'", + "{0}", + "class='source'", + false) + .addColumn("D. 
Votes", "class='target'", null, "class='target'", false); + + Map> vendordata = ShowLanguages.sc.getLocaleTypes(); + Set locales = new TreeSet<>(); + Set vendors = new LinkedHashSet<>(); + Set smallVendors = new LinkedHashSet<>(); + + for (Organization organization : TC_Vendors) { + // if (vendor.equals(Organization.java)) continue; + Map data = vendordata.get(organization); + vendors.add(organization); + tablePrinter + .addColumn( + organization.getDisplayName(), + "class='target'", + null, + "class='target'", + false) + .setSpanRows(false); + locales.addAll(data.keySet()); + showTabbedOrgLevels(coverageGoalsTsv, organization, data); + } + + for (Entry> vendorData : vendordata.entrySet()) { + Organization organization = vendorData.getKey(); + if (!TC_Vendors.contains(organization)) { + smallVendors.add(organization); + Map data = vendordata.get(organization); + showTabbedOrgLevels(coverageGoalsTsv, organization, data); + continue; + } + } + + Collection data = new ArrayList<>(); + List list = new ArrayList<>(); + LanguageTagParser ltp = new LanguageTagParser(); + // String alias2 = getAlias("sh_YU"); + + pw2.append("

TC Orgs

"); + + for (String locale : locales) { + list.clear(); + String localeCode = locale.equals("*") ? "und" : locale; + String alias = getAlias(localeCode); + if (!alias.equals(localeCode)) { + throw new IllegalArgumentException( + "Should use canonical form: " + locale + " => " + alias); + } + // String baseLang = ltp.set(localeCode).getLanguage(); + String baseLangName = getLanguageName(localeCode); + list.add("und".equals(localeCode) ? "other" : baseLangName); + list.add(locale); + int defaultVotes = + ShowLanguages.supplementalDataInfo.getRequiredVotes( + CLDRLocale.getInstance(locale), null); + list.add(String.valueOf(defaultVotes)); + for (Organization vendor : vendors) { + String status = getVendorStatus(locale, vendor, vendordata); + // if (!baseLang.equals(locale) && + // !status.startsWith("<")) { + // String langStatus = getVendorStatus(baseLang, + // vendor, + // vendordata); + // if (!langStatus.equals(status)) { + // status += "*"; + // } + // } + list.add(status); + } + data.add(list.toArray(new String[list.size()])); + } + Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); + String value = tablePrinter.addRows(flattened).toTable(); + pw2.println(value); + + pw2.append("

Others

    "); + + for (Organization vendor2 : smallVendors) { + pw2.append("
  • "); + pw2.append(TransliteratorUtilities.toHTML.transform(vendor2.getDisplayName())) + .append(": "); + boolean first1 = true; + for (Level level : Level.values()) { + boolean first2 = true; + Level other = null; + for (Entry data2 : vendordata.get(vendor2).entrySet()) { + String key = data2.getKey(); + Level level2 = data2.getValue(); + if (level != level2) { + continue; + } + if (key.equals("*")) { + other = level2; + continue; + } + if (first2) { + if (first1) { + first1 = false; + } else { + pw2.append("; "); + } + pw2.append(level2.toString()).append(": "); + first2 = false; + } else { + pw2.append(", "); + } + pw2.append(TransliteratorUtilities.toHTML.transform(key)); + } + if (other != null) { + if (first2) { + if (first1) { + first1 = false; + } else { + pw2.append("; "); + } + pw2.append(level.toString()).append(": "); + first2 = false; + } else { + pw2.append(", "); + } + pw2.append("other"); + } + } + pw2.append("
  • "); + } + pw2.append("
"); + } + } + + public void showTabbedOrgLevels( + PrintWriter coverageGoalsTsv, Organization organization, Map data) { + coverageGoalsTsv.println( + String.format( + "\n#%s\t;\t%s\t;\t%s\t;\t%s\n", "Org", "Locale", "Level", "Locale Name")); + for (Entry entry : data.entrySet()) { + String locale = entry.getKey(); + Level level = entry.getValue(); + final String name = + locale.equals("*") + ? "ALL" + : ShowLanguages.englishNameGetter.getNameFromIdentifierOptAlt( + locale, NameGetter.NameOpt.COMPOUND_ONLY, CLDRFile.SHORT_ALTS); + coverageGoalsTsv.println( + String.format("%s\t;\t%s\t;\t%s\t;\t%s", organization, locale, level, name)); + } + } + + LanguageTagParser lpt2 = new LanguageTagParser(); + + // TODO replace this with standard call. + + private String getAlias(String locale) { + lpt2.set(locale); + locale = lpt2.toString(); // normalize + // String language = lpt2.getLanguage(); + String script = lpt2.getScript(); + String region = lpt2.getRegion(); + // List variants = lpt2.getVariants(); + String temp; + for (String old : localeAliasInfo.get("language").keySet()) { + if (locale.startsWith(old)) { + // the above is a rough check, and will fail with old=moh and locale=mo + if (!locale.equals(old) && !locale.startsWith(old + "_")) { + continue; + } + temp = localeAliasInfo.get("language").get(old); + lpt2.setLanguage(temp.split("\\s+")[0] + locale.substring(old.length())); + break; + } + } + temp = localeAliasInfo.get("script").get(script); + if (temp != null) { + lpt2.setScript(temp.split("\\s+")[0]); + } + temp = localeAliasInfo.get("territory").get(region); + if (temp != null) { + lpt2.setRegion(temp.split("\\s+")[0]); + } + return lpt2.toString(); + } + + private String getVendorStatus( + String locale, Organization vendor, Map> vendordata) { + Level statusLevel = vendordata.get(vendor).get(locale); + return statusLevel == null ? "" : statusLevel.toString(); + // String status = statusLevel == null ? 
null : statusLevel.toString(); + // String curLocale = locale; + // while (status == null) { + // curLocale = LocaleIDParser.getParent(curLocale); + // if ("root".equals(curLocale)) { + // status = " "; + // break; + // } + // statusLevel = vendordata.get(vendor).get(curLocale); + // if (statusLevel != null) { + // status = statusLevel + "†"; + // } + // } + // return status; + } + + void showCountryLanguageInfo(PrintWriter pw) throws IOException { + NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); + nf.setGroupingUsed(true); + // NumberFormat percent = new DecimalFormat("000.0%"); + TablePrinter tablePrinter = + new TablePrinter() + // tablePrinter.setSortPriorities(0,4) + .addColumn("T", "class='source'", null, "class='source'", true) + .setSortPriority(0) + .setBreakSpans(true) + .setRepeatHeader(true) + .setHidden(true) + .addColumn("Territory", "class='source'", null, "class='source'", true) + .setSortPriority(0) + .setBreakSpans(true) + .addColumn( + "Code", + "class='source'", + CldrUtility.getDoubleLinkMsg(), + "class='source'", + true) + .addColumn( + "Terr. Literacy", + "class='target'", + "{0,number,@@}%", + "class='targetRight'", + true) + .addColumn("Language", "class='target'", null, "class='target'", false) + .addColumn( + "Code", + "class='target'", + "{0}", + "class='target'", + false) + .addColumn( + "Lang. 
Pop.", + "class='target'", + "{0,number,#,#@@}", + "class='targetRight'", + true) + .addColumn( + "Pop.%", + "class='target'", "{0,number,@@}%", "class='targetRight'", true) + .setSortAscending(false) + .setSortPriority(1) + .addColumn( + "Literacy%", + "class='target'", "{0,number,@@}%", "class='targetRight'", true) + .addColumn( + "Writing Pop.", + "class='target'", + "{0,number,@@}%", + "class='targetRight'", + true) + .addColumn("Report Bug", "class='target'", null, "class='target'", false); + + TablePrinter tablePrinterTSV = + new TablePrinter() + // tablePrinter.setSortPriorities(0,4) + .addColumn("Territory") + .setSortPriority(0) + .addColumn("Code") + .addColumn("Terr. Literacy") + .addColumn("Language") + .addColumn("Code") + .addColumn("Status") + .addColumn("Lang. Pop.") + .addColumn("Pop.%") + .setSortAscending(false) + .setSortPriority(1) + .addColumn("Literacy%") + .addColumn("Writing Pop.") + .setCellPattern("{0,number,0}"); + + for (String territoryCode : + ShowLanguages.supplementalDataInfo.getTerritoriesWithPopulationData()) { + String territoryName = + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.TERRITORY, territoryCode); + PopulationData territoryData2 = + ShowLanguages.supplementalDataInfo.getPopulationDataForTerritory(territoryCode); + double territoryLiteracy = territoryData2.getLiteratePopulationPercent(); + + for (String languageCode : + ShowLanguages.supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( + territoryCode)) { + PopulationData languageData = + ShowLanguages.supplementalDataInfo.getLanguageAndTerritoryPopulationData( + languageCode, territoryCode); + if (languageData.getPopulation() == 0d) { + continue; + } + double languagePopulationPercent = + 100 * languageData.getPopulation() / territoryData2.getPopulation(); + double languageliteracy = languageData.getLiteratePopulationPercent(); + double writingFrequency = languageData.getWritingPercent(); + + tablePrinter + .addRow() + 
.addCell(getFirstPrimaryWeight(territoryName)) + .addCell(territoryName) + .addCell(territoryCode) + .addCell(territoryLiteracy) + .addCell( + getLanguageName(languageCode) + + getOfficialStatus(territoryCode, languageCode)) + .addCell(languageCode) + .addCell(languageData.getPopulation()) + .addCell(languagePopulationPercent) + .addCell(languageliteracy) + .addCell(writingFrequency) + .addCell( + addBug( + 1217, + "bug", + "", + "Fix info for " + + getLanguageName(languageCode) + + " (" + + languageCode + + ")" + + " in " + + territoryName + + " (" + + territoryCode + + ")", + "")) + .finishRow(); + + tablePrinterTSV + .addRow() + .addCell(territoryName) + .addCell(territoryCode) + .addCell(territoryLiteracy / 100d) + .addCell(getLanguageName(languageCode)) + .addCell(languageCode) + .addCell(getRawOfficialStatus(territoryCode, languageCode)) + .addCell(languageData.getPopulation()) + .addCell(languagePopulationPercent / 100d) + .addCell(languageliteracy / 100d) + .addCell(writingFrequency * languageData.getPopulation() / 100d) + .finishRow(); + } + + tablePrinter + .addRow() + .addCell(getFirstPrimaryWeight(territoryName)) + .addCell(territoryName) + .addCell(territoryCode) + .addCell(territoryLiteracy) + .addCell( + addBug( + 1217, + "add new", + "", + "Add language to " + territoryName + "(" + territoryCode + ")", + "")) + .addCell("") + .addCell(0.0d) + .addCell(0.0d) + .addCell(0.0d) + .addCell(0.0d) + .addCell("") + .finishRow(); + } + String title = "Territory-Language Information"; + try (PrintWriter pw2 = + new PrintWriter( + new FormattedFileWriter( + null, title, null, ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS))) { + pw2.println(tablePrinter.toTable()); + } + FileUtilities.makeTsv(title, tablePrinterTSV); + } + + void showCountryInfo(PrintWriter pw) throws IOException { + PrintWriter pw21 = + new PrintWriter( + new FormattedFileWriter( + null, + "Territory Information", + null, + ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + PrintWriter pw2 = pw21; + 
NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); + nf.setGroupingUsed(true); + // NumberFormat percent = new DecimalFormat("000.0%"); + TablePrinter tablePrinter = + new TablePrinter() + // tablePrinter.setSortPriorities(0,4) + .addColumn("T", "class='source'", null, "class='source'", true) + .setSortPriority(0) + .setBreakSpans(true) + .setRepeatHeader(true) + .setHidden(true) + .addColumn("Territory", "class='source'", null, "class='source'", true) + .setSortPriority(0) + .setBreakSpans(true) + .addColumn( + "Code", + "class='source'", + CldrUtility.getDoubleLinkMsg(), + "class='source'", + true) + .addColumn( + "Terr. Pop (M)", + "class='target'", + "{0,number,#,#@@}", + "class='targetRight'", + true) + .addColumn( + "Terr. GDP ($M PPP)", + "class='target'", + "{0,number,#,#@@}", + "class='targetRight'", + true) + .addColumn( + "Currencies (2006...)", + "class='target'", + null, + "class='target'", + true); + for (Iterator it = territoryTypes.iterator(); it.hasNext(); ) { + String header = it.next(); + if (header.equals("calendar")) header = "calendar (+gregorian)"; + tablePrinter + .addColumn(header) + .setHeaderAttributes("class='target'") + .setCellAttributes("class='target'") + .setSpanRows(true); + } + + tablePrinter.addColumn("Report Bug", "class='target'", null, "class='target'", false); + + for (String territoryCode : + ShowLanguages.supplementalDataInfo.getTerritoriesWithPopulationData()) { + String territoryName = + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.TERRITORY, territoryCode); + PopulationData territoryData2 = + ShowLanguages.supplementalDataInfo.getPopulationDataForTerritory(territoryCode); + double population = territoryData2.getPopulation() / 1000000; + double gdp = territoryData2.getGdp() / 1000000; + + Map> worldData = + territoryData.get(getName(NameType.TERRITORY, "001", false)); + Map> countryData = + territoryData.get(getName(NameType.TERRITORY, territoryCode, false)); + + tablePrinter + .addRow() + 
.addCell(getFirstPrimaryWeight(territoryName)) + .addCell(territoryName) + .addCell(territoryCode) + .addCell(population) + .addCell(gdp) + .addCell(getCurrencyNames(territoryCode)); + + addOtherCountryData(tablePrinter, worldData, countryData); + + tablePrinter + .addCell( + addBug( + 1217, + "bug", + "", + "Fix info for " + territoryName + " (" + territoryCode + ")", + "")) + .finishRow(); + } + String value = tablePrinter.toTable(); + pw2.println(value); + pw2.close(); + } + + static Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE); + + // Do just an approximation for now + + private String getFirstPrimaryWeight(String territoryName) { + char first = territoryName.charAt(0); + String result = nfd.getDecomposition(first); + if (result == null) { + return UTF16.valueOf(first); + } + return UTF16.valueOf(result.codePointAt(0)); + } + + // private String getTerritoryWithLikelyLink(String territoryCode) { + // return "" + territoryCode + + // ""; + // } + + private String getOfficialStatus(String territoryCode, String languageCode) { + PopulationData x = + ShowLanguages.supplementalDataInfo.getLanguageAndTerritoryPopulationData( + languageCode, territoryCode); + if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; + return " {" + + x.getOfficialStatus().toShortString() + + "}"; + } + + private String getRawOfficialStatus(String territoryCode, String languageCode) { + PopulationData x = + ShowLanguages.supplementalDataInfo.getLanguageAndTerritoryPopulationData( + languageCode, territoryCode); + if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; + return x.getOfficialStatus().toString(); + } + + private void addOtherCountryData( + TablePrinter tablePrinter, + Map> worldData, + Map> countryData) { + for (Iterator it2 = territoryTypes.iterator(); it2.hasNext(); ) { + String type = it2.next(); + Set worldResults = worldData.get(type); + Set territoryResults = null; + if (countryData != 
null) { + territoryResults = countryData.get(type); + } + if (territoryResults == null) { + territoryResults = worldResults; + } + String out = ""; + if (territoryResults != null) { + out = territoryResults + ""; + out = out.substring(1, out.length() - 1); // remove [ and ] + } + tablePrinter.addCell(out); + } + } + + private String getCurrencyNames(String territoryCode) { + Set currencies = territoriesToModernCurrencies.getAll(territoryCode); + if (currencies == null || currencies.size() == 0) return ""; + StringBuilder buffer = new StringBuilder(); + for (String code : currencies) { + if (buffer.length() != 0) buffer.append(",
"); + buffer.append(getName(NameType.CURRENCY, code, false)); + } + return buffer.toString(); + } + + private void addCharSubstitution(String value, String substitute) { + if (substitute.equals(value)) return; + LinkedHashSet already = charSubstitutions.get(value); + if (already == null) charSubstitutions.put(value, already = new LinkedHashSet<>(0)); + already.add(substitute); + Log.logln(hex(value, " ") + "; " + hex(substitute, " ")); + } + + /** */ + // public void showTerritoryInfo() { + // Map territory_parent = new TreeMap(); + // gather("001", territory_parent); + // for (Iterator it = territory_parent.keySet().iterator(); it.hasNext();) { + // String territory = (String) it.next(); + // String parent = (String) territory_parent.get(territory); + // System.out.println(territory + "\t" + + // englishNameGetter.getName(english.TERRITORY_NAME, territory) + "\t" + // + parent + "\t" + englishNameGetter.getName(english.TERRITORY_NAME, + // parent)); + // } + // } + + // private void gather(String item, Map territory_parent) { + // Collection containedByItem = (Collection) group_contains.get(item); + // if (containedByItem == null) + // return; + // for (Iterator it = containedByItem.iterator(); it.hasNext();) { + // String contained = (String) it.next(); + // territory_parent.put(contained, item); + // gather(contained, territory_parent); + // } + // } + + private void addTerritoryInfo(String territoriesList, String type, String info) { + String[] territories = territoriesList.split("\\s+"); + territoryTypes.add(type); + for (int i = 0; i < territories.length; ++i) { + String territory = getName(NameType.TERRITORY, territories[i], false); + Map> s = territoryData.get(territory); + if (s == null) { + territoryData.put(territory, s = new TreeMap<>()); + } + Set ss = s.get(type); + if (ss == null) { + s.put(type, ss = new TreeSet<>()); + } + ss.add(info); + } + } + + public void showCalendarData(PrintWriter pw0) throws IOException { + PrintWriter pw = + new 
PrintWriter( + new FormattedFileWriter( + null, + "Other Territory Data", + null, + ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + pw.println(""); + pw.println(""); + for (Iterator it = territoryTypes.iterator(); it.hasNext(); ) { + String header = it.next(); + if (header.equals("calendar")) header = "calendar (+gregorian)"; + pw.println(""); + } + pw.println(""); + + String worldName = getName(NameType.TERRITORY, "001", false); + Map> worldData = territoryData.get(worldName); + for (Iterator it = territoryData.keySet().iterator(); it.hasNext(); ) { + String country = it.next(); + if (country.equals(worldName)) continue; + showCountry(pw, country, country, worldData); + } + showCountry(pw, worldName, "Other", worldData); + pw.println("
Territory" + header + "
"); + pw.close(); + } + + private void showCountry( + PrintWriter pw, + String country, + String countryTitle, + Map> worldData) { + pw.println("" + countryTitle + ""); + Map> data = territoryData.get(country); + for (Iterator it2 = territoryTypes.iterator(); it2.hasNext(); ) { + String type = it2.next(); + String target = "target"; + Set results = data.get(type); + Set worldResults = worldData.get(type); + if (results == null) { + results = worldResults; + target = "target2"; + } else if (results.equals(worldResults)) { + target = "target2"; + } + String out = ""; + if (results != null) { + out = results + ""; + out = out.substring(1, out.length() - 1); // remove [ and ] + } + pw.println("" + out + ""); + } + pw.println(""); + } + + public void showCorrespondances() { + // show correspondances between language and script + Map name_script = new TreeMap<>(); + for (Iterator it = ShowLanguages.sc.getAvailableCodes("script").iterator(); + it.hasNext(); ) { + String script = it.next(); + String name = + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.SCRIPT, script); + if (name == null) name = script; + name_script.put(name, script); + /* + * source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages + * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories + * : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages + * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? 
language_scripts + */ } + String delimiter = "\\P{L}+"; + Map name_language = new TreeMap<>(); + for (Iterator it = ShowLanguages.sc.getAvailableCodes("language").iterator(); + it.hasNext(); ) { + String language = it.next(); + String names = + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.LANGUAGE, language); + if (names == null) names = language; + name_language.put(names, language); + } + for (Iterator it = ShowLanguages.sc.getAvailableCodes("language").iterator(); + it.hasNext(); ) { + String language = it.next(); + String names = + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.LANGUAGE, language); + if (names == null) names = language; + String[] words = names.split(delimiter); + if (words.length > 1) { + // System.out.println(names); + } + for (int i = 0; i < words.length; ++i) { + String name = words[i]; + String script = name_script.get(name); + if (script != null) { + Set langSet = (Set) script_languages.asMap().get(script); + if (langSet != null && langSet.contains(language)) System.out.print("*"); + System.out.println( + "\t" + name + " [" + language + "]\t=> " + name + " [" + script + "]"); + } else { + String language2 = name_language.get(name); + if (language2 != null && !language.equals(language2)) { + Set langSet = (Set) language_scripts.get(language); + if (langSet != null) System.out.print("*"); + System.out.print( + "?\tSame script?\t + " + + getName(NameType.LANGUAGE, language, false) + + "\t & " + + getName(NameType.LANGUAGE, language2, false)); + langSet = (Set) language_scripts.get(language2); + if (langSet != null) System.out.print("*"); + System.out.println(); + } + } + } + } + } + + /** + * @throws IOException + */ + public void printCurrency(PrintWriter index) throws IOException { + PrintWriter pw = + new PrintWriter( + new FormattedFileWriter( + null, + "Detailed Territory-Currency Information", + null + // "

The following table shows when currencies were in use in + // different countries. " + + // "See also Decimal Digits and + // Rounding. " + + // "To correct any information here, please file a " + + // addBug(1274, "bug", "", "Currency Bug", + // "") + + // ".

" + , + ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + String section1 = "Territory to Currency"; + String section2 = "Decimal Digits and Rounding"; + ShowLanguages.showContents(pw, "territory_currency", section1, "format_info", section2); + + pw.println( + "

" + + CldrUtility.getDoubleLinkedText("territory_currency", "1. " + section1) + + "

"); + + // doTitle(pw, "Territory \u2192 Currency"); + pw.println(""); + pw.println( + "" + + "" + + "" + + "" + + "" + + "" + + ""); + + Relation currencyToTerritory = + Relation.of(new HashMap>(), HashSet.class); + Relation modernCurrencyToTerritory = + Relation.of(new HashMap>(), HashSet.class); + + for (Entry nameCode : ShowLanguages.NAME_TO_REGION.entrySet()) { + String name = nameCode.getKey(); + String regionCode = nameCode.getValue(); + if (!StandardCodes.isCountry(regionCode)) { + continue; + } + if (ShowLanguages.sc.isLstregPrivateUse("region", regionCode)) { + continue; + } + Set info = + ShowLanguages.supplementalDataInfo.getCurrencyDateInfo(regionCode); + + int infoSize = 1; + if (info != null) { + infoSize = info.size(); + } + pw.println( + "" + + "" + + ""); + if (info == null) { + pw.println( + "" + + "" + + "" + + "" + + ""); + continue; + } + boolean first = true; + for (CurrencyDateInfo infoItem : info) { + Date endData = infoItem.getEnd(); + if (endData.equals(CurrencyDateInfo.END_OF_TIME)) { + modernCurrencyToTerritory.put( + infoItem.getCurrency(), getTerritoryName(regionCode)); + } else { + currencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode)); + } + if (first) first = false; + else pw.println(""); + pw.println( + "" + + "" + + "" + + "" + + ""); + } + } + // doFooter(pw); + // pw.close(); + // pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info", null)); + pw.write("
TerritoryCodeFromToCurrencyName
" + + name + + "" + + CldrUtility.getDoubleLinkedText(regionCode) + + "" + + "na" + + "" + + "na" + + "" + + "na" + + "" + + "na" + + "
" + + CurrencyDateInfo.formatDate(infoItem.getStart()) + + "" + + CurrencyDateInfo.formatDate(endData) + + "" + + infoItem.getCurrency() + + "" + + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.CURRENCY, infoItem.getCurrency()) + + "
"); + + pw.println( + "

" + + CldrUtility.getDoubleLinkedText("format_info", "2. " + section2) + + "

"); + + pw.write( + "

This table shows the number of digits used for each currency, " + + " and the countries where it is or was in use. " + + "Countries where the currency is in current use are bolded. " + + "If the currency uses ‘nickel rounding’ in transactions, the digits are followed by ‘(5)’. " + + "Where the values are different in a cash context, that is shown in a second column." + + "

"); + pw.write("
"); + + // doTitle(pw, "Currency Format Info"); + // + + pw.println( + "" + + "" + + "" + + "" + + "" + + "" + + ""); + Set currencyList = new TreeSet(ShowLanguages.col); + currencyList.addAll(currency_fractions.keySet()); + currencyList.addAll(currency_territory.keySet()); + + for (Entry nameCode : ShowLanguages.NAME_TO_CURRENCY.entrySet()) { + // String name = nameCode.getKey(); + String currency = nameCode.getValue(); + CurrencyNumberInfo info = + ShowLanguages.supplementalDataInfo.getCurrencyNumberInfo(currency); + Set territories = currencyToTerritory.get(currency); + Set modernTerritories = modernCurrencyToTerritory.get(currency); + + // String fractions = (String) currency_fractions.get(currency); + // if (fractions == null) + // fractions = defaultDigits; + // Set territories = (Set) currency_territory.get(currency); + pw.print( + "" + + "" + + "" + + "" + + "" + + ""); + } + pw.println("
NameCurrencyDigitsCash DigitsCountries
" + + TransliteratorUtilities.toHTML.transform( + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.CURRENCY, currency)) + + "" + + CldrUtility.getDoubleLinkedText(currency) + + "" + + info.getDigits() + + (info.getRounding() == 0 ? "" : " (" + info.getRounding() + ")") + + "" + + (info.cashDigits == info.getDigits() + && info.cashRounding == info.getRounding() + ? "" + : (info.cashDigits + + (info.cashRounding == 0 + ? "" + : " (" + info.cashRounding + ")"))) + + ""); + boolean first = true; + boolean needBreak = false; + if (modernTerritories != null) { + needBreak = true; + for (String territory : modernTerritories) { + if (first) first = false; + else pw.print(", "); + pw.print("" + territory + ""); + } + } + // boolean haveBreak = true; + if (territories != null) { + for (String territory : territories) { + if (first) first = false; + else if (!needBreak) pw.print(", "); + else { + pw.print(",
"); + needBreak = false; + } + pw.print(territory); + } + } + pw.println("
"); + pw.close(); + // doFooter(pw); + + // if (false) { + // doTitle(pw, "Territories Versus Currencies"); + // pw.println("Territories Without CurrenciesCurrencies Without + // Territories"); + // pw.println(""); + // Set territoriesWithoutCurrencies = new TreeSet(); + // territoriesWithoutCurrencies.addAll(sc.getGoodAvailableCodes("territory")); + // territoriesWithoutCurrencies.removeAll(territoriesWithCurrencies); + // territoriesWithoutCurrencies.removeAll(group_contains.keySet()); + // boolean first = true; + // for (Iterator it = territoriesWithoutCurrencies.iterator(); it.hasNext();) { + // if (first) first = false; + // else pw.print(", "); + // pw.print(englishNameGetter.getName(NameType.TERRITORY, it.next().toString(), + // false)); + // } + // pw.println(""); + // Set currenciesWithoutTerritories = new TreeSet(); + // currenciesWithoutTerritories.addAll(sc.getGoodAvailableCodes("currency")); + // currenciesWithoutTerritories.removeAll(currenciesWithTerritories); + // first = true; + // for (Iterator it = currenciesWithoutTerritories.iterator(); it.hasNext();) { + // if (first) first = false; + // else pw.print(", "); + // pw.print(englishNameGetter.getName(NameType.CURRENCY, it.next().toString(), + // false)); + // } + // pw.println(""); + // doFooter(pw); + // } + } + + private String getTerritoryName(String territory) { + String name; + name = + ShowLanguages.englishNameGetter.getNameFromTypeEnumCode( + NameType.TERRITORY, territory); + if (name == null) { + name = ShowLanguages.sc.getData("territory", territory); + } + if (name != null) { + return TransliteratorUtilities.toHTML.transform(name) + " (" + territory + ")"; + } else { + return territory; + } + } + + /** + * @throws IOException + */ + public void printAliases(PrintWriter index) throws IOException { + PrintWriter pw = + new PrintWriter( + new FormattedFileWriter( + null, "Aliases", null, ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + + // doTitle(pw, "Aliases"); + pw.println(""); + 
pw.println( + "" + + "" + + "" + + ""); + for (Iterator it = aliases.iterator(); it.hasNext(); ) { + String[] items = it.next(); + pw.println( + "" + + "" + + "" + + ""); + } + // doFooter(pw); + pw.println("
" + + "Type" + + "" + + "Code" + + "" + + "Reason" + + "" + + "Substitute (if available)" + + "
" + + items[0] + + "" + + CldrUtility.getDoubleLinkedText(items[1]) + + "" + + items[3] + + "" + + items[2] + + "
"); + pw.close(); + } + + // deprecatedItems + // public void printDeprecatedItems(PrintWriter pw) { + // doTitle(pw, "Deprecated Items"); + // pw.print("TypeElementsAttributesValues"); + // for (Iterator it = deprecatedItems.iterator(); it.hasNext();) { + // Map source = (Map)it.next(); + // Object item; + // pw.print(""); + // pw.print("" + ((item = source.get("type")) != null ? item : "any") + // + ""); + // pw.print("" + ((item = source.get("elements")) != null ? item : + // "any") + ""); + // pw.print("" + ((item = source.get("attributes")) != null ? item : + // "any") + ""); + // pw.print("" + ((item = source.get("values")) != null ? item : + // "any") + ""); + // pw.print(""); + // } + // doFooter(pw); + // } + + public void printWindows_Tzid(PrintWriter index) throws IOException { + Map>> zoneMapping = + ShowLanguages.supplementalDataInfo.getTypeToZoneToRegionToZone(); + PrintWriter pw = + new PrintWriter( + new FormattedFileWriter( + null, + "Zone \u2192 Tzid", + null, + ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + for (Entry>> typeAndZoneToRegionToZone : + zoneMapping.entrySet()) { + String type = typeAndZoneToRegionToZone.getKey(); + Map> zoneToRegionToZone = + typeAndZoneToRegionToZone.getValue(); + pw.println("

Mapping for: " + type + "


"); + // doTitle(pw, "Windows \u2192 Tzid"); + pw.println(""); + pw.println( + ""); + + for (Entry> zoneAndregionToZone : + zoneToRegionToZone.entrySet()) { + String source = zoneAndregionToZone.getKey(); + Map regionToZone = zoneAndregionToZone.getValue(); + for (Entry regionAndZone : regionToZone.entrySet()) { + String region = regionAndZone.getKey(); + String target = regionAndZone.getValue(); + if (region == null) region = "any"; + pw.println( + ""); + } + } + // doFooter(pw); + pw.println("
" + + type + + "" + + "Region" + + "" + + "TZID" + + "
" + + source + + "" + + region + + "" + + target + + "
"); + } + pw.close(); + } + + // + + public void printCharacters(PrintWriter index) throws IOException { + String title = "Character Fallback Substitutions"; + + PrintWriter pw = + new PrintWriter( + new FormattedFileWriter( + null, title, null, ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + // doTitle(pw, title); + pw.println(""); + + pw.println( + ""); + UnicodeSet chars = new UnicodeSet("[:NFKC_QuickCheck=N:]"); + for (com.ibm.icu.text.UnicodeSetIterator it = + new com.ibm.icu.text.UnicodeSetIterator(chars); + it.next(); ) { + String value = it.getString(); + addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFC)); + addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFKC)); + } + int[] counts = new int[4]; + for (Iterator it = charSubstitutions.keySet().iterator(); it.hasNext(); ) { + String value = it.next(); + LinkedHashSet substitutes = charSubstitutions.get(value); + String nfc = Normalizer.normalize(value, Normalizer.NFC); + String nfkc = Normalizer.normalize(value, Normalizer.NFKC); + + String sourceTag = "" + + (!first + ? "" + : sourceTag + + hex(value, ", ") + + "" + + sourceTag + + TransliteratorUtilities.toHTML.transliterate( + value) + + "" + + sourceTag + + UCharacter.getName(value, ", ") + + "") + + targetTag + + type + + "" + + targetTag + + hex(substitute, ", ") + + "" + + targetTag + + TransliteratorUtilities.toHTML.transliterate(substitute) + + "" + + targetTag + + UCharacter.getName(substitute, ", ") + + ""); + first = false; + } + } + // doFooter(pw); + pw.println("
Substitute for character (if not in repertoire)The following (in priority order, first string that is in repertoire)
"; + if (substitutes.size() > 1) { + sourceTag = ""; + } + boolean first = true; + for (Iterator it2 = substitutes.iterator(); it2.hasNext(); ) { + String substitute = it2.next(); + String type = "Explicit"; + String targetTag = ""; + if (substitute.equals(nfc)) { + type = "NFC"; + targetTag = ""; + counts[2]++; + } else if (substitute.equals(nfkc)) { + type = "NFKC"; + targetTag = ""; + counts[3]++; + } else { + counts[0]++; + } + pw.println( + "
"); + + pw.close(); + for (int i = 0; i < counts.length; ++i) { + System.out.println("Count\t" + i + "\t" + counts[i]); + } + } + + public static String hex(String s, String separator) { + StringBuffer result = new StringBuffer(); + int cp; + for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { + cp = UTF16.charAt(s, i); + if (i != 0) result.append(separator); + result.append(com.ibm.icu.impl.Utility.hex(cp)); + } + return result.toString(); + } + + /** */ + // private PrintWriter doTitle(PrintWriter pw, String title) { + // //String anchor = FileUtilities.anchorize(title); + // pw.println("
"); + // //anchors.put(title, anchor); + // //PrintWriter result = null; + // //return result; + // } + + // private void doFooter(PrintWriter pw) { + // pw.println("
"); + // } + public void printContains2( + PrintWriter pw, String lead, String start, int depth, boolean isFirst) { + String name = depth == 4 ? start : getName(NameType.TERRITORY, start, false); + if (!isFirst) pw.print(lead); + int count = getTotalContainedItems(start, depth); + pw.print( + "" + + name + + ""); // colSpan='" + (5 - + // depth) + "' + if (depth == 4) pw.println(""); + Collection contains = getContainedCollection(start, depth); + if (contains != null) { + Collection contains2 = new TreeSet(territoryNameComparator); + contains2.addAll(contains); + boolean first = true; + for (Iterator it = contains2.iterator(); it.hasNext(); ) { + String item = it.next(); + printContains2(pw, lead, item, depth + 1, first); // + " " + first = false; + } + } + } + + private int getTotalContainedItems(String start, int depth) { + Collection c = getContainedCollection(start, depth); + if (c == null) return 1; + int sum = 0; + for (Iterator it = c.iterator(); it.hasNext(); ) { + sum += getTotalContainedItems(it.next(), depth + 1); + } + return sum; + } + + /** */ + private Collection getContainedCollection(String start, int depth) { + Collection contains = + ShowLanguages.supplementalDataInfo.getContainmentCore().get(start); + if (contains == null) { + contains = ShowLanguages.sc.getCountryToZoneSet().get(start); + if (contains == null && depth == 3) { + contains = new TreeSet<>(); + if (start.compareTo("A") >= 0) { + contains.add("MISSING TZID"); + } else { + contains.add("Not yet ISO code"); + } + } + } + return contains; + } + + private String getName(NameType nameType, String oldcode, boolean codeFirst) { + if (oldcode.contains(" ")) { + String[] result = oldcode.split("\\s+"); + for (int i = 0; i < result.length; ++i) { + result[i] = getName(nameType, result[i], codeFirst); + } + return CldrUtility.join(Arrays.asList(result), ", "); + } else { + int pos = oldcode.indexOf('*'); + String code = pos < 0 ? 
oldcode : oldcode.substring(0, pos); + String ename = ShowLanguages.englishNameGetter.getNameFromTypeEnumCode(nameType, code); + String nameString = ename == null ? code : ename; + return nameString.equals(oldcode) + ? nameString + : codeFirst + ? "[" + oldcode + "]" + "\t" + nameString + : nameString + "\t" + "[" + oldcode + "]"; + } + } + + private String getName(String locale, boolean codeFirst) { + String ename = getLanguageName(locale); + return codeFirst + ? "[" + locale + "]\t" + (ename == null ? locale : ename) + : (ename == null ? locale : ename) + "\t[" + locale + "]"; + } + + Comparator territoryNameComparator = + new Comparator() { + @Override + public int compare(Object o1, Object o2) { + return ShowLanguages.col.compare( + getName(NameType.TERRITORY, (String) o1, false), + getName(NameType.TERRITORY, (String) o2, false)); + } + }; + + static String[] stringArrayPattern = new String[0]; + static String[][] string2ArrayPattern = new String[0][]; + + public static Map territoryAliases = new HashMap<>(); + + public void printContains(PrintWriter index) throws IOException { + String title = "Territory Containment (UN M.49)"; + + PrintWriter pw = + new PrintWriter( + new FormattedFileWriter( + null, title, null, ShowLanguages.SUPPLEMENTAL_INDEX_ANCHORS)); + // doTitle(pw, title); + List rows = new ArrayList<>(); + printContains3("001", rows, new ArrayList()); + TablePrinter tablePrinter = + new TablePrinter() + .addColumn("World", "class='source'", null, "class='z0'", true) + .setSortPriority(0) + .addColumn("Continent", "class='source'", null, "class='z1'", true) + .setSortPriority(1) + .addColumn("Subcontinent", "class='source'", null, "class='z2'", true) + .setSortPriority(2) + .addColumn( + "Country (Territory)", "class='source'", null, "class='z3'", true) + .setSortPriority(3) + .addColumn("Time Zone", "class='source'", null, "class='z4'", true) + .setSortPriority(4); + String[][] flatData = rows.toArray(string2ArrayPattern); + 
pw.println(tablePrinter.addRows(flatData).toTable()); + + showSubtable(pw, ContainmentStyle.grouping, "Groupings", "Grouping", "Contained Regions"); + showSubtable( + pw, ContainmentStyle.deprecated, "Deprecated", "Container", "Deprecated Region"); + + // Relation deprecated = supplementalDataInfo + // .getTerritoryToContained(ContainmentStyle.deprecated); + // + // for (String region : deprecated.keySet()) { + // nameToContainers.add(region); + // } + // pw.println("

Groupings and Deprecated Regions

"); + // for (String region : nameToContainers) { + // String name = getName(NameType.TERRITORY, region, false); + // Set dep = deprecated.get(region); + // Set gro = grouping.get(region); + // Iterator depIt = (dep == null ? Collections.EMPTY_SET : + // dep).iterator(); + // Iterator groIt = (gro == null ? Collections.EMPTY_SET : + // gro).iterator(); + // while (depIt.hasNext() || groIt.hasNext()) { + // String dep1 = depIt.hasNext() ? getName(NameType.TERRITORY, + // depIt.next(), false) : ""; + // String gro1 = groIt.hasNext() ? getName(NameType.TERRITORY, + // groIt.next(), false) : ""; + // tablePrinter2.addRow() + // .addCell(name) + // .addCell(gro1) + // .addCell(dep1) + // .finishRow(); + // } + // } + // pw.println(tablePrinter2.toTable()); + // pw.println("

Other Groupings

"); + // for (Entry> regionContained : grouping.keyValuesSet()) + // { + // showContainers(pw, regionContained); + // } + // + // pw.println("

Deprecated Codes

"); + // for (Entry> regionContained : + // deprecated.keyValuesSet()) { + // showContainers(pw, regionContained); + // } + pw.close(); + } + + public void showSubtable( + PrintWriter pw, + ContainmentStyle containmentStyle, + String title, + String containerTitle, + String containeeTitle) { + pw.println("

" + title + "

"); + TablePrinter tablePrinter2 = + new TablePrinter() + .addColumn(containerTitle, "class='source'", null, "class='z0'", true) + .setSortPriority(0) + .addColumn(containeeTitle, "class='source'", null, "class='z4'", true) + .setSortPriority(1); + + Relation grouping = + ShowLanguages.supplementalDataInfo.getTerritoryToContained(containmentStyle); + + for (Entry containerRegion : grouping.keyValueSet()) { + String container = getName(NameType.TERRITORY, containerRegion.getKey(), false); + String containee = getName(NameType.TERRITORY, containerRegion.getValue(), false); + tablePrinter2.addRow().addCell(container).addCell(containee).finishRow(); + } + pw.println(tablePrinter2.toTable()); + } + + public void showContainers(PrintWriter pw, Entry> regionContained) { + String region = regionContained.getKey(); + Set contained = regionContained.getValue(); + pw.println("
  • " + getName(NameType.TERRITORY, region, false) + "
      "); + for (String sub : contained) { + pw.println("
    • " + getName(NameType.TERRITORY, sub, false) + "
    • "); + } + pw.println("
"); + } + + private void printContains3(String start, List rows, ArrayList currentRow) { + int len = currentRow.size(); + if (len > 3) { + return; // skip long items + } + currentRow.add(getName(NameType.TERRITORY, start, false)); + // Collection contains = (Collection) group_contains.get(start); + Collection contains = + ShowLanguages.supplementalDataInfo.getContainmentCore().get(start); + if (contains == null) { + contains = ShowLanguages.sc.getCountryToZoneSet().get(start); + currentRow.add(""); + if (contains == null) { + currentRow.set(len + 1, "???"); + rows.add(currentRow.toArray(stringArrayPattern)); + } else { + for (String item : contains) { + currentRow.set(len + 1, item); + rows.add(currentRow.toArray(stringArrayPattern)); + } + } + currentRow.remove(len + 1); + } else { + for (String item : contains) { + if (territoryAliases.keySet().contains(item)) { + continue; + } + printContains3(item, rows, currentRow); + } + } + currentRow.remove(len); + } +} diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowLanguages.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowLanguages.java index 8cede73a4f0..7aa10eff56a 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowLanguages.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowLanguages.java @@ -6,20 +6,10 @@ */ package org.unicode.cldr.tool; -import com.google.common.base.Joiner; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Multimap; -import com.google.common.collect.Multimaps; -import com.google.common.collect.Sets; -import com.google.common.collect.TreeMultimap; import com.ibm.icu.impl.Relation; -import com.ibm.icu.impl.Row.R2; -import com.ibm.icu.impl.Row.R4; -import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.Collator; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.Normalizer2; -import com.ibm.icu.text.NumberFormat; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; import 
com.ibm.icu.util.ICUUncheckedIOException; @@ -27,10 +17,7 @@ import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Date; @@ -38,11 +25,8 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -50,14 +34,11 @@ import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.draft.ScriptMetadata; import org.unicode.cldr.draft.ScriptMetadata.Info; -import org.unicode.cldr.util.ArrayComparator; import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRFile.WinningChoice; -import org.unicode.cldr.util.CLDRLocale; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CLDRTool; -import org.unicode.cldr.util.CLDRURLS; import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.FileCopier; @@ -65,28 +46,21 @@ import org.unicode.cldr.util.Iso639Data.Scope; import org.unicode.cldr.util.Iso639Data.Type; import org.unicode.cldr.util.LanguageTagParser; -import org.unicode.cldr.util.Level; -import org.unicode.cldr.util.Log; import org.unicode.cldr.util.NameGetter; import org.unicode.cldr.util.NameType; -import org.unicode.cldr.util.Organization; import org.unicode.cldr.util.StandardCodes; import org.unicode.cldr.util.StandardCodes.CodeType; import org.unicode.cldr.util.SupplementalDataInfo; import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; -import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; -import 
org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; -import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; -import org.unicode.cldr.util.TransliteratorUtilities; import org.unicode.cldr.util.XPathParts; @CLDRTool(alias = "showlanguages", description = "Generate Language info charts") public class ShowLanguages { private static final boolean SHOW_NATIVE = true; - private static final boolean SHOW_SKIPPED = false; - private static int skipped = 0; + static final boolean SHOW_SKIPPED = false; + static int skipped = 0; static Comparator col = new org.unicode.cldr.util.MultiComparator( @@ -98,7 +72,7 @@ public class ShowLanguages { static Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory(); // .make(CLDRPaths.MAIN_DIRECTORY, ".*"); static CLDRFile english = CLDRConfig.getInstance().getEnglish(); - private static NameGetter englishNameGetter = english.nameGetter(); + static NameGetter englishNameGetter = english.nameGetter(); public static void main(String[] args) throws IOException { System.out.println("Writing into " + FormattedFileWriter.CHART_TARGET_DIR); @@ -781,2083 +755,6 @@ private static void addLanguageScriptCells( } } - // TODO This is old code that read supplemental data. Should be replaced by using - // SupplementalDataInfo. 
- // https://unicode-org.atlassian.net/browse/CLDR-15673 - - static class LanguageInfo { - private static final Map> localeAliasInfo = new TreeMap<>(); - - Multimap language_scripts = TreeMultimap.create(); - - Multimap language_territories = TreeMultimap.create(); - - List> deprecatedItems = new ArrayList<>(); - - Multimap territory_languages; - - Multimap script_languages; - - // Map group_contains = new TreeMap(); - - Set aliases = - new TreeSet( - new ArrayComparator(new Comparator[] {new UTF16.StringComparator(), col})); - - Comparator col3 = new ArrayComparator(new Comparator[] {col, col, col}); - - Map currency_fractions = new TreeMap(col); - - Map currency_territory = new TreeMap(col); - - Map territory_currency = new TreeMap(col); - - Set territoriesWithCurrencies = new TreeSet<>(); - - Set currenciesWithTerritories = new TreeSet<>(); - - Map>> territoryData = new TreeMap<>(); - - Set territoryTypes = new TreeSet<>(); - - Map> charSubstitutions = - new TreeMap>(col); - - String defaultDigits = null; - - Map> territoryLanguageData = new TreeMap<>(); - - private Relation territoriesToModernCurrencies = - Relation.of(new TreeMap>(), TreeSet.class, null); - - public LanguageInfo(Factory cldrFactory) throws IOException { - CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); - for (String path : supp) { - String fullPath = supp.getFullXPath(path); - if (fullPath == null) { - supp.getFullXPath(path); - } - XPathParts parts = XPathParts.getFrozenInstance(fullPath); - - // - if (path.indexOf("/zoneItem") >= 0) { - Map attributes = parts.getAttributes(parts.size() - 1); - String type = attributes.get("type"); - // String territory = attributes.get("territory"); - String aliasAttributes = attributes.get("aliases"); - if (aliasAttributes != null) { - String[] aliasesList = aliasAttributes.split("\\s+"); - - for (int i = 0; i < aliasesList.length; ++i) { - String alias = aliasesList[i]; - aliases.add(new String[] {"timezone", alias, type}); - } - } - 
// TODO territory, multizone - continue; - } - - if (path.indexOf("/currencyData") >= 0) { - if (path.indexOf("/fractions") >= 0) { - // - String element = parts.getElement(parts.size() - 1); - if (!element.equals("info")) - throw new IllegalArgumentException( - "Unexpected fractions element: " + element); - Map attributes = parts.getAttributes(parts.size() - 1); - String iso4217 = attributes.get("iso4217"); - String digits = attributes.get("digits"); - String rounding = attributes.get("rounding"); - digits = digits + (rounding.equals("0") ? "" : " (" + rounding + ")"); - if (iso4217.equals("DEFAULT")) defaultDigits = digits; - else - currency_fractions.put( - getName(NameType.CURRENCY, iso4217, false), digits); - continue; - } - // - // - if (path.indexOf("/region") >= 0) { - Map attributes = parts.getAttributes(parts.size() - 2); - String iso3166 = attributes.get("iso3166"); - attributes = parts.getAttributes(parts.size() - 1); - String iso4217 = attributes.get("iso4217"); - String to = attributes.get("to"); - if (to == null) to = "\u221E"; - String from = attributes.get("from"); - if (from == null) from = "-\u221E"; - String countryName = getName(NameType.TERRITORY, iso3166, false); - String currencyName = getName(NameType.CURRENCY, iso4217, false); - Set info = territory_currency.get(countryName); - if (info == null) - territory_currency.put(countryName, info = new TreeSet(col3)); - info.add(new String[] {from, to, currencyName}); - info = currency_territory.get(currencyName); - if (info == null) - currency_territory.put(currencyName, info = new TreeSet(col)); - territoriesWithCurrencies.add(iso3166); - currenciesWithTerritories.add(iso4217); - if (to.equals("\u221E") || to.compareTo("2006") > 0) { - territoriesToModernCurrencies.put(iso3166, iso4217); - info.add("" + countryName + ""); - - } else { - info.add("" + countryName + ""); - } - continue; - } - } - - if (path.indexOf("/languageData") >= 0) { - Map attributes = parts.findAttributes("language"); - 
String language = attributes.get("type"); - String alt = attributes.get("alt"); - addTokens(language, attributes.get("scripts"), " ", language_scripts); - // mark the territories - if (alt == null) - ; // nothing - else if ("secondary".equals(alt)) language += "*"; - else language += "*" + alt; - // - addTokens(language, attributes.get("territories"), " ", language_territories); - continue; - } - - if (path.indexOf("/deprecatedItems") >= 0) { - deprecatedItems.add(parts.findAttributes("deprecatedItems")); - continue; - } - if (path.indexOf("/calendarPreferenceData/calendarPreference") >= 0) { - Map attributes = parts.findAttributes("calendarPreference"); - if (attributes == null) { - System.err.println( - "Err: on path " - + fullPath - + " , no attributes on 'calendarPreference'. Probably, this tool is out of date."); - } else { - String ordering = attributes.get("ordering"); - String territories = attributes.get("territories"); - if (territories == null) { - System.err.println( - "Err: on path " - + fullPath - + ", missing territories. Probably, this tool is out of date."); - } else if (ordering == null) { - System.err.println( - "Err: on path " - + fullPath - + ", missing ordering. Probably, this tool is out of date."); - } else { - addTerritoryInfo(territories, "Preferred Calendar", ordering); - } - } - } - if (path.indexOf("/weekData") >= 0 || path.indexOf("measurementData") >= 0) { - String element = parts.getElement(parts.size() - 1); - Map attributes = parts.getAttributes(parts.size() - 1); - // later, make this a table - String key = "count"; - String display = "Days in week (min)"; - boolean useTerritory = true; - switch (element) { - case "firstDay": - key = "day"; - display = "First day of week"; - break; - case "weekendStart": - key = "day"; - display = "First day of weekend"; - break; - case "weekendEnd": - key = "day"; - display = "Last day of weekend"; - break; - case "measurementSystem": - // - key = "type"; - display = "Meas. 
system"; - break; - case "paperSize": - key = "type"; - display = "Paper Size"; - break; - case "weekOfPreference": - useTerritory = false; - break; - } - if (useTerritory) { - String type = attributes.get(key); - String territories = attributes.get("territories"); - addTerritoryInfo(territories, display, type); - } - } - if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0) continue; - skipped++; - if (SHOW_SKIPPED) { - System.out.println("Skipped Element: " + path); - } - } - - for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) { - for (String language : - supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( - territory)) { - language_territories.put(language, territory); - } - } - territory_languages = Multimaps.invertFrom(language_territories, TreeMultimap.create()); - script_languages = Multimaps.invertFrom(language_scripts, TreeMultimap.create()); - - // now get some metadata - localeAliasInfo.put("language", new TreeMap()); - localeAliasInfo.put("script", new TreeMap()); - localeAliasInfo.put("territory", new TreeMap()); - localeAliasInfo.put("variant", new TreeMap()); - localeAliasInfo.put("zone", new TreeMap()); - localeAliasInfo.put("subdivision", new TreeMap()); - localeAliasInfo.put("unit", new TreeMap()); - localeAliasInfo.put("usage", new TreeMap()); - - // localeAliasInfo.get("language").put("nb", "no"); - localeAliasInfo.get("language").put("zh_CN", "zh_Hans_CN"); - localeAliasInfo.get("language").put("zh_SG", "zh_Hans_SG"); - localeAliasInfo.get("language").put("zh_TW", "zh_Hant_TW"); - localeAliasInfo.get("language").put("zh_MO", "zh_Hant_MO"); - localeAliasInfo.get("language").put("zh_HK", "zh_Hant_HK"); - - // CLDRFile supp2 = cldrFactory.make(CLDRFile.SUPPLEMENTAL_METADATA, false); - Map, String>>> localeAliasInfo2 = - supplementalDataInfo.getLocaleAliasInfo(); - for (Entry, String>>> entry1 : - localeAliasInfo2.entrySet()) { - String element = entry1.getKey(); - for (Entry, String>> 
entry2 : - entry1.getValue().entrySet()) { - String type = entry2.getKey(); - R2, String> replacementReason = entry2.getValue(); - List replacementList = replacementReason.get0(); - String replacement = - replacementList == null ? null : Joiner.on(" ").join(replacementList); - String reason = replacementReason.get1(); - if (element.equals("timezone")) { - element = "zone"; - } - try { - localeAliasInfo - .get(element) - .put(type, replacement == null ? "?" : replacement); - } catch (Exception e) { - // TODO Auto-generated catch block - throw new IllegalArgumentException( - "Can't find alias data for '" + element + "'", e); - } - - String name = ""; - if (replacement == null) { - name = "(none)"; - } else if (element.equals("language")) { - name = getName(replacement, false); - } else if (element.equals("zone")) { - element = "timezone"; - name = replacement + "*"; - } else { - NameType nameType = NameType.typeNameToCode(element); - if (nameType != NameType.NONE) { - name = getName(nameType, replacement, false); - } else { - name = "*" + replacement; - } - } - if (element.equals("territory")) { - territoryAliases.put(type, name); - aliases.add( - new String[] { - element, getName(NameType.TERRITORY, type, false), name, reason - }); - } else { - aliases.add(new String[] {element, type, name, reason}); - } - continue; - } - } - Log.setLog(CLDRPaths.CHART_DIRECTORY + "supplemental/", "characterLog.txt"); - Log.close(); - } - - public void printLikelySubtags(PrintWriter index) throws IOException { - - PrintWriter pw = - new PrintWriter( - new FormattedFileWriter( - null, "Likely Subtags", null, SUPPLEMENTAL_INDEX_ANCHORS)); - - TablePrinter tablePrinter = - new TablePrinter() - .addColumn( - "Source Lang", "class='source'", null, "class='source'", true) - .setSortPriority(1) - .setSpanRows(false) - .addColumn( - "Source Script", "class='source'", null, "class='source'", true) - .setSortPriority(0) - .setSpanRows(false) - .setBreakSpans(true) - .addColumn( - "Source 
Region", "class='source'", null, "class='source'", true) - .setSortPriority(2) - .setSpanRows(false) - .addColumn( - "Target Lang", "class='target'", null, "class='target'", true) - .setSortPriority(3) - .setBreakSpans(true) - .addColumn( - "Target Script", "class='target'", null, "class='target'", true) - .setSortPriority(4) - .addColumn( - "Target Region", "class='target'", null, "class='target'", true) - .setSortPriority(5) - .addColumn( - "Source ID", - "class='source'", - CldrUtility.getDoubleLinkMsg(), - "class='source'", - true) - .addColumn("Target ID", "class='target'", null, "class='target'", true); - Map subtags = supplementalDataInfo.getLikelySubtags(); - LanguageTagParser sourceParsed = new LanguageTagParser(); - LanguageTagParser targetParsed = new LanguageTagParser(); - for (String source : subtags.keySet()) { - String target = subtags.get(source); - sourceParsed.set(source); - targetParsed.set(target); - tablePrinter - .addRow() - .addCell(getName(NameType.LANGUAGE, sourceParsed.getLanguage())) - .addCell(getName(NameType.SCRIPT, sourceParsed.getScript())) - .addCell(getName(NameType.TERRITORY, sourceParsed.getRegion())) - .addCell(getName(NameType.LANGUAGE, targetParsed.getLanguage())) - .addCell(getName(NameType.SCRIPT, targetParsed.getScript())) - .addCell(getName(NameType.TERRITORY, targetParsed.getRegion())) - .addCell(source) - .addCell(target) - .finishRow(); - } - pw.println(tablePrinter.toTable()); - pw.close(); - } - - static class LanguageData extends R4 { - public LanguageData(Double a, Double b, Double c, String d) { - super(a, b, c, d); - } - } - - private String getName(final NameType nameType, final String value) { - if (value == null || value.equals("") || value.equals("und")) { - return "\u00A0"; - } - String result = englishNameGetter.getNameFromTypeEnumCode(nameType, value); - if (result == null) { - result = value; - } - return result; - } - - static final Comparator INVERSE_COMPARABLE = - new Comparator() { - @Override - public 
int compare(Object o1, Object o2) { - return ((Comparable) o2).compareTo(o1); - } - }; - - // http://www.faqs.org/rfcs/rfc2396.html - // delims = "<" | ">" | "#" | "%" | <"> - // "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" - // Within a query component, the characters ";", "/", "?", ":", "@", - // "&", "=", "+", ",", and "$" are reserved. - static final UnicodeSet ESCAPED_URI_QUERY = - new UnicodeSet( - "[\\u0000-\\u0020\\u007F <>#%\"\\{}|\\\\\\^\\[\\]`;/?:@\\&=+,$\\u0080-\\U0001FFFF]") - .freeze(); - - private static final int MINIMAL_BIG_VENDOR = 8; - - static { - System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement()); - } - - private String urlEncode(String input) { - try { - byte[] utf8 = input.getBytes("utf-8"); - StringBuffer output = new StringBuffer(); - for (int i = 0; i < utf8.length; ++i) { - int b = utf8[i] & 0xFF; - if (ESCAPED_URI_QUERY.contains(b)) { - output.append('%'); - if (b < 0x10) output.append('0'); - output.append(Integer.toString(b, 16)); - } else { - output.append((char) b); - } - } - return output.toString(); - } catch (UnsupportedEncodingException e) { - throw (IllegalArgumentException) new IllegalArgumentException().initCause(e); - } - } - - private String addBug( - int bugNumber, String text, String from, String subject, String body) { - return "" + text + ""; - } - - private void showLanguageCountryInfo(PrintWriter pw) throws IOException { - FormattedFileWriter ffw = - new FormattedFileWriter( - null, - "Language-Territory Information", - null - // "

The language data is provided for - // localization testing, and is under development for CLDR 1.5. " - // + - // "To add a new territory for a language, see the add new links - // below. " + - // "For more information, see Territory-Language - // Information." - // + - // "

" - , - SUPPLEMENTAL_INDEX_ANCHORS); - PrintWriter pw21 = new PrintWriter(ffw); - PrintWriter pw2 = pw21; - NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); - nf.setGroupingUsed(true); - // NumberFormat percent = new DecimalFormat("000.0%"); - TablePrinter tablePrinter = - new TablePrinter() - // tablePrinter.setSortPriorities(0,5) - .addColumn("L", "class='source'", null, "class='source'", true) - .setSortPriority(0) - .setBreakSpans(true) - .setRepeatHeader(true) - .setHidden(true) - .addColumn("Language", "class='source'", null, "class='source'", true) - .setSortPriority(0) - .setBreakSpans(true) - .addColumn( - "Code", - "class='source'", - CldrUtility.getDoubleLinkMsg(), - "class='source'", - true) - // .addColumn("Report Bug", "class='target'", null, "class='target'", - // false) - .addColumn("Territory", "class='target'", null, "class='target'", true) - .addColumn( - "Code", - "class='target'", - "{0}", - "class='target'", - true) - .addColumn( - "Language Population", - "class='target'", - "{0,number,#,#@@}", - "class='targetRight'", - true) - .setSortPriority(1) - .setSortAscending(false) - // .addColumn("Territory Population", "class='target'", "{0,number,#,##0}", - // "class='targetRight'", true) - // .addColumn("Language Literacy", "class='target'", "{0,number,00.0}%", - // "class='targetRight'", true) - // .addColumn("Territory Literacy", "class='target'", "{0,number,00.0}%", - // "class='targetRight'", true) - // .addColumn("Territory GDP (PPP)", "class='target'", "{0,number,#,##0}", - // "class='targetRight'", true) - ; - TreeSet languages = new TreeSet<>(); - Collection data = new ArrayList<>(); - String msg = "
Please click on each country code"; - - Collection plainData = new ArrayList<>(); - - for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { - // PopulationData territoryData = - // supplementalDataInfo.getPopulationDataForTerritory(territoryCode); - String territoryName = - englishNameGetter.getNameFromTypeEnumCode( - NameType.TERRITORY, territoryCode); - for (String languageCode : - supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( - territoryCode)) { - PopulationData languageData = - supplementalDataInfo.getLanguageAndTerritoryPopulationData( - languageCode, territoryCode); - languages.add(languageCode); - Comparable[] items = - new Comparable[] { - getFirstPrimaryWeight(getLanguageName(languageCode)), - getLanguageName(languageCode), // + getLanguagePluralMessage(msg, - // languageCode), - languageCode, - // bug, - territoryName + getOfficialStatus(territoryCode, languageCode), - territoryCode, - languageData.getPopulation(), - // population, - // languageliteracy, - // territoryLiteracy, - // gdp - }; - Comparable[] plainItems = - new Comparable[] { - getLanguageName(languageCode), // + getLanguagePluralMessage(msg, - // languageCode), - languageCode, - territoryName, - territoryCode, - getRawOfficialStatus(territoryCode, languageCode), - languageData.getPopulation(), - languageData.getLiteratePopulation() - }; - - data.add(items); - plainData.add(plainItems); - } - } - for (String languageCode : languages) { - Comparable[] items = - new Comparable[] { - getFirstPrimaryWeight(getLanguageName(languageCode)), - getLanguageName( - languageCode), // + getLanguagePluralMessage(msg, languageCode), - languageCode, - // bug, - addBug( - 1217, - "add new", - "", - "Add territory to " - + getLanguageName(languageCode) - + " (" - + languageCode - + ")", - ""), - "", - 0.0d, - // 0.0d, - // 0.0d, - // 0.0d, - // gdp - }; - data.add(items); - } - Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); - String 
value = tablePrinter.addRows(flattened).toTable(); - pw2.println(value); - pw2.close(); - try (PrintWriter pw21plain = - FileUtilities.openUTF8Writer(ffw.getDir(), ffw.getBaseFileName() + ".txt")) { - for (Comparable[] row : plainData) { - pw21plain.println(Joiner.on("\t").join(row)); - } - } - } - - private String getLanguagePluralMessage(String msg, String languageCode) { - String mainLanguageCode = new LanguageTagParser().set(languageCode).getLanguage(); - String messageWithPlurals = - msg - + ", on plurals" - + ", and on likely-subtags"; - return messageWithPlurals; - } - - private String getLanguageName(String languageCode) { - String result = - englishNameGetter.getNameFromIdentifierOptAlt( - languageCode, NameGetter.NameOpt.COMPOUND_ONLY, CLDRFile.SHORT_ALTS); - if (!result.equals(languageCode)) return result; - Set names = Iso639Data.getNames(languageCode); - if (names != null && names.size() != 0) { - return names.iterator().next(); - } - return languageCode; - } - - static final Set TC_Vendors = - Sets.union( - Organization.getTCOrgs(), - // This adds the CLDR org at the end of the list - Set.of(Organization.cldr)); - - private void showCoverageGoals(PrintWriter pw) throws IOException { - try (PrintWriter pw2 = - new PrintWriter( - new FormattedFileWriter( - null, - "Coverage Goals", - null, - SUPPLEMENTAL_INDEX_ANCHORS)); - PrintWriter coverageGoalsTsv = - FileUtilities.openUTF8Writer( - CLDRPaths.CHART_DIRECTORY + "tsv/", "coverage_goals.tsv"); ) { - - TablePrinter tablePrinter = - new TablePrinter() - // tablePrinter.setSortPriorities(0,4) - .addColumn( - "Language", "class='source'", null, "class='source'", false) - .setSortPriority(0) - .setBreakSpans(false) - .addColumn( - "Code", - "class='source'", - "{0}", - "class='source'", - false) - .addColumn( - "D. 
Votes", - "class='target'", - null, - "class='target'", - false); - - Map> vendordata = sc.getLocaleTypes(); - Set locales = new TreeSet<>(); - Set vendors = new LinkedHashSet<>(); - Set smallVendors = new LinkedHashSet<>(); - - for (Organization organization : TC_Vendors) { - // if (vendor.equals(Organization.java)) continue; - Map data = vendordata.get(organization); - vendors.add(organization); - tablePrinter - .addColumn( - organization.getDisplayName(), - "class='target'", - null, - "class='target'", - false) - .setSpanRows(false); - locales.addAll(data.keySet()); - showTabbedOrgLevels(coverageGoalsTsv, organization, data); - } - - for (Entry> vendorData : vendordata.entrySet()) { - Organization organization = vendorData.getKey(); - if (!TC_Vendors.contains(organization)) { - smallVendors.add(organization); - Map data = vendordata.get(organization); - showTabbedOrgLevels(coverageGoalsTsv, organization, data); - continue; - } - } - - Collection data = new ArrayList<>(); - List list = new ArrayList<>(); - LanguageTagParser ltp = new LanguageTagParser(); - // String alias2 = getAlias("sh_YU"); - - pw2.append("

TC Orgs

"); - - for (String locale : locales) { - list.clear(); - String localeCode = locale.equals("*") ? "und" : locale; - String alias = getAlias(localeCode); - if (!alias.equals(localeCode)) { - throw new IllegalArgumentException( - "Should use canonical form: " + locale + " => " + alias); - } - // String baseLang = ltp.set(localeCode).getLanguage(); - String baseLangName = getLanguageName(localeCode); - list.add("und".equals(localeCode) ? "other" : baseLangName); - list.add(locale); - int defaultVotes = - supplementalDataInfo.getRequiredVotes( - CLDRLocale.getInstance(locale), null); - list.add(String.valueOf(defaultVotes)); - for (Organization vendor : vendors) { - String status = getVendorStatus(locale, vendor, vendordata); - // if (!baseLang.equals(locale) && - // !status.startsWith("<")) { - // String langStatus = getVendorStatus(baseLang, - // vendor, - // vendordata); - // if (!langStatus.equals(status)) { - // status += "*"; - // } - // } - list.add(status); - } - data.add(list.toArray(new String[list.size()])); - } - Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); - String value = tablePrinter.addRows(flattened).toTable(); - pw2.println(value); - - pw2.append("

Others

    "); - - for (Organization vendor2 : smallVendors) { - pw2.append("
  • "); - pw2.append(TransliteratorUtilities.toHTML.transform(vendor2.getDisplayName())) - .append(": "); - boolean first1 = true; - for (Level level : Level.values()) { - boolean first2 = true; - Level other = null; - for (Entry data2 : vendordata.get(vendor2).entrySet()) { - String key = data2.getKey(); - Level level2 = data2.getValue(); - if (level != level2) { - continue; - } - if (key.equals("*")) { - other = level2; - continue; - } - if (first2) { - if (first1) { - first1 = false; - } else { - pw2.append("; "); - } - pw2.append(level2.toString()).append(": "); - first2 = false; - } else { - pw2.append(", "); - } - pw2.append(TransliteratorUtilities.toHTML.transform(key)); - } - if (other != null) { - if (first2) { - if (first1) { - first1 = false; - } else { - pw2.append("; "); - } - pw2.append(level.toString()).append(": "); - first2 = false; - } else { - pw2.append(", "); - } - pw2.append("other"); - } - } - pw2.append("
  • "); - } - pw2.append("
"); - } - } - - public void showTabbedOrgLevels( - PrintWriter coverageGoalsTsv, Organization organization, Map data) { - coverageGoalsTsv.println( - String.format( - "\n#%s\t;\t%s\t;\t%s\t;\t%s\n", - "Org", "Locale", "Level", "Locale Name")); - for (Entry entry : data.entrySet()) { - String locale = entry.getKey(); - Level level = entry.getValue(); - final String name = - locale.equals("*") - ? "ALL" - : englishNameGetter.getNameFromIdentifierOptAlt( - locale, - NameGetter.NameOpt.COMPOUND_ONLY, - CLDRFile.SHORT_ALTS); - coverageGoalsTsv.println( - String.format( - "%s\t;\t%s\t;\t%s\t;\t%s", organization, locale, level, name)); - } - } - - LanguageTagParser lpt2 = new LanguageTagParser(); - - // TODO replace this with standard call. - - private String getAlias(String locale) { - lpt2.set(locale); - locale = lpt2.toString(); // normalize - // String language = lpt2.getLanguage(); - String script = lpt2.getScript(); - String region = lpt2.getRegion(); - // List variants = lpt2.getVariants(); - String temp; - for (String old : localeAliasInfo.get("language").keySet()) { - if (locale.startsWith(old)) { - // the above is a rough check, and will fail with old=moh and locale=mo - if (!locale.equals(old) && !locale.startsWith(old + "_")) { - continue; - } - temp = localeAliasInfo.get("language").get(old); - lpt2.setLanguage(temp.split("\\s+")[0] + locale.substring(old.length())); - break; - } - } - temp = localeAliasInfo.get("script").get(script); - if (temp != null) { - lpt2.setScript(temp.split("\\s+")[0]); - } - temp = localeAliasInfo.get("territory").get(region); - if (temp != null) { - lpt2.setRegion(temp.split("\\s+")[0]); - } - return lpt2.toString(); - } - - private String getVendorStatus( - String locale, - Organization vendor, - Map> vendordata) { - Level statusLevel = vendordata.get(vendor).get(locale); - return statusLevel == null ? "" : statusLevel.toString(); - // String status = statusLevel == null ? 
null : statusLevel.toString(); - // String curLocale = locale; - // while (status == null) { - // curLocale = LocaleIDParser.getParent(curLocale); - // if ("root".equals(curLocale)) { - // status = " "; - // break; - // } - // statusLevel = vendordata.get(vendor).get(curLocale); - // if (statusLevel != null) { - // status = statusLevel + "†"; - // } - // } - // return status; - } - - private void showCountryLanguageInfo(PrintWriter pw) throws IOException { - PrintWriter pw21 = - new PrintWriter( - new FormattedFileWriter( - null, - "Territory-Language Information", - null, - SUPPLEMENTAL_INDEX_ANCHORS)); - PrintWriter pw2 = pw21; - NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); - nf.setGroupingUsed(true); - // NumberFormat percent = new DecimalFormat("000.0%"); - TablePrinter tablePrinter = - new TablePrinter() - // tablePrinter.setSortPriorities(0,4) - .addColumn("T", "class='source'", null, "class='source'", true) - .setSortPriority(0) - .setBreakSpans(true) - .setRepeatHeader(true) - .setHidden(true) - .addColumn("Territory", "class='source'", null, "class='source'", true) - .setSortPriority(0) - .setBreakSpans(true) - .addColumn( - "Code", - "class='source'", - CldrUtility.getDoubleLinkMsg(), - "class='source'", - true) - .addColumn( - "Terr. Literacy", - "class='target'", - "{0,number,@@}%", - "class='targetRight'", - true); - - tablePrinter - .addColumn("Language", "class='target'", null, "class='target'", false) - .addColumn( - "Code", - "class='target'", - "{0}", - "class='target'", - false) - .addColumn( - "Lang. 
Pop.", - "class='target'", - "{0,number,#,#@@}", - "class='targetRight'", - true) - .addColumn( - "Pop.%", - "class='target'", "{0,number,@@}%", "class='targetRight'", true) - .setSortAscending(false) - .setSortPriority(1) - .addColumn( - "Literacy%", - "class='target'", "{0,number,@@}%", "class='targetRight'", true) - .addColumn( - "Written%", - "class='target'", "{0,number,@@}%", "class='targetRight'", true) - .addColumn("Report Bug", "class='target'", null, "class='target'", false); - - for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { - String territoryName = - englishNameGetter.getNameFromTypeEnumCode( - NameType.TERRITORY, territoryCode); - PopulationData territoryData2 = - supplementalDataInfo.getPopulationDataForTerritory(territoryCode); - double territoryLiteracy = territoryData2.getLiteratePopulationPercent(); - - for (String languageCode : - supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( - territoryCode)) { - PopulationData languageData = - supplementalDataInfo.getLanguageAndTerritoryPopulationData( - languageCode, territoryCode); - double languagePopulationPercent = - 100 * languageData.getPopulation() / territoryData2.getPopulation(); - double languageliteracy = languageData.getLiteratePopulationPercent(); - double writingFrequency = languageData.getWritingPercent(); - - tablePrinter - .addRow() - .addCell(getFirstPrimaryWeight(territoryName)) - .addCell(territoryName) - .addCell(territoryCode) - .addCell(territoryLiteracy) - .addCell( - getLanguageName(languageCode) - + getOfficialStatus(territoryCode, languageCode)) - .addCell(languageCode) - .addCell(languageData.getPopulation()) - .addCell(languagePopulationPercent) - .addCell(languageliteracy) - .addCell(writingFrequency) - .addCell( - addBug( - 1217, - "bug", - "", - "Fix info for " - + getLanguageName(languageCode) - + " (" - + languageCode - + ")" - + " in " - + territoryName - + " (" - + territoryCode - + ")", - "")) - .finishRow(); - } - 
- tablePrinter - .addRow() - .addCell(getFirstPrimaryWeight(territoryName)) - .addCell(territoryName) - .addCell(territoryCode) - .addCell(territoryLiteracy) - .addCell( - addBug( - 1217, - "add new", - "", - "Add language to " - + territoryName - + "(" - + territoryCode - + ")", - "")) - .addCell("") - .addCell(0.0d) - .addCell(0.0d) - .addCell(0.0d) - .addCell(0.0d) - .addCell("") - .finishRow(); - } - String value = tablePrinter.toTable(); - pw2.println(value); - pw2.close(); - } - - private void showCountryInfo(PrintWriter pw) throws IOException { - PrintWriter pw21 = - new PrintWriter( - new FormattedFileWriter( - null, - "Territory Information", - null, - SUPPLEMENTAL_INDEX_ANCHORS)); - PrintWriter pw2 = pw21; - NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); - nf.setGroupingUsed(true); - // NumberFormat percent = new DecimalFormat("000.0%"); - TablePrinter tablePrinter = - new TablePrinter() - // tablePrinter.setSortPriorities(0,4) - .addColumn("T", "class='source'", null, "class='source'", true) - .setSortPriority(0) - .setBreakSpans(true) - .setRepeatHeader(true) - .setHidden(true) - .addColumn("Territory", "class='source'", null, "class='source'", true) - .setSortPriority(0) - .setBreakSpans(true) - .addColumn( - "Code", - "class='source'", - CldrUtility.getDoubleLinkMsg(), - "class='source'", - true) - .addColumn( - "Terr. Pop (M)", - "class='target'", - "{0,number,#,#@@}", - "class='targetRight'", - true) - .addColumn( - "Terr. 
GDP ($M PPP)", - "class='target'", - "{0,number,#,#@@}", - "class='targetRight'", - true) - .addColumn( - "Currencies (2006...)", - "class='target'", - null, - "class='target'", - true); - for (Iterator it = territoryTypes.iterator(); it.hasNext(); ) { - String header = it.next(); - if (header.equals("calendar")) header = "calendar (+gregorian)"; - tablePrinter - .addColumn(header) - .setHeaderAttributes("class='target'") - .setCellAttributes("class='target'") - .setSpanRows(true); - } - - tablePrinter.addColumn("Report Bug", "class='target'", null, "class='target'", false); - - for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { - String territoryName = - englishNameGetter.getNameFromTypeEnumCode( - NameType.TERRITORY, territoryCode); - PopulationData territoryData2 = - supplementalDataInfo.getPopulationDataForTerritory(territoryCode); - double population = territoryData2.getPopulation() / 1000000; - double gdp = territoryData2.getGdp() / 1000000; - - Map> worldData = - territoryData.get(getName(NameType.TERRITORY, "001", false)); - Map> countryData = - territoryData.get(getName(NameType.TERRITORY, territoryCode, false)); - - tablePrinter - .addRow() - .addCell(getFirstPrimaryWeight(territoryName)) - .addCell(territoryName) - .addCell(territoryCode) - .addCell(population) - .addCell(gdp) - .addCell(getCurrencyNames(territoryCode)); - - addOtherCountryData(tablePrinter, worldData, countryData); - - tablePrinter - .addCell( - addBug( - 1217, - "bug", - "", - "Fix info for " - + territoryName - + " (" - + territoryCode - + ")", - "")) - .finishRow(); - } - String value = tablePrinter.toTable(); - pw2.println(value); - pw2.close(); - } - - static Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE); - - // Do just an approximation for now - - private String getFirstPrimaryWeight(String territoryName) { - char first = territoryName.charAt(0); - String result = nfd.getDecomposition(first); - if (result == 
null) { - return UTF16.valueOf(first); - } - return UTF16.valueOf(result.codePointAt(0)); - } - - // private String getTerritoryWithLikelyLink(String territoryCode) { - // return "" + territoryCode + - // ""; - // } - - private String getOfficialStatus(String territoryCode, String languageCode) { - PopulationData x = - supplementalDataInfo.getLanguageAndTerritoryPopulationData( - languageCode, territoryCode); - if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; - return " {" - + x.getOfficialStatus().toShortString() - + "}"; - } - - private String getRawOfficialStatus(String territoryCode, String languageCode) { - PopulationData x = - supplementalDataInfo.getLanguageAndTerritoryPopulationData( - languageCode, territoryCode); - if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; - return x.getOfficialStatus().toString(); - } - - private void addOtherCountryData( - TablePrinter tablePrinter, - Map> worldData, - Map> countryData) { - for (Iterator it2 = territoryTypes.iterator(); it2.hasNext(); ) { - String type = it2.next(); - Set worldResults = worldData.get(type); - Set territoryResults = null; - if (countryData != null) { - territoryResults = countryData.get(type); - } - if (territoryResults == null) { - territoryResults = worldResults; - } - String out = ""; - if (territoryResults != null) { - out = territoryResults + ""; - out = out.substring(1, out.length() - 1); // remove [ and ] - } - tablePrinter.addCell(out); - } - } - - private String getCurrencyNames(String territoryCode) { - Set currencies = territoriesToModernCurrencies.getAll(territoryCode); - if (currencies == null || currencies.size() == 0) return ""; - StringBuilder buffer = new StringBuilder(); - for (String code : currencies) { - if (buffer.length() != 0) buffer.append(",
"); - buffer.append(getName(NameType.CURRENCY, code, false)); - } - return buffer.toString(); - } - - private void addCharSubstitution(String value, String substitute) { - if (substitute.equals(value)) return; - LinkedHashSet already = charSubstitutions.get(value); - if (already == null) charSubstitutions.put(value, already = new LinkedHashSet<>(0)); - already.add(substitute); - Log.logln(hex(value, " ") + "; " + hex(substitute, " ")); - } - - /** */ - // public void showTerritoryInfo() { - // Map territory_parent = new TreeMap(); - // gather("001", territory_parent); - // for (Iterator it = territory_parent.keySet().iterator(); it.hasNext();) { - // String territory = (String) it.next(); - // String parent = (String) territory_parent.get(territory); - // System.out.println(territory + "\t" + - // englishNameGetter.getName(english.TERRITORY_NAME, territory) + "\t" - // + parent + "\t" + englishNameGetter.getName(english.TERRITORY_NAME, - // parent)); - // } - // } - - // private void gather(String item, Map territory_parent) { - // Collection containedByItem = (Collection) group_contains.get(item); - // if (containedByItem == null) - // return; - // for (Iterator it = containedByItem.iterator(); it.hasNext();) { - // String contained = (String) it.next(); - // territory_parent.put(contained, item); - // gather(contained, territory_parent); - // } - // } - - private void addTerritoryInfo(String territoriesList, String type, String info) { - String[] territories = territoriesList.split("\\s+"); - territoryTypes.add(type); - for (int i = 0; i < territories.length; ++i) { - String territory = getName(NameType.TERRITORY, territories[i], false); - Map> s = territoryData.get(territory); - if (s == null) { - territoryData.put(territory, s = new TreeMap<>()); - } - Set ss = s.get(type); - if (ss == null) { - s.put(type, ss = new TreeSet<>()); - } - ss.add(info); - } - } - - public void showCalendarData(PrintWriter pw0) throws IOException { - PrintWriter pw = - new 
PrintWriter( - new FormattedFileWriter( - null, - "Other Territory Data", - null, - SUPPLEMENTAL_INDEX_ANCHORS)); - pw.println(""); - pw.println(""); - for (Iterator it = territoryTypes.iterator(); it.hasNext(); ) { - String header = it.next(); - if (header.equals("calendar")) header = "calendar (+gregorian)"; - pw.println(""); - } - pw.println(""); - - String worldName = getName(NameType.TERRITORY, "001", false); - Map> worldData = territoryData.get(worldName); - for (Iterator it = territoryData.keySet().iterator(); it.hasNext(); ) { - String country = it.next(); - if (country.equals(worldName)) continue; - showCountry(pw, country, country, worldData); - } - showCountry(pw, worldName, "Other", worldData); - pw.println("
Territory" + header + "
"); - pw.close(); - } - - private void showCountry( - PrintWriter pw, - String country, - String countryTitle, - Map> worldData) { - pw.println("" + countryTitle + ""); - Map> data = territoryData.get(country); - for (Iterator it2 = territoryTypes.iterator(); it2.hasNext(); ) { - String type = it2.next(); - String target = "target"; - Set results = data.get(type); - Set worldResults = worldData.get(type); - if (results == null) { - results = worldResults; - target = "target2"; - } else if (results.equals(worldResults)) { - target = "target2"; - } - String out = ""; - if (results != null) { - out = results + ""; - out = out.substring(1, out.length() - 1); // remove [ and ] - } - pw.println("" + out + ""); - } - pw.println(""); - } - - public void showCorrespondances() { - // show correspondances between language and script - Map name_script = new TreeMap<>(); - for (Iterator it = sc.getAvailableCodes("script").iterator(); it.hasNext(); ) { - String script = it.next(); - String name = englishNameGetter.getNameFromTypeEnumCode(NameType.SCRIPT, script); - if (name == null) name = script; - name_script.put(name, script); - /* - * source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages - * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories - * : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages - * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? 
language_scripts - */ } - String delimiter = "\\P{L}+"; - Map name_language = new TreeMap<>(); - for (Iterator it = sc.getAvailableCodes("language").iterator(); - it.hasNext(); ) { - String language = it.next(); - String names = - englishNameGetter.getNameFromTypeEnumCode(NameType.LANGUAGE, language); - if (names == null) names = language; - name_language.put(names, language); - } - for (Iterator it = sc.getAvailableCodes("language").iterator(); - it.hasNext(); ) { - String language = it.next(); - String names = - englishNameGetter.getNameFromTypeEnumCode(NameType.LANGUAGE, language); - if (names == null) names = language; - String[] words = names.split(delimiter); - if (words.length > 1) { - // System.out.println(names); - } - for (int i = 0; i < words.length; ++i) { - String name = words[i]; - String script = name_script.get(name); - if (script != null) { - Set langSet = (Set) script_languages.asMap().get(script); - if (langSet != null && langSet.contains(language)) System.out.print("*"); - System.out.println( - "\t" + name + " [" + language + "]\t=> " + name + " [" + script - + "]"); - } else { - String language2 = name_language.get(name); - if (language2 != null && !language.equals(language2)) { - Set langSet = (Set) language_scripts.get(language); - if (langSet != null) System.out.print("*"); - System.out.print( - "?\tSame script?\t + " - + getName(NameType.LANGUAGE, language, false) - + "\t & " - + getName(NameType.LANGUAGE, language2, false)); - langSet = (Set) language_scripts.get(language2); - if (langSet != null) System.out.print("*"); - System.out.println(); - } - } - } - } - } - - /** - * @throws IOException - */ - public void printCurrency(PrintWriter index) throws IOException { - PrintWriter pw = - new PrintWriter( - new FormattedFileWriter( - null, - "Detailed Territory-Currency Information", - null - // "

The following table shows when currencies were in use in - // different countries. " + - // "See also Decimal Digits and - // Rounding. " + - // "To correct any information here, please file a " + - // addBug(1274, "bug", "", "Currency Bug", - // "") + - // ".

" - , - SUPPLEMENTAL_INDEX_ANCHORS)); - String section1 = "Territory to Currency"; - String section2 = "Decimal Digits and Rounding"; - showContents(pw, "territory_currency", section1, "format_info", section2); - - pw.println( - "

" - + CldrUtility.getDoubleLinkedText( - "territory_currency", "1. " + section1) - + "

"); - - // doTitle(pw, "Territory \u2192 Currency"); - pw.println(""); - pw.println( - "" - + "" - + "" - + "" - + "" - + "" - + ""); - - Relation currencyToTerritory = - Relation.of(new HashMap>(), HashSet.class); - Relation modernCurrencyToTerritory = - Relation.of(new HashMap>(), HashSet.class); - - for (Entry nameCode : NAME_TO_REGION.entrySet()) { - String name = nameCode.getKey(); - String regionCode = nameCode.getValue(); - if (!StandardCodes.isCountry(regionCode)) { - continue; - } - if (sc.isLstregPrivateUse("region", regionCode)) { - continue; - } - Set info = supplementalDataInfo.getCurrencyDateInfo(regionCode); - - int infoSize = 1; - if (info != null) { - infoSize = info.size(); - } - pw.println( - "" - + "" - + ""); - if (info == null) { - pw.println( - "" - + "" - + "" - + "" - + ""); - continue; - } - boolean first = true; - for (CurrencyDateInfo infoItem : info) { - Date endData = infoItem.getEnd(); - if (endData.equals(CurrencyDateInfo.END_OF_TIME)) { - modernCurrencyToTerritory.put( - infoItem.getCurrency(), getTerritoryName(regionCode)); - } else { - currencyToTerritory.put( - infoItem.getCurrency(), getTerritoryName(regionCode)); - } - if (first) first = false; - else pw.println(""); - pw.println( - "" - + "" - + "" - + "" - + ""); - } - } - // doFooter(pw); - // pw.close(); - // pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info", null)); - pw.write("
TerritoryCodeFromToCurrencyName
" - + name - + "" - + CldrUtility.getDoubleLinkedText(regionCode) - + "" - + "na" - + "" - + "na" - + "" - + "na" - + "" - + "na" - + "
" - + CurrencyDateInfo.formatDate(infoItem.getStart()) - + "" - + CurrencyDateInfo.formatDate(endData) - + "" - + infoItem.getCurrency() - + "" - + englishNameGetter.getNameFromTypeEnumCode( - NameType.CURRENCY, infoItem.getCurrency()) - + "
"); - - pw.println( - "

" - + CldrUtility.getDoubleLinkedText("format_info", "2. " + section2) - + "

"); - - pw.write( - "

This table shows the number of digits used for each currency, " - + " and the countries where it is or was in use. " - + "Countries where the currency is in current use are bolded. " - + "If the currency uses ‘nickel rounding’ in transactions, the digits are followed by ‘(5)’. " - + "Where the values are different in a cash context, that is shown in a second column." - + "

"); - pw.write("
"); - - // doTitle(pw, "Currency Format Info"); - // - - pw.println( - "" - + "" - + "" - + "" - + "" - + "" - + ""); - Set currencyList = new TreeSet(col); - currencyList.addAll(currency_fractions.keySet()); - currencyList.addAll(currency_territory.keySet()); - - for (Entry nameCode : NAME_TO_CURRENCY.entrySet()) { - // String name = nameCode.getKey(); - String currency = nameCode.getValue(); - CurrencyNumberInfo info = supplementalDataInfo.getCurrencyNumberInfo(currency); - Set territories = currencyToTerritory.get(currency); - Set modernTerritories = modernCurrencyToTerritory.get(currency); - - // String fractions = (String) currency_fractions.get(currency); - // if (fractions == null) - // fractions = defaultDigits; - // Set territories = (Set) currency_territory.get(currency); - pw.print( - "" - + "" - + "" - + "" - + "" - + ""); - } - pw.println("
NameCurrencyDigitsCash DigitsCountries
" - + TransliteratorUtilities.toHTML.transform( - englishNameGetter.getNameFromTypeEnumCode( - NameType.CURRENCY, currency)) - + "" - + CldrUtility.getDoubleLinkedText(currency) - + "" - + info.getDigits() - + (info.getRounding() == 0 ? "" : " (" + info.getRounding() + ")") - + "" - + (info.cashDigits == info.getDigits() - && info.cashRounding == info.getRounding() - ? "" - : (info.cashDigits - + (info.cashRounding == 0 - ? "" - : " (" + info.cashRounding + ")"))) - + ""); - boolean first = true; - boolean needBreak = false; - if (modernTerritories != null) { - needBreak = true; - for (String territory : modernTerritories) { - if (first) first = false; - else pw.print(", "); - pw.print("" + territory + ""); - } - } - // boolean haveBreak = true; - if (territories != null) { - for (String territory : territories) { - if (first) first = false; - else if (!needBreak) pw.print(", "); - else { - pw.print(",
"); - needBreak = false; - } - pw.print(territory); - } - } - pw.println("
"); - pw.close(); - // doFooter(pw); - - // if (false) { - // doTitle(pw, "Territories Versus Currencies"); - // pw.println("Territories Without CurrenciesCurrencies Without - // Territories"); - // pw.println(""); - // Set territoriesWithoutCurrencies = new TreeSet(); - // territoriesWithoutCurrencies.addAll(sc.getGoodAvailableCodes("territory")); - // territoriesWithoutCurrencies.removeAll(territoriesWithCurrencies); - // territoriesWithoutCurrencies.removeAll(group_contains.keySet()); - // boolean first = true; - // for (Iterator it = territoriesWithoutCurrencies.iterator(); it.hasNext();) { - // if (first) first = false; - // else pw.print(", "); - // pw.print(englishNameGetter.getName(NameType.TERRITORY, it.next().toString(), - // false)); - // } - // pw.println(""); - // Set currenciesWithoutTerritories = new TreeSet(); - // currenciesWithoutTerritories.addAll(sc.getGoodAvailableCodes("currency")); - // currenciesWithoutTerritories.removeAll(currenciesWithTerritories); - // first = true; - // for (Iterator it = currenciesWithoutTerritories.iterator(); it.hasNext();) { - // if (first) first = false; - // else pw.print(", "); - // pw.print(englishNameGetter.getName(NameType.CURRENCY, it.next().toString(), - // false)); - // } - // pw.println(""); - // doFooter(pw); - // } - } - - private String getTerritoryName(String territory) { - String name; - name = englishNameGetter.getNameFromTypeEnumCode(NameType.TERRITORY, territory); - if (name == null) { - name = sc.getData("territory", territory); - } - if (name != null) { - return TransliteratorUtilities.toHTML.transform(name) + " (" + territory + ")"; - } else { - return territory; - } - } - - /** - * @throws IOException - */ - public void printAliases(PrintWriter index) throws IOException { - PrintWriter pw = - new PrintWriter( - new FormattedFileWriter( - null, "Aliases", null, SUPPLEMENTAL_INDEX_ANCHORS)); - - // doTitle(pw, "Aliases"); - pw.println(""); - pw.println( - "" - + "" - + "" - + ""); - for (Iterator 
it = aliases.iterator(); it.hasNext(); ) { - String[] items = it.next(); - pw.println( - "" - + "" - + "" - + ""); - } - // doFooter(pw); - pw.println("
" - + "Type" - + "" - + "Code" - + "" - + "Reason" - + "" - + "Substitute (if available)" - + "
" - + items[0] - + "" - + CldrUtility.getDoubleLinkedText(items[1]) - + "" - + items[3] - + "" - + items[2] - + "
"); - pw.close(); - } - - // deprecatedItems - // public void printDeprecatedItems(PrintWriter pw) { - // doTitle(pw, "Deprecated Items"); - // pw.print("TypeElementsAttributesValues"); - // for (Iterator it = deprecatedItems.iterator(); it.hasNext();) { - // Map source = (Map)it.next(); - // Object item; - // pw.print(""); - // pw.print("" + ((item = source.get("type")) != null ? item : "any") - // + ""); - // pw.print("" + ((item = source.get("elements")) != null ? item : - // "any") + ""); - // pw.print("" + ((item = source.get("attributes")) != null ? item : - // "any") + ""); - // pw.print("" + ((item = source.get("values")) != null ? item : - // "any") + ""); - // pw.print(""); - // } - // doFooter(pw); - // } - - public void printWindows_Tzid(PrintWriter index) throws IOException { - Map>> zoneMapping = - supplementalDataInfo.getTypeToZoneToRegionToZone(); - PrintWriter pw = - new PrintWriter( - new FormattedFileWriter( - null, "Zone \u2192 Tzid", null, SUPPLEMENTAL_INDEX_ANCHORS)); - for (Entry>> typeAndZoneToRegionToZone : - zoneMapping.entrySet()) { - String type = typeAndZoneToRegionToZone.getKey(); - Map> zoneToRegionToZone = - typeAndZoneToRegionToZone.getValue(); - pw.println("

Mapping for: " + type + "


"); - // doTitle(pw, "Windows \u2192 Tzid"); - pw.println(""); - pw.println( - ""); - - for (Entry> zoneAndregionToZone : - zoneToRegionToZone.entrySet()) { - String source = zoneAndregionToZone.getKey(); - Map regionToZone = zoneAndregionToZone.getValue(); - for (Entry regionAndZone : regionToZone.entrySet()) { - String region = regionAndZone.getKey(); - String target = regionAndZone.getValue(); - if (region == null) region = "any"; - pw.println( - ""); - } - } - // doFooter(pw); - pw.println("
" - + type - + "" - + "Region" - + "" - + "TZID" - + "
" - + source - + "" - + region - + "" - + target - + "
"); - } - pw.close(); - } - - // - - public void printCharacters(PrintWriter index) throws IOException { - String title = "Character Fallback Substitutions"; - - PrintWriter pw = - new PrintWriter( - new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); - // doTitle(pw, title); - pw.println(""); - - pw.println( - ""); - UnicodeSet chars = new UnicodeSet("[:NFKC_QuickCheck=N:]"); - for (com.ibm.icu.text.UnicodeSetIterator it = - new com.ibm.icu.text.UnicodeSetIterator(chars); - it.next(); ) { - String value = it.getString(); - addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFC)); - addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFKC)); - } - int[] counts = new int[4]; - for (Iterator it = charSubstitutions.keySet().iterator(); it.hasNext(); ) { - String value = it.next(); - LinkedHashSet substitutes = charSubstitutions.get(value); - String nfc = Normalizer.normalize(value, Normalizer.NFC); - String nfkc = Normalizer.normalize(value, Normalizer.NFKC); - - String sourceTag = "" - + (!first - ? "" - : sourceTag - + hex(value, ", ") - + "" - + sourceTag - + TransliteratorUtilities.toHTML.transliterate( - value) - + "" - + sourceTag - + UCharacter.getName(value, ", ") - + "") - + targetTag - + type - + "" - + targetTag - + hex(substitute, ", ") - + "" - + targetTag - + TransliteratorUtilities.toHTML.transliterate(substitute) - + "" - + targetTag - + UCharacter.getName(substitute, ", ") - + ""); - first = false; - } - } - // doFooter(pw); - pw.println("
Substitute for character (if not in repertoire)The following (in priority order, first string that is in repertoire)
"; - if (substitutes.size() > 1) { - sourceTag = ""; - } - boolean first = true; - for (Iterator it2 = substitutes.iterator(); it2.hasNext(); ) { - String substitute = it2.next(); - String type = "Explicit"; - String targetTag = ""; - if (substitute.equals(nfc)) { - type = "NFC"; - targetTag = ""; - counts[2]++; - } else if (substitute.equals(nfkc)) { - type = "NFKC"; - targetTag = ""; - counts[3]++; - } else { - counts[0]++; - } - pw.println( - "
"); - - pw.close(); - for (int i = 0; i < counts.length; ++i) { - System.out.println("Count\t" + i + "\t" + counts[i]); - } - } - - public static String hex(String s, String separator) { - StringBuffer result = new StringBuffer(); - int cp; - for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { - cp = UTF16.charAt(s, i); - if (i != 0) result.append(separator); - result.append(com.ibm.icu.impl.Utility.hex(cp)); - } - return result.toString(); - } - - /** */ - // private PrintWriter doTitle(PrintWriter pw, String title) { - // //String anchor = FileUtilities.anchorize(title); - // pw.println("
"); - // //anchors.put(title, anchor); - // //PrintWriter result = null; - // //return result; - // } - - // private void doFooter(PrintWriter pw) { - // pw.println("
"); - // } - public void printContains2( - PrintWriter pw, String lead, String start, int depth, boolean isFirst) { - String name = depth == 4 ? start : getName(NameType.TERRITORY, start, false); - if (!isFirst) pw.print(lead); - int count = getTotalContainedItems(start, depth); - pw.print( - "" - + name - + ""); // colSpan='" + (5 - - // depth) + "' - if (depth == 4) pw.println(""); - Collection contains = getContainedCollection(start, depth); - if (contains != null) { - Collection contains2 = new TreeSet(territoryNameComparator); - contains2.addAll(contains); - boolean first = true; - for (Iterator it = contains2.iterator(); it.hasNext(); ) { - String item = it.next(); - printContains2(pw, lead, item, depth + 1, first); // + " " - first = false; - } - } - } - - private int getTotalContainedItems(String start, int depth) { - Collection c = getContainedCollection(start, depth); - if (c == null) return 1; - int sum = 0; - for (Iterator it = c.iterator(); it.hasNext(); ) { - sum += getTotalContainedItems(it.next(), depth + 1); - } - return sum; - } - - /** */ - private Collection getContainedCollection(String start, int depth) { - Collection contains = supplementalDataInfo.getContainmentCore().get(start); - if (contains == null) { - contains = sc.getCountryToZoneSet().get(start); - if (contains == null && depth == 3) { - contains = new TreeSet<>(); - if (start.compareTo("A") >= 0) { - contains.add("MISSING TZID"); - } else { - contains.add("Not yet ISO code"); - } - } - } - return contains; - } - - private String getName(NameType nameType, String oldcode, boolean codeFirst) { - if (oldcode.contains(" ")) { - String[] result = oldcode.split("\\s+"); - for (int i = 0; i < result.length; ++i) { - result[i] = getName(nameType, result[i], codeFirst); - } - return CldrUtility.join(Arrays.asList(result), ", "); - } else { - int pos = oldcode.indexOf('*'); - String code = pos < 0 ? 
oldcode : oldcode.substring(0, pos); - String ename = englishNameGetter.getNameFromTypeEnumCode(nameType, code); - String nameString = ename == null ? code : ename; - return nameString.equals(oldcode) - ? nameString - : codeFirst - ? "[" + oldcode + "]" + "\t" + nameString - : nameString + "\t" + "[" + oldcode + "]"; - } - } - - private String getName(String locale, boolean codeFirst) { - String ename = getLanguageName(locale); - return codeFirst - ? "[" + locale + "]\t" + (ename == null ? locale : ename) - : (ename == null ? locale : ename) + "\t[" + locale + "]"; - } - - Comparator territoryNameComparator = - new Comparator() { - @Override - public int compare(Object o1, Object o2) { - return col.compare( - getName(NameType.TERRITORY, (String) o1, false), - getName(NameType.TERRITORY, (String) o2, false)); - } - }; - - static String[] stringArrayPattern = new String[0]; - static String[][] string2ArrayPattern = new String[0][]; - - public static Map territoryAliases = new HashMap<>(); - - public void printContains(PrintWriter index) throws IOException { - String title = "Territory Containment (UN M.49)"; - - PrintWriter pw = - new PrintWriter( - new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); - // doTitle(pw, title); - List rows = new ArrayList<>(); - printContains3("001", rows, new ArrayList()); - TablePrinter tablePrinter = - new TablePrinter() - .addColumn("World", "class='source'", null, "class='z0'", true) - .setSortPriority(0) - .addColumn("Continent", "class='source'", null, "class='z1'", true) - .setSortPriority(1) - .addColumn("Subcontinent", "class='source'", null, "class='z2'", true) - .setSortPriority(2) - .addColumn( - "Country (Territory)", - "class='source'", - null, - "class='z3'", - true) - .setSortPriority(3) - .addColumn("Time Zone", "class='source'", null, "class='z4'", true) - .setSortPriority(4); - String[][] flatData = rows.toArray(string2ArrayPattern); - pw.println(tablePrinter.addRows(flatData).toTable()); - - 
showSubtable( - pw, ContainmentStyle.grouping, "Groupings", "Grouping", "Contained Regions"); - showSubtable( - pw, - ContainmentStyle.deprecated, - "Deprecated", - "Container", - "Deprecated Region"); - - // Relation deprecated = supplementalDataInfo - // .getTerritoryToContained(ContainmentStyle.deprecated); - // - // for (String region : deprecated.keySet()) { - // nameToContainers.add(region); - // } - // pw.println("

Groupings and Deprecated Regions

"); - // for (String region : nameToContainers) { - // String name = getName(NameType.TERRITORY, region, false); - // Set dep = deprecated.get(region); - // Set gro = grouping.get(region); - // Iterator depIt = (dep == null ? Collections.EMPTY_SET : - // dep).iterator(); - // Iterator groIt = (gro == null ? Collections.EMPTY_SET : - // gro).iterator(); - // while (depIt.hasNext() || groIt.hasNext()) { - // String dep1 = depIt.hasNext() ? getName(NameType.TERRITORY, - // depIt.next(), false) : ""; - // String gro1 = groIt.hasNext() ? getName(NameType.TERRITORY, - // groIt.next(), false) : ""; - // tablePrinter2.addRow() - // .addCell(name) - // .addCell(gro1) - // .addCell(dep1) - // .finishRow(); - // } - // } - // pw.println(tablePrinter2.toTable()); - // pw.println("

Other Groupings

"); - // for (Entry> regionContained : grouping.keyValuesSet()) - // { - // showContainers(pw, regionContained); - // } - // - // pw.println("

Deprecated Codes

"); - // for (Entry> regionContained : - // deprecated.keyValuesSet()) { - // showContainers(pw, regionContained); - // } - pw.close(); - } - - public void showSubtable( - PrintWriter pw, - ContainmentStyle containmentStyle, - String title, - String containerTitle, - String containeeTitle) { - pw.println("

" + title + "

"); - TablePrinter tablePrinter2 = - new TablePrinter() - .addColumn(containerTitle, "class='source'", null, "class='z0'", true) - .setSortPriority(0) - .addColumn(containeeTitle, "class='source'", null, "class='z4'", true) - .setSortPriority(1); - - Relation grouping = - supplementalDataInfo.getTerritoryToContained(containmentStyle); - - for (Entry containerRegion : grouping.keyValueSet()) { - String container = getName(NameType.TERRITORY, containerRegion.getKey(), false); - String containee = getName(NameType.TERRITORY, containerRegion.getValue(), false); - tablePrinter2.addRow().addCell(container).addCell(containee).finishRow(); - } - pw.println(tablePrinter2.toTable()); - } - - public void showContainers(PrintWriter pw, Entry> regionContained) { - String region = regionContained.getKey(); - Set contained = regionContained.getValue(); - pw.println("
  • " + getName(NameType.TERRITORY, region, false) + "
      "); - for (String sub : contained) { - pw.println("
    • " + getName(NameType.TERRITORY, sub, false) + "
    • "); - } - pw.println("
"); - } - - private void printContains3( - String start, List rows, ArrayList currentRow) { - int len = currentRow.size(); - if (len > 3) { - return; // skip long items - } - currentRow.add(getName(NameType.TERRITORY, start, false)); - // Collection contains = (Collection) group_contains.get(start); - Collection contains = supplementalDataInfo.getContainmentCore().get(start); - if (contains == null) { - contains = sc.getCountryToZoneSet().get(start); - currentRow.add(""); - if (contains == null) { - currentRow.set(len + 1, "???"); - rows.add(currentRow.toArray(stringArrayPattern)); - } else { - for (String item : contains) { - currentRow.set(len + 1, item); - rows.add(currentRow.toArray(stringArrayPattern)); - } - } - currentRow.remove(len + 1); - } else { - for (String item : contains) { - if (territoryAliases.keySet().contains(item)) { - continue; - } - printContains3(item, rows, currentRow); - } - } - currentRow.remove(len); - } - } - /** */ private static Map> getInverse( Map> language_territories) { @@ -2904,7 +801,7 @@ private static void addTokens( } } - private static void addTokens( + static void addTokens( String key, String values, String value_delimiter, Multimap key_value) { if (values != null) { key_value.putAll(key, Arrays.asList(values.split(value_delimiter))); diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/chart_messages.html b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/chart_messages.html index 6e2613a717e..c4dd06ae8a8 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/chart_messages.html +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/chart_messages.html @@ -45,44 +45,47 @@

Chart Messages

territory_language_information - The main goal for CLDR language data is to provide - approximate figures for the literate, functional population for - each language in each territory: that is, the population that is - able to read and write each language, and is comfortable enough to - use it with computers. -

The GDP and Literacy figures are taken from the World Bank - where available, otherwise supplemented by FactBook data and other - sources. The GDP figures are "PPP (constant 2000 international - $)". Much of the per-language data is taken from the Ethnologue, - but is supplemented and processed using many other sources, - including per-country census data. (The focus of the Ethnologue is - native speakers, which includes people who are not literate, and - excludes people who are functional second-langauge users.)

-

- The literacy rate may be discounted to reflect the actual usage of - the written form in normal daily life. Thus languages that are - typically not written, such as Swiss German, will be given a low - literacy rate, even though the whole population could write - in Swiss German. +

A main goal for CLDR language data is to provide + approximate figures for the population of + each language in each territory (country), + plus the writing population: + that is, the population that is + reading and writing the language on a daily basis — + and is comfortable with using it with computers. + The GDP and Literacy figures are taken from the World Bank + where available, supplemented by other data sources. + The GDP figures are "PPP (constant 2000 international $)". + The per-language data is processed using many sources, + including per-country census data. + These are referenced where possible, in the XML Source.

+

The literacy rate is used as a proxy for computing the writing population. + It may be discounted to reflect the actual usage of + the written form in normal daily life. + Thus languages that are typically not written, such as Swiss German, + will be given a low writing population, + even though all Swiss German speakers could read and write in Swiss German.

The percentages may add up to more than 100% due to multilingual populations, or may be less than 100% due to illiteracy or because the data has not yet been gathered or - processed. Languages with a small population may be omitted.

-

Official status is supplied where available, formatted as - {O}. Hovering with the mouse shows a short description.

+ processed. Languages with a small population in a territory may be omitted.

+

Official status is supplied where available, formatted as an abbreviation such as {O}. + Hovering with the mouse shows a short description.

  • Likely languages and scripts:To see (and verify) - the likely languages and scripts for this subtag, click on the - country code.
  • + the likely languages, scripts, and regions, see Likely Subtags.
  • Reporting Defects: If you find errors or omissions in this data, please report the information with the bug or add new links, below.
  • -
  • XML Source: - supplementalData.xml (see the <territoryInfo>, - <calendarData>, <weekData>, and - <measurementData> elements)
  • +
  • XML Source: + supplementalData.xml, with the element <territoryInfo>. + References, such as "R1055", can be found by searching in the document.
  • +
  • TSV format: A Tab-Separated Values version of the data is available at + territory_language_information.tsv. +