diff --git a/source/common/uloc.cpp b/source/common/uloc.cpp index 7a1dc723..79427524 100644 --- a/source/common/uloc.cpp +++ b/source/common/uloc.cpp @@ -496,6 +496,27 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = { { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, + // sgn entries are redundant tags with preferred values in + // https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . + { "sgn_BR", "bzs", NULL, NULL }, + { "sgn_CO", "csn", NULL, NULL }, + { "sgn_DE", "gsg", NULL, NULL }, + { "sgn_DK", "dsl", NULL, NULL }, + { "sgn_ES", "ssp", NULL, NULL }, + { "sgn_FR", "fsl", NULL, NULL }, + { "sgn_GB", "bfi", NULL, NULL }, + { "sgn_GR", "gss", NULL, NULL }, + { "sgn_IE", "isg", NULL, NULL }, + { "sgn_IT", "ise", NULL, NULL }, + { "sgn_JP", "jsl", NULL, NULL }, + { "sgn_MX", "mfs", NULL, NULL }, + { "sgn_NI", "ncs", NULL, NULL }, + { "sgn_NL", "dse", NULL, NULL }, + { "sgn_NO", "nsl", NULL, NULL }, + { "sgn_PT", "psr", NULL, NULL }, + { "sgn_SE", "swl", NULL, NULL }, + { "sgn_US", "ase", NULL, NULL }, + { "sgn_ZA", "sfs", NULL, NULL }, { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ diff --git a/source/common/uloc_tag.cpp b/source/common/uloc_tag.cpp index 87b9f63f..99d0c6c3 100644 --- a/source/common/uloc_tag.cpp +++ b/source/common/uloc_tag.cpp @@ -77,24 +77,34 @@ static const char LOCALE_TYPE_YES[] = "yes"; #define LANG_UND_LEN 3 +// Updated on 2018-04-24 from +// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . +// egrep 'Type: grandfathered' -A 7 language-subtag-registry | \ +// egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \ +// awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' +// Mapping entries marked with 'extra' do not have preferred values in the IANA registry, +// but are ICU-specific. +// ja-latn-hepburn-heploc is not grandfathered, but listed here to map to its +// preferred value. static const char* const GRANDFATHERED[] = { /* grandfathered preferred */ "art-lojban", "jbo", "cel-gaulish", "xtg-x-cel-gaulish", - "en-GB-oed", "en-GB-x-oed", + "en-GB-oed", "en-GB-oxendict", "i-ami", "ami", "i-bnn", "bnn", - "i-default", "en-x-i-default", - "i-enochian", "und-x-i-enochian", + "i-default", "en-x-i-default", // extra + "i-enochian", "und-x-i-enochian", // extra "i-hak", "hak", "i-klingon", "tlh", "i-lux", "lb", - "i-mingo", "see-x-i-mingo", + "i-mingo", "see-x-i-mingo", // extra "i-navajo", "nv", "i-pwn", "pwn", "i-tao", "tao", "i-tay", "tay", "i-tsu", "tsu", + "ja-latn-hepburn-heploc", "ja-latn-alalc97", // variant "no-bok", "nb", "no-nyn", "nn", "sgn-be-fr", "sfb", @@ -102,17 +112,112 @@ static const char* const GRANDFATHERED[] = { "sgn-ch-de", "sgg", "zh-guoyu", "cmn", "zh-hakka", "hak", - "zh-min", "nan-x-zh-min", + "zh-min", "nan-x-zh-min", // extra "zh-min-nan", "nan", "zh-xiang", "hsn", NULL, NULL }; +// Updated on 2018-04-24 from +// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . +// grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \ +// grep -B1 'Preferred' | grep -v '^--' | \ +// awk -n '/Subtag/ {printf("\"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' static const char DEPRECATEDLANGS[][4] = { /* deprecated new */ + "in", "id", "iw", "he", "ji", "yi", - "in", "id" + "jw", "jv", + "mo", "ro", + "aam", "aas", + "adp", "dz", + "aue", "ktz", + "ayx", "nun", + "bgm", "bcg", + "bjd", "drl", + "ccq", "rki", + "cjr", "mom", + "cka", "cmr", + "cmk", "xch", + "coy", "pij", + "cqu", "quh", + "drh", "khk", + "drw", "prs", + "gav", "dev", + "gfx", "vaj", + "ggn", "gvr", + "gti", "nyc", + "guv", "duz", + "hrr", "jal", + "ibi", "opa", + "ilw", "gal", + "jeg", "oyb", + "kgc", "tdf", + "kgh", "kml", + "koj", "kwv", + "krm", "bmf", + "ktr", "dtp", + "kvs", "gdj", + "kwq", "yam", + "kxe", "tvd", + "kzj", "dtp", + "kzt", "dtp", + "lii", "raq", + "lmm", "rmx", + "meg", "cir", + "mst", "mry", + "mwj", "vaj", + "myt", "mry", + "nad", "xny", + "ncp", "kdz", + "nnx", "ngv", + "nts", "pij", + "oun", "vaj", + "pcr", "adx", + "pmc", "huw", + "pmu", "phr", + "ppa", "bfy", + "ppr", "lcq", + "pry", "prt", + "puz", "pub", + "sca", "hle", + "skk", "oyb", + "tdu", "dtp", + "thc", "tpo", + "thx", "oyb", + "tie", "ras", + "tkk", "twm", + "tlw", "weo", + "tmp", "tyj", + "tne", "kak", + "tnf", "prs", + "tsf", "taj", + "uok", "ema", + "xba", "cax", + "xia", "acn", + "xkh", "waw", + "xsj", "suj", + "ybd", "rki", + "yma", "lrr", + "ymt", "mtm", + "yos", "zom", + "yuu", "yug", +}; + +// Updated on 2018-04-24 from +// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . +// grep 'Type: region' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \ +// grep -B1 'Preferred' | \ +// awk -n '/Subtag/ {printf("\"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' +static const char DEPRECATEDREGIONS[][3] = { +/* deprecated new */ + "BU", "MM", + "DD", "DE", + "FX", "FR", + "TP", "TL", + "YD", "YE", + "ZR", "CD", }; /* @@ -763,6 +868,14 @@ _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacit *(appendAt + reslen) = SEP; } reslen++; + /* resolve deprecated */ + for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) { + if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) { + uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]); + len = (int32_t)uprv_strlen(buf); + break; + } + } if (reslen < capacity) { uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));