summary | refs | log | tree | commit | diff
path: root/js/src/builtin
diff options
context:
space:
mode:
author: Martok <martok@martoks-place.de> 2023-06-29 23:07:20 +0200
committer: Martok <martok@martoks-place.de> 2023-06-30 00:01:34 +0200
commit: 2f940bdc9dcbfe83e17ed26c5d1af7fe874c24ac (patch)
tree: 2519366eb8057e265339261ab651a8cb5653a703 /js/src/builtin
parent: 6808e659ad137ac63466aad93e406efbf091c077 (diff)
download: uxp-2f940bdc9dcbfe83e17ed26c5d1af7fe874c24ac.tar.gz
Issue #2259 - process Unicode langtags and locale identifiers according to BCP 47
Major spec change: text references are to BCP47 (not the implementing RFCs) and the single source of truth is now Unicode CLDR.
- Switch from IANA to CLDR for make_unicode
- Update grandfathered tag handling directly in the parser
- Don't support extlang, irregular, privateuse or 4-letter subtags
- Adjust comments to refer to Unicode BCP 47 locale identifiers, remove RFC 5646
- Canonicalize/order langtags correctly
- Tokenize BCP47 in reusable class
Based-on: m-c 1407674 (partial), 1451082, 1530320, 1522070, 1531091
Diffstat (limited to 'js/src/builtin')
-rw-r--r-- js/src/builtin/RegExp.cpp 76
-rw-r--r-- js/src/builtin/RegExp.h 20
-rw-r--r-- js/src/builtin/Utilities.js 6
-rw-r--r-- js/src/builtin/intl/Collator.js 151
-rw-r--r-- js/src/builtin/intl/CommonFunctions.js 1703
-rw-r--r-- js/src/builtin/intl/DateTimeFormat.js 74
-rw-r--r-- js/src/builtin/intl/LangTagMappingsGenerated.js 1466
-rw-r--r-- js/src/builtin/intl/NumberFormat.js 165
-rw-r--r-- js/src/builtin/intl/PluralRules.cpp 2
-rw-r--r-- js/src/builtin/intl/PluralRules.js 106
-rw-r--r-- js/src/builtin/intl/make_intl_data.py 881
11 files changed, 3331 insertions, 1319 deletions
diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp
index 46a2862909..f3d34762f6 100644
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -974,8 +974,7 @@ IsTrailSurrogateWithLeadSurrogate(JSContext* cx, HandleLinearString input, int32
*/
static RegExpRunStatus
ExecuteRegExp(JSContext* cx, HandleObject regexp, HandleString string,
- int32_t lastIndex,
- MatchPairs* matches, size_t* endIndex, RegExpStaticsUpdate staticsUpdate)
+ int32_t lastIndex, MatchPairs* matches, size_t* endIndex)
{
/*
* WARNING: Despite the presence of spec step comment numbers, this
@@ -990,14 +989,9 @@ ExecuteRegExp(JSContext* cx, HandleObject regexp, HandleString string,
if (!RegExpObject::getShared(cx, reobj, &re))
return RegExpRunStatus_Error;
- RegExpStatics* res;
- if (staticsUpdate == UpdateRegExpStatics) {
- res = GlobalObject::getRegExpStatics(cx, cx->global());
- if (!res)
- return RegExpRunStatus_Error;
- } else {
- res = nullptr;
- }
+ RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
+ if (!res)
+ return RegExpRunStatus_Error;
RootedLinearString input(cx, string->ensureLinear(cx));
if (!input)
@@ -1051,15 +1045,14 @@ ExecuteRegExp(JSContext* cx, HandleObject regexp, HandleString string,
* steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
*/
static bool
-RegExpMatcherImpl(JSContext* cx, HandleObject regexp, HandleString string,
- int32_t lastIndex, RegExpStaticsUpdate staticsUpdate, MutableHandleValue rval)
+RegExpMatcherImpl(JSContext* cx, HandleObject regexp, HandleString string, int32_t lastIndex,
+ MutableHandleValue rval)
{
/* Execute regular expression and gather matches. */
ScopedMatchPairs matches(&cx->tempLifoAlloc());
/* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
- RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex,
- &matches, nullptr, staticsUpdate);
+ RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
if (status == RegExpRunStatus_Error)
return false;
@@ -1099,8 +1092,7 @@ js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp)
return false;
/* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
- return RegExpMatcherImpl(cx, regexp, string, lastIndex,
- UpdateRegExpStatics, args.rval());
+ return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
}
/*
@@ -1123,8 +1115,7 @@ js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp, HandleString input,
return false;
return CreateRegExpMatchResult(cx, *shared, input, *maybeMatches, output);
}
- return RegExpMatcherImpl(cx, regexp, input, lastIndex,
- UpdateRegExpStatics, output);
+ return RegExpMatcherImpl(cx, regexp, input, lastIndex, output);
}
/*
@@ -1135,14 +1126,13 @@ js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp, HandleString input,
*/
static bool
RegExpSearcherImpl(JSContext* cx, HandleObject regexp, HandleString string,
- int32_t lastIndex, RegExpStaticsUpdate staticsUpdate, int32_t* result)
+ int32_t lastIndex, int32_t* result)
{
/* Execute regular expression and gather matches. */
ScopedMatchPairs matches(&cx->tempLifoAlloc());
/* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
- RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex,
- &matches, nullptr, staticsUpdate);
+ RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
if (status == RegExpRunStatus_Error)
return false;
@@ -1180,7 +1170,7 @@ js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp)
/* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
int32_t result = 0;
- if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, UpdateRegExpStatics, &result))
+ if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result))
return false;
args.rval().setInt32(result);
@@ -1203,23 +1193,7 @@ js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp, HandleString input,
*result = CreateRegExpSearchResult(cx, *maybeMatches);
return true;
}
- return RegExpSearcherImpl(cx, regexp, input, lastIndex,
- UpdateRegExpStatics, result);
-}
-
-bool
-js::regexp_exec_no_statics(JSContext* cx, unsigned argc, Value* vp)
-{
- CallArgs args = CallArgsFromVp(argc, vp);
- MOZ_ASSERT(args.length() == 2);
- MOZ_ASSERT(IsRegExpObject(args[0]));
- MOZ_ASSERT(args[1].isString());
-
- RootedObject regexp(cx, &args[0].toObject());
- RootedString string(cx, args[1].toString());
-
- return RegExpMatcherImpl(cx, regexp, string, 0,
- DontUpdateRegExpStatics, args.rval());
+ return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
}
/*
@@ -1245,8 +1219,7 @@ js::RegExpTester(JSContext* cx, unsigned argc, Value* vp)
/* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
size_t endIndex = 0;
- RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex,
- nullptr, &endIndex, UpdateRegExpStatics);
+ RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex, nullptr, &endIndex);
if (status == RegExpRunStatus_Error)
return false;
@@ -1271,8 +1244,7 @@ js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
MOZ_ASSERT(lastIndex >= 0);
size_t endIndexTmp = 0;
- RegExpRunStatus status = ExecuteRegExp(cx, regexp, input, lastIndex,
- nullptr, &endIndexTmp, UpdateRegExpStatics);
+ RegExpRunStatus status = ExecuteRegExp(cx, regexp, input, lastIndex, nullptr, &endIndexTmp);
if (status == RegExpRunStatus_Success) {
MOZ_ASSERT(endIndexTmp <= INT32_MAX);
@@ -1287,24 +1259,6 @@ js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
return false;
}
-bool
-js::regexp_test_no_statics(JSContext* cx, unsigned argc, Value* vp)
-{
- CallArgs args = CallArgsFromVp(argc, vp);
- MOZ_ASSERT(args.length() == 2);
- MOZ_ASSERT(IsRegExpObject(args[0]));
- MOZ_ASSERT(args[1].isString());
-
- RootedObject regexp(cx, &args[0].toObject());
- RootedString string(cx, args[1].toString());
-
- size_t ignored = 0;
- RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, 0,
- nullptr, &ignored, DontUpdateRegExpStatics);
- args.rval().setBoolean(status == RegExpRunStatus_Success);
- return status != RegExpRunStatus_Error;
-}
-
static void
GetParen(JSLinearString* matched, const JS::Value& capture, JSSubString* out)
{
diff --git a/js/src/builtin/RegExp.h b/js/src/builtin/RegExp.h
index f66c9b1b81..c0a7d59f77 100644
--- a/js/src/builtin/RegExp.h
+++ b/js/src/builtin/RegExp.h
@@ -18,10 +18,6 @@ namespace js {
JSObject*
InitRegExpClass(JSContext* cx, HandleObject obj);
-// Whether RegExp statics should be updated with the input and results of a
-// regular expression execution.
-enum RegExpStaticsUpdate { UpdateRegExpStatics, DontUpdateRegExpStatics };
-
/*
* Legacy behavior of ExecuteRegExp(), which is baked into the JSAPI.
*
@@ -72,22 +68,6 @@ intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc, Value* vp);
*/
/*
- * Behaves like regexp.exec(string), but doesn't set RegExp statics.
- *
- * Usage: match = regexp_exec_no_statics(regexp, string)
- */
-extern MOZ_MUST_USE bool
-regexp_exec_no_statics(JSContext* cx, unsigned argc, Value* vp);
-
-/*
- * Behaves like regexp.test(string), but doesn't set RegExp statics.
- *
- * Usage: does_match = regexp_test_no_statics(regexp, string)
- */
-extern MOZ_MUST_USE bool
-regexp_test_no_statics(JSContext* cx, unsigned argc, Value* vp);
-
-/*
* Behaves like RegExp(pattern, flags).
* |pattern| should be a RegExp object, |flags| should be a raw integer value.
* Must be called without |new|.
diff --git a/js/src/builtin/Utilities.js b/js/src/builtin/Utilities.js
index 09c15957c6..51c5a574fd 100644
--- a/js/src/builtin/Utilities.js
+++ b/js/src/builtin/Utilities.js
@@ -80,12 +80,6 @@ MakeConstructible(Record, {});
/********** Abstract operations defined in ECMAScript Language Specification **********/
-/* Spec: ECMAScript Language Specification, 5.1 edition, 8.12.6 and 11.8.7 */
-function HasProperty(o, p) {
- return p in o;
-}
-
-
/* Spec: ECMAScript Language Specification, 5.1 edition, 9.2 and 11.4.9 */
function ToBoolean(v) {
return !!v;
diff --git a/js/src/builtin/intl/Collator.js b/js/src/builtin/intl/Collator.js
index ee6ea9a9b8..dffadab7c5 100644
--- a/js/src/builtin/intl/Collator.js
+++ b/js/src/builtin/intl/Collator.js
@@ -6,18 +6,6 @@
/**
- * Mapping from Unicode extension keys for collation to options properties,
- * their types and permissible values.
- *
- * Spec: ECMAScript Internationalization API Specification, 10.1.1.
- */
-var collatorKeyMappings = {
- kn: {property: "numeric", type: "boolean"},
- kf: {property: "caseFirst", type: "string", values: ["upper", "lower", "false"]}
-};
-
-
-/**
* Compute an internal properties object from |lazyCollatorData|.
*/
function resolveCollatorInternals(lazyCollatorData)
@@ -26,60 +14,49 @@ function resolveCollatorInternals(lazyCollatorData)
var internalProps = std_Object_create(null);
- // Step 7.
- internalProps.usage = lazyCollatorData.usage;
-
- // Step 8.
var Collator = collatorInternalProperties;
- // Step 9.
+ // Step 5.
+ internalProps.usage = lazyCollatorData.usage;
+
+ // Steps 6-7.
var collatorIsSorting = lazyCollatorData.usage === "sort";
var localeData = collatorIsSorting
? Collator.sortLocaleData
: Collator.searchLocaleData;
// Compute effective locale.
- // Step 14.
+ // Step 16.
var relevantExtensionKeys = Collator.relevantExtensionKeys;
- // Step 15.
+ // Step 17.
var r = ResolveLocale(callFunction(Collator.availableLocales, Collator),
lazyCollatorData.requestedLocales,
lazyCollatorData.opt,
relevantExtensionKeys,
localeData);
- // Step 16.
+ // Step 18.
internalProps.locale = r.locale;
- // Steps 17-19.
- var key, property, value, mapping;
- var i = 0, len = relevantExtensionKeys.length;
- while (i < len) {
- // Step 19.a.
- key = relevantExtensionKeys[i];
- if (key === "co") {
- // Step 19.b.
- property = "collation";
- value = r.co === null ? "default" : r.co;
- } else {
- // Step 19.c.
- mapping = collatorKeyMappings[key];
- property = mapping.property;
- value = r[key];
- if (mapping.type === "boolean")
- value = value === "true";
- }
+ // Step 19.
+ var collation = r.co;
+
+ // Step 20.
+ if (collation === null)
+ collation = "default";
- // Step 19.d.
- internalProps[property] = value;
+ // Step 21.
+ internalProps.collation = collation;
- // Step 19.e.
- i++;
- }
+ // Step 22.
+ internalProps.numeric = r.kn === "true";
+
+ // Step 23.
+ internalProps.caseFirst = r.kf;
// Compute remaining collation options.
- // Steps 21-22.
+ // Step 25.
var s = lazyCollatorData.rawSensitivity;
if (s === undefined) {
// In theory the default sensitivity for the "search" collator is
@@ -88,14 +65,13 @@ function resolveCollatorInternals(lazyCollatorData)
// both collation modes.
s = "variant";
}
+
+ // Step 26.
internalProps.sensitivity = s;
- // Step 24.
+ // Step 28.
internalProps.ignorePunctuation = lazyCollatorData.ignorePunctuation;
- // Step 25.
- internalProps.boundFormat = undefined;
-
// The caller is responsible for associating |internalProps| with the right
// object using |setInternalProperties|.
return internalProps;
@@ -139,9 +115,6 @@ function InitializeCollator(collator, locales, options) {
assert(IsObject(collator), "InitializeCollator called with non-object");
assert(IsCollator(collator), "InitializeCollator called with non-Collator");
- // Steps 1-2 (These steps are no longer required and should be removed
- // from the spec; https://github.com/tc39/ecma402/issues/115).;
-
// Lazy Collator data has the following structure:
//
// {
@@ -162,11 +135,11 @@ function InitializeCollator(collator, locales, options) {
// subset of them.
var lazyCollatorData = std_Object_create(null);
- // Step 3.
+ // Step 1.
var requestedLocales = CanonicalizeLocaleList(locales);
lazyCollatorData.requestedLocales = requestedLocales;
- // Steps 4-5.
+ // Steps 2-3.
//
// If we ever need more speed here at startup, we should try to detect the
// case where |options === undefined| and Object.prototype hasn't been
@@ -179,38 +152,39 @@ function InitializeCollator(collator, locales, options) {
options = ToObject(options);
// Compute options that impact interpretation of locale.
- // Step 6.
+ // Step 4.
var u = GetOption(options, "usage", "string", ["sort", "search"], "sort");
lazyCollatorData.usage = u;
- // Step 10.
+ // Step 8.
var opt = new Record();
lazyCollatorData.opt = opt;
- // Steps 11-12.
+ // Steps 9-10.
var matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
opt.localeMatcher = matcher;
- // Step 13, unrolled.
+ // Steps 11-13.
var numericValue = GetOption(options, "numeric", "boolean", undefined, undefined);
if (numericValue !== undefined)
numericValue = numericValue ? 'true' : 'false';
opt.kn = numericValue;
+ // Steps 14-15.
var caseFirstValue = GetOption(options, "caseFirst", "string", ["upper", "lower", "false"], undefined);
opt.kf = caseFirstValue;
// Compute remaining collation options.
- // Step 20.
+ // Step 24.
var s = GetOption(options, "sensitivity", "string",
["base", "accent", "case", "variant"], undefined);
lazyCollatorData.rawSensitivity = s;
- // Step 23.
+ // Step 27.
var ip = GetOption(options, "ignorePunctuation", "boolean", undefined, false);
lazyCollatorData.ignorePunctuation = ip;
- // Step 26.
+ // Step 29.
//
// We've done everything that must be done now: mark the lazy data as fully
// computed and install it.
@@ -228,9 +202,14 @@ function InitializeCollator(collator, locales, options) {
function Intl_Collator_supportedLocalesOf(locales /*, options*/) {
var options = arguments.length > 1 ? arguments[1] : undefined;
+ // Step 1.
var availableLocales = callFunction(collatorInternalProperties.availableLocales,
collatorInternalProperties);
+
+ // Step 2.
var requestedLocales = CanonicalizeLocaleList(locales);
+
+ // Step 3.
return SupportedLocales(availableLocales, requestedLocales, options);
}
@@ -353,9 +332,9 @@ function collatorSearchLocaleData() {
/**
- * Function to be bound and returned by Intl.Collator.prototype.format.
+ * Function to be bound and returned by Intl.Collator.prototype.compare.
*
- * Spec: ECMAScript Internationalization API Specification, 12.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 10.3.3.1.
*/
function collatorCompareToBind(x, y) {
// Steps 1.a.i-ii implemented by ECMAScript declaration binding instantiation,
@@ -375,26 +354,28 @@ function collatorCompareToBind(x, y) {
* than 0 if x > y according to the sort order for the locale and collation
* options of this Collator object.
*
- * Spec: ECMAScript Internationalization API Specification, 10.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 10.3.3.
*/
function Intl_Collator_compare_get() {
- // Check "this Collator object" per introduction of section 10.3.
- if (!IsObject(this) || !IsCollator(this))
+ // Step 1.
+ var collator = this;
+
+ // Steps 2-3.
+ if (!IsObject(collator) || !IsCollator(collator))
ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "Collator", "compare", "Collator");
- var internals = getCollatorInternals(this);
+ var internals = getCollatorInternals(collator);
- // Step 1.
+ // Step 4.
if (internals.boundCompare === undefined) {
- // Step 1.a.
- var F = collatorCompareToBind;
+ // Steps 4.a-b.
+ var F = callFunction(FunctionBind, collatorCompareToBind, collator);
- // Steps 1.b-d.
- var bc = callFunction(FunctionBind, F, this);
- internals.boundCompare = bc;
+ // Step 4.c.
+ internals.boundCompare = F;
}
- // Step 2.
+ // Step 5.
return internals.boundCompare;
}
_SetCanonicalName(Intl_Collator_compare_get, "get compare");
@@ -403,28 +384,30 @@ _SetCanonicalName(Intl_Collator_compare_get, "get compare");
/**
* Returns the resolved options for a Collator object.
*
- * Spec: ECMAScript Internationalization API Specification, 10.3.3 and 10.4.
+ * Spec: ECMAScript Internationalization API Specification, 10.3.4.
*/
function Intl_Collator_resolvedOptions() {
- // Check "this Collator object" per introduction of section 10.3.
- if (!IsObject(this) || !IsCollator(this))
+ // Step 1.
+ var collator = this;
+
+ // Steps 2-3.
+ if (!IsObject(collator) || !IsCollator(collator))
ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "Collator", "resolvedOptions", "Collator");
- var internals = getCollatorInternals(this);
+ var internals = getCollatorInternals(collator);
+ // Steps 4-5.
var result = {
locale: internals.locale,
usage: internals.usage,
sensitivity: internals.sensitivity,
- ignorePunctuation: internals.ignorePunctuation
+ ignorePunctuation: internals.ignorePunctuation,
+ collation: internals.collation,
+ numeric: internals.numeric,
+ caseFirst: internals.caseFirst,
};
- var relevantExtensionKeys = collatorInternalProperties.relevantExtensionKeys;
- for (var i = 0; i < relevantExtensionKeys.length; i++) {
- var key = relevantExtensionKeys[i];
- var property = (key === "co") ? "collation" : collatorKeyMappings[key].property;
- _DefineDataProperty(result, property, internals[property]);
- }
+ // Step 6.
return result;
}
diff --git a/js/src/builtin/intl/CommonFunctions.js b/js/src/builtin/intl/CommonFunctions.js
index cf5a615721..36b2bec9b2 100644
--- a/js/src/builtin/intl/CommonFunctions.js
+++ b/js/src/builtin/intl/CommonFunctions.js
@@ -14,35 +14,70 @@ function hasOwn(propName, object) {
}
/**
- * Holder object for encapsulating regexp instances.
- *
- * Regular expression instances should be created after the initialization of
- * self-hosted global.
- */
-var internalIntlRegExps = std_Object_create(null);
-internalIntlRegExps.unicodeLocaleExtensionSequenceRE = null;
-internalIntlRegExps.languageTagRE = null;
-internalIntlRegExps.duplicateVariantRE = null;
-internalIntlRegExps.duplicateSingletonRE = null;
-internalIntlRegExps.isWellFormedCurrencyCodeRE = null;
-internalIntlRegExps.currencyDigitsRE = null;
-
-/**
- * Regular expression matching a "Unicode locale extension sequence", which the
+ * Returns the start index of a "Unicode locale extension sequence", which the
* specification defines as: "any substring of a language tag that starts with
* a separator '-' and the singleton 'u' and includes the maximum sequence of
* following non-singleton subtags and their preceding '-' separators."
*
* Alternatively, this may be defined as: the components of a language tag that
- * match the extension production in RFC 5646, where the singleton component is
- * "u".
+ * match the `unicode_locale_extensions` production in UTS 35.
*
* Spec: ECMAScript Internationalization API Specification, 6.2.1.
*/
-function getUnicodeLocaleExtensionSequenceRE() {
- return internalIntlRegExps.unicodeLocaleExtensionSequenceRE ||
- (internalIntlRegExps.unicodeLocaleExtensionSequenceRE =
- RegExpCreate("-u(?:-[a-z0-9]{2,8})+"));
+function startOfUnicodeExtensions(locale) {
+ assert(typeof locale === "string", "locale is a string");
+
+ // Search for "-u-" marking the start of a Unicode extension sequence.
+ var start = callFunction(std_String_indexOf, locale, "-u-");
+ if (start < 0)
+ return -1;
+
+ // And search for "-x-" marking the start of any privateuse component to
+ // handle the case when "-u-" was only found within a privateuse subtag.
+ var privateExt = callFunction(std_String_indexOf, locale, "-x-");
+ if (privateExt >= 0 && privateExt < start)
+ return -1;
+
+ return start;
+}
+
+/**
+ * Returns the end index of a Unicode locale extension sequence.
+ */
+function endOfUnicodeExtensions(locale, start) {
+ assert(typeof locale === "string", "locale is a string");
+ assert(IsStructurallyValidLanguageTag(locale), "locale is a language tag");
+ assert(CanonicalizeLanguageTag(locale) === locale, "locale is a canonicalized language tag");
+ assert(0 <= start && start < locale.length, "start is an index into locale");
+ assert(Substring(locale, start, 3) === "-u-", "start points to Unicode extension sequence");
+
+ #define HYPHEN 0x2D
+ assert(std_String_fromCharCode(HYPHEN) === "-",
+ "code unit constant should match the expected character");
+
+ // Search for the start of the next singleton or privateuse subtag.
+ //
+ // Begin searching after the smallest possible Unicode locale extension
+ // sequence, namely |"-u-" 2alphanum|. End searching once the remaining
+ // characters can't fit the smallest possible singleton or privateuse
+ // subtag, namely |"-x-" alphanum|. Note the reduced end-limit means
+ // indexing inside the loop is always in-range.
+ for (var i = start + 5, end = locale.length - 4; i <= end; i++) {
+ if (callFunction(std_String_charCodeAt, locale, i) !== HYPHEN)
+ continue;
+ if (callFunction(std_String_charCodeAt, locale, i + 2) === HYPHEN)
+ return i;
+
+ // Skip over (i + 1) and (i + 2) because we've just verified they
+ // aren't "-", so the next possible delimiter can only be at (i + 3).
+ i += 2;
+ }
+
+ #undef HYPHEN
+
+ // If no singleton or privateuse subtag was found, the Unicode extension
+ // sequence extends until the end of the string.
+ return locale.length;
}
@@ -50,226 +85,602 @@ function getUnicodeLocaleExtensionSequenceRE() {
* Removes Unicode locale extension sequences from the given language tag.
*/
function removeUnicodeExtensions(locale) {
- // A wholly-privateuse locale has no extension sequences.
- if (callFunction(std_String_startsWith, locale, "x-"))
+ var start = startOfUnicodeExtensions(locale);
+ if (start < 0)
return locale;
- // Otherwise, split on "-x-" marking the start of any privateuse component.
- // Replace Unicode locale extension sequences in the left half, and return
- // the concatenation.
- var pos = callFunction(std_String_indexOf, locale, "-x-");
- if (pos < 0)
- pos = locale.length;
-
- var left = callFunction(String_substring, locale, 0, pos);
- var right = callFunction(String_substring, locale, pos);
-
- var extensions;
- var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
- while ((extensions = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, left)) !== null) {
- left = StringReplaceString(left, extensions[0], "");
- unicodeLocaleExtensionSequenceRE.lastIndex = 0;
- }
+ var end = endOfUnicodeExtensions(locale, start);
+ var left = Substring(locale, 0, start);
+ var right = Substring(locale, end, locale.length - end);
var combined = left + right;
- assert(IsStructurallyValidLanguageTag(combined), "recombination produced an invalid language tag");
- assert(function() {
- var uindex = callFunction(std_String_indexOf, combined, "-u-");
- if (uindex < 0)
- return true;
- var xindex = callFunction(std_String_indexOf, combined, "-x-");
- return xindex > 0 && xindex < uindex;
- }(), "recombination failed to remove all Unicode locale extension sequences");
+
+ assert(IsStructurallyValidLanguageTag(combined),
+ "recombination produced an invalid language tag");
+ assert(startOfUnicodeExtensions(combined) < 0,
+ "recombination failed to remove all Unicode locale extension sequences");
return combined;
}
-
/**
- * Regular expression defining BCP 47 language tags.
- *
- * Spec: RFC 5646 section 2.1.
+ * Returns Unicode locale extension sequences from the given language tag.
*/
-function getLanguageTagRE() {
- if (internalIntlRegExps.languageTagRE)
- return internalIntlRegExps.languageTagRE;
-
- // RFC 5234 section B.1
- // ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
- var ALPHA = "[a-zA-Z]";
- // DIGIT = %x30-39
- // ; 0-9
- var DIGIT = "[0-9]";
-
- // RFC 5646 section 2.1
- // alphanum = (ALPHA / DIGIT) ; letters and numbers
- var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
- // regular = "art-lojban" ; these tags match the 'langtag'
- // / "cel-gaulish" ; production, but their subtags
- // / "no-bok" ; are not extended language
- // / "no-nyn" ; or variant subtags: their meaning
- // / "zh-guoyu" ; is defined by their registration
- // / "zh-hakka" ; and all of these are deprecated
- // / "zh-min" ; in favor of a more modern
- // / "zh-min-nan" ; subtag or sequence of subtags
- // / "zh-xiang"
- var regular = "(?:art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)";
- // irregular = "en-GB-oed" ; irregular tags do not match
- // / "i-ami" ; the 'langtag' production and
- // / "i-bnn" ; would not otherwise be
- // / "i-default" ; considered 'well-formed'
- // / "i-enochian" ; These tags are all valid,
- // / "i-hak" ; but most are deprecated
- // / "i-klingon" ; in favor of more modern
- // / "i-lux" ; subtags or subtag
- // / "i-mingo" ; combination
- // / "i-navajo"
- // / "i-pwn"
- // / "i-tao"
- // / "i-tay"
- // / "i-tsu"
- // / "sgn-BE-FR"
- // / "sgn-BE-NL"
- // / "sgn-CH-DE"
- var irregular = "(?:en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)";
- // grandfathered = irregular ; non-redundant tags registered
- // / regular ; during the RFC 3066 era
- var grandfathered = "(?:" + irregular + "|" + regular + ")";
- // privateuse = "x" 1*("-" (1*8alphanum))
- var privateuse = "(?:x(?:-[a-z0-9]{1,8})+)";
- // singleton = DIGIT ; 0 - 9
- // / %x41-57 ; A - W
- // / %x59-5A ; Y - Z
- // / %x61-77 ; a - w
- // / %x79-7A ; y - z
- var singleton = "(?:" + DIGIT + "|[A-WY-Za-wy-z])";
- // extension = singleton 1*("-" (2*8alphanum))
- var extension = "(?:" + singleton + "(?:-" + alphanum + "{2,8})+)";
- // variant = 5*8alphanum ; registered variants
- // / (DIGIT 3alphanum)
- var variant = "(?:" + alphanum + "{5,8}|(?:" + DIGIT + alphanum + "{3}))";
- // region = 2ALPHA ; ISO 3166-1 code
- // / 3DIGIT ; UN M.49 code
- var region = "(?:" + ALPHA + "{2}|" + DIGIT + "{3})";
- // script = 4ALPHA ; ISO 15924 code
- var script = "(?:" + ALPHA + "{4})";
- // extlang = 3ALPHA ; selected ISO 639 codes
- // *2("-" 3ALPHA) ; permanently reserved
- var extlang = "(?:" + ALPHA + "{3}(?:-" + ALPHA + "{3}){0,2})";
- // language = 2*3ALPHA ; shortest ISO 639 code
- // ["-" extlang] ; sometimes followed by
- // ; extended language subtags
- // / 4ALPHA ; or reserved for future use
- // / 5*8ALPHA ; or registered language subtag
- var language = "(?:" + ALPHA + "{2,3}(?:-" + extlang + ")?|" + ALPHA + "{4}|" + ALPHA + "{5,8})";
- // langtag = language
- // ["-" script]
- // ["-" region]
- // *("-" variant)
- // *("-" extension)
- // ["-" privateuse]
- var langtag = language + "(?:-" + script + ")?(?:-" + region + ")?(?:-" +
- variant + ")*(?:-" + extension + ")*(?:-" + privateuse + ")?";
- // Language-Tag = langtag ; normal language tags
- // / privateuse ; private use tag
- // / grandfathered ; grandfathered tags
- var languageTag = "^(?:" + langtag + "|" + privateuse + "|" + grandfathered + ")$";
-
- // Language tags are case insensitive (RFC 5646 section 2.1.1).
- return (internalIntlRegExps.languageTagRE = RegExpCreate(languageTag, "i"));
+function getUnicodeExtensions(locale) {
+ var start = startOfUnicodeExtensions(locale);
+ assert(start >= 0, "start of Unicode extension sequence not found");
+ var end = endOfUnicodeExtensions(locale, start);
+
+ return Substring(locale, start, end - start);
}
+// The three possible token type bits. Expressed as #defines to avoid
+// extra named lookups in the interpreter/jits.
+#define NONE 0b00
+#define ALPHA 0b01
+#define DIGIT 0b10
+
+// Constants for code units used below.
+#define HYPHEN 0x2D
+#define DIGIT_ZERO 0x30
+#define DIGIT_NINE 0x39
+#define UPPER_A 0x41
+#define UPPER_Z 0x5A
+#define LOWER_A 0x61
+#define LOWER_T 0x74
+#define LOWER_U 0x75
+#define LOWER_X 0x78
+#define LOWER_Z 0x7A
+
+// The requirement to use callFunction() for method calls makes the parser
+// harder to read. Macros to the rescue.
+
+// Reads the next token.
+#define NEXT_TOKEN_OR_RETURN_NULL(ts) \
+ if (!callFunction(ts.nextToken, ts)) \
+ return null;
+
+#define NEXT_TOKEN_OR_ASSERT(ts) \
+ if (!callFunction(ts.nextToken, ts)) \
+ assert(false, "unexpected invalid subtag");
-function getDuplicateVariantRE() {
- if (internalIntlRegExps.duplicateVariantRE)
- return internalIntlRegExps.duplicateVariantRE;
-
- // RFC 5234 section B.1
- // ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
- var ALPHA = "[a-zA-Z]";
- // DIGIT = %x30-39
- // ; 0-9
- var DIGIT = "[0-9]";
-
- // RFC 5646 section 2.1
- // alphanum = (ALPHA / DIGIT) ; letters and numbers
- var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
- // variant = 5*8alphanum ; registered variants
- // / (DIGIT 3alphanum)
- var variant = "(?:" + alphanum + "{5,8}|(?:" + DIGIT + alphanum + "{3}))";
-
- // Match a langtag that contains a duplicate variant.
- var duplicateVariant =
- // Match everything in a langtag prior to any variants, and maybe some
- // of the variants as well (which makes this pattern inefficient but
- // not wrong, for our purposes);
- "(?:" + alphanum + "{2,8}-)+" +
- // a variant, parenthesised so that we can refer back to it later;
- "(" + variant + ")-" +
- // zero or more subtags at least two characters long (thus stopping
- // before extension and privateuse components);
- "(?:" + alphanum + "{2,8}-)*" +
- // and the same variant again
- "\\1" +
- // ...but not followed by any characters that would turn it into a
- // different subtag.
- "(?!" + alphanum + ")";
-
- // Language tags are case insensitive (RFC 5646 section 2.1.1). Using
- // character classes covering both upper- and lower-case characters nearly
- // addresses this -- but for the possibility of variant repetition with
- // differing case, e.g. "en-variant-Variant". Use a case-insensitive
- // regular expression to address this. (Note that there's no worry about
- // case transformation accepting invalid characters here: users have
- // already verified the string is alphanumeric Latin plus "-".)
- return (internalIntlRegExps.duplicateVariantRE = RegExpCreate(duplicateVariant, "i"));
+// Assigns the current subtag, transformed to lower-case, to the target, then
+// reads the next token and returns null if an illegal character is found.
+#define SUBTAG_VAR_OR_RETURN_NULL(ts, target) \
+ { \
+ target = Substring(ts.localeLowercase, ts.tokenStart, ts.tokenLength); \
+ NEXT_TOKEN_OR_RETURN_NULL(ts); \
+ }
+
+// Assigns the current subtag, transformed to lower-case, to the target, then
+// reads the next token and asserts if an illegal character is found.
+#define SUBTAG_VAR_OR_ASSERT(ts, target) \
+ { \
+ target = Substring(ts.localeLowercase, ts.tokenStart, ts.tokenLength); \
+ NEXT_TOKEN_OR_ASSERT(ts) \
+ }
+
+/**
+ * Tokenizer for Unicode BCP 47 locale identifiers.
+ *
+ * Construct it with the identifier to scan, then call nextToken() repeatedly.
+ * After each successful call, |token| holds the character classes seen in
+ * the subtag (NONE, ALPHA, DIGIT, or ALPHA | DIGIT) and |tokenStart| /
+ * |tokenLength| delimit the subtag. Callers slice subtags out of
+ * |localeLowercase| using those same offsets.
+ */
+function BCP47TokenStream(locale) {
+ this.locale = locale;
+
+ // Locale identifiers are compared and processed case-insensitively, so
+ // technically it's not necessary to adjust case. But for easier processing,
+ // and because the canonical form for most subtags is lower case, we start
+ // with lower case for all.
+ //
+ // Note that the tokenizer function keeps using the original input string
+ // to properly detect non-ASCII characters. The lower-case string can't be
+ // used to detect those characters, because some non-ASCII characters
+ // lower-case map into ASCII characters, e.g. U+212A (KELVIN SIGN) lower-
+ // case maps to U+006B (LATIN SMALL LETTER K).
+ this.localeLowercase = callFunction(std_String_toLowerCase, locale);
+
+ // Current parse index in |locale|.
+ this.index = 0;
+
+ // The current token type, its start index, and its length.
+ this.token = NONE;
+ this.tokenStart = 0;
+ this.tokenLength = 0;
+
+ // Sanity-check the code unit constants the tokenizer and parser compare
+ // against (they are macros defined outside this excerpt).
+ assert(std_String_fromCharCode(HYPHEN) === "-" &&
+ std_String_fromCharCode(DIGIT_ZERO) === "0" &&
+ std_String_fromCharCode(DIGIT_NINE) === "9" &&
+ std_String_fromCharCode(UPPER_A) === "A" &&
+ std_String_fromCharCode(UPPER_Z) === "Z" &&
+ std_String_fromCharCode(LOWER_A) === "a" &&
+ std_String_fromCharCode(LOWER_T) === "t" &&
+ std_String_fromCharCode(LOWER_U) === "u" &&
+ std_String_fromCharCode(LOWER_X) === "x" &&
+ std_String_fromCharCode(LOWER_Z) === "z",
+ "code unit constants should match the expected characters");
}
+MakeConstructible(BCP47TokenStream, {
+ __proto__: null,
+
+ // Reads the next token, returns |false| if an illegal character was found,
+ // otherwise returns |true|.
+ //
+ // On success |token|, |tokenStart|, |tokenLength| and |index| are updated;
+ // on failure the stream state is left untouched. Reading at or past the end
+ // of the input succeeds and yields an empty token of type NONE.
+ //
+ // eslint-disable-next-line object-shorthand
+ nextToken: function() {
+ var type = NONE;
+ var {index, locale} = this;
+ // |i| is declared with |var|, so it stays readable after the loop and
+ // marks the end of the token (either a separator or the end of input).
+ for (var i = index; i < locale.length; i++) {
+ // UTS 35, section 3.1.
+ // alpha = [A-Z a-z] ;
+ // digit = [0-9] ;
+ var c = callFunction(std_String_charCodeAt, locale, i);
+ if ((UPPER_A <= c && c <= UPPER_Z) || (LOWER_A <= c && c <= LOWER_Z))
+ type |= ALPHA;
+ else if (DIGIT_ZERO <= c && c <= DIGIT_NINE)
+ type |= DIGIT;
+ // A hyphen only counts as a separator when it neither starts the
+ // token (which rejects a leading "-" and empty subtags as in "a--b")
+ // nor is the last character of the input (which rejects a trailing "-").
+ else if (c === HYPHEN && i > index && i + 1 < locale.length)
+ break;
+ else
+ return false;
+ }
+
+ this.token = type;
+ this.tokenStart = index;
+ this.tokenLength = i - index;
+ // Continue after the separator (or one past the end of the input).
+ this.index = i + 1;
+ return true;
+ },
+
+ // Returns true if the character at the requested index within the current
+ // token is a digit.
+ //
+ // eslint-disable-next-line object-shorthand
+ isDigitAt: function(index) {
+ assert(0 <= index && index < this.tokenLength,
+ "must be an index into the current token");
+ var c = callFunction(std_String_charCodeAt, this.localeLowercase, this.tokenStart + index);
+ assert(!(c <= DIGIT_NINE) || c >= DIGIT_ZERO,
+ "token-start-code-unit <= '9' implies token-start-code-unit is in '0'..'9' " +
+ "and because all digits are sorted before any letters");
+ return c <= DIGIT_NINE;
+ },
+
+ // Returns the code unit of the first character at the current token
+ // position. Always returns the lower-case form of an alphabetical
+ // character.
+ //
+ // eslint-disable-next-line object-shorthand
+ singletonKey: function() {
+ assert(this.tokenLength === 1, "token is not a singleton");
+ var c = callFunction(std_String_charCodeAt, this.localeLowercase, this.tokenStart);
+ assert((DIGIT_ZERO <= c && c <= DIGIT_NINE) || (LOWER_A <= c && c <= LOWER_Z),
+ "unexpected code unit");
+ return c;
+ },
+
+ // Consumes the singleton at the current token together with all subtags
+ // that belong to it and returns the whole sequence in lower case, or null
+ // if the sequence is malformed. Private-use ("x") sequences accept subtags
+ // of length 1-8, all other singletons require subtags of length 2-8.
+ //
+ // On success the stream is positioned at the first token that does not
+ // belong to the sequence.
+ //
+ // eslint-disable-next-line object-shorthand
+ singletonValue: function() {
+ var singletonStart = this.tokenStart;
+ var min = callFunction(this.singletonKey, this) === LOWER_X ? 1 : 2;
+
+ NEXT_TOKEN_OR_RETURN_NULL(this);
+
+ // At least one non-singleton subtag must be present.
+ if (!(min <= this.tokenLength && this.tokenLength <= 8))
+ return null;
+ do {
+ NEXT_TOKEN_OR_RETURN_NULL(this);
+ } while (min <= this.tokenLength && this.tokenLength <= 8);
+
+ return callFunction(this.singletonValueAt, this, singletonStart);
+ },
+
+ // eslint-disable-next-line object-shorthand
+ singletonValueAt: function(start) {
+ // Singletons must be followed by a non-singleton subtag, "en-a-b" is not allowed.
+ var length = this.tokenStart - 1 - start;
+ if (length <= 2)
+ return null;
+ return Substring(this.localeLowercase, start, length);
+ }
+});
+
+/* eslint-disable complexity */
+/**
+ * Parser for Unicode BCP 47 locale identifiers.
+ *
+ * Returns null if |locale| can't be parsed as a `unicode_locale_id`. If the
+ * input is a grandfathered language tag, it is directly canonicalized to its
+ * modern form. The returned object has the following structure:
+ *
+ * {
+ * language: `unicode_language_subtag`,
+ * script: `unicode_script_subtag` / undefined,
+ * region: `unicode_region_subtag` / undefined,
+ * variants: array of `unicode_variant_subtag`,
+ * extensions: array of `extensions`,
+ * privateuse: `pu_extensions` / undefined,
+ * }
+ *
+ * Each entry of |extensions| is the complete lower-case extension sequence
+ * including its singleton (e.g. "u-ca-chinese"); |privateuse| likewise
+ * starts with the "x" singleton.
+ *
+ * All locale identifier subtags are returned in their normalized case:
+ *
+ * var langtag = parseLanguageTag("en-latn-us");
+ * assertEq("en", langtag.language);
+ * assertEq("Latn", langtag.script);
+ * assertEq("US", langtag.region);
+ *
+ * Spec: https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers
+ */
+function parseLanguageTag(locale) {
+ assert(typeof locale === "string", "locale is a string");
+
+ // unicode_locale_id = unicode_language_id
+ // extensions*
+ // pu_extensions? ;
+ var ts = new BCP47TokenStream(locale);
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ var language, script, region, privateuse;
+ var variants = [];
+ var extensions = [];
+
+ // unicode_language_id = unicode_language_subtag
+ // (sep unicode_script_subtag)?
+ // (sep unicode_region_subtag)?
+ // (sep unicode_variant_subtag)* ;
+ //
+ // sep = "-"
+ //
+ // Note: Unicode CLDR locale identifier backward compatibility extensions
+ // removed from `unicode_language_id`.
+
+ // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
+ if (ts.token !== ALPHA || ts.tokenLength === 1 || ts.tokenLength === 4 || ts.tokenLength > 8) {
+ // Four character language subtags are not allowed in Unicode BCP 47
+ // locale identifiers. Also see the comparison to Unicode CLDR locale
+ // identifiers in <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
+ return null;
+ }
+ assert((2 <= ts.tokenLength && ts.tokenLength <= 3) ||
+ (5 <= ts.tokenLength && ts.tokenLength <= 8),
+ "language subtags have 2-3 or 5-8 letters");
+
+ // NB: SUBTAG_VAR_OR_RETURN_NULL and NEXT_TOKEN_OR_RETURN_NULL are macros;
+ // per their names they can make this function return null at the point of
+ // use when the following token is malformed.
+ SUBTAG_VAR_OR_RETURN_NULL(ts, language);
+
+ // unicode_script_subtag = alpha{4} ;
+ if (ts.tokenLength === 4 && ts.token === ALPHA) {
+ SUBTAG_VAR_OR_RETURN_NULL(ts, script);
-function getDuplicateSingletonRE() {
- if (internalIntlRegExps.duplicateSingletonRE)
- return internalIntlRegExps.duplicateSingletonRE;
-
- // RFC 5234 section B.1
- // ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
- var ALPHA = "[a-zA-Z]";
- // DIGIT = %x30-39
- // ; 0-9
- var DIGIT = "[0-9]";
-
- // RFC 5646 section 2.1
- // alphanum = (ALPHA / DIGIT) ; letters and numbers
- var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
- // singleton = DIGIT ; 0 - 9
- // / %x41-57 ; A - W
- // / %x59-5A ; Y - Z
- // / %x61-77 ; a - w
- // / %x79-7A ; y - z
- var singleton = "(?:" + DIGIT + "|[A-WY-Za-wy-z])";
-
- // Match a langtag that contains a duplicate singleton.
- var duplicateSingleton =
- // Match a singleton subtag, parenthesised so that we can refer back to
- // it later;
- "-(" + singleton + ")-" +
- // then zero or more subtags;
- "(?:" + alphanum + "+-)*" +
- // and the same singleton again
- "\\1" +
- // ...but not followed by any characters that would turn it into a
- // different subtag.
- "(?!" + alphanum + ")";
-
- // Language tags are case insensitive (RFC 5646 section 2.1.1). Using
- // character classes covering both upper- and lower-case characters nearly
- // addresses this -- but for the possibility of singleton repetition with
- // differing case, e.g. "en-u-foo-U-foo". Use a case-insensitive regular
- // expression to address this. (Note that there's no worry about case
- // transformation accepting invalid characters here: users have already
- // verified the string is alphanumeric Latin plus "-".)
- return (internalIntlRegExps.duplicateSingletonRE = RegExpCreate(duplicateSingleton, "i"));
+ // The first character of a script code needs to be capitalized.
+ // "hans" -> "Hans"
+ script = callFunction(std_String_toUpperCase, script[0]) +
+ Substring(script, 1, script.length - 1);
+ }
+
+ // unicode_region_subtag = (alpha{2} | digit{3}) ;
+ if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+ (ts.tokenLength === 3 && ts.token === DIGIT))
+ {
+ SUBTAG_VAR_OR_RETURN_NULL(ts, region);
+
+ // Region codes need to be in upper-case. "bu" -> "BU"
+ region = callFunction(std_String_toUpperCase, region);
+ }
+
+ // unicode_variant_subtag = (alphanum{5,8}
+ // | digit alphanum{3}) ;
+ //
+ // alphanum = [0-9 A-Z a-z] ;
+ while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+ (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+ {
+ // Locale identifiers are case insensitive (UTS 35, section 3.2).
+ // All seen variants are compared ignoring case differences by
+ // using the lower-case form. This allows us to properly detect and
+ // reject variant repetitions with differing case, e.g.
+ // "en-variant-Variant".
+ var variant;
+ SUBTAG_VAR_OR_RETURN_NULL(ts, variant);
+
+ // Reject the locale identifier if a duplicate variant was found.
+ //
+ // This linear-time verification step means the whole variant
+ // subtag checking is potentially quadratic, but we're okay doing
+ // that because language tags are unlikely to be deliberately
+ // pathological.
+ if (callFunction(ArrayIndexOf, variants, variant) !== -1)
+ return null;
+ _DefineDataProperty(variants, variants.length, variant);
+ }
+
+ // extensions = unicode_locale_extensions
+ // | transformed_extensions
+ // | other_extensions ;
+ //
+ // unicode_locale_extensions = sep [uU]
+ // ((sep keyword)+
+ // |(sep attribute)+ (sep keyword)*) ;
+ //
+ // transformed_extensions = sep [tT]
+ // ((sep tlang (sep tfield)*)
+ // |(sep tfield)+) ;
+ //
+ // other_extensions = [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+ //
+ // keyword = key (sep type)? ;
+ //
+ // key = alphanum alpha ;
+ //
+ // type = alphanum{3,8} (sep alphanum{3,8})* ;
+ //
+ // attribute = alphanum{3,8} ;
+ //
+ // tlang = unicode_language_subtag
+ // (sep unicode_script_subtag)?
+ // (sep unicode_region_subtag)?
+ // (sep unicode_variant_subtag)* ;
+ //
+ // tfield = tkey tvalue;
+ //
+ // tkey = alpha digit ;
+ //
+ // tvalue = (sep alphanum{3,8})+ ;
+ var seenSingletons = [];
+ while (ts.tokenLength === 1) {
+ var singleton = callFunction(ts.singletonKey, ts);
+ if (singleton === LOWER_X)
+ break;
+
+ // Locale identifiers are case insensitive (UTS 35, section 3.2).
+ // Ensure |singletonKey()| does not return the code unit of an
+ // upper-case character, so we can properly detect and reject
+ // singletons with different case, e.g. "en-u-foo-U-foo".
+ assert(!(UPPER_A <= singleton && singleton <= UPPER_Z),
+ "unexpected upper-case code unit");
+
+ // Reject the input if a duplicate singleton was found.
+ //
+ // Similar to the variant validation step this check is O(n**2),
+ // but given that there are only 35 possible singletons the
+ // quadratic runtime is negligible.
+ if (callFunction(ArrayIndexOf, seenSingletons, singleton) !== -1)
+ return null;
+ _DefineDataProperty(seenSingletons, seenSingletons.length, singleton);
+
+ var extension;
+ if (singleton === LOWER_U) {
+ var extensionStart = ts.tokenStart;
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ while (2 <= ts.tokenLength && ts.tokenLength <= 8) {
+ // `key` doesn't allow a digit as its second character.
+ if (ts.tokenLength === 2 && callFunction(ts.isDigitAt, ts, 1))
+ return null;
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ }
+ extension = callFunction(ts.singletonValueAt, ts, extensionStart);
+ } else if (singleton === LOWER_T) {
+ var extensionStart = ts.tokenStart;
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ // `tfield` starts with `tkey`, which in turn is `alpha digit`, so
+ // an alpha-only token must be a `tlang`.
+ if (ts.token === ALPHA) {
+ // `unicode_language_subtag`
+ if (ts.tokenLength === 1 || ts.tokenLength === 4 || ts.tokenLength > 8)
+ return null;
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ // `unicode_script_subtag` (optional)
+ if (ts.tokenLength === 4 && ts.token === ALPHA) {
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ }
+
+ // `unicode_region_subtag` (optional)
+ if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+ (ts.tokenLength === 3 && ts.token === DIGIT))
+ {
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ }
+
+ // `unicode_variant_subtag` (optional)
+ while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+ (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+ {
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ }
+ }
+
+ // Trailing `tfield` subtags.
+ while (ts.tokenLength === 2) {
+ // `tkey` is `alpha digit`.
+ if (callFunction(ts.isDigitAt, ts, 0) ||
+ !callFunction(ts.isDigitAt, ts, 1))
+ {
+ return null;
+ }
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+ // `tfield` requires at least one `tvalue`.
+ if (!(3 <= ts.tokenLength && ts.tokenLength <= 8))
+ return null;
+ do {
+ NEXT_TOKEN_OR_RETURN_NULL(ts);
+ } while (3 <= ts.tokenLength && ts.tokenLength <= 8);
+ }
+ extension = callFunction(ts.singletonValueAt, ts, extensionStart);
+ } else {
+ extension = callFunction(ts.singletonValue, ts);
+ }
+ if (!extension)
+ return null;
+
+ _DefineDataProperty(extensions, extensions.length, extension);
+ }
+
+ // Trailing pu_extensions component of the unicode_locale_id production.
+ //
+ // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+ if (ts.tokenLength === 1 && callFunction(ts.singletonKey, ts) === LOWER_X) {
+ privateuse = callFunction(ts.singletonValue, ts);
+ if (!privateuse)
+ return null;
+ }
+
+ // Reject the input if it couldn't be parsed completely.
+ if (ts.token !== NONE)
+ return null;
+
+ var tagObj = {
+ language,
+ script,
+ region,
+ variants,
+ extensions,
+ privateuse,
+ };
+
+ // Handle grandfathered tags right away, so we don't need to have extra
+ // paths for grandfathered tags later on.
+ //
+ // grandfathered = "art-lojban" ; non-redundant tags registered
+ // / "cel-gaulish" ; during the RFC 3066 era
+ // / "zh-guoyu" ; these tags match the 'langtag'
+ // / "zh-hakka" ; production, but their subtags
+ // / "zh-xiang" ; are not extended language
+ // ; or variant subtags: their meaning
+ // ; is defined by their registration
+ // ; and all of these are deprecated
+ // ; in favor of a more modern
+ // ; subtag or sequence of subtags
+ if (hasOwn(ts.localeLowercase, grandfatheredMappings))
+ updateGrandfatheredMappings(tagObj);
+
+ // Return if the complete input was successfully parsed.
+ return tagObj;
}
+/**
+ * Return the locale and fields components of the given valid Transform
+ * extension subtag.
+ *
+ * |extension| must start with "t-" and already be structurally valid; any
+ * violation is treated as an internal error (assertion) instead of being
+ * reported to the caller.
+ *
+ * Returns {locale, fields}: |locale| is an object with the same property
+ * names parseLanguageTag produces (or undefined when no `tlang` is present),
+ * and |fields| is an array of {key, value} records in input order, where
+ * multi-part values are joined with "-".
+ */
+function TransformExtensionComponents(extension) {
+ assert(typeof extension === "string", "extension is a String value");
+ assert(callFunction(std_String_startsWith, extension, "t-"),
+ "extension starts with 't-'");
+
+ // Tokenize everything after the leading "t-".
+ var ts = new BCP47TokenStream(Substring(extension, 2, extension.length - 2));
+ NEXT_TOKEN_OR_ASSERT(ts);
+
+ // `tfield` starts with `tkey`, which in turn is `alpha digit`, so
+ // an alpha-only token must be a `tlang`.
+ var localeObj;
+ if (ts.token === ALPHA) {
+ // `unicode_language_subtag`
+ assert((2 <= ts.tokenLength && ts.tokenLength <= 3) ||
+ (5 <= ts.tokenLength && ts.tokenLength <= 8),
+ "language subtags have 2-3 or 5-8 letters");
+
+ var language;
+ SUBTAG_VAR_OR_ASSERT(ts, language);
+
+ // unicode_script_subtag = alpha{4} ;
+ var script;
+ if (ts.tokenLength === 4 && ts.token === ALPHA) {
+ SUBTAG_VAR_OR_ASSERT(ts, script);
+
+ // The first character of a script code needs to be capitalized.
+ // "hans" -> "Hans"
+ script = callFunction(std_String_toUpperCase, script[0]) +
+ Substring(script, 1, script.length - 1);
+ }
+
+ // unicode_region_subtag = (alpha{2} | digit{3}) ;
+ var region;
+ if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+ (ts.tokenLength === 3 && ts.token === DIGIT))
+ {
+ SUBTAG_VAR_OR_ASSERT(ts, region);
+
+ // Region codes need to be in upper-case. "bu" -> "BU"
+ region = callFunction(std_String_toUpperCase, region);
+ }
+
+ // unicode_variant_subtag = (alphanum{5,8}
+ // | digit alphanum{3}) ;
+ //
+ // alphanum = [0-9 A-Z a-z] ;
+ var variants = [];
+ while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+ (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+ {
+ var variant;
+ SUBTAG_VAR_OR_ASSERT(ts, variant);
+
+ _DefineDataProperty(variants, variants.length, variant);
+ }
+
+ localeObj = {
+ language,
+ script,
+ region,
+ variants,
+ extensions: [],
+ privateuse: undefined,
+ };
+ }
+
+ // Trailing `tfield` subtags. (Any other trailing subtags are an error,
+ // because we're guaranteed to only see a valid transform extension here.)
+ var fields = [];
+ while (ts.tokenLength === 2) {
+ // `tkey` is `alpha digit`.
+ assert(!callFunction(ts.isDigitAt, ts, 0) && callFunction(ts.isDigitAt, ts, 1),
+ "unexpected invalid tkey subtag");
+
+ var key;
+ SUBTAG_VAR_OR_ASSERT(ts, key);
+
+ // `tfield` requires at least one `tvalue`.
+ assert(3 <= ts.tokenLength && ts.tokenLength <= 8,
+ "unexpected invalid tvalue subtag");
+
+ var value;
+ SUBTAG_VAR_OR_ASSERT(ts, value);
+
+ // Any further 3-8 character subtags extend the same `tvalue`.
+ while (3 <= ts.tokenLength && ts.tokenLength <= 8) {
+ var part;
+ SUBTAG_VAR_OR_ASSERT(ts, part);
+ value += "-" + part;
+ }
+
+ _DefineDataProperty(fields, fields.length, {key, value});
+ }
+
+ assert(ts.token === NONE,
+ "unexpected trailing characters in promised-to-be-valid transform extension");
+
+ return {locale: localeObj, fields};
+}
+/* eslint-enable complexity */
+
+#undef NONE
+#undef ALPHA
+#undef DIGIT
+
+#undef HYPHEN
+#undef DIGIT_ZERO
+#undef DIGIT_NINE
+#undef UPPER_A
+#undef UPPER_Z
+#undef LOWER_A
+#undef LOWER_T
+#undef LOWER_U
+#undef LOWER_X
+#undef LOWER_Z
+
+#undef SUBTAG_VAR_OR_ASSERT
+#undef SUBTAG_VAR_OR_RETURN_NULL
+#undef NEXT_TOKEN_OR_ASSERT
+#undef NEXT_TOKEN_OR_RETURN_NULL
/**
* Verifies that the given string is a well-formed BCP 47 language tag
@@ -278,53 +689,369 @@ function getDuplicateSingletonRE() {
* Spec: ECMAScript Internationalization API Specification, 6.2.2.
*/
function IsStructurallyValidLanguageTag(locale) {
- assert(typeof locale === "string", "IsStructurallyValidLanguageTag");
- var languageTagRE = getLanguageTagRE();
- if (!regexp_test_no_statics(languageTagRE, locale))
- return false;
-
- // Before checking for duplicate variant or singleton subtags with
- // regular expressions, we have to get private use subtag sequences
- // out of the picture.
- if (callFunction(std_String_startsWith, locale, "x-"))
- return true;
- var pos = callFunction(std_String_indexOf, locale, "-x-");
- if (pos !== -1)
- locale = callFunction(String_substring, locale, 0, pos);
-
- // Check for duplicate variant or singleton subtags.
- var duplicateVariantRE = getDuplicateVariantRE();
- var duplicateSingletonRE = getDuplicateSingletonRE();
- return !regexp_test_no_statics(duplicateVariantRE, locale) &&
- !regexp_test_no_statics(duplicateSingletonRE, locale);
+ // A tag is structurally valid exactly when the parser accepts it;
+ // parseLanguageTag() returns null for anything it cannot parse.
+ return parseLanguageTag(locale) !== null;
}
/**
- * Joins the array elements in the given range with the supplied separator.
+ * Canonicalizes the given structurally valid Unicode BCP 47 locale identifier,
+ * including regularized case of subtags. For example, the language tag
+ * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
+ *
+ * Zh ; 2*3ALPHA
+ * -haNS ; ["-" script]
+ * -bu ; ["-" region]
+ * -variant2 ; *("-" variant)
+ * -Variant1
+ * -u-ca-chinese ; *("-" extension)
+ * -t-Zh-laTN
+ * -x-PRIVATE ; ["-" privateuse]
+ *
+ * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
+ *
+ * UTS 35 specifies two different canonicalization algorithms. There's one to
+ * canonicalize BCP 47 language tags and another one to canonicalize Unicode
+ * locale identifiers. The latter one wasn't present when ECMA-402 was changed
+ * to use Unicode BCP 47 locale identifiers instead of BCP 47 language tags, so
+ * ECMA-402 currently only uses the former to canonicalize Unicode BCP 47 locale
+ * identifiers.
+ *
+ * The canonicalization is performed in place on |localeObj| (its variants and
+ * extensions arrays are sorted and rewritten); nothing is returned.
+ *
+ * Spec: ECMAScript Internationalization API Specification, 6.2.3.
+ * Spec: https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
+ * Spec: https://unicode.org/reports/tr35/#BCP_47_Language_Tag_Conversion
*/
-function ArrayJoinRange(array, separator, from, to = array.length) {
- assert(typeof separator === "string", "|separator| is a string value");
- assert(typeof from === "number", "|from| is a number value");
- assert(typeof to === "number", "|to| is a number value");
- assert(0 <= from && from <= to && to <= array.length, "|from| and |to| form a valid range");
+function CanonicalizeLanguageTagObject(localeObj) {
+ assert(IsObject(localeObj), "CanonicalizeLanguageTagObject");
- if (from === to)
- return "";
+ // Per UTS 35, 3.3.1, the very first step is to canonicalize the syntax by
+ // normalizing the case and ordering all subtags. The canonical syntax form
+ // itself is specified in UTS 35, 3.2.1.
+
+ // The parser already normalized the case for all subtags.
- var result = array[from];
- for (var i = from + 1; i < to; i++) {
- result += separator + array[i];
+ // The following helpers are only used by the assertions below.
+#ifdef DEBUG
+ function IsLowerCase(s) {
+ return s === callFunction(std_String_toLowerCase, s);
}
- return result;
+ function IsUpperCase(s) {
+ return s === callFunction(std_String_toUpperCase, s);
+ }
+ function IsTitleCase(s) {
+ assert(s.length > 0, "unexpected empy string");
+ var r = callFunction(std_String_toUpperCase, s[0]) +
+ callFunction(std_String_toLowerCase, Substring(s, 1, s.length - 1));
+ return s === r;
+ }
+#endif
+
+ // 1. Any script subtag is in title case.
+ assert(localeObj.script === undefined || IsTitleCase(localeObj.script),
+ "If present, script subtag is in title case");
+
+ // 2. Any region subtag is in uppercase.
+ assert(localeObj.region === undefined || IsUpperCase(localeObj.region),
+ "If present, region subtag is in upper case");
+
+ // 3. All other subtags are in lowercase.
+ assert(IsLowerCase(localeObj.language),
+ "language subtag is in lower case");
+ assert(callFunction(ArrayEvery, localeObj.variants, IsLowerCase),
+ "variant subtags are in lower case");
+ assert(callFunction(ArrayEvery, localeObj.extensions, IsLowerCase),
+ "extension subtags are in lower case");
+ assert(localeObj.privateuse === undefined || IsLowerCase(localeObj.privateuse),
+ "If present, privateuse subtag is in lower case");
+
+
+ // The second step in UTS 35, 3.2.1, is to order all subtags.
+
+ // 1. Any variants are in alphabetical order.
+ var variants = localeObj.variants;
+ if (variants.length > 0) {
+ callFunction(ArraySort, variants);
+ }
+
+ // 2. Any extensions are in alphabetical order by their singleton.
+ var extensions = localeObj.extensions;
+ if (extensions.length > 0) {
+ // Extension sequences are sorted by their singleton characters.
+ // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
+ callFunction(ArraySort, extensions);
+
+ // The last three bullet points in UTS 35, 3.2.1 apply only to Unicode and Transform
+ // extensions.
+ //
+ // 3. All attributes are sorted in alphabetical order.
+ //
+ // 4. All keywords and tfields are sorted by alphabetical order of their
+ // keys, within their respective extensions.
+ //
+ // 5. Any type or tfield value "true" is removed.
+
+ for (var i = 0; i < extensions.length; i++) {
+ var ext = extensions[i];
+ assert(IsLowerCase(ext),
+ "extension subtags must be in lower-case");
+ assert(ext[1] === "-",
+ "extension subtags start with a singleton");
+
+ // Canonicalize Unicode locale extension subtag if present.
+ if (ext[0] === "u") {
+ var {attributes, keywords} = UnicodeExtensionComponents(ext);
+ extensions[i] = CanonicalizeUnicodeExtension(attributes, keywords);
+ }
+
+ // Canonicalize Unicode BCP 47 T extension if present.
+ if (ext[0] === "t") {
+ var {locale, fields} = TransformExtensionComponents(ext);
+ extensions[i] = CanonicalizeTransformExtension(locale, fields);
+ }
+ }
+ }
+
+ // The next two steps in 3.3.1 replace deprecated language and region
+ // subtags with their preferred mappings.
+ updateLocaleIdMappings(localeObj);
+
+ // The two final steps in 3.3.1, handling irregular grandfathered and
+ // private-use only language tags, don't apply, because these two forms
+ // can't occur in Unicode BCP 47 locale identifiers.
+}
+
+/**
+ * Intl.Locale proposal
+ *
+ * UnicodeExtensionComponents( extension )
+ *
+ * Returns the components of |extension| where |extension| is a "Unicode locale
+ * extension sequence" (ECMA-402, 6.2.1) without the starting separator
+ * character.
+ *
+ * The result is {attributes, keywords}: |attributes| is an array of the
+ * subtags that precede the first two-character key, and |keywords| is an
+ * array of {key, value} records in input order. |value| is the "-"-joined
+ * type subtags following the key, or the empty string if there are none.
+ */
+function UnicodeExtensionComponents(extension) {
+ assert(typeof extension === "string", "extension is a String value");
+
+ // Step 1.
+ var attributes = [];
+
+ // Step 2.
+ var keywords = [];
+
+ // Step 3.
+ var isKeyword = false;
+
+ // Step 4.
+ var size = extension.length;
+
+ // Step 5.
+ // |extension| starts with "u-" instead of "-u-" in our implementation, so
+ // we need to initialize |k| with 2 instead of 3.
+ assert(callFunction(std_String_startsWith, extension, "u-"),
+ "extension starts with 'u-'");
+ var k = 2;
+
+ // Step 6.
+ var key, value;
+ while (k < size) {
+ // Step 6.a.
+ var e = callFunction(std_String_indexOf, extension, "-", k);
+
+ // Step 6.b.
+ // No further separator means the subtag runs to the end of the string.
+ var len = (e < 0 ? size : e) - k;
+
+ // Step 6.c.
+ var subtag = Substring(extension, k, len);
+
+ // Steps 6.d-e.
+ if (!isKeyword) {
+ // Step 6.d.
+ // NB: Duplicates are handled elsewhere in our implementation.
+ if (len !== 2)
+ _DefineDataProperty(attributes, attributes.length, subtag);
+ } else {
+ // Steps 6.e.i-ii.
+ if (len === 2) {
+ // Step 6.e.i.1.
+ // A new key starts here, so the keyword collected so far is
+ // complete and gets recorded.
+ // NB: Duplicates are handled elsewhere in our implementation.
+ _DefineDataProperty(keywords, keywords.length, {key, value});
+ } else {
+ // Step 6.e.ii.1.
+ if (value !== "")
+ value += "-";
+
+ // Step 6.e.ii.2.
+ value += subtag;
+ }
+ }
+
+ // Step 6.f.
+ if (len === 2) {
+ // Step 6.f.i.
+ isKeyword = true;
+
+ // Step 6.f.ii.
+ key = subtag;
+
+ // Step 6.f.iii.
+ value = "";
+ }
+
+ // Step 6.g.
+ // Advance past the subtag and its trailing separator.
+ k += len + 1;
+ }
+
+ // Step 7.
+ if (isKeyword) {
+ // Step 7.a.
+ // Record the last keyword, which no following key has flushed.
+ // NB: Duplicates are handled elsewhere in our implementation.
+ _DefineDataProperty(keywords, keywords.length, {key, value});
+ }
+
+ // Step 8.
+ return {attributes, keywords};
+}
+
+/**
+ * CanonicalizeUnicodeExtension( attributes, keywords )
+ *
+ * Canonical syntax per <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All attributes and keywords are in lowercase.
+ * - Note: The parser already converted keywords to lowercase.
+ * - All attributes are sorted in alphabetical order.
+ * - All keywords are sorted by alphabetical order of their keys.
+ * - Any type value "true" is removed.
+ *
+ * Canonical form:
+ * - All keys and types use the canonical form (from the name attribute;
+ * see Section 3.6.4 U Extension Data Files).
+ */
+function CanonicalizeUnicodeExtension(attributes, keywords) {
+ assert(attributes.length > 0 || keywords.length > 0,
+ "unexpected empty Unicode locale extension components");
+
+ // All attributes are sorted in alphabetical order.
+ if (attributes.length > 1)
+ callFunction(ArraySort, attributes);
+
+ // All keywords are sorted by alphabetical order of keys.
+ if (keywords.length > 1) {
+ function UnicodeKeySort(left, right) {
+ var leftKey = left.key;
+ var rightKey = right.key;
+ assert(leftKey.length === 2, "left key is a Unicode key");
+ assert(rightKey.length === 2, "right key is a Unicode key");
+
+ // Compare both strings using charCodeAt(), because relational
+ // string comparison always calls into the VM, whereas charCodeAt
+ // can be inlined by Ion.
+ var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
+ callFunction(std_String_charCodeAt, rightKey, 0);
+ if (diff === 0) {
+ diff = callFunction(std_String_charCodeAt, leftKey, 1) -
+ callFunction(std_String_charCodeAt, rightKey, 1);
+ }
+ return diff;
+ }
+
+ callFunction(ArraySort, keywords, UnicodeKeySort);
+ }
+
+ var extension = "u";
+
+ // Append all attributes.
+ for (var i = 0; i < attributes.length; i++) {
+ extension += "-" + attributes[i];
+ }
+
+ // Append all keywords.
+ for (var i = 0; i < keywords.length; i++) {
+ var {key, value} = keywords[i];
+ extension += "-" + key;
+
+ // Type value "true" is removed.
+ if (value !== "" && value !== "true")
+ extension += "-" + value;
+ }
+
+ return extension;
+}
+
+/**
+ * CanonicalizeTransformExtension
+ *
+ * Canonical form per <https://unicode.org/reports/tr35/#BCP47_T_Extension>:
+ *
+ * - These subtags are all in lowercase (that is the canonical casing for these
+ * subtags), [...].
+ *
+ * And per <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All keywords and tfields are sorted by alphabetical order of their keys,
+ * within their respective extensions.
+ */
+function CanonicalizeTransformExtension(localeObj, fields) {
+ assert(localeObj !== undefined || fields.length > 0,
+ "unexpected empty Transform locale extension components");
+
+ if (fields.length > 0) {
+ function TransformKeySort(left, right) {
+ var leftKey = left.key;
+ var rightKey = right.key;
+ assert(leftKey.length === 2, "left key is a Transform key");
+ assert(rightKey.length === 2, "right key is a Transform key");
+
+ // Compare both strings using charCodeAt(), because relational
+ // string comparison always calls into the VM, whereas charCodeAt
+ // can be inlined by Ion.
+ var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
+ callFunction(std_String_charCodeAt, rightKey, 0);
+ if (diff === 0) {
+ diff = callFunction(std_String_charCodeAt, leftKey, 1) -
+ callFunction(std_String_charCodeAt, rightKey, 1);
+ }
+ return diff;
+ }
+
+ callFunction(ArraySort, fields, TransformKeySort);
+ }
+
+ var extension = "t";
+
+ // Append the language subtag if present.
+ if (localeObj !== undefined) {
+ // [1] is a bit unclear whether or not the `tlang` subtag also needs
+ // to be canonicalized (and case-adjusted). For now simply append it as
+ // is and change it to all lower-case. If we switch to [2], the `tlang`
+ // subtag also needs to be canonicalized according to the same rules as
+ // `unicode_language_id` subtags are canonicalized. Also see [3].
+ //
+ // [1] https://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier
+ // [2] https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
+ // [3] https://github.com/tc39/ecma402/issues/330
+ var localeStr = StringFromLanguageTagObject(localeObj);
+ extension += "-" + callFunction(std_String_toLowerCase, localeStr);
+ }
+
+ // Append all fields.
+ for (var i = 0; i < fields.length; i++) {
+ // UTS 35, 3.2.1 specifies:
+ // - Any type or tfield value "true" is removed.
+ //
+ // But the `tvalue` subtag is mandatory in `tfield: tkey tvalue`, so
+ // ignore this apparently invalid part of the UTS 35 specification and
+ // simply append all `tfield` subtags.
+ var {key, value} = fields[i];
+ extension += "-" + key + "-" + value;
+ }
+
+ return extension;
}
/**
* Canonicalizes the given structurally valid BCP 47 language tag, including
* regularized case of subtags. For example, the language tag
- * Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
+ * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
*
* Zh ; 2*3ALPHA
- * -NAN ; ["-" extlang]
* -haNS ; ["-" script]
* -bu ; ["-" region]
* -variant2 ; *("-" variant)
@@ -333,120 +1060,54 @@ function ArrayJoinRange(array, separator, from, to = array.length) {
* -t-Zh-laTN
* -x-PRIVATE ; ["-" privateuse]
*
- * becomes nan-Hans-mm-variant2-variant1-t-zh-latn-u-ca-chinese-x-private
+ * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
*
* Spec: ECMAScript Internationalization API Specification, 6.2.3.
- * Spec: RFC 5646, section 4.5.
*/
function CanonicalizeLanguageTag(locale) {
- assert(IsStructurallyValidLanguageTag(locale), "CanonicalizeLanguageTag");
+ var localeObj = parseLanguageTag(locale);
+ assert(localeObj !== null, "CanonicalizeLanguageTag");
- // The input
- // "Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE"
- // will be used throughout this method to illustrate how it works.
+ CanonicalizeLanguageTagObject(localeObj);
- // Language tags are compared and processed case-insensitively, so
- // technically it's not necessary to adjust case. But for easier processing,
- // and because the canonical form for most subtags is lower case, we start
- // with lower case for all.
- // "Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE" ->
- // "zh-nan-hans-bu-variant2-variant1-u-ca-chinese-t-zh-latn-x-private"
- locale = callFunction(std_String_toLowerCase, locale);
-
- // Handle mappings for complete tags.
- if (hasOwn(locale, langTagMappings))
- return langTagMappings[locale];
-
- var subtags = StringSplitString(ToString(locale), "-");
- var i = 0;
-
- // Handle the standard part: All subtags before the first singleton or "x".
- // "zh-nan-hans-bu-variant2-variant1"
- while (i < subtags.length) {
- var subtag = subtags[i];
-
- // If we reach the start of an extension sequence or private use part,
- // we're done with this loop. We have to check for i > 0 because for
- // irregular language tags, such as i-klingon, the single-character
- // subtag "i" is not the start of an extension sequence.
- // In the example, we break at "u".
- if (subtag.length === 1 && (i > 0 || subtag === "x"))
- break;
+ return StringFromLanguageTagObject(localeObj);
+}
- if (i !== 0) {
- if (subtag.length === 4) {
- // 4-character subtags that are not in initial position are
- // script codes; their first character needs to be capitalized.
- // "hans" -> "Hans"
- subtag = callFunction(std_String_toUpperCase, subtag[0]) +
- callFunction(String_substring, subtag, 1);
- } else if (subtag.length === 2) {
- // 2-character subtags that are not in initial position are
- // region codes; they need to be upper case. "bu" -> "BU"
- subtag = callFunction(std_String_toUpperCase, subtag);
- }
- }
- if (hasOwn(subtag, langSubtagMappings)) {
- // Replace deprecated subtags with their preferred values.
- // "BU" -> "MM"
- // This has to come after we capitalize region codes because
- // otherwise some language and region codes could be confused.
- // For example, "in" is an obsolete language code for Indonesian,
- // but "IN" is the country code for India.
- // Note that the script generating langSubtagMappings makes sure
- // that no regular subtag mapping will replace an extlang code.
- subtag = langSubtagMappings[subtag];
- } else if (hasOwn(subtag, extlangMappings)) {
- // Replace deprecated extlang subtags with their preferred values,
- // and remove the preceding subtag if it's a redundant prefix.
- // "zh-nan" -> "nan"
- // Note that the script generating extlangMappings makes sure that
- // no extlang mapping will replace a normal language code.
- subtag = extlangMappings[subtag].preferred;
- if (i === 1 && extlangMappings[subtag].prefix === subtags[0]) {
- callFunction(std_Array_shift, subtags);
- i--;
- }
- }
- subtags[i] = subtag;
- i++;
- }
- var normal = ArrayJoinRange(subtags, "-", 0, i);
-
- // Extension sequences are sorted by their singleton characters.
- // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
- var extensions = new List();
- while (i < subtags.length && subtags[i] !== "x") {
- var extensionStart = i;
- i++;
- while (i < subtags.length && subtags[i].length > 1)
- i++;
- var extension = ArrayJoinRange(subtags, "-", extensionStart, i);
- callFunction(std_Array_push, extensions, extension);
- }
- callFunction(std_Array_sort, extensions);
+/**
+ * Returns the string representation of the given language tag object.
+ */
+function StringFromLanguageTagObject(localeObj) {
+ assert(IsObject(localeObj), "StringFromLanguageTagObject");
+
+ var {
+ language,
+ script,
+ region,
+ variants,
+ extensions,
+ privateuse,
+ } = localeObj;
- // Private use sequences are left as is. "x-private"
- var privateUse = "";
- if (i < subtags.length)
- privateUse = ArrayJoinRange(subtags, "-", i);
+ var canonical = language;
+
+ if (script !== undefined)
+ canonical += "-" + script;
+
+ if (region !== undefined)
+ canonical += "-" + region;
+
+ if (variants.length > 0)
+ canonical += "-" + callFunction(std_Array_join, variants, "-");
- // Put everything back together.
- var canonical = normal;
if (extensions.length > 0)
canonical += "-" + callFunction(std_Array_join, extensions, "-");
- if (privateUse.length > 0) {
- // Be careful of a Language-Tag that is entirely privateuse.
- if (canonical.length > 0)
- canonical += "-" + privateUse;
- else
- canonical = privateUse;
- }
+
+ if (privateuse !== undefined)
+ canonical += "-" + privateuse;
return canonical;
}
-
/**
* Returns true if the input contains only ASCII alphabetical characters.
*/
@@ -469,13 +1130,11 @@ function ValidateAndCanonicalizeLanguageTag(locale) {
assert(typeof locale === "string", "ValidateAndCanonicalizeLanguageTag");
// Handle the common case (a standalone language) first.
- // Only the following BCP47 subset is accepted:
- // Language-Tag = langtag
- // langtag = language
- // language = 2*3ALPHA ; shortest ISO 639 code
- // For three character long strings we need to make sure it's not a
- // private use only language tag, for example "x-x".
- if (locale.length === 2 || (locale.length === 3 && locale[1] !== "-")) {
+ // Only the following Unicode BCP 47 locale identifier subset is accepted:
+ // unicode_locale_id = unicode_language_id
+ // unicode_language_id = unicode_language_subtag
+ // unicode_language_subtag = alpha{2,3}
+ if (locale.length === 2 || locale.length === 3) {
if (!IsASCIIAlphaString(locale))
ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
assert(IsStructurallyValidLanguageTag(locale), "2*3ALPHA is a valid language tag");
@@ -483,42 +1142,27 @@ function ValidateAndCanonicalizeLanguageTag(locale) {
// The language subtag is canonicalized to lower case.
locale = callFunction(std_String_toLowerCase, locale);
- // langTagMappings doesn't contain any 2*3ALPHA keys, so we don't need
- // to check for possible replacements in this map.
- assert(!callFunction(std_Object_hasOwnProperty, langTagMappings, locale),
- "langTagMappings contains no 2*3ALPHA mappings");
-
- // Replace deprecated subtags with their preferred values.
- locale = callFunction(std_Object_hasOwnProperty, langSubtagMappings, locale)
- ? langSubtagMappings[locale]
- : locale;
- assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization");
+ // updateLocaleIdMappings may modify tags containing only |language|
+ // subtags, if the language is in |complexLanguageMappings|, so we need
+ // to handle that case first.
+ if (!hasOwn(locale, complexLanguageMappings)) {
+ // Replace deprecated subtags with their preferred values.
+ locale = hasOwn(locale, languageMappings)
+ ? languageMappings[locale]
+ : locale;
+ assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization");
- return locale;
+ return locale;
+ }
}
- if (!IsStructurallyValidLanguageTag(locale))
+ var localeObj = parseLanguageTag(locale);
+ if (localeObj === null)
ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
- return CanonicalizeLanguageTag(locale);
-}
-
-
-function localeContainsNoUnicodeExtensions(locale) {
- // No "-u-", no possible Unicode extension.
- if (callFunction(std_String_indexOf, locale, "-u-") === -1)
- return true;
-
- // "-u-" within privateuse also isn't one.
- if (callFunction(std_String_indexOf, locale, "-u-") > callFunction(std_String_indexOf, locale, "-x-"))
- return true;
-
- // An entirely-privateuse tag doesn't contain extensions.
- if (callFunction(std_String_startsWith, locale, "x-"))
- return true;
+ CanonicalizeLanguageTagObject(localeObj);
- // Otherwise, we have a Unicode extension sequence.
- return false;
+ return StringFromLanguageTagObject(localeObj);
}
@@ -571,11 +1215,13 @@ function DefaultLocaleIgnoringAvailableLocales() {
// If we didn't get a cache hit, compute the candidate default locale and
// cache it. Fall back on the last-ditch locale when necessary.
- var candidate;
- if (!IsStructurallyValidLanguageTag(runtimeDefaultLocale)) {
+ var candidate = parseLanguageTag(runtimeDefaultLocale);
+ if (candidate === null) {
candidate = lastDitchLocale();
} else {
- candidate = CanonicalizeLanguageTag(runtimeDefaultLocale);
+ CanonicalizeLanguageTagObject(candidate);
+
+ candidate = StringFromLanguageTagObject(candidate);
// The default locale must be in [[availableLocales]], and that list
// must not contain any locales with Unicode extension sequences, so
@@ -592,7 +1238,7 @@ function DefaultLocaleIgnoringAvailableLocales() {
assert(IsStructurallyValidLanguageTag(candidate),
"the candidate must be structurally valid");
- assert(localeContainsNoUnicodeExtensions(candidate),
+ assert(startOfUnicodeExtensions(candidate) < 0,
"the candidate must not contain a Unicode extension sequence");
return candidate;
@@ -633,7 +1279,7 @@ function DefaultLocale() {
"the computed default locale must be structurally valid");
assert(locale === CanonicalizeLanguageTag(locale),
"the computed default locale must be canonical");
- assert(localeContainsNoUnicodeExtensions(locale),
+ assert(startOfUnicodeExtensions(locale) < 0,
"the computed default locale must not contain a Unicode extension sequence");
localeCache.defaultLocale = locale;
@@ -674,30 +1320,53 @@ function addSpecialMissingLanguageTags(availableLocales) {
* Spec: ECMAScript Internationalization API Specification, 9.2.1.
*/
function CanonicalizeLocaleList(locales) {
+ // Step 1.
if (locales === undefined)
- return new List();
- var seen = new List();
+ return [];
+
+ // Step 3 (and the remaining steps).
if (typeof locales === "string")
- locales = [locales];
+ return [ValidateAndCanonicalizeLanguageTag(locales)];
+
+ // Step 2.
+ var seen = [];
+
+ // Step 4.
var O = ToObject(locales);
+
+ // Step 5.
var len = ToLength(O.length);
+
+ // Step 6.
var k = 0;
+
+ // Step 7.
while (k < len) {
- // Don't call ToString(k) - SpiderMonkey is faster with integers.
- var kPresent = HasProperty(O, k);
- if (kPresent) {
+ // Steps 7.a-c.
+ if (k in O) {
+ // Step 7.c.i.
var kValue = O[k];
+
+ // Step 7.c.ii.
if (!(typeof kValue === "string" || IsObject(kValue)))
ThrowTypeError(JSMSG_INVALID_LOCALES_ELEMENT);
+
+ // Step 7.c.iii.
var tag = ToString(kValue);
- if (!IsStructurallyValidLanguageTag(tag))
- ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, tag);
- tag = CanonicalizeLanguageTag(tag);
+
+ // Step 7.c.iv.
+ tag = ValidateAndCanonicalizeLanguageTag(tag);
+
+ // Step 7.c.v.
if (callFunction(ArrayIndexOf, seen, tag) === -1)
- callFunction(std_Array_push, seen, tag);
+ _DefineDataProperty(seen, seen.length, tag);
}
+
+ // Step 7.d.
k++;
}
+
+ // Step 8.
return seen;
}
@@ -705,7 +1374,7 @@ function CanonicalizeLocaleList(locales) {
function BestAvailableLocaleHelper(availableLocales, locale, considerDefaultLocale) {
assert(IsStructurallyValidLanguageTag(locale), "invalid BestAvailableLocale locale structure");
assert(locale === CanonicalizeLanguageTag(locale), "non-canonical BestAvailableLocale locale");
- assert(localeContainsNoUnicodeExtensions(locale), "locale must contain no Unicode extensions");
+ assert(startOfUnicodeExtensions(locale) < 0, "locale must contain no Unicode extensions");
// In the spec, [[availableLocales]] is formally a list of all available
// locales. But in our implementation, it's an *incomplete* list, not
@@ -780,28 +1449,37 @@ function BestAvailableLocaleIgnoringDefault(availableLocales, locale) {
* Spec: RFC 4647, section 3.4.
*/
function LookupMatcher(availableLocales, requestedLocales) {
- var i = 0;
- var len = requestedLocales.length;
- var availableLocale;
- var locale, noExtensionsLocale;
- while (i < len && availableLocale === undefined) {
- locale = requestedLocales[i];
- noExtensionsLocale = removeUnicodeExtensions(locale);
- availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
- i++;
- }
-
+ // Step 1.
var result = new Record();
- if (availableLocale !== undefined) {
- result.locale = availableLocale;
- if (locale !== noExtensionsLocale) {
- var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
- var extensionMatch = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, locale);
- result.extension = extensionMatch[0];
+
+ // Step 2.
+ for (var i = 0; i < requestedLocales.length; i++) {
+ var locale = requestedLocales[i];
+
+ // Step 2.a.
+ var noExtensionsLocale = removeUnicodeExtensions(locale);
+
+ // Step 2.b.
+ var availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
+
+ // Step 2.c.
+ if (availableLocale !== undefined) {
+ // Step 2.c.i.
+ result.locale = availableLocale;
+
+ // Step 2.c.ii.
+ if (locale !== noExtensionsLocale)
+ result.extension = getUnicodeExtensions(locale);
+
+ // Step 2.c.iii.
+ return result;
}
- } else {
- result.locale = DefaultLocale();
}
+
+ // Steps 3-4.
+ result.locale = DefaultLocale();
+
+ // Step 5.
return result;
}
@@ -823,73 +1501,73 @@ function BestFitMatcher(availableLocales, requestedLocales) {
/**
* Returns the Unicode extension value subtags for the requested key subtag.
*
- * NOTE: PR to add UnicodeExtensionValue to ECMA-402 isn't yet written.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.5.
*/
function UnicodeExtensionValue(extension, key) {
assert(typeof extension === "string", "extension is a string value");
- assert(function() {
- var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
- var extensionMatch = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, extension);
- return extensionMatch !== null && extensionMatch[0] === extension;
- }(), "extension is a Unicode extension subtag");
+ assert(callFunction(std_String_startsWith, extension, "-u-") &&
+ getUnicodeExtensions("und" + extension) === extension,
+ "extension is a Unicode extension subtag");
assert(typeof key === "string", "key is a string value");
- assert(key.length === 2, "key is a Unicode extension key subtag");
// Step 1.
- var size = extension.length;
+ assert(key.length === 2, "key is a Unicode extension key subtag");
// Step 2.
- var searchValue = "-" + key + "-";
+ var size = extension.length;
// Step 3.
- var pos = callFunction(std_String_indexOf, extension, searchValue);
+ var searchValue = "-" + key + "-";
// Step 4.
+ var pos = callFunction(std_String_indexOf, extension, searchValue);
+
+ // Step 5.
if (pos !== -1) {
- // Step 4.a.
+ // Step 5.a.
var start = pos + 4;
- // Step 4.b.
+ // Step 5.b.
var end = start;
- // Step 4.c.
+ // Step 5.c.
var k = start;
- // Steps 4.d-e.
+ // Steps 5.d-e.
while (true) {
- // Step 4.e.i.
+ // Step 5.e.i.
var e = callFunction(std_String_indexOf, extension, "-", k);
- // Step 4.e.ii.
+ // Step 5.e.ii.
var len = e === -1 ? size - k : e - k;
- // Step 4.e.iii.
+ // Step 5.e.iii.
if (len === 2)
break;
- // Step 4.e.iv.
+ // Step 5.e.iv.
if (e === -1) {
end = size;
break;
}
- // Step 4.e.v.
+ // Step 5.e.v.
end = e;
k = e + 1;
}
- // Step 4.f.
+ // Step 5.f.
return callFunction(String_substring, extension, start, end);
}
- // Step 5.
+ // Step 6.
searchValue = "-" + key;
- // Steps 6-7.
+ // Steps 7-8.
if (callFunction(std_String_endsWith, extension, searchValue))
return "";
- // Step 8 (implicit).
+ // Step 9 (implicit).
}
/**
@@ -899,11 +1577,9 @@ function UnicodeExtensionValue(extension, key) {
* caller's relevant extensions and locale data as well as client-provided
* options into consideration.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.5.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.6.
*/
function ResolveLocale(availableLocales, requestedLocales, options, relevantExtensionKeys, localeData) {
- /*jshint laxbreak: true */
-
// Steps 1-3.
var matcher = options.localeMatcher;
var r = (matcher === "lookup")
@@ -912,79 +1588,82 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
// Step 4.
var foundLocale = r.locale;
-
- // Step 5 (Not applicable in this implementation).
var extension = r.extension;
- // Steps 6-7.
+ // Step 5.
var result = new Record();
+
+ // Step 6.
result.dataLocale = foundLocale;
- // Step 8.
+ // Step 7.
var supportedExtension = "-u";
// In this implementation, localeData is a function, not an object.
var localeDataProvider = localeData();
- // Steps 9-12.
+ // Step 8.
for (var i = 0; i < relevantExtensionKeys.length; i++) {
- // Steps 12.a-c.
var key = relevantExtensionKeys[i];
- // Steps 12.b-d (The locale data is only computed when needed).
+ // Steps 8.a-h (The locale data is only computed when needed).
var keyLocaleData = undefined;
var value = undefined;
// Locale tag may override.
- // Step 12.e.
+ // Step 8.g.
var supportedExtensionAddition = "";
- // Step 12.f.
+ // Step 8.h.
if (extension !== undefined) {
- // NB: The step annotations don't yet match the ES2017 Intl draft,
- // 94045d234762ad107a3d09bb6f7381a65f1a2f9b, because the PR to add
- // the new UnicodeExtensionValue abstract operation still needs to
- // be written.
-
- // Step 12.f.i.
+ // Step 8.h.i.
var requestedValue = UnicodeExtensionValue(extension, key);
- // Step 12.f.ii.
+ // Step 8.h.ii.
if (requestedValue !== undefined) {
- // Steps 12.b-c.
+ // Steps 8.a-d.
keyLocaleData = callFunction(localeDataProvider[key], null, foundLocale);
- // Step 12.f.ii.1.
+ // Step 8.h.ii.1.
if (requestedValue !== "") {
- // Step 12.f.ii.1.a.
+ // Step 8.h.ii.1.a.
if (callFunction(ArrayIndexOf, keyLocaleData, requestedValue) !== -1) {
value = requestedValue;
supportedExtensionAddition = "-" + key + "-" + value;
}
} else {
- // Step 12.f.ii.2.
+ // Step 8.h.ii.2.
// According to the LDML spec, if there's no type value,
// and true is an allowed value, it's used.
- if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1)
+ if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1) {
value = "true";
+ supportedExtensionAddition = "-" + key;
+ }
}
}
}
// Options override all.
- // Step 12.g.i.
+ // Step 8.i.i.
var optionsValue = options[key];
- // Step 12.g, 12.gg.ii.
+ // Step 8.i.ii.
+ assert(typeof optionsValue === "string" ||
+ optionsValue === undefined ||
+ optionsValue === null,
+ "unexpected type for options value");
+
+ // Steps 8.i, 8.i.iii.1.
if (optionsValue !== undefined && optionsValue !== value) {
- // Steps 12.b-c.
+ // Steps 8.a-d.
if (keyLocaleData === undefined)
keyLocaleData = callFunction(localeDataProvider[key], null, foundLocale);
+ // Step 8.i.iii.
if (callFunction(ArrayIndexOf, keyLocaleData, optionsValue) !== -1) {
value = optionsValue;
supportedExtensionAddition = "";
@@ -993,27 +1672,29 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
// Locale data provides default value.
if (value === undefined) {
- // Steps 12.b-d.
+ // Steps 8.a-f.
value = keyLocaleData === undefined
? callFunction(localeDataProvider.default[key], null, foundLocale)
: keyLocaleData[0];
}
- // Steps 12.h-j.
+ // Step 8.j.
assert(typeof value === "string" || value === null, "unexpected locale data value");
result[key] = value;
+
+ // Step 8.k.
supportedExtension += supportedExtensionAddition;
}
- // Step 13.
+ // Step 9.
if (supportedExtension.length > 2) {
assert(!callFunction(std_String_startsWith, foundLocale, "x-"),
"unexpected privateuse-only locale returned from ICU");
- // Step 13.a.
+ // Step 9.a.
var privateIndex = callFunction(std_String_indexOf, foundLocale, "-x-");
- // Steps 13.b-c.
+ // Steps 9.b-c.
if (privateIndex === -1) {
foundLocale += supportedExtension;
} else {
@@ -1022,19 +1703,19 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
foundLocale = preExtension + supportedExtension + postExtension;
}
- // Step 13.d.
+ // Step 9.d.
assert(IsStructurallyValidLanguageTag(foundLocale), "invalid locale after concatenation");
- // Step 13.e (Not required in this implementation, because we don't
+ // Step 9.e (Not required in this implementation, because we don't
// canonicalize Unicode extension subtags).
assert(foundLocale === CanonicalizeLanguageTag(foundLocale), "same locale with extension");
}
- // Step 14.
+ // Step 10.
result.locale = foundLocale;
- // Step 15.
+ // Step 11.
return result;
}
@@ -1044,31 +1725,29 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.6.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.7.
*/
function LookupSupportedLocales(availableLocales, requestedLocales) {
- // Steps 1-2.
- var len = requestedLocales.length;
- var subset = new List();
+ // Step 1.
+ var subset = [];
- // Steps 3-4.
- var k = 0;
- while (k < len) {
- // Steps 4.a-b.
- var locale = requestedLocales[k];
+ // Step 2.
+ for (var i = 0; i < requestedLocales.length; i++) {
+ var locale = requestedLocales[i];
+
+ // Step 2.a.
var noExtensionsLocale = removeUnicodeExtensions(locale);
- // Step 4.c-d.
+ // Step 2.b.
var availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
- if (availableLocale !== undefined)
- callFunction(std_Array_push, subset, locale);
- // Step 4.e.
- k++;
+ // Step 2.c.
+ if (availableLocale !== undefined)
+ _DefineDataProperty(subset, subset.length, locale);
}
- // Steps 5-6.
- return callFunction(std_Array_slice, subset, 0);
+ // Step 3.
+ return subset;
}
@@ -1077,7 +1756,7 @@ function LookupSupportedLocales(availableLocales, requestedLocales) {
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.7.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.8.
*/
function BestFitSupportedLocales(availableLocales, requestedLocales) {
// don't have anything better
@@ -1090,19 +1769,17 @@ function BestFitSupportedLocales(availableLocales, requestedLocales) {
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.8.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.9.
*/
function SupportedLocales(availableLocales, requestedLocales, options) {
- /*jshint laxbreak: true */
-
// Step 1.
var matcher;
if (options !== undefined) {
- // Steps 1.a-b.
+ // Step 1.a.
options = ToObject(options);
- matcher = options.localeMatcher;
- // Step 1.c.
+ // Step 1.b.
+ matcher = options.localeMatcher;
if (matcher !== undefined) {
matcher = ToString(matcher);
if (matcher !== "lookup" && matcher !== "best fit")
@@ -1110,12 +1787,12 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
}
}
- // Steps 2-3.
+ // Steps 2-5.
var subset = (matcher === undefined || matcher === "best fit")
? BestFitSupportedLocales(availableLocales, requestedLocales)
: LookupSupportedLocales(availableLocales, requestedLocales);
- // Step 4.
+ // Steps 6-7.
for (var i = 0; i < subset.length; i++) {
_DefineDataProperty(subset, i, subset[i],
ATTR_ENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE);
@@ -1123,7 +1800,7 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
_DefineDataProperty(subset, "length", subset.length,
ATTR_NONENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE);
- // Step 5.
+ // Step 8.
return subset;
}
@@ -1133,7 +1810,7 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
* the required type, checks whether it is one of a list of allowed values,
* and fills in a fallback value if necessary.
*
- * Spec: ECMAScript Internationalization API Specification, 9.2.9.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.10.
*/
function GetOption(options, property, type, values, fallback) {
// Step 1.
diff --git a/js/src/builtin/intl/DateTimeFormat.js b/js/src/builtin/intl/DateTimeFormat.js
index 4de3c084f2..a4feb50aa6 100644
--- a/js/src/builtin/intl/DateTimeFormat.js
+++ b/js/src/builtin/intl/DateTimeFormat.js
@@ -53,9 +53,10 @@ function resolveDateTimeFormatInternals(lazyDateTimeFormatData) {
// never a subset of them.
var internalProps = std_Object_create(null);
+
+ var DateTimeFormat = dateTimeFormatInternalProperties;
// Compute effective locale.
- var DateTimeFormat = dateTimeFormatInternalProperties;
// Step 10.
var localeData = DateTimeFormat.localeData;
@@ -73,7 +74,7 @@ function resolveDateTimeFormatInternals(lazyDateTimeFormatData) {
internalProps.numberingSystem = r.nu;
// Compute formatting options.
- // Step 16.
+ // Step 14.
var dataLocale = r.dataLocale;
// Steps 20.
@@ -119,8 +120,6 @@ function resolveDateTimeFormatInternals(lazyDateTimeFormatData) {
// Step 31.
internalProps.pattern = pattern;
- internalProps.boundFormat = undefined;
-
// The caller is responsible for associating |internalProps| with the right
// object using |setInternalProperties|.
return internalProps;
@@ -297,23 +296,25 @@ function DefaultTimeZone() {
/**
- * UnwrapDateTimeFormat(dtf)
+ * 12.1.10 UnwrapDateTimeFormat( dtf )
*/
function UnwrapDateTimeFormat(dtf, methodName) {
- // Step 1.
+ // Step 1 (not applicable in our implementation).
+
+ // Step 2.
if ((!IsObject(dtf) || !IsDateTimeFormat(dtf)) &&
dtf instanceof GetDateTimeFormatConstructor())
{
dtf = dtf[intlFallbackSymbol()];
}
- // Step 2.
+ // Step 3.
if (!IsObject(dtf) || !IsDateTimeFormat(dtf)) {
ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "DateTimeFormat", methodName,
"DateTimeFormat");
}
- // Step 3.
+ // Step 4.
return dtf;
}
@@ -334,9 +335,6 @@ function InitializeDateTimeFormat(dateTimeFormat, thisValue, locales, options, m
assert(IsDateTimeFormat(dateTimeFormat),
"InitializeDateTimeFormat called with non-DateTimeFormat");
- // Steps 1-2 (These steps are no longer required and should be removed
- // from the spec; https://github.com/tc39/ecma402/issues/115).
-
// Lazy DateTimeFormat data has the following structure:
//
// {
@@ -471,6 +469,8 @@ function InitializeDateTimeFormat(dateTimeFormat, thisValue, locales, options, m
initializeIntlObject(dateTimeFormat, "DateTimeFormat", lazyDateTimeFormatData);
// 12.2.1, steps 4-5.
+ // TODO: spec issue - The current spec doesn't have the IsObject check,
+ // which means |Intl.DateTimeFormat.call(null)| is supposed to throw here.
if (dateTimeFormat !== thisValue && thisValue instanceof GetDateTimeFormatConstructor()) {
if (!IsObject(thisValue))
ThrowTypeError(JSMSG_NOT_NONNULL_OBJECT, typeof thisValue);
@@ -687,17 +687,19 @@ function ToDateTimeOptions(options, required, defaults) {
assert(typeof required === "string", "ToDateTimeOptions");
assert(typeof defaults === "string", "ToDateTimeOptions");
- // Steps 1-3.
+ // Steps 1-2.
if (options === undefined)
options = null;
else
options = ToObject(options);
options = std_Object_create(options);
- // Step 4.
+ // Step 3.
var needDefaults = true;
- // Step 5.
+ // Step 4.
+ // TODO: spec issue - The spec requires retrieving all options, so using
+ // the ||-operator with its short-circuit evaluation semantics is incorrect.
if ((required === "date" || required === "any") &&
(options.weekday !== undefined || options.year !== undefined ||
options.month !== undefined || options.day !== undefined))
@@ -705,7 +707,9 @@ function ToDateTimeOptions(options, required, defaults) {
needDefaults = false;
}
- // Step 6.
+ // Step 5.
+ // TODO: spec issue - The spec requires retrieving all options, so using
+ // the ||-operator with its short-circuit evaluation semantics is incorrect.
if ((required === "time" || required === "any") &&
(options.hour !== undefined || options.minute !== undefined ||
options.second !== undefined))
@@ -713,7 +717,7 @@ function ToDateTimeOptions(options, required, defaults) {
needDefaults = false;
}
- // Step 7.
+ // Step 6.
if (needDefaults && (defaults === "date" || defaults === "all")) {
// The specification says to call [[DefineOwnProperty]] with false for
// the Throw parameter, while Object.defineProperty uses true. For the
@@ -724,7 +728,7 @@ function ToDateTimeOptions(options, required, defaults) {
_DefineDataProperty(options, "day", "numeric");
}
- // Step 8.
+ // Step 7.
if (needDefaults && (defaults === "time" || defaults === "all")) {
// See comment for step 7.
_DefineDataProperty(options, "hour", "numeric");
@@ -732,7 +736,7 @@ function ToDateTimeOptions(options, required, defaults) {
_DefineDataProperty(options, "second", "numeric");
}
- // Step 9.
+ // Step 8.
return options;
}
@@ -842,14 +846,19 @@ function BestFitFormatMatcher(options, formats) {
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript Internationalization API Specification, 12.2.2.
+ * Spec: ECMAScript Internationalization API Specification, 12.3.2.
*/
function Intl_DateTimeFormat_supportedLocalesOf(locales /*, options*/) {
var options = arguments.length > 1 ? arguments[1] : undefined;
+ // Step 1.
var availableLocales = callFunction(dateTimeFormatInternalProperties.availableLocales,
dateTimeFormatInternalProperties);
+
+ // Step 2.
var requestedLocales = CanonicalizeLocaleList(locales);
+
+ // Step 3.
return SupportedLocales(availableLocales, requestedLocales, options);
}
@@ -857,7 +866,7 @@ function Intl_DateTimeFormat_supportedLocalesOf(locales /*, options*/) {
/**
* DateTimeFormat internal properties.
*
- * Spec: ECMAScript Internationalization API Specification, 9.1 and 12.2.3.
+ * Spec: ECMAScript Internationalization API Specification, 9.1 and 12.3.3.
*/
var dateTimeFormatInternalProperties = {
localeData: dateTimeFormatLocaleData,
@@ -897,7 +906,7 @@ function dateTimeFormatLocaleData() {
/**
* Function to be bound and returned by Intl.DateTimeFormat.prototype.format.
*
- * Spec: ECMAScript Internationalization API Specification, 12.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 12.1.5.
*/
function dateTimeFormatFormatToBind() {
// Steps 1.a.i-ii
@@ -913,7 +922,7 @@ function dateTimeFormatFormatToBind() {
* representing the result of calling ToNumber(date) according to the
* effective locale and the formatting options of this DateTimeFormat.
*
- * Spec: ECMAScript Internationalization API Specification, 12.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 12.4.3.
*/
function Intl_DateTimeFormat_format_get() {
// Steps 1-3.
@@ -923,12 +932,11 @@ function Intl_DateTimeFormat_format_get() {
// Step 4.
if (internals.boundFormat === undefined) {
- // Step 4.a.
- var F = dateTimeFormatFormatToBind;
+ // Steps 4.a-b.
+ var F = callFunction(FunctionBind, dateTimeFormatFormatToBind, dtf);
- // Steps 4.b-d.
- var bf = callFunction(FunctionBind, F, dtf);
- internals.boundFormat = bf;
+ // Step 4.c.
+ internals.boundFormat = F;
}
// Step 5.
@@ -937,6 +945,11 @@ function Intl_DateTimeFormat_format_get() {
_SetCanonicalName(Intl_DateTimeFormat_format_get, "get format");
+/**
+ * Intl.DateTimeFormat.prototype.formatToParts ( date )
+ *
+ * Spec: ECMAScript Internationalization API Specification, 12.4.4.
+ */
function Intl_DateTimeFormat_formatToParts() {
// Steps 1-3.
var dtf = UnwrapDateTimeFormat(this, "formatToParts");
@@ -956,14 +969,15 @@ function Intl_DateTimeFormat_formatToParts() {
/**
* Returns the resolved options for a DateTimeFormat object.
*
- * Spec: ECMAScript Internationalization API Specification, 12.3.3 and 12.4.
+ * Spec: ECMAScript Internationalization API Specification, 12.4.5.
*/
function Intl_DateTimeFormat_resolvedOptions() {
- // Invoke |UnwrapDateTimeFormat| per introduction of section 12.3.
+ // Steps 1-3.
var dtf = UnwrapDateTimeFormat(this, "resolvedOptions");
var internals = getDateTimeFormatInternals(dtf);
+ // Steps 4-5.
var result = {
locale: internals.locale,
calendar: internals.calendar,
@@ -981,6 +995,8 @@ function Intl_DateTimeFormat_resolvedOptions() {
}
resolveICUPattern(internals.pattern, result);
+
+ // Step 6.
return result;
}
diff --git a/js/src/builtin/intl/LangTagMappingsGenerated.js b/js/src/builtin/intl/LangTagMappingsGenerated.js
index 269cf9f93a..83a8ff8f60 100644
--- a/js/src/builtin/intl/LangTagMappingsGenerated.js
+++ b/js/src/builtin/intl/LangTagMappingsGenerated.js
@@ -1,382 +1,1246 @@
// Generated by make_intl_data.py. DO NOT EDIT.
-// Mappings from complete tags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2016-10-12.
-// http://www.iana.org/assignments/language-subtag-registry
-var langTagMappings = {
+// Mappings from grandfathered tags to preferred values.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var grandfatheredMappings = {
"art-lojban": "jbo",
- "cel-gaulish": "cel-gaulish",
- "en-gb-oed": "en-GB-oxendict",
- "i-ami": "ami",
- "i-bnn": "bnn",
- "i-default": "i-default",
- "i-enochian": "i-enochian",
- "i-hak": "hak",
- "i-klingon": "tlh",
- "i-lux": "lb",
- "i-mingo": "i-mingo",
- "i-navajo": "nv",
- "i-pwn": "pwn",
- "i-tao": "tao",
- "i-tay": "tay",
- "i-tsu": "tsu",
- "ja-latn-hepburn-heploc": "ja-Latn-alalc97",
- "no-bok": "nb",
- "no-nyn": "nn",
- "sgn-be-fr": "sfb",
- "sgn-be-nl": "vgt",
- "sgn-br": "bzs",
- "sgn-ch-de": "sgg",
- "sgn-co": "csn",
- "sgn-de": "gsg",
- "sgn-dk": "dsl",
- "sgn-es": "ssp",
- "sgn-fr": "fsl",
- "sgn-gb": "bfi",
- "sgn-gr": "gss",
- "sgn-ie": "isg",
- "sgn-it": "ise",
- "sgn-jp": "jsl",
- "sgn-mx": "mfs",
- "sgn-ni": "ncs",
- "sgn-nl": "dse",
- "sgn-no": "nsl",
- "sgn-pt": "psr",
- "sgn-se": "swl",
- "sgn-us": "ase",
- "sgn-za": "sfs",
- "zh-cmn": "cmn",
- "zh-cmn-hans": "cmn-Hans",
- "zh-cmn-hant": "cmn-Hant",
- "zh-gan": "gan",
- "zh-guoyu": "cmn",
+ "cel-gaulish": "xtg-x-cel-gaulish",
+ "zh-guoyu": "zh",
"zh-hakka": "hak",
- "zh-min": "zh-min",
- "zh-min-nan": "nan",
- "zh-wuu": "wuu",
"zh-xiang": "hsn",
- "zh-yue": "yue",
};
-// Mappings from non-extlang subtags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2016-10-12.
-// http://www.iana.org/assignments/language-subtag-registry
-var langSubtagMappings = {
- "BU": "MM",
- "DD": "DE",
- "FX": "FR",
- "TP": "TL",
- "YD": "YE",
- "ZR": "CD",
+// Mappings from language subtags to preferred values.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var languageMappings = {
"aam": "aas",
+ "aar": "aa",
+ "abk": "ab",
"adp": "dz",
+ "afr": "af",
+ "aju": "jrb",
+ "aka": "ak",
+ "alb": "sq",
+ "als": "sq",
+ "amh": "am",
+ "ara": "ar",
+ "arb": "ar",
+ "arg": "an",
+ "arm": "hy",
+ "asd": "snz",
+ "asm": "as",
"aue": "ktz",
+ "ava": "av",
+ "ave": "ae",
+ "aym": "ay",
+ "ayr": "ay",
"ayx": "nun",
+ "aze": "az",
+ "azj": "az",
+ "bak": "ba",
+ "bam": "bm",
+ "baq": "eu",
+ "bcc": "bal",
+ "bcl": "bik",
+ "bel": "be",
+ "ben": "bn",
"bgm": "bcg",
+ "bh": "bho",
+ "bih": "bho",
+ "bis": "bi",
"bjd": "drl",
+ "bod": "bo",
+ "bos": "bs",
+ "bre": "br",
+ "bul": "bg",
+ "bur": "my",
+ "bxk": "luy",
+ "bxr": "bua",
+ "cat": "ca",
"ccq": "rki",
+ "ces": "cs",
+ "cha": "ch",
+ "che": "ce",
+ "chi": "zh",
+ "chu": "cu",
+ "chv": "cv",
"cjr": "mom",
"cka": "cmr",
+ "cld": "syr",
"cmk": "xch",
+ "cmn": "zh",
+ "cor": "kw",
+ "cos": "co",
"coy": "pij",
"cqu": "quh",
- "drh": "khk",
- "drw": "prs",
+ "cre": "cr",
+ "cwd": "cr",
+ "cym": "cy",
+ "cze": "cs",
+ "dan": "da",
+ "deu": "de",
+ "dgo": "doi",
+ "dhd": "mwr",
+ "dik": "din",
+ "diq": "zza",
+ "dit": "dif",
+ "div": "dv",
+ "drh": "mn",
+ "dut": "nl",
+ "dzo": "dz",
+ "ekk": "et",
+ "ell": "el",
+ "emk": "man",
+ "eng": "en",
+ "epo": "eo",
+ "esk": "ik",
+ "est": "et",
+ "eus": "eu",
+ "ewe": "ee",
+ "fao": "fo",
+ "fas": "fa",
+ "fat": "ak",
+ "fij": "fj",
+ "fin": "fi",
+ "fra": "fr",
+ "fre": "fr",
+ "fry": "fy",
+ "fuc": "ff",
+ "ful": "ff",
"gav": "dev",
+ "gaz": "om",
+ "gbo": "grb",
+ "geo": "ka",
+ "ger": "de",
"gfx": "vaj",
"ggn": "gvr",
+ "gla": "gd",
+ "gle": "ga",
+ "glg": "gl",
+ "glv": "gv",
+ "gno": "gon",
+ "gre": "el",
+ "grn": "gn",
"gti": "nyc",
+ "gug": "gn",
+ "guj": "gu",
"guv": "duz",
+ "gya": "gba",
+ "hat": "ht",
+ "hau": "ha",
+ "hdn": "hai",
+ "hea": "hmn",
+ "heb": "he",
+ "her": "hz",
+ "him": "srx",
+ "hin": "hi",
+ "hmo": "ho",
"hrr": "jal",
+ "hrv": "hr",
+ "hun": "hu",
+ "hye": "hy",
"ibi": "opa",
+ "ibo": "ig",
+ "ice": "is",
+ "ido": "io",
+ "iii": "ii",
+ "ike": "iu",
+ "iku": "iu",
+ "ile": "ie",
"ilw": "gal",
"in": "id",
+ "ina": "ia",
+ "ind": "id",
+ "ipk": "ik",
+ "isl": "is",
+ "ita": "it",
"iw": "he",
+ "jav": "jv",
+ "jeg": "oyb",
"ji": "yi",
+ "jpn": "ja",
"jw": "jv",
+ "kal": "kl",
+ "kan": "kn",
+ "kas": "ks",
+ "kat": "ka",
+ "kau": "kr",
+ "kaz": "kk",
"kgc": "tdf",
"kgh": "kml",
+ "khk": "mn",
+ "khm": "km",
+ "kik": "ki",
+ "kin": "rw",
+ "kir": "ky",
+ "kmr": "ku",
+ "knc": "kr",
+ "kng": "kg",
+ "knn": "kok",
"koj": "kwv",
+ "kom": "kv",
+ "kon": "kg",
+ "kor": "ko",
+ "kpv": "kv",
+ "krm": "bmf",
"ktr": "dtp",
+ "kua": "kj",
+ "kur": "ku",
"kvs": "gdj",
"kwq": "yam",
"kxe": "tvd",
"kzj": "dtp",
"kzt": "dtp",
+ "lao": "lo",
+ "lat": "la",
+ "lav": "lv",
+ "lbk": "bnc",
"lii": "raq",
+ "lim": "li",
+ "lin": "ln",
+ "lit": "lt",
+ "llo": "ngt",
"lmm": "rmx",
+ "ltz": "lb",
+ "lub": "lu",
+ "lug": "lg",
+ "lvs": "lv",
+ "mac": "mk",
+ "mah": "mh",
+ "mal": "ml",
+ "mao": "mi",
+ "mar": "mr",
+ "may": "ms",
"meg": "cir",
+ "mhr": "chm",
+ "mkd": "mk",
+ "mlg": "mg",
+ "mlt": "mt",
+ "mnk": "man",
"mo": "ro",
+ "mol": "ro",
+ "mon": "mn",
+ "mri": "mi",
+ "msa": "ms",
"mst": "mry",
+ "mup": "raj",
"mwj": "vaj",
+ "mya": "my",
+ "myd": "aog",
"myt": "mry",
"nad": "xny",
+ "nau": "na",
+ "nav": "nv",
+ "nbl": "nr",
+ "ncp": "kdz",
+ "nde": "nd",
+ "ndo": "ng",
+ "nep": "ne",
+ "nld": "nl",
+ "nno": "nn",
+ "nns": "nbr",
"nnx": "ngv",
+ "no": "nb",
+ "nob": "nb",
+ "nor": "nb",
+ "npi": "ne",
"nts": "pij",
+ "nya": "ny",
+ "oci": "oc",
+ "ojg": "oj",
+ "oji": "oj",
+ "ori": "or",
+ "orm": "om",
+ "ory": "or",
+ "oss": "os",
"oun": "vaj",
+ "pan": "pa",
+ "pbu": "ps",
"pcr": "adx",
+ "per": "fa",
+ "pes": "fa",
+ "pli": "pi",
+ "plt": "mg",
"pmc": "huw",
"pmu": "phr",
+ "pnb": "lah",
+ "pol": "pl",
+ "por": "pt",
"ppa": "bfy",
"ppr": "lcq",
"pry": "prt",
+ "pus": "ps",
"puz": "pub",
+ "que": "qu",
+ "quz": "qu",
+ "rmy": "rom",
+ "roh": "rm",
+ "ron": "ro",
+ "rum": "ro",
+ "run": "rn",
+ "rus": "ru",
+ "sag": "sg",
+ "san": "sa",
"sca": "hle",
+ "scc": "sr",
+ "scr": "hr",
+ "sin": "si",
+ "skk": "oyb",
+ "slk": "sk",
+ "slo": "sk",
+ "slv": "sl",
+ "sme": "se",
+ "smo": "sm",
+ "sna": "sn",
+ "snd": "sd",
+ "som": "so",
+ "sot": "st",
+ "spa": "es",
+ "spy": "kln",
+ "sqi": "sq",
+ "src": "sc",
+ "srd": "sc",
+ "srp": "sr",
+ "ssw": "ss",
+ "sun": "su",
+ "swa": "sw",
+ "swe": "sv",
+ "swh": "sw",
+ "tah": "ty",
+ "tam": "ta",
+ "tat": "tt",
"tdu": "dtp",
+ "tel": "te",
+ "tgk": "tg",
+ "tgl": "fil",
+ "tha": "th",
"thc": "tpo",
"thx": "oyb",
+ "tib": "bo",
"tie": "ras",
+ "tir": "ti",
"tkk": "twm",
+ "tl": "fil",
"tlw": "weo",
"tmp": "tyj",
"tne": "kak",
- "tnf": "prs",
+ "ton": "to",
"tsf": "taj",
+ "tsn": "tn",
+ "tso": "ts",
+ "ttq": "tmh",
+ "tuk": "tk",
+ "tur": "tr",
+ "tw": "ak",
+ "twi": "ak",
+ "uig": "ug",
+ "ukr": "uk",
+ "umu": "del",
"uok": "ema",
+ "urd": "ur",
+ "uzb": "uz",
+ "uzn": "uz",
+ "ven": "ve",
+ "vie": "vi",
+ "vol": "vo",
+ "wel": "cy",
+ "wln": "wa",
+ "wol": "wo",
"xba": "cax",
+ "xho": "xh",
"xia": "acn",
"xkh": "waw",
+ "xpe": "kpe",
"xsj": "suj",
+ "xsl": "den",
"ybd": "rki",
+ "ydd": "yi",
+ "yid": "yi",
"yma": "lrr",
"ymt": "mtm",
+ "yor": "yo",
"yos": "zom",
"yuu": "yug",
+ "zai": "zap",
+ "zha": "za",
+ "zho": "zh",
+ "zsm": "ms",
+ "zul": "zu",
+ "zyb": "za",
};
-// Mappings from extlang subtags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2016-10-12.
-// http://www.iana.org/assignments/language-subtag-registry
-var extlangMappings = {
- "aao": {preferred: "aao", prefix: "ar"},
- "abh": {preferred: "abh", prefix: "ar"},
- "abv": {preferred: "abv", prefix: "ar"},
- "acm": {preferred: "acm", prefix: "ar"},
- "acq": {preferred: "acq", prefix: "ar"},
- "acw": {preferred: "acw", prefix: "ar"},
- "acx": {preferred: "acx", prefix: "ar"},
- "acy": {preferred: "acy", prefix: "ar"},
- "adf": {preferred: "adf", prefix: "ar"},
- "ads": {preferred: "ads", prefix: "sgn"},
- "aeb": {preferred: "aeb", prefix: "ar"},
- "aec": {preferred: "aec", prefix: "ar"},
- "aed": {preferred: "aed", prefix: "sgn"},
- "aen": {preferred: "aen", prefix: "sgn"},
- "afb": {preferred: "afb", prefix: "ar"},
- "afg": {preferred: "afg", prefix: "sgn"},
- "ajp": {preferred: "ajp", prefix: "ar"},
- "apc": {preferred: "apc", prefix: "ar"},
- "apd": {preferred: "apd", prefix: "ar"},
- "arb": {preferred: "arb", prefix: "ar"},
- "arq": {preferred: "arq", prefix: "ar"},
- "ars": {preferred: "ars", prefix: "ar"},
- "ary": {preferred: "ary", prefix: "ar"},
- "arz": {preferred: "arz", prefix: "ar"},
- "ase": {preferred: "ase", prefix: "sgn"},
- "asf": {preferred: "asf", prefix: "sgn"},
- "asp": {preferred: "asp", prefix: "sgn"},
- "asq": {preferred: "asq", prefix: "sgn"},
- "asw": {preferred: "asw", prefix: "sgn"},
- "auz": {preferred: "auz", prefix: "ar"},
- "avl": {preferred: "avl", prefix: "ar"},
- "ayh": {preferred: "ayh", prefix: "ar"},
- "ayl": {preferred: "ayl", prefix: "ar"},
- "ayn": {preferred: "ayn", prefix: "ar"},
- "ayp": {preferred: "ayp", prefix: "ar"},
- "bbz": {preferred: "bbz", prefix: "ar"},
- "bfi": {preferred: "bfi", prefix: "sgn"},
- "bfk": {preferred: "bfk", prefix: "sgn"},
- "bjn": {preferred: "bjn", prefix: "ms"},
- "bog": {preferred: "bog", prefix: "sgn"},
- "bqn": {preferred: "bqn", prefix: "sgn"},
- "bqy": {preferred: "bqy", prefix: "sgn"},
- "btj": {preferred: "btj", prefix: "ms"},
- "bve": {preferred: "bve", prefix: "ms"},
- "bvl": {preferred: "bvl", prefix: "sgn"},
- "bvu": {preferred: "bvu", prefix: "ms"},
- "bzs": {preferred: "bzs", prefix: "sgn"},
- "cdo": {preferred: "cdo", prefix: "zh"},
- "cds": {preferred: "cds", prefix: "sgn"},
- "cjy": {preferred: "cjy", prefix: "zh"},
- "cmn": {preferred: "cmn", prefix: "zh"},
- "coa": {preferred: "coa", prefix: "ms"},
- "cpx": {preferred: "cpx", prefix: "zh"},
- "csc": {preferred: "csc", prefix: "sgn"},
- "csd": {preferred: "csd", prefix: "sgn"},
- "cse": {preferred: "cse", prefix: "sgn"},
- "csf": {preferred: "csf", prefix: "sgn"},
- "csg": {preferred: "csg", prefix: "sgn"},
- "csl": {preferred: "csl", prefix: "sgn"},
- "csn": {preferred: "csn", prefix: "sgn"},
- "csq": {preferred: "csq", prefix: "sgn"},
- "csr": {preferred: "csr", prefix: "sgn"},
- "czh": {preferred: "czh", prefix: "zh"},
- "czo": {preferred: "czo", prefix: "zh"},
- "doq": {preferred: "doq", prefix: "sgn"},
- "dse": {preferred: "dse", prefix: "sgn"},
- "dsl": {preferred: "dsl", prefix: "sgn"},
- "dup": {preferred: "dup", prefix: "ms"},
- "ecs": {preferred: "ecs", prefix: "sgn"},
- "esl": {preferred: "esl", prefix: "sgn"},
- "esn": {preferred: "esn", prefix: "sgn"},
- "eso": {preferred: "eso", prefix: "sgn"},
- "eth": {preferred: "eth", prefix: "sgn"},
- "fcs": {preferred: "fcs", prefix: "sgn"},
- "fse": {preferred: "fse", prefix: "sgn"},
- "fsl": {preferred: "fsl", prefix: "sgn"},
- "fss": {preferred: "fss", prefix: "sgn"},
- "gan": {preferred: "gan", prefix: "zh"},
- "gds": {preferred: "gds", prefix: "sgn"},
- "gom": {preferred: "gom", prefix: "kok"},
- "gse": {preferred: "gse", prefix: "sgn"},
- "gsg": {preferred: "gsg", prefix: "sgn"},
- "gsm": {preferred: "gsm", prefix: "sgn"},
- "gss": {preferred: "gss", prefix: "sgn"},
- "gus": {preferred: "gus", prefix: "sgn"},
- "hab": {preferred: "hab", prefix: "sgn"},
- "haf": {preferred: "haf", prefix: "sgn"},
- "hak": {preferred: "hak", prefix: "zh"},
- "hds": {preferred: "hds", prefix: "sgn"},
- "hji": {preferred: "hji", prefix: "ms"},
- "hks": {preferred: "hks", prefix: "sgn"},
- "hos": {preferred: "hos", prefix: "sgn"},
- "hps": {preferred: "hps", prefix: "sgn"},
- "hsh": {preferred: "hsh", prefix: "sgn"},
- "hsl": {preferred: "hsl", prefix: "sgn"},
- "hsn": {preferred: "hsn", prefix: "zh"},
- "icl": {preferred: "icl", prefix: "sgn"},
- "iks": {preferred: "iks", prefix: "sgn"},
- "ils": {preferred: "ils", prefix: "sgn"},
- "inl": {preferred: "inl", prefix: "sgn"},
- "ins": {preferred: "ins", prefix: "sgn"},
- "ise": {preferred: "ise", prefix: "sgn"},
- "isg": {preferred: "isg", prefix: "sgn"},
- "isr": {preferred: "isr", prefix: "sgn"},
- "jak": {preferred: "jak", prefix: "ms"},
- "jax": {preferred: "jax", prefix: "ms"},
- "jcs": {preferred: "jcs", prefix: "sgn"},
- "jhs": {preferred: "jhs", prefix: "sgn"},
- "jls": {preferred: "jls", prefix: "sgn"},
- "jos": {preferred: "jos", prefix: "sgn"},
- "jsl": {preferred: "jsl", prefix: "sgn"},
- "jus": {preferred: "jus", prefix: "sgn"},
- "kgi": {preferred: "kgi", prefix: "sgn"},
- "knn": {preferred: "knn", prefix: "kok"},
- "kvb": {preferred: "kvb", prefix: "ms"},
- "kvk": {preferred: "kvk", prefix: "sgn"},
- "kvr": {preferred: "kvr", prefix: "ms"},
- "kxd": {preferred: "kxd", prefix: "ms"},
- "lbs": {preferred: "lbs", prefix: "sgn"},
- "lce": {preferred: "lce", prefix: "ms"},
- "lcf": {preferred: "lcf", prefix: "ms"},
- "liw": {preferred: "liw", prefix: "ms"},
- "lls": {preferred: "lls", prefix: "sgn"},
- "lsg": {preferred: "lsg", prefix: "sgn"},
- "lsl": {preferred: "lsl", prefix: "sgn"},
- "lso": {preferred: "lso", prefix: "sgn"},
- "lsp": {preferred: "lsp", prefix: "sgn"},
- "lst": {preferred: "lst", prefix: "sgn"},
- "lsy": {preferred: "lsy", prefix: "sgn"},
- "ltg": {preferred: "ltg", prefix: "lv"},
- "lvs": {preferred: "lvs", prefix: "lv"},
- "lzh": {preferred: "lzh", prefix: "zh"},
- "max": {preferred: "max", prefix: "ms"},
- "mdl": {preferred: "mdl", prefix: "sgn"},
- "meo": {preferred: "meo", prefix: "ms"},
- "mfa": {preferred: "mfa", prefix: "ms"},
- "mfb": {preferred: "mfb", prefix: "ms"},
- "mfs": {preferred: "mfs", prefix: "sgn"},
- "min": {preferred: "min", prefix: "ms"},
- "mnp": {preferred: "mnp", prefix: "zh"},
- "mqg": {preferred: "mqg", prefix: "ms"},
- "mre": {preferred: "mre", prefix: "sgn"},
- "msd": {preferred: "msd", prefix: "sgn"},
- "msi": {preferred: "msi", prefix: "ms"},
- "msr": {preferred: "msr", prefix: "sgn"},
- "mui": {preferred: "mui", prefix: "ms"},
- "mzc": {preferred: "mzc", prefix: "sgn"},
- "mzg": {preferred: "mzg", prefix: "sgn"},
- "mzy": {preferred: "mzy", prefix: "sgn"},
- "nan": {preferred: "nan", prefix: "zh"},
- "nbs": {preferred: "nbs", prefix: "sgn"},
- "ncs": {preferred: "ncs", prefix: "sgn"},
- "nsi": {preferred: "nsi", prefix: "sgn"},
- "nsl": {preferred: "nsl", prefix: "sgn"},
- "nsp": {preferred: "nsp", prefix: "sgn"},
- "nsr": {preferred: "nsr", prefix: "sgn"},
- "nzs": {preferred: "nzs", prefix: "sgn"},
- "okl": {preferred: "okl", prefix: "sgn"},
- "orn": {preferred: "orn", prefix: "ms"},
- "ors": {preferred: "ors", prefix: "ms"},
- "pel": {preferred: "pel", prefix: "ms"},
- "pga": {preferred: "pga", prefix: "ar"},
- "pgz": {preferred: "pgz", prefix: "sgn"},
- "pks": {preferred: "pks", prefix: "sgn"},
- "prl": {preferred: "prl", prefix: "sgn"},
- "prz": {preferred: "prz", prefix: "sgn"},
- "psc": {preferred: "psc", prefix: "sgn"},
- "psd": {preferred: "psd", prefix: "sgn"},
- "pse": {preferred: "pse", prefix: "ms"},
- "psg": {preferred: "psg", prefix: "sgn"},
- "psl": {preferred: "psl", prefix: "sgn"},
- "pso": {preferred: "pso", prefix: "sgn"},
- "psp": {preferred: "psp", prefix: "sgn"},
- "psr": {preferred: "psr", prefix: "sgn"},
- "pys": {preferred: "pys", prefix: "sgn"},
- "rms": {preferred: "rms", prefix: "sgn"},
- "rsi": {preferred: "rsi", prefix: "sgn"},
- "rsl": {preferred: "rsl", prefix: "sgn"},
- "rsm": {preferred: "rsm", prefix: "sgn"},
- "sdl": {preferred: "sdl", prefix: "sgn"},
- "sfb": {preferred: "sfb", prefix: "sgn"},
- "sfs": {preferred: "sfs", prefix: "sgn"},
- "sgg": {preferred: "sgg", prefix: "sgn"},
- "sgx": {preferred: "sgx", prefix: "sgn"},
- "shu": {preferred: "shu", prefix: "ar"},
- "slf": {preferred: "slf", prefix: "sgn"},
- "sls": {preferred: "sls", prefix: "sgn"},
- "sqk": {preferred: "sqk", prefix: "sgn"},
- "sqs": {preferred: "sqs", prefix: "sgn"},
- "ssh": {preferred: "ssh", prefix: "ar"},
- "ssp": {preferred: "ssp", prefix: "sgn"},
- "ssr": {preferred: "ssr", prefix: "sgn"},
- "svk": {preferred: "svk", prefix: "sgn"},
- "swc": {preferred: "swc", prefix: "sw"},
- "swh": {preferred: "swh", prefix: "sw"},
- "swl": {preferred: "swl", prefix: "sgn"},
- "syy": {preferred: "syy", prefix: "sgn"},
- "tmw": {preferred: "tmw", prefix: "ms"},
- "tse": {preferred: "tse", prefix: "sgn"},
- "tsm": {preferred: "tsm", prefix: "sgn"},
- "tsq": {preferred: "tsq", prefix: "sgn"},
- "tss": {preferred: "tss", prefix: "sgn"},
- "tsy": {preferred: "tsy", prefix: "sgn"},
- "tza": {preferred: "tza", prefix: "sgn"},
- "ugn": {preferred: "ugn", prefix: "sgn"},
- "ugy": {preferred: "ugy", prefix: "sgn"},
- "ukl": {preferred: "ukl", prefix: "sgn"},
- "uks": {preferred: "uks", prefix: "sgn"},
- "urk": {preferred: "urk", prefix: "ms"},
- "uzn": {preferred: "uzn", prefix: "uz"},
- "uzs": {preferred: "uzs", prefix: "uz"},
- "vgt": {preferred: "vgt", prefix: "sgn"},
- "vkk": {preferred: "vkk", prefix: "ms"},
- "vkt": {preferred: "vkt", prefix: "ms"},
- "vsi": {preferred: "vsi", prefix: "sgn"},
- "vsl": {preferred: "vsl", prefix: "sgn"},
- "vsv": {preferred: "vsv", prefix: "sgn"},
- "wuu": {preferred: "wuu", prefix: "zh"},
- "xki": {preferred: "xki", prefix: "sgn"},
- "xml": {preferred: "xml", prefix: "sgn"},
- "xmm": {preferred: "xmm", prefix: "ms"},
- "xms": {preferred: "xms", prefix: "sgn"},
- "ygs": {preferred: "ygs", prefix: "sgn"},
- "yhs": {preferred: "yhs", prefix: "sgn"},
- "ysl": {preferred: "ysl", prefix: "sgn"},
- "yue": {preferred: "yue", prefix: "zh"},
- "zib": {preferred: "zib", prefix: "sgn"},
- "zlm": {preferred: "zlm", prefix: "ms"},
- "zmi": {preferred: "zmi", prefix: "ms"},
- "zsl": {preferred: "zsl", prefix: "sgn"},
- "zsm": {preferred: "zsm", prefix: "ms"},
+// Language subtags with complex mappings.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var complexLanguageMappings = {
+ "cnr": true,
+ "drw": true,
+ "hbs": true,
+ "prs": true,
+ "sh": true,
+ "swc": true,
+ "tnf": true,
};
+
+// Mappings from region subtags to preferred values.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var regionMappings = {
+ "004": "AF",
+ "008": "AL",
+ "010": "AQ",
+ "012": "DZ",
+ "016": "AS",
+ "020": "AD",
+ "024": "AO",
+ "028": "AG",
+ "031": "AZ",
+ "032": "AR",
+ "036": "AU",
+ "040": "AT",
+ "044": "BS",
+ "048": "BH",
+ "050": "BD",
+ "051": "AM",
+ "052": "BB",
+ "056": "BE",
+ "060": "BM",
+ "062": "034",
+ "064": "BT",
+ "068": "BO",
+ "070": "BA",
+ "072": "BW",
+ "074": "BV",
+ "076": "BR",
+ "084": "BZ",
+ "086": "IO",
+ "090": "SB",
+ "092": "VG",
+ "096": "BN",
+ "100": "BG",
+ "104": "MM",
+ "108": "BI",
+ "112": "BY",
+ "116": "KH",
+ "120": "CM",
+ "124": "CA",
+ "132": "CV",
+ "136": "KY",
+ "140": "CF",
+ "144": "LK",
+ "148": "TD",
+ "152": "CL",
+ "156": "CN",
+ "158": "TW",
+ "162": "CX",
+ "166": "CC",
+ "170": "CO",
+ "174": "KM",
+ "175": "YT",
+ "178": "CG",
+ "180": "CD",
+ "184": "CK",
+ "188": "CR",
+ "191": "HR",
+ "192": "CU",
+ "196": "CY",
+ "203": "CZ",
+ "204": "BJ",
+ "208": "DK",
+ "212": "DM",
+ "214": "DO",
+ "218": "EC",
+ "222": "SV",
+ "226": "GQ",
+ "230": "ET",
+ "231": "ET",
+ "232": "ER",
+ "233": "EE",
+ "234": "FO",
+ "238": "FK",
+ "239": "GS",
+ "242": "FJ",
+ "246": "FI",
+ "248": "AX",
+ "249": "FR",
+ "250": "FR",
+ "254": "GF",
+ "258": "PF",
+ "260": "TF",
+ "262": "DJ",
+ "266": "GA",
+ "268": "GE",
+ "270": "GM",
+ "275": "PS",
+ "276": "DE",
+ "278": "DE",
+ "280": "DE",
+ "288": "GH",
+ "292": "GI",
+ "296": "KI",
+ "300": "GR",
+ "304": "GL",
+ "308": "GD",
+ "312": "GP",
+ "316": "GU",
+ "320": "GT",
+ "324": "GN",
+ "328": "GY",
+ "332": "HT",
+ "334": "HM",
+ "336": "VA",
+ "340": "HN",
+ "344": "HK",
+ "348": "HU",
+ "352": "IS",
+ "356": "IN",
+ "360": "ID",
+ "364": "IR",
+ "368": "IQ",
+ "372": "IE",
+ "376": "IL",
+ "380": "IT",
+ "384": "CI",
+ "388": "JM",
+ "392": "JP",
+ "398": "KZ",
+ "400": "JO",
+ "404": "KE",
+ "408": "KP",
+ "410": "KR",
+ "414": "KW",
+ "417": "KG",
+ "418": "LA",
+ "422": "LB",
+ "426": "LS",
+ "428": "LV",
+ "430": "LR",
+ "434": "LY",
+ "438": "LI",
+ "440": "LT",
+ "442": "LU",
+ "446": "MO",
+ "450": "MG",
+ "454": "MW",
+ "458": "MY",
+ "462": "MV",
+ "466": "ML",
+ "470": "MT",
+ "474": "MQ",
+ "478": "MR",
+ "480": "MU",
+ "484": "MX",
+ "492": "MC",
+ "496": "MN",
+ "498": "MD",
+ "499": "ME",
+ "500": "MS",
+ "504": "MA",
+ "508": "MZ",
+ "512": "OM",
+ "516": "NA",
+ "520": "NR",
+ "524": "NP",
+ "528": "NL",
+ "531": "CW",
+ "533": "AW",
+ "534": "SX",
+ "535": "BQ",
+ "540": "NC",
+ "548": "VU",
+ "554": "NZ",
+ "558": "NI",
+ "562": "NE",
+ "566": "NG",
+ "570": "NU",
+ "574": "NF",
+ "578": "NO",
+ "580": "MP",
+ "581": "UM",
+ "583": "FM",
+ "584": "MH",
+ "585": "PW",
+ "586": "PK",
+ "591": "PA",
+ "598": "PG",
+ "600": "PY",
+ "604": "PE",
+ "608": "PH",
+ "612": "PN",
+ "616": "PL",
+ "620": "PT",
+ "624": "GW",
+ "626": "TL",
+ "630": "PR",
+ "634": "QA",
+ "638": "RE",
+ "642": "RO",
+ "643": "RU",
+ "646": "RW",
+ "652": "BL",
+ "654": "SH",
+ "659": "KN",
+ "660": "AI",
+ "662": "LC",
+ "663": "MF",
+ "666": "PM",
+ "670": "VC",
+ "674": "SM",
+ "678": "ST",
+ "682": "SA",
+ "686": "SN",
+ "688": "RS",
+ "690": "SC",
+ "694": "SL",
+ "702": "SG",
+ "703": "SK",
+ "704": "VN",
+ "705": "SI",
+ "706": "SO",
+ "710": "ZA",
+ "716": "ZW",
+ "720": "YE",
+ "724": "ES",
+ "728": "SS",
+ "729": "SD",
+ "732": "EH",
+ "736": "SD",
+ "740": "SR",
+ "744": "SJ",
+ "748": "SZ",
+ "752": "SE",
+ "756": "CH",
+ "760": "SY",
+ "762": "TJ",
+ "764": "TH",
+ "768": "TG",
+ "772": "TK",
+ "776": "TO",
+ "780": "TT",
+ "784": "AE",
+ "788": "TN",
+ "792": "TR",
+ "795": "TM",
+ "796": "TC",
+ "798": "TV",
+ "800": "UG",
+ "804": "UA",
+ "807": "MK",
+ "818": "EG",
+ "826": "GB",
+ "830": "JE",
+ "831": "GG",
+ "832": "JE",
+ "833": "IM",
+ "834": "TZ",
+ "840": "US",
+ "850": "VI",
+ "854": "BF",
+ "858": "UY",
+ "860": "UZ",
+ "862": "VE",
+ "876": "WF",
+ "882": "WS",
+ "886": "YE",
+ "887": "YE",
+ "891": "RS",
+ "894": "ZM",
+ "958": "AA",
+ "959": "QM",
+ "960": "QN",
+ "962": "QP",
+ "963": "QQ",
+ "964": "QR",
+ "965": "QS",
+ "966": "QT",
+ "967": "EU",
+ "968": "QV",
+ "969": "QW",
+ "970": "QX",
+ "971": "QY",
+ "972": "QZ",
+ "973": "XA",
+ "974": "XB",
+ "975": "XC",
+ "976": "XD",
+ "977": "XE",
+ "978": "XF",
+ "979": "XG",
+ "980": "XH",
+ "981": "XI",
+ "982": "XJ",
+ "983": "XK",
+ "984": "XL",
+ "985": "XM",
+ "986": "XN",
+ "987": "XO",
+ "988": "XP",
+ "989": "XQ",
+ "990": "XR",
+ "991": "XS",
+ "992": "XT",
+ "993": "XU",
+ "994": "XV",
+ "995": "XW",
+ "996": "XX",
+ "997": "XY",
+ "998": "XZ",
+ "999": "ZZ",
+ "BU": "MM",
+ "CS": "RS",
+ "CT": "KI",
+ "DD": "DE",
+ "DY": "BJ",
+ "FQ": "AQ",
+ "FX": "FR",
+ "HV": "BF",
+ "JT": "UM",
+ "MI": "UM",
+ "NH": "VU",
+ "NQ": "AQ",
+ "PU": "UM",
+ "PZ": "PA",
+ "QU": "EU",
+ "RH": "ZW",
+ "TP": "TL",
+ "UK": "GB",
+ "VD": "VN",
+ "WK": "UM",
+ "YD": "YE",
+ "YU": "RS",
+ "ZR": "CD",
+};
+
+// Region subtags with complex mappings.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var complexRegionMappings = {
+ "172": true,
+ "200": true,
+ "530": true,
+ "532": true,
+ "536": true,
+ "582": true,
+ "810": true,
+ "890": true,
+ "AN": true,
+ "NT": true,
+ "PC": true,
+ "SU": true,
+};
+
+// Canonicalize Unicode BCP 47 locale identifiers.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+/* eslint-disable complexity */
+function updateLocaleIdMappings(tag) {
+ assert(IsObject(tag), "tag is an object");
+
+ // Replace deprecated language tags with their preferred values.
+ var language = tag.language;
+ if (hasOwn(language, languageMappings)) {
+ tag.language = languageMappings[language];
+ } else if (hasOwn(language, complexLanguageMappings)) {
+ switch (language) {
+ case "cnr":
+ tag.language = "sr";
+ if (tag.region === undefined)
+ tag.region = "ME";
+ break;
+ case "drw":
+ case "prs":
+ case "tnf":
+ tag.language = "fa";
+ if (tag.region === undefined)
+ tag.region = "AF";
+ break;
+ case "hbs":
+ case "sh":
+ tag.language = "sr";
+ if (tag.script === undefined)
+ tag.script = "Latn";
+ break;
+ case "swc":
+ tag.language = "sw";
+ if (tag.region === undefined)
+ tag.region = "CD";
+ break;
+ default:
+ assert(false, "language not handled: " + language);
+ }
+ }
+
+ // No script replacements are currently present.
+
+ // Replace deprecated subtags with their preferred values.
+ var region = tag.region;
+ if (region !== undefined) {
+ if (hasOwn(region, regionMappings)) {
+ tag.region = regionMappings[region];
+ } else if (hasOwn(region, complexRegionMappings)) {
+ switch (region) {
+ case "172":
+ if (tag.language === "ab") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "az") {
+ tag.region = "AZ";
+ break;
+ }
+ if (tag.language === "be") {
+ tag.region = "BY";
+ break;
+ }
+ if (tag.language === "crh") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "gag") {
+ tag.region = "MD";
+ break;
+ }
+ if (tag.language === "got") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "hy") {
+ tag.region = "AM";
+ break;
+ }
+ if (tag.language === "ji") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "ka") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "kaa") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "kk") {
+ tag.region = "KZ";
+ break;
+ }
+ if (tag.language === "ku" && tag.script === "Yezi") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "ky") {
+ tag.region = "KG";
+ break;
+ }
+ if (tag.language === "os") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "rue") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "sog") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "tg") {
+ tag.region = "TJ";
+ break;
+ }
+ if (tag.language === "tk") {
+ tag.region = "TM";
+ break;
+ }
+ if (tag.language === "tkr") {
+ tag.region = "AZ";
+ break;
+ }
+ if (tag.language === "tly") {
+ tag.region = "AZ";
+ break;
+ }
+ if (tag.language === "ttt") {
+ tag.region = "AZ";
+ break;
+ }
+ if (tag.language === "ug" && tag.script === "Cyrl") {
+ tag.region = "KZ";
+ break;
+ }
+ if (tag.language === "uk") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Geor") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Armn") {
+ tag.region = "AM";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Sogo") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Goth") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Chrs") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Sogd") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Yezi") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "uz") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "xco") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "xmf") {
+ tag.region = "GE";
+ break;
+ }
+ tag.region = "RU";
+ break;
+ case "200":
+ if (tag.language === "sk") {
+ tag.region = "SK";
+ break;
+ }
+ tag.region = "CZ";
+ break;
+ case "530":
+ case "532":
+ case "AN":
+ if (tag.language === "vic") {
+ tag.region = "SX";
+ break;
+ }
+ tag.region = "CW";
+ break;
+ case "536":
+ case "NT":
+ if (tag.language === "akk") {
+ tag.region = "IQ";
+ break;
+ }
+ if (tag.language === "ckb") {
+ tag.region = "IQ";
+ break;
+ }
+ if (tag.language === "ku" && tag.script === "Arab") {
+ tag.region = "IQ";
+ break;
+ }
+ if (tag.language === "mis") {
+ tag.region = "IQ";
+ break;
+ }
+ if (tag.language === "syr") {
+ tag.region = "IQ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Syrc") {
+ tag.region = "IQ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Hatr") {
+ tag.region = "IQ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Xsux") {
+ tag.region = "IQ";
+ break;
+ }
+ tag.region = "SA";
+ break;
+ case "582":
+ case "PC":
+ if (tag.language === "mh") {
+ tag.region = "MH";
+ break;
+ }
+ if (tag.language === "pau") {
+ tag.region = "PW";
+ break;
+ }
+ tag.region = "FM";
+ break;
+ case "810":
+ case "SU":
+ if (tag.language === "ab") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "az") {
+ tag.region = "AZ";
+ break;
+ }
+ if (tag.language === "be") {
+ tag.region = "BY";
+ break;
+ }
+ if (tag.language === "crh") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "et") {
+ tag.region = "EE";
+ break;
+ }
+ if (tag.language === "gag") {
+ tag.region = "MD";
+ break;
+ }
+ if (tag.language === "got") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "hy") {
+ tag.region = "AM";
+ break;
+ }
+ if (tag.language === "ji") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "ka") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "kaa") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "kk") {
+ tag.region = "KZ";
+ break;
+ }
+ if (tag.language === "ku" && tag.script === "Yezi") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "ky") {
+ tag.region = "KG";
+ break;
+ }
+ if (tag.language === "lt") {
+ tag.region = "LT";
+ break;
+ }
+ if (tag.language === "ltg") {
+ tag.region = "LV";
+ break;
+ }
+ if (tag.language === "lv") {
+ tag.region = "LV";
+ break;
+ }
+ if (tag.language === "os") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "rue") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "sgs") {
+ tag.region = "LT";
+ break;
+ }
+ if (tag.language === "sog") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "tg") {
+ tag.region = "TJ";
+ break;
+ }
+ if (tag.language === "tk") {
+ tag.region = "TM";
+ break;
+ }
+ if (tag.language === "tkr") {
+ tag.region = "AZ";
+ break;
+ }
+ if (tag.language === "tly") {
+ tag.region = "AZ";
+ break;
+ }
+ if (tag.language === "ttt") {
+ tag.region = "AZ";
+ break;
+ }
+ if (tag.language === "ug" && tag.script === "Cyrl") {
+ tag.region = "KZ";
+ break;
+ }
+ if (tag.language === "uk") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Geor") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Armn") {
+ tag.region = "AM";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Sogo") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Goth") {
+ tag.region = "UA";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Chrs") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Sogd") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "und" && tag.script === "Yezi") {
+ tag.region = "GE";
+ break;
+ }
+ if (tag.language === "uz") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "vro") {
+ tag.region = "EE";
+ break;
+ }
+ if (tag.language === "xco") {
+ tag.region = "UZ";
+ break;
+ }
+ if (tag.language === "xmf") {
+ tag.region = "GE";
+ break;
+ }
+ tag.region = "RU";
+ break;
+ case "890":
+ if (tag.language === "bs") {
+ tag.region = "BA";
+ break;
+ }
+ if (tag.language === "hr") {
+ tag.region = "HR";
+ break;
+ }
+ if (tag.language === "mk") {
+ tag.region = "MK";
+ break;
+ }
+ if (tag.language === "sl") {
+ tag.region = "SI";
+ break;
+ }
+ tag.region = "RS";
+ break;
+ default:
+ assert(false, "region not handled: " + region);
+ }
+ }
+
+ // No variant replacements are currently present.
+ // No extension replacements are currently present.
+ // Private use sequences are left as is.
+
+ }
+}
+/* eslint-enable complexity */
+
+// Canonicalize grandfathered locale identifiers.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+function updateGrandfatheredMappings(tag) {
+ assert(IsObject(tag), "tag is an object");
+
+ // We're mapping regular grandfathered tags to non-grandfathered form here.
+ // Other tags remain unchanged.
+ //
+ // regular = "art-lojban"
+ // / "cel-gaulish"
+ // / "no-bok"
+ // / "no-nyn"
+ // / "zh-guoyu"
+ // / "zh-hakka"
+ // / "zh-min"
+ // / "zh-min-nan"
+ // / "zh-xiang"
+ //
+ // Therefore we can quickly exclude most tags by checking every
+ // |unicode_locale_id| subcomponent for characteristics not shared by any of
+ // the regular grandfathered (RG) tags:
+ //
+ // * Real-world |unicode_language_subtag|s are all two or three letters,
+ // so don't waste time running a useless |language.length > 3| fast-path.
+ // * No RG tag has a "script"-looking component.
+ // * No RG tag has a "region"-looking component.
+ // * The RG tags that match |unicode_locale_id| (art-lojban, cel-gaulish,
+ // zh-guoyu, zh-hakka, zh-xiang) have exactly one "variant". (no-bok,
+ // no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag
+ // that |unicode_locale_id| doesn't support.)
+ // * No RG tag contains |extensions| or |pu_extensions|.
+ if (tag.script !== undefined ||
+ tag.region !== undefined ||
+ tag.variants.length !== 1 ||
+ tag.extensions.length !== 0 ||
+ tag.privateuse !== undefined)
+ {
+ return;
+ }
+
+ // art-lojban -> jbo
+ if (tag.language === "art" && tag.variants[0] === "lojban") {
+ tag.language = "jbo";
+ tag.variants.length = 0;
+ }
+
+ // cel-gaulish -> xtg-x-cel-gaulish
+ else if (tag.language === "cel" && tag.variants[0] === "gaulish") {
+ tag.language = "xtg";
+ tag.variants.length = 0;
+ tag.privateuse = "x-cel-gaulish";
+ }
+
+ // zh-guoyu -> zh
+ else if (tag.language === "zh" && tag.variants[0] === "guoyu") {
+ tag.language = "zh";
+ tag.variants.length = 0;
+ }
+
+ // zh-hakka -> hak
+ else if (tag.language === "zh" && tag.variants[0] === "hakka") {
+ tag.language = "hak";
+ tag.variants.length = 0;
+ }
+
+ // zh-xiang -> hsn
+ else if (tag.language === "zh" && tag.variants[0] === "xiang") {
+ tag.language = "hsn";
+ tag.variants.length = 0;
+ }
+}
diff --git a/js/src/builtin/intl/NumberFormat.js b/js/src/builtin/intl/NumberFormat.js
index bba78d7a0d..64158c1103 100644
--- a/js/src/builtin/intl/NumberFormat.js
+++ b/js/src/builtin/intl/NumberFormat.js
@@ -8,7 +8,7 @@
/**
* NumberFormat internal properties.
*
- * Spec: ECMAScript Internationalization API Specification, 9.1 and 11.2.3.
+ * Spec: ECMAScript Internationalization API Specification, 9.1 and 11.3.3.
*/
var numberFormatInternalProperties = {
localeData: numberFormatLocaleData,
@@ -35,44 +35,38 @@ function resolveNumberFormatInternals(lazyNumberFormatData) {
var internalProps = std_Object_create(null);
- // Step 3.
- var requestedLocales = lazyNumberFormatData.requestedLocales;
-
- // Compute options that impact interpretation of locale.
- // Step 6.
- var opt = lazyNumberFormatData.opt;
-
var NumberFormat = numberFormatInternalProperties;
- // Step 9.
+ // Compute effective locale.
+
+ // Step 7.
var localeData = NumberFormat.localeData;
- // Step 10.
+ // Step 8.
var r = ResolveLocale(callFunction(NumberFormat.availableLocales, NumberFormat),
lazyNumberFormatData.requestedLocales,
lazyNumberFormatData.opt,
NumberFormat.relevantExtensionKeys,
localeData);
- // Steps 11-12. (Step 13 is not relevant to our implementation.)
+ // Steps 9-10. (Step 11 is not relevant to our implementation.)
internalProps.locale = r.locale;
internalProps.numberingSystem = r.nu;
// Compute formatting options.
- // Step 15.
+ // Step 13.
var s = lazyNumberFormatData.style;
internalProps.style = s;
- // Steps 19, 21.
+ // Steps 17, 19.
if (s === "currency") {
internalProps.currency = lazyNumberFormatData.currency;
internalProps.currencyDisplay = lazyNumberFormatData.currencyDisplay;
}
+ // Step 22.
internalProps.minimumIntegerDigits = lazyNumberFormatData.minimumIntegerDigits;
-
internalProps.minimumFractionDigits = lazyNumberFormatData.minimumFractionDigits;
-
internalProps.maximumFractionDigits = lazyNumberFormatData.maximumFractionDigits;
if ("minimumSignificantDigits" in lazyNumberFormatData) {
@@ -83,12 +77,9 @@ function resolveNumberFormatInternals(lazyNumberFormatData) {
internalProps.maximumSignificantDigits = lazyNumberFormatData.maximumSignificantDigits;
}
- // Step 27.
+ // Step 24.
internalProps.useGrouping = lazyNumberFormatData.useGrouping;
- // Step 34.
- internalProps.boundFormat = undefined;
-
// The caller is responsible for associating |internalProps| with the right
// object using |setInternalProperties|.
return internalProps;
@@ -118,19 +109,21 @@ function getNumberFormatInternals(obj) {
/**
- * UnwrapNumberFormat(nf)
+ * 11.1.11 UnwrapNumberFormat( nf )
*/
function UnwrapNumberFormat(nf, methodName) {
- // Step 1.
+ // Step 1 (not applicable in our implementation).
+
+ // Step 2.
if ((!IsObject(nf) || !IsNumberFormat(nf)) && nf instanceof GetNumberFormatConstructor()) {
nf = nf[intlFallbackSymbol()];
}
- // Step 2.
+ // Step 3.
if (!IsObject(nf) || !IsNumberFormat(nf))
ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "NumberFormat", methodName, "NumberFormat");
- // Step 3.
+ // Step 4.
return nf;
}
@@ -141,18 +134,18 @@ function UnwrapNumberFormat(nf, methodName) {
* Spec: ECMAScript Internationalization API Specification, 11.1.1.
*/
function SetNumberFormatDigitOptions(lazyData, options, mnfdDefault) {
- // We skip Step 1 because we set the properties on a lazyData object.
+ // We skip step 1 because we set the properties on a lazyData object.
- // Step 2-3.
+ // Steps 2-4.
assert(IsObject(options), "SetNumberFormatDigitOptions");
assert(typeof mnfdDefault === "number", "SetNumberFormatDigitOptions");
- // Steps 4-6.
+ // Steps 5-8.
const mnid = GetNumberOption(options, "minimumIntegerDigits", 1, 21, 1);
const mnfd = GetNumberOption(options, "minimumFractionDigits", 0, 20, mnfdDefault);
const mxfd = GetNumberOption(options, "maximumFractionDigits", mnfd, 20);
- // Steps 7-8.
+ // Steps 9-10.
let mnsd = options.minimumSignificantDigits;
let mxsd = options.maximumSignificantDigits;
@@ -196,17 +189,9 @@ function toASCIIUpperCase(s) {
*
* Spec: ECMAScript Internationalization API Specification, 6.3.1.
*/
-function getIsWellFormedCurrencyCodeRE() {
- return internalIntlRegExps.isWellFormedCurrencyCodeRE ||
- (internalIntlRegExps.isWellFormedCurrencyCodeRE = RegExpCreate("[^A-Z]"));
-}
-
function IsWellFormedCurrencyCode(currency) {
- var c = ToString(currency);
- var normalized = toASCIIUpperCase(c);
- if (normalized.length !== 3)
- return false;
- return !regexp_test_no_statics(getIsWellFormedCurrencyCodeRE(), normalized);
+ assert(typeof currency === "string", "currency is a string value");
+ return currency.length === 3 && IsASCIIAlphaString(currency);
}
/**
@@ -218,15 +203,12 @@ function IsWellFormedCurrencyCode(currency) {
* This later work occurs in |resolveNumberFormatInternals|; steps not noted
* here occur there.
*
- * Spec: ECMAScript Internationalization API Specification, 11.1.1.
+ * Spec: ECMAScript Internationalization API Specification, 11.1.2.
*/
function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
assert(IsObject(numberFormat), "InitializeNumberFormat called with non-object");
assert(IsNumberFormat(numberFormat), "InitializeNumberFormat called with non-NumberFormat");
- // Steps 1-2 (These steps are no longer required and should be removed
- // from the spec; https://github.com/tc39/ecma402/issues/115).
-
// Lazy NumberFormat data has the following structure:
//
// {
@@ -258,11 +240,11 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
// subset of them.
var lazyNumberFormatData = std_Object_create(null);
- // Step 3.
+ // Step 1.
var requestedLocales = CanonicalizeLocaleList(locales);
lazyNumberFormatData.requestedLocales = requestedLocales;
- // Steps 4-5.
+ // Steps 2-3.
//
// If we ever need more speed here at startup, we should try to detect the
// case where |options === undefined| and Object.prototype hasn't been
@@ -275,20 +257,20 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
options = ToObject(options);
// Compute options that impact interpretation of locale.
- // Step 6.
+ // Step 4.
var opt = new Record();
lazyNumberFormatData.opt = opt;
- // Steps 7-8.
+ // Steps 5-6.
var matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
opt.localeMatcher = matcher;
// Compute formatting options.
- // Step 14.
+ // Step 12.
var s = GetOption(options, "style", "string", ["decimal", "percent", "currency"], "decimal");
lazyNumberFormatData.style = s;
- // Steps 16-19.
+ // Steps 14-17.
var c = GetOption(options, "currency", "string", undefined, undefined);
if (c !== undefined && !IsWellFormedCurrencyCode(c))
ThrowRangeError(JSMSG_INVALID_CURRENCY_CODE, c);
@@ -303,12 +285,12 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
cDigits = CurrencyDigits(c);
}
- // Step 20.
+ // Step 18.
var cd = GetOption(options, "currencyDisplay", "string", ["code", "symbol", "name"], "symbol");
if (s === "currency")
lazyNumberFormatData.currencyDisplay = cd;
- // Steps 22-24.
+ // Steps 20-22.
SetNumberFormatDigitOptions(lazyNumberFormatData, options, s === "currency" ? cDigits: 0);
// Step 25.
@@ -322,16 +304,19 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
std_Math_max(lazyNumberFormatData.minimumFractionDigits, mxfdDefault);
}
- // Step 26.
+ // Step 23.
var g = GetOption(options, "useGrouping", "boolean", undefined, true);
lazyNumberFormatData.useGrouping = g;
- // Steps 35-36.
+ // Step 31.
//
// We've done everything that must be done now: mark the lazy data as fully
// computed and install it.
initializeIntlObject(numberFormat, "NumberFormat", lazyNumberFormatData);
+ // 11.2.1, steps 4-5.
+ // TODO: spec issue - The current spec doesn't have the IsObject check,
+ // which means |Intl.NumberFormat.call(null)| is supposed to throw here.
if (numberFormat !== thisValue && thisValue instanceof GetNumberFormatConstructor()) {
if (!IsObject(thisValue))
ThrowTypeError(JSMSG_NOT_NONNULL_OBJECT, typeof thisValue);
@@ -342,6 +327,7 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
return thisValue;
}
+ // 11.2.1, step 6.
return numberFormat;
}
@@ -386,15 +372,12 @@ var currencyDigits = {
/**
* Returns the number of decimal digits to be used for the given currency.
*
- * Spec: ECMAScript Internationalization API Specification, 11.1.1.
+ * Spec: ECMAScript Internationalization API Specification, 11.1.3.
*/
-function getCurrencyDigitsRE() {
- return internalIntlRegExps.currencyDigitsRE ||
- (internalIntlRegExps.currencyDigitsRE = RegExpCreate("^[A-Z]{3}$"));
-}
function CurrencyDigits(currency) {
- assert(typeof currency === "string", "CurrencyDigits");
- assert(regexp_test_no_statics(getCurrencyDigitsRE(), currency), "CurrencyDigits");
+ assert(typeof currency === "string", "currency is a string value");
+ assert(IsWellFormedCurrencyCode(currency), "currency is well-formed");
+ assert(currency == toASCIIUpperCase(currency), "currency is all upper-case");
if (hasOwn(currency, currencyDigits))
return currencyDigits[currency];
@@ -407,14 +390,19 @@ function CurrencyDigits(currency) {
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript Internationalization API Specification, 11.2.2.
+ * Spec: ECMAScript Internationalization API Specification, 11.3.2.
*/
function Intl_NumberFormat_supportedLocalesOf(locales /*, options*/) {
var options = arguments.length > 1 ? arguments[1] : undefined;
+ // Step 1.
var availableLocales = callFunction(numberFormatInternalProperties.availableLocales,
numberFormatInternalProperties);
+
+ // Step 2.
var requestedLocales = CanonicalizeLocaleList(locales);
+
+ // Step 3.
return SupportedLocales(availableLocales, requestedLocales, options);
}
@@ -427,8 +415,8 @@ function getNumberingSystems(locale) {
// Algorithmic numbering systems are typically tied to one locale, so for
// lack of information we don't offer them. To increase chances that
// other software will process output correctly, we further restrict to
- // those decimal numbering systems explicitly listed in table 2 of
- // the ECMAScript Internationalization API Specification, 11.3.2, which
+ // those decimal numbering systems explicitly listed in table 3 of
+ // the ECMAScript Internationalization API Specification, 11.1.6, which
// in turn are those with full specifications in version 21 of Unicode
// Technical Standard #35 using digits that were defined in Unicode 5.0,
// the Unicode version supported in Windows Vista.
@@ -459,7 +447,7 @@ function numberFormatLocaleData() {
/**
* Function to be bound and returned by Intl.NumberFormat.prototype.format.
*
- * Spec: ECMAScript Internationalization API Specification, 11.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 11.1.4.
*/
function numberFormatFormatToBind(value) {
// Steps 1.a.i implemented by ECMAScript declaration binding instantiation,
@@ -476,7 +464,7 @@ function numberFormatFormatToBind(value) {
* representing the result of calling ToNumber(value) according to the
* effective locale and the formatting options of this NumberFormat.
*
- * Spec: ECMAScript Internationalization API Specification, 11.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 11.4.3.
*/
function Intl_NumberFormat_format_get() {
// Steps 1-3.
@@ -486,12 +474,11 @@ function Intl_NumberFormat_format_get() {
// Step 4.
if (internals.boundFormat === undefined) {
- // Step 4.a.
- var F = numberFormatFormatToBind;
+ // Steps 4.a-b.
+ var F = callFunction(FunctionBind, numberFormatFormatToBind, nf);
- // Steps 4.b-d.
- var bf = callFunction(FunctionBind, F, nf);
- internals.boundFormat = bf;
+ // Step 4.c.
+ internals.boundFormat = F;
}
// Step 5.
@@ -499,6 +486,9 @@ function Intl_NumberFormat_format_get() {
}
_SetCanonicalName(Intl_NumberFormat_format_get, "get format");
+/**
+ * 11.4.4 Intl.NumberFormat.prototype.formatToParts ( value )
+ */
function Intl_NumberFormat_formatToParts(value) {
// Steps 1-3.
var nf = UnwrapNumberFormat(this, "formatToParts");
@@ -516,14 +506,15 @@ function Intl_NumberFormat_formatToParts(value) {
/**
* Returns the resolved options for a NumberFormat object.
*
- * Spec: ECMAScript Internationalization API Specification, 11.3.3 and 11.4.
+ * Spec: ECMAScript Internationalization API Specification, 11.4.5.
*/
function Intl_NumberFormat_resolvedOptions() {
- // Invoke |UnwrapNumberFormat| per introduction of section 11.3.
+ // Steps 1-3.
var nf = UnwrapNumberFormat(this, "resolvedOptions");
var internals = getNumberFormatInternals(nf);
+ // Steps 4-5.
var result = {
locale: internals.locale,
numberingSystem: internals.numberingSystem,
@@ -533,17 +524,31 @@ function Intl_NumberFormat_resolvedOptions() {
maximumFractionDigits: internals.maximumFractionDigits,
useGrouping: internals.useGrouping
};
- var optionalProperties = [
- "currency",
- "currencyDisplay",
- "minimumSignificantDigits",
- "maximumSignificantDigits"
- ];
- for (var i = 0; i < optionalProperties.length; i++) {
- var p = optionalProperties[i];
- if (hasOwn(p, internals))
- _DefineDataProperty(result, p, internals[p]);
+
+ // currency and currencyDisplay are only present for currency formatters.
+ assert(hasOwn("currency", internals) === (internals.style === "currency"),
+ "currency is present iff style is 'currency'");
+ assert(hasOwn("currencyDisplay", internals) === (internals.style === "currency"),
+ "currencyDisplay is present iff style is 'currency'");
+
+ if (hasOwn("currency", internals)) {
+ _DefineDataProperty(result, "currency", internals.currency);
+ _DefineDataProperty(result, "currencyDisplay", internals.currencyDisplay);
+ }
+
+ // Min/Max significant digits are either both present or not at all.
+ assert(hasOwn("minimumSignificantDigits", internals) ===
+ hasOwn("maximumSignificantDigits", internals),
+ "minimumSignificantDigits is present iff maximumSignificantDigits is present");
+
+ if (hasOwn("minimumSignificantDigits", internals)) {
+ _DefineDataProperty(result, "minimumSignificantDigits",
+ internals.minimumSignificantDigits);
+ _DefineDataProperty(result, "maximumSignificantDigits",
+ internals.maximumSignificantDigits);
}
+
+ // Step 6.
return result;
}
diff --git a/js/src/builtin/intl/PluralRules.cpp b/js/src/builtin/intl/PluralRules.cpp
index 78bd9e5d74..63d399f818 100644
--- a/js/src/builtin/intl/PluralRules.cpp
+++ b/js/src/builtin/intl/PluralRules.cpp
@@ -79,7 +79,7 @@ static const JSFunctionSpec pluralRules_methods[] = {
/**
* PluralRules constructor.
- * Spec: ECMAScript 402 API, PluralRules, 1.1
+ * Spec: ECMAScript 402 API, PluralRules, 13.2.1
*/
static bool
PluralRules(JSContext* cx, const CallArgs& args, bool construct)
diff --git a/js/src/builtin/intl/PluralRules.js b/js/src/builtin/intl/PluralRules.js
index 1e138a8830..d687296245 100644
--- a/js/src/builtin/intl/PluralRules.js
+++ b/js/src/builtin/intl/PluralRules.js
@@ -7,7 +7,7 @@
/**
* PluralRules internal properties.
*
- * Spec: ECMAScript 402 API, PluralRules, 1.3.3.
+ * Spec: ECMAScript 402 API, PluralRules, 13.3.3.
*/
var pluralRulesInternalProperties = {
localeData: pluralRulesLocaleData,
@@ -44,20 +44,25 @@ function resolvePluralRulesInternals(lazyPluralRulesData) {
var PluralRules = pluralRulesInternalProperties;
- // Step 13.
+ // Compute effective locale.
+
+ // Step 10.
+ var localeData = PluralRules.localeData;
+
+ // Step 11.
const r = ResolveLocale(callFunction(PluralRules.availableLocales, PluralRules),
- lazyPluralRulesData.requestedLocales,
- lazyPluralRulesData.opt,
- PluralRules.relevantExtensionKeys, PluralRules.localeData);
+ lazyPluralRulesData.requestedLocales,
+ lazyPluralRulesData.opt,
+ PluralRules.relevantExtensionKeys,
+ localeData);
- // Step 14.
+ // Step 12.
internalProps.locale = r.locale;
- internalProps.type = lazyPluralRulesData.type;
- internalProps.pluralCategories = intl_GetPluralCategories(
- internalProps.locale,
- internalProps.type);
+ // Step 8.
+ internalProps.type = lazyPluralRulesData.type;
+ // Step 9.
internalProps.minimumIntegerDigits = lazyPluralRulesData.minimumIntegerDigits;
internalProps.minimumFractionDigits = lazyPluralRulesData.minimumFractionDigits;
internalProps.maximumFractionDigits = lazyPluralRulesData.maximumFractionDigits;
@@ -68,6 +73,9 @@ function resolvePluralRulesInternals(lazyPluralRulesData) {
internalProps.maximumSignificantDigits = lazyPluralRulesData.maximumSignificantDigits;
}
+ // Step 13 (lazily computed on first access).
+ internalProps.pluralCategories = null;
+
return internalProps;
}
@@ -99,15 +107,12 @@ function getPluralRulesInternals(obj) {
* This later work occurs in |resolvePluralRulesInternals|; steps not noted
* here occur there.
*
- * Spec: ECMAScript 402 API, PluralRules, 1.1.1.
+ * Spec: ECMAScript 402 API, PluralRules, 13.1.1.
*/
function InitializePluralRules(pluralRules, locales, options) {
assert(IsObject(pluralRules), "InitializePluralRules called with non-object");
assert(IsPluralRules(pluralRules), "InitializePluralRules called with non-PluralRules");
- // Steps 1-2 (These steps are no longer required and should be removed
- // from the spec; https://github.com/tc39/ecma402/issues/115).
-
// Lazy PluralRules data has the following structure:
//
// {
@@ -133,30 +138,29 @@ function InitializePluralRules(pluralRules, locales, options) {
// subset of them.
const lazyPluralRulesData = std_Object_create(null);
- // Step 3.
+ // Step 1.
let requestedLocales = CanonicalizeLocaleList(locales);
lazyPluralRulesData.requestedLocales = requestedLocales;
- // Steps 4-5.
+ // Steps 2-3.
if (options === undefined)
options = {};
else
options = ToObject(options);
- // Step 6.
- const type = GetOption(options, "type", "string", ["cardinal", "ordinal"], "cardinal");
- lazyPluralRulesData.type = type;
-
- // Step 8.
+ // Step 4.
let opt = new Record();
lazyPluralRulesData.opt = opt;
- // Steps 9-10.
+ // Steps 5-6.
let matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
opt.localeMatcher = matcher;
+ // Step 7.
+ const type = GetOption(options, "type", "string", ["cardinal", "ordinal"], "cardinal");
+ lazyPluralRulesData.type = type;
- // Step 11.
+ // Step 9.
SetNumberFormatDigitOptions(lazyPluralRulesData, options, 0);
// Step 12.
@@ -165,6 +169,10 @@ function InitializePluralRules(pluralRules, locales, options) {
std_Math_max(lazyPluralRulesData.minimumFractionDigits, 3);
}
+ // Step 15.
+ //
+ // We've done everything that must be done now: mark the lazy data as fully
+ // computed and install it.
initializeIntlObject(pluralRules, "PluralRules", lazyPluralRulesData)
}
@@ -173,7 +181,7 @@ function InitializePluralRules(pluralRules, locales, options) {
* matching (possibly fallback) locale. Locales appear in the same order in the
* returned list as in the input list.
*
- * Spec: ECMAScript 402 API, PluralRules, 1.3.2.
+ * Spec: ECMAScript 402 API, PluralRules, 13.3.2.
*/
function Intl_PluralRules_supportedLocalesOf(locales /*, options*/) {
var options = arguments.length > 1 ? arguments[1] : undefined;
@@ -193,20 +201,20 @@ function Intl_PluralRules_supportedLocalesOf(locales /*, options*/) {
* the number passed as value according to the
* effective locale and the formatting options of this PluralRules.
*
- * Spec: ECMAScript 402 API, PluralRules, 1.4.3.
+ * Spec: ECMAScript 402 API, PluralRules, 13.4.3.
*/
function Intl_PluralRules_select(value) {
// Step 1.
let pluralRules = this;
- // Step 2.
+ // Steps 2-3.
if (!IsObject(pluralRules) || !IsPluralRules(pluralRules))
ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "PluralRules", "select", "PluralRules");
// Ensure the PluralRules internals are resolved.
getPluralRulesInternals(pluralRules);
- // Steps 3-4.
+ // Step 4.
let n = ToNumber(value);
// Step 5.
@@ -216,17 +224,34 @@ function Intl_PluralRules_select(value) {
/**
* Returns the resolved options for a PluralRules object.
*
- * Spec: ECMAScript 402 API, PluralRules, 1.4.4.
+ * Spec: ECMAScript 402 API, PluralRules, 13.4.4.
*/
function Intl_PluralRules_resolvedOptions() {
- // Check "this PluralRules object" per introduction of section 1.4.
- if (!IsObject(this) || !IsPluralRules(this)) {
+ // Step 1.
+ var pluralRules = this;
+
+ // Steps 2-3.
+ if (!IsObject(pluralRules) || !IsPluralRules(pluralRules)) {
ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "PluralRules", "resolvedOptions",
"PluralRules");
}
- var internals = getPluralRulesInternals(this);
+ var internals = getPluralRulesInternals(pluralRules);
+
+ var internalsPluralCategories = internals.pluralCategories;
+ if (internalsPluralCategories === null) {
+ internalsPluralCategories = intl_GetPluralCategories(internals.locale, internals.type);
+ internals.pluralCategories = internalsPluralCategories;
+ }
+
+ // TODO: The current spec actually requires returning the internal array
+ // object and not a copy of it.
+ // <https://github.com/tc39/proposal-intl-plural-rules/issues/28#issuecomment-341557030>
+ var pluralCategories = [];
+ for (var i = 0; i < internalsPluralCategories.length; i++)
+ _DefineDataProperty(pluralCategories, i, internalsPluralCategories[i]);
+ // Steps 4-5.
var result = {
locale: internals.locale,
type: internals.type,
@@ -236,16 +261,19 @@ function Intl_PluralRules_resolvedOptions() {
maximumFractionDigits: internals.maximumFractionDigits,
};
- var optionalProperties = [
- "minimumSignificantDigits",
- "maximumSignificantDigits"
- ];
+ // Min/Max significant digits are either both present or not at all.
+ assert(hasOwn("minimumSignificantDigits", internals) ===
+ hasOwn("maximumSignificantDigits", internals),
+ "minimumSignificantDigits is present iff maximumSignificantDigits is present");
- for (var i = 0; i < optionalProperties.length; i++) {
- var p = optionalProperties[i];
- if (hasOwn(p, internals))
- _DefineDataProperty(result, p, internals[p]);
+ if (hasOwn("minimumSignificantDigits", internals)) {
+ _DefineDataProperty(result, "minimumSignificantDigits",
+ internals.minimumSignificantDigits);
+ _DefineDataProperty(result, "maximumSignificantDigits",
+ internals.maximumSignificantDigits);
}
+
+ // Step 6.
return result;
}
diff --git a/js/src/builtin/intl/make_intl_data.py b/js/src/builtin/intl/make_intl_data.py
index 02bf350814..f2a6b32082 100644
--- a/js/src/builtin/intl/make_intl_data.py
+++ b/js/src/builtin/intl/make_intl_data.py
@@ -6,19 +6,14 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
""" Usage:
- make_intl_data.py langtags [language-subtag-registry.txt]
+ make_intl_data.py langtags [ldmlSupplemental.dtd supplementalMetadata.xml likelySubtags.xml]
make_intl_data.py tzdata
Target "langtags":
This script extracts information about mappings between deprecated and
- current BCP 47 language tags from the IANA Language Subtag Registry and
- converts it to JavaScript object definitions in
- LangTagMappingsGenerated.js. The definitions are used in Intl.js.
-
- The IANA Language Subtag Registry is imported from
- https://www.iana.org/assignments/language-subtag-registry
- and uses the syntax specified in
- https://tools.ietf.org/html/rfc5646#section-3
+ current Unicode BCP 47 locale identifiers from CLDR and converts it to
+ JavaScript object definitions in LangTagMappingsGenerated.js. The
+ definitions are used in Intl.js.
Target "tzdata":
@@ -32,202 +27,714 @@ import os
import re
import io
import codecs
+import shutil
+import subprocess
import sys
import tarfile
import tempfile
import urllib2
-import urlparse
-from contextlib import closing
+from contextlib import closing, contextmanager
from functools import partial
from itertools import chain, ifilter, ifilterfalse, imap, tee
from operator import attrgetter, itemgetter
+from urlparse import urlsplit, urlunsplit
+
+def writeMappingHeader(println, description, source, url):
+ if type(description) is not list:
+ description = [description]
+ for desc in description:
+ println(u"// {0}".format(desc))
+ println(u"// Derived from {0}.".format(source))
+ println(u"// {0}".format(url))
+
+def writeMappingsVar(println, mapping, name, description, source, url):
+ """ Writes a variable definition with a mapping table.
+
+ Writes the contents of dictionary |mapping| through the |println|
+ function with the given variable name and a comment with description,
+ source, and URL.
+ """
+ println(u"")
+ writeMappingHeader(println, description, source, url)
+ println(u"var {0} = {{".format(name))
+ for key in sorted(mapping):
+ if not isinstance(mapping[key], dict):
+ value = mapping[key]
+ if isinstance(value, bool):
+ value = "true" if value else "false"
+ else:
+ value = '"{0}"'.format(value)
+ else:
+ preferred = mapping[key]["preferred"]
+ prefix = mapping[key]["prefix"]
+ if key != preferred:
+ raise Exception("Expected '{0}' matches preferred locale '{1}'".format(key, preferred))
+ value = '"{0}"'.format(prefix)
+ println(u' "{0}": {1},'.format(key, value))
+ println(u"};")
+
+def writeUpdateLocaleIdMappingsFunction(println,
+ complex_language_mappings,
+ complex_region_mappings,
+ description, source, url):
+ """ Writes a function definition that performs language tag mapping. """
+ println(u"")
+ writeMappingHeader(println, description, source, url)
+ println(u"""\
+/* eslint-disable complexity */
+function updateLocaleIdMappings(tag) {
+ assert(IsObject(tag), "tag is an object");
+
+ // Replace deprecated language tags with their preferred values.
+ var language = tag.language;
+ if (hasOwn(language, languageMappings)) {
+ tag.language = languageMappings[language];
+ } else if (hasOwn(language, complexLanguageMappings)) {
+ switch (language) {""")
+
+ # Merge duplicate language entries.
+ language_aliases = {}
+ for (deprecated_language, (language, script, region)) in (
+ sorted(complex_language_mappings.items(), key=itemgetter(0))
+ ):
+ key = (language, script, region)
+ if key not in language_aliases:
+ language_aliases[key] = []
+ else:
+ language_aliases[key].append(deprecated_language)
-def readRegistryRecord(registry):
- """ Yields the records of the IANA Language Subtag Registry as dictionaries. """
- record = {}
- for line in registry:
- line = line.strip()
- if line == "":
+ for (deprecated_language, (language, script, region)) in (
+ sorted(complex_language_mappings.items(), key=itemgetter(0))
+ ):
+ key = (language, script, region)
+ if deprecated_language in language_aliases[key]:
continue
- if line == "%%":
- yield record
- record = {}
+
+ for lang in [deprecated_language] + language_aliases[key]:
+ println(u"""
+ case "{}":
+ """.format(lang).rstrip().strip("\n"))
+
+ println(u"""
+ tag.language = "{}";
+ """.format(language).rstrip().strip("\n"))
+ if script is not None:
+ println(u"""
+ if (tag.script === undefined)
+ tag.script = "{}";
+ """.format(script).rstrip().strip("\n"))
+ if region is not None:
+ println(u"""
+ if (tag.region === undefined)
+ tag.region = "{}";
+ """.format(region).rstrip().strip("\n"))
+ println(u"""
+ break;
+ """.rstrip().strip("\n"))
+
+ println(u"""
+ default:
+ assert(false, "language not handled: " + language);
+ }
+ }
+
+ // No script replacements are currently present.
+
+ // Replace deprecated subtags with their preferred values.
+ var region = tag.region;
+ if (region !== undefined) {
+ if (hasOwn(region, regionMappings)) {
+ tag.region = regionMappings[region];
+ } else if (hasOwn(region, complexRegionMappings)) {
+ switch (region) {""".lstrip("\n"))
+
+ # |non_default_replacements| is a list and hence not hashable. Convert it
+ # to a string to get a proper hashable value.
+ def hash_key(default, non_default_replacements):
+ return (default, str(sorted(str(v) for v in non_default_replacements)))
+
+ # Merge duplicate region entries.
+ region_aliases = {}
+ for (deprecated_region, (default, non_default_replacements)) in (
+ sorted(complex_region_mappings.items(), key=itemgetter(0))
+ ):
+ key = hash_key(default, non_default_replacements)
+ if key not in region_aliases:
+ region_aliases[key] = []
else:
- if ":" in line:
- key, value = line.split(":", 1)
- key, value = key.strip(), value.strip()
- record[key] = value
+ region_aliases[key].append(deprecated_region)
+
+ for (deprecated_region, (default, non_default_replacements)) in (
+ sorted(complex_region_mappings.items(), key=itemgetter(0))
+ ):
+ key = hash_key(default, non_default_replacements)
+ if deprecated_region in region_aliases[key]:
+ continue
+
+ for region in [deprecated_region] + region_aliases[key]:
+ println(u"""
+ case "{}":
+ """.format(region).rstrip().strip("\n"))
+
+ for (language, script, region) in sorted(non_default_replacements, key=itemgetter(0)):
+ if script is None:
+ println(u"""
+ if (tag.language === "{}") {{
+ """.format(language).rstrip().strip("\n"))
else:
- # continuation line
- record[key] += " " + line
- if record:
- yield record
- return
+ println(u"""
+ if (tag.language === "{}" && tag.script === "{}") {{
+ """.format(language, script).rstrip().strip("\n"))
+ println(u"""
+ tag.region = "{}";
+ break;
+ }}
+ """.format(region).rstrip().strip("\n"))
+
+ println(u"""
+ tag.region = "{}";
+ break;
+ """.format(default).rstrip().strip("\n"))
+
+ println(u"""
+ default:
+ assert(false, "region not handled: " + region);
+ }
+ }
+
+ // No variant replacements are currently present.
+ // No extension replacements are currently present.
+ // Private use sequences are left as is.
+
+ }
+}
+/* eslint-enable complexity */
+""".strip("\n"))
+
+
+def writeGrandfatheredMappingsFunction(println,
+ grandfathered_mappings,
+ description, source, url):
+ """ Writes a function definition that maps grandfathered language tags. """
+ println(u"")
+ writeMappingHeader(println, description, source, url)
+ println(u"""\
+function updateGrandfatheredMappings(tag) {
+ assert(IsObject(tag), "tag is an object");
+
+ // We're mapping regular grandfathered tags to non-grandfathered form here.
+ // Other tags remain unchanged.
+ //
+ // regular = "art-lojban"
+ // / "cel-gaulish"
+ // / "no-bok"
+ // / "no-nyn"
+ // / "zh-guoyu"
+ // / "zh-hakka"
+ // / "zh-min"
+ // / "zh-min-nan"
+ // / "zh-xiang"
+ //
+ // Therefore we can quickly exclude most tags by checking every
+ // |unicode_locale_id| subcomponent for characteristics not shared by any of
+ // the regular grandfathered (RG) tags:
+ //
+ // * Real-world |unicode_language_subtag|s are all two or three letters,
+ // so don't waste time running a useless |language.length > 3| fast-path.
+ // * No RG tag has a "script"-looking component.
+ // * No RG tag has a "region"-looking component.
+ // * The RG tags that match |unicode_locale_id| (art-lojban, cel-gaulish,
+ // zh-guoyu, zh-hakka, zh-xiang) have exactly one "variant". (no-bok,
+ // no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag
+ // that |unicode_locale_id| doesn't support.)
+ // * No RG tag contains |extensions| or |pu_extensions|.
+ if (tag.script !== undefined ||
+ tag.region !== undefined ||
+ tag.variants.length !== 1 ||
+ tag.extensions.length !== 0 ||
+ tag.privateuse !== undefined)
+ {
+ return;
+ }""")
+
+ # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>.
+ #
+ # Doesn't allow any 'extensions' subtags.
+ re_unicode_locale_id = re.compile(
+ r"""
+ ^
+ # unicode_language_id = unicode_language_subtag
+ # unicode_language_subtag = alpha{2,3} | alpha{5,8}
+ (?P<language>[a-z]{2,3}|[a-z]{5,8})
+
+ # (sep unicode_script_subtag)?
+ # unicode_script_subtag = alpha{4}
+ (?:-(?P<script>[a-z]{4}))?
+
+ # (sep unicode_region_subtag)?
+ # unicode_region_subtag = (alpha{2} | digit{3})
+ (?:-(?P<region>([a-z]{2}|[0-9]{3})))?
+
+ # (sep unicode_variant_subtag)*
+ # unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
+ (?P<variants>(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+)?
+
+ # pu_extensions?
+ # pu_extensions = sep [xX] (sep alphanum{1,8})+
+ (?:-(?P<privateuse>x(-[a-z0-9]{1,8})+))?
+ $
+ """, re.IGNORECASE | re.VERBOSE)
+
+ is_first = True
+
+ for (tag, modern) in sorted(grandfathered_mappings.items(), key=itemgetter(0)):
+ tag_match = re_unicode_locale_id.match(tag)
+ assert tag_match is not None
+
+ tag_language = tag_match.group("language")
+ assert tag_match.group("script") is None, (
+ "{} does not contain a script subtag".format(tag))
+ assert tag_match.group("region") is None, (
+ "{} does not contain a region subtag".format(tag))
+ tag_variants = tag_match.group("variants")
+ assert tag_variants is not None, (
+ "{} contains a variant subtag".format(tag))
+ assert tag_match.group("privateuse") is None, (
+ "{} does not contain a privateuse subtag".format(tag))
+
+ tag_variant = tag_variants[1:]
+ assert "-" not in tag_variant, (
+ "{} contains only a single variant".format(tag))
+
+ modern_match = re_unicode_locale_id.match(modern)
+ assert modern_match is not None
+
+ modern_language = modern_match.group("language")
+ modern_script = modern_match.group("script")
+ modern_region = modern_match.group("region")
+ modern_variants = modern_match.group("variants")
+ modern_privateuse = modern_match.group("privateuse")
+
+ println(u"""
+ // {} -> {}
+""".format(tag, modern).rstrip())
+
+ println(u"""
+ {}if (tag.language === "{}" && tag.variants[0] === "{}") {{
+ """.format("" if is_first else "else ", tag_language, tag_variant).rstrip().strip("\n"))
+
+ is_first = False
+
+ println(u"""
+ tag.language = "{}";
+ """.format(modern_language).rstrip().strip("\n"))
+
+ if modern_script is not None:
+ println(u"""
+ tag.script = "{}";
+ """.format(modern_script).rstrip().strip("\n"))
+
+ if modern_region is not None:
+ println(u"""
+ tag.region = "{}";
+ """.format(modern_region).rstrip().strip("\n"))
+
+ if modern_variants is not None:
+ println(u"""
+ tag.variants = {};
+ """.format(sorted(modern_variants[1:].split("-"))).rstrip().strip("\n"))
+ else:
+ println(u"""
+ tag.variants.length = 0;
+ """.rstrip().strip("\n"))
+
+ if modern_privateuse is not None:
+ println(u"""
+ tag.privateuse = "{}";
+ """.format(modern_privateuse).rstrip().strip("\n"))
+
+ println(u"""
+ }""".rstrip().strip("\n"))
+ println(u"""
+}""".lstrip("\n"))
-def readRegistry(registry):
- """ Reads IANA Language Subtag Registry and extracts information for Intl.js.
+
+@contextmanager
+def TemporaryDirectory():
+ tmpDir = tempfile.mkdtemp()
+ try:
+ yield tmpDir
+ finally:
+ shutil.rmtree(tmpDir)
+
+
+def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, likely_subtags_file):
+ """ Reads CLDR Supplemental Data and extracts information for Intl.js.
Information extracted:
- - langTagMappings: mappings from complete language tags to preferred
+ - grandfatheredMappings: mappings from grandfathered tags to preferred
complete language tags
- - langSubtagMappings: mappings from subtags to preferred subtags
- - extlangMappings: mappings from extlang subtags to preferred subtags,
- with prefix to be removed
- Returns these three mappings as dictionaries, along with the registry's
- file date.
-
- We also check that mappings for language subtags don't affect extlang
- subtags and vice versa, so that CanonicalizeLanguageTag doesn't have
- to separate them for processing. Region codes are separated by case,
- and script codes by length, so they're unproblematic.
+ - languageMappings: mappings from language subtags to preferred subtags
+ - complexLanguageMappings: mappings from language subtags with complex rules
+ - regionMappings: mappings from region subtags to preferred subtags
+ - complexRegionMappings: mappings from region subtags with complex rules
+ Returns a dictionary containing these five mappings and the CLDR version.
"""
- langTagMappings = {}
- langSubtagMappings = {}
- extlangMappings = {}
- languageSubtags = set()
- extlangSubtags = set()
-
- for record in readRegistryRecord(registry):
- if "File-Date" in record:
- fileDate = record["File-Date"]
- continue
+ import xml.etree.ElementTree as ET
+
+ # <!ATTLIST version cldrVersion CDATA #FIXED "36" >
+ re_cldr_version = re.compile(
+ r"""<!ATTLIST version cldrVersion CDATA #FIXED "(?P<version>[\d|\.]+)" >""")
+
+ with io.open(supplemental_dtd_file, mode="r", encoding="utf-8") as f:
+ version_match = re_cldr_version.search(f.read())
+ assert version_match is not None, "CLDR version string not found"
+ cldr_version = version_match.group("version")
+
+ # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>.
+ re_unicode_language_id = re.compile(
+ r"""
+ ^
+ # unicode_language_id = unicode_language_subtag
+ # unicode_language_subtag = alpha{2,3} | alpha{5,8}
+ (?P<language>[a-z]{2,3}|[a-z]{5,8})
+
+ # (sep unicode_script_subtag)?
+ # unicode_script_subtag = alpha{4}
+ (?:-(?P<script>[a-z]{4}))?
+
+ # (sep unicode_region_subtag)?
+ # unicode_region_subtag = (alpha{2} | digit{3})
+ (?:-(?P<region>([a-z]{2}|[0-9]{3})))?
+
+ # (sep unicode_variant_subtag)*
+ # unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
+ (?P<variants>(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+)?
+ $
+ """, re.IGNORECASE | re.VERBOSE)
+
+ re_unicode_language_subtag = re.compile(
+ r"""
+ ^
+ # unicode_language_subtag = alpha{2,3} | alpha{5,8}
+ ([a-z]{2,3}|[a-z]{5,8})
+ $
+ """, re.IGNORECASE | re.VERBOSE)
+
+ re_unicode_region_subtag = re.compile(
+ r"""
+ ^
+ # unicode_region_subtag = (alpha{2} | digit{3})
+ ([a-z]{2}|[0-9]{3})
+ $
+ """, re.IGNORECASE | re.VERBOSE)
+
+ # The fixed list of BCP 47 grandfathered language tags.
+ grandfathered_tags = (
+ "art-lojban",
+ "cel-gaulish",
+ "en-GB-oed",
+ "i-ami",
+ "i-bnn",
+ "i-default",
+ "i-enochian",
+ "i-hak",
+ "i-klingon",
+ "i-lux",
+ "i-mingo",
+ "i-navajo",
+ "i-pwn",
+ "i-tao",
+ "i-tay",
+ "i-tsu",
+ "no-bok",
+ "no-nyn",
+ "sgn-BE-FR",
+ "sgn-BE-NL",
+ "sgn-CH-DE",
+ "zh-guoyu",
+ "zh-hakka",
+ "zh-min",
+ "zh-min-nan",
+ "zh-xiang",
+ )
- if record["Type"] == "grandfathered":
- # Grandfathered tags don't use standard syntax, so
- # CanonicalizeLanguageTag expects the mapping table to provide
- # the final form for all.
- # For langTagMappings, keys must be in lower case; values in
- # the case used in the registry.
- tag = record["Tag"]
- if "Preferred-Value" in record:
- langTagMappings[tag.lower()] = record["Preferred-Value"]
- else:
- langTagMappings[tag.lower()] = tag
- elif record["Type"] == "redundant":
- # For langTagMappings, keys must be in lower case; values in
- # the case used in the registry.
- if "Preferred-Value" in record:
- langTagMappings[record["Tag"].lower()] = record["Preferred-Value"]
- elif record["Type"] in ("language", "script", "region", "variant"):
- # For langSubtagMappings, keys and values must be in the case used
- # in the registry.
- subtag = record["Subtag"]
- if record["Type"] == "language":
- languageSubtags.add(subtag)
- if "Preferred-Value" in record:
- if subtag == "heploc":
- # The entry for heploc is unique in its complexity; handle
- # it as special case below.
- continue
- if "Prefix" in record:
- # This might indicate another heploc-like complex case.
- raise Exception("Please evaluate: subtag mapping with prefix value.")
- langSubtagMappings[subtag] = record["Preferred-Value"]
- elif record["Type"] == "extlang":
- # For extlangMappings, keys must be in the case used in the
- # registry; values are records with the preferred value and the
- # prefix to be removed.
- subtag = record["Subtag"]
- extlangSubtags.add(subtag)
- if "Preferred-Value" in record:
- preferred = record["Preferred-Value"]
- prefix = record["Prefix"]
- extlangMappings[subtag] = {"preferred": preferred, "prefix": prefix}
- else:
- # No other types are allowed by
- # https://tools.ietf.org/html/rfc5646#section-3.1.3
- assert False, "Unrecognized Type: {0}".format(record["Type"])
+ # The list of grandfathered tags which are valid Unicode BCP 47 locale identifiers.
+ unicode_bcp47_grandfathered_tags = {tag for tag in grandfathered_tags
+ if re_unicode_language_id.match(tag)}
- # Check that mappings for language subtags and extlang subtags don't affect
- # each other.
- for lang in languageSubtags:
- if lang in extlangMappings and extlangMappings[lang]["preferred"] != lang:
- raise Exception("Conflict: lang with extlang mapping: " + lang)
- for extlang in extlangSubtags:
- if extlang in langSubtagMappings:
- raise Exception("Conflict: extlang with lang mapping: " + extlang)
+ # Dictionary of simple language subtag mappings, e.g. "in" -> "id".
+ language_mappings = {}
- # Special case for heploc.
- langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97"
+ # Dictionary of complex language subtag mappings, modifying more than one
+ # subtag, e.g. "sh" -> ("sr", "Latn", None) and "cnr" -> ("sr", None, "ME").
+ complex_language_mappings = {}
- # ValidateAndCanonicalizeLanguageTag in Intl.js expects langTagMappings
- # contains no 2*3ALPHA.
- assert all(len(lang) > 3 for lang in langTagMappings.iterkeys())
+ # Dictionary of simple region subtag mappings, e.g. "DD" -> "DE".
+ region_mappings = {}
- return {"fileDate": fileDate,
- "langTagMappings": langTagMappings,
- "langSubtagMappings": langSubtagMappings,
- "extlangMappings": extlangMappings}
+ # Dictionary of complex region subtag mappings, containing more than one
+ # replacement, e.g. "SU" -> ("RU", ["AM", "AZ", "BY", ...]).
+ complex_region_mappings = {}
+ # Dictionary of grandfathered mappings to preferred values.
+ grandfathered_mappings = {}
-def writeMappingsVar(intlData, dict, name, description, fileDate, url):
- """ Writes a variable definition with a mapping table to file intlData.
+ # CLDR uses "_" as the separator for some elements. Replace it with "-".
+ def bcp47_id(cldr_id):
+ return cldr_id.replace("_", "-")
- Writes the contents of dictionary dict to file intlData with the given
- variable name and a comment with description, fileDate, and URL.
- """
- intlData.write("\n")
- intlData.write("// {0}.\n".format(description))
- intlData.write("// Derived from IANA Language Subtag Registry, file date {0}.\n".format(fileDate))
- intlData.write("// {0}\n".format(url))
- intlData.write("var {0} = {{\n".format(name))
- keys = sorted(dict)
- for key in keys:
- if isinstance(dict[key], basestring):
- value = '"{0}"'.format(dict[key])
+ # CLDR uses the canonical case for most entries, but there are some
+ # exceptions, like:
+ # <languageAlias type="drw" replacement="fa_af" reason="deprecated"/>
+ # Therefore canonicalize all tags to be on the safe side.
+ def bcp47_canonical(language, script, region):
+ # Canonical case for language subtags is lower case.
+ # Canonical case for script subtags is title case.
+ # Canonical case for region subtags is upper case.
+ return (language.lower() if language else None,
+ script.title() if script else None,
+ region.upper() if region else None)
+
+ tree = ET.parse(supplemental_metadata_file)
+
+ for language_alias in tree.iterfind(".//languageAlias"):
+ type = bcp47_id(language_alias.get("type"))
+ replacement = bcp47_id(language_alias.get("replacement"))
+
+ # Handle grandfathered mappings first.
+ if type in unicode_bcp47_grandfathered_tags:
+ grandfathered_mappings[type] = replacement
+ continue
+
+ # We're only interested in language subtag matches, so ignore any
+ # entries which have additional subtags.
+ if re_unicode_language_subtag.match(type) is None:
+ continue
+
+ if re_unicode_language_subtag.match(replacement) is not None:
+ # Canonical case for language subtags is lower-case.
+ language_mappings[type] = replacement.lower()
+ else:
+ replacement_match = re_unicode_language_id.match(replacement)
+ assert replacement_match is not None, (
+ "{} invalid Unicode BCP 47 locale identifier".format(replacement))
+ assert replacement_match.group("variants") is None, (
+ "{}: unexpected variant subtags in {}".format(type, replacement))
+
+ complex_language_mappings[type] = bcp47_canonical(replacement_match.group("language"),
+ replacement_match.group("script"),
+ replacement_match.group("region"))
+
+ for territory_alias in tree.iterfind(".//territoryAlias"):
+ type = territory_alias.get("type")
+ replacement = territory_alias.get("replacement")
+
+ # We're only interested in region subtag matches, so ignore any entries
+ # which contain legacy formats, e.g. three letter region codes.
+ if re_unicode_region_subtag.match(type) is None:
+ continue
+
+ if re_unicode_region_subtag.match(replacement) is not None:
+ # Canonical case for region subtags is upper-case.
+ region_mappings[type] = replacement.upper()
else:
- preferred = dict[key]["preferred"]
- prefix = dict[key]["prefix"]
- value = '{{preferred: "{0}", prefix: "{1}"}}'.format(preferred, prefix)
- intlData.write(' "{0}": {1},\n'.format(key, value))
- intlData.write("};\n")
+ # Canonical case for region subtags is upper-case.
+ replacements = [r.upper() for r in replacement.split(" ")]
+ assert all(
+ re_unicode_region_subtag.match(loc) is not None for loc in replacements
+ ), "{} invalid region subtags".format(replacement)
+ complex_region_mappings[type] = replacements
+
+ tree = ET.parse(likely_subtags_file)
+
+ likely_subtags = {}
+
+ for likely_subtag in tree.iterfind(".//likelySubtag"):
+ from_tag = bcp47_id(likely_subtag.get("from"))
+ from_match = re_unicode_language_id.match(from_tag)
+ assert from_match is not None, (
+ "{} invalid Unicode BCP 47 locale identifier".format(from_tag))
+ assert from_match.group("variants") is None, (
+ "unexpected variant subtags in {}".format(from_tag))
+
+ to_tag = bcp47_id(likely_subtag.get("to"))
+ to_match = re_unicode_language_id.match(to_tag)
+ assert to_match is not None, (
+ "{} invalid Unicode BCP 47 locale identifier".format(to_tag))
+ assert to_match.group("variants") is None, (
+ "unexpected variant subtags in {}".format(to_tag))
+
+ from_canonical = bcp47_canonical(from_match.group("language"),
+ from_match.group("script"),
+ from_match.group("region"))
+
+ to_canonical = bcp47_canonical(to_match.group("language"),
+ to_match.group("script"),
+ to_match.group("region"))
+
+ likely_subtags[from_canonical] = to_canonical
+
+ complex_region_mappings_final = {}
+
+ for (deprecated_region, replacements) in complex_region_mappings.items():
+ # Find all likely subtag entries which don't already contain a region
+ # subtag and whose target region is in the list of replacement regions.
+ region_likely_subtags = [(from_language, from_script, to_region)
+ for ((from_language, from_script, from_region),
+ (_, _, to_region)) in likely_subtags.items()
+ if from_region is None and to_region in replacements]
+
+ # The first replacement entry is the default region.
+ default = replacements[0]
+
+ # Find all likely subtag entries whose region matches the default region.
+ default_replacements = {(language, script)
+ for (language, script, region) in region_likely_subtags
+ if region == default}
+
+ # And finally find those entries which don't use the default region.
+ # These are the entries we're actually interested in, because those need
+ # to be handled specially when selecting the correct preferred region.
+ non_default_replacements = [(language, script, region)
+ for (language, script, region) in region_likely_subtags
+ if (language, script) not in default_replacements]
+
+ # If there are no non-default replacements, we can handle the region as
+ # part of the simple region mapping.
+ if non_default_replacements:
+ complex_region_mappings_final[deprecated_region] = (default, non_default_replacements)
+ else:
+ region_mappings[deprecated_region] = default
+ return {"version": cldr_version,
+ "grandfatheredMappings": grandfathered_mappings,
+ "languageMappings": language_mappings,
+ "complexLanguageMappings": complex_language_mappings,
+ "regionMappings": region_mappings,
+ "complexRegionMappings": complex_region_mappings_final,
+ }
-def writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings):
+def writeCLDRLanguageTagData(println, data, url):
""" Writes the language tag data to the Intl data file. """
- writeMappingsVar(intlData, langTagMappings, "langTagMappings",
- "Mappings from complete tags to preferred values", fileDate, url)
- writeMappingsVar(intlData, langSubtagMappings, "langSubtagMappings",
- "Mappings from non-extlang subtags to preferred values", fileDate, url)
- writeMappingsVar(intlData, extlangMappings, "extlangMappings",
- "Mappings from extlang subtags to preferred values", fileDate, url)
-
-def updateLangTags(args):
- """ Update the LangTagMappingsGenerated.js file. """
+
+ source = u"CLDR Supplemental Data, version {}".format(data["version"])
+ grandfathered_mappings = data["grandfatheredMappings"]
+ language_mappings = data["languageMappings"]
+ complex_language_mappings = data["complexLanguageMappings"]
+ region_mappings = data["regionMappings"]
+ complex_region_mappings = data["complexRegionMappings"]
+
+ writeMappingsVar(println, grandfathered_mappings, "grandfatheredMappings",
+ "Mappings from grandfathered tags to preferred values.", source, url)
+ writeMappingsVar(println, language_mappings, "languageMappings",
+ "Mappings from language subtags to preferred values.", source, url)
+ writeMappingsVar(println, {key: True for key in complex_language_mappings},
+ "complexLanguageMappings",
+ "Language subtags with complex mappings.", source, url)
+ writeMappingsVar(println, region_mappings, "regionMappings",
+ "Mappings from region subtags to preferred values.", source, url)
+ writeMappingsVar(println, {key: True for key in complex_region_mappings},
+ "complexRegionMappings",
+ "Region subtags with complex mappings.", source, url)
+
+ writeUpdateLocaleIdMappingsFunction(println, complex_language_mappings,
+ complex_region_mappings,
+ "Canonicalize Unicode BCP 47 locale identifiers.",
+ source, url)
+ writeGrandfatheredMappingsFunction(println, grandfathered_mappings,
+ "Canonicalize grandfathered locale identifiers.",
+ source, url)
+
+
+def updateCLDRLangTags(args):
+ """ Update the LangTagMappingsGenerated.js file. """
url = args.url
+ branch = args.branch
+ revision = args.revision
out = args.out
- filename = args.file
+ files = args.files
print("Arguments:")
print("\tDownload url: %s" % url)
- print("\tLocal registry: %s" % filename)
+ print("\tBranch: %s" % branch)
+ print("\tRevision: %s" % revision)
+ print("\tLocal supplemental data and likely subtags: %s" % files)
print("\tOutput file: %s" % out)
print("")
- if filename is not None:
- print("Always make sure you have the newest language-subtag-registry.txt!")
- registry = codecs.open(filename, "r", encoding="utf-8")
+ if files:
+ if len(files) != 3:
+ raise Exception("Expected three files, but got: {}".format(files))
+
+ print(("Always make sure you have the newest ldmlSupplemental.dtd, "
+ "supplementalMetadata.xml, and likelySubtags.xml!"))
+
+ supplemental_dtd_file = files[0]
+ supplemental_metadata_file = files[1]
+ likely_subtags_file = files[2]
else:
- print("Downloading IANA Language Subtag Registry...")
- with closing(urllib2.urlopen(url)) as reader:
- text = reader.read().decode("utf-8")
- registry = codecs.open("language-subtag-registry.txt", "w+", encoding="utf-8")
- registry.write(text)
- registry.seek(0)
-
- print("Processing IANA Language Subtag Registry...")
- with closing(registry) as reg:
- data = readRegistry(reg)
- fileDate = data["fileDate"]
- langTagMappings = data["langTagMappings"]
- langSubtagMappings = data["langSubtagMappings"]
- extlangMappings = data["extlangMappings"]
+ print("Downloading CLDR supplemental data...")
+
+ supplemental_dtd_filename = "ldmlSupplemental.dtd"
+ supplemental_dtd_path = "common/dtd/{}".format(supplemental_dtd_filename)
+ supplemental_dtd_file = os.path.join(os.getcwd(), supplemental_dtd_filename)
+
+ supplemental_metadata_filename = "supplementalMetadata.xml"
+ supplemental_metadata_path = "common/supplemental/{}".format(
+ supplemental_metadata_filename)
+ supplemental_metadata_file = os.path.join(os.getcwd(), supplemental_metadata_filename)
+
+ likely_subtags_filename = "likelySubtags.xml"
+ likely_subtags_path = "common/supplemental/{}".format(likely_subtags_filename)
+ likely_subtags_file = os.path.join(os.getcwd(), likely_subtags_filename)
+
+ # Try to download the raw file directly from GitHub if possible.
+ split = urlsplit(url)
+ if split.netloc == "github.com" and split.path.endswith(".git") and revision == "HEAD":
+ def download(path, file):
+ urlpath = "{}/raw/{}/{}".format(urlsplit(url).path[:-4], branch, path)
+ raw_url = urlunsplit((split.scheme, split.netloc, urlpath, split.query,
+ split.fragment))
+
+ with closing(urllib2.urlopen(raw_url)) as reader:
+ text = reader.read().decode("utf-8")
+ with io.open(file, "w", encoding="utf-8") as saved_file:
+ saved_file.write(text)
+
+ download(supplemental_dtd_path, supplemental_dtd_file)
+ download(supplemental_metadata_path, supplemental_metadata_file)
+ download(likely_subtags_path, likely_subtags_file)
+ else:
+ # Download the requested branch in a temporary directory.
+ with TemporaryDirectory() as inDir:
+ if revision == "HEAD":
+ subprocess.check_call(["git", "clone", "--depth=1",
+ "--branch=%s" % branch, url, inDir])
+ else:
+ subprocess.check_call(["git", "clone", "--single-branch",
+ "--branch=%s" % branch, url, inDir])
+ subprocess.check_call(["git", "-C", inDir, "reset", "--hard", revision])
+
+ shutil.copyfile(os.path.join(inDir, supplemental_dtd_path),
+ supplemental_dtd_file)
+ shutil.copyfile(os.path.join(inDir, supplemental_metadata_path),
+ supplemental_metadata_file)
+ shutil.copyfile(os.path.join(inDir, likely_subtags_path), likely_subtags_file)
+
+ print("Processing CLDR supplemental data...")
+ data = readSupplementalData(supplemental_dtd_file,
+ supplemental_metadata_file,
+ likely_subtags_file)
print("Writing Intl data...")
- with codecs.open(out, "w", encoding="utf-8") as intlData:
- intlData.write("// Generated by make_intl_data.py. DO NOT EDIT.\n")
- writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings)
+ with io.open(out, mode="w", encoding="utf-8", newline="") as f:
+ println = partial(print, file=f)
+
+ println(u"// Generated by make_intl_data.py. DO NOT EDIT.")
+ writeCLDRLanguageTagData(println, data, url)
+
def flines(filepath, encoding="utf-8"):
""" Open filepath and iterate over its content. """
@@ -707,11 +1214,11 @@ def processTimeZones(tzdataDir, icuDir, icuTzDir, version, ignoreBackzone, ignor
println(u"// Format:")
println(u'// "LinkName", "Target" // ICU-Target [time zone file]')
- println(u"struct LinkAndTarget");
- println(u"{");
- println(u" const char* const link;");
- println(u" const char* const target;");
- println(u"};");
+ println(u"struct LinkAndTarget")
+ println(u"{")
+ println(u" const char* const link;")
+ println(u" const char* const target;")
+ println(u"};")
println(u"")
println(u"const LinkAndTarget ianaLinksCanonicalizedDifferentlyByICU[] = {")
for (zone, target, icuTarget) in incorrectLinks:
@@ -932,7 +1439,7 @@ def updateTzdata(topsrcdir, args):
if tzDir is None:
print("Downloading tzdata file...")
with closing(urllib2.urlopen(url)) as tzfile:
- fname = urlparse.urlsplit(tzfile.geturl()).path.split("/")[-1]
+ fname = urlsplit(tzfile.geturl()).path.split("/")[-1]
with tempfile.NamedTemporaryFile(suffix=fname) as tztmpfile:
print("File stored in %s" % tztmpfile.name)
tztmpfile.write(tzfile.read())
@@ -959,20 +1466,24 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Update intl data.")
subparsers = parser.add_subparsers(help="Select update mode")
- parser_tags = subparsers.add_parser("langtags",
- help="Update language-subtag-registry")
- parser_tags.add_argument("--url",
- metavar="URL",
- default="https://www.iana.org/assignments/language-subtag-registry",
- type=EnsureHttps,
- help="Download url for language-subtag-registry.txt (default: %(default)s)")
- parser_tags.add_argument("--out",
- default="LangTagMappingsGenerated.js",
- help="Output file (default: %(default)s)")
- parser_tags.add_argument("file",
- nargs="?",
- help="Local language-subtag-registry.txt file, if omitted uses <URL>")
- parser_tags.set_defaults(func=updateLangTags)
+ parser_cldr_tags = subparsers.add_parser("langtags",
+ help="Update CLDR language tags data")
+ parser_cldr_tags.add_argument("--url",
+ metavar="URL",
+ default="https://github.com/unicode-org/cldr.git",
+ help="URL to git repository (default: %(default)s)")
+ parser_cldr_tags.add_argument("--branch", default="latest",
+ help="Git branch (default: %(default)s)")
+ parser_cldr_tags.add_argument("--revision", default="HEAD",
+ help="Git revision (default: %(default)s)")
+ parser_cldr_tags.add_argument("--out",
+ default="LangTagMappingsGenerated.js",
+ help="Output file (default: %(default)s)")
+ parser_cldr_tags.add_argument("files",
+ nargs="*",
+ help="Local ldmlSupplemental.dtd, supplementalMetadata.xml, "
+ "and likelySubtags.xml files, if omitted uses <URL>")
+ parser_cldr_tags.set_defaults(func=updateCLDRLangTags)
parser_tz = subparsers.add_parser("tzdata", help="Update tzdata")
parser_tz.add_argument("--tz",