Issue #2259 - process Unicode langtags and locale identifiers according to BCP 47

Major spec change: text references are to BCP47 (not the implementing RFCs) and the single source of truth is now Unicode CLDR.

- Switch from IANA to CLDR for make_unicode
- Update grandfathered tag handling directly in the parser
- Don't support extlang, irregular, privateuse or 4-letter subtags
- Adjust comments to refer to Unicode BCP 47 locale identifiers, remove RFC 5646
- Canonicalize/order langtags correctly
- Tokenize BCP47 in reusable class

Based-on: m-c 1407674 (partial), 1451082, 1530320, 1522070, 1531091
author: Martok <martok@martoks-place.de> 2023-06-29 23:07:20 +0200
committer: Martok <martok@martoks-place.de> 2023-06-30 00:01:34 +0200
commit: 2f940bdc9dcbfe83e17ed26c5d1af7fe874c24ac (patch)
tree: 2519366eb8057e265339261ab651a8cb5653a703 /js/src/builtin
parent: 6808e659ad137ac63466aad93e406efbf091c077 (diff)
download: uxp-2f940bdc9dcbfe83e17ed26c5d1af7fe874c24ac.tar.gz
11 files changed, 3331 insertions, 1319 deletions
diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp
index 46a2862909..f3d34762f6 100644
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -974,8 +974,7 @@ IsTrailSurrogateWithLeadSurrogate(JSContext* cx, HandleLinearString input, int32
  */
 static RegExpRunStatus
 ExecuteRegExp(JSContext* cx, HandleObject regexp, HandleString string,
-              int32_t lastIndex,
-              MatchPairs* matches, size_t* endIndex, RegExpStaticsUpdate staticsUpdate)
+              int32_t lastIndex, MatchPairs* matches, size_t* endIndex)
 {
     /*
      * WARNING: Despite the presence of spec step comment numbers, this
@@ -990,14 +989,9 @@ ExecuteRegExp(JSContext* cx, HandleObject regexp, HandleString string,
     if (!RegExpObject::getShared(cx, reobj, &re))
         return RegExpRunStatus_Error;
 
-    RegExpStatics* res;
-    if (staticsUpdate == UpdateRegExpStatics) {
-        res = GlobalObject::getRegExpStatics(cx, cx->global());
-        if (!res)
-            return RegExpRunStatus_Error;
-    } else {
-        res = nullptr;
-    }
+    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
+    if (!res)
+        return RegExpRunStatus_Error;
 
     RootedLinearString input(cx, string->ensureLinear(cx));
     if (!input)
@@ -1051,15 +1045,14 @@ ExecuteRegExp(JSContext* cx, HandleObject regexp, HandleString string,
  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
  */
 static bool
-RegExpMatcherImpl(JSContext* cx, HandleObject regexp, HandleString string,
-                  int32_t lastIndex, RegExpStaticsUpdate staticsUpdate, MutableHandleValue rval)
+RegExpMatcherImpl(JSContext* cx, HandleObject regexp, HandleString string, int32_t lastIndex,
+                  MutableHandleValue rval)
 {
     /* Execute regular expression and gather matches. */
     ScopedMatchPairs matches(&cx->tempLifoAlloc());
 
     /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
-    RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex,
-                                           &matches, nullptr, staticsUpdate);
+    RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
     if (status == RegExpRunStatus_Error)
         return false;
 
@@ -1099,8 +1092,7 @@ js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp)
         return false;
 
     /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
-    return RegExpMatcherImpl(cx, regexp, string, lastIndex,
-                             UpdateRegExpStatics, args.rval());
+    return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
 }
 
 /*
@@ -1123,8 +1115,7 @@ js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp, HandleString input,
           return false;
       return CreateRegExpMatchResult(cx, *shared, input, *maybeMatches, output);
     }
-    return RegExpMatcherImpl(cx, regexp, input, lastIndex,
-                             UpdateRegExpStatics, output);
+    return RegExpMatcherImpl(cx, regexp, input, lastIndex, output);
 }
 
 /*
@@ -1135,14 +1126,13 @@ js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp, HandleString input,
  */
 static bool
 RegExpSearcherImpl(JSContext* cx, HandleObject regexp, HandleString string,
-                   int32_t lastIndex, RegExpStaticsUpdate staticsUpdate, int32_t* result)
+                   int32_t lastIndex, int32_t* result)
 {
     /* Execute regular expression and gather matches. */
     ScopedMatchPairs matches(&cx->tempLifoAlloc());
 
     /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
-    RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex,
-                                           &matches, nullptr, staticsUpdate);
+    RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
     if (status == RegExpRunStatus_Error)
         return false;
 
@@ -1180,7 +1170,7 @@ js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp)
 
     /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
     int32_t result = 0;
-    if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, UpdateRegExpStatics, &result))
+    if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result))
         return false;
 
     args.rval().setInt32(result);
@@ -1203,23 +1193,7 @@ js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp, HandleString input,
         *result = CreateRegExpSearchResult(cx, *maybeMatches);
         return true;
     }
-    return RegExpSearcherImpl(cx, regexp, input, lastIndex,
-                              UpdateRegExpStatics, result);
-}
-
-bool
-js::regexp_exec_no_statics(JSContext* cx, unsigned argc, Value* vp)
-{
-    CallArgs args = CallArgsFromVp(argc, vp);
-    MOZ_ASSERT(args.length() == 2);
-    MOZ_ASSERT(IsRegExpObject(args[0]));
-    MOZ_ASSERT(args[1].isString());
-
-    RootedObject regexp(cx, &args[0].toObject());
-    RootedString string(cx, args[1].toString());
-
-    return RegExpMatcherImpl(cx, regexp, string, 0,
-                             DontUpdateRegExpStatics, args.rval());
+    return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
 }
 
 /*
@@ -1245,8 +1219,7 @@ js::RegExpTester(JSContext* cx, unsigned argc, Value* vp)
 
     /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
     size_t endIndex = 0;
-    RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex,
-                                           nullptr, &endIndex, UpdateRegExpStatics);
+    RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, lastIndex, nullptr, &endIndex);
 
     if (status == RegExpRunStatus_Error)
         return false;
@@ -1271,8 +1244,7 @@ js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
     MOZ_ASSERT(lastIndex >= 0);
 
     size_t endIndexTmp = 0;
-    RegExpRunStatus status = ExecuteRegExp(cx, regexp, input, lastIndex,
-                                           nullptr, &endIndexTmp, UpdateRegExpStatics);
+    RegExpRunStatus status = ExecuteRegExp(cx, regexp, input, lastIndex, nullptr, &endIndexTmp);
 
     if (status == RegExpRunStatus_Success) {
         MOZ_ASSERT(endIndexTmp <= INT32_MAX);
@@ -1287,24 +1259,6 @@ js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
     return false;
 }
 
-bool
-js::regexp_test_no_statics(JSContext* cx, unsigned argc, Value* vp)
-{
-    CallArgs args = CallArgsFromVp(argc, vp);
-    MOZ_ASSERT(args.length() == 2);
-    MOZ_ASSERT(IsRegExpObject(args[0]));
-    MOZ_ASSERT(args[1].isString());
-
-    RootedObject regexp(cx, &args[0].toObject());
-    RootedString string(cx, args[1].toString());
-
-    size_t ignored = 0;
-    RegExpRunStatus status = ExecuteRegExp(cx, regexp, string, 0,
-                                           nullptr, &ignored, DontUpdateRegExpStatics);
-    args.rval().setBoolean(status == RegExpRunStatus_Success);
-    return status != RegExpRunStatus_Error;
-}
-
 static void
 GetParen(JSLinearString* matched, const JS::Value& capture, JSSubString* out)
 {
diff --git a/js/src/builtin/RegExp.h b/js/src/builtin/RegExp.h
index f66c9b1b81..c0a7d59f77 100644
--- a/js/src/builtin/RegExp.h
+++ b/js/src/builtin/RegExp.h
@@ -18,10 +18,6 @@ namespace js {
 JSObject*
 InitRegExpClass(JSContext* cx, HandleObject obj);
 
-// Whether RegExp statics should be updated with the input and results of a
-// regular expression execution.
-enum RegExpStaticsUpdate { UpdateRegExpStatics, DontUpdateRegExpStatics };
-
 /*
  * Legacy behavior of ExecuteRegExp(), which is baked into the JSAPI.
  *
@@ -72,22 +68,6 @@ intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc, Value* vp);
  */
 
 /*
- * Behaves like regexp.exec(string), but doesn't set RegExp statics.
- *
- * Usage: match = regexp_exec_no_statics(regexp, string)
- */
-extern MOZ_MUST_USE bool
-regexp_exec_no_statics(JSContext* cx, unsigned argc, Value* vp);
-
-/*
- * Behaves like regexp.test(string), but doesn't set RegExp statics.
- *
- * Usage: does_match = regexp_test_no_statics(regexp, string)
- */
-extern MOZ_MUST_USE bool
-regexp_test_no_statics(JSContext* cx, unsigned argc, Value* vp);
-
-/*
  * Behaves like RegExp(pattern, flags).
  * |pattern| should be a RegExp object, |flags| should be a raw integer value.
  * Must be called without |new|.
diff --git a/js/src/builtin/Utilities.js b/js/src/builtin/Utilities.js
index 09c15957c6..51c5a574fd 100644
--- a/js/src/builtin/Utilities.js
+++ b/js/src/builtin/Utilities.js
@@ -80,12 +80,6 @@ MakeConstructible(Record, {});
 /********** Abstract operations defined in ECMAScript Language Specification **********/
 
 
-/* Spec: ECMAScript Language Specification, 5.1 edition, 8.12.6 and 11.8.7 */
-function HasProperty(o, p) {
-    return p in o;
-}
-
-
 /* Spec: ECMAScript Language Specification, 5.1 edition, 9.2 and 11.4.9 */
 function ToBoolean(v) {
     return !!v;
diff --git a/js/src/builtin/intl/Collator.js b/js/src/builtin/intl/Collator.js
index ee6ea9a9b8..dffadab7c5 100644
--- a/js/src/builtin/intl/Collator.js
+++ b/js/src/builtin/intl/Collator.js
@@ -6,18 +6,6 @@
 
 
 /**
- * Mapping from Unicode extension keys for collation to options properties,
- * their types and permissible values.
- *
- * Spec: ECMAScript Internationalization API Specification, 10.1.1.
- */
-var collatorKeyMappings = {
-    kn: {property: "numeric", type: "boolean"},
-    kf: {property: "caseFirst", type: "string", values: ["upper", "lower", "false"]}
-};
-
-
-/**
  * Compute an internal properties object from |lazyCollatorData|.
  */
 function resolveCollatorInternals(lazyCollatorData)
@@ -26,60 +14,49 @@ function resolveCollatorInternals(lazyCollatorData)
 
     var internalProps = std_Object_create(null);
 
-    // Step 7.
-    internalProps.usage = lazyCollatorData.usage;
-
-    // Step 8.
     var Collator = collatorInternalProperties;
 
-    // Step 9.
+    // Step 5.
+    internalProps.usage = lazyCollatorData.usage;
+
+    // Steps 6-7.
     var collatorIsSorting = lazyCollatorData.usage === "sort";
     var localeData = collatorIsSorting
                      ? Collator.sortLocaleData
                      : Collator.searchLocaleData;
 
     // Compute effective locale.
-    // Step 14.
+    // Step 16.
     var relevantExtensionKeys = Collator.relevantExtensionKeys;
 
-    // Step 15.
+    // Step 17.
     var r = ResolveLocale(callFunction(Collator.availableLocales, Collator),
                           lazyCollatorData.requestedLocales,
                           lazyCollatorData.opt,
                           relevantExtensionKeys,
                           localeData);
 
-    // Step 16.
+    // Step 18.
     internalProps.locale = r.locale;
 
-    // Steps 17-19.
-    var key, property, value, mapping;
-    var i = 0, len = relevantExtensionKeys.length;
-    while (i < len) {
-        // Step 19.a.
-        key = relevantExtensionKeys[i];
-        if (key === "co") {
-            // Step 19.b.
-            property = "collation";
-            value = r.co === null ? "default" : r.co;
-        } else {
-            // Step 19.c.
-            mapping = collatorKeyMappings[key];
-            property = mapping.property;
-            value = r[key];
-            if (mapping.type === "boolean")
-                value = value === "true";
-        }
+    // Step 19.
+    var collation = r.co;
+
+    // Step 20.
+    if (collation === null)
+        collation = "default";
 
-        // Step 19.d.
-        internalProps[property] = value;
+    // Step 21.
+    internalProps.collation = collation;
 
-        // Step 19.e.
-        i++;
-    }
+    // Step 22.
+    internalProps.numeric = r.kn === "true";
+
+    // Step 23.
+    internalProps.caseFirst = r.kf;
 
     // Compute remaining collation options.
-    // Steps 21-22.
+    // Step 25.
     var s = lazyCollatorData.rawSensitivity;
     if (s === undefined) {
         // In theory the default sensitivity for the "search" collator is
@@ -88,14 +65,13 @@ function resolveCollatorInternals(lazyCollatorData)
         // both collation modes.
         s = "variant";
     }
+
+    // Step 26.
     internalProps.sensitivity = s;
 
-    // Step 24.
+    // Step 28.
     internalProps.ignorePunctuation = lazyCollatorData.ignorePunctuation;
 
-    // Step 25.
-    internalProps.boundFormat = undefined;
-
     // The caller is responsible for associating |internalProps| with the right
     // object using |setInternalProperties|.
     return internalProps;
@@ -139,9 +115,6 @@ function InitializeCollator(collator, locales, options) {
     assert(IsObject(collator), "InitializeCollator called with non-object");
     assert(IsCollator(collator), "InitializeCollator called with non-Collator");
 
-    // Steps 1-2 (These steps are no longer required and should be removed
-    // from the spec; https://github.com/tc39/ecma402/issues/115).;
-
     // Lazy Collator data has the following structure:
     //
     //   {
@@ -162,11 +135,11 @@ function InitializeCollator(collator, locales, options) {
     // subset of them.
     var lazyCollatorData = std_Object_create(null);
 
-    // Step 3.
+    // Step 1.
     var requestedLocales = CanonicalizeLocaleList(locales);
     lazyCollatorData.requestedLocales = requestedLocales;
 
-    // Steps 4-5.
+    // Steps 2-3.
     //
     // If we ever need more speed here at startup, we should try to detect the
     // case where |options === undefined| and Object.prototype hasn't been
@@ -179,38 +152,39 @@ function InitializeCollator(collator, locales, options) {
         options = ToObject(options);
 
     // Compute options that impact interpretation of locale.
-    // Step 6.
+    // Step 4.
     var u = GetOption(options, "usage", "string", ["sort", "search"], "sort");
     lazyCollatorData.usage = u;
 
-    // Step 10.
+    // Step 8.
     var opt = new Record();
     lazyCollatorData.opt = opt;
 
-    // Steps 11-12.
+    // Steps 9-10.
     var matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
     opt.localeMatcher = matcher;
 
-    // Step 13, unrolled.
+    // Steps 11-13.
     var numericValue = GetOption(options, "numeric", "boolean", undefined, undefined);
     if (numericValue !== undefined)
         numericValue = numericValue ? 'true' : 'false';
     opt.kn = numericValue;
 
+    // Steps 14-15.
     var caseFirstValue = GetOption(options, "caseFirst", "string", ["upper", "lower", "false"], undefined);
     opt.kf = caseFirstValue;
 
     // Compute remaining collation options.
-    // Step 20.
+    // Step 24.
     var s = GetOption(options, "sensitivity", "string",
                       ["base", "accent", "case", "variant"], undefined);
     lazyCollatorData.rawSensitivity = s;
 
-    // Step 23.
+    // Step 27.
     var ip = GetOption(options, "ignorePunctuation", "boolean", undefined, false);
     lazyCollatorData.ignorePunctuation = ip;
 
-    // Step 26.
+    // Step 29.
     //
     // We've done everything that must be done now: mark the lazy data as fully
     // computed and install it.
@@ -228,9 +202,14 @@ function InitializeCollator(collator, locales, options) {
 function Intl_Collator_supportedLocalesOf(locales /*, options*/) {
     var options = arguments.length > 1 ? arguments[1] : undefined;
 
+    // Step 1.
     var availableLocales = callFunction(collatorInternalProperties.availableLocales,
                                         collatorInternalProperties);
+
+    // Step 2.
     var requestedLocales = CanonicalizeLocaleList(locales);
+
+    // Step 3.
     return SupportedLocales(availableLocales, requestedLocales, options);
 }
 
@@ -353,9 +332,9 @@ function collatorSearchLocaleData() {
 
 
 /**
- * Function to be bound and returned by Intl.Collator.prototype.format.
+ * Function to be bound and returned by Intl.Collator.prototype.compare.
  *
- * Spec: ECMAScript Internationalization API Specification, 12.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 10.3.3.1.
  */
 function collatorCompareToBind(x, y) {
     // Steps 1.a.i-ii implemented by ECMAScript declaration binding instantiation,
@@ -375,26 +354,28 @@ function collatorCompareToBind(x, y) {
  * than 0 if x > y according to the sort order for the locale and collation
  * options of this Collator object.
  *
- * Spec: ECMAScript Internationalization API Specification, 10.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 10.3.3.
  */
 function Intl_Collator_compare_get() {
-    // Check "this Collator object" per introduction of section 10.3.
-    if (!IsObject(this) || !IsCollator(this))
+    // Step 1.
+    var collator = this;
+
+    // Steps 2-3.
+    if (!IsObject(collator) || !IsCollator(collator))
         ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "Collator", "compare", "Collator");
 
-    var internals = getCollatorInternals(this);
+    var internals = getCollatorInternals(collator);
 
-    // Step 1.
+    // Step 4.
     if (internals.boundCompare === undefined) {
-        // Step 1.a.
-        var F = collatorCompareToBind;
+        // Steps 4.a-b.
+        var F = callFunction(FunctionBind, collatorCompareToBind, collator);
 
-        // Steps 1.b-d.
-        var bc = callFunction(FunctionBind, F, this);
-        internals.boundCompare = bc;
+        // Step 4.c.
+        internals.boundCompare = F;
     }
 
-    // Step 2.
+    // Step 5.
     return internals.boundCompare;
 }
 _SetCanonicalName(Intl_Collator_compare_get, "get compare");
@@ -403,28 +384,30 @@ _SetCanonicalName(Intl_Collator_compare_get, "get compare");
 /**
  * Returns the resolved options for a Collator object.
  *
- * Spec: ECMAScript Internationalization API Specification, 10.3.3 and 10.4.
+ * Spec: ECMAScript Internationalization API Specification, 10.3.4.
  */
 function Intl_Collator_resolvedOptions() {
-    // Check "this Collator object" per introduction of section 10.3.
-    if (!IsObject(this) || !IsCollator(this))
+    // Step 1.
+    var collator = this;
+
+    // Steps 2-3.
+    if (!IsObject(collator) || !IsCollator(collator))
         ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "Collator", "resolvedOptions", "Collator");
 
-    var internals = getCollatorInternals(this);
+    var internals = getCollatorInternals(collator);
 
+    // Steps 4-5.
     var result = {
         locale: internals.locale,
         usage: internals.usage,
         sensitivity: internals.sensitivity,
-        ignorePunctuation: internals.ignorePunctuation
+        ignorePunctuation: internals.ignorePunctuation,
+        collation: internals.collation,
+        numeric: internals.numeric,
+        caseFirst: internals.caseFirst,
     };
 
-    var relevantExtensionKeys = collatorInternalProperties.relevantExtensionKeys;
-    for (var i = 0; i < relevantExtensionKeys.length; i++) {
-        var key = relevantExtensionKeys[i];
-        var property = (key === "co") ? "collation" : collatorKeyMappings[key].property;
-        _DefineDataProperty(result, property, internals[property]);
-    }
+    // Step 6.
     return result;
 }
 
diff --git a/js/src/builtin/intl/CommonFunctions.js b/js/src/builtin/intl/CommonFunctions.js
index cf5a615721..36b2bec9b2 100644
--- a/js/src/builtin/intl/CommonFunctions.js
+++ b/js/src/builtin/intl/CommonFunctions.js
@@ -14,35 +14,70 @@ function hasOwn(propName, object) {
 }
 
 /**
- * Holder object for encapsulating regexp instances.
- *
- * Regular expression instances should be created after the initialization of
- * self-hosted global.
- */
-var internalIntlRegExps = std_Object_create(null);
-internalIntlRegExps.unicodeLocaleExtensionSequenceRE = null;
-internalIntlRegExps.languageTagRE = null;
-internalIntlRegExps.duplicateVariantRE = null;
-internalIntlRegExps.duplicateSingletonRE = null;
-internalIntlRegExps.isWellFormedCurrencyCodeRE = null;
-internalIntlRegExps.currencyDigitsRE = null;
-
-/**
- * Regular expression matching a "Unicode locale extension sequence", which the
+ * Returns the start index of a "Unicode locale extension sequence", which the
  * specification defines as: "any substring of a language tag that starts with
  * a separator '-' and the singleton 'u' and includes the maximum sequence of
  * following non-singleton subtags and their preceding '-' separators."
  *
  * Alternatively, this may be defined as: the components of a language tag that
- * match the extension production in RFC 5646, where the singleton component is
- * "u".
+ * match the `unicode_locale_extensions` production in UTS 35.
  *
  * Spec: ECMAScript Internationalization API Specification, 6.2.1.
  */
-function getUnicodeLocaleExtensionSequenceRE() {
-    return internalIntlRegExps.unicodeLocaleExtensionSequenceRE ||
-           (internalIntlRegExps.unicodeLocaleExtensionSequenceRE =
-            RegExpCreate("-u(?:-[a-z0-9]{2,8})+"));
+function startOfUnicodeExtensions(locale) {
+    assert(typeof locale === "string", "locale is a string");
+
+    // Search for "-u-" marking the start of a Unicode extension sequence.
+    var start = callFunction(std_String_indexOf, locale, "-u-");
+    if (start < 0)
+        return -1;
+
+    // And search for "-x-" marking the start of any privateuse component to
+    // handle the case when "-u-" was only found within a privateuse subtag.
+    var privateExt = callFunction(std_String_indexOf, locale, "-x-");
+    if (privateExt >= 0 && privateExt < start)
+        return -1;
+
+    return start;
+}
+
+/**
+ * Returns the (exclusive) end index of the Unicode locale extension sequence at |start|.
+ */
+function endOfUnicodeExtensions(locale, start) {
+    assert(typeof locale === "string", "locale is a string");
+    assert(IsStructurallyValidLanguageTag(locale), "locale is a language tag");
+    assert(CanonicalizeLanguageTag(locale) === locale, "locale is a canonicalized language tag");
+    assert(0 <= start && start < locale.length, "start is an index into locale");
+    assert(Substring(locale, start, 3) === "-u-", "start points to Unicode extension sequence");
+
+    #define HYPHEN 0x2D
+    assert(std_String_fromCharCode(HYPHEN) === "-",
+           "code unit constant should match the expected character");
+
+    // Search for the start of the next singleton or privateuse subtag.
+    //
+    // Begin searching after the smallest possible Unicode locale extension
+    // sequence, namely |"-u-" 2alphanum|. End searching once the remaining
+    // characters can't fit the smallest possible singleton or privateuse
+    // subtag, namely |"-x-" alphanum|. Note the reduced end-limit means
+    // indexing inside the loop is always in-range.
+    for (var i = start + 5, end = locale.length - 4; i <= end; i++) {
+        if (callFunction(std_String_charCodeAt, locale, i) !== HYPHEN)
+            continue;
+        if (callFunction(std_String_charCodeAt, locale, i + 2) === HYPHEN)
+            return i;
+
+        // Skip over (i + 1) and (i + 2) because we've just verified they
+        // aren't "-", so the next possible delimiter can only be at (i + 3).
+        i += 2;
+    }
+
+    #undef HYPHEN
+
+    // If no singleton or privateuse subtag was found, the Unicode extension
+    // sequence extends until the end of the string.
+    return locale.length;
 }
 
 
@@ -50,226 +85,602 @@ function getUnicodeLocaleExtensionSequenceRE() {
  * Removes Unicode locale extension sequences from the given language tag.
  */
 function removeUnicodeExtensions(locale) {
-    // A wholly-privateuse locale has no extension sequences.
-    if (callFunction(std_String_startsWith, locale, "x-"))
+    var start = startOfUnicodeExtensions(locale); // negative when there is no "-u-" sequence
+    if (start < 0)
         return locale;
 
-    // Otherwise, split on "-x-" marking the start of any privateuse component.
-    // Replace Unicode locale extension sequences in the left half, and return
-    // the concatenation.
-    var pos = callFunction(std_String_indexOf, locale, "-x-");
-    if (pos < 0)
-        pos = locale.length;
-
-    var left = callFunction(String_substring, locale, 0, pos);
-    var right = callFunction(String_substring, locale, pos);
-
-    var extensions;
-    var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
-    while ((extensions = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, left)) !== null) {
-        left = StringReplaceString(left, extensions[0], "");
-        unicodeLocaleExtensionSequenceRE.lastIndex = 0;
-    }
+    var end = endOfUnicodeExtensions(locale, start);
 
+    var left = Substring(locale, 0, start); // everything before "-u-"
+    var right = Substring(locale, end, locale.length - end); // everything after the sequence
     var combined = left + right;
-    assert(IsStructurallyValidLanguageTag(combined), "recombination produced an invalid language tag");
-    assert(function() {
-        var uindex = callFunction(std_String_indexOf, combined, "-u-");
-        if (uindex < 0)
-            return true;
-        var xindex = callFunction(std_String_indexOf, combined, "-x-");
-        return xindex > 0 && xindex < uindex;
-    }(), "recombination failed to remove all Unicode locale extension sequences");
+
+    assert(IsStructurallyValidLanguageTag(combined),
+           "recombination produced an invalid language tag");
+    assert(startOfUnicodeExtensions(combined) < 0,
+           "recombination failed to remove all Unicode locale extension sequences");
 
     return combined;
 }
 
-
 /**
- * Regular expression defining BCP 47 language tags.
- *
- * Spec: RFC 5646 section 2.1.
+ * Returns the Unicode locale extension sequence (starting with "-u-") from the given language tag.
  */
-function getLanguageTagRE() {
-    if (internalIntlRegExps.languageTagRE)
-        return internalIntlRegExps.languageTagRE;
-
-    // RFC 5234 section B.1
-    // ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
-    var ALPHA = "[a-zA-Z]";
-    // DIGIT          =  %x30-39
-    //                        ; 0-9
-    var DIGIT = "[0-9]";
-
-    // RFC 5646 section 2.1
-    // alphanum      = (ALPHA / DIGIT)     ; letters and numbers
-    var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
-    // regular       = "art-lojban"        ; these tags match the 'langtag'
-    //               / "cel-gaulish"       ; production, but their subtags
-    //               / "no-bok"            ; are not extended language
-    //               / "no-nyn"            ; or variant subtags: their meaning
-    //               / "zh-guoyu"          ; is defined by their registration
-    //               / "zh-hakka"          ; and all of these are deprecated
-    //               / "zh-min"            ; in favor of a more modern
-    //               / "zh-min-nan"        ; subtag or sequence of subtags
-    //               / "zh-xiang"
-    var regular = "(?:art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)";
-    // irregular     = "en-GB-oed"         ; irregular tags do not match
-    //                / "i-ami"             ; the 'langtag' production and
-    //                / "i-bnn"             ; would not otherwise be
-    //                / "i-default"         ; considered 'well-formed'
-    //                / "i-enochian"        ; These tags are all valid,
-    //                / "i-hak"             ; but most are deprecated
-    //                / "i-klingon"         ; in favor of more modern
-    //                / "i-lux"             ; subtags or subtag
-    //                / "i-mingo"           ; combination
-    //                / "i-navajo"
-    //                / "i-pwn"
-    //                / "i-tao"
-    //                / "i-tay"
-    //                / "i-tsu"
-    //                / "sgn-BE-FR"
-    //                / "sgn-BE-NL"
-    //                / "sgn-CH-DE"
-    var irregular = "(?:en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)";
-    // grandfathered = irregular           ; non-redundant tags registered
-    //               / regular             ; during the RFC 3066 era
-    var grandfathered = "(?:" + irregular + "|" + regular + ")";
-    // privateuse    = "x" 1*("-" (1*8alphanum))
-    var privateuse = "(?:x(?:-[a-z0-9]{1,8})+)";
-    // singleton     = DIGIT               ; 0 - 9
-    //               / %x41-57             ; A - W
-    //               / %x59-5A             ; Y - Z
-    //               / %x61-77             ; a - w
-    //               / %x79-7A             ; y - z
-    var singleton = "(?:" + DIGIT + "|[A-WY-Za-wy-z])";
-    // extension     = singleton 1*("-" (2*8alphanum))
-    var extension = "(?:" + singleton + "(?:-" + alphanum + "{2,8})+)";
-    // variant       = 5*8alphanum         ; registered variants
-    //               / (DIGIT 3alphanum)
-    var variant = "(?:" + alphanum + "{5,8}|(?:" + DIGIT + alphanum + "{3}))";
-    // region        = 2ALPHA              ; ISO 3166-1 code
-    //               / 3DIGIT              ; UN M.49 code
-    var region = "(?:" + ALPHA + "{2}|" + DIGIT + "{3})";
-    // script        = 4ALPHA              ; ISO 15924 code
-    var script = "(?:" + ALPHA + "{4})";
-    // extlang       = 3ALPHA              ; selected ISO 639 codes
-    //                 *2("-" 3ALPHA)      ; permanently reserved
-    var extlang = "(?:" + ALPHA + "{3}(?:-" + ALPHA + "{3}){0,2})";
-    // language      = 2*3ALPHA            ; shortest ISO 639 code
-    //                 ["-" extlang]       ; sometimes followed by
-    //                                     ; extended language subtags
-    //               / 4ALPHA              ; or reserved for future use
-    //               / 5*8ALPHA            ; or registered language subtag
-    var language = "(?:" + ALPHA + "{2,3}(?:-" + extlang + ")?|" + ALPHA + "{4}|" + ALPHA + "{5,8})";
-    // langtag       = language
-    //                 ["-" script]
-    //                 ["-" region]
-    //                 *("-" variant)
-    //                 *("-" extension)
-    //                 ["-" privateuse]
-    var langtag = language + "(?:-" + script + ")?(?:-" + region + ")?(?:-" +
-                  variant + ")*(?:-" + extension + ")*(?:-" + privateuse + ")?";
-    // Language-Tag  = langtag             ; normal language tags
-    //               / privateuse          ; private use tag
-    //               / grandfathered       ; grandfathered tags
-    var languageTag = "^(?:" + langtag + "|" + privateuse + "|" + grandfathered + ")$";
-
-    // Language tags are case insensitive (RFC 5646 section 2.1.1).
-    return (internalIntlRegExps.languageTagRE = RegExpCreate(languageTag, "i"));
+function getUnicodeExtensions(locale) {
+    var start = startOfUnicodeExtensions(locale); // index of "-u-"; must exist (asserted below)
+    assert(start >= 0, "start of Unicode extension sequence not found");
+    var end = endOfUnicodeExtensions(locale, start);
+
+    return Substring(locale, start, end - start); // includes the leading "-u-"
 }
 
+// Token type bits; a token mixing letters and digits has both ALPHA and DIGIT
+// set. Expressed as #defines to avoid extra named lookups in the interpreter/jits.
+#define NONE  0b00
+#define ALPHA 0b01
+#define DIGIT 0b10
+
+// Constants for code units used below.
+#define HYPHEN  0x2D
+#define DIGIT_ZERO 0x30
+#define DIGIT_NINE 0x39
+#define UPPER_A 0x41
+#define UPPER_Z 0x5A
+#define LOWER_A 0x61
+#define LOWER_T 0x74
+#define LOWER_U 0x75
+#define LOWER_X 0x78
+#define LOWER_Z 0x7A
+
+// The requirement to use callFunction() for method calls makes the parser
+// harder to read. Use macros to the rescue.
+
+// Reads the next token; makes the enclosing function return null if the token is invalid.
+#define NEXT_TOKEN_OR_RETURN_NULL(ts)       \
+    if (!callFunction(ts.nextToken, ts))    \
+        return null;
+// Reads the next token; asserts (instead of returning) if the token is invalid.
+#define NEXT_TOKEN_OR_ASSERT(ts)            \
+    if (!callFunction(ts.nextToken, ts))    \
+        assert(false, "unexpected invalid subtag");
 
-function getDuplicateVariantRE() {
-    if (internalIntlRegExps.duplicateVariantRE)
-        return internalIntlRegExps.duplicateVariantRE;
-
-    // RFC 5234 section B.1
-    // ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
-    var ALPHA = "[a-zA-Z]";
-    // DIGIT          =  %x30-39
-    //                        ; 0-9
-    var DIGIT = "[0-9]";
-
-    // RFC 5646 section 2.1
-    // alphanum      = (ALPHA / DIGIT)     ; letters and numbers
-    var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
-    // variant       = 5*8alphanum         ; registered variants
-    //               / (DIGIT 3alphanum)
-    var variant = "(?:" + alphanum + "{5,8}|(?:" + DIGIT + alphanum + "{3}))";
-
-    // Match a langtag that contains a duplicate variant.
-    var duplicateVariant =
-        // Match everything in a langtag prior to any variants, and maybe some
-        // of the variants as well (which makes this pattern inefficient but
-        // not wrong, for our purposes);
-        "(?:" + alphanum + "{2,8}-)+" +
-        // a variant, parenthesised so that we can refer back to it later;
-        "(" + variant + ")-" +
-        // zero or more subtags at least two characters long (thus stopping
-        // before extension and privateuse components);
-        "(?:" + alphanum + "{2,8}-)*" +
-        // and the same variant again
-        "\\1" +
-        // ...but not followed by any characters that would turn it into a
-        // different subtag.
-        "(?!" + alphanum + ")";
-
-    // Language tags are case insensitive (RFC 5646 section 2.1.1).  Using
-    // character classes covering both upper- and lower-case characters nearly
-    // addresses this -- but for the possibility of variant repetition with
-    // differing case, e.g. "en-variant-Variant".  Use a case-insensitive
-    // regular expression to address this.  (Note that there's no worry about
-    // case transformation accepting invalid characters here: users have
-    // already verified the string is alphanumeric Latin plus "-".)
-    return (internalIntlRegExps.duplicateVariantRE = RegExpCreate(duplicateVariant, "i"));
+// Assigns the lower-cased current token to |target|, then reads the next token (or returns null).
+#define SUBTAG_VAR_OR_RETURN_NULL(ts, target)                                   \
+    {                                                                           \
+        target = Substring(ts.localeLowercase, ts.tokenStart, ts.tokenLength);  \
+        NEXT_TOKEN_OR_RETURN_NULL(ts);                                          \
+    }
+
+// Like SUBTAG_VAR_OR_RETURN_NULL, but asserts instead of returning null on an invalid next token.
+#define SUBTAG_VAR_OR_ASSERT(ts, target)                                        \
+    {                                                                           \
+        target = Substring(ts.localeLowercase, ts.tokenStart, ts.tokenLength);  \
+        NEXT_TOKEN_OR_ASSERT(ts)                                                \
+    }
+
+/**
+ * Tokenizer for Unicode BCP 47 locale identifiers.
+ */
+function BCP47TokenStream(locale) {
+    this.locale = locale;
+
+    // Locale identifiers are compared and processed case-insensitively, so
+    // technically it's not necessary to adjust case. But for easier processing,
+    // and because the canonical form for most subtags is lower case, we start
+    // with lower case for all.
+    //
+    // Note that the tokenizer function keeps using the original input string
+    // to properly detect non-ASCII characters. The lower-case string can't be
+    // used to detect those characters, because some non-ASCII characters
+    // lower-case map into ASCII characters, e.g. U+212A (KELVIN SIGN) lower-
+    // case maps to U+006B (LATIN SMALL LETTER K).
+    this.localeLowercase = callFunction(std_String_toLowerCase, locale);
+
+    // Index in |locale| at which the next call to nextToken() starts reading.
+    this.index = 0;
+
+    // The current token's type bits (ALPHA | DIGIT, or NONE), its start index, and its length.
+    this.token = NONE;
+    this.tokenStart = 0;
+    this.tokenLength = 0;
+
+    assert(std_String_fromCharCode(HYPHEN) === "-" &&
+           std_String_fromCharCode(DIGIT_ZERO) === "0" &&
+           std_String_fromCharCode(DIGIT_NINE) === "9" &&
+           std_String_fromCharCode(UPPER_A) === "A" &&
+           std_String_fromCharCode(UPPER_Z) === "Z" &&
+           std_String_fromCharCode(LOWER_A) === "a" &&
+           std_String_fromCharCode(LOWER_T) === "t" &&
+           std_String_fromCharCode(LOWER_U) === "u" &&
+           std_String_fromCharCode(LOWER_X) === "x" &&
+           std_String_fromCharCode(LOWER_Z) === "z",
+           "code unit constants should match the expected characters");
 }
 
+MakeConstructible(BCP47TokenStream, {
+    __proto__: null,
+
+    // Reads the next token. Returns |false| on an illegal character or a misplaced
+    // hyphen, otherwise |true|. At the end of input the token is NONE with length 0.
+    //
+    // eslint-disable-next-line object-shorthand
+    nextToken: function() {
+        var type = NONE;
+        var {index, locale} = this;
+        for (var i = index; i < locale.length; i++) {
+            // UTS 35, section 3.1.
+            // alpha = [A-Z a-z] ;
+            // digit = [0-9] ;
+            var c = callFunction(std_String_charCodeAt, locale, i);
+            if ((UPPER_A <= c && c <= UPPER_Z) || (LOWER_A <= c && c <= LOWER_Z))
+                type |= ALPHA;
+            else if (DIGIT_ZERO <= c && c <= DIGIT_NINE)
+                type |= DIGIT;
+            else if (c === HYPHEN && i > index && i + 1 < locale.length)
+                break;
+            else
+                return false;
+        }
+
+        this.token = type;
+        this.tokenStart = index;
+        this.tokenLength = i - index;
+        this.index = i + 1; // the next token starts after the "-" separator
+        return true;
+    },
+
+    // Returns true if the character at the requested index within the current
+    // token is a digit.
+    //
+    // eslint-disable-next-line object-shorthand
+    isDigitAt: function(index) {
+        assert(0 <= index && index < this.tokenLength,
+               "must be an index into the current token");
+        var c = callFunction(std_String_charCodeAt, this.localeLowercase, this.tokenStart + index);
+        assert(!(c <= DIGIT_NINE) || c >= DIGIT_ZERO,
+               "token-start-code-unit <= '9' implies token-start-code-unit is in '0'..'9' " +
+               "and because all digits are sorted before any letters");
+        return c <= DIGIT_NINE;
+    },
+
+    // Returns the code unit of the first character at the current token
+    // position. Always returns the lower-case form of an alphabetical
+    // character.
+    //
+    // eslint-disable-next-line object-shorthand
+    singletonKey: function() {
+        assert(this.tokenLength === 1, "token is not a singleton");
+        var c = callFunction(std_String_charCodeAt, this.localeLowercase, this.tokenStart);
+        assert((DIGIT_ZERO <= c && c <= DIGIT_NINE) || (LOWER_A <= c && c <= LOWER_Z),
+               "unexpected code unit");
+        return c;
+    },
+
+    // eslint-disable-next-line object-shorthand
+    singletonValue: function() { // consumes a singleton plus its subtags; null if malformed
+        var singletonStart = this.tokenStart;
+        var min = callFunction(this.singletonKey, this) === LOWER_X ? 1 : 2;
+
+        NEXT_TOKEN_OR_RETURN_NULL(this);
+
+        // At least one subtag of valid length (|min|..8) must follow the singleton.
+        if (!(min <= this.tokenLength && this.tokenLength <= 8))
+            return null;
+        do {
+            NEXT_TOKEN_OR_RETURN_NULL(this);
+        } while (min <= this.tokenLength && this.tokenLength <= 8);
+
+        return callFunction(this.singletonValueAt, this, singletonStart);
+    },
+
+    // eslint-disable-next-line object-shorthand
+    singletonValueAt: function(start) { // lower-cased text from |start| up to the current token
+        // Singletons must be followed by a non-singleton subtag, "en-a-b" is not allowed.
+        var length = this.tokenStart - 1 - start; // excludes the "-" before the current token
+        if (length <= 2)
+            return null;
+        return Substring(this.localeLowercase, start, length);
+    }
+});
+
+/* eslint-disable complexity */
+/**
+ * Parser for Unicode BCP 47 locale identifiers.
+ *
+ * Returns null if |locale| can't be parsed as a `unicode_locale_id`. If the
+ * input is a grandfathered language tag, it is directly canonicalized to its
+ * modern form. The returned object has the following structure:
+ *
+ *   {
+ *     language: `unicode_language_subtag`,
+ *     script: `unicode_script_subtag` / undefined,
+ *     region: `unicode_region_subtag` / undefined,
+ *     variants: array of `unicode_variant_subtag`,
+ *     extensions: array of `extensions`,
+ *     privateuse: `pu_extensions` / undefined,
+ *   }
+ *
+ * All locale identifier subtags are returned in their normalized case:
+ *
+ *   var langtag = parseLanguageTag("en-latn-us");
+ *   assertEq("en", langtag.language);
+ *   assertEq("Latn", langtag.script);
+ *   assertEq("US", langtag.region);
+ *
+ * Spec: https://unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers
+ */
+function parseLanguageTag(locale) {
+    assert(typeof locale === "string", "locale is a string");
+
+    // unicode_locale_id = unicode_language_id
+    //                     extensions*
+    //                     pu_extensions? ;
+    var ts = new BCP47TokenStream(locale);
+    NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+    var language, script, region, privateuse;
+    var variants = [];
+    var extensions = [];
+
+    // unicode_language_id = unicode_language_subtag
+    //                       (sep unicode_script_subtag)?
+    //                       (sep unicode_region_subtag)?
+    //                       (sep unicode_variant_subtag)* ;
+    //
+    // sep                 = "-"
+    //
+    // Note: Unicode CLDR locale identifier backward compatibility extensions
+    //       have been removed from `unicode_language_id`.
+
+    // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
+    if (ts.token !== ALPHA || ts.tokenLength === 1 || ts.tokenLength === 4 || ts.tokenLength > 8) {
+        // Four character language subtags are not allowed in Unicode BCP 47
+        // locale identifiers. Also see the comparison to Unicode CLDR locale
+        // identifiers in <https://unicode.org/reports/tr35/#BCP_47_Conformance>.
+        return null;
+    }
+    assert((2 <= ts.tokenLength && ts.tokenLength <= 3) ||
+           (5 <= ts.tokenLength && ts.tokenLength <= 8),
+           "language subtags have 2-3 or 5-8 letters");
+
+    SUBTAG_VAR_OR_RETURN_NULL(ts, language);
+
+    // unicode_script_subtag = alpha{4} ;
+    if (ts.tokenLength === 4 && ts.token === ALPHA) {
+        SUBTAG_VAR_OR_RETURN_NULL(ts, script);
 
-function getDuplicateSingletonRE() {
-    if (internalIntlRegExps.duplicateSingletonRE)
-        return internalIntlRegExps.duplicateSingletonRE;
-
-    // RFC 5234 section B.1
-    // ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
-    var ALPHA = "[a-zA-Z]";
-    // DIGIT          =  %x30-39
-    //                        ; 0-9
-    var DIGIT = "[0-9]";
-
-    // RFC 5646 section 2.1
-    // alphanum      = (ALPHA / DIGIT)     ; letters and numbers
-    var alphanum = "(?:" + ALPHA + "|" + DIGIT + ")";
-    // singleton     = DIGIT               ; 0 - 9
-    //               / %x41-57             ; A - W
-    //               / %x59-5A             ; Y - Z
-    //               / %x61-77             ; a - w
-    //               / %x79-7A             ; y - z
-    var singleton = "(?:" + DIGIT + "|[A-WY-Za-wy-z])";
-
-    // Match a langtag that contains a duplicate singleton.
-    var duplicateSingleton =
-        // Match a singleton subtag, parenthesised so that we can refer back to
-        // it later;
-        "-(" + singleton + ")-" +
-        // then zero or more subtags;
-        "(?:" + alphanum + "+-)*" +
-        // and the same singleton again
-        "\\1" +
-        // ...but not followed by any characters that would turn it into a
-        // different subtag.
-        "(?!" + alphanum + ")";
-
-    // Language tags are case insensitive (RFC 5646 section 2.1.1).  Using
-    // character classes covering both upper- and lower-case characters nearly
-    // addresses this -- but for the possibility of singleton repetition with
-    // differing case, e.g. "en-u-foo-U-foo".  Use a case-insensitive regular
-    // expression to address this.  (Note that there's no worry about case
-    // transformation accepting invalid characters here: users have already
-    // verified the string is alphanumeric Latin plus "-".)
-    return (internalIntlRegExps.duplicateSingletonRE = RegExpCreate(duplicateSingleton, "i"));
+        // The first character of a script code needs to be capitalized.
+        // "hans" -> "Hans"
+        script = callFunction(std_String_toUpperCase, script[0]) +
+                 Substring(script, 1, script.length - 1);
+    }
+
+    // unicode_region_subtag = (alpha{2} | digit{3}) ;
+    if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+        (ts.tokenLength === 3 && ts.token === DIGIT))
+    {
+        SUBTAG_VAR_OR_RETURN_NULL(ts, region);
+
+        // Region codes need to be in upper-case. "bu" -> "BU"
+        region = callFunction(std_String_toUpperCase, region);
+    }
+
+    // unicode_variant_subtag = (alphanum{5,8}
+    //                        | digit alphanum{3}) ;
+    //
+    // alphanum               = [0-9 A-Z a-z] ;
+    while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+           (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+    {
+        // Locale identifiers are case insensitive (UTS 35, section 3.2).
+        // All seen variants are compared ignoring case differences by
+        // using the lower-case form. This allows us to properly detect and
+        // reject variant repetitions with differing case, e.g.
+        // "en-variant-Variant".
+        var variant;
+        SUBTAG_VAR_OR_RETURN_NULL(ts, variant);
+
+        // Reject the Locale identifier if a duplicate variant was found.
+        //
+        // This linear-time verification step means the whole variant
+        // subtag checking is potentially quadratic, but we're okay doing
+        // that because language tags are unlikely to be deliberately
+        // pathological.
+        if (callFunction(ArrayIndexOf, variants, variant) !== -1)
+            return null;
+        _DefineDataProperty(variants, variants.length, variant);
+    }
+
+    // extensions = unicode_locale_extensions
+    //            | transformed_extensions
+    //            | other_extensions ;
+    //
+    // unicode_locale_extensions = sep [uU]
+    //                             ((sep keyword)+
+    //                             |(sep attribute)+ (sep keyword)*) ;
+    //
+    // transformed_extensions = sep [tT]
+    //                          ((sep tlang (sep tfield)*)
+    //                          |(sep tfield)+) ;
+    //
+    // other_extensions = [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
+    //
+    // keyword = key (sep type)? ;
+    //
+    // key = alphanum alpha ;
+    //
+    // type = alphanum{3,8} (sep alphanum{3,8})* ;
+    //
+    // attribute = alphanum{3,8} ;
+    //
+    // tlang = unicode_language_subtag
+    //         (sep unicode_script_subtag)?
+    //         (sep unicode_region_subtag)?
+    //         (sep unicode_variant_subtag)* ;
+    //
+    // tfield = tkey tvalue;
+    //
+    // tkey = alpha digit ;
+    //
+    // tvalue = (sep alphanum{3,8})+ ;
+    var seenSingletons = [];
+    while (ts.tokenLength === 1) {
+        var singleton = callFunction(ts.singletonKey, ts);
+        if (singleton === LOWER_X)
+            break;
+
+        // Locale identifiers are case insensitive (UTS 35, section 3.2).
+        // Ensure |singletonKey()| does not return the code unit of an
+        // upper-case character, so we can properly detect and reject
+        // singletons with different case, e.g. "en-u-foo-U-foo".
+        assert(!(UPPER_A <= singleton && singleton <= UPPER_Z),
+               "unexpected upper-case code unit");
+
+        // Reject the input if a duplicate singleton was found.
+        //
+        // Similar to the variant validation step this check is O(n**2),
+        // but given that there are only 35 possible singletons the
+        // quadratic runtime is negligible.
+        if (callFunction(ArrayIndexOf, seenSingletons, singleton) !== -1)
+            return null;
+        _DefineDataProperty(seenSingletons, seenSingletons.length, singleton);
+
+        var extension;
+        if (singleton === LOWER_U) {
+            var extensionStart = ts.tokenStart;
+            NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+            while (2 <= ts.tokenLength && ts.tokenLength <= 8) {
+                // `key` doesn't allow a digit as its second character.
+                if (ts.tokenLength === 2 && callFunction(ts.isDigitAt, ts, 1))
+                    return null;
+                NEXT_TOKEN_OR_RETURN_NULL(ts);
+            }
+            extension = callFunction(ts.singletonValueAt, ts, extensionStart);
+        } else if (singleton === LOWER_T) {
+            var extensionStart = ts.tokenStart;
+            NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+            // `tfield` starts with `tkey`, which in turn is `alpha digit`, so
+            // an alpha-only token must be a `tlang`.
+            if (ts.token === ALPHA) {
+                // `unicode_language_subtag`
+                if (ts.tokenLength === 1 || ts.tokenLength === 4 || ts.tokenLength > 8)
+                    return null;
+                NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+                // `unicode_script_subtag` (optional)
+                if (ts.tokenLength === 4 && ts.token === ALPHA) {
+                    NEXT_TOKEN_OR_RETURN_NULL(ts);
+                }
+
+                // `unicode_region_subtag` (optional)
+                if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+                    (ts.tokenLength === 3 && ts.token === DIGIT))
+                {
+                    NEXT_TOKEN_OR_RETURN_NULL(ts);
+                }
+
+                // `unicode_variant_subtag` (optional)
+                while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+                       (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+                {
+                    NEXT_TOKEN_OR_RETURN_NULL(ts);
+                }
+            }
+
+            // Trailing `tfield` subtags.
+            while (ts.tokenLength === 2) {
+                // `tkey` is `alpha digit`.
+                if (callFunction(ts.isDigitAt, ts, 0) ||
+                    !callFunction(ts.isDigitAt, ts, 1))
+                {
+                    return null;
+                }
+                NEXT_TOKEN_OR_RETURN_NULL(ts);
+
+                // `tfield` requires at least one `tvalue`.
+                if (!(3 <= ts.tokenLength && ts.tokenLength <= 8))
+                    return null;
+                do {
+                    NEXT_TOKEN_OR_RETURN_NULL(ts);
+                } while (3 <= ts.tokenLength && ts.tokenLength <= 8);
+            }
+            extension = callFunction(ts.singletonValueAt, ts, extensionStart);
+        } else {
+            extension = callFunction(ts.singletonValue, ts);
+        }
+        if (!extension) // singletonValue()/singletonValueAt() return null when malformed
+            return null;
+
+        _DefineDataProperty(extensions, extensions.length, extension);
+    }
+
+    // Trailing pu_extensions component of the unicode_locale_id production.
+    //
+    // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
+    if (ts.tokenLength === 1 && callFunction(ts.singletonKey, ts) === LOWER_X) {
+        privateuse = callFunction(ts.singletonValue, ts);
+        if (!privateuse)
+            return null;
+    }
+
+    // Reject the input if it couldn't be parsed completely.
+    if (ts.token !== NONE)
+        return null;
+
+    var tagObj = {
+        language,
+        script,
+        region,
+        variants,
+        extensions,
+        privateuse,
+    };
+
+    // Handle grandfathered tags right away, so we don't need to have extra
+    // paths for grandfathered tags later on.
+    //
+    // grandfathered = "art-lojban"     ; non-redundant tags registered
+    //               / "cel-gaulish"    ; during the RFC 3066 era
+    //               / "zh-guoyu"       ; these tags match the 'langtag'
+    //               / "zh-hakka"       ; production, but their subtags
+    //               / "zh-xiang"       ; are not extended language
+    //                                  ; or variant subtags: their meaning
+    //                                  ; is defined by their registration
+    //                                  ; and all of these are deprecated
+    //                                  ; in favor of a more modern
+    //                                  ; subtag or sequence of subtags
+    if (hasOwn(ts.localeLowercase, grandfatheredMappings))
+        updateGrandfatheredMappings(tagObj);
+
+    // Return if the complete input was successfully parsed.
+    return tagObj;
 }
 
+/**
+ * Return the locale (the tlang, or undefined) and fields (array of {key, value})
+ * components of the given valid Transform extension subtag.
+ */
+function TransformExtensionComponents(extension) {
+    assert(typeof extension === "string", "extension is a String value");
+    assert(callFunction(std_String_startsWith, extension, "t-"),
+           "extension starts with 't-'");
+
+    var ts = new BCP47TokenStream(Substring(extension, 2, extension.length - 2));
+    NEXT_TOKEN_OR_ASSERT(ts);
+
+    // `tfield` starts with `tkey`, which in turn is `alpha digit`, so
+    // an alpha-only token must be a `tlang`.
+    var localeObj;
+    if (ts.token === ALPHA) {
+        // `unicode_language_subtag`
+        assert((2 <= ts.tokenLength && ts.tokenLength <= 3) ||
+                (5 <= ts.tokenLength && ts.tokenLength <= 8),
+                "language subtags have 2-3 or 5-8 letters");
+
+        var language;
+        SUBTAG_VAR_OR_ASSERT(ts, language);
+
+        // unicode_script_subtag = alpha{4} ;
+        var script;
+        if (ts.tokenLength === 4 && ts.token === ALPHA) {
+            SUBTAG_VAR_OR_ASSERT(ts, script);
+
+            // The first character of a script code needs to be capitalized.
+            // "hans" -> "Hans"
+            script = callFunction(std_String_toUpperCase, script[0]) +
+                     Substring(script, 1, script.length - 1);
+        }
+
+        // unicode_region_subtag = (alpha{2} | digit{3}) ;
+        var region;
+        if ((ts.tokenLength === 2 && ts.token === ALPHA) ||
+            (ts.tokenLength === 3 && ts.token === DIGIT))
+        {
+            SUBTAG_VAR_OR_ASSERT(ts, region);
+
+            // Region codes need to be in upper-case. "bu" -> "BU"
+            region = callFunction(std_String_toUpperCase, region);
+        }
+
+        // unicode_variant_subtag = (alphanum{5,8}
+        //                        | digit alphanum{3}) ;
+        //
+        // alphanum               = [0-9 A-Z a-z] ;
+        var variants = [];
+        while ((5 <= ts.tokenLength && ts.tokenLength <= 8) ||
+               (ts.tokenLength === 4 && callFunction(ts.isDigitAt, ts, 0)))
+        {
+            var variant;
+            SUBTAG_VAR_OR_ASSERT(ts, variant);
+
+            _DefineDataProperty(variants, variants.length, variant);
+        }
+
+        localeObj = {
+            language,
+            script,
+            region,
+            variants,
+            extensions: [],
+            privateuse: undefined,
+        };
+    }
+
+    // Trailing `tfield` subtags. (Any other trailing subtags are an error,
+    // because we're guaranteed to only see a valid transform extension here.)
+    var fields = [];
+    while (ts.tokenLength === 2) {
+        // `tkey` is `alpha digit`.
+        assert(!callFunction(ts.isDigitAt, ts, 0) && callFunction(ts.isDigitAt, ts, 1),
+               "unexpected invalid tkey subtag");
+
+        var key;
+        SUBTAG_VAR_OR_ASSERT(ts, key);
+
+        // `tfield` requires at least one `tvalue`.
+        assert(3 <= ts.tokenLength && ts.tokenLength <= 8,
+               "unexpected invalid tvalue subtag");
+
+        var value;
+        SUBTAG_VAR_OR_ASSERT(ts, value);
+
+        while (3 <= ts.tokenLength && ts.tokenLength <= 8) {
+            var part;
+            SUBTAG_VAR_OR_ASSERT(ts, part);
+            value += "-" + part;
+        }
+
+        _DefineDataProperty(fields, fields.length, {key, value});
+    }
+
+    assert(ts.token === NONE,
+           "unexpected trailing characters in promised-to-be-valid transform extension");
+
+    return {locale: localeObj, fields}; // |locale| is undefined when there is no tlang
+}
+/* eslint-enable complexity */
+
+#undef NONE
+#undef ALPHA
+#undef DIGIT
+
+#undef HYPHEN
+#undef DIGIT_ZERO
+#undef DIGIT_NINE
+#undef UPPER_A
+#undef UPPER_Z
+#undef LOWER_A
+#undef LOWER_T
+#undef LOWER_U
+#undef LOWER_X
+#undef LOWER_Z
+
+#undef SUBTAG_VAR_OR_ASSERT
+#undef SUBTAG_VAR_OR_RETURN_NULL
+#undef NEXT_TOKEN_OR_ASSERT
+#undef NEXT_TOKEN_OR_RETURN_NULL
 
 /**
  * Verifies that the given string is a well-formed BCP 47 language tag
@@ -278,53 +689,369 @@ function getDuplicateSingletonRE() {
  * Spec: ECMAScript Internationalization API Specification, 6.2.2.
  */
 function IsStructurallyValidLanguageTag(locale) {
-    assert(typeof locale === "string", "IsStructurallyValidLanguageTag");
-    var languageTagRE = getLanguageTagRE();
-    if (!regexp_test_no_statics(languageTagRE, locale))
-        return false;
-
-    // Before checking for duplicate variant or singleton subtags with
-    // regular expressions, we have to get private use subtag sequences
-    // out of the picture.
-    if (callFunction(std_String_startsWith, locale, "x-"))
-        return true;
-    var pos = callFunction(std_String_indexOf, locale, "-x-");
-    if (pos !== -1)
-        locale = callFunction(String_substring, locale, 0, pos);
-
-    // Check for duplicate variant or singleton subtags.
-    var duplicateVariantRE = getDuplicateVariantRE();
-    var duplicateSingletonRE = getDuplicateSingletonRE();
-    return !regexp_test_no_statics(duplicateVariantRE, locale) &&
-           !regexp_test_no_statics(duplicateSingletonRE, locale);
+    return parseLanguageTag(locale) !== null;
 }
 
 /**
- * Joins the array elements in the given range with the supplied separator.
+ * Canonicalizes the given structurally valid Unicode BCP 47 locale identifier,
+ * including regularized case of subtags. For example, the language tag
+ * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
+ *
+ *     Zh             ; 2*3ALPHA
+ *     -haNS          ; ["-" script]
+ *     -bu            ; ["-" region]
+ *     -variant2      ; *("-" variant)
+ *     -Variant1
+ *     -u-ca-chinese  ; *("-" extension)
+ *     -t-Zh-laTN
+ *     -x-PRIVATE     ; ["-" privateuse]
+ *
+ * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
+ *
+ * UTS 35 specifies two different canonicalization algorithms. There's one to
+ * canonicalize BCP 47 language tags and another one to canonicalize Unicode
+ * locale identifiers. The latter one wasn't present when ECMA-402 was changed
+ * to use Unicode BCP 47 locale identifiers instead of BCP 47 language tags, so
+ * ECMA-402 currently only uses the former to canonicalize Unicode BCP 47 locale
+ * identifiers.
+ *
+ * Spec: ECMAScript Internationalization API Specification, 6.2.3.
+ * Spec: https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
+ * Spec: https://unicode.org/reports/tr35/#BCP_47_Language_Tag_Conversion
  */
-function ArrayJoinRange(array, separator, from, to = array.length) {
-    assert(typeof separator === "string", "|separator| is a string value");
-    assert(typeof from === "number", "|from| is a number value");
-    assert(typeof to === "number", "|to| is a number value");
-    assert(0 <= from && from <= to && to <= array.length, "|from| and |to| form a valid range");
+function CanonicalizeLanguageTagObject(localeObj) {
+    assert(IsObject(localeObj), "CanonicalizeLanguageTagObject");
 
-    if (from === to)
-        return "";
+    // Per UTS 35, 3.3.1, the very first step is to canonicalize the syntax by
+    // normalizing the case and ordering all subtags. The canonical syntax form
+    // itself is specified in UTS 35, 3.2.1.
+
+    // The parser already normalized the case for all subtags.
 
-    var result = array[from];
-    for (var i = from + 1; i < to; i++) {
-        result += separator + array[i];
+#ifdef DEBUG
+    function IsLowerCase(s) {
+        return s === callFunction(std_String_toLowerCase, s);
     }
-    return result;
+    function IsUpperCase(s) {
+        return s === callFunction(std_String_toUpperCase, s);
+    }
+    function IsTitleCase(s) { // True iff |s| equals upper-cased first char + lower-cased remainder.
+        assert(s.length > 0, "unexpected empty string");
+        var r = callFunction(std_String_toUpperCase, s[0]) +
+                callFunction(std_String_toLowerCase, Substring(s, 1, s.length - 1));
+        return s === r;
+    }
+#endif
+
+    // 1. Any script subtag is in title case.
+    assert(localeObj.script === undefined || IsTitleCase(localeObj.script),
+           "If present, script subtag is in title case");
+
+    // 2. Any region subtag is in uppercase.
+    assert(localeObj.region === undefined || IsUpperCase(localeObj.region),
+           "If present, region subtag is in upper case");
+
+    // 3. All other subtags are in lowercase.
+    assert(IsLowerCase(localeObj.language),
+           "language subtag is in lower case");
+    assert(callFunction(ArrayEvery, localeObj.variants, IsLowerCase),
+           "variant subtags are in lower case");
+    assert(callFunction(ArrayEvery, localeObj.extensions, IsLowerCase),
+           "extension subtags are in lower case");
+    assert(localeObj.privateuse === undefined || IsLowerCase(localeObj.privateuse),
+           "If present, privateuse subtag is in lower case");
+
+
+    // The second step in UTS 35, 3.2.1, is to order all subtags.
+
+    // 1. Any variants are in alphabetical order.
+    var variants = localeObj.variants;
+    if (variants.length > 0) {
+        callFunction(ArraySort, variants);
+    }
+
+    // 2. Any extensions are in alphabetical order by their singleton.
+    var extensions = localeObj.extensions;
+    if (extensions.length > 0) {
+        // Extension sequences are sorted by their singleton characters.
+        // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
+        callFunction(ArraySort, extensions);
+
+        // The last three bullet points in UTS 35, 3.2.1 apply only to Unicode and Transform
+        // extensions.
+        //
+        // 3. All attributes are sorted in alphabetical order.
+        //
+        // 4. All keywords and tfields are sorted by alphabetical order of their
+        //    keys, within their respective extensions.
+        //
+        // 5. Any type or tfield value "true" is removed.
+
+        for (var i = 0; i < extensions.length; i++) {
+            var ext = extensions[i];
+            assert(IsLowerCase(ext),
+                   "extension subtags must be in lower-case");
+            assert(ext[1] === "-",
+                   "extension subtags start with a singleton");
+
+            // Canonicalize Unicode locale extension subtag if present.
+            if (ext[0] === "u") {
+                var {attributes, keywords} = UnicodeExtensionComponents(ext);
+                extensions[i] = CanonicalizeUnicodeExtension(attributes, keywords);
+            }
+
+            // Canonicalize Unicode BCP 47 T extension if present.
+            if (ext[0] === "t") {
+                var {locale, fields} = TransformExtensionComponents(ext);
+                extensions[i] = CanonicalizeTransformExtension(locale, fields);
+            }
+        }
+    }
+
+    // The next two steps in 3.3.1 replace deprecated language and region
+    // subtags with their preferred mappings.
+    updateLocaleIdMappings(localeObj);
+
+    // The two final steps in 3.3.1, handling irregular grandfathered and
+    // private-use only language tags, don't apply, because these two forms
+    // can't occur in Unicode BCP 47 locale identifiers.
+}
+
+/**
+ * Intl.Locale proposal
+ *
+ * UnicodeExtensionComponents( extension )
+ *
+ * Returns the components of |extension| where |extension| is a "Unicode locale
+ * extension sequence" (ECMA-402, 6.2.1) without the starting separator
+ * character.
+ */
+function UnicodeExtensionComponents(extension) {
+    assert(typeof extension === "string", "extension is a String value");
+
+    // Step 1.
+    var attributes = [];
+
+    // Step 2.
+    var keywords = [];
+
+    // Step 3.
+    var isKeyword = false;
+
+    // Step 4.
+    var size = extension.length;
+
+    // Step 5.
+    // |extension| starts with "u-" instead of "-u-" in our implementation, so
+    // we need to initialize |k| with 2 instead of 3.
+    assert(callFunction(std_String_startsWith, extension, "u-"),
+           "extension starts with 'u-'");
+    var k = 2;
+
+    // Step 6.
+    var key, value;
+    while (k < size) {
+        // Step 6.a.
+        var e = callFunction(std_String_indexOf, extension, "-", k);
+
+        // Step 6.b.
+        var len = (e < 0 ? size : e) - k;
+
+        // Step 6.c.
+        var subtag = Substring(extension, k, len);
+
+        // Steps 6.d-e.
+        if (!isKeyword) {
+            // Step 6.d.
+            // NB: Duplicates are handled elsewhere in our implementation.
+            if (len !== 2)
+                _DefineDataProperty(attributes, attributes.length, subtag);
+        } else {
+            // Steps 6.e.i-ii.
+            if (len === 2) {
+                // Step 6.e.i.1.
+                // NB: Duplicates are handled elsewhere in our implementation.
+                _DefineDataProperty(keywords, keywords.length, {key, value});
+            } else {
+                // Step 6.e.ii.1.
+                if (value !== "")
+                    value += "-";
+
+                // Step 6.e.ii.2.
+                value += subtag;
+            }
+        }
+
+        // Step 6.f.
+        if (len === 2) {
+            // Step 6.f.i.
+            isKeyword = true;
+
+            // Step 6.f.ii.
+            key = subtag;
+
+            // Step 6.f.iii.
+            value = "";
+        }
+
+        // Step 6.g.
+        k += len + 1;
+    }
+
+    // Step 7.
+    if (isKeyword) {
+        // Step 7.a.
+        // NB: Duplicates are handled elsewhere in our implementation.
+        _DefineDataProperty(keywords, keywords.length, {key, value});
+    }
+
+    // Step 8.
+    return {attributes, keywords};
+}
+
+/**
+ * CanonicalizeUnicodeExtension( attributes, keywords )
+ *
+ * Canonical syntax per <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All attributes and keywords are in lowercase.
+ *   - Note: The parser already converted keywords to lowercase.
+ * - All attributes are sorted in alphabetical order.
+ * - All keywords are sorted by alphabetical order of their keys.
+ * - Any type value "true" is removed.
+ *
+ * Canonical form:
+ * - All keys and types use the canonical form (from the name attribute;
+ *   see Section 3.6.4 U Extension Data Files).
+ */
+function CanonicalizeUnicodeExtension(attributes, keywords) {
+    assert(attributes.length > 0 || keywords.length > 0,
+           "unexpected empty Unicode locale extension components");
+
+    // All attributes are sorted in alphabetical order.
+    if (attributes.length > 1)
+        callFunction(ArraySort, attributes);
+
+    // All keywords are sorted by alphabetical order of keys.
+    if (keywords.length > 1) {
+        function UnicodeKeySort(left, right) {
+            var leftKey = left.key;
+            var rightKey = right.key;
+            assert(leftKey.length === 2, "left key is a Unicode key");
+            assert(rightKey.length === 2, "right key is a Unicode key");
+
+            // Compare both strings using charCodeAt(), because relational
+            // string comparison always calls into the VM, whereas charCodeAt
+            // can be inlined by Ion.
+            var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
+                       callFunction(std_String_charCodeAt, rightKey, 0);
+            if (diff === 0) {
+                diff = callFunction(std_String_charCodeAt, leftKey, 1) -
+                       callFunction(std_String_charCodeAt, rightKey, 1);
+            }
+            return diff;
+        }
+
+        callFunction(ArraySort, keywords, UnicodeKeySort);
+    }
+
+    var extension = "u";
+
+    // Append all attributes.
+    for (var i = 0; i < attributes.length; i++) {
+        extension += "-" + attributes[i];
+    }
+
+    // Append all keywords.
+    for (var i = 0; i < keywords.length; i++) {
+        var {key, value} = keywords[i];
+        extension += "-" + key;
+
+        // Type value "true" is removed.
+        if (value !== "" && value !== "true")
+            extension += "-" + value;
+    }
+
+    return extension;
+}
+
+/**
+ * CanonicalizeTransformExtension( localeObj, fields )
+ *
+ * Canonical form per <https://unicode.org/reports/tr35/#BCP47_T_Extension>:
+ *
+ * - These subtags are all in lowercase (that is the canonical casing for these
+ *   subtags), [...].
+ *
+ * And per <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>:
+ *
+ * - All keywords and tfields are sorted by alphabetical order of their keys,
+ *   within their respective extensions.
+ */
+function CanonicalizeTransformExtension(localeObj, fields) {
+    assert(localeObj !== undefined || fields.length > 0,
+           "unexpected empty Transform locale extension components");
+
+    if (fields.length > 0) {
+        function TransformKeySort(left, right) {
+            var leftKey = left.key;
+            var rightKey = right.key;
+            assert(leftKey.length === 2, "left key is a Transform key");
+            assert(rightKey.length === 2, "right key is a Transform key");
+
+            // Compare both strings using charCodeAt(), because relational
+            // string comparison always calls into the VM, whereas charCodeAt
+            // can be inlined by Ion.
+            var diff = callFunction(std_String_charCodeAt, leftKey, 0) -
+                       callFunction(std_String_charCodeAt, rightKey, 0);
+            if (diff === 0) {
+                diff = callFunction(std_String_charCodeAt, leftKey, 1) -
+                       callFunction(std_String_charCodeAt, rightKey, 1);
+            }
+            return diff;
+        }
+
+        callFunction(ArraySort, fields, TransformKeySort);
+    }
+
+    var extension = "t";
+
+    // Append the `tlang` language identifier if present.
+    if (localeObj !== undefined) {
+        // [1] is a bit unclear whether or not the `tlang` subtag also needs
+        // to be canonicalized (and case-adjusted). For now simply append it as
+        // is and change it to all lower-case. If we switch to [2], the `tlang`
+        // subtag also needs to be canonicalized according to the same rules as
+        // `unicode_language_id` subtags are canonicalized. Also see [3].
+        //
+        // [1] https://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier
+        // [2] https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
+        // [3] https://github.com/tc39/ecma402/issues/330
+        var localeStr = StringFromLanguageTagObject(localeObj);
+        extension += "-" + callFunction(std_String_toLowerCase, localeStr);
+    }
+
+    // Append all fields.
+    for (var i = 0; i < fields.length; i++) {
+        // UTS 35, 3.2.1 specifies:
+        // - Any type or tfield value "true" is removed.
+        //
+        // But the `tvalue` subtag is mandatory in `tfield: tkey tvalue`, so
+        // ignore this apparently invalid part of the UTS 35 specification and
+        // simply append all `tfield` subtags.
+        var {key, value} = fields[i];
+        extension += "-" + key + "-" + value;
+    }
+
+    return extension;
 }
 
 /**
  * Canonicalizes the given structurally valid BCP 47 language tag, including
  * regularized case of subtags. For example, the language tag
- * Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
+ * Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE, where
  *
  *     Zh             ; 2*3ALPHA
- *     -NAN           ; ["-" extlang]
  *     -haNS          ; ["-" script]
  *     -bu            ; ["-" region]
  *     -variant2      ; *("-" variant)
@@ -333,120 +1060,54 @@ function ArrayJoinRange(array, separator, from, to = array.length) {
  *     -t-Zh-laTN
  *     -x-PRIVATE     ; ["-" privateuse]
  *
- * becomes nan-Hans-mm-variant2-variant1-t-zh-latn-u-ca-chinese-x-private
+ * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
  *
  * Spec: ECMAScript Internationalization API Specification, 6.2.3.
- * Spec: RFC 5646, section 4.5.
  */
 function CanonicalizeLanguageTag(locale) {
-    assert(IsStructurallyValidLanguageTag(locale), "CanonicalizeLanguageTag");
+    var localeObj = parseLanguageTag(locale);
+    assert(localeObj !== null, "CanonicalizeLanguageTag");
 
-    // The input
-    // "Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE"
-    // will be used throughout this method to illustrate how it works.
+    CanonicalizeLanguageTagObject(localeObj);
 
-    // Language tags are compared and processed case-insensitively, so
-    // technically it's not necessary to adjust case. But for easier processing,
-    // and because the canonical form for most subtags is lower case, we start
-    // with lower case for all.
-    // "Zh-NAN-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE" ->
-    // "zh-nan-hans-bu-variant2-variant1-u-ca-chinese-t-zh-latn-x-private"
-    locale = callFunction(std_String_toLowerCase, locale);
-
-    // Handle mappings for complete tags.
-    if (hasOwn(locale, langTagMappings))
-        return langTagMappings[locale];
-
-    var subtags = StringSplitString(ToString(locale), "-");
-    var i = 0;
-
-    // Handle the standard part: All subtags before the first singleton or "x".
-    // "zh-nan-hans-bu-variant2-variant1"
-    while (i < subtags.length) {
-        var subtag = subtags[i];
-
-        // If we reach the start of an extension sequence or private use part,
-        // we're done with this loop. We have to check for i > 0 because for
-        // irregular language tags, such as i-klingon, the single-character
-        // subtag "i" is not the start of an extension sequence.
-        // In the example, we break at "u".
-        if (subtag.length === 1 && (i > 0 || subtag === "x"))
-            break;
+    return StringFromLanguageTagObject(localeObj);
+}
 
-        if (i !== 0) {
-            if (subtag.length === 4) {
-                // 4-character subtags that are not in initial position are
-                // script codes; their first character needs to be capitalized.
-                // "hans" -> "Hans"
-                subtag = callFunction(std_String_toUpperCase, subtag[0]) +
-                         callFunction(String_substring, subtag, 1);
-            } else if (subtag.length === 2) {
-                // 2-character subtags that are not in initial position are
-                // region codes; they need to be upper case. "bu" -> "BU"
-                subtag = callFunction(std_String_toUpperCase, subtag);
-            }
-        }
-        if (hasOwn(subtag, langSubtagMappings)) {
-            // Replace deprecated subtags with their preferred values.
-            // "BU" -> "MM"
-            // This has to come after we capitalize region codes because
-            // otherwise some language and region codes could be confused.
-            // For example, "in" is an obsolete language code for Indonesian,
-            // but "IN" is the country code for India.
-            // Note that the script generating langSubtagMappings makes sure
-            // that no regular subtag mapping will replace an extlang code.
-            subtag = langSubtagMappings[subtag];
-        } else if (hasOwn(subtag, extlangMappings)) {
-            // Replace deprecated extlang subtags with their preferred values,
-            // and remove the preceding subtag if it's a redundant prefix.
-            // "zh-nan" -> "nan"
-            // Note that the script generating extlangMappings makes sure that
-            // no extlang mapping will replace a normal language code.
-            subtag = extlangMappings[subtag].preferred;
-            if (i === 1 && extlangMappings[subtag].prefix === subtags[0]) {
-                callFunction(std_Array_shift, subtags);
-                i--;
-            }
-        }
-        subtags[i] = subtag;
-        i++;
-    }
-    var normal = ArrayJoinRange(subtags, "-", 0, i);
-
-    // Extension sequences are sorted by their singleton characters.
-    // "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
-    var extensions = new List();
-    while (i < subtags.length && subtags[i] !== "x") {
-        var extensionStart = i;
-        i++;
-        while (i < subtags.length && subtags[i].length > 1)
-            i++;
-        var extension = ArrayJoinRange(subtags, "-", extensionStart, i);
-        callFunction(std_Array_push, extensions, extension);
-    }
-    callFunction(std_Array_sort, extensions);
+/**
+ * Returns the string representation of the given language tag object.
+ */
+function StringFromLanguageTagObject(localeObj) {
+    assert(IsObject(localeObj), "StringFromLanguageTagObject");
+
+    var {
+        language,
+        script,
+        region,
+        variants,
+        extensions,
+        privateuse,
+    } = localeObj;
 
-    // Private use sequences are left as is. "x-private"
-    var privateUse = "";
-    if (i < subtags.length)
-        privateUse = ArrayJoinRange(subtags, "-", i);
+    var canonical = language;
+
+    if (script !== undefined)
+        canonical += "-" + script;
+
+    if (region !== undefined)
+        canonical += "-" + region;
+
+    if (variants.length > 0)
+        canonical += "-" + callFunction(std_Array_join, variants, "-");
 
-    // Put everything back together.
-    var canonical = normal;
     if (extensions.length > 0)
         canonical += "-" + callFunction(std_Array_join, extensions, "-");
-    if (privateUse.length > 0) {
-        // Be careful of a Language-Tag that is entirely privateuse.
-        if (canonical.length > 0)
-            canonical += "-" + privateUse;
-        else
-            canonical = privateUse;
-    }
+
+    if (privateuse !== undefined)
+        canonical += "-" + privateuse;
 
     return canonical;
 }
 
-
 /**
  * Returns true if the input contains only ASCII alphabetical characters.
  */
@@ -469,13 +1130,11 @@ function ValidateAndCanonicalizeLanguageTag(locale) {
     assert(typeof locale === "string", "ValidateAndCanonicalizeLanguageTag");
 
     // Handle the common case (a standalone language) first.
-    // Only the following BCP47 subset is accepted:
-    //   Language-Tag  = langtag
-    //   langtag       = language
-    //   language      = 2*3ALPHA ; shortest ISO 639 code
-    // For three character long strings we need to make sure it's not a
-    // private use only language tag, for example "x-x".
-    if (locale.length === 2 || (locale.length === 3 && locale[1] !== "-")) {
+    // Only the following Unicode BCP 47 locale identifier subset is accepted:
+    //   unicode_locale_id = unicode_language_id
+    //   unicode_language_id = unicode_language_subtag
+    //   unicode_language_subtag = alpha{2,3}
+    if (locale.length === 2 || locale.length === 3) {
         if (!IsASCIIAlphaString(locale))
             ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
         assert(IsStructurallyValidLanguageTag(locale), "2*3ALPHA is a valid language tag");
@@ -483,42 +1142,27 @@ function ValidateAndCanonicalizeLanguageTag(locale) {
         // The language subtag is canonicalized to lower case.
         locale = callFunction(std_String_toLowerCase, locale);
 
-        // langTagMappings doesn't contain any 2*3ALPHA keys, so we don't need
-        // to check for possible replacements in this map.
-        assert(!callFunction(std_Object_hasOwnProperty, langTagMappings, locale),
-               "langTagMappings contains no 2*3ALPHA mappings");
-
-        // Replace deprecated subtags with their preferred values.
-        locale = callFunction(std_Object_hasOwnProperty, langSubtagMappings, locale)
-                 ? langSubtagMappings[locale]
-                 : locale;
-        assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization");
+        // updateLocaleIdMappings may modify tags containing only |language|
+        // subtags, if the language is in |complexLanguageMappings|, so we need
+        // to handle that case first.
+        if (!hasOwn(locale, complexLanguageMappings)) {
+            // Replace deprecated subtags with their preferred values.
+            locale = hasOwn(locale, languageMappings)
+                     ? languageMappings[locale]
+                     : locale;
+            assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization");
 
-        return locale;
+            return locale;
+        }
     }
 
-    if (!IsStructurallyValidLanguageTag(locale))
+    var localeObj = parseLanguageTag(locale);
+    if (localeObj === null)
         ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
 
-    return CanonicalizeLanguageTag(locale);
-}
-
-
-function localeContainsNoUnicodeExtensions(locale) {
-    // No "-u-", no possible Unicode extension.
-    if (callFunction(std_String_indexOf, locale, "-u-") === -1)
-        return true;
-
-    // "-u-" within privateuse also isn't one.
-    if (callFunction(std_String_indexOf, locale, "-u-") > callFunction(std_String_indexOf, locale, "-x-"))
-        return true;
-
-    // An entirely-privateuse tag doesn't contain extensions.
-    if (callFunction(std_String_startsWith, locale, "x-"))
-        return true;
+    CanonicalizeLanguageTagObject(localeObj);
 
-    // Otherwise, we have a Unicode extension sequence.
-    return false;
+    return StringFromLanguageTagObject(localeObj);
 }
 
 
@@ -571,11 +1215,13 @@ function DefaultLocaleIgnoringAvailableLocales() {
 
     // If we didn't get a cache hit, compute the candidate default locale and
     // cache it.  Fall back on the last-ditch locale when necessary.
-    var candidate;
-    if (!IsStructurallyValidLanguageTag(runtimeDefaultLocale)) {
+    var candidate = parseLanguageTag(runtimeDefaultLocale);
+    if (candidate === null) {
         candidate = lastDitchLocale();
     } else {
-        candidate = CanonicalizeLanguageTag(runtimeDefaultLocale);
+        CanonicalizeLanguageTagObject(candidate);
+
+        candidate = StringFromLanguageTagObject(candidate);
 
         // The default locale must be in [[availableLocales]], and that list
         // must not contain any locales with Unicode extension sequences, so
@@ -592,7 +1238,7 @@ function DefaultLocaleIgnoringAvailableLocales() {
 
     assert(IsStructurallyValidLanguageTag(candidate),
            "the candidate must be structurally valid");
-    assert(localeContainsNoUnicodeExtensions(candidate),
+    assert(startOfUnicodeExtensions(candidate) < 0,
            "the candidate must not contain a Unicode extension sequence");
 
     return candidate;
@@ -633,7 +1279,7 @@ function DefaultLocale() {
            "the computed default locale must be structurally valid");
     assert(locale === CanonicalizeLanguageTag(locale),
            "the computed default locale must be canonical");
-    assert(localeContainsNoUnicodeExtensions(locale),
+    assert(startOfUnicodeExtensions(locale) < 0,
            "the computed default locale must not contain a Unicode extension sequence");
 
     localeCache.defaultLocale = locale;
@@ -674,30 +1320,53 @@ function addSpecialMissingLanguageTags(availableLocales) {
  * Spec: ECMAScript Internationalization API Specification, 9.2.1.
  */
 function CanonicalizeLocaleList(locales) {
+    // Step 1.
     if (locales === undefined)
-        return new List();
-    var seen = new List();
+        return [];
+
+    // Step 3 (and the remaining steps).
     if (typeof locales === "string")
-        locales = [locales];
+        return [ValidateAndCanonicalizeLanguageTag(locales)];
+
+    // Step 2.
+    var seen = [];
+
+    // Step 4.
     var O = ToObject(locales);
+
+    // Step 5.
     var len = ToLength(O.length);
+
+    // Step 6.
     var k = 0;
+
+    // Step 7.
     while (k < len) {
-        // Don't call ToString(k) - SpiderMonkey is faster with integers.
-        var kPresent = HasProperty(O, k);
-        if (kPresent) {
+        // Steps 7.a-c.
+        if (k in O) {
+            // Step 7.c.i.
             var kValue = O[k];
+
+            // Step 7.c.ii.
             if (!(typeof kValue === "string" || IsObject(kValue)))
                 ThrowTypeError(JSMSG_INVALID_LOCALES_ELEMENT);
+
+            // Step 7.c.iii.
             var tag = ToString(kValue);
-            if (!IsStructurallyValidLanguageTag(tag))
-                ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, tag);
-            tag = CanonicalizeLanguageTag(tag);
+
+            // Step 7.c.iv.
+            tag = ValidateAndCanonicalizeLanguageTag(tag);
+
+            // Step 7.c.v.
             if (callFunction(ArrayIndexOf, seen, tag) === -1)
-                callFunction(std_Array_push, seen, tag);
+                _DefineDataProperty(seen, seen.length, tag);
         }
+
+        // Step 7.d.
         k++;
     }
+
+    // Step 8.
     return seen;
 }
 
@@ -705,7 +1374,7 @@ function CanonicalizeLocaleList(locales) {
 function BestAvailableLocaleHelper(availableLocales, locale, considerDefaultLocale) {
     assert(IsStructurallyValidLanguageTag(locale), "invalid BestAvailableLocale locale structure");
     assert(locale === CanonicalizeLanguageTag(locale), "non-canonical BestAvailableLocale locale");
-    assert(localeContainsNoUnicodeExtensions(locale), "locale must contain no Unicode extensions");
+    assert(startOfUnicodeExtensions(locale) < 0, "locale must contain no Unicode extensions");
 
     // In the spec, [[availableLocales]] is formally a list of all available
     // locales.  But in our implementation, it's an *incomplete* list, not
@@ -780,28 +1449,37 @@ function BestAvailableLocaleIgnoringDefault(availableLocales, locale) {
  * Spec: RFC 4647, section 3.4.
  */
 function LookupMatcher(availableLocales, requestedLocales) {
-    var i = 0;
-    var len = requestedLocales.length;
-    var availableLocale;
-    var locale, noExtensionsLocale;
-    while (i < len && availableLocale === undefined) {
-        locale = requestedLocales[i];
-        noExtensionsLocale = removeUnicodeExtensions(locale);
-        availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
-        i++;
-    }
-
+    // Step 1.
     var result = new Record();
-    if (availableLocale !== undefined) {
-        result.locale = availableLocale;
-        if (locale !== noExtensionsLocale) {
-            var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
-            var extensionMatch = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, locale);
-            result.extension = extensionMatch[0];
+
+    // Step 2.
+    for (var i = 0; i < requestedLocales.length; i++) {
+        var locale = requestedLocales[i];
+
+        // Step 2.a.
+        var noExtensionsLocale = removeUnicodeExtensions(locale);
+
+        // Step 2.b.
+        var availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
+
+        // Step 2.c.
+        if (availableLocale !== undefined) {
+            // Step 2.c.i.
+            result.locale = availableLocale;
+
+            // Step 2.c.ii.
+            if (locale !== noExtensionsLocale)
+                result.extension = getUnicodeExtensions(locale);
+
+            // Step 2.c.iii.
+            return result;
         }
-    } else {
-        result.locale = DefaultLocale();
     }
+
+    // Steps 3-4.
+    result.locale = DefaultLocale();
+
+    // Step 5.
     return result;
 }
 
@@ -823,73 +1501,73 @@ function BestFitMatcher(availableLocales, requestedLocales) {
 /**
  * Returns the Unicode extension value subtags for the requested key subtag.
  *
- * NOTE: PR to add UnicodeExtensionValue to ECMA-402 isn't yet written.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.5.
  */
 function UnicodeExtensionValue(extension, key) {
     assert(typeof extension === "string", "extension is a string value");
-    assert(function() {
-        var unicodeLocaleExtensionSequenceRE = getUnicodeLocaleExtensionSequenceRE();
-        var extensionMatch = regexp_exec_no_statics(unicodeLocaleExtensionSequenceRE, extension);
-        return extensionMatch !== null && extensionMatch[0] === extension;
-    }(), "extension is a Unicode extension subtag");
+    assert(callFunction(std_String_startsWith, extension, "-u-") &&
+           getUnicodeExtensions("und" + extension) === extension,
+           "extension is a Unicode extension subtag");
     assert(typeof key === "string", "key is a string value");
-    assert(key.length === 2, "key is a Unicode extension key subtag");
 
     // Step 1.
-    var size = extension.length;
+    assert(key.length === 2, "key is a Unicode extension key subtag");
 
     // Step 2.
-    var searchValue = "-" + key + "-";
+    var size = extension.length;
 
     // Step 3.
-    var pos = callFunction(std_String_indexOf, extension, searchValue);
+    var searchValue = "-" + key + "-";
 
     // Step 4.
+    var pos = callFunction(std_String_indexOf, extension, searchValue);
+
+    // Step 5.
     if (pos !== -1) {
-        // Step 4.a.
+        // Step 5.a.
         var start = pos + 4;
 
-        // Step 4.b.
+        // Step 5.b.
         var end = start;
 
-        // Step 4.c.
+        // Step 5.c.
         var k = start;
 
-        // Steps 4.d-e.
+        // Steps 5.d-e.
         while (true) {
-            // Step 4.e.i.
+            // Step 5.e.i.
             var e = callFunction(std_String_indexOf, extension, "-", k);
 
-            // Step 4.e.ii.
+            // Step 5.e.ii.
             var len = e === -1 ? size - k : e - k;
 
-            // Step 4.e.iii.
+            // Step 5.e.iii.
             if (len === 2)
                 break;
 
-            // Step 4.e.iv.
+            // Step 5.e.iv.
             if (e === -1) {
                 end = size;
                 break;
             }
 
-            // Step 4.e.v.
+            // Step 5.e.v.
             end = e;
             k = e + 1;
         }
 
-        // Step 4.f.
+        // Step 5.f.
         return callFunction(String_substring, extension, start, end);
     }
 
-    // Step 5.
+    // Step 6.
     searchValue = "-" + key;
 
-    // Steps 6-7.
+    // Steps 7-8.
     if (callFunction(std_String_endsWith, extension, searchValue))
         return "";
 
-    // Step 8 (implicit).
+    // Step 9 (implicit).
 }
 
 /**
@@ -899,11 +1577,9 @@ function UnicodeExtensionValue(extension, key) {
  * caller's relevant extensions and locale data as well as client-provided
  * options into consideration.
  *
- * Spec: ECMAScript Internationalization API Specification, 9.2.5.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.6.
  */
 function ResolveLocale(availableLocales, requestedLocales, options, relevantExtensionKeys, localeData) {
-    /*jshint laxbreak: true */
-
     // Steps 1-3.
     var matcher = options.localeMatcher;
     var r = (matcher === "lookup")
@@ -912,79 +1588,82 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
 
     // Step 4.
     var foundLocale = r.locale;
-
-    // Step 5 (Not applicable in this implementation).
     var extension = r.extension;
 
-    // Steps 6-7.
+    // Step 5.
     var result = new Record();
+
+    // Step 6.
     result.dataLocale = foundLocale;
 
-    // Step 8.
+    // Step 7.
     var supportedExtension = "-u";
 
     // In this implementation, localeData is a function, not an object.
     var localeDataProvider = localeData();
 
-    // Steps 9-12.
+    // Step 8.
     for (var i = 0; i < relevantExtensionKeys.length; i++) {
-        // Steps 12.a-c.
         var key = relevantExtensionKeys[i];
 
-        // Steps 12.b-d (The locale data is only computed when needed).
+        // Steps 8.a-h (The locale data is only computed when needed).
         var keyLocaleData = undefined;
         var value = undefined;
 
         // Locale tag may override.
 
-        // Step 12.e.
+        // Step 8.g.
         var supportedExtensionAddition = "";
 
-        // Step 12.f.
+        // Step 8.h.
         if (extension !== undefined) {
-            // NB: The step annotations don't yet match the ES2017 Intl draft,
-            // 94045d234762ad107a3d09bb6f7381a65f1a2f9b, because the PR to add
-            // the new UnicodeExtensionValue abstract operation still needs to
-            // be written.
-
-            // Step 12.f.i.
+            // Step 8.h.i.
             var requestedValue = UnicodeExtensionValue(extension, key);
 
-            // Step 12.f.ii.
+            // Step 8.h.ii.
             if (requestedValue !== undefined) {
-                // Steps 12.b-c.
+                // Steps 8.a-d.
                 keyLocaleData = callFunction(localeDataProvider[key], null, foundLocale);
 
-                // Step 12.f.ii.1.
+                // Step 8.h.ii.1.
                 if (requestedValue !== "") {
-                    // Step 12.f.ii.1.a.
+                    // Step 8.h.ii.1.a.
                     if (callFunction(ArrayIndexOf, keyLocaleData, requestedValue) !== -1) {
                         value = requestedValue;
                         supportedExtensionAddition = "-" + key + "-" + value;
                     }
                 } else {
-                    // Step 12.f.ii.2.
+                    // Step 8.h.ii.2.
 
                     // According to the LDML spec, if there's no type value,
                     // and true is an allowed value, it's used.
 
-                    if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1)
+                    if (callFunction(ArrayIndexOf, keyLocaleData, "true") !== -1) {
                         value = "true";
+                        supportedExtensionAddition = "-" + key;
+                    }
                 }
             }
         }
 
         // Options override all.
 
-        // Step 12.g.i.
+        // Step 8.i.i.
         var optionsValue = options[key];
 
-        // Step 12.g, 12.gg.ii.
+        // Step 8.i.ii.
+        assert(typeof optionsValue === "string" ||
+               optionsValue === undefined ||
+               optionsValue === null,
+               "unexpected type for options value");
+
+        // Steps 8.i, 8.i.iii.1.
         if (optionsValue !== undefined && optionsValue !== value) {
-            // Steps 12.b-c.
+            // Steps 8.a-d.
             if (keyLocaleData === undefined)
                 keyLocaleData = callFunction(localeDataProvider[key], null, foundLocale);
 
+            // Step 8.i.iii.
             if (callFunction(ArrayIndexOf, keyLocaleData, optionsValue) !== -1) {
                 value = optionsValue;
                 supportedExtensionAddition = "";
@@ -993,27 +1672,29 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
 
         // Locale data provides default value.
         if (value === undefined) {
-            // Steps 12.b-d.
+            // Steps 8.a-f.
             value = keyLocaleData === undefined
                     ? callFunction(localeDataProvider.default[key], null, foundLocale)
                     : keyLocaleData[0];
         }
 
-        // Steps 12.h-j.
+        // Step 8.j.
         assert(typeof value === "string" || value === null, "unexpected locale data value");
         result[key] = value;
+
+        // Step 8.k.
         supportedExtension += supportedExtensionAddition;
     }
 
-    // Step 13.
+    // Step 9.
     if (supportedExtension.length > 2) {
         assert(!callFunction(std_String_startsWith, foundLocale, "x-"),
                "unexpected privateuse-only locale returned from ICU");
 
-        // Step 13.a.
+        // Step 9.a.
         var privateIndex = callFunction(std_String_indexOf, foundLocale, "-x-");
 
-        // Steps 13.b-c.
+        // Steps 9.b-c.
         if (privateIndex === -1) {
             foundLocale += supportedExtension;
         } else {
@@ -1022,19 +1703,19 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
             foundLocale = preExtension + supportedExtension + postExtension;
         }
 
-        // Step 13.d.
+        // Step 9.d.
         assert(IsStructurallyValidLanguageTag(foundLocale), "invalid locale after concatenation");
 
-        // Step 13.e (Not required in this implementation, because we don't
+        // Step 9.e (Not required in this implementation, because we don't
         // canonicalize Unicode extension subtags).
         assert(foundLocale === CanonicalizeLanguageTag(foundLocale), "same locale with extension");
 
     }
 
-    // Step 14.
+    // Step 10.
     result.locale = foundLocale;
 
-    // Step 15.
+    // Step 11.
     return result;
 }
 
@@ -1044,31 +1725,29 @@ function ResolveLocale(availableLocales, requestedLocales, options, relevantExte
  * matching (possibly fallback) locale. Locales appear in the same order in the
  * returned list as in the input list.
  *
- * Spec: ECMAScript Internationalization API Specification, 9.2.6.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.7.
  */
 function LookupSupportedLocales(availableLocales, requestedLocales) {
-    // Steps 1-2.
-    var len = requestedLocales.length;
-    var subset = new List();
+    // Step 1.
+    var subset = [];
 
-    // Steps 3-4.
-    var k = 0;
-    while (k < len) {
-        // Steps 4.a-b.
-        var locale = requestedLocales[k];
+    // Step 2.
+    for (var i = 0; i < requestedLocales.length; i++) {
+        var locale = requestedLocales[i];
+
+        // Step 2.a.
         var noExtensionsLocale = removeUnicodeExtensions(locale);
 
-        // Step 4.c-d.
+        // Step 2.b.
         var availableLocale = BestAvailableLocale(availableLocales, noExtensionsLocale);
-        if (availableLocale !== undefined)
-            callFunction(std_Array_push, subset, locale);
 
-        // Step 4.e.
-        k++;
+        // Step 2.c.
+        if (availableLocale !== undefined)
+            _DefineDataProperty(subset, subset.length, locale);
     }
 
-    // Steps 5-6.
-    return callFunction(std_Array_slice, subset, 0);
+    // Step 3.
+    return subset;
 }
 
 
@@ -1077,7 +1756,7 @@ function LookupSupportedLocales(availableLocales, requestedLocales) {
  * matching (possibly fallback) locale. Locales appear in the same order in the
  * returned list as in the input list.
  *
- * Spec: ECMAScript Internationalization API Specification, 9.2.7.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.8.
  */
 function BestFitSupportedLocales(availableLocales, requestedLocales) {
     // don't have anything better
@@ -1090,19 +1769,17 @@ function BestFitSupportedLocales(availableLocales, requestedLocales) {
  * matching (possibly fallback) locale. Locales appear in the same order in the
  * returned list as in the input list.
  *
- * Spec: ECMAScript Internationalization API Specification, 9.2.8.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.9.
  */
 function SupportedLocales(availableLocales, requestedLocales, options) {
-    /*jshint laxbreak: true */
-
     // Step 1.
     var matcher;
     if (options !== undefined) {
-        // Steps 1.a-b.
+        // Step 1.a.
         options = ToObject(options);
-        matcher = options.localeMatcher;
 
-        // Step 1.c.
+        // Step 1.b.
+        matcher = options.localeMatcher;
         if (matcher !== undefined) {
             matcher = ToString(matcher);
             if (matcher !== "lookup" && matcher !== "best fit")
@@ -1110,12 +1787,12 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
         }
     }
 
-    // Steps 2-3.
+    // Steps 2-5.
     var subset = (matcher === undefined || matcher === "best fit")
                  ? BestFitSupportedLocales(availableLocales, requestedLocales)
                  : LookupSupportedLocales(availableLocales, requestedLocales);
 
-    // Step 4.
+    // Steps 6-7.
     for (var i = 0; i < subset.length; i++) {
         _DefineDataProperty(subset, i, subset[i],
                             ATTR_ENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE);
@@ -1123,7 +1800,7 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
     _DefineDataProperty(subset, "length", subset.length,
                         ATTR_NONENUMERABLE | ATTR_NONCONFIGURABLE | ATTR_NONWRITABLE);
 
-    // Step 5.
+    // Step 8.
     return subset;
 }
 
@@ -1133,7 +1810,7 @@ function SupportedLocales(availableLocales, requestedLocales, options) {
  * the required type, checks whether it is one of a list of allowed values,
  * and fills in a fallback value if necessary.
  *
- * Spec: ECMAScript Internationalization API Specification, 9.2.9.
+ * Spec: ECMAScript Internationalization API Specification, 9.2.10.
  */
 function GetOption(options, property, type, values, fallback) {
     // Step 1.
diff --git a/js/src/builtin/intl/DateTimeFormat.js b/js/src/builtin/intl/DateTimeFormat.js
index 4de3c084f2..a4feb50aa6 100644
--- a/js/src/builtin/intl/DateTimeFormat.js
+++ b/js/src/builtin/intl/DateTimeFormat.js
@@ -53,9 +53,10 @@ function resolveDateTimeFormatInternals(lazyDateTimeFormatData) {
     // never a subset of them.
 
     var internalProps = std_Object_create(null);
+
+    var DateTimeFormat = dateTimeFormatInternalProperties;
 
     // Compute effective locale.
-    var DateTimeFormat = dateTimeFormatInternalProperties;
 
     // Step 10.
     var localeData = DateTimeFormat.localeData;
@@ -73,7 +74,7 @@ function resolveDateTimeFormatInternals(lazyDateTimeFormatData) {
     internalProps.numberingSystem = r.nu;
 
     // Compute formatting options.
-    // Step 16.
+    // Step 14.
     var dataLocale = r.dataLocale;
 
     // Steps 20.
@@ -119,8 +120,6 @@ function resolveDateTimeFormatInternals(lazyDateTimeFormatData) {
     // Step 31.
     internalProps.pattern = pattern;
 
-    internalProps.boundFormat = undefined;
-
     // The caller is responsible for associating |internalProps| with the right
     // object using |setInternalProperties|.
     return internalProps;
@@ -297,23 +296,25 @@ function DefaultTimeZone() {
 
 
 /**
- * UnwrapDateTimeFormat(dtf)
+ * 12.1.10 UnwrapDateTimeFormat( dtf )
  */
 function UnwrapDateTimeFormat(dtf, methodName) {
-    // Step 1.
+    // Step 1 (not applicable in our implementation).
+
+    // Step 2.
     if ((!IsObject(dtf) || !IsDateTimeFormat(dtf)) &&
         dtf instanceof GetDateTimeFormatConstructor())
     {
         dtf = dtf[intlFallbackSymbol()];
     }
 
-    // Step 2.
+    // Step 3.
     if (!IsObject(dtf) || !IsDateTimeFormat(dtf)) {
         ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "DateTimeFormat", methodName,
                        "DateTimeFormat");
     }
 
-    // Step 3.
+    // Step 4.
     return dtf;
 }
 
@@ -334,9 +335,6 @@ function InitializeDateTimeFormat(dateTimeFormat, thisValue, locales, options, m
     assert(IsDateTimeFormat(dateTimeFormat),
            "InitializeDateTimeFormat called with non-DateTimeFormat");
 
-    // Steps 1-2 (These steps are no longer required and should be removed
-    // from the spec; https://github.com/tc39/ecma402/issues/115).
-
     // Lazy DateTimeFormat data has the following structure:
     //
     //   {
@@ -471,6 +469,8 @@ function InitializeDateTimeFormat(dateTimeFormat, thisValue, locales, options, m
     initializeIntlObject(dateTimeFormat, "DateTimeFormat", lazyDateTimeFormatData);
 
     // 12.2.1, steps 4-5.
+    // TODO: spec issue - The current spec doesn't have the IsObject check,
+    // which means |Intl.DateTimeFormat.call(null)| is supposed to throw here.
     if (dateTimeFormat !== thisValue && thisValue instanceof GetDateTimeFormatConstructor()) {
         if (!IsObject(thisValue))
             ThrowTypeError(JSMSG_NOT_NONNULL_OBJECT, typeof thisValue);
@@ -687,17 +687,19 @@ function ToDateTimeOptions(options, required, defaults) {
     assert(typeof required === "string", "ToDateTimeOptions");
     assert(typeof defaults === "string", "ToDateTimeOptions");
 
-    // Steps 1-3.
+    // Steps 1-2.
     if (options === undefined)
         options = null;
     else
         options = ToObject(options);
     options = std_Object_create(options);
 
-    // Step 4.
+    // Step 3.
     var needDefaults = true;
 
-    // Step 5.
+    // Step 4.
+    // TODO: spec issue - The spec requires all options to be retrieved, so
+    // using the ||-operator with its short-circuit evaluation is incorrect.
     if ((required === "date" || required === "any") &&
         (options.weekday !== undefined || options.year !== undefined ||
          options.month !== undefined || options.day !== undefined))
@@ -705,7 +707,9 @@ function ToDateTimeOptions(options, required, defaults) {
         needDefaults = false;
     }
 
-    // Step 6.
+    // Step 5.
+    // TODO: spec issue - The spec requires all options to be retrieved, so
+    // using the ||-operator with its short-circuit evaluation is incorrect.
     if ((required === "time" || required === "any") &&
         (options.hour !== undefined || options.minute !== undefined ||
          options.second !== undefined))
@@ -713,7 +717,7 @@ function ToDateTimeOptions(options, required, defaults) {
         needDefaults = false;
     }
 
-    // Step 7.
+    // Step 6.
     if (needDefaults && (defaults === "date" || defaults === "all")) {
         // The specification says to call [[DefineOwnProperty]] with false for
         // the Throw parameter, while Object.defineProperty uses true. For the
@@ -724,7 +728,7 @@ function ToDateTimeOptions(options, required, defaults) {
         _DefineDataProperty(options, "day", "numeric");
     }
 
-    // Step 8.
+    // Step 7.
     if (needDefaults && (defaults === "time" || defaults === "all")) {
         // See comment for step 7.
         _DefineDataProperty(options, "hour", "numeric");
@@ -732,7 +736,7 @@ function ToDateTimeOptions(options, required, defaults) {
         _DefineDataProperty(options, "second", "numeric");
     }
 
-    // Step 9.
+    // Step 8.
     return options;
 }
 
@@ -842,14 +846,19 @@ function BestFitFormatMatcher(options, formats) {
  * matching (possibly fallback) locale. Locales appear in the same order in the
  * returned list as in the input list.
  *
- * Spec: ECMAScript Internationalization API Specification, 12.2.2.
+ * Spec: ECMAScript Internationalization API Specification, 12.3.2.
  */
 function Intl_DateTimeFormat_supportedLocalesOf(locales /*, options*/) {
     var options = arguments.length > 1 ? arguments[1] : undefined;
 
+    // Step 1.
     var availableLocales = callFunction(dateTimeFormatInternalProperties.availableLocales,
                                         dateTimeFormatInternalProperties);
+
+    // Step 2.
     var requestedLocales = CanonicalizeLocaleList(locales);
+
+    // Step 3.
     return SupportedLocales(availableLocales, requestedLocales, options);
 }
 
@@ -857,7 +866,7 @@ function Intl_DateTimeFormat_supportedLocalesOf(locales /*, options*/) {
 /**
  * DateTimeFormat internal properties.
  *
- * Spec: ECMAScript Internationalization API Specification, 9.1 and 12.2.3.
+ * Spec: ECMAScript Internationalization API Specification, 9.1 and 12.3.3.
  */
 var dateTimeFormatInternalProperties = {
     localeData: dateTimeFormatLocaleData,
@@ -897,7 +906,7 @@ function dateTimeFormatLocaleData() {
 /**
  * Function to be bound and returned by Intl.DateTimeFormat.prototype.format.
  *
- * Spec: ECMAScript Internationalization API Specification, 12.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 12.1.5.
  */
 function dateTimeFormatFormatToBind() {
     // Steps 1.a.i-ii
@@ -913,7 +922,7 @@ function dateTimeFormatFormatToBind() {
  * representing the result of calling ToNumber(date) according to the
  * effective locale and the formatting options of this DateTimeFormat.
  *
- * Spec: ECMAScript Internationalization API Specification, 12.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 12.4.3.
  */
 function Intl_DateTimeFormat_format_get() {
     // Steps 1-3.
@@ -923,12 +932,11 @@ function Intl_DateTimeFormat_format_get() {
 
     // Step 4.
     if (internals.boundFormat === undefined) {
-        // Step 4.a.
-        var F = dateTimeFormatFormatToBind;
+        // Steps 4.a-b.
+        var F = callFunction(FunctionBind, dateTimeFormatFormatToBind, dtf);
 
-        // Steps 4.b-d.
-        var bf = callFunction(FunctionBind, F, dtf);
-        internals.boundFormat = bf;
+        // Step 4.c.
+        internals.boundFormat = F;
     }
 
     // Step 5.
@@ -937,6 +945,11 @@ function Intl_DateTimeFormat_format_get() {
 _SetCanonicalName(Intl_DateTimeFormat_format_get, "get format");
 
 
+/**
+ * Intl.DateTimeFormat.prototype.formatToParts ( date )
+ *
+ * Spec: ECMAScript Internationalization API Specification, 12.4.4.
+ */
 function Intl_DateTimeFormat_formatToParts() {
     // Steps 1-3.
     var dtf = UnwrapDateTimeFormat(this, "formatToParts");
@@ -956,14 +969,15 @@ function Intl_DateTimeFormat_formatToParts() {
 /**
  * Returns the resolved options for a DateTimeFormat object.
  *
- * Spec: ECMAScript Internationalization API Specification, 12.3.3 and 12.4.
+ * Spec: ECMAScript Internationalization API Specification, 12.4.5.
  */
 function Intl_DateTimeFormat_resolvedOptions() {
-    // Invoke |UnwrapDateTimeFormat| per introduction of section 12.3.
+    // Steps 1-3.
     var dtf = UnwrapDateTimeFormat(this, "resolvedOptions");
 
     var internals = getDateTimeFormatInternals(dtf);
 
+    // Steps 4-5.
     var result = {
         locale: internals.locale,
         calendar: internals.calendar,
@@ -981,6 +995,8 @@ function Intl_DateTimeFormat_resolvedOptions() {
     }
 
     resolveICUPattern(internals.pattern, result);
+
+    // Step 6.
     return result;
 }
 
diff --git a/js/src/builtin/intl/LangTagMappingsGenerated.js b/js/src/builtin/intl/LangTagMappingsGenerated.js
index 269cf9f93a..83a8ff8f60 100644
--- a/js/src/builtin/intl/LangTagMappingsGenerated.js
+++ b/js/src/builtin/intl/LangTagMappingsGenerated.js
@@ -1,382 +1,1246 @@
 // Generated by make_intl_data.py. DO NOT EDIT.
 
-// Mappings from complete tags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2016-10-12.
-// http://www.iana.org/assignments/language-subtag-registry
-var langTagMappings = {
+// Mappings from grandfathered tags to preferred values.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var grandfatheredMappings = {
     "art-lojban": "jbo",
-    "cel-gaulish": "cel-gaulish",
-    "en-gb-oed": "en-GB-oxendict",
-    "i-ami": "ami",
-    "i-bnn": "bnn",
-    "i-default": "i-default",
-    "i-enochian": "i-enochian",
-    "i-hak": "hak",
-    "i-klingon": "tlh",
-    "i-lux": "lb",
-    "i-mingo": "i-mingo",
-    "i-navajo": "nv",
-    "i-pwn": "pwn",
-    "i-tao": "tao",
-    "i-tay": "tay",
-    "i-tsu": "tsu",
-    "ja-latn-hepburn-heploc": "ja-Latn-alalc97",
-    "no-bok": "nb",
-    "no-nyn": "nn",
-    "sgn-be-fr": "sfb",
-    "sgn-be-nl": "vgt",
-    "sgn-br": "bzs",
-    "sgn-ch-de": "sgg",
-    "sgn-co": "csn",
-    "sgn-de": "gsg",
-    "sgn-dk": "dsl",
-    "sgn-es": "ssp",
-    "sgn-fr": "fsl",
-    "sgn-gb": "bfi",
-    "sgn-gr": "gss",
-    "sgn-ie": "isg",
-    "sgn-it": "ise",
-    "sgn-jp": "jsl",
-    "sgn-mx": "mfs",
-    "sgn-ni": "ncs",
-    "sgn-nl": "dse",
-    "sgn-no": "nsl",
-    "sgn-pt": "psr",
-    "sgn-se": "swl",
-    "sgn-us": "ase",
-    "sgn-za": "sfs",
-    "zh-cmn": "cmn",
-    "zh-cmn-hans": "cmn-Hans",
-    "zh-cmn-hant": "cmn-Hant",
-    "zh-gan": "gan",
-    "zh-guoyu": "cmn",
+    "cel-gaulish": "xtg-x-cel-gaulish",
+    "zh-guoyu": "zh",
     "zh-hakka": "hak",
-    "zh-min": "zh-min",
-    "zh-min-nan": "nan",
-    "zh-wuu": "wuu",
     "zh-xiang": "hsn",
-    "zh-yue": "yue",
 };
 
-// Mappings from non-extlang subtags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2016-10-12.
-// http://www.iana.org/assignments/language-subtag-registry
-var langSubtagMappings = {
-    "BU": "MM",
-    "DD": "DE",
-    "FX": "FR",
-    "TP": "TL",
-    "YD": "YE",
-    "ZR": "CD",
+// Mappings from language subtags to preferred values.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var languageMappings = {
     "aam": "aas",
+    "aar": "aa",
+    "abk": "ab",
     "adp": "dz",
+    "afr": "af",
+    "aju": "jrb",
+    "aka": "ak",
+    "alb": "sq",
+    "als": "sq",
+    "amh": "am",
+    "ara": "ar",
+    "arb": "ar",
+    "arg": "an",
+    "arm": "hy",
+    "asd": "snz",
+    "asm": "as",
     "aue": "ktz",
+    "ava": "av",
+    "ave": "ae",
+    "aym": "ay",
+    "ayr": "ay",
     "ayx": "nun",
+    "aze": "az",
+    "azj": "az",
+    "bak": "ba",
+    "bam": "bm",
+    "baq": "eu",
+    "bcc": "bal",
+    "bcl": "bik",
+    "bel": "be",
+    "ben": "bn",
     "bgm": "bcg",
+    "bh": "bho",
+    "bih": "bho",
+    "bis": "bi",
     "bjd": "drl",
+    "bod": "bo",
+    "bos": "bs",
+    "bre": "br",
+    "bul": "bg",
+    "bur": "my",
+    "bxk": "luy",
+    "bxr": "bua",
+    "cat": "ca",
     "ccq": "rki",
+    "ces": "cs",
+    "cha": "ch",
+    "che": "ce",
+    "chi": "zh",
+    "chu": "cu",
+    "chv": "cv",
     "cjr": "mom",
     "cka": "cmr",
+    "cld": "syr",
     "cmk": "xch",
+    "cmn": "zh",
+    "cor": "kw",
+    "cos": "co",
     "coy": "pij",
     "cqu": "quh",
-    "drh": "khk",
-    "drw": "prs",
+    "cre": "cr",
+    "cwd": "cr",
+    "cym": "cy",
+    "cze": "cs",
+    "dan": "da",
+    "deu": "de",
+    "dgo": "doi",
+    "dhd": "mwr",
+    "dik": "din",
+    "diq": "zza",
+    "dit": "dif",
+    "div": "dv",
+    "drh": "mn",
+    "dut": "nl",
+    "dzo": "dz",
+    "ekk": "et",
+    "ell": "el",
+    "emk": "man",
+    "eng": "en",
+    "epo": "eo",
+    "esk": "ik",
+    "est": "et",
+    "eus": "eu",
+    "ewe": "ee",
+    "fao": "fo",
+    "fas": "fa",
+    "fat": "ak",
+    "fij": "fj",
+    "fin": "fi",
+    "fra": "fr",
+    "fre": "fr",
+    "fry": "fy",
+    "fuc": "ff",
+    "ful": "ff",
     "gav": "dev",
+    "gaz": "om",
+    "gbo": "grb",
+    "geo": "ka",
+    "ger": "de",
     "gfx": "vaj",
     "ggn": "gvr",
+    "gla": "gd",
+    "gle": "ga",
+    "glg": "gl",
+    "glv": "gv",
+    "gno": "gon",
+    "gre": "el",
+    "grn": "gn",
     "gti": "nyc",
+    "gug": "gn",
+    "guj": "gu",
     "guv": "duz",
+    "gya": "gba",
+    "hat": "ht",
+    "hau": "ha",
+    "hdn": "hai",
+    "hea": "hmn",
+    "heb": "he",
+    "her": "hz",
+    "him": "srx",
+    "hin": "hi",
+    "hmo": "ho",
     "hrr": "jal",
+    "hrv": "hr",
+    "hun": "hu",
+    "hye": "hy",
     "ibi": "opa",
+    "ibo": "ig",
+    "ice": "is",
+    "ido": "io",
+    "iii": "ii",
+    "ike": "iu",
+    "iku": "iu",
+    "ile": "ie",
     "ilw": "gal",
     "in": "id",
+    "ina": "ia",
+    "ind": "id",
+    "ipk": "ik",
+    "isl": "is",
+    "ita": "it",
     "iw": "he",
+    "jav": "jv",
+    "jeg": "oyb",
     "ji": "yi",
+    "jpn": "ja",
     "jw": "jv",
+    "kal": "kl",
+    "kan": "kn",
+    "kas": "ks",
+    "kat": "ka",
+    "kau": "kr",
+    "kaz": "kk",
     "kgc": "tdf",
     "kgh": "kml",
+    "khk": "mn",
+    "khm": "km",
+    "kik": "ki",
+    "kin": "rw",
+    "kir": "ky",
+    "kmr": "ku",
+    "knc": "kr",
+    "kng": "kg",
+    "knn": "kok",
     "koj": "kwv",
+    "kom": "kv",
+    "kon": "kg",
+    "kor": "ko",
+    "kpv": "kv",
+    "krm": "bmf",
     "ktr": "dtp",
+    "kua": "kj",
+    "kur": "ku",
     "kvs": "gdj",
     "kwq": "yam",
     "kxe": "tvd",
     "kzj": "dtp",
     "kzt": "dtp",
+    "lao": "lo",
+    "lat": "la",
+    "lav": "lv",
+    "lbk": "bnc",
     "lii": "raq",
+    "lim": "li",
+    "lin": "ln",
+    "lit": "lt",
+    "llo": "ngt",
     "lmm": "rmx",
+    "ltz": "lb",
+    "lub": "lu",
+    "lug": "lg",
+    "lvs": "lv",
+    "mac": "mk",
+    "mah": "mh",
+    "mal": "ml",
+    "mao": "mi",
+    "mar": "mr",
+    "may": "ms",
     "meg": "cir",
+    "mhr": "chm",
+    "mkd": "mk",
+    "mlg": "mg",
+    "mlt": "mt",
+    "mnk": "man",
     "mo": "ro",
+    "mol": "ro",
+    "mon": "mn",
+    "mri": "mi",
+    "msa": "ms",
     "mst": "mry",
+    "mup": "raj",
     "mwj": "vaj",
+    "mya": "my",
+    "myd": "aog",
     "myt": "mry",
     "nad": "xny",
+    "nau": "na",
+    "nav": "nv",
+    "nbl": "nr",
+    "ncp": "kdz",
+    "nde": "nd",
+    "ndo": "ng",
+    "nep": "ne",
+    "nld": "nl",
+    "nno": "nn",
+    "nns": "nbr",
     "nnx": "ngv",
+    "no": "nb",
+    "nob": "nb",
+    "nor": "nb",
+    "npi": "ne",
     "nts": "pij",
+    "nya": "ny",
+    "oci": "oc",
+    "ojg": "oj",
+    "oji": "oj",
+    "ori": "or",
+    "orm": "om",
+    "ory": "or",
+    "oss": "os",
     "oun": "vaj",
+    "pan": "pa",
+    "pbu": "ps",
     "pcr": "adx",
+    "per": "fa",
+    "pes": "fa",
+    "pli": "pi",
+    "plt": "mg",
     "pmc": "huw",
     "pmu": "phr",
+    "pnb": "lah",
+    "pol": "pl",
+    "por": "pt",
     "ppa": "bfy",
     "ppr": "lcq",
     "pry": "prt",
+    "pus": "ps",
     "puz": "pub",
+    "que": "qu",
+    "quz": "qu",
+    "rmy": "rom",
+    "roh": "rm",
+    "ron": "ro",
+    "rum": "ro",
+    "run": "rn",
+    "rus": "ru",
+    "sag": "sg",
+    "san": "sa",
     "sca": "hle",
+    "scc": "sr",
+    "scr": "hr",
+    "sin": "si",
+    "skk": "oyb",
+    "slk": "sk",
+    "slo": "sk",
+    "slv": "sl",
+    "sme": "se",
+    "smo": "sm",
+    "sna": "sn",
+    "snd": "sd",
+    "som": "so",
+    "sot": "st",
+    "spa": "es",
+    "spy": "kln",
+    "sqi": "sq",
+    "src": "sc",
+    "srd": "sc",
+    "srp": "sr",
+    "ssw": "ss",
+    "sun": "su",
+    "swa": "sw",
+    "swe": "sv",
+    "swh": "sw",
+    "tah": "ty",
+    "tam": "ta",
+    "tat": "tt",
     "tdu": "dtp",
+    "tel": "te",
+    "tgk": "tg",
+    "tgl": "fil",
+    "tha": "th",
     "thc": "tpo",
     "thx": "oyb",
+    "tib": "bo",
     "tie": "ras",
+    "tir": "ti",
     "tkk": "twm",
+    "tl": "fil",
     "tlw": "weo",
     "tmp": "tyj",
     "tne": "kak",
-    "tnf": "prs",
+    "ton": "to",
     "tsf": "taj",
+    "tsn": "tn",
+    "tso": "ts",
+    "ttq": "tmh",
+    "tuk": "tk",
+    "tur": "tr",
+    "tw": "ak",
+    "twi": "ak",
+    "uig": "ug",
+    "ukr": "uk",
+    "umu": "del",
     "uok": "ema",
+    "urd": "ur",
+    "uzb": "uz",
+    "uzn": "uz",
+    "ven": "ve",
+    "vie": "vi",
+    "vol": "vo",
+    "wel": "cy",
+    "wln": "wa",
+    "wol": "wo",
     "xba": "cax",
+    "xho": "xh",
     "xia": "acn",
     "xkh": "waw",
+    "xpe": "kpe",
     "xsj": "suj",
+    "xsl": "den",
     "ybd": "rki",
+    "ydd": "yi",
+    "yid": "yi",
     "yma": "lrr",
     "ymt": "mtm",
+    "yor": "yo",
     "yos": "zom",
     "yuu": "yug",
+    "zai": "zap",
+    "zha": "za",
+    "zho": "zh",
+    "zsm": "ms",
+    "zul": "zu",
+    "zyb": "za",
 };
 
-// Mappings from extlang subtags to preferred values.
-// Derived from IANA Language Subtag Registry, file date 2016-10-12.
-// http://www.iana.org/assignments/language-subtag-registry
-var extlangMappings = {
-    "aao": {preferred: "aao", prefix: "ar"},
-    "abh": {preferred: "abh", prefix: "ar"},
-    "abv": {preferred: "abv", prefix: "ar"},
-    "acm": {preferred: "acm", prefix: "ar"},
-    "acq": {preferred: "acq", prefix: "ar"},
-    "acw": {preferred: "acw", prefix: "ar"},
-    "acx": {preferred: "acx", prefix: "ar"},
-    "acy": {preferred: "acy", prefix: "ar"},
-    "adf": {preferred: "adf", prefix: "ar"},
-    "ads": {preferred: "ads", prefix: "sgn"},
-    "aeb": {preferred: "aeb", prefix: "ar"},
-    "aec": {preferred: "aec", prefix: "ar"},
-    "aed": {preferred: "aed", prefix: "sgn"},
-    "aen": {preferred: "aen", prefix: "sgn"},
-    "afb": {preferred: "afb", prefix: "ar"},
-    "afg": {preferred: "afg", prefix: "sgn"},
-    "ajp": {preferred: "ajp", prefix: "ar"},
-    "apc": {preferred: "apc", prefix: "ar"},
-    "apd": {preferred: "apd", prefix: "ar"},
-    "arb": {preferred: "arb", prefix: "ar"},
-    "arq": {preferred: "arq", prefix: "ar"},
-    "ars": {preferred: "ars", prefix: "ar"},
-    "ary": {preferred: "ary", prefix: "ar"},
-    "arz": {preferred: "arz", prefix: "ar"},
-    "ase": {preferred: "ase", prefix: "sgn"},
-    "asf": {preferred: "asf", prefix: "sgn"},
-    "asp": {preferred: "asp", prefix: "sgn"},
-    "asq": {preferred: "asq", prefix: "sgn"},
-    "asw": {preferred: "asw", prefix: "sgn"},
-    "auz": {preferred: "auz", prefix: "ar"},
-    "avl": {preferred: "avl", prefix: "ar"},
-    "ayh": {preferred: "ayh", prefix: "ar"},
-    "ayl": {preferred: "ayl", prefix: "ar"},
-    "ayn": {preferred: "ayn", prefix: "ar"},
-    "ayp": {preferred: "ayp", prefix: "ar"},
-    "bbz": {preferred: "bbz", prefix: "ar"},
-    "bfi": {preferred: "bfi", prefix: "sgn"},
-    "bfk": {preferred: "bfk", prefix: "sgn"},
-    "bjn": {preferred: "bjn", prefix: "ms"},
-    "bog": {preferred: "bog", prefix: "sgn"},
-    "bqn": {preferred: "bqn", prefix: "sgn"},
-    "bqy": {preferred: "bqy", prefix: "sgn"},
-    "btj": {preferred: "btj", prefix: "ms"},
-    "bve": {preferred: "bve", prefix: "ms"},
-    "bvl": {preferred: "bvl", prefix: "sgn"},
-    "bvu": {preferred: "bvu", prefix: "ms"},
-    "bzs": {preferred: "bzs", prefix: "sgn"},
-    "cdo": {preferred: "cdo", prefix: "zh"},
-    "cds": {preferred: "cds", prefix: "sgn"},
-    "cjy": {preferred: "cjy", prefix: "zh"},
-    "cmn": {preferred: "cmn", prefix: "zh"},
-    "coa": {preferred: "coa", prefix: "ms"},
-    "cpx": {preferred: "cpx", prefix: "zh"},
-    "csc": {preferred: "csc", prefix: "sgn"},
-    "csd": {preferred: "csd", prefix: "sgn"},
-    "cse": {preferred: "cse", prefix: "sgn"},
-    "csf": {preferred: "csf", prefix: "sgn"},
-    "csg": {preferred: "csg", prefix: "sgn"},
-    "csl": {preferred: "csl", prefix: "sgn"},
-    "csn": {preferred: "csn", prefix: "sgn"},
-    "csq": {preferred: "csq", prefix: "sgn"},
-    "csr": {preferred: "csr", prefix: "sgn"},
-    "czh": {preferred: "czh", prefix: "zh"},
-    "czo": {preferred: "czo", prefix: "zh"},
-    "doq": {preferred: "doq", prefix: "sgn"},
-    "dse": {preferred: "dse", prefix: "sgn"},
-    "dsl": {preferred: "dsl", prefix: "sgn"},
-    "dup": {preferred: "dup", prefix: "ms"},
-    "ecs": {preferred: "ecs", prefix: "sgn"},
-    "esl": {preferred: "esl", prefix: "sgn"},
-    "esn": {preferred: "esn", prefix: "sgn"},
-    "eso": {preferred: "eso", prefix: "sgn"},
-    "eth": {preferred: "eth", prefix: "sgn"},
-    "fcs": {preferred: "fcs", prefix: "sgn"},
-    "fse": {preferred: "fse", prefix: "sgn"},
-    "fsl": {preferred: "fsl", prefix: "sgn"},
-    "fss": {preferred: "fss", prefix: "sgn"},
-    "gan": {preferred: "gan", prefix: "zh"},
-    "gds": {preferred: "gds", prefix: "sgn"},
-    "gom": {preferred: "gom", prefix: "kok"},
-    "gse": {preferred: "gse", prefix: "sgn"},
-    "gsg": {preferred: "gsg", prefix: "sgn"},
-    "gsm": {preferred: "gsm", prefix: "sgn"},
-    "gss": {preferred: "gss", prefix: "sgn"},
-    "gus": {preferred: "gus", prefix: "sgn"},
-    "hab": {preferred: "hab", prefix: "sgn"},
-    "haf": {preferred: "haf", prefix: "sgn"},
-    "hak": {preferred: "hak", prefix: "zh"},
-    "hds": {preferred: "hds", prefix: "sgn"},
-    "hji": {preferred: "hji", prefix: "ms"},
-    "hks": {preferred: "hks", prefix: "sgn"},
-    "hos": {preferred: "hos", prefix: "sgn"},
-    "hps": {preferred: "hps", prefix: "sgn"},
-    "hsh": {preferred: "hsh", prefix: "sgn"},
-    "hsl": {preferred: "hsl", prefix: "sgn"},
-    "hsn": {preferred: "hsn", prefix: "zh"},
-    "icl": {preferred: "icl", prefix: "sgn"},
-    "iks": {preferred: "iks", prefix: "sgn"},
-    "ils": {preferred: "ils", prefix: "sgn"},
-    "inl": {preferred: "inl", prefix: "sgn"},
-    "ins": {preferred: "ins", prefix: "sgn"},
-    "ise": {preferred: "ise", prefix: "sgn"},
-    "isg": {preferred: "isg", prefix: "sgn"},
-    "isr": {preferred: "isr", prefix: "sgn"},
-    "jak": {preferred: "jak", prefix: "ms"},
-    "jax": {preferred: "jax", prefix: "ms"},
-    "jcs": {preferred: "jcs", prefix: "sgn"},
-    "jhs": {preferred: "jhs", prefix: "sgn"},
-    "jls": {preferred: "jls", prefix: "sgn"},
-    "jos": {preferred: "jos", prefix: "sgn"},
-    "jsl": {preferred: "jsl", prefix: "sgn"},
-    "jus": {preferred: "jus", prefix: "sgn"},
-    "kgi": {preferred: "kgi", prefix: "sgn"},
-    "knn": {preferred: "knn", prefix: "kok"},
-    "kvb": {preferred: "kvb", prefix: "ms"},
-    "kvk": {preferred: "kvk", prefix: "sgn"},
-    "kvr": {preferred: "kvr", prefix: "ms"},
-    "kxd": {preferred: "kxd", prefix: "ms"},
-    "lbs": {preferred: "lbs", prefix: "sgn"},
-    "lce": {preferred: "lce", prefix: "ms"},
-    "lcf": {preferred: "lcf", prefix: "ms"},
-    "liw": {preferred: "liw", prefix: "ms"},
-    "lls": {preferred: "lls", prefix: "sgn"},
-    "lsg": {preferred: "lsg", prefix: "sgn"},
-    "lsl": {preferred: "lsl", prefix: "sgn"},
-    "lso": {preferred: "lso", prefix: "sgn"},
-    "lsp": {preferred: "lsp", prefix: "sgn"},
-    "lst": {preferred: "lst", prefix: "sgn"},
-    "lsy": {preferred: "lsy", prefix: "sgn"},
-    "ltg": {preferred: "ltg", prefix: "lv"},
-    "lvs": {preferred: "lvs", prefix: "lv"},
-    "lzh": {preferred: "lzh", prefix: "zh"},
-    "max": {preferred: "max", prefix: "ms"},
-    "mdl": {preferred: "mdl", prefix: "sgn"},
-    "meo": {preferred: "meo", prefix: "ms"},
-    "mfa": {preferred: "mfa", prefix: "ms"},
-    "mfb": {preferred: "mfb", prefix: "ms"},
-    "mfs": {preferred: "mfs", prefix: "sgn"},
-    "min": {preferred: "min", prefix: "ms"},
-    "mnp": {preferred: "mnp", prefix: "zh"},
-    "mqg": {preferred: "mqg", prefix: "ms"},
-    "mre": {preferred: "mre", prefix: "sgn"},
-    "msd": {preferred: "msd", prefix: "sgn"},
-    "msi": {preferred: "msi", prefix: "ms"},
-    "msr": {preferred: "msr", prefix: "sgn"},
-    "mui": {preferred: "mui", prefix: "ms"},
-    "mzc": {preferred: "mzc", prefix: "sgn"},
-    "mzg": {preferred: "mzg", prefix: "sgn"},
-    "mzy": {preferred: "mzy", prefix: "sgn"},
-    "nan": {preferred: "nan", prefix: "zh"},
-    "nbs": {preferred: "nbs", prefix: "sgn"},
-    "ncs": {preferred: "ncs", prefix: "sgn"},
-    "nsi": {preferred: "nsi", prefix: "sgn"},
-    "nsl": {preferred: "nsl", prefix: "sgn"},
-    "nsp": {preferred: "nsp", prefix: "sgn"},
-    "nsr": {preferred: "nsr", prefix: "sgn"},
-    "nzs": {preferred: "nzs", prefix: "sgn"},
-    "okl": {preferred: "okl", prefix: "sgn"},
-    "orn": {preferred: "orn", prefix: "ms"},
-    "ors": {preferred: "ors", prefix: "ms"},
-    "pel": {preferred: "pel", prefix: "ms"},
-    "pga": {preferred: "pga", prefix: "ar"},
-    "pgz": {preferred: "pgz", prefix: "sgn"},
-    "pks": {preferred: "pks", prefix: "sgn"},
-    "prl": {preferred: "prl", prefix: "sgn"},
-    "prz": {preferred: "prz", prefix: "sgn"},
-    "psc": {preferred: "psc", prefix: "sgn"},
-    "psd": {preferred: "psd", prefix: "sgn"},
-    "pse": {preferred: "pse", prefix: "ms"},
-    "psg": {preferred: "psg", prefix: "sgn"},
-    "psl": {preferred: "psl", prefix: "sgn"},
-    "pso": {preferred: "pso", prefix: "sgn"},
-    "psp": {preferred: "psp", prefix: "sgn"},
-    "psr": {preferred: "psr", prefix: "sgn"},
-    "pys": {preferred: "pys", prefix: "sgn"},
-    "rms": {preferred: "rms", prefix: "sgn"},
-    "rsi": {preferred: "rsi", prefix: "sgn"},
-    "rsl": {preferred: "rsl", prefix: "sgn"},
-    "rsm": {preferred: "rsm", prefix: "sgn"},
-    "sdl": {preferred: "sdl", prefix: "sgn"},
-    "sfb": {preferred: "sfb", prefix: "sgn"},
-    "sfs": {preferred: "sfs", prefix: "sgn"},
-    "sgg": {preferred: "sgg", prefix: "sgn"},
-    "sgx": {preferred: "sgx", prefix: "sgn"},
-    "shu": {preferred: "shu", prefix: "ar"},
-    "slf": {preferred: "slf", prefix: "sgn"},
-    "sls": {preferred: "sls", prefix: "sgn"},
-    "sqk": {preferred: "sqk", prefix: "sgn"},
-    "sqs": {preferred: "sqs", prefix: "sgn"},
-    "ssh": {preferred: "ssh", prefix: "ar"},
-    "ssp": {preferred: "ssp", prefix: "sgn"},
-    "ssr": {preferred: "ssr", prefix: "sgn"},
-    "svk": {preferred: "svk", prefix: "sgn"},
-    "swc": {preferred: "swc", prefix: "sw"},
-    "swh": {preferred: "swh", prefix: "sw"},
-    "swl": {preferred: "swl", prefix: "sgn"},
-    "syy": {preferred: "syy", prefix: "sgn"},
-    "tmw": {preferred: "tmw", prefix: "ms"},
-    "tse": {preferred: "tse", prefix: "sgn"},
-    "tsm": {preferred: "tsm", prefix: "sgn"},
-    "tsq": {preferred: "tsq", prefix: "sgn"},
-    "tss": {preferred: "tss", prefix: "sgn"},
-    "tsy": {preferred: "tsy", prefix: "sgn"},
-    "tza": {preferred: "tza", prefix: "sgn"},
-    "ugn": {preferred: "ugn", prefix: "sgn"},
-    "ugy": {preferred: "ugy", prefix: "sgn"},
-    "ukl": {preferred: "ukl", prefix: "sgn"},
-    "uks": {preferred: "uks", prefix: "sgn"},
-    "urk": {preferred: "urk", prefix: "ms"},
-    "uzn": {preferred: "uzn", prefix: "uz"},
-    "uzs": {preferred: "uzs", prefix: "uz"},
-    "vgt": {preferred: "vgt", prefix: "sgn"},
-    "vkk": {preferred: "vkk", prefix: "ms"},
-    "vkt": {preferred: "vkt", prefix: "ms"},
-    "vsi": {preferred: "vsi", prefix: "sgn"},
-    "vsl": {preferred: "vsl", prefix: "sgn"},
-    "vsv": {preferred: "vsv", prefix: "sgn"},
-    "wuu": {preferred: "wuu", prefix: "zh"},
-    "xki": {preferred: "xki", prefix: "sgn"},
-    "xml": {preferred: "xml", prefix: "sgn"},
-    "xmm": {preferred: "xmm", prefix: "ms"},
-    "xms": {preferred: "xms", prefix: "sgn"},
-    "ygs": {preferred: "ygs", prefix: "sgn"},
-    "yhs": {preferred: "yhs", prefix: "sgn"},
-    "ysl": {preferred: "ysl", prefix: "sgn"},
-    "yue": {preferred: "yue", prefix: "zh"},
-    "zib": {preferred: "zib", prefix: "sgn"},
-    "zlm": {preferred: "zlm", prefix: "ms"},
-    "zmi": {preferred: "zmi", prefix: "ms"},
-    "zsl": {preferred: "zsl", prefix: "sgn"},
-    "zsm": {preferred: "zsm", prefix: "ms"},
+// Language subtags whose replacement also supplies a default script or region (see updateLocaleIdMappings).
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var complexLanguageMappings = {
+    "cnr": true,  // -> "sr", region defaults to "ME"
+    "drw": true,  // -> "fa", region defaults to "AF"
+    "hbs": true,  // -> "sr", script defaults to "Latn"
+    "prs": true,  // -> "fa", region defaults to "AF"
+    "sh": true,  // -> "sr", script defaults to "Latn"
+    "swc": true,  // -> "sw", region defaults to "CD"
+    "tnf": true,  // -> "fa", region defaults to "AF"
 };
+
+// Mappings from deprecated region subtags to their preferred values; each key has one unconditional replacement.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var regionMappings = {
+    "004": "AF",
+    "008": "AL",
+    "010": "AQ",
+    "012": "DZ",
+    "016": "AS",
+    "020": "AD",
+    "024": "AO",
+    "028": "AG",
+    "031": "AZ",
+    "032": "AR",
+    "036": "AU",
+    "040": "AT",
+    "044": "BS",
+    "048": "BH",
+    "050": "BD",
+    "051": "AM",
+    "052": "BB",
+    "056": "BE",
+    "060": "BM",
+    "062": "034",  // the only entry in this table whose preferred value is itself numeric
+    "064": "BT",
+    "068": "BO",
+    "070": "BA",
+    "072": "BW",
+    "074": "BV",
+    "076": "BR",
+    "084": "BZ",
+    "086": "IO",
+    "090": "SB",
+    "092": "VG",
+    "096": "BN",
+    "100": "BG",
+    "104": "MM",
+    "108": "BI",
+    "112": "BY",
+    "116": "KH",
+    "120": "CM",
+    "124": "CA",
+    "132": "CV",
+    "136": "KY",
+    "140": "CF",
+    "144": "LK",
+    "148": "TD",
+    "152": "CL",
+    "156": "CN",
+    "158": "TW",
+    "162": "CX",
+    "166": "CC",
+    "170": "CO",
+    "174": "KM",
+    "175": "YT",
+    "178": "CG",
+    "180": "CD",
+    "184": "CK",
+    "188": "CR",
+    "191": "HR",
+    "192": "CU",
+    "196": "CY",
+    "203": "CZ",
+    "204": "BJ",
+    "208": "DK",
+    "212": "DM",
+    "214": "DO",
+    "218": "EC",
+    "222": "SV",
+    "226": "GQ",
+    "230": "ET",
+    "231": "ET",
+    "232": "ER",
+    "233": "EE",
+    "234": "FO",
+    "238": "FK",
+    "239": "GS",
+    "242": "FJ",
+    "246": "FI",
+    "248": "AX",
+    "249": "FR",
+    "250": "FR",
+    "254": "GF",
+    "258": "PF",
+    "260": "TF",
+    "262": "DJ",
+    "266": "GA",
+    "268": "GE",
+    "270": "GM",
+    "275": "PS",
+    "276": "DE",
+    "278": "DE",
+    "280": "DE",
+    "288": "GH",
+    "292": "GI",
+    "296": "KI",
+    "300": "GR",
+    "304": "GL",
+    "308": "GD",
+    "312": "GP",
+    "316": "GU",
+    "320": "GT",
+    "324": "GN",
+    "328": "GY",
+    "332": "HT",
+    "334": "HM",
+    "336": "VA",
+    "340": "HN",
+    "344": "HK",
+    "348": "HU",
+    "352": "IS",
+    "356": "IN",
+    "360": "ID",
+    "364": "IR",
+    "368": "IQ",
+    "372": "IE",
+    "376": "IL",
+    "380": "IT",
+    "384": "CI",
+    "388": "JM",
+    "392": "JP",
+    "398": "KZ",
+    "400": "JO",
+    "404": "KE",
+    "408": "KP",
+    "410": "KR",
+    "414": "KW",
+    "417": "KG",
+    "418": "LA",
+    "422": "LB",
+    "426": "LS",
+    "428": "LV",
+    "430": "LR",
+    "434": "LY",
+    "438": "LI",
+    "440": "LT",
+    "442": "LU",
+    "446": "MO",
+    "450": "MG",
+    "454": "MW",
+    "458": "MY",
+    "462": "MV",
+    "466": "ML",
+    "470": "MT",
+    "474": "MQ",
+    "478": "MR",
+    "480": "MU",
+    "484": "MX",
+    "492": "MC",
+    "496": "MN",
+    "498": "MD",
+    "499": "ME",
+    "500": "MS",
+    "504": "MA",
+    "508": "MZ",
+    "512": "OM",
+    "516": "NA",
+    "520": "NR",
+    "524": "NP",
+    "528": "NL",
+    "531": "CW",
+    "533": "AW",
+    "534": "SX",
+    "535": "BQ",
+    "540": "NC",
+    "548": "VU",
+    "554": "NZ",
+    "558": "NI",
+    "562": "NE",
+    "566": "NG",
+    "570": "NU",
+    "574": "NF",
+    "578": "NO",
+    "580": "MP",
+    "581": "UM",
+    "583": "FM",
+    "584": "MH",
+    "585": "PW",
+    "586": "PK",
+    "591": "PA",
+    "598": "PG",
+    "600": "PY",
+    "604": "PE",
+    "608": "PH",
+    "612": "PN",
+    "616": "PL",
+    "620": "PT",
+    "624": "GW",
+    "626": "TL",
+    "630": "PR",
+    "634": "QA",
+    "638": "RE",
+    "642": "RO",
+    "643": "RU",
+    "646": "RW",
+    "652": "BL",
+    "654": "SH",
+    "659": "KN",
+    "660": "AI",
+    "662": "LC",
+    "663": "MF",
+    "666": "PM",
+    "670": "VC",
+    "674": "SM",
+    "678": "ST",
+    "682": "SA",
+    "686": "SN",
+    "688": "RS",
+    "690": "SC",
+    "694": "SL",
+    "702": "SG",
+    "703": "SK",
+    "704": "VN",
+    "705": "SI",
+    "706": "SO",
+    "710": "ZA",
+    "716": "ZW",
+    "720": "YE",
+    "724": "ES",
+    "728": "SS",
+    "729": "SD",
+    "732": "EH",
+    "736": "SD",
+    "740": "SR",
+    "744": "SJ",
+    "748": "SZ",
+    "752": "SE",
+    "756": "CH",
+    "760": "SY",
+    "762": "TJ",
+    "764": "TH",
+    "768": "TG",
+    "772": "TK",
+    "776": "TO",
+    "780": "TT",
+    "784": "AE",
+    "788": "TN",
+    "792": "TR",
+    "795": "TM",
+    "796": "TC",
+    "798": "TV",
+    "800": "UG",
+    "804": "UA",
+    "807": "MK",
+    "818": "EG",
+    "826": "GB",
+    "830": "JE",
+    "831": "GG",
+    "832": "JE",
+    "833": "IM",
+    "834": "TZ",
+    "840": "US",
+    "850": "VI",
+    "854": "BF",
+    "858": "UY",
+    "860": "UZ",
+    "862": "VE",
+    "876": "WF",
+    "882": "WS",
+    "886": "YE",
+    "887": "YE",
+    "891": "RS",
+    "894": "ZM",
+    "958": "AA",
+    "959": "QM",
+    "960": "QN",
+    "962": "QP",
+    "963": "QQ",
+    "964": "QR",
+    "965": "QS",
+    "966": "QT",
+    "967": "EU",
+    "968": "QV",
+    "969": "QW",
+    "970": "QX",
+    "971": "QY",
+    "972": "QZ",
+    "973": "XA",
+    "974": "XB",
+    "975": "XC",
+    "976": "XD",
+    "977": "XE",
+    "978": "XF",
+    "979": "XG",
+    "980": "XH",
+    "981": "XI",
+    "982": "XJ",
+    "983": "XK",
+    "984": "XL",
+    "985": "XM",
+    "986": "XN",
+    "987": "XO",
+    "988": "XP",
+    "989": "XQ",
+    "990": "XR",
+    "991": "XS",
+    "992": "XT",
+    "993": "XU",
+    "994": "XV",
+    "995": "XW",
+    "996": "XX",
+    "997": "XY",
+    "998": "XZ",
+    "999": "ZZ",
+    "BU": "MM",
+    "CS": "RS",
+    "CT": "KI",
+    "DD": "DE",
+    "DY": "BJ",
+    "FQ": "AQ",
+    "FX": "FR",
+    "HV": "BF",
+    "JT": "UM",
+    "MI": "UM",
+    "NH": "VU",
+    "NQ": "AQ",
+    "PU": "UM",
+    "PZ": "PA",
+    "QU": "EU",
+    "RH": "ZW",
+    "TP": "TL",
+    "UK": "GB",
+    "VD": "VN",
+    "WK": "UM",
+    "YD": "YE",
+    "YU": "RS",
+    "ZR": "CD",
+};
+
+// Region subtags whose replacement depends on the tag's language (and sometimes script); see updateLocaleIdMappings.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+var complexRegionMappings = {
+    "172": true,  // falls back to "RU"
+    "200": true,  // falls back to "CZ"
+    "530": true,  // falls back to "CW"
+    "532": true,  // falls back to "CW"
+    "536": true,  // falls back to "SA"
+    "582": true,  // falls back to "FM"
+    "810": true,  // falls back to "RU"
+    "890": true,  // falls back to "RS"
+    "AN": true,  // falls back to "CW"
+    "NT": true,  // falls back to "SA"
+    "PC": true,  // falls back to "FM"
+    "SU": true,  // falls back to "RU"
+};
+
+// Canonicalize the language and region subtags of a Unicode BCP 47 locale identifier held in the |tag| object.
+// |tag| is updated in place and nothing is returned. Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+/* eslint-disable complexity */
+function updateLocaleIdMappings(tag) {
+    assert(IsObject(tag), "tag is an object");
+
+    // Replace deprecated language subtags with their preferred values (simple one-to-one table first).
+    var language = tag.language;
+    if (hasOwn(language, languageMappings)) {
+        tag.language = languageMappings[language];
+    } else if (hasOwn(language, complexLanguageMappings)) {  // replacement also supplies a default script or region
+        switch (language) {
+          case "cnr":
+            tag.language = "sr";
+            if (tag.region === undefined)  // an explicitly given region is kept
+                tag.region = "ME";
+            break;
+          case "drw":
+          case "prs":
+          case "tnf":
+            tag.language = "fa";
+            if (tag.region === undefined)
+                tag.region = "AF";
+            break;
+          case "hbs":
+          case "sh":
+            tag.language = "sr";
+            if (tag.script === undefined)  // an explicitly given script is kept
+                tag.script = "Latn";
+            break;
+          case "swc":
+            tag.language = "sw";
+            if (tag.region === undefined)
+                tag.region = "CD";
+            break;
+          default:
+            assert(false, "language not handled: " + language);  // key in complexLanguageMappings without a case above
+        }
+    }
+
+    // No script replacements are currently present.
+
+    // Replace deprecated region subtags with their preferred values.
+    var region = tag.region;  // read after the language step: tag.language tested below is the replaced value
+    if (region !== undefined) {
+        if (hasOwn(region, regionMappings)) {
+            tag.region = regionMappings[region];
+        } else if (hasOwn(region, complexRegionMappings)) {  // replacement depends on language (and sometimes script)
+            switch (region) {
+              case "172":
+                if (tag.language === "ab") {
+                    tag.region = "GE";
+                    break;  // leaves the switch; the remaining checks are skipped
+                }
+                if (tag.language === "az") {
+                    tag.region = "AZ";
+                    break;
+                }
+                if (tag.language === "be") {
+                    tag.region = "BY";
+                    break;
+                }
+                if (tag.language === "crh") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "gag") {
+                    tag.region = "MD";
+                    break;
+                }
+                if (tag.language === "got") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "hy") {
+                    tag.region = "AM";
+                    break;
+                }
+                if (tag.language === "ji") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "ka") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "kaa") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "kk") {
+                    tag.region = "KZ";
+                    break;
+                }
+                if (tag.language === "ku" && tag.script === "Yezi") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "ky") {
+                    tag.region = "KG";
+                    break;
+                }
+                if (tag.language === "os") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "rue") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "sog") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "tg") {
+                    tag.region = "TJ";
+                    break;
+                }
+                if (tag.language === "tk") {
+                    tag.region = "TM";
+                    break;
+                }
+                if (tag.language === "tkr") {
+                    tag.region = "AZ";
+                    break;
+                }
+                if (tag.language === "tly") {
+                    tag.region = "AZ";
+                    break;
+                }
+                if (tag.language === "ttt") {
+                    tag.region = "AZ";
+                    break;
+                }
+                if (tag.language === "ug" && tag.script === "Cyrl") {
+                    tag.region = "KZ";
+                    break;
+                }
+                if (tag.language === "uk") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Geor") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Armn") {
+                    tag.region = "AM";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Sogo") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Goth") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Chrs") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Sogd") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Yezi") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "uz") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "xco") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "xmf") {
+                    tag.region = "GE";
+                    break;
+                }
+                tag.region = "RU";  // fallback when no language/script rule matched
+                break;
+              case "200":
+                if (tag.language === "sk") {
+                    tag.region = "SK";
+                    break;
+                }
+                tag.region = "CZ";  // fallback
+                break;
+              case "530":
+              case "532":
+              case "AN":
+                if (tag.language === "vic") {
+                    tag.region = "SX";
+                    break;
+                }
+                tag.region = "CW";  // fallback
+                break;
+              case "536":
+              case "NT":
+                if (tag.language === "akk") {
+                    tag.region = "IQ";
+                    break;
+                }
+                if (tag.language === "ckb") {
+                    tag.region = "IQ";
+                    break;
+                }
+                if (tag.language === "ku" && tag.script === "Arab") {
+                    tag.region = "IQ";
+                    break;
+                }
+                if (tag.language === "mis") {
+                    tag.region = "IQ";
+                    break;
+                }
+                if (tag.language === "syr") {
+                    tag.region = "IQ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Syrc") {
+                    tag.region = "IQ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Hatr") {
+                    tag.region = "IQ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Xsux") {
+                    tag.region = "IQ";
+                    break;
+                }
+                tag.region = "SA";  // fallback
+                break;
+              case "582":
+              case "PC":
+                if (tag.language === "mh") {
+                    tag.region = "MH";
+                    break;
+                }
+                if (tag.language === "pau") {
+                    tag.region = "PW";
+                    break;
+                }
+                tag.region = "FM";  // fallback
+                break;
+              case "810":  // same rules as "172" plus the et, lt, ltg, lv, sgs and vro entries
+              case "SU":
+                if (tag.language === "ab") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "az") {
+                    tag.region = "AZ";
+                    break;
+                }
+                if (tag.language === "be") {
+                    tag.region = "BY";
+                    break;
+                }
+                if (tag.language === "crh") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "et") {
+                    tag.region = "EE";
+                    break;
+                }
+                if (tag.language === "gag") {
+                    tag.region = "MD";
+                    break;
+                }
+                if (tag.language === "got") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "hy") {
+                    tag.region = "AM";
+                    break;
+                }
+                if (tag.language === "ji") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "ka") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "kaa") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "kk") {
+                    tag.region = "KZ";
+                    break;
+                }
+                if (tag.language === "ku" && tag.script === "Yezi") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "ky") {
+                    tag.region = "KG";
+                    break;
+                }
+                if (tag.language === "lt") {
+                    tag.region = "LT";
+                    break;
+                }
+                if (tag.language === "ltg") {
+                    tag.region = "LV";
+                    break;
+                }
+                if (tag.language === "lv") {
+                    tag.region = "LV";
+                    break;
+                }
+                if (tag.language === "os") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "rue") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "sgs") {
+                    tag.region = "LT";
+                    break;
+                }
+                if (tag.language === "sog") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "tg") {
+                    tag.region = "TJ";
+                    break;
+                }
+                if (tag.language === "tk") {
+                    tag.region = "TM";
+                    break;
+                }
+                if (tag.language === "tkr") {
+                    tag.region = "AZ";
+                    break;
+                }
+                if (tag.language === "tly") {
+                    tag.region = "AZ";
+                    break;
+                }
+                if (tag.language === "ttt") {
+                    tag.region = "AZ";
+                    break;
+                }
+                if (tag.language === "ug" && tag.script === "Cyrl") {
+                    tag.region = "KZ";
+                    break;
+                }
+                if (tag.language === "uk") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Geor") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Armn") {
+                    tag.region = "AM";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Sogo") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Goth") {
+                    tag.region = "UA";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Chrs") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Sogd") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "und" && tag.script === "Yezi") {
+                    tag.region = "GE";
+                    break;
+                }
+                if (tag.language === "uz") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "vro") {
+                    tag.region = "EE";
+                    break;
+                }
+                if (tag.language === "xco") {
+                    tag.region = "UZ";
+                    break;
+                }
+                if (tag.language === "xmf") {
+                    tag.region = "GE";
+                    break;
+                }
+                tag.region = "RU";  // fallback
+                break;
+              case "890":
+                if (tag.language === "bs") {
+                    tag.region = "BA";
+                    break;
+                }
+                if (tag.language === "hr") {
+                    tag.region = "HR";
+                    break;
+                }
+                if (tag.language === "mk") {
+                    tag.region = "MK";
+                    break;
+                }
+                if (tag.language === "sl") {
+                    tag.region = "SI";
+                    break;
+                }
+                tag.region = "RS";  // fallback
+                break;
+              default:
+                assert(false, "region not handled: " + region);  // key in complexRegionMappings without a case above
+            }
+        }
+
+        // No variant replacements are currently present.
+        // No extension replacements are currently present.
+        // Private use sequences are left as is.
+
+    }
+}
+/* eslint-enable complexity */
+
+// Canonicalize regular grandfathered locale identifiers by rewriting |tag| in place; nothing is returned.
+// Derived from CLDR Supplemental Data, version 36.1.
+// https://github.com/unicode-org/cldr.git
+function updateGrandfatheredMappings(tag) {
+    assert(IsObject(tag), "tag is an object");
+
+    // We're mapping regular grandfathered tags to non-grandfathered form here.
+    // Other tags remain unchanged.
+    //
+    // regular       = "art-lojban"
+    //               / "cel-gaulish"
+    //               / "no-bok"
+    //               / "no-nyn"
+    //               / "zh-guoyu"
+    //               / "zh-hakka"
+    //               / "zh-min"
+    //               / "zh-min-nan"
+    //               / "zh-xiang"
+    //
+    // Therefore we can quickly exclude most tags by checking every
+    // |unicode_locale_id| subcomponent for characteristics not shared by any of
+    // the regular grandfathered (RG) tags:
+    //
+    //   * Real-world |unicode_language_subtag|s are all two or three letters,
+    //     so don't waste time running a useless |language.length > 3| fast-path.
+    //   * No RG tag has a "script"-looking component.
+    //   * No RG tag has a "region"-looking component.
+    //   * The RG tags that match |unicode_locale_id| (art-lojban, cel-gaulish,
+    //     zh-guoyu, zh-hakka, zh-xiang) have exactly one "variant". (no-bok,
+    //     no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag
+    //     that |unicode_locale_id| doesn't support.)
+    //   * No RG tag contains |extensions| or |pu_extensions|.
+    if (tag.script !== undefined ||
+        tag.region !== undefined ||
+        tag.variants.length !== 1 ||
+        tag.extensions.length !== 0 ||
+        tag.privateuse !== undefined)
+    {
+        return;  // cannot be an RG tag; leave |tag| untouched
+    }
+
+    // art-lojban -> jbo
+    if (tag.language === "art" && tag.variants[0] === "lojban") {
+        tag.language = "jbo";
+        tag.variants.length = 0;  // drops the single variant
+    }
+
+    // cel-gaulish -> xtg-x-cel-gaulish
+    else if (tag.language === "cel" && tag.variants[0] === "gaulish") {
+        tag.language = "xtg";
+        tag.variants.length = 0;
+        tag.privateuse = "x-cel-gaulish";  // the original tag is kept as a private-use sequence
+    }
+
+    // zh-guoyu -> zh
+    else if (tag.language === "zh" && tag.variants[0] === "guoyu") {
+        tag.language = "zh";  // no-op (language is already "zh"); only the variant is removed
+        tag.variants.length = 0;
+    }
+
+    // zh-hakka -> hak
+    else if (tag.language === "zh" && tag.variants[0] === "hakka") {
+        tag.language = "hak";
+        tag.variants.length = 0;
+    }
+
+    // zh-xiang -> hsn
+    else if (tag.language === "zh" && tag.variants[0] === "xiang") {
+        tag.language = "hsn";
+        tag.variants.length = 0;
+    }
+}
diff --git a/js/src/builtin/intl/NumberFormat.js b/js/src/builtin/intl/NumberFormat.js
index bba78d7a0d..64158c1103 100644
--- a/js/src/builtin/intl/NumberFormat.js
+++ b/js/src/builtin/intl/NumberFormat.js
@@ -8,7 +8,7 @@
 /**
  * NumberFormat internal properties.
  *
- * Spec: ECMAScript Internationalization API Specification, 9.1 and 11.2.3.
+ * Spec: ECMAScript Internationalization API Specification, 9.1 and 11.3.3.
  */
 var numberFormatInternalProperties = {
     localeData: numberFormatLocaleData,
@@ -35,44 +35,38 @@ function resolveNumberFormatInternals(lazyNumberFormatData) {
 
     var internalProps = std_Object_create(null);
 
-    // Step 3.
-    var requestedLocales = lazyNumberFormatData.requestedLocales;
-
-    // Compute options that impact interpretation of locale.
-    // Step 6.
-    var opt = lazyNumberFormatData.opt;
-
     var NumberFormat = numberFormatInternalProperties;
 
-    // Step 9.
+    // Compute effective locale.
+
+    // Step 7.
     var localeData = NumberFormat.localeData;
 
-    // Step 10.
+    // Step 8.
     var r = ResolveLocale(callFunction(NumberFormat.availableLocales, NumberFormat),
                           lazyNumberFormatData.requestedLocales,
                           lazyNumberFormatData.opt,
                           NumberFormat.relevantExtensionKeys,
                           localeData);
 
-    // Steps 11-12.  (Step 13 is not relevant to our implementation.)
+    // Steps 9-10. (Step 11 is not relevant to our implementation.)
     internalProps.locale = r.locale;
     internalProps.numberingSystem = r.nu;
 
     // Compute formatting options.
-    // Step 15.
+    // Step 13.
     var s = lazyNumberFormatData.style;
     internalProps.style = s;
 
-    // Steps 19, 21.
+    // Steps 17, 19.
     if (s === "currency") {
         internalProps.currency = lazyNumberFormatData.currency;
         internalProps.currencyDisplay = lazyNumberFormatData.currencyDisplay;
     }
 
+    // Step 22.
     internalProps.minimumIntegerDigits = lazyNumberFormatData.minimumIntegerDigits;
-
     internalProps.minimumFractionDigits = lazyNumberFormatData.minimumFractionDigits;
-
     internalProps.maximumFractionDigits = lazyNumberFormatData.maximumFractionDigits;
 
     if ("minimumSignificantDigits" in lazyNumberFormatData) {
@@ -83,12 +77,9 @@ function resolveNumberFormatInternals(lazyNumberFormatData) {
         internalProps.maximumSignificantDigits = lazyNumberFormatData.maximumSignificantDigits;
     }
 
-    // Step 27.
+    // Step 24.
     internalProps.useGrouping = lazyNumberFormatData.useGrouping;
 
-    // Step 34.
-    internalProps.boundFormat = undefined;
-
     // The caller is responsible for associating |internalProps| with the right
     // object using |setInternalProperties|.
     return internalProps;
@@ -118,19 +109,21 @@ function getNumberFormatInternals(obj) {
 
 
 /**
- * UnwrapNumberFormat(nf)
+ * 11.1.11 UnwrapNumberFormat( nf )
  */
 function UnwrapNumberFormat(nf, methodName) {
-    // Step 1.
+    // Step 1 (not applicable in our implementation).
+
+    // Step 2.
     if ((!IsObject(nf) || !IsNumberFormat(nf)) && nf instanceof GetNumberFormatConstructor()) {
         nf = nf[intlFallbackSymbol()];
     }
 
-    // Step 2.
+    // Step 3.
     if (!IsObject(nf) || !IsNumberFormat(nf))
         ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "NumberFormat", methodName, "NumberFormat");
 
-    // Step 3.
+    // Step 4.
     return nf;
 }
 
@@ -141,18 +134,18 @@ function UnwrapNumberFormat(nf, methodName) {
  * Spec: ECMAScript Internationalization API Specification, 11.1.1.
  */
 function SetNumberFormatDigitOptions(lazyData, options, mnfdDefault) {
-    // We skip Step 1 because we set the properties on a lazyData object.
+    // We skip step 1 because we set the properties on a lazyData object.
 
-    // Step 2-3.
+    // Steps 2-4.
     assert(IsObject(options), "SetNumberFormatDigitOptions");
     assert(typeof mnfdDefault === "number", "SetNumberFormatDigitOptions");
 
-    // Steps 4-6.
+    // Steps 5-8.
     const mnid = GetNumberOption(options, "minimumIntegerDigits", 1, 21, 1);
     const mnfd = GetNumberOption(options, "minimumFractionDigits", 0, 20, mnfdDefault);
     const mxfd = GetNumberOption(options, "maximumFractionDigits", mnfd, 20);
 
-    // Steps 7-8.
+    // Steps 9-10.
     let mnsd = options.minimumSignificantDigits;
     let mxsd = options.maximumSignificantDigits;
 
@@ -196,17 +189,9 @@ function toASCIIUpperCase(s) {
  *
  * Spec: ECMAScript Internationalization API Specification, 6.3.1.
  */
-function getIsWellFormedCurrencyCodeRE() {
-    return internalIntlRegExps.isWellFormedCurrencyCodeRE ||
-           (internalIntlRegExps.isWellFormedCurrencyCodeRE = RegExpCreate("[^A-Z]"));
-}
-
 function IsWellFormedCurrencyCode(currency) {
-    var c = ToString(currency);
-    var normalized = toASCIIUpperCase(c);
-    if (normalized.length !== 3)
-        return false;
-    return !regexp_test_no_statics(getIsWellFormedCurrencyCodeRE(), normalized);
+    assert(typeof currency === "string", "currency is a string value");
+    return currency.length === 3 && IsASCIIAlphaString(currency);
 }
 
 /**
@@ -218,15 +203,12 @@ function IsWellFormedCurrencyCode(currency) {
  * This later work occurs in |resolveNumberFormatInternals|; steps not noted
  * here occur there.
  *
- * Spec: ECMAScript Internationalization API Specification, 11.1.1.
+ * Spec: ECMAScript Internationalization API Specification, 11.1.2.
  */
 function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
     assert(IsObject(numberFormat), "InitializeNumberFormat called with non-object");
     assert(IsNumberFormat(numberFormat), "InitializeNumberFormat called with non-NumberFormat");
 
-    // Steps 1-2 (These steps are no longer required and should be removed
-    // from the spec; https://github.com/tc39/ecma402/issues/115).
-
     // Lazy NumberFormat data has the following structure:
     //
     //   {
@@ -258,11 +240,11 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
     // subset of them.
     var lazyNumberFormatData = std_Object_create(null);
 
-    // Step 3.
+    // Step 1.
     var requestedLocales = CanonicalizeLocaleList(locales);
     lazyNumberFormatData.requestedLocales = requestedLocales;
 
-    // Steps 4-5.
+    // Steps 2-3.
     //
     // If we ever need more speed here at startup, we should try to detect the
     // case where |options === undefined| and Object.prototype hasn't been
@@ -275,20 +257,20 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
         options = ToObject(options);
 
     // Compute options that impact interpretation of locale.
-    // Step 6.
+    // Step 4.
     var opt = new Record();
     lazyNumberFormatData.opt = opt;
 
-    // Steps 7-8.
+    // Steps 5-6.
     var matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
     opt.localeMatcher = matcher;
 
     // Compute formatting options.
-    // Step 14.
+    // Step 12.
     var s = GetOption(options, "style", "string", ["decimal", "percent", "currency"], "decimal");
     lazyNumberFormatData.style = s;
 
-    // Steps 16-19.
+    // Steps 14-17.
     var c = GetOption(options, "currency", "string", undefined, undefined);
     if (c !== undefined && !IsWellFormedCurrencyCode(c))
         ThrowRangeError(JSMSG_INVALID_CURRENCY_CODE, c);
@@ -303,12 +285,12 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
         cDigits = CurrencyDigits(c);
     }
 
-    // Step 20.
+    // Step 18.
     var cd = GetOption(options, "currencyDisplay", "string", ["code", "symbol", "name"], "symbol");
     if (s === "currency")
         lazyNumberFormatData.currencyDisplay = cd;
 
-    // Steps 22-24.
+    // Steps 20-22.
     SetNumberFormatDigitOptions(lazyNumberFormatData, options, s === "currency" ? cDigits: 0);
 
     // Step 25.
@@ -322,16 +304,19 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
             std_Math_max(lazyNumberFormatData.minimumFractionDigits, mxfdDefault);
     }
 
-    // Step 26.
+    // Step 23.
     var g = GetOption(options, "useGrouping", "boolean", undefined, true);
     lazyNumberFormatData.useGrouping = g;
 
-    // Steps 35-36.
+    // Step 31.
     //
     // We've done everything that must be done now: mark the lazy data as fully
     // computed and install it.
     initializeIntlObject(numberFormat, "NumberFormat", lazyNumberFormatData);
 
+    // 11.2.1, steps 4-5.
+    // TODO: spec issue - The current spec doesn't have the IsObject check,
+    // which means |Intl.NumberFormat.call(null)| is supposed to throw here.
     if (numberFormat !== thisValue && thisValue instanceof GetNumberFormatConstructor()) {
         if (!IsObject(thisValue))
             ThrowTypeError(JSMSG_NOT_NONNULL_OBJECT, typeof thisValue);
@@ -342,6 +327,7 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
         return thisValue;
     }
 
+    // 11.2.1, step 6.
     return numberFormat;
 }
 
@@ -386,15 +372,12 @@ var currencyDigits = {
 /**
  * Returns the number of decimal digits to be used for the given currency.
  *
- * Spec: ECMAScript Internationalization API Specification, 11.1.1.
+ * Spec: ECMAScript Internationalization API Specification, 11.1.3.
  */
-function getCurrencyDigitsRE() {
-    return internalIntlRegExps.currencyDigitsRE ||
-           (internalIntlRegExps.currencyDigitsRE = RegExpCreate("^[A-Z]{3}$"));
-}
 function CurrencyDigits(currency) {
-    assert(typeof currency === "string", "CurrencyDigits");
-    assert(regexp_test_no_statics(getCurrencyDigitsRE(), currency), "CurrencyDigits");
+    assert(typeof currency === "string", "currency is a string value");
+    assert(IsWellFormedCurrencyCode(currency), "currency is well-formed");
+    assert(currency == toASCIIUpperCase(currency), "currency is all upper-case");
 
     if (hasOwn(currency, currencyDigits))
         return currencyDigits[currency];
@@ -407,14 +390,19 @@ function CurrencyDigits(currency) {
  * matching (possibly fallback) locale. Locales appear in the same order in the
  * returned list as in the input list.
  *
- * Spec: ECMAScript Internationalization API Specification, 11.2.2.
+ * Spec: ECMAScript Internationalization API Specification, 11.3.2.
  */
 function Intl_NumberFormat_supportedLocalesOf(locales /*, options*/) {
     var options = arguments.length > 1 ? arguments[1] : undefined;
 
+    // Step 1.
     var availableLocales = callFunction(numberFormatInternalProperties.availableLocales,
                                         numberFormatInternalProperties);
+
+    // Step 2.
     var requestedLocales = CanonicalizeLocaleList(locales);
+
+    // Step 3.
     return SupportedLocales(availableLocales, requestedLocales, options);
 }
 
@@ -427,8 +415,8 @@ function getNumberingSystems(locale) {
     // Algorithmic numbering systems are typically tied to one locale, so for
     // lack of information we don't offer them. To increase chances that
     // other software will process output correctly, we further restrict to
-    // those decimal numbering systems explicitly listed in table 2 of
-    // the ECMAScript Internationalization API Specification, 11.3.2, which
+    // those decimal numbering systems explicitly listed in table 3 of
+    // the ECMAScript Internationalization API Specification, 11.1.6, which
     // in turn are those with full specifications in version 21 of Unicode
     // Technical Standard #35 using digits that were defined in Unicode 5.0,
     // the Unicode version supported in Windows Vista.
@@ -459,7 +447,7 @@ function numberFormatLocaleData() {
 /**
  * Function to be bound and returned by Intl.NumberFormat.prototype.format.
  *
- * Spec: ECMAScript Internationalization API Specification, 11.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 11.1.4.
  */
 function numberFormatFormatToBind(value) {
     // Steps 1.a.i implemented by ECMAScript declaration binding instantiation,
@@ -476,7 +464,7 @@ function numberFormatFormatToBind(value) {
  * representing the result of calling ToNumber(value) according to the
  * effective locale and the formatting options of this NumberFormat.
  *
- * Spec: ECMAScript Internationalization API Specification, 11.3.2.
+ * Spec: ECMAScript Internationalization API Specification, 11.4.3.
  */
 function Intl_NumberFormat_format_get() {
     // Steps 1-3.
@@ -486,12 +474,11 @@ function Intl_NumberFormat_format_get() {
 
     // Step 4.
     if (internals.boundFormat === undefined) {
-        // Step 4.a.
-        var F = numberFormatFormatToBind;
+        // Steps 4.a-b.
+        var F = callFunction(FunctionBind, numberFormatFormatToBind, nf);
 
-        // Steps 4.b-d.
-        var bf = callFunction(FunctionBind, F, nf);
-        internals.boundFormat = bf;
+        // Step 4.c.
+        internals.boundFormat = F;
     }
 
     // Step 5.
@@ -499,6 +486,9 @@ function Intl_NumberFormat_format_get() {
 }
 _SetCanonicalName(Intl_NumberFormat_format_get, "get format");
 
+/**
+ * 11.4.4 Intl.NumberFormat.prototype.formatToParts ( value )
+ */
 function Intl_NumberFormat_formatToParts(value) {
     // Steps 1-3.
     var nf = UnwrapNumberFormat(this, "formatToParts");
@@ -516,14 +506,15 @@ function Intl_NumberFormat_formatToParts(value) {
 /**
  * Returns the resolved options for a NumberFormat object.
  *
- * Spec: ECMAScript Internationalization API Specification, 11.3.3 and 11.4.
+ * Spec: ECMAScript Internationalization API Specification, 11.4.5.
  */
 function Intl_NumberFormat_resolvedOptions() {
-    // Invoke |UnwrapNumberFormat| per introduction of section 11.3.
+    // Steps 1-3.
     var nf = UnwrapNumberFormat(this, "resolvedOptions");
 
     var internals = getNumberFormatInternals(nf);
 
+    // Steps 4-5.
     var result = {
         locale: internals.locale,
         numberingSystem: internals.numberingSystem,
@@ -533,17 +524,31 @@ function Intl_NumberFormat_resolvedOptions() {
         maximumFractionDigits: internals.maximumFractionDigits,
         useGrouping: internals.useGrouping
     };
-    var optionalProperties = [
-        "currency",
-        "currencyDisplay",
-        "minimumSignificantDigits",
-        "maximumSignificantDigits"
-    ];
-    for (var i = 0; i < optionalProperties.length; i++) {
-        var p = optionalProperties[i];
-        if (hasOwn(p, internals))
-            _DefineDataProperty(result, p, internals[p]);
+
+    // currency and currencyDisplay are only present for currency formatters.
+    assert(hasOwn("currency", internals) === (internals.style === "currency"),
+           "currency is present iff style is 'currency'");
+    assert(hasOwn("currencyDisplay", internals) === (internals.style === "currency"),
+           "currencyDisplay is present iff style is 'currency'");
+
+    if (hasOwn("currency", internals)) {
+        _DefineDataProperty(result, "currency", internals.currency);
+        _DefineDataProperty(result, "currencyDisplay", internals.currencyDisplay);
+    }
+
+    // Min/Max significant digits are either both present or not at all.
+    assert(hasOwn("minimumSignificantDigits", internals) ===
+           hasOwn("maximumSignificantDigits", internals),
+           "minimumSignificantDigits is present iff maximumSignificantDigits is present");
+
+    if (hasOwn("minimumSignificantDigits", internals)) {
+        _DefineDataProperty(result, "minimumSignificantDigits",
+                            internals.minimumSignificantDigits);
+        _DefineDataProperty(result, "maximumSignificantDigits",
+                            internals.maximumSignificantDigits);
     }
+
+    // Step 6.
     return result;
 }
 
diff --git a/js/src/builtin/intl/PluralRules.cpp b/js/src/builtin/intl/PluralRules.cpp
index 78bd9e5d74..63d399f818 100644
--- a/js/src/builtin/intl/PluralRules.cpp
+++ b/js/src/builtin/intl/PluralRules.cpp
@@ -79,7 +79,7 @@ static const JSFunctionSpec pluralRules_methods[] = {
 
 /**
  * PluralRules constructor.
- * Spec: ECMAScript 402 API, PluralRules, 1.1
+ * Spec: ECMAScript 402 API, PluralRules, 13.2.1
  */
 static bool
 PluralRules(JSContext* cx, const CallArgs& args, bool construct)
diff --git a/js/src/builtin/intl/PluralRules.js b/js/src/builtin/intl/PluralRules.js
index 1e138a8830..d687296245 100644
--- a/js/src/builtin/intl/PluralRules.js
+++ b/js/src/builtin/intl/PluralRules.js
@@ -7,7 +7,7 @@
 /**
  * PluralRules internal properties.
  *
- * Spec: ECMAScript 402 API, PluralRules, 1.3.3.
+ * Spec: ECMAScript 402 API, PluralRules, 13.3.3.
  */
 var pluralRulesInternalProperties = {
     localeData: pluralRulesLocaleData,
@@ -44,20 +44,25 @@ function resolvePluralRulesInternals(lazyPluralRulesData) {
 
     var PluralRules = pluralRulesInternalProperties;
 
-    // Step 13.
+    // Compute effective locale.
+
+    // Step 10.
+    var localeData = PluralRules.localeData;
+
+    // Step 11.
     const r = ResolveLocale(callFunction(PluralRules.availableLocales, PluralRules),
-                          lazyPluralRulesData.requestedLocales,
-                          lazyPluralRulesData.opt,
-                          PluralRules.relevantExtensionKeys, PluralRules.localeData);
+                            lazyPluralRulesData.requestedLocales,
+                            lazyPluralRulesData.opt,
+                            PluralRules.relevantExtensionKeys,
+                            localeData);
 
-    // Step 14.
+    // Step 12.
     internalProps.locale = r.locale;
-    internalProps.type = lazyPluralRulesData.type;
 
-    internalProps.pluralCategories = intl_GetPluralCategories(
-        internalProps.locale,
-        internalProps.type);
+    // Step 8.
+    internalProps.type = lazyPluralRulesData.type;
 
+    // Step 9.
     internalProps.minimumIntegerDigits = lazyPluralRulesData.minimumIntegerDigits;
     internalProps.minimumFractionDigits = lazyPluralRulesData.minimumFractionDigits;
     internalProps.maximumFractionDigits = lazyPluralRulesData.maximumFractionDigits;
@@ -68,6 +73,9 @@ function resolvePluralRulesInternals(lazyPluralRulesData) {
         internalProps.maximumSignificantDigits = lazyPluralRulesData.maximumSignificantDigits;
     }
 
+    // Step 13 (lazily computed on first access).
+    internalProps.pluralCategories = null;
+
     return internalProps;
 }
 
@@ -99,15 +107,12 @@ function getPluralRulesInternals(obj) {
  * This later work occurs in |resolvePluralRulesInternals|; steps not noted
  * here occur there.
  *
- * Spec: ECMAScript 402 API, PluralRules, 1.1.1.
+ * Spec: ECMAScript 402 API, PluralRules, 13.1.1.
  */
 function InitializePluralRules(pluralRules, locales, options) {
     assert(IsObject(pluralRules), "InitializePluralRules called with non-object");
     assert(IsPluralRules(pluralRules), "InitializePluralRules called with non-PluralRules");
 
-    // Steps 1-2 (These steps are no longer required and should be removed
-    // from the spec; https://github.com/tc39/ecma402/issues/115).
-
     // Lazy PluralRules data has the following structure:
     //
     //   {
@@ -133,30 +138,29 @@ function InitializePluralRules(pluralRules, locales, options) {
     // subset of them.
     const lazyPluralRulesData = std_Object_create(null);
 
-    // Step 3.
+    // Step 1.
     let requestedLocales = CanonicalizeLocaleList(locales);
     lazyPluralRulesData.requestedLocales = requestedLocales;
 
-    // Steps 4-5.
+    // Steps 2-3.
     if (options === undefined)
         options = {};
     else
         options = ToObject(options);
 
-    // Step 6.
-    const type = GetOption(options, "type", "string", ["cardinal", "ordinal"], "cardinal");
-    lazyPluralRulesData.type = type;
-
-    // Step 8.
+    // Step 4.
     let opt = new Record();
     lazyPluralRulesData.opt = opt;
 
-    // Steps 9-10.
+    // Steps 5-6.
     let matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
     opt.localeMatcher = matcher;
 
+    // Step 7.
+    const type = GetOption(options, "type", "string", ["cardinal", "ordinal"], "cardinal");
+    lazyPluralRulesData.type = type;
 
-    // Step 11.
+    // Step 9.
     SetNumberFormatDigitOptions(lazyPluralRulesData, options, 0);
 
     // Step 12.
@@ -165,6 +169,10 @@ function InitializePluralRules(pluralRules, locales, options) {
            std_Math_max(lazyPluralRulesData.minimumFractionDigits, 3);
     }
 
+    // Step 15.
+    //
+    // We've done everything that must be done now: mark the lazy data as fully
+    // computed and install it.
     initializeIntlObject(pluralRules, "PluralRules", lazyPluralRulesData)
 }
 
@@ -173,7 +181,7 @@ function InitializePluralRules(pluralRules, locales, options) {
  * matching (possibly fallback) locale. Locales appear in the same order in the
  * returned list as in the input list.
  *
- * Spec: ECMAScript 402 API, PluralRules, 1.3.2.
+ * Spec: ECMAScript 402 API, PluralRules, 13.3.2.
  */
 function Intl_PluralRules_supportedLocalesOf(locales /*, options*/) {
     var options = arguments.length > 1 ? arguments[1] : undefined;
@@ -193,20 +201,20 @@ function Intl_PluralRules_supportedLocalesOf(locales /*, options*/) {
  * the number passed as value according to the
  * effective locale and the formatting options of this PluralRules.
  *
- * Spec: ECMAScript 402 API, PluralRules, 1.4.3.
+ * Spec: ECMAScript 402 API, PluralRules, 13.4.3.
  */
 function Intl_PluralRules_select(value) {
     // Step 1.
     let pluralRules = this;
 
-    // Step 2.
+    // Steps 2-3.
     if (!IsObject(pluralRules) || !IsPluralRules(pluralRules))
         ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "PluralRules", "select", "PluralRules");
 
     // Ensure the PluralRules internals are resolved.
     getPluralRulesInternals(pluralRules);
 
-    // Steps 3-4.
+    // Step 4.
     let n = ToNumber(value);
 
     // Step 5.
@@ -216,17 +224,34 @@ function Intl_PluralRules_select(value) {
 /**
  * Returns the resolved options for a PluralRules object.
  *
- * Spec: ECMAScript 402 API, PluralRules, 1.4.4.
+ * Spec: ECMAScript 402 API, PluralRules, 13.4.4.
  */
 function Intl_PluralRules_resolvedOptions() {
-    // Check "this PluralRules object" per introduction of section 1.4.
-    if (!IsObject(this) || !IsPluralRules(this)) {
+    // Step 1.
+    var pluralRules = this;
+
+    // Steps 2-3.
+    if (!IsObject(pluralRules) || !IsPluralRules(pluralRules)) {
         ThrowTypeError(JSMSG_INTL_OBJECT_NOT_INITED, "PluralRules", "resolvedOptions",
                        "PluralRules");
     }
 
-    var internals = getPluralRulesInternals(this);
+    var internals = getPluralRulesInternals(pluralRules);
+
+    var internalsPluralCategories = internals.pluralCategories;
+    if (internalsPluralCategories === null) {
+        internalsPluralCategories = intl_GetPluralCategories(internals.locale, internals.type);
+        internals.pluralCategories = internalsPluralCategories;
+    }
+
+    // TODO: The current spec actually requires returning the internal array
+    // object and not a copy of it.
+    // <https://github.com/tc39/proposal-intl-plural-rules/issues/28#issuecomment-341557030>
+    var pluralCategories = [];
+    for (var i = 0; i < internalsPluralCategories.length; i++)
+        _DefineDataProperty(pluralCategories, i, internalsPluralCategories[i]);
 
+    // Steps 4-5.
     var result = {
         locale: internals.locale,
         type: internals.type,
@@ -236,16 +261,19 @@ function Intl_PluralRules_resolvedOptions() {
         maximumFractionDigits: internals.maximumFractionDigits,
     };
 
-    var optionalProperties = [
-        "minimumSignificantDigits",
-        "maximumSignificantDigits"
-    ];
+    // Min/Max significant digits are either both present or not at all.
+    assert(hasOwn("minimumSignificantDigits", internals) ===
+           hasOwn("maximumSignificantDigits", internals),
+           "minimumSignificantDigits is present iff maximumSignificantDigits is present");
 
-    for (var i = 0; i < optionalProperties.length; i++) {
-        var p = optionalProperties[i];
-        if (hasOwn(p, internals))
-            _DefineDataProperty(result, p, internals[p]);
+    if (hasOwn("minimumSignificantDigits", internals)) {
+        _DefineDataProperty(result, "minimumSignificantDigits",
+                            internals.minimumSignificantDigits);
+        _DefineDataProperty(result, "maximumSignificantDigits",
+                            internals.maximumSignificantDigits);
     }
+
+    // Step 6.
     return result;
 }
 
diff --git a/js/src/builtin/intl/make_intl_data.py b/js/src/builtin/intl/make_intl_data.py
index 02bf350814..f2a6b32082 100644
--- a/js/src/builtin/intl/make_intl_data.py
+++ b/js/src/builtin/intl/make_intl_data.py
@@ -6,19 +6,14 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 """ Usage:
-    make_intl_data.py langtags [language-subtag-registry.txt]
+    make_intl_data.py langtags [ldmlSupplemental.dtd supplementalMetadata.xml likelySubtags.xml]
     make_intl_data.py tzdata
 
     Target "langtags":
     This script extracts information about mappings between deprecated and
-    current BCP 47 language tags from the IANA Language Subtag Registry and
-    converts it to JavaScript object definitions in
-    LangTagMappingsGenerated.js. The definitions are used in Intl.js.
-
-    The IANA Language Subtag Registry is imported from
-    https://www.iana.org/assignments/language-subtag-registry
-    and uses the syntax specified in
-    https://tools.ietf.org/html/rfc5646#section-3
+    current Unicode BCP 47 locale identifiers from CLDR and converts it to
+    JavaScript object definitions in LangTagMappingsGenerated.js. The
+    definitions are used in Intl.js.
 
 
     Target "tzdata":
@@ -32,202 +27,714 @@ import os
 import re
 import io
 import codecs
+import shutil
+import subprocess
 import sys
 import tarfile
 import tempfile
 import urllib2
-import urlparse
-from contextlib import closing
+from contextlib import closing, contextmanager
 from functools import partial
 from itertools import chain, ifilter, ifilterfalse, imap, tee
 from operator import attrgetter, itemgetter
+from urlparse import urlsplit, urlunsplit
+
+def writeMappingHeader(println, description, source, url):
+    if type(description) is not list:
+        description = [description]
+    for desc in description:
+        println(u"// {0}".format(desc))
+    println(u"// Derived from {0}.".format(source))
+    println(u"// {0}".format(url))
+
+def writeMappingsVar(println, mapping, name, description, source, url):
+    """ Writes a variable definition with a mapping table.
+
+        Writes the contents of dictionary |mapping| through the |println|
+        function with the given variable name and a comment with description,
+        source, and URL.
+    """
+    println(u"")
+    writeMappingHeader(println, description, source, url)
+    println(u"var {0} = {{".format(name))
+    for key in sorted(mapping):
+        if not isinstance(mapping[key], dict):
+            value = mapping[key]
+            if isinstance(value, bool):
+                value = "true" if value else "false"
+            else:
+                value = '"{0}"'.format(value)
+        else:
+            preferred = mapping[key]["preferred"]
+            prefix = mapping[key]["prefix"]
+            if key != preferred:
+                raise Exception("Expected '{0}' matches preferred locale '{1}'".format(key, preferred))
+            value = '"{0}"'.format(prefix)
+        println(u'    "{0}": {1},'.format(key, value))
+    println(u"};")
+
+def writeUpdateLocaleIdMappingsFunction(println,
+                                        complex_language_mappings,
+                                        complex_region_mappings,
+                                        description, source, url):
+    """ Writes a function definition that performs language tag mapping. """
+    println(u"")
+    writeMappingHeader(println, description, source, url)
+    println(u"""\
+/* eslint-disable complexity */
+function updateLocaleIdMappings(tag) {
+    assert(IsObject(tag), "tag is an object");
+
+    // Replace deprecated language tags with their preferred values.
+    var language = tag.language;
+    if (hasOwn(language, languageMappings)) {
+        tag.language = languageMappings[language];
+    } else if (hasOwn(language, complexLanguageMappings)) {
+        switch (language) {""")
+
+    # Merge duplicate language entries.
+    language_aliases = {}
+    for (deprecated_language, (language, script, region)) in (
+        sorted(complex_language_mappings.items(), key=itemgetter(0))
+    ):
+        key = (language, script, region)
+        if key not in language_aliases:
+            language_aliases[key] = []
+        else:
+            language_aliases[key].append(deprecated_language)
 
-def readRegistryRecord(registry):
-    """ Yields the records of the IANA Language Subtag Registry as dictionaries. """
-    record = {}
-    for line in registry:
-        line = line.strip()
-        if line == "":
+    for (deprecated_language, (language, script, region)) in (
+        sorted(complex_language_mappings.items(), key=itemgetter(0))
+    ):
+        key = (language, script, region)
+        if deprecated_language in language_aliases[key]:
             continue
-        if line == "%%":
-            yield record
-            record = {}
+
+        for lang in [deprecated_language] + language_aliases[key]:
+            println(u"""
+          case "{}":
+            """.format(lang).rstrip().strip("\n"))
+
+        println(u"""
+            tag.language = "{}";
+        """.format(language).rstrip().strip("\n"))
+        if script is not None:
+            println(u"""
+            if (tag.script === undefined)
+                tag.script = "{}";
+            """.format(script).rstrip().strip("\n"))
+        if region is not None:
+            println(u"""
+            if (tag.region === undefined)
+                tag.region = "{}";
+            """.format(region).rstrip().strip("\n"))
+        println(u"""
+            break;
+        """.rstrip().strip("\n"))
+
+    println(u"""
+          default:
+            assert(false, "language not handled: " + language);
+        }
+    }
+
+    // No script replacements are currently present.
+
+    // Replace deprecated subtags with their preferred values.
+    var region = tag.region;
+    if (region !== undefined) {
+        if (hasOwn(region, regionMappings)) {
+            tag.region = regionMappings[region];
+        } else if (hasOwn(region, complexRegionMappings)) {
+            switch (region) {""".lstrip("\n"))
+
+    # |non_default_replacements| is a list and hence not hashable. Convert it
+    # to a string to get a proper hashable value.
+    def hash_key(default, non_default_replacements):
+        return (default, str(sorted(str(v) for v in non_default_replacements)))
+
+    # Merge duplicate region entries.
+    region_aliases = {}
+    for (deprecated_region, (default, non_default_replacements)) in (
+        sorted(complex_region_mappings.items(), key=itemgetter(0))
+    ):
+        key = hash_key(default, non_default_replacements)
+        if key not in region_aliases:
+            region_aliases[key] = []
         else:
-            if ":" in line:
-                key, value = line.split(":", 1)
-                key, value = key.strip(), value.strip()
-                record[key] = value
+            region_aliases[key].append(deprecated_region)
+
+    for (deprecated_region, (default, non_default_replacements)) in (
+        sorted(complex_region_mappings.items(), key=itemgetter(0))
+    ):
+        key = hash_key(default, non_default_replacements)
+        if deprecated_region in region_aliases[key]:
+            continue
+
+        for region in [deprecated_region] + region_aliases[key]:
+            println(u"""
+              case "{}":
+            """.format(region).rstrip().strip("\n"))
+
+        for (language, script, region) in sorted(non_default_replacements, key=itemgetter(0)):
+            if script is None:
+                println(u"""
+                if (tag.language === "{}") {{
+                """.format(language).rstrip().strip("\n"))
             else:
-                # continuation line
-                record[key] += " " + line
-    if record:
-        yield record
-    return
+                println(u"""
+                if (tag.language === "{}" && tag.script === "{}") {{
+                """.format(language, script).rstrip().strip("\n"))
+            println(u"""
+                    tag.region = "{}";
+                    break;
+                }}
+            """.format(region).rstrip().strip("\n"))
+
+        println(u"""
+                tag.region = "{}";
+                break;
+        """.format(default).rstrip().strip("\n"))
+
+    println(u"""
+              default:
+                assert(false, "region not handled: " + region);
+            }
+        }
+
+        // No variant replacements are currently present.
+        // No extension replacements are currently present.
+        // Private use sequences are left as is.
+
+    }
+}
+/* eslint-enable complexity */
+""".strip("\n"))
+
+
+def writeGrandfatheredMappingsFunction(println,
+                                       grandfathered_mappings,
+                                       description, source, url):
+    """ Writes a function definition that maps grandfathered language tags. """
+    println(u"")
+    writeMappingHeader(println, description, source, url)
+    println(u"""\
+function updateGrandfatheredMappings(tag) {
+    assert(IsObject(tag), "tag is an object");
+
+    // We're mapping regular grandfathered tags to non-grandfathered form here.
+    // Other tags remain unchanged.
+    //
+    // regular       = "art-lojban"
+    //               / "cel-gaulish"
+    //               / "no-bok"
+    //               / "no-nyn"
+    //               / "zh-guoyu"
+    //               / "zh-hakka"
+    //               / "zh-min"
+    //               / "zh-min-nan"
+    //               / "zh-xiang"
+    //
+    // Therefore we can quickly exclude most tags by checking every
+    // |unicode_locale_id| subcomponent for characteristics not shared by any of
+    // the regular grandfathered (RG) tags:
+    //
+    //   * Real-world |unicode_language_subtag|s are all two or three letters,
+    //     so don't waste time running a useless |language.length > 3| fast-path.
+    //   * No RG tag has a "script"-looking component.
+    //   * No RG tag has a "region"-looking component.
+    //   * The RG tags that match |unicode_locale_id| (art-lojban, cel-gaulish,
+    //     zh-guoyu, zh-hakka, zh-xiang) have exactly one "variant". (no-bok,
+    //     no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag
+    //     that |unicode_locale_id| doesn't support.)
+    //   * No RG tag contains |extensions| or |pu_extensions|.
+    if (tag.script !== undefined ||
+        tag.region !== undefined ||
+        tag.variants.length !== 1 ||
+        tag.extensions.length !== 0 ||
+        tag.privateuse !== undefined)
+    {
+        return;
+    }""")
+
+    # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>.
+    #
+    # Doesn't allow any 'extensions' subtags.
+    re_unicode_locale_id = re.compile(
+        r"""
+        ^
+        # unicode_language_id = unicode_language_subtag
+        #     unicode_language_subtag = alpha{2,3} | alpha{5,8}
+        (?P<language>[a-z]{2,3}|[a-z]{5,8})
+
+        # (sep unicode_script_subtag)?
+        #     unicode_script_subtag = alpha{4}
+        (?:-(?P<script>[a-z]{4}))?
+
+        # (sep unicode_region_subtag)?
+        #     unicode_region_subtag = (alpha{2} | digit{3})
+        (?:-(?P<region>([a-z]{2}|[0-9]{3})))?
+
+        # (sep unicode_variant_subtag)*
+        #     unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
+        (?P<variants>(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+)?
+
+        # pu_extensions?
+        #     pu_extensions = sep [xX] (sep alphanum{1,8})+
+        (?:-(?P<privateuse>x(-[a-z0-9]{1,8})+))?
+        $
+        """, re.IGNORECASE | re.VERBOSE)
+
+    is_first = True
+
+    for (tag, modern) in sorted(grandfathered_mappings.items(), key=itemgetter(0)):
+        tag_match = re_unicode_locale_id.match(tag)
+        assert tag_match is not None
+
+        tag_language = tag_match.group("language")
+        assert tag_match.group("script") is None, (
+               "{} does not contain a script subtag".format(tag))
+        assert tag_match.group("region") is None, (
+               "{} does not contain a region subtag".format(tag))
+        tag_variants = tag_match.group("variants")
+        assert tag_variants is not None, (
+               "{} contains a variant subtag".format(tag))
+        assert tag_match.group("privateuse") is None, (
+               "{} does not contain a privateuse subtag".format(tag))
+
+        tag_variant = tag_variants[1:]
+        assert "-" not in tag_variant, (
+               "{} contains only a single variant".format(tag))
+
+        modern_match = re_unicode_locale_id.match(modern)
+        assert modern_match is not None
+
+        modern_language = modern_match.group("language")
+        modern_script = modern_match.group("script")
+        modern_region = modern_match.group("region")
+        modern_variants = modern_match.group("variants")
+        modern_privateuse = modern_match.group("privateuse")
+
+        println(u"""
+    // {} -> {}
+""".format(tag, modern).rstrip())
+
+        println(u"""
+    {}if (tag.language === "{}" && tag.variants[0] === "{}") {{
+        """.format("" if is_first else "else ", tag_language, tag_variant).rstrip().strip("\n"))
+
+        is_first = False
+
+        println(u"""
+        tag.language = "{}";
+        """.format(modern_language).rstrip().strip("\n"))
+
+        if modern_script is not None:
+            println(u"""
+        tag.script = "{}";
+        """.format(modern_script).rstrip().strip("\n"))
+
+        if modern_region is not None:
+            println(u"""
+        tag.region = "{}";
+        """.format(modern_region).rstrip().strip("\n"))
+
+        if modern_variants is not None:
+            println(u"""
+        tag.variants = {};
+        """.format(sorted(modern_variants[1:].split("-"))).rstrip().strip("\n"))
+        else:
+            println(u"""
+        tag.variants.length = 0;
+        """.rstrip().strip("\n"))
+
+        if modern_privateuse is not None:
+            println(u"""
+        tag.privateuse = "{}";
+        """.format(modern_privateuse).rstrip().strip("\n"))
+
+        println(u"""
+    }""".rstrip().strip("\n"))
 
+    println(u"""
+}""".lstrip("\n"))
 
-def readRegistry(registry):
-    """ Reads IANA Language Subtag Registry and extracts information for Intl.js.
+
+@contextmanager
+def TemporaryDirectory():
+    tmpDir = tempfile.mkdtemp()
+    try:
+        yield tmpDir
+    finally:
+        shutil.rmtree(tmpDir)
+
+
+def readSupplementalData(supplemental_dtd_file, supplemental_metadata_file, likely_subtags_file):
+    """ Reads CLDR Supplemental Data and extracts information for Intl.js.
 
         Information extracted:
-        - langTagMappings: mappings from complete language tags to preferred
+        - grandfatheredMappings: mappings from grandfathered tags to preferred
           complete language tags
-        - langSubtagMappings: mappings from subtags to preferred subtags
-        - extlangMappings: mappings from extlang subtags to preferred subtags,
-          with prefix to be removed
-        Returns these three mappings as dictionaries, along with the registry's
-        file date.
-
-        We also check that mappings for language subtags don't affect extlang
-        subtags and vice versa, so that CanonicalizeLanguageTag doesn't have
-        to separate them for processing. Region codes are separated by case,
-        and script codes by length, so they're unproblematic.
+        - languageMappings: mappings from language subtags to preferred subtags
+        - complexLanguageMappings: mappings from language subtags with complex rules
+        - regionMappings: mappings from region subtags to preferred subtags
+        - complexRegionMappings: mappings from region subtags with complex rules
+        Returns a dictionary holding these five mappings and the CLDR version.
     """
-    langTagMappings = {}
-    langSubtagMappings = {}
-    extlangMappings = {}
-    languageSubtags = set()
-    extlangSubtags = set()
-
-    for record in readRegistryRecord(registry):
-        if "File-Date" in record:
-            fileDate = record["File-Date"]
-            continue
+    import xml.etree.ElementTree as ET
+
+    # <!ATTLIST version cldrVersion CDATA #FIXED "36" >
+    re_cldr_version = re.compile(
+        r"""<!ATTLIST version cldrVersion CDATA #FIXED "(?P<version>[\d|\.]+)" >""")
+
+    with io.open(supplemental_dtd_file, mode="r", encoding="utf-8") as f:
+        version_match = re_cldr_version.search(f.read())
+        assert version_match is not None, "CLDR version string not found"
+        cldr_version = version_match.group("version")
+
+    # From Unicode BCP 47 locale identifier <https://unicode.org/reports/tr35/>.
+    re_unicode_language_id = re.compile(
+        r"""
+        ^
+        # unicode_language_id = unicode_language_subtag
+        #     unicode_language_subtag = alpha{2,3} | alpha{5,8}
+        (?P<language>[a-z]{2,3}|[a-z]{5,8})
+
+        # (sep unicode_script_subtag)?
+        #     unicode_script_subtag = alpha{4}
+        (?:-(?P<script>[a-z]{4}))?
+
+        # (sep unicode_region_subtag)?
+        #     unicode_region_subtag = (alpha{2} | digit{3})
+        (?:-(?P<region>([a-z]{2}|[0-9]{3})))?
+
+        # (sep unicode_variant_subtag)*
+        #     unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
+        (?P<variants>(-([a-z0-9]{5,8}|[0-9][a-z0-9]{3}))+)?
+        $
+        """, re.IGNORECASE | re.VERBOSE)
+
+    re_unicode_language_subtag = re.compile(
+        r"""
+        ^
+        # unicode_language_subtag = alpha{2,3} | alpha{5,8}
+        ([a-z]{2,3}|[a-z]{5,8})
+        $
+        """, re.IGNORECASE | re.VERBOSE)
+
+    re_unicode_region_subtag = re.compile(
+        r"""
+        ^
+        # unicode_region_subtag = (alpha{2} | digit{3})
+        ([a-z]{2}|[0-9]{3})
+        $
+        """, re.IGNORECASE | re.VERBOSE)
+
+    # The fixed list of BCP 47 grandfathered language tags.
+    grandfathered_tags = (
+        "art-lojban",
+        "cel-gaulish",
+        "en-GB-oed",
+        "i-ami",
+        "i-bnn",
+        "i-default",
+        "i-enochian",
+        "i-hak",
+        "i-klingon",
+        "i-lux",
+        "i-mingo",
+        "i-navajo",
+        "i-pwn",
+        "i-tao",
+        "i-tay",
+        "i-tsu",
+        "no-bok",
+        "no-nyn",
+        "sgn-BE-FR",
+        "sgn-BE-NL",
+        "sgn-CH-DE",
+        "zh-guoyu",
+        "zh-hakka",
+        "zh-min",
+        "zh-min-nan",
+        "zh-xiang",
+    )
 
-        if record["Type"] == "grandfathered":
-            # Grandfathered tags don't use standard syntax, so
-            # CanonicalizeLanguageTag expects the mapping table to provide
-            # the final form for all.
-            # For langTagMappings, keys must be in lower case; values in
-            # the case used in the registry.
-            tag = record["Tag"]
-            if "Preferred-Value" in record:
-                langTagMappings[tag.lower()] = record["Preferred-Value"]
-            else:
-                langTagMappings[tag.lower()] = tag
-        elif record["Type"] == "redundant":
-            # For langTagMappings, keys must be in lower case; values in
-            # the case used in the registry.
-            if "Preferred-Value" in record:
-                langTagMappings[record["Tag"].lower()] = record["Preferred-Value"]
-        elif record["Type"] in ("language", "script", "region", "variant"):
-            # For langSubtagMappings, keys and values must be in the case used
-            # in the registry.
-            subtag = record["Subtag"]
-            if record["Type"] == "language":
-                languageSubtags.add(subtag)
-            if "Preferred-Value" in record:
-                if subtag == "heploc":
-                    # The entry for heploc is unique in its complexity; handle
-                    # it as special case below.
-                    continue
-                if "Prefix" in record:
-                    # This might indicate another heploc-like complex case.
-                    raise Exception("Please evaluate: subtag mapping with prefix value.")
-                langSubtagMappings[subtag] = record["Preferred-Value"]
-        elif record["Type"] == "extlang":
-            # For extlangMappings, keys must be in the case used in the
-            # registry; values are records with the preferred value and the
-            # prefix to be removed.
-            subtag = record["Subtag"]
-            extlangSubtags.add(subtag)
-            if "Preferred-Value" in record:
-                preferred = record["Preferred-Value"]
-                prefix = record["Prefix"]
-                extlangMappings[subtag] = {"preferred": preferred, "prefix": prefix}
-        else:
-            # No other types are allowed by
-            # https://tools.ietf.org/html/rfc5646#section-3.1.3
-            assert False, "Unrecognized Type: {0}".format(record["Type"])
+    # The list of grandfathered tags which are valid Unicode BCP 47 locale identifiers.
+    unicode_bcp47_grandfathered_tags = {tag for tag in grandfathered_tags
+                                        if re_unicode_language_id.match(tag)}
 
-    # Check that mappings for language subtags and extlang subtags don't affect
-    # each other.
-    for lang in languageSubtags:
-        if lang in extlangMappings and extlangMappings[lang]["preferred"] != lang:
-            raise Exception("Conflict: lang with extlang mapping: " + lang)
-    for extlang in extlangSubtags:
-        if extlang in langSubtagMappings:
-            raise Exception("Conflict: extlang with lang mapping: " + extlang)
+    # Dictionary of simple language subtag mappings, e.g. "in" -> "id".
+    language_mappings = {}
 
-    # Special case for heploc.
-    langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97"
+    # Dictionary of complex language subtag mappings, modifying more than one
+    # subtag, e.g. "sh" -> ("sr", "Latn", None) and "cnr" -> ("sr", None, "ME").
+    complex_language_mappings = {}
 
-    # ValidateAndCanonicalizeLanguageTag in Intl.js expects langTagMappings
-    # contains no 2*3ALPHA.
-    assert all(len(lang) > 3 for lang in langTagMappings.iterkeys())
+    # Dictionary of simple region subtag mappings, e.g. "DD" -> "DE".
+    region_mappings = {}
 
-    return {"fileDate": fileDate,
-            "langTagMappings": langTagMappings,
-            "langSubtagMappings": langSubtagMappings,
-            "extlangMappings": extlangMappings}
+    # Dictionary of complex region subtag mappings, containing more than one
+    # replacement, e.g. "SU" -> ("RU", ["AM", "AZ", "BY", ...]).
+    complex_region_mappings = {}
 
+    # Dictionary of grandfathered mappings to preferred values.
+    grandfathered_mappings = {}
 
-def writeMappingsVar(intlData, dict, name, description, fileDate, url):
-    """ Writes a variable definition with a mapping table to file intlData.
+    # CLDR uses "_" as the separator for some elements. Replace it with "-".
+    def bcp47_id(cldr_id):
+        return cldr_id.replace("_", "-")
 
-        Writes the contents of dictionary dict to file intlData with the given
-        variable name and a comment with description, fileDate, and URL.
-    """
-    intlData.write("\n")
-    intlData.write("// {0}.\n".format(description))
-    intlData.write("// Derived from IANA Language Subtag Registry, file date {0}.\n".format(fileDate))
-    intlData.write("// {0}\n".format(url))
-    intlData.write("var {0} = {{\n".format(name))
-    keys = sorted(dict)
-    for key in keys:
-        if isinstance(dict[key], basestring):
-            value = '"{0}"'.format(dict[key])
+    # CLDR uses the canonical case for most entries, but there are some
+    # exceptions, like:
+    #   <languageAlias type="drw" replacement="fa_af" reason="deprecated"/>
+    # Therefore canonicalize all tags to be on the safe side.
+    def bcp47_canonical(language, script, region):
+        # Canonical case for language subtags is lower case.
+        # Canonical case for script subtags is title case.
+        # Canonical case for region subtags is upper case.
+        return (language.lower() if language else None,
+                script.title() if script else None,
+                region.upper() if region else None)
+
+    tree = ET.parse(supplemental_metadata_file)
+
+    for language_alias in tree.iterfind(".//languageAlias"):
+        type = bcp47_id(language_alias.get("type"))
+        replacement = bcp47_id(language_alias.get("replacement"))
+
+        # Handle grandfathered mappings first.
+        if type in unicode_bcp47_grandfathered_tags:
+            grandfathered_mappings[type] = replacement
+            continue
+
+        # We're only interested in language subtag matches, so ignore any
+        # entries which have additional subtags.
+        if re_unicode_language_subtag.match(type) is None:
+            continue
+
+        if re_unicode_language_subtag.match(replacement) is not None:
+            # Canonical case for language subtags is lower-case.
+            language_mappings[type] = replacement.lower()
+        else:
+            replacement_match = re_unicode_language_id.match(replacement)
+            assert replacement_match is not None, (
+                   "{} invalid Unicode BCP 47 locale identifier".format(replacement))
+            assert replacement_match.group("variants") is None, (
+                   "{}: unexpected variant subtags in {}".format(type, replacement))
+
+            complex_language_mappings[type] = bcp47_canonical(replacement_match.group("language"),
+                                                              replacement_match.group("script"),
+                                                              replacement_match.group("region"))
+
+    for territory_alias in tree.iterfind(".//territoryAlias"):
+        type = territory_alias.get("type")
+        replacement = territory_alias.get("replacement")
+
+        # We're only interested in region subtag matches, so ignore any entries
+        # which contain legacy formats, e.g. three letter region codes.
+        if re_unicode_region_subtag.match(type) is None:
+            continue
+
+        if re_unicode_region_subtag.match(replacement) is not None:
+            # Canonical case for region subtags is upper-case.
+            region_mappings[type] = replacement.upper()
         else:
-            preferred = dict[key]["preferred"]
-            prefix = dict[key]["prefix"]
-            value = '{{preferred: "{0}", prefix: "{1}"}}'.format(preferred, prefix)
-        intlData.write('    "{0}": {1},\n'.format(key, value))
-    intlData.write("};\n")
+            # Canonical case for region subtags is upper-case.
+            replacements = [r.upper() for r in replacement.split(" ")]
+            assert all(
+                re_unicode_region_subtag.match(loc) is not None for loc in replacements
+            ), "{} invalid region subtags".format(replacement)
+            complex_region_mappings[type] = replacements
+
+    tree = ET.parse(likely_subtags_file)
+
+    likely_subtags = {}
+
+    for likely_subtag in tree.iterfind(".//likelySubtag"):
+        from_tag = bcp47_id(likely_subtag.get("from"))
+        from_match = re_unicode_language_id.match(from_tag)
+        assert from_match is not None, (
+               "{} invalid Unicode BCP 47 locale identifier".format(from_tag))
+        assert from_match.group("variants") is None, (
+               "unexpected variant subtags in {}".format(from_tag))
+
+        to_tag = bcp47_id(likely_subtag.get("to"))
+        to_match = re_unicode_language_id.match(to_tag)
+        assert to_match is not None, (
+               "{} invalid Unicode BCP 47 locale identifier".format(to_tag))
+        assert to_match.group("variants") is None, (
+               "unexpected variant subtags in {}".format(to_tag))
+
+        from_canonical = bcp47_canonical(from_match.group("language"),
+                                         from_match.group("script"),
+                                         from_match.group("region"))
+
+        to_canonical = bcp47_canonical(to_match.group("language"),
+                                       to_match.group("script"),
+                                       to_match.group("region"))
+
+        likely_subtags[from_canonical] = to_canonical
+
+    complex_region_mappings_final = {}
+
+    for (deprecated_region, replacements) in complex_region_mappings.items():
+        # Find all likely subtag entries which don't already contain a region
+        # subtag and whose target region is in the list of replacement regions.
+        region_likely_subtags = [(from_language, from_script, to_region)
+                                 for ((from_language, from_script, from_region),
+                                      (_, _, to_region)) in likely_subtags.items()
+                                 if from_region is None and to_region in replacements]
+
+        # The first replacement entry is the default region.
+        default = replacements[0]
+
+        # Find all likely subtag entries whose region matches the default region.
+        default_replacements = {(language, script)
+                                for (language, script, region) in region_likely_subtags
+                                if region == default}
+
+        # And finally find those entries which don't use the default region.
+        # These are the entries we're actually interested in, because those need
+        # to be handled specially when selecting the correct preferred region.
+        non_default_replacements = [(language, script, region)
+                                    for (language, script, region) in region_likely_subtags
+                                    if (language, script) not in default_replacements]
+
+        # If there are no non-default replacements, we can handle the region as
+        # part of the simple region mapping.
+        if non_default_replacements:
+            complex_region_mappings_final[deprecated_region] = (default, non_default_replacements)
+        else:
+            region_mappings[deprecated_region] = default
 
+    return {"version": cldr_version,
+            "grandfatheredMappings": grandfathered_mappings,
+            "languageMappings": language_mappings,
+            "complexLanguageMappings": complex_language_mappings,
+            "regionMappings": region_mappings,
+            "complexRegionMappings": complex_region_mappings_final,
+            }
 
-def writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings):
+def writeCLDRLanguageTagData(println, data, url):
     """ Writes the language tag data to the Intl data file. """
-    writeMappingsVar(intlData, langTagMappings, "langTagMappings",
-                     "Mappings from complete tags to preferred values", fileDate, url)
-    writeMappingsVar(intlData, langSubtagMappings, "langSubtagMappings",
-                     "Mappings from non-extlang subtags to preferred values", fileDate, url)
-    writeMappingsVar(intlData, extlangMappings, "extlangMappings",
-                     "Mappings from extlang subtags to preferred values", fileDate, url)
-
-def updateLangTags(args):
-    """ Update the LangTagMappingsGenerated.js file. """
+
+    source = u"CLDR Supplemental Data, version {}".format(data["version"])
+    grandfathered_mappings = data["grandfatheredMappings"]
+    language_mappings = data["languageMappings"]
+    complex_language_mappings = data["complexLanguageMappings"]
+    region_mappings = data["regionMappings"]
+    complex_region_mappings = data["complexRegionMappings"]
+
+    writeMappingsVar(println, grandfathered_mappings, "grandfatheredMappings",
+                     "Mappings from grandfathered tags to preferred values.", source, url)
+    writeMappingsVar(println, language_mappings, "languageMappings",
+                     "Mappings from language subtags to preferred values.", source, url)
+    writeMappingsVar(println, {key: True for key in complex_language_mappings},
+                     "complexLanguageMappings",
+                     "Language subtags with complex mappings.", source, url)
+    writeMappingsVar(println, region_mappings, "regionMappings",
+                     "Mappings from region subtags to preferred values.", source, url)
+    writeMappingsVar(println, {key: True for key in complex_region_mappings},
+                     "complexRegionMappings",
+                     "Region subtags with complex mappings.", source, url)
+
+    writeUpdateLocaleIdMappingsFunction(println, complex_language_mappings,
+                                        complex_region_mappings,
+                                        "Canonicalize Unicode BCP 47 locale identifiers.",
+                                        source, url)
+    writeGrandfatheredMappingsFunction(println, grandfathered_mappings,
+                                       "Canonicalize grandfathered locale identifiers.",
+                                       source, url)
+
+
+def updateCLDRLangTags(args):
+    """ Update the LangTagMappingsGenerated.js file from CLDR supplemental data. """
     url = args.url
+    branch = args.branch
+    revision = args.revision
     out = args.out
-    filename = args.file
+    files = args.files
 
     print("Arguments:")
     print("\tDownload url: %s" % url)
-    print("\tLocal registry: %s" % filename)
+    print("\tBranch: %s" % branch)
+    print("\tRevision: %s" % revision)
+    print("\tLocal supplemental data and likely subtags: %s" % files)
     print("\tOutput file: %s" % out)
     print("")
 
-    if filename is not None:
-        print("Always make sure you have the newest language-subtag-registry.txt!")
-        registry = codecs.open(filename, "r", encoding="utf-8")
+    if files:
+        if len(files) != 3:
+            raise Exception("Expected three files, but got: {}".format(files))
+
+        print(("Always make sure you have the newest ldmlSupplemental.dtd, "
+               "supplementalMetadata.xml, and likelySubtags.xml!"))
+
+        supplemental_dtd_file = files[0]
+        supplemental_metadata_file = files[1]
+        likely_subtags_file = files[2]
     else:
-        print("Downloading IANA Language Subtag Registry...")
-        with closing(urllib2.urlopen(url)) as reader:
-            text = reader.read().decode("utf-8")
-        registry = codecs.open("language-subtag-registry.txt", "w+", encoding="utf-8")
-        registry.write(text)
-        registry.seek(0)
-
-    print("Processing IANA Language Subtag Registry...")
-    with closing(registry) as reg:
-        data = readRegistry(reg)
-    fileDate = data["fileDate"]
-    langTagMappings = data["langTagMappings"]
-    langSubtagMappings = data["langSubtagMappings"]
-    extlangMappings = data["extlangMappings"]
+        print("Downloading CLDR supplemental data...")
+
+        supplemental_dtd_filename = "ldmlSupplemental.dtd"
+        supplemental_dtd_path = "common/dtd/{}".format(supplemental_dtd_filename)
+        supplemental_dtd_file = os.path.join(os.getcwd(), supplemental_dtd_filename)
+
+        supplemental_metadata_filename = "supplementalMetadata.xml"
+        supplemental_metadata_path = "common/supplemental/{}".format(
+            supplemental_metadata_filename)
+        supplemental_metadata_file = os.path.join(os.getcwd(), supplemental_metadata_filename)
+
+        likely_subtags_filename = "likelySubtags.xml"
+        likely_subtags_path = "common/supplemental/{}".format(likely_subtags_filename)
+        likely_subtags_file = os.path.join(os.getcwd(), likely_subtags_filename)
+
+        # Try to download the raw file directly from GitHub if possible.
+        split = urlsplit(url)
+        if split.netloc == "github.com" and split.path.endswith(".git") and revision == "HEAD":
+            def download(path, file):
+                urlpath = "{}/raw/{}/{}".format(urlsplit(url).path[:-4], branch, path)
+                raw_url = urlunsplit((split.scheme, split.netloc, urlpath, split.query,
+                                      split.fragment))
+
+                with closing(urllib2.urlopen(raw_url)) as reader:
+                    text = reader.read().decode("utf-8")
+                with io.open(file, "w", encoding="utf-8") as saved_file:
+                    saved_file.write(text)
+
+            download(supplemental_dtd_path, supplemental_dtd_file)
+            download(supplemental_metadata_path, supplemental_metadata_file)
+            download(likely_subtags_path, likely_subtags_file)
+        else:
+            # Clone the requested branch into a temporary directory, then copy the files out.
+            with TemporaryDirectory() as inDir:
+                if revision == "HEAD":
+                    subprocess.check_call(["git", "clone", "--depth=1",
+                                           "--branch=%s" % branch, url, inDir])
+                else:
+                    subprocess.check_call(["git", "clone", "--single-branch",
+                                           "--branch=%s" % branch, url, inDir])
+                    subprocess.check_call(["git", "-C", inDir, "reset", "--hard", revision])
+
+                shutil.copyfile(os.path.join(inDir, supplemental_dtd_path),
+                                supplemental_dtd_file)
+                shutil.copyfile(os.path.join(inDir, supplemental_metadata_path),
+                                supplemental_metadata_file)
+                shutil.copyfile(os.path.join(inDir, likely_subtags_path), likely_subtags_file)
+
+    print("Processing CLDR supplemental data...")
+    data = readSupplementalData(supplemental_dtd_file,
+                                supplemental_metadata_file,
+                                likely_subtags_file)
 
     print("Writing Intl data...")
-    with codecs.open(out, "w", encoding="utf-8") as intlData:
-        intlData.write("// Generated by make_intl_data.py. DO NOT EDIT.\n")
-        writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings)
+    with io.open(out, mode="w", encoding="utf-8", newline="") as f:
+        println = partial(print, file=f)
+
+        println(u"// Generated by make_intl_data.py. DO NOT EDIT.")
+        writeCLDRLanguageTagData(println, data, url)
+
 
 def flines(filepath, encoding="utf-8"):
     """ Open filepath and iterate over its content. """
@@ -707,11 +1214,11 @@ def processTimeZones(tzdataDir, icuDir, icuTzDir, version, ignoreBackzone, ignor
 
         println(u"// Format:")
         println(u'// "LinkName", "Target" // ICU-Target [time zone file]')
-        println(u"struct LinkAndTarget");
-        println(u"{");
-        println(u"    const char* const link;");
-        println(u"    const char* const target;");
-        println(u"};");
+        println(u"struct LinkAndTarget")
+        println(u"{")
+        println(u"    const char* const link;")
+        println(u"    const char* const target;")
+        println(u"};")
         println(u"")
         println(u"const LinkAndTarget ianaLinksCanonicalizedDifferentlyByICU[] = {")
         for (zone, target, icuTarget) in incorrectLinks:
@@ -932,7 +1439,7 @@ def updateTzdata(topsrcdir, args):
     if tzDir is None:
         print("Downloading tzdata file...")
         with closing(urllib2.urlopen(url)) as tzfile:
-            fname = urlparse.urlsplit(tzfile.geturl()).path.split("/")[-1]
+            fname = urlsplit(tzfile.geturl()).path.split("/")[-1]
             with tempfile.NamedTemporaryFile(suffix=fname) as tztmpfile:
                 print("File stored in %s" % tztmpfile.name)
                 tztmpfile.write(tzfile.read())
@@ -959,20 +1466,24 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Update intl data.")
     subparsers = parser.add_subparsers(help="Select update mode")
 
-    parser_tags = subparsers.add_parser("langtags",
-                                        help="Update language-subtag-registry")
-    parser_tags.add_argument("--url",
-                             metavar="URL",
-                             default="https://www.iana.org/assignments/language-subtag-registry",
-                             type=EnsureHttps,
-                             help="Download url for language-subtag-registry.txt (default: %(default)s)")
-    parser_tags.add_argument("--out",
-                             default="LangTagMappingsGenerated.js",
-                             help="Output file (default: %(default)s)")
-    parser_tags.add_argument("file",
-                             nargs="?",
-                             help="Local language-subtag-registry.txt file, if omitted uses <URL>")
-    parser_tags.set_defaults(func=updateLangTags)
+    parser_cldr_tags = subparsers.add_parser("langtags",
+                                             help="Update CLDR language tags data")
+    parser_cldr_tags.add_argument("--url",
+                                  metavar="URL",
+                                  default="https://github.com/unicode-org/cldr.git",
+                                  help="URL to git repository (default: %(default)s)")
+    parser_cldr_tags.add_argument("--branch", default="latest",
+                                  help="Git branch (default: %(default)s)")
+    parser_cldr_tags.add_argument("--revision", default="HEAD",
+                                  help="Git revision (default: %(default)s)")
+    parser_cldr_tags.add_argument("--out",
+                                  default="LangTagMappingsGenerated.js",
+                                  help="Output file (default: %(default)s)")
+    parser_cldr_tags.add_argument("files",
+                                  nargs="*",
+                                  help="Local ldmlSupplemental.dtd, supplementalMetadata.xml, "
+                                       "and likelySubtags.xml files, if omitted uses <URL>")
+    parser_cldr_tags.set_defaults(func=updateCLDRLangTags)
 
     parser_tz = subparsers.add_parser("tzdata", help="Update tzdata")
     parser_tz.add_argument("--tz",
author	Martok <martok@martoks-place.de>	2023-06-29 23:07:20 +0200
committer	Martok <martok@martoks-place.de>	2023-06-30 00:01:34 +0200
commit	2f940bdc9dcbfe83e17ed26c5d1af7fe874c24ac (patch)
tree	2519366eb8057e265339261ab651a8cb5653a703 /js/src/builtin
parent	6808e659ad137ac63466aad93e406efbf091c077 (diff)
download	uxp-2f940bdc9dcbfe83e17ed26c5d1af7fe874c24ac.tar.gz