Issue #1819 - Further align Intl.Locale to spec

- Reference updates (UTS 35) - variant subtag and transform extension canonicalisation
author: Martok <martok@martoks-place.de> 2023-06-29 23:09:26 +0200
committer: Martok <martok@martoks-place.de> 2023-06-30 00:01:35 +0200
commit: af47a256b5cf2b81e4c3bf8f36682f8b9f31be42 (patch)
tree: af1b472d545dcd80afa9de5e468912f39cf8ee12 /js/src
parent: e96f965422528636e13adc3473679248941540e7 (diff)
download: uxp-af47a256b5cf2b81e4c3bf8f36682f8b9f31be42.tar.gz
13 files changed, 958 insertions, 320 deletions
diff --git a/js/src/builtin/intl/Collator.cpp b/js/src/builtin/intl/Collator.cpp
index 5f142d7e6d..450c654620 100644
--- a/js/src/builtin/intl/Collator.cpp
+++ b/js/src/builtin/intl/Collator.cpp
@@ -8,12 +8,14 @@
 #include "builtin/intl/Collator.h"
 
 #include "mozilla/Assertions.h"
+#include "mozilla/Span.h"
 
 #include "jsapi.h"
 #include "jscntxt.h"
 
 #include "builtin/intl/CommonFunctions.h"
 #include "builtin/intl/ICUHeader.h"
+#include "builtin/intl/LanguageTag.h"
 #include "builtin/intl/ScopedICUObject.h"
 #include "builtin/intl/SharedIntlData.h"
 #include "js/TypeDecls.h"
@@ -283,32 +285,33 @@ NewUCollator(JSContext* cx, Handle<CollatorObject*> collator)
         return nullptr;
     if (StringsAreEqual(usage, "search")) {
         // ICU expects search as a Unicode locale extension on locale.
-        // Unicode locale extensions must occur before private use extensions.
-        const char* oldLocale = locale.ptr();
-        const char* p;
-        size_t index;
-        size_t localeLen = strlen(oldLocale);
-        if ((p = strstr(oldLocale, "-x-")))
-            index = p - oldLocale;
-        else
-            index = localeLen;
-
-        const char* insert;
-        if ((p = strstr(oldLocale, "-u-")) && static_cast<size_t>(p - oldLocale) < index) {
-            index = p - oldLocale + 2;
-            insert = "-co-search";
-        } else {
-            insert = "-u-co-search";
+        intl::LanguageTag tag(cx);
+        if (!intl::LanguageTagParser::parse(
+                cx, mozilla::MakeCStringSpan(locale.ptr()), tag)) {
+            return nullptr;
+        }
+
+        JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx);
+
+        if (!keywords.emplaceBack("co", cx->names().search)) {
+            return nullptr;
         }
-        size_t insertLen = strlen(insert);
-        char* newLocale = cx->pod_malloc<char>(localeLen + insertLen + 1);
-        if (!newLocale)
+
+        // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of
+        // the Unicode extension subtag. We're then relying on ICU to follow RFC
+        // 6067, which states that any trailing keywords using the same key
+        // should be ignored.
+        if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords)) {
             return nullptr;
-        memcpy(newLocale, oldLocale, index);
-        memcpy(newLocale + index, insert, insertLen);
-        memcpy(newLocale + index + insertLen, oldLocale + index, localeLen - index + 1); // '\0'
+        }
+
         locale.clear();
-        locale.initBytes(newLocale);
+        if (JSString* tagStr = tag.toString(cx))  // never encode a null string
+            locale.encodeLatin1(cx, tagStr);
+        if (!locale)  // still empty if toString() or the encoding failed
+            return nullptr;
+    } else {
+        MOZ_ASSERT(StringsAreEqual(usage, "sort"));
     }
 
     // We don't need to look at the collation property - it can only be set
diff --git a/js/src/builtin/intl/DateTimeFormat.cpp b/js/src/builtin/intl/DateTimeFormat.cpp
index 78e863eedf..0dd724bf2e 100644
--- a/js/src/builtin/intl/DateTimeFormat.cpp
+++ b/js/src/builtin/intl/DateTimeFormat.cpp
@@ -15,6 +15,7 @@
 
 #include "builtin/intl/CommonFunctions.h"
 #include "builtin/intl/ICUHeader.h"
+#include "builtin/intl/LanguageTag.h"
 #include "builtin/intl/ScopedICUObject.h"
 #include "builtin/intl/SharedIntlData.h"
 #include "builtin/intl/TimeZoneDataGenerated.h"
@@ -582,14 +583,57 @@ NewUDateFormat(JSContext* cx, Handle<DateTimeFormatObject*> dateTimeFormat)
 
     if (!GetProperty(cx, internals, internals, cx->names().locale, &value))
         return nullptr;
-    JSAutoByteString locale(cx, value.toString());
-    if (!locale)
-        return nullptr;
 
-    // We don't need to look at calendar and numberingSystem - they can only be
-    // set via the Unicode locale extension and are therefore already set on
+    // ICU expects calendar and numberingSystem as Unicode locale extensions on
     // locale.
 
+    intl::LanguageTag tag(cx);
+    {
+        JSLinearString* locale = value.toString()->ensureLinear(cx);
+        if (!locale)
+            return nullptr;
+
+        if (!intl::LanguageTagParser::parse(cx, locale, tag))
+            return nullptr;
+    }
+
+    JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx);
+
+    if (!GetProperty(cx, internals, internals, cx->names().calendar, &value))
+        return nullptr;
+
+    {
+        JSLinearString* calendar = value.toString()->ensureLinear(cx);
+        if (!calendar)
+            return nullptr;
+
+        if (!keywords.emplaceBack("ca", calendar))
+            return nullptr;
+    }
+
+    if (!GetProperty(cx, internals, internals, cx->names().numberingSystem, &value))
+        return nullptr;
+
+    {
+        JSLinearString* numberingSystem = value.toString()->ensureLinear(cx);
+        if (!numberingSystem)
+            return nullptr;
+
+        if (!keywords.emplaceBack("nu", numberingSystem))
+            return nullptr;
+    }
+
+    // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of
+    // the Unicode extension subtag. We're then relying on ICU to follow RFC
+    // 6067, which states that any trailing keywords using the same key
+    // should be ignored.
+    if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords))
+        return nullptr;
+
+    UniqueChars locale = tag.toStringZ(cx);
+    if (!locale)
+        return nullptr;
+
     if (!GetProperty(cx, internals, internals, cx->names().timeZone, &value))
         return nullptr;
 
@@ -614,7 +658,7 @@ NewUDateFormat(JSContext* cx, Handle<DateTimeFormatObject*> dateTimeFormat)
 
     UErrorCode status = U_ZERO_ERROR;
     UDateFormat* df =
-        udat_open(UDAT_PATTERN, UDAT_PATTERN, IcuLocale(locale.ptr()), uTimeZone, uTimeZoneLength,
+        udat_open(UDAT_PATTERN, UDAT_PATTERN, IcuLocale(locale.get()), uTimeZone, uTimeZoneLength,
                   uPattern, uPatternLength, &status);
     if (U_FAILURE(status)) {
         intl::ReportInternalError(cx);
diff --git a/js/src/builtin/intl/DateTimeFormat.js b/js/src/builtin/intl/DateTimeFormat.js
index 77e10fa5f7..9d1adc8687 100644
--- a/js/src/builtin/intl/DateTimeFormat.js
+++ b/js/src/builtin/intl/DateTimeFormat.js
@@ -20,9 +20,11 @@ function resolveDateTimeFormatInternals(lazyDateTimeFormatData) {
     //       {
     //         localeMatcher: "lookup" / "best fit",
     //
-    //         hour12: true / false,  // optional
+    //         ca: string matching a Unicode extension type, // optional
+    //
+    //         nu: string matching a Unicode extension type, // optional
     //
-    //         hourCycle: "h11" / "h12" / "h23" / "h24", // optional
+    //         hc: "h11" / "h12" / "h23" / "h24", // optional
     //       }
     //
     //     timeZone: IANA time zone name,
@@ -31,6 +33,8 @@ function resolveDateTimeFormatInternals(lazyDateTimeFormatData) {
     //       {
     //         // all the properties/values listed in Table 3
     //         // (weekday, era, year, month, day, &c.)
+    //
+    //         hour12: true / false,  // optional
     //       }
     //
     //     formatMatcher: "basic" / "best fit",
@@ -343,6 +347,12 @@ function InitializeDateTimeFormat(dateTimeFormat, thisValue, locales, options, m
     //     localeOpt: // *first* opt computed in InitializeDateTimeFormat
     //       {
     //         localeMatcher: "lookup" / "best fit",
+    //
+    //         ca: string matching a Unicode extension type, // optional
+    //
+    //         nu: string matching a Unicode extension type, // optional
+    //
+    //         hc: "h11" / "h12" / "h23" / "h24", // optional
     //       }
     //
     //     timeZone: IANA time zone name,
@@ -353,7 +363,6 @@ function InitializeDateTimeFormat(dateTimeFormat, thisValue, locales, options, m
     //         // (weekday, era, year, month, day, &c.)
     //
     //         hour12: true / false,  // optional
-    //         hourCycle: "h11" / "h12" / "h23" / "h24", // optional
     //       }
     //
     //     formatMatcher: "basic" / "best fit",
@@ -382,6 +391,24 @@ function InitializeDateTimeFormat(dateTimeFormat, thisValue, locales, options, m
                   "best fit");
     localeOpt.localeMatcher = localeMatcher;
 
+    var calendar = GetOption(options, "calendar", "string", undefined, undefined);
+
+    if (calendar !== undefined) {
+        calendar = intl_ValidateAndCanonicalizeUnicodeExtensionType(calendar, "calendar", "ca");
+    }
+
+    localeOpt.ca = calendar;
+
+    var numberingSystem = GetOption(options, "numberingSystem", "string", undefined, undefined);
+
+    if (numberingSystem !== undefined) {
+        numberingSystem = intl_ValidateAndCanonicalizeUnicodeExtensionType(numberingSystem,
+                                                                           "numberingSystem",
+                                                                           "nu");
+    }
+
+    localeOpt.nu = numberingSystem;
+
     // Step 6.
     var hr12  = GetOption(options, "hour12", "boolean", undefined, undefined);
 
diff --git a/js/src/builtin/intl/IntlObject.cpp b/js/src/builtin/intl/IntlObject.cpp
index e0dd36dac4..2f42e1df76 100644
--- a/js/src/builtin/intl/IntlObject.cpp
+++ b/js/src/builtin/intl/IntlObject.cpp
@@ -548,7 +548,7 @@ js::intl_BestAvailableLocale(JSContext* cx, unsigned argc, Value* vp)
         MOZ_ASSERT(!tag.unicodeExtension(),
                    "locale must contain no Unicode extensions");
 
-        if (!tag.canonicalize(cx, intl::LanguageTag::UnicodeExtensionCanonicalForm::No)) {
+        if (!tag.canonicalize(cx)) {
             return false;
         }
 
@@ -608,7 +608,7 @@ js::intl_supportedLocaleOrFallback(JSContext* cx, unsigned argc, Value* vp)
             return false;
         }
     } else {
-        if (!tag.canonicalize(cx, intl::LanguageTag::UnicodeExtensionCanonicalForm::No)) {
+        if (!tag.canonicalize(cx)) {
             return false;
         }
 
diff --git a/js/src/builtin/intl/LanguageTag.cpp b/js/src/builtin/intl/LanguageTag.cpp
index 583033f629..501885dd9d 100644
--- a/js/src/builtin/intl/LanguageTag.cpp
+++ b/js/src/builtin/intl/LanguageTag.cpp
@@ -27,7 +27,9 @@
 
 #include "builtin/intl/CommonFunctions.h"
 #include "ds/Sort.h"
+#include "gc/Tracer.h"
 #include "js/Result.h"
+#include "js/TracingAPI.h"
 #include "js/Utility.h"
 #include "js/Vector.h"
 #include "unicode/uloc.h"
@@ -259,10 +261,11 @@ static bool SortAlphabetically(JSContext* cx,
   return true;
 }
 
-bool LanguageTag::canonicalizeBaseName(JSContext* cx) {
-  // Per UTS 35, 3.3.1, the very first step is to canonicalize the syntax by
-  // normalizing the case and ordering all subtags. The canonical syntax form
-  // itself is specified in UTS 35, 3.2.1.
+bool LanguageTag::canonicalizeBaseName(JSContext* cx,
+                                       DuplicateVariants duplicateVariants) {
+  // Per 6.2.3 CanonicalizeUnicodeLocaleId, the very first step is to
+  // canonicalize the syntax by normalizing the case and ordering all subtags.
+  // The canonical syntax form is specified in UTS 35, 3.2.1.
 
   // Language codes need to be in lower case. "JA" -> "ja"
   language_.toLowerCase();
@@ -299,25 +302,42 @@ bool LanguageTag::canonicalizeBaseName(JSContext* cx) {
       return false;
     }
 
-    // Reject the Locale identifier if a duplicate variant was found, e.g.
-    // "en-variant-Variant".
-    const UniqueChars* duplicate = std::adjacent_find(
-        variants().begin(), variants().end(), [](const auto& a, const auto& b) {
-          return strcmp(a.get(), b.get()) == 0;
-        });
-    if (duplicate != variants().end()) {
-      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
-                                JSMSG_DUPLICATE_VARIANT_SUBTAG,
-                                duplicate->get());
-      return false;
+    if (duplicateVariants == DuplicateVariants::Reject) {
+      // Reject the Locale identifier if a duplicate variant was found, e.g.
+      // "en-variant-Variant".
+      const UniqueChars* duplicate =
+          std::adjacent_find(variants().begin(), variants().end(),
+                             [](const auto& a, const auto& b) {
+                               return strcmp(a.get(), b.get()) == 0;
+                             });
+      if (duplicate != variants().end()) {
+        JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+                                  JSMSG_DUPLICATE_VARIANT_SUBTAG,
+                                  duplicate->get());
+        return false;
+      }
     }
   }
 
   // 2. Any extensions are in alphabetical order by their singleton.
-  // - A subsequent call to canonicalizeExtensions() will perform this.
+  // 3. All attributes are sorted in alphabetical order.
+  // 4. All keywords and tfields are sorted by alphabetical order of their keys,
+  //    within their respective extensions.
+  // 5. Any type or tfield value "true" is removed.
+  // - A subsequent call to canonicalizeExtensions() will perform these steps.
+
+  // 6.2.3 CanonicalizeUnicodeLocaleId, step 2 transforms the locale identifier
+  // into its canonical form per UTS 35, 3.2.1.
+
+  // 1. Use the bcp47 data to replace keys, types, tfields, and tvalues by their
+  // canonical forms.
+  // - A subsequent call to canonicalizeExtensions() will perform this step.
 
-  // The next two steps in 3.3.1 replace deprecated language and region
-  // subtags with their preferred mappings.
+  // 2. Replace aliases in the unicode_language_id and tlang (if any).
+  // - tlang is handled in canonicalizeExtensions().
+
+  // Replace deprecated language, region, and variant subtags with their
+  // preferred mappings.
 
   if (!updateGrandfatheredMappings(cx)) {
     return false;
@@ -337,19 +357,34 @@ bool LanguageTag::canonicalizeBaseName(JSContext* cx) {
     }
   }
 
-  // No variant subtag replacements are currently present.
+  // Replace deprecated variant subtags with their preferred values.
+  if (!performVariantMappings(cx)) {
+    return false;
+  }
+
   // No extension replacements are currently present.
   // Private use sequences are left as is.
 
-  // The two final steps in 3.3.1, handling irregular grandfathered and
-  // private-use only language tags, don't apply, because these two forms
-  // can't occur in Unicode BCP 47 locale identifiers.
+  // 3. Replace aliases in special key values.
+  // - A subsequent call to canonicalizeExtensions() will perform this step.
 
   return true;
 }
 
-bool LanguageTag::canonicalizeExtensions(
-    JSContext* cx, UnicodeExtensionCanonicalForm canonicalForm) {
+#ifdef DEBUG
+template <typename CharT>
+static bool IsAsciiLowercaseAlphanumericOrDash(
+    mozilla::Span<const CharT> span) {
+  const CharT* ptr = span.data();
+  size_t length = span.size();
+  return std::all_of(ptr, ptr + length, [](auto c) {
+    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c) ||
+           c == '-';
+  });
+}
+#endif
+
+bool LanguageTag::canonicalizeExtensions(JSContext* cx) {
   // The canonical case for all extension subtags is lowercase.
   for (UniqueChars& extension : extensions_) {
     char* extensionChars = extension.get();
@@ -368,7 +403,7 @@ bool LanguageTag::canonicalizeExtensions(
 
   for (UniqueChars& extension : extensions_) {
     if (extension[0] == 'u') {
-      if (!canonicalizeUnicodeExtension(cx, extension, canonicalForm)) {
+      if (!canonicalizeUnicodeExtension(cx, extension)) {
         return false;
       }
     } else if (extension[0] == 't') {
@@ -376,6 +411,9 @@ bool LanguageTag::canonicalizeExtensions(
         return false;
       }
     }
+
+    MOZ_ASSERT(IsAsciiLowercaseAlphanumericOrDash(
+        mozilla::MakeCStringSpan(extension.get())));
   }
 
   // The canonical case for privateuse subtags is lowercase.
@@ -406,8 +444,7 @@ bool LanguageTag::canonicalizeExtensions(
  *   see Section 3.6.4 U Extension Data Files).
  */
 bool LanguageTag::canonicalizeUnicodeExtension(
-    JSContext* cx, JS::UniqueChars& unicodeExtension,
-    UnicodeExtensionCanonicalForm canonicalForm) {
+    JSContext* cx, JS::UniqueChars& unicodeExtension) {
   const char* const extension = unicodeExtension.get();
   MOZ_ASSERT(extension[0] == 'u');
   MOZ_ASSERT(extension[1] == '-');
@@ -504,7 +541,7 @@ bool LanguageTag::canonicalizeUnicodeExtension(
     const auto& attribute = attributes[i];
 
     // Skip duplicate attributes.
-    if (canonicalForm == UnicodeExtensionCanonicalForm::Yes && i > 0) {
+    if (i > 0) {
       const auto& lastAttribute = attributes[i - 1];
       if (attribute.length() == lastAttribute.length() &&
           std::char_traits<char>::compare(attribute.begin(extension),
@@ -570,7 +607,7 @@ bool LanguageTag::canonicalizeUnicodeExtension(
     const auto& keyword = keywords[i];
 
     // Skip duplicate keywords.
-    if (canonicalForm == UnicodeExtensionCanonicalForm::Yes && i > 0) {
+    if (i > 0) {
       const auto& lastKeyword = keywords[i - 1];
       if (std::char_traits<char>::compare(keyword.begin(extension),
                                           lastKeyword.begin(extension),
@@ -594,17 +631,10 @@ bool LanguageTag::canonicalizeUnicodeExtension(
       StringSpan type(keyword.begin(extension) + UnicodeKeyWithSepLength,
                       keyword.length() - UnicodeKeyWithSepLength);
 
-      if (canonicalForm == UnicodeExtensionCanonicalForm::Yes) {
-        // Search if there's a replacement for the current Unicode keyword.
-        if (const char* replacement = replaceUnicodeExtensionType(key, type)) {
-          if (!appendReplacement(keyword,
-                                 mozilla::MakeCStringSpan(replacement))) {
-            return false;
-          }
-        } else {
-          if (!appendKeyword(keyword, type)) {
-            return false;
-          }
+      // Search if there's a replacement for the current Unicode keyword.
+      if (const char* replacement = replaceUnicodeExtensionType(key, type)) {
+        if (!appendReplacement(keyword, mozilla::MakeCStringSpan(replacement))) {
+          return false;
         }
       } else {
         if (!appendKeyword(keyword, type)) {
@@ -761,26 +791,35 @@ bool LanguageTag::canonicalizeTransformExtension(
 
   // Append the language subtag if present.
   //
-  // [1] is a bit unclear whether or not the `tlang` subtag also needs to be
-  // canonicalized (and case-adjusted). For now simply append it as is.
-  // (|parseTransformExtension| doesn't alter case from the lowercased form we
-  // have previously taken pains to ensure is present in the extension, so no
-  // special effort is required to ensure lowercasing.) If we switch to [2], the
-  // `tlang` subtag also needs to be canonicalized according to the same rules
-  // as `unicode_language_id` subtags are canonicalized. Also see [3].
-  //
-  // [1] https://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier
-  // [2] https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
-  // [3] https://github.com/tc39/ecma402/issues/330
+  // Replace aliases in tlang per
+  // <https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers>.
   if (tag.language().present()) {
     if (!sb.append('-')) {
       return false;
     }
+
+    // ECMA-402 is unclear whether or not duplicate variants are allowed in
+    // transform extensions. Tentatively allow duplicates until
+    // https://github.com/tc39/ecma402/issues/330 has been addressed.
+    if (!tag.canonicalizeBaseName(cx, DuplicateVariants::Accept)) {
+      return false;
+    }
+
+    // The canonical case for Transform extensions is lowercase per
+    // <https://unicode.org/reports/tr35/#BCP47_T_Extension>. Convert the two
+    // subtags which don't use lowercase for their canonical syntax.
+    tag.script_.toLowerCase();
+    tag.region_.toLowerCase();
+
     if (!LanguageTagToString(cx, tag, sb)) {
       return false;
     }
   }
 
+  static constexpr size_t TransformKeyWithSepLength = TransformKeyLength + 1;
+
+  using StringSpan = mozilla::Span<const char>;
+
   // Append all fields.
   //
   // UTS 35, 3.2.1 specifies:
@@ -793,8 +832,23 @@ bool LanguageTag::canonicalizeTransformExtension(
     if (!sb.append('-')) {
       return false;
     }
-    if (!sb.append(field.begin(extension), field.length())) {
-      return false;
+
+    StringSpan key(field.begin(extension), TransformKeyLength);
+    StringSpan value(field.begin(extension) + TransformKeyWithSepLength,
+                     field.length() - TransformKeyWithSepLength);
+
+    // Search if there's a replacement for the current transform keyword.
+    if (const char* replacement = replaceTransformExtensionType(key, value)) {
+      if (!sb.append(field.begin(extension), TransformKeyWithSepLength)) {
+        return false;
+      }
+      if (!sb.append(replacement, strlen(replacement))) {
+        return false;
+      }
+    } else {
+      if (!sb.append(field.begin(extension), field.length())) {
+        return false;
+      }
     }
   }
 
@@ -824,6 +878,18 @@ JSString* LanguageTag::toString(JSContext* cx) const {
   return sb.finishString();
 }
 
+UniqueChars LanguageTag::toStringZ(JSContext* cx) const {
+  Vector<char, 16> sb(cx);
+  if (!LanguageTagToString(cx, *this, sb)) {
+    return nullptr;
+  }
+  if (!sb.append('\0')) {
+    return nullptr;
+  }
+
+  return UniqueChars(sb.extractOrCopyRawBuffer());
+}
+
 // Zero-terminated ICU Locale ID.
 using LocaleId =
     js::Vector<char, LanguageLength + 1 + ScriptLength + 1 + RegionLength + 1>;
@@ -1158,12 +1224,25 @@ JS::Result<bool> LanguageTagParser::tryParse(JSContext* cx,
                                              LanguageTag& tag) {
   JS::AutoCheckCannotGC nogc;
   LocaleChars localeChars = StringChars(locale, nogc);
+  return tryParse(cx, localeChars, locale->length(), tag);
+}
+
+JS::Result<bool> LanguageTagParser::tryParse(JSContext* cx,
+                                             mozilla::Span<const char> locale,
+                                             LanguageTag& tag) {
+  LocaleChars localeChars = StringChars(locale.data());
+  return tryParse(cx, localeChars, locale.size(), tag);
+}
 
+JS::Result<bool> LanguageTagParser::tryParse(JSContext* cx,
+                                             LocaleChars& localeChars,
+                                             size_t localeLength,
+                                             LanguageTag& tag) {
   // unicode_locale_id = unicode_language_id
   //                     extensions*
   //                     pu_extensions? ;
 
-  LanguageTagParser ts(localeChars, locale->length());
+  LanguageTagParser ts(localeChars, localeLength);
   Token tok = ts.nextToken();
 
   bool ok;
@@ -1301,6 +1380,20 @@ bool LanguageTagParser::parse(JSContext* cx, JSLinearString* locale,
   return false;
 }
 
+bool LanguageTagParser::parse(JSContext* cx, mozilla::Span<const char> locale,
+                              LanguageTag& tag) {
+  bool ok;
+  JS_TRY_VAR_OR_RETURN_FALSE(cx, ok, tryParse(cx, locale, tag));
+  if (ok) {
+    return true;
+  }
+  if (UniqueChars localeChars = DuplicateString(cx, locale.data())) {
+    JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,
+                             JSMSG_INVALID_LANGUAGE_TAG, localeChars.get());
+  }
+  return false;
+}
+
 bool LanguageTagParser::parseBaseName(JSContext* cx,
                                       mozilla::Span<const char> locale,
                                       LanguageTag& tag) {
@@ -1314,8 +1407,7 @@ bool LanguageTagParser::parseBaseName(JSContext* cx,
   if (ok) {
     return true;
   }
-  if (UniqueChars localeChars = DuplicateString(cx, locale.data(),
-                                                locale.size())) {
+  if (UniqueChars localeChars = DuplicateString(cx, locale.data())) {
     JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,
                              JSMSG_INVALID_LANGUAGE_TAG, localeChars.get());
   }
@@ -1477,6 +1569,8 @@ bool LanguageTagParser::canParseUnicodeExtension(
 
 bool LanguageTagParser::canParseUnicodeExtensionType(
     JSLinearString* unicodeType) {
+  MOZ_ASSERT(unicodeType->length() > 0, "caller must exclude empty strings");
+
   JS::AutoCheckCannotGC nogc;
   LocaleChars unicodeTypeChars = StringChars(unicodeType, nogc);
 
@@ -1627,5 +1721,9 @@ JS::Result<JSString*> ParseStandaloneISO639LanguageTag(JSContext* cx,
   return result;
 }
 
+void js::intl::UnicodeExtensionKeyword::trace(JSTracer* trc) {
+  TraceRoot(trc, &type_, "UnicodeExtensionKeyword::type");
+}
+
 }  // namespace intl
 }  // namespace js
diff --git a/js/src/builtin/intl/LanguageTag.h b/js/src/builtin/intl/LanguageTag.h
index 384ff4bb7a..5fcce26480 100644
--- a/js/src/builtin/intl/LanguageTag.h
+++ b/js/src/builtin/intl/LanguageTag.h
@@ -31,6 +31,7 @@
 struct JSContext;
 class JSLinearString;
 class JSString;
+class JSTracer;
 
 namespace js {
 
@@ -204,14 +205,8 @@ class MOZ_STACK_CLASS LanguageTag final {
 
   friend class LanguageTagParser;
 
- public:
-  // Flag to request canonicalized Unicode extensions.
-  enum class UnicodeExtensionCanonicalForm : bool { No, Yes };
-
- private:
-  bool canonicalizeUnicodeExtension(
-      JSContext* cx, JS::UniqueChars& unicodeExtension,
-      UnicodeExtensionCanonicalForm canonicalForm);
+  bool canonicalizeUnicodeExtension(JSContext* cx,
+                                    JS::UniqueChars& unicodeExtension);
 
   bool canonicalizeTransformExtension(JSContext* cx,
                                       JS::UniqueChars& transformExtension);
@@ -226,9 +221,22 @@ class MOZ_STACK_CLASS LanguageTag final {
 
   void performComplexLanguageMappings();
   void performComplexRegionMappings();
+  MOZ_MUST_USE bool performVariantMappings(JSContext* cx);
 
   MOZ_MUST_USE bool updateGrandfatheredMappings(JSContext* cx);
 
+  static const char* replaceTransformExtensionType(
+      mozilla::Span<const char> key, mozilla::Span<const char> type);
+
+ public:
+  /**
+   * Given a Unicode key and type, return the null-terminated preferred
+   * replacement for that type if there is one, or null if there is none, e.g.
+   * in effect
+   * |replaceUnicodeExtensionType("ca", "islamicc") == "islamic-civil"|
+   * and
+   * |replaceUnicodeExtensionType("ca", "islamic-civil") == nullptr|.
+   */
   static const char* replaceUnicodeExtensionType(
       mozilla::Span<const char> key, mozilla::Span<const char> type);
 
@@ -337,17 +345,24 @@ class MOZ_STACK_CLASS LanguageTag final {
     privateuse_ = std::move(privateuse);
   }
 
+ private:
+  enum class DuplicateVariants { Reject, Accept };
+
+  bool canonicalizeBaseName(JSContext* cx, DuplicateVariants duplicateVariants);
+
+ public:
   /**
    * Canonicalize the base-name subtags, that means the language, script,
    * region, and variant subtags.
    */
-  bool canonicalizeBaseName(JSContext* cx);
+  bool canonicalizeBaseName(JSContext* cx) {
+    return canonicalizeBaseName(cx, DuplicateVariants::Reject);
+  }
 
   /**
    * Canonicalize all extension subtags.
    */
-  bool canonicalizeExtensions(JSContext* cx,
-                              UnicodeExtensionCanonicalForm canonicalForm);
+  bool canonicalizeExtensions(JSContext* cx);
 
   /**
    * Canonicalizes the given structurally valid Unicode BCP 47 locale
@@ -366,22 +381,10 @@ class MOZ_STACK_CLASS LanguageTag final {
    *
    * becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
    *
-   * UTS 35 specifies two different canonicalization algorithms. There's one to
-   * canonicalize BCP 47 language tags and other one to canonicalize Unicode
-   * locale identifiers. The latter one wasn't present when ECMA-402 was changed
-   * to use Unicode BCP 47 locale identifiers instead of BCP 47 language tags,
-   * so ECMA-402 currently only uses the former to canonicalize Unicode BCP 47
-   * locale identifiers.
-   *
    * Spec: ECMAScript Internationalization API Specification, 6.2.3.
-   * Spec:
-   * https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
-   * Spec: https://unicode.org/reports/tr35/#BCP_47_Language_Tag_Conversion
    */
-  bool canonicalize(JSContext* cx,
-                    UnicodeExtensionCanonicalForm canonicalForm) {
-    return canonicalizeBaseName(cx) &&
-           canonicalizeExtensions(cx, canonicalForm);
+  bool canonicalize(JSContext* cx) {
+    return canonicalizeBaseName(cx) && canonicalizeExtensions(cx);
   }
 
   /**
@@ -390,6 +393,12 @@ class MOZ_STACK_CLASS LanguageTag final {
   JSString* toString(JSContext* cx) const;
 
   /**
+   * Return the string representation of this language tag as a null-terminated
+   * C-string.
+   */
+  JS::UniqueChars toStringZ(JSContext* cx) const;
+
+  /**
    * Add likely-subtags to the language tag.
    *
    * Spec: <https://www.unicode.org/reports/tr35/#Likely_Subtags>
@@ -664,17 +673,32 @@ class MOZ_STACK_CLASS LanguageTagParser final {
       JSContext* cx, mozilla::Span<const char> extension,
       AttributesVector& attributes, KeywordsVector& keywords);
 
+  static JS::Result<bool> tryParse(JSContext* cx, LocaleChars& localeChars,
+                                   size_t localeLength, LanguageTag& tag);
+
  public:
   // Parse the input string as a language tag. Reports an error to the context
   // if the input can't be parsed completely.
   static bool parse(JSContext* cx, JSLinearString* locale, LanguageTag& tag);
 
+  // Parse the input string as a language tag. Reports an error to the context
+  // if the input can't be parsed completely.
+  static bool parse(JSContext* cx, mozilla::Span<const char> locale,
+                    LanguageTag& tag);
+
   // Parse the input string as a language tag. Returns Ok(true) if the input
   // could be completely parsed, Ok(false) if the input couldn't be parsed,
   // or Err() in case of internal error.
   static JS::Result<bool> tryParse(JSContext* cx, JSLinearString* locale,
                                    LanguageTag& tag);
 
+  // Parse the input string as a language tag. Returns Ok(true) if the input
+  // could be completely parsed, Ok(false) if the input couldn't be parsed,
+  // or Err() in case of internal error.
+  static JS::Result<bool> tryParse(JSContext* cx,
+                                   mozilla::Span<const char> locale,
+                                   LanguageTag& tag);
+
   // Parse the input string as the base-name parts (language, script, region,
   // variants) of a language tag. Ignores any trailing characters.
   static bool parseBaseName(JSContext* cx, mozilla::Span<const char> locale,
@@ -718,6 +742,28 @@ MOZ_MUST_USE bool ParseStandaloneRegionTag(JS::Handle<JSLinearString*> str,
 JS::Result<JSString*> ParseStandaloneISO639LanguageTag(
     JSContext* cx, JS::Handle<JSLinearString*> str);
 
+class UnicodeExtensionKeyword final {
+  char key_[LanguageTagLimits::UnicodeKeyLength];
+  JSLinearString* type_;
+
+ public:
+  using UnicodeKey = const char (&)[LanguageTagLimits::UnicodeKeyLength + 1];
+  using UnicodeKeySpan =
+      mozilla::Span<const char, LanguageTagLimits::UnicodeKeyLength>;
+
+  UnicodeExtensionKeyword(UnicodeKey key, JSLinearString* type)
+      : key_{key[0], key[1]}, type_(type) {}
+
+  UnicodeKeySpan key() const { return {key_, sizeof(key_)}; }
+  JSLinearString* type() const { return type_; }
+
+  void trace(JSTracer* trc);
+};
+
+extern MOZ_MUST_USE bool ApplyUnicodeExtensionToTag(
+    JSContext* cx, LanguageTag& tag,
+    JS::HandleVector<UnicodeExtensionKeyword> keywords);
+
 }  // namespace intl
 
 }  // namespace js
diff --git a/js/src/builtin/intl/LanguageTagGenerated.cpp b/js/src/builtin/intl/LanguageTagGenerated.cpp
index 6255861141..bd99140ace 100644
--- a/js/src/builtin/intl/LanguageTagGenerated.cpp
+++ b/js/src/builtin/intl/LanguageTagGenerated.cpp
@@ -10,6 +10,7 @@
 #include <cstdint>
 #include <cstring>
 #include <iterator>
+#include <string>
 #include <type_traits>
 
 #include "jscntxt.h"
@@ -53,6 +54,14 @@ static inline const char* SearchReplacement(
 }
 
 #ifdef DEBUG
+static bool IsAsciiLowercaseAlphanumeric(char c) {
+  return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
+}
+
+static bool IsAsciiLowercaseAlphanumericOrDash(char c) {
+  return IsAsciiLowercaseAlphanumeric(c) || c == '-';
+}
+
 static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
   // Tell the analysis the |std::all_of| function can't GC.
   JS::AutoSuppressGCAnalysis nogc;
@@ -69,14 +78,26 @@ static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
 }
 
 static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
-  auto isAsciiLowercaseAlphaOrDigit = [](char c) {
-    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
-  };
-
   // Tell the analysis the |std::all_of| function can't GC.
   JS::AutoSuppressGCAnalysis nogc;
 
-  return std::all_of(span.begin(), span.end(), isAsciiLowercaseAlphaOrDigit);
+  return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric);
+}
+
+static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) {
+  return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
+}
+
+static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) {
+  return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
+}
+
+static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
+  return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
+}
+
+static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
+  return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
 }
 #endif
 
@@ -566,6 +587,80 @@ void js::intl::LanguageTag::performComplexRegionMappings() {
   }
 }
 
+static const char* ToCharPointer(const char* str) {
+  return str;
+}
+
+static const char* ToCharPointer(const js::UniqueChars& str) {
+  return str.get();
+}
+
+template <typename T, typename U = T>
+static bool IsLessThan(const T& a, const U& b) {
+  return strcmp(ToCharPointer(a), ToCharPointer(b)) < 0;
+}
+
+// Mappings from variant subtags to preferred values.
+// Derived from CLDR Supplemental Data, version 35.1.
+// https://unicode.org/Public/cldr/35.1/core.zip
+bool js::intl::LanguageTag::performVariantMappings(JSContext* cx) {
+  // The variant subtags need to be sorted for binary search.
+  MOZ_ASSERT(std::is_sorted(variants_.begin(), variants_.end(),
+                            IsLessThan<decltype(variants_)::ElementType>));
+
+  auto insertVariantSortedIfNotPresent = [&](const char* variant) {
+    auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant,
+                               IsLessThan<decltype(variants_)::ElementType,
+                                          decltype(variant)>);
+
+    // Don't insert the replacement when already present.
+    if (p != variants_.end() && strcmp(p->get(), variant) == 0) {
+      return true;
+    }
+
+    // Insert the preferred variant in sort order.
+    auto preferred = DuplicateString(cx, variant);
+    if (!preferred) {
+      return false;
+    }
+    return !!variants_.insert(p, std::move(preferred));
+  };
+
+  for (size_t i = 0; i < variants_.length(); ) {
+    auto& variant = variants_[i];
+    MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeCStringSpan(variant.get())));
+
+    if (strcmp(variant.get(), "aaland") == 0) {
+      variants_.erase(variants_.begin() + i);
+      setRegion("AX");
+    }
+    else if (strcmp(variant.get(), "arevela") == 0) {
+      variants_.erase(variants_.begin() + i);
+      setLanguage("hy");
+    }
+    else if (strcmp(variant.get(), "arevmda") == 0) {
+      variants_.erase(variants_.begin() + i);
+      setLanguage("hyw");
+    }
+    else if (strcmp(variant.get(), "heploc") == 0) {
+      variants_.erase(variants_.begin() + i);
+      if (!insertVariantSortedIfNotPresent("alalc97")) {
+        return false;
+      }
+    }
+    else if (strcmp(variant.get(), "polytoni") == 0) {
+      variants_.erase(variants_.begin() + i);
+      if (!insertVariantSortedIfNotPresent("polyton")) {
+        return false;
+      }
+    }
+    else {
+      i++;
+    }
+  }
+  return true;
+}
+
 // Canonicalize grandfathered locale identifiers.
 // Derived from CLDR Supplemental Data, version 35.1.
 // https://unicode.org/Public/cldr/35.1/core.zip
@@ -656,16 +751,16 @@ bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
 }
 
 template <size_t Length>
-static inline bool IsUnicodeKey(mozilla::Span<const char> key,
-                                const char (&str)[Length]) {
+static inline bool IsUnicodeKey(
+  mozilla::Span<const char> key, const char (&str)[Length]) {
   static_assert(Length == UnicodeKeyLength + 1,
                 "Unicode extension key is two characters long");
   return memcmp(key.data(), str, Length - 1) == 0;
 }
 
 template <size_t Length>
-static inline bool IsUnicodeType(mozilla::Span<const char> type,
-                                 const char (&str)[Length]) {
+static inline bool IsUnicodeType(
+  mozilla::Span<const char> type, const char (&str)[Length]) {
   static_assert(Length > UnicodeKeyLength + 1,
                 "Unicode extension type contains more than two characters");
   return type.size() == (Length - 1) &&
@@ -673,13 +768,7 @@ static inline bool IsUnicodeType(mozilla::Span<const char> type,
 }
 
 static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) {
-#ifdef DEBUG
-  auto isNull = [](char c) {
-    return c == '\0';
-  };
-#endif
-
-  MOZ_ASSERT(std::none_of(b.begin(), b.end(), isNull),
+  MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\0'),
              "unexpected null-character in string");
 
   using UnsignedChar = unsigned char;
@@ -695,12 +784,12 @@ static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) {
   // Return zero if both strings are equal or a negative number if |b| is a
   // prefix of |a|.
   return -int32_t(UnsignedChar(a[b.size()]));
-};
+}
 
 template <size_t Length>
-static inline const char* SearchReplacement(const char* (&types)[Length],
-                                            const char* (&aliases)[Length],
-                                            mozilla::Span<const char> type) {
+static inline const char* SearchUnicodeReplacement(
+  const char* (&types)[Length], const char* (&aliases)[Length],
+  mozilla::Span<const char> type) {
 
   auto p = std::lower_bound(std::begin(types), std::end(types), type,
                             [](const auto& a, const auto& b) {
@@ -717,26 +806,15 @@ static inline const char* SearchReplacement(const char* (&types)[Length],
  * values.
  *
  * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
+ * Spec: https://www.unicode.org/reports/tr35/#t_Extension
  */
 const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
     mozilla::Span<const char> key, mozilla::Span<const char> type) {
-#ifdef DEBUG
-  static auto isAsciiLowercaseAlphanumeric = [](char c) {
-    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
-  };
-
-  static auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
-    return isAsciiLowercaseAlphanumeric(c) || c == '-';
-  };
-#endif
-
   MOZ_ASSERT(key.size() == UnicodeKeyLength);
-  MOZ_ASSERT(std::all_of(key.begin(), key.end(),
-                         isAsciiLowercaseAlphanumeric));
+  MOZ_ASSERT(IsCanonicallyCasedUnicodeKey(key));
 
   MOZ_ASSERT(type.size() > UnicodeKeyLength);
-  MOZ_ASSERT(std::all_of(type.begin(), type.end(),
-                         isAsciiLowercaseAlphanumericOrDash));
+  MOZ_ASSERT(IsCanonicallyCasedUnicodeType(type));
 
   if (IsUnicodeKey(key, "ca")) {
     if (IsUnicodeType(type, "ethiopic-amete-alem")) {
@@ -804,7 +882,7 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
        "pl26",  "pl24",  "pl28",  "pl30",  "pl32", "tttob", "ttmrc", "tttob",
       "twkhh", "twtnn", "twnwt", "twtxg",
     };
-    return SearchReplacement(types, aliases, type);
+    return SearchUnicodeReplacement(types, aliases, type);
   }
   else if (IsUnicodeKey(key, "tz")) {
     static const char* types[28] = {
@@ -821,7 +899,52 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
          "usden",    "plwaw",    "ptlis",    "cnsha",    "twtpe",    "krsel",
          "trist",      "utc",    "usden",      "utc",
     };
-    return SearchReplacement(types, aliases, type);
+    return SearchUnicodeReplacement(types, aliases, type);
+  }
+  return nullptr;
+}
+
+template <size_t Length>
+static inline bool IsTransformKey(
+  mozilla::Span<const char> key, const char (&str)[Length]) {
+  static_assert(Length == TransformKeyLength + 1,
+                "Transform extension key is two characters long");
+  return memcmp(key.data(), str, Length - 1) == 0;
+}
+
+template <size_t Length>
+static inline bool IsTransformType(
+  mozilla::Span<const char> type, const char (&str)[Length]) {
+  static_assert(Length > TransformKeyLength + 1,
+                "Transform extension type contains more than two characters");
+  return type.size() == (Length - 1) &&
+         memcmp(type.data(), str, Length - 1) == 0;
+}
+
+/**
+ * Mapping from deprecated BCP 47 Transform extension types to their preferred
+ * values.
+ *
+ * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
+ * Spec: https://www.unicode.org/reports/tr35/#t_Extension
+ */
+const char* js::intl::LanguageTag::replaceTransformExtensionType(
+    mozilla::Span<const char> key, mozilla::Span<const char> type) {
+  MOZ_ASSERT(key.size() == TransformKeyLength);
+  MOZ_ASSERT(IsCanonicallyCasedTransformKey(key));
+
+  MOZ_ASSERT(type.size() > TransformKeyLength);
+  MOZ_ASSERT(IsCanonicallyCasedTransformType(type));
+
+  if (IsTransformKey(key, "d0")) {
+    if (IsTransformType(type, "name")) {
+      return "charname";
+    }
+  }
+  else if (IsTransformKey(key, "m0")) {
+    if (IsTransformType(type, "names")) {
+      return "prprname";
+    }
   }
   return nullptr;
 }
diff --git a/js/src/builtin/intl/Locale.cpp b/js/src/builtin/intl/Locale.cpp
index 5d55fad2a1..ee70c0b06f 100644
--- a/js/src/builtin/intl/Locale.cpp
+++ b/js/src/builtin/intl/Locale.cpp
@@ -362,17 +362,12 @@ static bool ApplyOptionsToTag(JSContext* cx, LanguageTag& tag,
 /**
  * ApplyUnicodeExtensionToTag( tag, options, relevantExtensionKeys )
  */
-static bool ApplyUnicodeExtensionToTag(JSContext* cx, LanguageTag& tag,
-                                       HandleLinearString calendar,
-                                       HandleLinearString collation,
-                                       HandleLinearString hourCycle,
-                                       HandleLinearString caseFirst,
-                                       HandleLinearString numeric,
-                                       HandleLinearString numberingSystem) {
+bool js::intl::ApplyUnicodeExtensionToTag(
+    JSContext* cx, LanguageTag& tag,
+    JS::HandleVector<intl::UnicodeExtensionKeyword> keywords) {
   // If no Unicode extensions were present in the options object, we can skip
   // everything below and directly return.
-  if (!calendar && !collation && !caseFirst && !hourCycle && !numeric &&
-      !numberingSystem) {
+  if (keywords.length() == 0) {
     return true;
   }
 
@@ -402,53 +397,32 @@ static bool ApplyUnicodeExtensionToTag(JSContext* cx, LanguageTag& tag,
     }
   }
 
-  using UnicodeKeyWithSeparator = const char(&)[UnicodeKeyLength + 3];
-
-  auto appendKeyword = [&newExtension](UnicodeKeyWithSeparator key,
-                                       JSLinearString* value) {
-    if (!newExtension.append(key, UnicodeKeyLength + 2)) {
-      return false;
-    }
-
-    JS::AutoCheckCannotGC nogc;
-    return value->hasLatin1Chars()
-               ? newExtension.append(value->latin1Chars(nogc), value->length())
-               : newExtension.append(value->twoByteChars(nogc),
-                                     value->length());
-  };
-
   // Append the new keywords before any existing keywords. That way any previous
   // keyword with the same key is detected as a duplicate when canonicalizing
   // the Unicode extension subtag and gets discarded.
 
-  if (calendar) {
-    if (!appendKeyword("-ca-", calendar)) {
-      return false;
-    }
-  }
-  if (collation) {
-    if (!appendKeyword("-co-", collation)) {
-      return false;
-    }
-  }
-  if (hourCycle) {
-    if (!appendKeyword("-hc-", hourCycle)) {
+  for (const auto& keyword : keywords) {
+    UnicodeExtensionKeyword::UnicodeKeySpan key = keyword.key();
+    if (!newExtension.append('-')) {
       return false;
     }
-  }
-  if (caseFirst) {
-    if (!appendKeyword("-kf-", caseFirst)) {
+    if (!newExtension.append(key.data(), key.size())) {
       return false;
     }
-  }
-  if (numeric) {
-    if (!appendKeyword("-kn-", numeric)) {
+    if (!newExtension.append('-')) {
       return false;
     }
-  }
-  if (numberingSystem) {
-    if (!appendKeyword("-nu-", numberingSystem)) {
-      return false;
+
+    JS::AutoCheckCannotGC nogc;
+    JSLinearString* type = keyword.type();
+    if (type->hasLatin1Chars()) {
+      if (!newExtension.append(type->latin1Chars(nogc), type->length())) {
+        return false;
+      }
+    } else {
+      if (!newExtension.append(type->twoByteChars(nogc), type->length())) {
+        return false;
+      }
     }
   }
 
@@ -560,15 +534,16 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
       return false;
     }
 
-    // Step 13 (not applicable).
+    // Step 13.
+    JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx);
 
-    // Steps 14, 16.
+    // Step 14.
     RootedLinearString calendar(cx);
     if (!GetStringOption(cx, options, cx->names().calendar, &calendar)) {
       return false;
     }
 
-    // Step 15.
+    // Steps 15-16.
     if (calendar) {
       if (!IsValidUnicodeExtensionValue(calendar)) {
         if (UniqueChars str = StringToNewUTF8CharsZ(cx, *calendar)) {
@@ -578,15 +553,19 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
         }
         return false;
       }
+
+      if (!keywords.emplaceBack("ca", calendar)) {
+        return false;
+      }
     }
 
-    // Steps 17, 19.
+    // Step 17.
     RootedLinearString collation(cx);
     if (!GetStringOption(cx, options, cx->names().collation, &collation)) {
       return false;
     }
 
-    // Step 18.
+    // Steps 18-19.
     if (collation) {
       if (!IsValidUnicodeExtensionValue(collation)) {
         if (UniqueChars str = StringToNewUTF8CharsZ(cx, *collation)) {
@@ -596,14 +575,19 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
         }
         return false;
       }
+
+      if (!keywords.emplaceBack("co", collation)) {
+        return false;
+      }
     }
 
-    // Steps 20-21.
+    // Step 20 (without validation).
     RootedLinearString hourCycle(cx);
     if (!GetStringOption(cx, options, cx->names().hourCycle, &hourCycle)) {
       return false;
     }
 
+    // Steps 20-21.
     if (hourCycle) {
       if (!StringEqualsAscii(hourCycle, "h11") &&
           !StringEqualsAscii(hourCycle, "h12") &&
@@ -616,14 +600,19 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
         }
         return false;
       }
+
+      if (!keywords.emplaceBack("hc", hourCycle)) {
+        return false;
+      }
     }
 
-    // Steps 22-23.
+    // Step 22 (without validation).
     RootedLinearString caseFirst(cx);
     if (!GetStringOption(cx, options, cx->names().caseFirst, &caseFirst)) {
       return false;
     }
 
+    // Steps 22-23.
     if (caseFirst) {
       if (!StringEqualsAscii(caseFirst, "upper") &&
           !StringEqualsAscii(caseFirst, "lower") &&
@@ -635,22 +624,33 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
         }
         return false;
       }
+
+      if (!keywords.emplaceBack("kf", caseFirst)) {
+        return false;
+      }
     }
 
-    // Steps 24-26.
+    // Steps 24-25.
     RootedLinearString numeric(cx);
     if (!GetBooleanOption(cx, options, cx->names().numeric, &numeric)) {
       return false;
     }
 
-    // Steps 27, 29.
+    // Step 26.
+    if (numeric) {
+      if (!keywords.emplaceBack("kn", numeric)) {
+        return false;
+      }
+    }
+
+    // Step 27.
     RootedLinearString numberingSystem(cx);
     if (!GetStringOption(cx, options, cx->names().numberingSystem,
                          &numberingSystem)) {
       return false;
     }
 
-    // Step 28.
+    // Steps 28-29.
     if (numberingSystem) {
       if (!IsValidUnicodeExtensionValue(numberingSystem)) {
         if (UniqueChars str = StringToNewUTF8CharsZ(cx, *numberingSystem)) {
@@ -660,19 +660,21 @@ static bool Locale(JSContext* cx, unsigned argc, Value* vp) {
         }
         return false;
       }
+
+      if (!keywords.emplaceBack("nu", numberingSystem)) {
+        return false;
+      }
     }
 
     // Step 30.
-    if (!ApplyUnicodeExtensionToTag(cx, tag, calendar, collation, hourCycle,
-                                    caseFirst, numeric, numberingSystem)) {
+    if (!ApplyUnicodeExtensionToTag(cx, tag, keywords)) {
       return false;
     }
   }
 
   // ApplyOptionsToTag, steps 9 and 13.
-  // ApplyUnicodeExtensionToTag, step 8.
-  if (!tag.canonicalizeExtensions(
-          cx, LanguageTag::UnicodeExtensionCanonicalForm::Yes)) {
+  // ApplyUnicodeExtensionToTag, step 9.
+  if (!tag.canonicalizeExtensions(cx)) {
     return false;
   }
 
@@ -954,10 +956,7 @@ static bool Locale_toString(JSContext* cx, unsigned argc, Value* vp) {
 static bool Locale_baseName(JSContext* cx, const CallArgs& args) {
   MOZ_ASSERT(IsLocale(args.thisv()));
 
-  // FIXME: spec bug - invalid assertion in step 4.
-  // FIXME: spec bug - subtag production names not updated.
-
-  // Steps 3, 5.
+  // Steps 3-4.
   auto* locale = &args.thisv().toObject().as<LocaleObject>();
   args.rval().setString(locale->baseName());
   return true;
@@ -986,6 +985,22 @@ static bool Locale_calendar(JSContext* cx, unsigned argc, Value* vp) {
   return CallNonGenericMethod<IsLocale, Locale_calendar>(cx, args);
 }
 
+// get Intl.Locale.prototype.caseFirst
+static bool Locale_caseFirst(JSContext* cx, const CallArgs& args) {
+  MOZ_ASSERT(IsLocale(args.thisv()));
+
+  // Step 3.
+  auto* locale = &args.thisv().toObject().as<LocaleObject>();
+  return GetUnicodeExtension(cx, locale, "kf", args.rval());
+}
+
+// get Intl.Locale.prototype.caseFirst
+static bool Locale_caseFirst(JSContext* cx, unsigned argc, Value* vp) {
+  // Steps 1-2.
+  CallArgs args = CallArgsFromVp(argc, vp);
+  return CallNonGenericMethod<IsLocale, Locale_caseFirst>(cx, args);
+}
+
 // get Intl.Locale.prototype.collation
 static bool Locale_collation(JSContext* cx, const CallArgs& args) {
   MOZ_ASSERT(IsLocale(args.thisv()));
@@ -1018,22 +1033,6 @@ static bool Locale_hourCycle(JSContext* cx, unsigned argc, Value* vp) {
   return CallNonGenericMethod<IsLocale, Locale_hourCycle>(cx, args);
 }
 
-// get Intl.Locale.prototype.caseFirst
-static bool Locale_caseFirst(JSContext* cx, const CallArgs& args) {
-  MOZ_ASSERT(IsLocale(args.thisv()));
-
-  // Step 3.
-  auto* locale = &args.thisv().toObject().as<LocaleObject>();
-  return GetUnicodeExtension(cx, locale, "kf", args.rval());
-}
-
-// get Intl.Locale.prototype.caseFirst
-static bool Locale_caseFirst(JSContext* cx, unsigned argc, Value* vp) {
-  // Steps 1-2.
-  CallArgs args = CallArgsFromVp(argc, vp);
-  return CallNonGenericMethod<IsLocale, Locale_caseFirst>(cx, args);
-}
-
 // get Intl.Locale.prototype.numeric
 static bool Locale_numeric(JSContext* cx, const CallArgs& args) {
   MOZ_ASSERT(IsLocale(args.thisv()));
@@ -1045,8 +1044,13 @@ static bool Locale_numeric(JSContext* cx, const CallArgs& args) {
     return false;
   }
 
-  // FIXME: spec bug - comparison should be against the empty string, too.
+  // Compare against the empty string per Intl.Locale, step 36.a. The Unicode
+  // extension is already canonicalized, so we don't need to compare against
+  // "true" at this point.
   MOZ_ASSERT(value.isUndefined() || value.isString());
+  MOZ_ASSERT_IF(value.isString(),
+                !StringEqualsAscii(&value.toString()->asLinear(), "true"));
+
   args.rval().setBoolean(value.isString() && value.toString()->empty());
   return true;
 }
@@ -1093,7 +1097,6 @@ static bool Locale_language(JSContext* cx, const CallArgs& args) {
   size_t length = language.length;
 
   // Step 5.
-  // FIXME: spec bug - not all production names updated.
   JSString* str = NewDependentString(cx, baseName, index, length);
   if (!str) {
     return false;
@@ -1126,7 +1129,6 @@ static bool Locale_script(JSContext* cx, const CallArgs& args) {
   auto script = BaseNameParts(baseName).script;
 
   // Step 5.
-  // FIXME: spec bug - not all production names updated.
   if (!script) {
     args.rval().setUndefined();
     return true;
@@ -1208,9 +1210,9 @@ static const JSFunctionSpec locale_methods[] = {
 static const JSPropertySpec locale_properties[] = {
     JS_PSG("baseName", Locale_baseName, 0),
     JS_PSG("calendar", Locale_calendar, 0),
+    JS_PSG("caseFirst", Locale_caseFirst, 0),
     JS_PSG("collation", Locale_collation, 0),
     JS_PSG("hourCycle", Locale_hourCycle, 0),
-    JS_PSG("caseFirst", Locale_caseFirst, 0),
     JS_PSG("numeric", Locale_numeric, 0),
     JS_PSG("numberingSystem", Locale_numberingSystem, 0),
     JS_PSG("language", Locale_language, 0),
@@ -1301,7 +1303,7 @@ bool js::intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx, unsigned argc,
     return false;
   }
 
-  if (!tag.canonicalize(cx, LanguageTag::UnicodeExtensionCanonicalForm::No)) {
+  if (!tag.canonicalize(cx)) {
     return false;
   }
 
@@ -1334,7 +1336,7 @@ bool js::intl_TryValidateAndCanonicalizeLanguageTag(JSContext* cx,
     return true;
   }
 
-  if (!tag.canonicalize(cx, LanguageTag::UnicodeExtensionCanonicalForm::No)) {
+  if (!tag.canonicalize(cx)) {
     return false;
   }
 
@@ -1345,3 +1347,85 @@ bool js::intl_TryValidateAndCanonicalizeLanguageTag(JSContext* cx,
   args.rval().setString(resultStr);
   return true;
 }
+
+bool js::intl_ValidateAndCanonicalizeUnicodeExtensionType(JSContext* cx,
+                                                          unsigned argc,
+                                                          Value* vp) {
+  CallArgs args = CallArgsFromVp(argc, vp);
+  MOZ_ASSERT(args.length() == 3);
+
+  HandleValue typeArg = args[0];
+  MOZ_ASSERT(typeArg.isString(), "type must be a string");
+
+  HandleValue optionArg = args[1];
+  MOZ_ASSERT(optionArg.isString(), "option name must be a string");
+
+  HandleValue keyArg = args[2];
+  MOZ_ASSERT(keyArg.isString(), "key must be a string");
+
+  RootedLinearString unicodeType(cx, typeArg.toString()->ensureLinear(cx));
+  if (!unicodeType) {
+    return false;
+  }
+
+  if (!IsValidUnicodeExtensionValue(unicodeType)) {
+    JSAutoByteString optionStr(cx, optionArg.toString());
+    if (!optionStr) {
+      return false;
+    }
+
+    JSAutoByteString unicodeTypeQuot(cx, QuoteString(cx, unicodeType, '"'));
+    if (!unicodeTypeQuot) {
+      return false;
+    }
+
+    JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr,
+                              JSMSG_INVALID_OPTION_VALUE, optionStr.ptr(),
+                              unicodeTypeQuot.ptr());
+    return false;
+  }
+
+  char unicodeKey[UnicodeKeyLength];
+  {
+    JSLinearString* str = keyArg.toString()->ensureLinear(cx);
+    if (!str) {
+      return false;
+    }
+    MOZ_ASSERT(str->length() == UnicodeKeyLength);
+
+    for (size_t i = 0; i < UnicodeKeyLength; i++) {
+      char16_t ch = str->latin1OrTwoByteChar(i);
+      MOZ_ASSERT(mozilla::IsAscii(ch));
+      unicodeKey[i] = char(ch);
+    }
+  }
+
+  JSAutoByteString unicodeTypeChars(cx, unicodeType);
+  if (!unicodeTypeChars) {
+    return false;
+  }
+
+  size_t unicodeTypeLength = unicodeType->length();
+  MOZ_ASSERT(strlen(unicodeTypeChars.ptr()) == unicodeTypeLength);
+
+  // Convert into canonical case before searching for replacements.
+  intl::AsciiToLowerCase(unicodeTypeChars.ptr(), unicodeTypeLength,
+                         unicodeTypeChars.ptr());
+
+  auto key = mozilla::MakeSpan(unicodeKey, UnicodeKeyLength);
+  auto type = mozilla::MakeSpan(unicodeTypeChars.ptr(), unicodeTypeLength);
+
+  // Search if there's a replacement for the current Unicode keyword.
+  JSString* result;
+  if (const char* replacement = LanguageTag::replaceUnicodeExtensionType(key, type)) {
+    result = NewStringCopyZ<CanGC>(cx, replacement);
+  } else {
+    result = StringToLowerCase(cx, unicodeType);
+  }
+  if (!result) {
+    return false;
+  }
+
+  args.rval().setString(result);
+  return true;
+}
diff --git a/js/src/builtin/intl/Locale.h b/js/src/builtin/intl/Locale.h
index 31b3caca5c..74ff4b5a71 100644
--- a/js/src/builtin/intl/Locale.h
+++ b/js/src/builtin/intl/Locale.h
@@ -56,6 +56,9 @@ extern MOZ_MUST_USE bool intl_ValidateAndCanonicalizeLanguageTag(JSContext* cx,
 extern MOZ_MUST_USE bool intl_TryValidateAndCanonicalizeLanguageTag(
     JSContext* cx, unsigned argc, Value* vp);
 
+extern MOZ_MUST_USE bool intl_ValidateAndCanonicalizeUnicodeExtensionType(
+    JSContext* cx, unsigned argc, Value* vp);
+
 }  // namespace js
 
 #endif /* builtin_intl_Locale_h */
diff --git a/js/src/builtin/intl/NumberFormat.cpp b/js/src/builtin/intl/NumberFormat.cpp
index df40e751c8..9ee3b02109 100644
--- a/js/src/builtin/intl/NumberFormat.cpp
+++ b/js/src/builtin/intl/NumberFormat.cpp
@@ -18,6 +18,7 @@
 
 #include "builtin/intl/CommonFunctions.h"
 #include "builtin/intl/ICUHeader.h"
+#include "builtin/intl/LanguageTag.h"
 #include "builtin/intl/ScopedICUObject.h"
 #include "ds/Sort.h"
 #include "js/RootingAPI.h"
@@ -246,7 +247,41 @@ NewUNumberFormat(JSContext* cx, Handle<NumberFormatObject*> numberFormat)
 
     if (!GetProperty(cx, internals, internals, cx->names().locale, &value))
         return nullptr;
-    JSAutoByteString locale(cx, value.toString());
+
+    // ICU expects numberingSystem as a Unicode locale extension on locale.
+
+    intl::LanguageTag tag(cx);
+    {
+        JSLinearString* locale = value.toString()->ensureLinear(cx);
+        if (!locale)
+            return nullptr;
+
+        if (!intl::LanguageTagParser::parse(cx, locale, tag))
+            return nullptr;
+    }
+
+    JS::RootedVector<intl::UnicodeExtensionKeyword> keywords(cx);
+
+    if (!GetProperty(cx, internals, internals, cx->names().numberingSystem, &value))
+        return nullptr;
+
+    {
+        JSLinearString* numberingSystem = value.toString()->ensureLinear(cx);
+        if (!numberingSystem)
+            return nullptr;
+
+        if (!keywords.emplaceBack("nu", numberingSystem))
+            return nullptr;
+    }
+
+    // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of
+    // the Unicode extension subtag. We're then relying on ICU to follow RFC
+    // 6067, which states that any trailing keywords using the same key
+    // should be ignored.
+    if (!intl::ApplyUnicodeExtensionToTag(cx, tag, keywords))
+        return nullptr;
+
+    UniqueChars locale = tag.toStringZ(cx);
     if (!locale)
         return nullptr;
 
@@ -264,9 +299,6 @@ NewUNumberFormat(JSContext* cx, Handle<NumberFormatObject*> numberFormat)
     RootedString currency(cx);
     AutoStableStringChars stableChars(cx);
 
-    // We don't need to look at numberingSystem - it can only be set via
-    // the Unicode locale extension and is therefore already set on locale.
-
     if (!GetProperty(cx, internals, internals, cx->names().style, &value))
         return nullptr;
     JSAutoByteString style(cx, value.toString());
@@ -339,7 +371,7 @@ NewUNumberFormat(JSContext* cx, Handle<NumberFormatObject*> numberFormat)
     uUseGrouping = value.toBoolean();
 
     UErrorCode status = U_ZERO_ERROR;
-    UNumberFormat* nf = unum_open(uStyle, nullptr, 0, IcuLocale(locale.ptr()), nullptr, &status);
+    UNumberFormat* nf = unum_open(uStyle, nullptr, 0, IcuLocale(locale.get()), nullptr, &status);
     if (U_FAILURE(status)) {
         intl::ReportInternalError(cx);
         return nullptr;
diff --git a/js/src/builtin/intl/NumberFormat.js b/js/src/builtin/intl/NumberFormat.js
index 973abd026a..238a59405b 100644
--- a/js/src/builtin/intl/NumberFormat.js
+++ b/js/src/builtin/intl/NumberFormat.js
@@ -211,6 +211,8 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
     //     opt: // opt object computed in InitializeNumberFormat
     //       {
     //         localeMatcher: "lookup" / "best fit",
+    //
+    //         nu: string matching a Unicode extension type, // optional
     //       }
     //
     //     minimumIntegerDigits: integer ∈ [1, 21],
@@ -253,6 +255,16 @@ function InitializeNumberFormat(numberFormat, thisValue, locales, options) {
     // Steps 5-6.
     var matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
     opt.localeMatcher = matcher;
+    
+    var numberingSystem = GetOption(options, "numberingSystem", "string", undefined, undefined);
+
+    if (numberingSystem !== undefined) {
+        numberingSystem = intl_ValidateAndCanonicalizeUnicodeExtensionType(numberingSystem,
+                                                                           "numberingSystem",
+                                                                           "nu");
+    }
+
+    opt.nu = numberingSystem;
 
     // Compute formatting options.
     // Step 12.
diff --git a/js/src/builtin/intl/make_intl_data.py b/js/src/builtin/intl/make_intl_data.py
index 0370d422d9..59ff14d76c 100644
--- a/js/src/builtin/intl/make_intl_data.py
+++ b/js/src/builtin/intl/make_intl_data.py
@@ -331,6 +331,96 @@ void js::intl::LanguageTag::performComplexRegionMappings() {
 """.strip("\n"))
 
 
+def writeVariantTagMappings(println, variant_mappings, description, source,
+                            url):
+    """ Writes a function definition that maps variant subtags. """
+    println(u"""
+static const char* ToCharPointer(const char* str) {
+  return str;
+}
+
+static const char* ToCharPointer(const js::UniqueChars& str) {
+  return str.get();
+}
+
+template <typename T, typename U = T>
+static bool IsLessThan(const T& a, const U& b) {
+  return strcmp(ToCharPointer(a), ToCharPointer(b)) < 0;
+}
+""")
+    writeMappingHeader(println, description, source, url)
+    println(u"""
+bool js::intl::LanguageTag::performVariantMappings(JSContext* cx) {
+  // The variant subtags need to be sorted for binary search.
+  MOZ_ASSERT(std::is_sorted(variants_.begin(), variants_.end(),
+                            IsLessThan<decltype(variants_)::ElementType>));
+
+  auto insertVariantSortedIfNotPresent = [&](const char* variant) {
+    auto* p = std::lower_bound(variants_.begin(), variants_.end(), variant,
+                               IsLessThan<decltype(variants_)::ElementType,
+                                          decltype(variant)>);
+
+    // Don't insert the replacement when already present.
+    if (p != variants_.end() && strcmp(p->get(), variant) == 0) {
+      return true;
+    }
+
+    // Insert the preferred variant in sort order.
+    auto preferred = DuplicateString(cx, variant);
+    if (!preferred) {
+      return false;
+    }
+    return !!variants_.insert(p, std::move(preferred));
+  };
+
+  for (size_t i = 0; i < variants_.length(); ) {
+    auto& variant = variants_[i];
+    MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeCStringSpan(variant.get())));
+""".lstrip())
+
+    first_variant = True
+
+    for (deprecated_variant, (type, replacement)) in (
+        sorted(variant_mappings.items(), key=itemgetter(0))
+    ):
+        if_kind = u"if" if first_variant else u"else if"
+        first_variant = False
+
+        println(u"""
+    {} (strcmp(variant.get(), "{}") == 0) {{
+      variants_.erase(variants_.begin() + i);
+""".format(if_kind, deprecated_variant).strip("\n"))
+
+        if type == "language":
+            println(u"""
+      setLanguage("{}");
+""".format(replacement).strip("\n"))
+        elif type == "region":
+            println(u"""
+      setRegion("{}");
+""".format(replacement).strip("\n"))
+        else:
+            assert type == "variant"
+            println(u"""
+      if (!insertVariantSortedIfNotPresent("{}")) {{
+        return false;
+      }}
+""".format(replacement).strip("\n"))
+
+        println(u"""
+    }
+""".strip("\n"))
+
+    println(u"""
+    else {
+      i++;
+    }
+  }
+  return true;
+}
+""".strip("\n"))
+
+
 def writeGrandfatheredMappingsFunction(println, grandfathered_mappings,
                                        description, source, url):
     """ Writes a function definition that maps grandfathered language tags. """
@@ -498,6 +588,7 @@ def readSupplementalData(core_file):
         - complexLanguageMappings: mappings from language subtags with complex rules
         - regionMappings: mappings from region subtags to preferred subtags
         - complexRegionMappings: mappings from region subtags with complex rules
+        - variantMappings: mappings from variant subtags to preferred subtags
         - likelySubtags: likely subtags used for generating test data only
         Returns these mappings as dictionaries.
     """
@@ -541,6 +632,14 @@ def readSupplementalData(core_file):
         $
         """, re.IGNORECASE | re.VERBOSE)
 
+    re_unicode_variant_subtag = re.compile(
+        r"""
+        ^
+        # unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3})
+        ([a-z0-9]{5,8}|(?:[0-9][a-z0-9]{3}))
+        $
+        """, re.IGNORECASE | re.VERBOSE)
+
     # The fixed list of BCP 47 grandfathered language tags.
     grandfathered_tags = (
         "art-lojban",
@@ -589,6 +688,11 @@ def readSupplementalData(core_file):
     # replacement, e.g. "SU" -> ("RU", ["AM", "AZ", "BY", ...]).
     complex_region_mappings = {}
 
+    # Dictionary of aliased variant subtags to a tuple of preferred replacement
+    # type and replacement, e.g. "arevela" -> ("language", "hy") or
+    # "aaland" -> ("region", "AX") or "heploc" -> ("variant", "alalc97").
+    variant_mappings = {}
+
     # Dictionary of grandfathered mappings to preferred values.
     grandfathered_mappings = {}
 
@@ -624,6 +728,8 @@ def readSupplementalData(core_file):
         if re_unicode_language_subtag.match(type) is None:
             continue
 
+        assert type.islower()
+
         if re_unicode_language_subtag.match(replacement) is not None:
             # Canonical case for language subtags is lower-case.
             language_mappings[type] = replacement.lower()
@@ -647,6 +753,8 @@ def readSupplementalData(core_file):
         if re_unicode_region_subtag.match(type) is None:
             continue
 
+        assert type.isupper() or type.isdigit()
+
         if re_unicode_region_subtag.match(replacement) is not None:
             # Canonical case for region subtags is upper-case.
             region_mappings[type] = replacement.upper()
@@ -658,6 +766,33 @@ def readSupplementalData(core_file):
             ), "{} invalid region subtags".format(replacement)
             complex_region_mappings[type] = replacements
 
+    for variant_alias in tree.iterfind(".//variantAlias"):
+        type = variant_alias.get("type")
+        replacement = variant_alias.get("replacement")
+
+        assert re_unicode_variant_subtag.match(type) is not None, (
+               "{} invalid variant subtag".format(type))
+
+        # Normalize the case, because some variants are in upper case.
+        type = type.lower()
+
+        # The replacement can be a language, a region, or a variant subtag.
+        # Language and region subtags are case normalized, variant subtags can
+        # be in any case.
+
+        if re_unicode_language_subtag.match(replacement) is not None and replacement.islower():
+            variant_mappings[type] = ("language", replacement)
+
+        elif re_unicode_region_subtag.match(replacement) is not None:
+            assert replacement.isupper() or replacement.isdigit(), (
+                   "{} invalid variant subtag replacement".format(replacement))
+            variant_mappings[type] = ("region", replacement)
+
+        else:
+            assert re_unicode_variant_subtag.match(replacement) is not None, (
+                   "{} invalid variant subtag replacement".format(replacement))
+            variant_mappings[type] = ("variant", replacement.lower())
+
     tree = ET.parse(core_file.open("common/supplemental/likelySubtags.xml"))
 
     likely_subtags = {}
@@ -724,6 +859,7 @@ def readSupplementalData(core_file):
             "complexLanguageMappings": complex_language_mappings,
             "regionMappings": region_mappings,
             "complexRegionMappings": complex_region_mappings_final,
+            "variantMappings": variant_mappings,
             "likelySubtags": likely_subtags,
             }
 
@@ -740,14 +876,20 @@ def readUnicodeExtensions(core_file):
 
     # Mapping from Unicode extension types to dict of deprecated to
     # preferred values.
-    mapping = {}
+    mapping = {
+        # Unicode BCP 47 U Extension
+        "u": {},
+
+        # Unicode BCP 47 T Extension
+        "t": {},
+    }
 
     def readBCP47File(file):
         tree = ET.parse(file)
         for keyword in tree.iterfind(".//keyword/key"):
-            # Skip over keywords whose extension is not "u".
-            if keyword.get("extension", "u") != "u":
-                continue
+            extension = keyword.get("extension", "u")
+            assert extension == "u" or extension == "t", (
+                   "unknown extension type: {}".format(extension))
 
             extension_name = keyword.get("name")
 
@@ -806,7 +948,7 @@ def readUnicodeExtensions(core_file):
 
                 if preferred is not None:
                     assert typeRE.match(preferred), preferred
-                    mapping.setdefault(extension_name, {})[name] = preferred
+                    mapping[extension].setdefault(extension_name, {})[name] = preferred
 
                 if alias is not None:
                     for alias_name in alias.lower().split(" "):
@@ -816,7 +958,7 @@ def readUnicodeExtensions(core_file):
 
                         # See comment above when 'alias' and 'preferred' are both present.
                         if (preferred is not None and
-                            name in mapping[extension_name]):
+                            name in mapping[extension][extension_name]):
                             continue
 
                         # Skip over entries where 'name' and 'alias' are equal.
@@ -828,7 +970,7 @@ def readUnicodeExtensions(core_file):
                         if name == alias_name:
                             continue
 
-                        mapping.setdefault(extension_name, {})[alias_name] = name
+                        mapping[extension].setdefault(extension_name, {})[alias_name] = name
 
     def readSupplementalMetadata(file):
         # Find subdivision and region replacements.
@@ -857,8 +999,8 @@ def readUnicodeExtensions(core_file):
                 continue
 
             # 'subdivisionAlias' applies to 'rg' and 'sd' keys.
-            mapping.setdefault("rg", {})[type] = replacement
-            mapping.setdefault("sd", {})[type] = replacement
+            mapping["u"].setdefault("rg", {})[type] = replacement
+            mapping["u"].setdefault("sd", {})[type] = replacement
 
     for name in core_file.namelist():
         if bcpFileRE.match(name):
@@ -866,7 +1008,10 @@ def readUnicodeExtensions(core_file):
 
     readSupplementalMetadata(core_file.open("common/supplemental/supplementalMetadata.xml"))
 
-    return mapping
+    return {
+        "unicodeMappings": mapping["u"],
+        "transformMappings": mapping["t"],
+    }
 
 def writeCLDRLanguageTagData(println, data, url):
     """ Writes the language tag data to the Intl data file. """
@@ -884,6 +1029,7 @@ def writeCLDRLanguageTagData(println, data, url):
 #include <cstdint>
 #include <cstring>
 #include <iterator>
+#include <string>
 #include <type_traits>
 
 #include "jscntxt.h"
@@ -927,6 +1073,14 @@ static inline const char* SearchReplacement(
 }
 
 #ifdef DEBUG
+static bool IsAsciiLowercaseAlphanumeric(char c) {
+  return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
+}
+
+static bool IsAsciiLowercaseAlphanumericOrDash(char c) {
+  return IsAsciiLowercaseAlphanumeric(c) || c == '-';
+}
+
 static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
   // Tell the analysis the |std::all_of| function can't GC.
   JS::AutoSuppressGCAnalysis nogc;
@@ -943,14 +1097,26 @@ static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
 }
 
 static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
-  auto isAsciiLowercaseAlphaOrDigit = [](char c) {
-    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
-  };
-
   // Tell the analysis the |std::all_of| function can't GC.
   JS::AutoSuppressGCAnalysis nogc;
 
-  return std::all_of(span.begin(), span.end(), isAsciiLowercaseAlphaOrDigit);
+  return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric);
+}
+
+static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) {
+  return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
+}
+
+static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) {
+  return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
+}
+
+static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) {
+  return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric);
+}
+
+static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
+  return std::all_of(type.begin(), type.end(), IsAsciiLowercaseAlphanumericOrDash);
 }
 #endif
 """.rstrip())
@@ -961,7 +1127,9 @@ static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
     complex_language_mappings = data["complexLanguageMappings"]
     region_mappings = data["regionMappings"]
     complex_region_mappings = data["complexRegionMappings"]
+    variant_mappings = data["variantMappings"]
     unicode_mappings = data["unicodeMappings"]
+    transform_mappings = data["transformMappings"]
 
     # unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
     language_maxlength = 8
@@ -999,11 +1167,15 @@ static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
     writeComplexRegionTagMappings(println, complex_region_mappings,
                                   "Region subtags with complex mappings.", source, url)
 
+    writeVariantTagMappings(println, variant_mappings,
+                            "Mappings from variant subtags to preferred values.", source, url)
+
     writeGrandfatheredMappingsFunction(println, grandfathered_mappings,
                                        "Canonicalize grandfathered locale identifiers.", source,
                                        url)
 
-    writeUnicodeExtensionsMappings(println, unicode_mappings)
+    writeUnicodeExtensionsMappings(println, unicode_mappings, "Unicode")
+    writeUnicodeExtensionsMappings(println, transform_mappings, "Transform")
 
 
 def writeCLDRLanguageTagLikelySubtagsTest(println, data, url):
@@ -1157,7 +1329,7 @@ def updateCLDRLangTags(args):
     def readFiles(cldr_file):
         with ZipFile(cldr_file) as zip_file:
             data.update(readSupplementalData(zip_file))
-            data["unicodeMappings"] = readUnicodeExtensions(zip_file)
+            data.update(readUnicodeExtensions(zip_file))
 
     print("Processing CLDR data...")
     if filename is not None:
@@ -1181,8 +1353,7 @@ def updateCLDRLangTags(args):
     with io.open(test_file, mode="w", encoding="utf-8", newline="") as f:
         println = partial(print, file=f)
 
-        println(u"// |reftest| skip-if(!this.hasOwnProperty('Intl')||"
-                u"(!this.Intl.Locale&&!this.hasOwnProperty('addIntlExtras')))")
+        println(u"// |reftest| skip-if(!this.hasOwnProperty('Intl'))")
         writeCLDRLanguageTagLikelySubtagsTest(println, data, url)
 
 
@@ -1898,91 +2069,84 @@ def updateTzdata(topsrcdir, args):
     else:
         updateFrom(tzDir)
 
-def writeUnicodeExtensionsMappings(println, mapping):
+def writeUnicodeExtensionsMappings(println, mapping, extension):
     println(u"""
 template <size_t Length>
-static inline bool IsUnicodeKey(mozilla::Span<const char> key,
-                                const char (&str)[Length]) {
-  static_assert(Length == UnicodeKeyLength + 1,
-                "Unicode extension key is two characters long");
+static inline bool Is{0}Key(
+  mozilla::Span<const char> key, const char (&str)[Length]) {{
+  static_assert(Length == {0}KeyLength + 1,
+                "{0} extension key is two characters long");
   return memcmp(key.data(), str, Length - 1) == 0;
-}
+}}
 
 template <size_t Length>
-static inline bool IsUnicodeType(mozilla::Span<const char> type,
-                                 const char (&str)[Length]) {
-  static_assert(Length > UnicodeKeyLength + 1,
-                "Unicode extension type contains more than two characters");
+static inline bool Is{0}Type(
+  mozilla::Span<const char> type, const char (&str)[Length]) {{
+  static_assert(Length > {0}KeyLength + 1,
+                "{0} extension type contains more than two characters");
   return type.size() == (Length - 1) &&
          memcmp(type.data(), str, Length - 1) == 0;
-}
+}}
+""".format(extension).rstrip("\n"))
 
-static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) {
-#ifdef DEBUG
-  auto isNull = [](char c) {
-    return c == '\\0';
-  };
-#endif
+    linear_search_max_length = 4
+
+    needs_binary_search = any(len(replacements.items()) > linear_search_max_length
+                              for replacements in mapping.values())
 
-  MOZ_ASSERT(std::none_of(b.begin(), b.end(), isNull),
+    if needs_binary_search:
+        println(u"""
+static int32_t Compare{0}Type(const char* a, mozilla::Span<const char> b) {{
+  MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\\0'),
              "unexpected null-character in string");
 
   using UnsignedChar = unsigned char;
-  for (size_t i = 0; i < b.size(); i++) {
+  for (size_t i = 0; i < b.size(); i++) {{
     // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
     // we've reached the end of |a|, the below if-statement will always be true.
     // That ensures we don't read past the end of |a|.
-    if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {
+    if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) {{
       return r;
-    }
-  }
+    }}
+  }}
 
   // Return zero if both strings are equal or a negative number if |b| is a
   // prefix of |a|.
   return -int32_t(UnsignedChar(a[b.size()]));
-};
+}}
 
 template <size_t Length>
-static inline const char* SearchReplacement(const char* (&types)[Length],
-                                            const char* (&aliases)[Length],
-                                            mozilla::Span<const char> type) {
+static inline const char* Search{0}Replacement(
+  const char* (&types)[Length], const char* (&aliases)[Length],
+  mozilla::Span<const char> type) {{
 
   auto p = std::lower_bound(std::begin(types), std::end(types), type,
-                            [](const auto& a, const auto& b) {
-    return CompareUnicodeType(a, b) < 0;
-  });
-  if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) {
+                            [](const auto& a, const auto& b) {{
+    return Compare{0}Type(a, b) < 0;
+  }});
+  if (p != std::end(types) && Compare{0}Type(*p, type) == 0) {{
     return aliases[std::distance(std::begin(types), p)];
-  }
+  }}
   return nullptr;
-}
+}}
+""".format(extension).rstrip("\n"))
 
+    println(u"""
 /**
- * Mapping from deprecated BCP 47 Unicode extension types to their preferred
+ * Mapping from deprecated BCP 47 {0} extension types to their preferred
  * values.
  *
  * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
+ * Spec: https://www.unicode.org/reports/tr35/#t_Extension
  */
-const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
-    mozilla::Span<const char> key, mozilla::Span<const char> type) {
-#ifdef DEBUG
-  static auto isAsciiLowercaseAlphanumeric = [](char c) {
-    return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
-  };
+const char* js::intl::LanguageTag::replace{0}ExtensionType(
+    mozilla::Span<const char> key, mozilla::Span<const char> type) {{
+  MOZ_ASSERT(key.size() == {0}KeyLength);
+  MOZ_ASSERT(IsCanonicallyCased{0}Key(key));
 
-  static auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
-    return isAsciiLowercaseAlphanumeric(c) || c == '-';
-  };
-#endif
-
-  MOZ_ASSERT(key.size() == UnicodeKeyLength);
-  MOZ_ASSERT(std::all_of(key.begin(), key.end(),
-                         isAsciiLowercaseAlphanumeric));
-
-  MOZ_ASSERT(type.size() > UnicodeKeyLength);
-  MOZ_ASSERT(std::all_of(type.begin(), type.end(),
-                         isAsciiLowercaseAlphanumericOrDash));
-""")
+  MOZ_ASSERT(type.size() > {0}KeyLength);
+  MOZ_ASSERT(IsCanonicallyCased{0}Type(type));
+""".format(extension))
 
     def to_hash_key(replacements):
         return str(sorted(replacements.items()))
@@ -2014,7 +2178,8 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
         if key in key_aliases[hash_key]:
             continue
 
-        cond = (u"IsUnicodeKey(key, \"{}\")".format(k) for k in [key] + key_aliases[hash_key])
+        cond = (u"Is{}Key(key, \"{}\")".format(extension, k)
+                for k in [key] + key_aliases[hash_key])
 
         if_kind = u"if" if first_key else u"else if"
         cond = (u" ||\n" + u" " * (2 + len(if_kind) + 2)).join(cond)
@@ -2024,7 +2189,7 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
 
         replacements = sorted(replacements.items(), key=itemgetter(0))
 
-        if len(replacements) > 4:
+        if len(replacements) > linear_search_max_length:
             types = [t for (t, _) in replacements]
             preferred = [r for (_, r) in replacements]
             max_len = max(len(k) for k in types + preferred)
@@ -2032,14 +2197,14 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
             write_array(types, "types", max_len)
             write_array(preferred, "aliases", max_len)
             println(u"""
-    return SearchReplacement(types, aliases, type);
-""".strip("\n"))
+    return Search{}Replacement(types, aliases, type);
+""".format(extension).strip("\n"))
         else:
             for (type, replacement) in replacements:
                 println(u"""
-    if (IsUnicodeType(type, "{}")) {{
+    if (Is{}Type(type, "{}")) {{
       return "{}";
-    }}""".format(type, replacement).strip("\n"))
+    }}""".format(extension, type, replacement).strip("\n"))
 
         println(u"""
   }""".lstrip("\n"))
diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp
index ef007a69db..6446cbb4be 100644
--- a/js/src/vm/SelfHosting.cpp
+++ b/js/src/vm/SelfHosting.cpp
@@ -2487,6 +2487,7 @@ static const JSFunctionSpec intrinsic_functions[] = {
     JS_FN("intl_toLocaleUpperCase", intl_toLocaleUpperCase, 2,0),
     JS_FN("intl_ValidateAndCanonicalizeLanguageTag", intl_ValidateAndCanonicalizeLanguageTag, 2, 0),
     JS_FN("intl_TryValidateAndCanonicalizeLanguageTag", intl_TryValidateAndCanonicalizeLanguageTag, 1, 0),
+    JS_FN("intl_ValidateAndCanonicalizeUnicodeExtensionType", intl_ValidateAndCanonicalizeUnicodeExtensionType, 3, 0),
     JS_FN("intl_FormatRelativeTime", intl_FormatRelativeTime, 3,0),
 
     JS_INLINABLE_FN("IsCollator",
author	Martok <martok@martoks-place.de>	2023-06-29 23:09:26 +0200
committer	Martok <martok@martoks-place.de>	2023-06-30 00:01:35 +0200
commit	af47a256b5cf2b81e4c3bf8f36682f8b9f31be42 (patch)
tree	af1b472d545dcd80afa9de5e468912f39cf8ee12 /js/src
parent	e96f965422528636e13adc3473679248941540e7 (diff)
download	uxp-af47a256b5cf2b81e4c3bf8f36682f8b9f31be42.tar.gz