summaryrefslogtreecommitdiff
path: root/js
diff options
context:
space:
mode:
authorMartok <martok@martoks-place.de>2023-06-18 15:05:33 +0200
committerMartok <martok@martoks-place.de>2023-06-30 00:01:35 +0200
commite96f965422528636e13adc3473679248941540e7 (patch)
treea6bd9d0f9a34add576553833f527d76224b157ad /js
parent7c3aa6a8b63d7d1ba2a5ae96ea065379634f3de1 (diff)
downloaduxp-e96f965422528636e13adc3473679248941540e7.tar.gz
Issue #2259 - Performance improvements for LanguageTag parsing
- parsing: don't normalise things that don't need to be normalised anymore: extension, private-use, variant, language, script, and region subtags
- Add missing() and present() methods to LanguageSubtag
- Change mozilla::Range to mozilla::Span for slightly better code

Based-on: m-c 1592588
Diffstat (limited to 'js')
-rw-r--r--js/src/builtin/intl/IntlObject.cpp5
-rw-r--r--js/src/builtin/intl/LanguageTag.cpp544
-rw-r--r--js/src/builtin/intl/LanguageTag.h201
-rw-r--r--js/src/builtin/intl/LanguageTagGenerated.cpp109
-rw-r--r--js/src/builtin/intl/Locale.cpp44
-rw-r--r--js/src/builtin/intl/SharedIntlData.cpp4
-rw-r--r--js/src/builtin/intl/make_intl_data.py100
-rw-r--r--js/src/js.msg1
8 files changed, 505 insertions, 503 deletions
diff --git a/js/src/builtin/intl/IntlObject.cpp b/js/src/builtin/intl/IntlObject.cpp
index 9caa2709a8..e0dd36dac4 100644
--- a/js/src/builtin/intl/IntlObject.cpp
+++ b/js/src/builtin/intl/IntlObject.cpp
@@ -545,10 +545,7 @@ js::intl_BestAvailableLocale(JSContext* cx, unsigned argc, Value* vp)
JS_TRY_VAR_OR_RETURN_FALSE(cx, ok, intl::LanguageTagParser::tryParse(cx, locale, tag));
MOZ_ASSERT(ok, "locale is a structurally valid language tag");
- auto isUnicodeExtension = [](const auto& extension) {
- return extension[0] == 'u';
- };
- MOZ_ASSERT(std::none_of(tag.extensions().begin(), tag.extensions().end(), isUnicodeExtension),
+ MOZ_ASSERT(!tag.unicodeExtension(),
"locale must contain no Unicode extensions");
if (!tag.canonicalize(cx, intl::LanguageTag::UnicodeExtensionCanonicalForm::No)) {
diff --git a/js/src/builtin/intl/LanguageTag.cpp b/js/src/builtin/intl/LanguageTag.cpp
index e675e19686..583033f629 100644
--- a/js/src/builtin/intl/LanguageTag.cpp
+++ b/js/src/builtin/intl/LanguageTag.cpp
@@ -8,7 +8,7 @@
#include "mozilla/Assertions.h"
#include "mozilla/MathAlgorithms.h"
-#include "mozilla/Range.h"
+#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Variant.h"
@@ -40,102 +40,93 @@ namespace intl {
using namespace js::intl::LanguageTagLimits;
-using ConstCharRange = mozilla::Range<const char>;
-
-#ifdef DEBUG
template <typename CharT>
-bool IsStructurallyValidLanguageTag(
- const mozilla::Range<const CharT>& language) {
+bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> language) {
// Tell the analysis the |std::all_of| function can't GC.
JS::AutoSuppressGCAnalysis nogc;
// unicode_language_subtag = alpha{2,3} | alpha{5,8};
- size_t length = language.length();
- const CharT* str = language.begin().get();
+ size_t length = language.size();
+ const CharT* str = language.data();
return ((2 <= length && length <= 3) || (5 <= length && length <= 8)) &&
- std::all_of(str, str + length, mozilla::IsAsciiLowercaseAlpha<CharT>);
+ std::all_of(str, str + length, mozilla::IsAsciiAlpha<CharT>);
}
template bool IsStructurallyValidLanguageTag(
- const mozilla::Range<const Latin1Char>& language);
+ mozilla::Span<const char> language);
+template bool IsStructurallyValidLanguageTag(
+ mozilla::Span<const Latin1Char> language);
template bool IsStructurallyValidLanguageTag(
- const mozilla::Range<const char16_t>& language);
+ mozilla::Span<const char16_t> language);
template <typename CharT>
-bool IsStructurallyValidScriptTag(const mozilla::Range<const CharT>& script) {
+bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> script) {
// Tell the analysis the |std::all_of| function can't GC.
JS::AutoSuppressGCAnalysis nogc;
// unicode_script_subtag = alpha{4} ;
- size_t length = script.length();
- const CharT* str = script.begin().get();
- return length == 4 && mozilla::IsAsciiUppercaseAlpha<CharT>(str[0]) &&
- std::all_of(str + 1, str + length,
- mozilla::IsAsciiLowercaseAlpha<CharT>);
+ size_t length = script.size();
+ const CharT* str = script.data();
+ return length == 4 &&
+ std::all_of(str, str + length, mozilla::IsAsciiAlpha<CharT>);
}
template bool IsStructurallyValidScriptTag(
- const mozilla::Range<const Latin1Char>& script);
+ mozilla::Span<const char> script);
template bool IsStructurallyValidScriptTag(
- const mozilla::Range<const char16_t>& script);
+ mozilla::Span<const Latin1Char> script);
+template bool IsStructurallyValidScriptTag(
+ mozilla::Span<const char16_t> script);
template <typename CharT>
-bool IsStructurallyValidRegionTag(const mozilla::Range<const CharT>& region) {
+bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> region) {
// Tell the analysis the |std::all_of| function can't GC.
JS::AutoSuppressGCAnalysis nogc;
// unicode_region_subtag = (alpha{2} | digit{3}) ;
- size_t length = region.length();
- const CharT* str = region.begin().get();
- return (length == 2 && std::all_of(str, str + length,
- mozilla::IsAsciiUppercaseAlpha<CharT>)) ||
+ size_t length = region.size();
+ const CharT* str = region.data();
+ return (length == 2 &&
+ std::all_of(str, str + length, mozilla::IsAsciiAlpha<CharT>)) ||
(length == 3 &&
std::all_of(str, str + length, mozilla::IsAsciiDigit<CharT>));
}
template bool IsStructurallyValidRegionTag(
- const mozilla::Range<const Latin1Char>& region);
+ mozilla::Span<const char> region);
+template bool IsStructurallyValidRegionTag(
+ mozilla::Span<const Latin1Char> region);
template bool IsStructurallyValidRegionTag(
- const mozilla::Range<const char16_t>& region);
+ mozilla::Span<const char16_t> region);
-bool IsStructurallyValidVariantTag(const ConstCharRange& variant) {
+#ifdef DEBUG
+bool IsStructurallyValidVariantTag(mozilla::Span<const char> variant) {
// unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
- auto isAsciiLowercaseAlphanumeric = [](char c) {
- return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
- };
- size_t length = variant.length();
- const char* str = variant.begin().get();
+ size_t length = variant.size();
+ const char* str = variant.data();
return ((5 <= length && length <= 8) ||
(length == 4 && mozilla::IsAsciiDigit(str[0]))) &&
- std::all_of(str, str + length, isAsciiLowercaseAlphanumeric);
+ std::all_of(str, str + length, mozilla::IsAsciiAlphanumeric<char>);
}
-bool IsStructurallyValidUnicodeExtensionTag(const ConstCharRange& extension) {
- auto isAsciiLowercaseAlphanumericOrDash = [](char c) {
- return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c) ||
- c == '-';
- };
-
- size_t length = extension.length();
- const char* str = extension.begin().get();
- return LanguageTagParser::canParseUnicodeExtension(extension) &&
- std::all_of(str, str + length, isAsciiLowercaseAlphanumericOrDash);
+bool IsStructurallyValidUnicodeExtensionTag(
+ mozilla::Span<const char> extension) {
+ return LanguageTagParser::canParseUnicodeExtension(extension);
}
-static bool IsStructurallyValidExtensionTag(const ConstCharRange& extension) {
+static bool IsStructurallyValidExtensionTag(
+ mozilla::Span<const char> extension) {
// other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
// NB: Allow any extension, including Unicode and Transform here, because
// this function is only used for an assertion.
- auto isAsciiDigitOrLowercaseAlpha = [](char c) {
- return mozilla::IsAsciiDigit(c) || mozilla::IsAsciiLowercaseAlpha(c);
- };
- size_t length = extension.length();
- const char* str = extension.begin().get();
+ size_t length = extension.size();
+ const char* str = extension.data();
+ const char* const end = extension.data() + length;
if (length <= 2) {
return false;
}
- if (!isAsciiDigitOrLowercaseAlpha(str[0]) || str[0] == 'x') {
+ if (!mozilla::IsAsciiAlphanumeric(str[0]) || str[0] == 'x' || str[0] == 'X') {
return false;
}
str++;
@@ -143,11 +134,11 @@ static bool IsStructurallyValidExtensionTag(const ConstCharRange& extension) {
return false;
}
while (true) {
- const char* sep = reinterpret_cast<const char*>(
- memchr(str, '-', extension.end().get() - str));
- size_t len = (sep ? sep : extension.end().get()) - str;
+ const char* sep =
+ reinterpret_cast<const char*>(memchr(str, '-', end - str));
+ size_t len = (sep ? sep : end) - str;
if (len < 2 || len > 8 ||
- !std::all_of(str, str + len, isAsciiDigitOrLowercaseAlpha)) {
+ !std::all_of(str, str + len, mozilla::IsAsciiAlphanumeric<char>)) {
return false;
}
if (!sep) {
@@ -157,23 +148,28 @@ static bool IsStructurallyValidExtensionTag(const ConstCharRange& extension) {
}
}
-bool IsStructurallyValidPrivateUseTag(const ConstCharRange& privateUse) {
+bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> privateUse) {
// pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
- auto isAsciiDigitOrLowercaseAlpha = [](char c) {
- return mozilla::IsAsciiDigit(c) || mozilla::IsAsciiLowercaseAlpha(c);
- };
- size_t length = privateUse.length();
- const char* str = privateUse.begin().get();
- if (length <= 2 || *str++ != 'x' || *str++ != '-') {
+ size_t length = privateUse.size();
+ const char* str = privateUse.data();
+ const char* const end = privateUse.data() + length;
+ if (length <= 2) {
+ return false;
+ }
+ if (str[0] != 'x' && str[0] != 'X') {
+ return false;
+ }
+ str++;
+ if (*str++ != '-') {
return false;
}
while (true) {
- const char* sep = reinterpret_cast<const char*>(
- memchr(str, '-', privateUse.end().get() - str));
- size_t len = (sep ? sep : privateUse.end().get()) - str;
+ const char* sep =
+ reinterpret_cast<const char*>(memchr(str, '-', end - str));
+ size_t len = (sep ? sep : end) - str;
if (len == 0 || len > 8 ||
- !std::all_of(str, str + len, isAsciiDigitOrLowercaseAlpha)) {
+ !std::all_of(str, str + len, mozilla::IsAsciiAlphanumeric<char>)) {
return false;
}
if (!sep) {
@@ -184,15 +180,33 @@ bool IsStructurallyValidPrivateUseTag(const ConstCharRange& privateUse) {
}
#endif
+ptrdiff_t LanguageTag::unicodeExtensionIndex() const {
+ // The extension subtags aren't necessarily sorted, so we can't use binary
+ // search here.
+ auto p = std::find_if(
+ extensions().begin(), extensions().end(),
+ [](const auto& ext) { return ext[0] == 'u' || ext[0] == 'U'; });
+ if (p != extensions().end()) {
+ return std::distance(extensions().begin(), p);
+ }
+ return -1;
+}
+
+const char* LanguageTag::unicodeExtension() const {
+ ptrdiff_t index = unicodeExtensionIndex();
+ if (index >= 0) {
+ return extensions()[index].get();
+ }
+ return nullptr;
+}
+
bool LanguageTag::setUnicodeExtension(UniqueChars extension) {
MOZ_ASSERT(IsStructurallyValidUnicodeExtensionTag(
- {extension.get(), strlen(extension.get())}));
+ mozilla::MakeCStringSpan(extension.get())));
// Replace the existing Unicode extension subtag or append a new one.
- auto p = std::find_if(extensions().begin(), extensions().end(),
- [](const auto& ext) { return ext[0] == 'u'; });
- if (p != extensions().end()) {
- size_t index = std::distance(extensions().begin(), p);
+ ptrdiff_t index = unicodeExtensionIndex();
+ if (index >= 0) {
extensions_[index] = std::move(extension);
return true;
}
@@ -200,10 +214,8 @@ bool LanguageTag::setUnicodeExtension(UniqueChars extension) {
}
void LanguageTag::clearUnicodeExtension() {
- auto p = std::find_if(extensions().begin(), extensions().end(),
- [](const auto& ext) { return ext[0] == 'u'; });
- if (p != extensions().end()) {
- size_t index = std::distance(extensions().begin(), p);
+ ptrdiff_t index = unicodeExtensionIndex();
+ if (index >= 0) {
extensions_.erase(extensions_.begin() + index);
}
}
@@ -252,35 +264,53 @@ bool LanguageTag::canonicalizeBaseName(JSContext* cx) {
// normalizing the case and ordering all subtags. The canonical syntax form
// itself is specified in UTS 35, 3.2.1.
- // The |LanguageTag| fields are already in normalized case, so we can skip
- // this step.
- MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
- MOZ_ASSERT(script().length() == 0 ||
- IsStructurallyValidScriptTag(script().range()));
- MOZ_ASSERT(region().length() == 0 ||
- IsStructurallyValidRegionTag(region().range()));
-#ifdef DEBUG
- auto validVariant = [](const auto& variant) {
- const char* str = variant.get();
- return IsStructurallyValidVariantTag({str, strlen(str)});
- };
- MOZ_ASSERT(std::all_of(variants().begin(), variants().end(), validVariant));
+ // Language codes need to be in lower case. "JA" -> "ja"
+ language_.toLowerCase();
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
- auto validExtension = [](const auto& extension) {
- const char* str = extension.get();
- return IsStructurallyValidExtensionTag({str, strlen(str)});
- };
- MOZ_ASSERT(
- std::all_of(extensions().begin(), extensions().end(), validExtension));
-#endif
- MOZ_ASSERT(!privateuse() || IsStructurallyValidPrivateUseTag(
- {privateuse(), strlen(privateuse())}));
+ // The first character of a script code needs to be capitalized.
+ // "hans" -> "Hans"
+ script_.toTitleCase();
+ MOZ_ASSERT(script().missing() ||
+ IsStructurallyValidScriptTag(script().span()));
+
+ // Region codes need to be in upper case. "bu" -> "BU"
+ region_.toUpperCase();
+ MOZ_ASSERT(region().missing() ||
+ IsStructurallyValidRegionTag(region().span()));
+
+ // The canonical case for variant subtags is lowercase.
+ for (UniqueChars& variant : variants_) {
+ char* variantChars = variant.get();
+ size_t variantLength = strlen(variantChars);
+ AsciiToLowerCase(variantChars, variantLength, variantChars);
+
+ MOZ_ASSERT(IsStructurallyValidVariantTag({variantChars, variantLength}));
+ }
+
+ // Extensions and privateuse subtags are case normalized in the
+ // |canonicalizeExtensions| method.
// The second step in UTS 35, 3.2.1, is to order all subtags.
- // 1. Any variants are in alphabetical order.
- if (!SortAlphabetically(cx, variants_)) {
- return false;
+ if (variants_.length() > 1) {
+ // 1. Any variants are in alphabetical order.
+ if (!SortAlphabetically(cx, variants_)) {
+ return false;
+ }
+
+ // Reject the Locale identifier if a duplicate variant was found, e.g.
+ // "en-variant-Variant".
+ const UniqueChars* duplicate = std::adjacent_find(
+ variants().begin(), variants().end(), [](const auto& a, const auto& b) {
+ return strcmp(a.get(), b.get()) == 0;
+ });
+ if (duplicate != variants().end()) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
+ JSMSG_DUPLICATE_VARIANT_SUBTAG,
+ duplicate->get());
+ return false;
+ }
}
// 2. Any extensions are in alphabetical order by their singleton.
@@ -301,7 +331,7 @@ bool LanguageTag::canonicalizeBaseName(JSContext* cx) {
// No script replacements are currently present.
// Replace deprecated region subtags with their preferred values.
- if (region().length() > 0) {
+ if (region().present()) {
if (!regionMapping(region_) && complexRegionMapping(region_)) {
performComplexRegionMappings();
}
@@ -320,6 +350,16 @@ bool LanguageTag::canonicalizeBaseName(JSContext* cx) {
bool LanguageTag::canonicalizeExtensions(
JSContext* cx, UnicodeExtensionCanonicalForm canonicalForm) {
+ // The canonical case for all extension subtags is lowercase.
+ for (UniqueChars& extension : extensions_) {
+ char* extensionChars = extension.get();
+ size_t extensionLength = strlen(extensionChars);
+ AsciiToLowerCase(extensionChars, extensionLength, extensionChars);
+
+ MOZ_ASSERT(
+ IsStructurallyValidExtensionTag({extensionChars, extensionLength}));
+ }
+
// Any extensions are in alphabetical order by their singleton.
// "u-ca-chinese-t-zh-latn" -> "t-zh-latn-u-ca-chinese"
if (!SortAlphabetically(cx, extensions_)) {
@@ -337,6 +377,15 @@ bool LanguageTag::canonicalizeExtensions(
}
}
}
+
+ // The canonical case for privateuse subtags is lowercase.
+ if (char* privateuse = privateuse_.get()) {
+ size_t privateuseLength = strlen(privateuse);
+ AsciiToLowerCase(privateuse, privateuseLength, privateuse);
+
+ MOZ_ASSERT(
+ IsStructurallyValidPrivateUseTag({privateuse, privateuseLength}));
+ }
return true;
}
@@ -362,7 +411,8 @@ bool LanguageTag::canonicalizeUnicodeExtension(
const char* const extension = unicodeExtension.get();
MOZ_ASSERT(extension[0] == 'u');
MOZ_ASSERT(extension[1] == '-');
- MOZ_ASSERT(IsStructurallyValidExtensionTag({extension, strlen(extension)}));
+ MOZ_ASSERT(
+ IsStructurallyValidExtensionTag(mozilla::MakeCStringSpan(extension)));
size_t length = strlen(extension);
@@ -376,7 +426,7 @@ bool LanguageTag::canonicalizeUnicodeExtension(
JS_TRY_VAR_OR_RETURN_FALSE(
cx, ok,
LanguageTagParser::parseUnicodeExtension(
- cx, ConstCharRange(extension, length), attributes, keywords));
+ cx, mozilla::MakeSpan(extension, length), attributes, keywords));
MOZ_ASSERT(ok, "unexpected invalid Unicode extension subtag");
auto attributesLessOrEqual = [extension](const Attribute& a,
@@ -475,12 +525,13 @@ bool LanguageTag::canonicalizeUnicodeExtension(
static constexpr size_t UnicodeKeyWithSepLength = UnicodeKeyLength + 1;
- static auto isTrue = [](const ConstCharRange& type) {
+ using StringSpan = mozilla::Span<const char>;
+
+ static auto isTrue = [](StringSpan type) {
constexpr char True[] = "true";
const size_t TrueLength = strlen(True);
- return type.length() == TrueLength &&
- std::char_traits<char>::compare(type.begin().get(), True,
- TrueLength) == 0;
+ return type.size() == TrueLength &&
+ std::char_traits<char>::compare(type.data(), True, TrueLength) == 0;
};
auto appendKey = [&sb, extension](const Keyword& keyword) {
@@ -489,7 +540,7 @@ bool LanguageTag::canonicalizeUnicodeExtension(
};
auto appendKeyword = [&sb, extension](const Keyword& keyword,
- const ConstCharRange& type) {
+ StringSpan type) {
MOZ_ASSERT(keyword.length() > UnicodeKeyLength);
// Elide the Unicode extension type "true".
@@ -501,7 +552,7 @@ bool LanguageTag::canonicalizeUnicodeExtension(
};
auto appendReplacement = [&sb, extension](const Keyword& keyword,
- const ConstCharRange& replacement) {
+ StringSpan replacement) {
MOZ_ASSERT(keyword.length() > UnicodeKeyLength);
// Elide the type "true" if present in the replacement.
@@ -511,7 +562,7 @@ bool LanguageTag::canonicalizeUnicodeExtension(
// Otherwise append the Unicode key (including the separator) and the
// replaced type.
return sb.append(keyword.begin(extension), UnicodeKeyWithSepLength) &&
- sb.append(replacement.begin().get(), replacement.length());
+ sb.append(replacement.data(), replacement.size());
};
// Append all Unicode extension keywords.
@@ -539,15 +590,15 @@ bool LanguageTag::canonicalizeUnicodeExtension(
return false;
}
} else {
- ConstCharRange key(keyword.begin(extension), UnicodeKeyLength);
- ConstCharRange type(keyword.begin(extension) + UnicodeKeyWithSepLength,
- keyword.length() - UnicodeKeyWithSepLength);
+ StringSpan key(keyword.begin(extension), UnicodeKeyLength);
+ StringSpan type(keyword.begin(extension) + UnicodeKeyWithSepLength,
+ keyword.length() - UnicodeKeyWithSepLength);
if (canonicalForm == UnicodeExtensionCanonicalForm::Yes) {
// Search if there's a replacement for the current Unicode keyword.
if (const char* replacement = replaceUnicodeExtensionType(key, type)) {
- if (!appendReplacement(
- keyword, ConstCharRange(replacement, strlen(replacement)))) {
+ if (!appendReplacement(keyword,
+ mozilla::MakeCStringSpan(replacement))) {
return false;
}
} else {
@@ -584,9 +635,9 @@ template <class Buffer>
static bool LanguageTagToString(JSContext* cx, const LanguageTag& tag,
Buffer& sb) {
auto appendSubtag = [&sb](const auto& subtag) {
- auto range = subtag.range();
- MOZ_ASSERT(range.length() > 0);
- return sb.append(range.begin().get(), range.length());
+ auto span = subtag.span();
+ MOZ_ASSERT(span.size() > 0);
+ return sb.append(span.data(), span.size());
};
auto appendSubtagZ = [&sb](const char* subtag) {
@@ -609,14 +660,14 @@ static bool LanguageTagToString(JSContext* cx, const LanguageTag& tag,
}
// Append the script subtag if present.
- if (tag.script().length() > 0) {
+ if (tag.script().present()) {
if (!sb.append('-') || !appendSubtag(tag.script())) {
return false;
}
}
// Append the region subtag if present.
- if (tag.region().length() > 0) {
+ if (tag.region().present()) {
if (!sb.append('-') || !appendSubtag(tag.region())) {
return false;
}
@@ -661,7 +712,8 @@ bool LanguageTag::canonicalizeTransformExtension(
const char* const extension = transformExtension.get();
MOZ_ASSERT(extension[0] == 't');
MOZ_ASSERT(extension[1] == '-');
- MOZ_ASSERT(IsStructurallyValidExtensionTag({extension, strlen(extension)}));
+ MOZ_ASSERT(
+ IsStructurallyValidExtensionTag(mozilla::MakeCStringSpan(extension)));
size_t length = strlen(extension);
@@ -674,7 +726,7 @@ bool LanguageTag::canonicalizeTransformExtension(
JS_TRY_VAR_OR_RETURN_FALSE(
cx, ok,
LanguageTagParser::parseTransformExtension(
- cx, ConstCharRange(extension, length), tag, fields));
+ cx, mozilla::MakeSpan(extension, length), tag, fields));
MOZ_ASSERT(ok, "unexpected invalid transform extension subtag");
auto tfieldLessOrEqual = [extension](const TField& a, const TField& b) {
@@ -720,7 +772,7 @@ bool LanguageTag::canonicalizeTransformExtension(
// [1] https://unicode.org/reports/tr35/#Language_Tag_to_Locale_Identifier
// [2] https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers
// [3] https://github.com/tc39/ecma402/issues/330
- if (tag.language().length() > 0) {
+ if (tag.language().present()) {
if (!sb.append('-')) {
return false;
}
@@ -786,14 +838,14 @@ static bool HasLikelySubtags(LikelySubtags likelySubtags,
// used.
if (likelySubtags == LikelySubtags::Add) {
return !tag.language().equalTo("und") &&
- (tag.script().length() > 0 && !tag.script().equalTo("Zzzz")) &&
- (tag.region().length() > 0 && !tag.region().equalTo("ZZ"));
+ (tag.script().present() && !tag.script().equalTo("Zzzz")) &&
+ (tag.region().present() && !tag.region().equalTo("ZZ"));
}
// The language tag is already minimized if it only contains a language
// subtag whose value is not the placeholder value "und".
- return !tag.language().equalTo("und") && tag.script().length() == 0 &&
- tag.region().length() == 0;
+ return !tag.language().equalTo("und") && tag.script().missing() &&
+ tag.region().missing();
}
// Create an ICU locale ID from the given language tag.
@@ -802,9 +854,9 @@ static bool CreateLocaleForLikelySubtags(const LanguageTag& tag,
MOZ_ASSERT(locale.length() == 0);
auto appendSubtag = [&locale](const auto& subtag) {
- auto range = subtag.range();
- MOZ_ASSERT(range.length() > 0);
- return locale.append(range.begin().get(), range.length());
+ auto span = subtag.span();
+ MOZ_ASSERT(span.size() > 0);
+ return locale.append(span.data(), span.size());
};
// Append the language subtag.
@@ -813,14 +865,14 @@ static bool CreateLocaleForLikelySubtags(const LanguageTag& tag,
}
// Append the script subtag if present.
- if (tag.script().length() > 0) {
+ if (tag.script().present()) {
if (!locale.append('_') || !appendSubtag(tag.script())) {
return false;
}
}
// Append the region subtag if present.
- if (tag.region().length() > 0) {
+ if (tag.region().present()) {
if (!locale.append('_') || !appendSubtag(tag.region())) {
return false;
}
@@ -857,12 +909,12 @@ static bool AssignFromLocaleId(JSContext* cx, LocaleId& localeId,
memmove(localeId.begin(), und, length);
}
- ConstCharRange localeRange(localeId.begin(), localeId.length() - 1);
+ mozilla::Span<const char> localeSpan(localeId.begin(), localeId.length() - 1);
// Retrieve the language, script, and region subtags from the locale ID, but
// ignore any other subtags.
LanguageTag localeTag(cx);
- if (!LanguageTagParser::parseBaseName(cx, localeRange, localeTag)) {
+ if (!LanguageTagParser::parseBaseName(cx, localeSpan, localeTag)) {
return false;
}
@@ -1025,18 +1077,6 @@ UniqueChars LanguageTagParser::chars(JSContext* cx, size_t index,
return chars;
}
-UniqueChars LanguageTagParser::extension(JSContext* cx, const Token& start,
- const Token& end) const {
- MOZ_ASSERT(start.index() < end.index());
-
- size_t length = end.index() - 1 - start.index();
- UniqueChars extension = chars(cx, start.index(), length);
- if (extension) {
- AsciiToLowerCase(extension.get(), length, extension.get());
- }
- return extension;
-}
-
// Parse the `unicode_language_id` production.
//
// unicode_language_id = unicode_language_subtag
@@ -1051,55 +1091,22 @@ UniqueChars LanguageTagParser::extension(JSContext* cx, const Token& start,
//
// |tok| is the current token from |ts|.
//
-// The trailing |parseType| argument corresponds to one of two modes.
-//
-// In the |BaseNameParsing::Normal| mode, our input is in unknown case and is
-// potentially invalid. |tag| will be filled with canonically-cased output, and
-// duplicate variants will lead to an error.
-//
-// In the |BaseNameParsing::WithinTransformExtension| mode, our input is the
-// `tlang` in a lowercased `transform_extensions`. |tag| subtags will be
-// directly copied from the input (i.e. in lowercase). Variant subtags in the
-// `tlang` subtag may contain duplicates.
+// All subtags will be added unaltered to |tag|, without canonicalizing their
+// case or, in the case of variant subtags, detecting and rejecting duplicate
+// variants. Users must subsequently |canonicalizeBaseName| to perform these
+// actions.
//
// Do not use this function directly: use |parseBaseName| or
// |parseTlangFromTransformExtension| instead.
-JS::Result<bool> LanguageTagParser::internalParseBaseName(
- JSContext* cx, LanguageTagParser& ts, LanguageTag& tag, Token& tok,
- BaseNameParsing parseType) {
-#ifdef DEBUG
- auto isAsciiLowerCase = [](const auto& range) {
- // Tell the analysis the |std::all_of| function can't GC.
- JS::AutoSuppressGCAnalysis nogc;
-
- const char* ptr = range.begin().get();
- size_t length = range.length();
- return std::all_of(ptr, ptr + length, mozilla::IsAsciiLowercaseAlpha<char>);
- };
- auto isAsciiDigit = [](const auto& range) {
- // Tell the analysis the |std::all_of| function can't GC.
- JS::AutoSuppressGCAnalysis nogc;
-
- const char* ptr = range.begin().get();
- size_t length = range.length();
- return std::all_of(ptr, ptr + length, mozilla::IsAsciiDigit<char>);
- };
-#endif
-
+JS::Result<bool> LanguageTagParser::internalParseBaseName(JSContext* cx,
+ LanguageTagParser& ts,
+ LanguageTag& tag,
+ Token& tok) {
if (ts.isLanguage(tok)) {
ts.copyChars(tok, tag.language_);
- // Language codes need to be in lower case. "JA" -> "ja"
- if (parseType == BaseNameParsing::Normal) {
- tag.language_.toLowerCase();
- } else {
- MOZ_ASSERT(isAsciiLowerCase(tag.language_.range()));
- }
-
tok = ts.nextToken();
} else {
- MOZ_ASSERT(parseType == BaseNameParsing::Normal);
-
// The language subtag is mandatory.
return false;
}
@@ -1107,28 +1114,12 @@ JS::Result<bool> LanguageTagParser::internalParseBaseName(
if (ts.isScript(tok)) {
ts.copyChars(tok, tag.script_);
- // The first character of a script code needs to be capitalized.
- // "hans" -> "Hans"
- if (parseType == BaseNameParsing::Normal) {
- tag.script_.toTitleCase();
- } else {
- MOZ_ASSERT(isAsciiLowerCase(tag.script_.range()));
- }
-
tok = ts.nextToken();
}
if (ts.isRegion(tok)) {
ts.copyChars(tok, tag.region_);
- // Region codes need to be in upper case. "bu" -> "BU"
- if (parseType == BaseNameParsing::Normal) {
- tag.region_.toUpperCase();
- } else {
- MOZ_ASSERT_IF(tok.length() == 2, isAsciiLowerCase(tag.region_.range()));
- MOZ_ASSERT_IF(tok.length() == 3, isAsciiDigit(tag.region_.range()));
- }
-
tok = ts.nextToken();
}
@@ -1139,28 +1130,6 @@ JS::Result<bool> LanguageTagParser::internalParseBaseName(
if (!variant) {
return cx->alreadyReportedOOM();
}
-
- if (parseType == BaseNameParsing::Normal) {
- // Locale identifiers are case insensitive (UTS 35, section 3.2).
- // All seen variants are compared ignoring case differences by using the
- // lower case form. This allows to properly detect and reject variant
- // repetitions with differing case, e.g. "en-variant-Variant".
- AsciiToLowerCase(variant.get(), tok.length(), variant.get());
-
- // Reject the Locale identifier if a duplicate variant was found.
- //
- // This linear-time verification step means the whole variant subtag
- // checking is potentially quadratic. Language tags are unlikely to be
- // deliberately pathological, so this is okay at least for now.
- for (const auto& seenVariant : variants) {
- if (strcmp(variant.get(), seenVariant.get()) == 0) {
- return false;
- }
- }
- } else {
- // When parsing variants in a `tlang` subtag, duplicates are allowed.
- }
-
if (!variants.append(std::move(variant))) {
return cx->alreadyReportedOOM();
}
@@ -1332,10 +1301,11 @@ bool LanguageTagParser::parse(JSContext* cx, JSLinearString* locale,
return false;
}
-bool LanguageTagParser::parseBaseName(JSContext* cx, ConstCharRange locale,
+bool LanguageTagParser::parseBaseName(JSContext* cx,
+ mozilla::Span<const char> locale,
LanguageTag& tag) {
- LocaleChars localeChars = StringChars(locale.begin().get());
- LanguageTagParser ts(localeChars, locale.length());
+ LocaleChars localeChars = StringChars(locale.data());
+ LanguageTagParser ts(localeChars, locale.size());
Token tok = ts.nextToken();
// Parse only the base-name part and ignore any trailing characters.
@@ -1344,12 +1314,10 @@ bool LanguageTagParser::parseBaseName(JSContext* cx, ConstCharRange locale,
if (ok) {
return true;
}
- if (UniqueChars localeChars =
- DuplicateString(locale.begin().get(), locale.length())) {
+ if (UniqueChars localeChars = DuplicateString(cx, locale.data(),
+ locale.size())) {
JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,
JSMSG_INVALID_LANGUAGE_TAG, localeChars.get());
- } else {
- JS_ReportOutOfMemory(cx);
}
return false;
}
@@ -1357,10 +1325,10 @@ bool LanguageTagParser::parseBaseName(JSContext* cx, ConstCharRange locale,
// Parse |extension|, which must be a valid `transformed_extensions` subtag, and
// fill |tag| and |fields| from the `tlang` and `tfield` components.
JS::Result<bool> LanguageTagParser::parseTransformExtension(
- JSContext* cx, ConstCharRange extension, LanguageTag& tag,
+ JSContext* cx, mozilla::Span<const char> extension, LanguageTag& tag,
TFieldVector& fields) {
- LocaleChars extensionChars = StringChars(extension.begin().get());
- LanguageTagParser ts(extensionChars, extension.length());
+ LocaleChars extensionChars = StringChars(extension.data());
+ LanguageTagParser ts(extensionChars, extension.size());
Token tok = ts.nextToken();
if (!ts.isExtensionStart(tok) || ts.singletonKey(tok) != 't') {
@@ -1417,10 +1385,10 @@ JS::Result<bool> LanguageTagParser::parseTransformExtension(
// and fill |attributes| and |keywords| from the `attribute` and `keyword`
// components.
JS::Result<bool> LanguageTagParser::parseUnicodeExtension(
- JSContext* cx, ConstCharRange extension, AttributesVector& attributes,
- KeywordsVector& keywords) {
- LocaleChars extensionChars = StringChars(extension.begin().get());
- LanguageTagParser ts(extensionChars, extension.length());
+ JSContext* cx, mozilla::Span<const char> extension,
+ AttributesVector& attributes, KeywordsVector& keywords) {
+ LocaleChars extensionChars = StringChars(extension.data());
+ LanguageTagParser ts(extensionChars, extension.size());
Token tok = ts.nextToken();
// unicode_locale_extensions = sep [uU] ((sep keyword)+ |
@@ -1467,9 +1435,10 @@ JS::Result<bool> LanguageTagParser::parseUnicodeExtension(
return tok.isNone();
}
-bool LanguageTagParser::canParseUnicodeExtension(ConstCharRange extension) {
- LocaleChars extensionChars = StringChars(extension.begin().get());
- LanguageTagParser ts(extensionChars, extension.length());
+bool LanguageTagParser::canParseUnicodeExtension(
+ mozilla::Span<const char> extension) {
+ LocaleChars extensionChars = StringChars(extension.data());
+ LanguageTagParser ts(extensionChars, extension.size());
Token tok = ts.nextToken();
// unicode_locale_extensions = sep [uU] ((sep keyword)+ |
@@ -1522,118 +1491,89 @@ bool LanguageTagParser::canParseUnicodeExtensionType(
return tok.isNone();
}
-bool ParseStandaloneLanguagTag(HandleLinearString str, LanguageSubtag& result) {
- auto isLanguage = [](const auto* language, size_t length) {
- // Tell the analysis the |std::all_of| function can't GC.
- JS::AutoSuppressGCAnalysis nogc;
-
- using T = std::remove_pointer_t<decltype(language)>;
- return length >= 2 && length != 4 && length <= 8 &&
- std::all_of(language, language + length, mozilla::IsAsciiAlpha<T>);
- };
-
+bool ParseStandaloneLanguageTag(HandleLinearString str,
+ LanguageSubtag& result) {
JS::AutoCheckCannotGC nogc;
if (str->hasLatin1Chars()) {
- if (!isLanguage(str->latin1Chars(nogc), str->length())) {
+ if (!IsStructurallyValidLanguageTag<Latin1Char>(str->latin1Range(nogc))) {
return false;
}
- result.set(str->latin1Range(nogc));
+ result.set<Latin1Char>(str->latin1Range(nogc));
} else {
- if (!isLanguage(str->twoByteChars(nogc), str->length())) {
+ if (!IsStructurallyValidLanguageTag<char16_t>(str->twoByteRange(nogc))) {
return false;
}
- result.set(str->twoByteRange(nogc));
+ result.set<char16_t>(str->twoByteRange(nogc));
}
- result.toLowerCase();
return true;
}
bool ParseStandaloneScriptTag(HandleLinearString str, ScriptSubtag& result) {
- auto isScript = [](const auto* script, size_t length) {
- // Tell the analysis the |std::all_of| function can't GC.
- JS::AutoSuppressGCAnalysis nogc;
-
- using T = std::remove_pointer_t<decltype(script)>;
- return length == ScriptLength &&
- std::all_of(script, script + ScriptLength, mozilla::IsAsciiAlpha<T>);
- };
-
JS::AutoCheckCannotGC nogc;
if (str->hasLatin1Chars()) {
- if (!isScript(str->latin1Chars(nogc), str->length())) {
+ if (!IsStructurallyValidScriptTag<Latin1Char>(str->latin1Range(nogc))) {
return false;
}
- result.set(str->latin1Range(nogc));
+ result.set<Latin1Char>(str->latin1Range(nogc));
} else {
- if (!isScript(str->twoByteChars(nogc), str->length())) {
+ if (!IsStructurallyValidScriptTag<char16_t>(str->twoByteRange(nogc))) {
return false;
}
- result.set(str->twoByteRange(nogc));
+ result.set<char16_t>(str->twoByteRange(nogc));
}
- result.toTitleCase();
return true;
}
bool ParseStandaloneRegionTag(HandleLinearString str, RegionSubtag& result) {
- auto isRegion = [](const auto* region, size_t length) {
- // Tell the analysis the |std::all_of| function can't GC.
- JS::AutoSuppressGCAnalysis nogc;
-
- using T = std::remove_pointer_t<decltype(region)>;
- return (length == AlphaRegionLength &&
- std::all_of(region, region + AlphaRegionLength,
- mozilla::IsAsciiAlpha<T>)) ||
- (length == DigitRegionLength &&
- std::all_of(region, region + DigitRegionLength,
- mozilla::IsAsciiDigit<T>));
- };
-
JS::AutoCheckCannotGC nogc;
if (str->hasLatin1Chars()) {
- if (!isRegion(str->latin1Chars(nogc), str->length())) {
+ if (!IsStructurallyValidRegionTag<Latin1Char>(str->latin1Range(nogc))) {
return false;
}
- result.set(str->latin1Range(nogc));
+ result.set<Latin1Char>(str->latin1Range(nogc));
} else {
- if (!isRegion(str->twoByteChars(nogc), str->length())) {
+ if (!IsStructurallyValidRegionTag<char16_t>(str->twoByteRange(nogc))) {
return false;
}
- result.set(str->twoByteRange(nogc));
+ result.set<char16_t>(str->twoByteRange(nogc));
}
- result.toUpperCase();
return true;
}
template <typename CharT>
-static bool IsAsciiLowercaseAlpha(const mozilla::Range<const CharT>& range) {
+static bool IsAsciiLowercaseAlpha(mozilla::Span<const CharT> span) {
// Tell the analysis the |std::all_of| function can't GC.
JS::AutoSuppressGCAnalysis nogc;
- const CharT* ptr = range.begin().get();
- size_t length = range.length();
+ const CharT* ptr = span.data();
+ size_t length = span.size();
return std::all_of(ptr, ptr + length, mozilla::IsAsciiLowercaseAlpha<CharT>);
}
static bool IsAsciiLowercaseAlpha(JSLinearString* str) {
JS::AutoCheckCannotGC nogc;
- return str->hasLatin1Chars() ? IsAsciiLowercaseAlpha(str->latin1Range(nogc))
- : IsAsciiLowercaseAlpha(str->twoByteRange(nogc));
+ if (str->hasLatin1Chars()) {
+ return IsAsciiLowercaseAlpha<Latin1Char>(str->latin1Range(nogc));
+ }
+ return IsAsciiLowercaseAlpha<char16_t>(str->twoByteRange(nogc));
}
template <typename CharT>
-static bool IsAsciiAlpha(const mozilla::Range<const CharT>& range) {
+static bool IsAsciiAlpha(mozilla::Span<const CharT> span) {
// Tell the analysis the |std::all_of| function can't GC.
JS::AutoSuppressGCAnalysis nogc;
- const CharT* ptr = range.begin().get();
- size_t length = range.length();
+ const CharT* ptr = span.data();
+ size_t length = span.size();
return std::all_of(ptr, ptr + length, mozilla::IsAsciiAlpha<CharT>);
}
static bool IsAsciiAlpha(JSLinearString* str) {
JS::AutoCheckCannotGC nogc;
- return str->hasLatin1Chars() ? IsAsciiAlpha(str->latin1Range(nogc))
- : IsAsciiAlpha(str->twoByteRange(nogc));
+ if (str->hasLatin1Chars()) {
+ return IsAsciiAlpha<Latin1Char>(str->latin1Range(nogc));
+ }
+ return IsAsciiAlpha<char16_t>(str->twoByteRange(nogc));
}
JS::Result<JSString*> ParseStandaloneISO639LanguageTag(JSContext* cx,
@@ -1656,10 +1596,10 @@ JS::Result<JSString*> ParseStandaloneISO639LanguageTag(JSContext* cx,
LanguageSubtag languageTag;
if (str->hasLatin1Chars()) {
JS::AutoCheckCannotGC nogc;
- languageTag.set(str->latin1Range(nogc));
+ languageTag.set<Latin1Char>(str->latin1Range(nogc));
} else {
JS::AutoCheckCannotGC nogc;
- languageTag.set(str->twoByteRange(nogc));
+ languageTag.set<char16_t>(str->twoByteRange(nogc));
}
if (!isLowerCase) {
@@ -1676,8 +1616,8 @@ JS::Result<JSString*> ParseStandaloneISO639LanguageTag(JSContext* cx,
// Take care to replace deprecated subtags with their preferred values.
JSString* result;
if (LanguageTag::languageMapping(languageTag) || !isLowerCase) {
- auto range = languageTag.range();
- result = NewStringCopyN<CanGC>(cx, range.begin().get(), range.length());
+ auto span = languageTag.span();
+ result = NewStringCopyN<CanGC>(cx, span.data(), span.size());
} else {
result = str;
}
diff --git a/js/src/builtin/intl/LanguageTag.h b/js/src/builtin/intl/LanguageTag.h
index 657458cecb..384ff4bb7a 100644
--- a/js/src/builtin/intl/LanguageTag.h
+++ b/js/src/builtin/intl/LanguageTag.h
@@ -10,7 +10,7 @@
#define builtin_intl_LanguageTag_h
#include "mozilla/Assertions.h"
-#include "mozilla/Range.h"
+#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"
#include "mozilla/TypedEnumBits.h"
#include "mozilla/Variant.h"
@@ -36,57 +36,53 @@ namespace js {
namespace intl {
-#ifdef DEBUG
-
/**
- * Return true if |language| is a valid, case-normalized language subtag.
+ * Return true if |language| is a valid language subtag.
*/
template <typename CharT>
-bool IsStructurallyValidLanguageTag(
- const mozilla::Range<const CharT>& language);
+bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> language);
/**
- * Return true if |script| is a valid, case-normalized script subtag.
+ * Return true if |script| is a valid script subtag.
*/
template <typename CharT>
-bool IsStructurallyValidScriptTag(const mozilla::Range<const CharT>& script);
+bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> script);
/**
- * Return true if |region| is a valid, case-normalized region subtag.
+ * Return true if |region| is a valid region subtag.
*/
template <typename CharT>
-bool IsStructurallyValidRegionTag(const mozilla::Range<const CharT>& region);
+bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> region);
+#ifdef DEBUG
/**
- * Return true if |variant| is a valid, case-normalized variant subtag.
+ * Return true if |variant| is a valid variant subtag.
*/
-bool IsStructurallyValidVariantTag(const mozilla::Range<const char>& variant);
+bool IsStructurallyValidVariantTag(mozilla::Span<const char> variant);
/**
- * Return true if |extension| is a valid, case-normalized Unicode extension
- * subtag.
+ * Return true if |extension| is a valid Unicode extension subtag.
*/
bool IsStructurallyValidUnicodeExtensionTag(
- const mozilla::Range<const char>& extension);
+ mozilla::Span<const char> extension);
/**
- * Return true if |privateUse| is a valid, case-normalized private-use subtag.
+ * Return true if |privateUse| is a valid private-use subtag.
*/
-bool IsStructurallyValidPrivateUseTag(
- const mozilla::Range<const char>& privateUse);
+bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> privateUse);
#endif
template <typename CharT>
char AsciiToLowerCase(CharT c) {
MOZ_ASSERT(mozilla::IsAscii(c));
- return mozilla::IsAsciiUppercaseAlpha(c) ? (c | 0x20) : c;
+ return mozilla::IsAsciiUppercaseAlpha(c) ? (c + 0x20) : c;
}
template <typename CharT>
char AsciiToUpperCase(CharT c) {
MOZ_ASSERT(mozilla::IsAscii(c));
- return mozilla::IsAsciiLowercaseAlpha(c) ? (c & ~0x20) : c;
+ return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c;
}
template <typename CharT>
@@ -141,7 +137,7 @@ static constexpr size_t TransformKeyLength = 2;
template <size_t Length>
class LanguageTagSubtag final {
uint8_t length_ = 0;
- char chars_[Length];
+ char chars_[Length] = {}; // zero initialize
public:
LanguageTagSubtag() = default;
@@ -150,21 +146,31 @@ class LanguageTagSubtag final {
LanguageTagSubtag& operator=(const LanguageTagSubtag&) = delete;
size_t length() const { return length_; }
+ bool missing() const { return length_ == 0; }
+ bool present() const { return length_ > 0; }
- mozilla::Range<const char> range() const { return {chars_, length_}; }
+ mozilla::Span<const char> span() const { return {chars_, length_}; }
template <typename CharT>
- void set(const mozilla::Range<const CharT>& str) {
- MOZ_ASSERT(str.length() <= Length);
- std::copy_n(str.begin().get(), str.length(), chars_);
- length_ = str.length();
+ void set(mozilla::Span<const CharT> str) {
+ MOZ_ASSERT(str.size() <= Length);
+ std::copy_n(str.data(), str.size(), chars_);
+ length_ = str.size();
}
- void toLowerCase() { AsciiToLowerCase(chars_, length(), chars_); }
+ // The toXYZCase() methods are using |Length| instead of |length()|, because
+ // current compilers (tested GCC and Clang) can't infer the maximum string
+ // length - even when using hints like |std::min| - and instead are emitting
+ // SIMD optimized code. Using a fixed sized length avoids emitting the SIMD
+ // code. (Emitting SIMD code doesn't make sense here, because the SIMD code
+ // only kicks in for long strings.) A fixed length will additionally ensure
+ // the compiler unrolls the loop in the case conversion code.
- void toUpperCase() { AsciiToUpperCase(chars_, length(), chars_); }
+ void toLowerCase() { AsciiToLowerCase(chars_, Length, chars_); }
- void toTitleCase() { AsciiToTitleCase(chars_, length(), chars_); }
+ void toUpperCase() { AsciiToUpperCase(chars_, Length, chars_); }
+
+ void toTitleCase() { AsciiToTitleCase(chars_, Length, chars_); }
template <size_t N>
bool equalTo(const char (&str)[N]) const {
@@ -224,8 +230,7 @@ class MOZ_STACK_CLASS LanguageTag final {
MOZ_MUST_USE bool updateGrandfatheredMappings(JSContext* cx);
static const char* replaceUnicodeExtensionType(
- const mozilla::Range<const char>& key,
- const mozilla::Range<const char>& type);
+ mozilla::Span<const char> key, mozilla::Span<const char> type);
public:
explicit LanguageTag(JSContext* cx) : variants_(cx), extensions_(cx) {}
@@ -241,65 +246,68 @@ class MOZ_STACK_CLASS LanguageTag final {
const char* privateuse() const { return privateuse_.get(); }
/**
- * Set the language subtag. The input must be a valid, case-normalized
- * language subtag.
+ * Return the Unicode extension subtag or nullptr if not present.
+ */
+ const char* unicodeExtension() const;
+
+ private:
+ ptrdiff_t unicodeExtensionIndex() const;
+
+ public:
+ /**
+ * Set the language subtag. The input must be a valid language subtag.
*/
template <size_t N>
void setLanguage(const char (&language)[N]) {
- mozilla::Range<const char> range(language, N - 1);
- MOZ_ASSERT(IsStructurallyValidLanguageTag(range));
- language_.set(range);
+ mozilla::Span<const char> span(language, N - 1);
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(span));
+ language_.set(span);
}
/**
- * Set the language subtag. The input must be a valid, case-normalized
- * language subtag.
+ * Set the language subtag. The input must be a valid language subtag.
*/
void setLanguage(const LanguageSubtag& language) {
- MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
- language_.set(language.range());
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
+ language_.set(language.span());
}
/**
- * Set the script subtag. The input must be a valid, case-normalized
- * script subtag or the empty string.
+ * Set the script subtag. The input must be a valid script subtag.
*/
template <size_t N>
void setScript(const char (&script)[N]) {
- mozilla::Range<const char> range(script, N - 1);
- MOZ_ASSERT(IsStructurallyValidScriptTag(range));
- script_.set(range);
+ mozilla::Span<const char> span(script, N - 1);
+ MOZ_ASSERT(IsStructurallyValidScriptTag(span));
+ script_.set(span);
}
/**
- * Set the script subtag. The input must be a valid, case-normalized
- * script subtag or the empty string.
+ * Set the script subtag. The input must be a valid script subtag or the empty
+ * string.
*/
void setScript(const ScriptSubtag& script) {
- MOZ_ASSERT(script.length() == 0 ||
- IsStructurallyValidScriptTag(script.range()));
- script_.set(script.range());
+ MOZ_ASSERT(script.missing() || IsStructurallyValidScriptTag(script.span()));
+ script_.set(script.span());
}
/**
- * Set the region subtag. The input must be a valid, case-normalized
- * region subtag or the empty string.
+ * Set the region subtag. The input must be a valid region subtag.
*/
template <size_t N>
void setRegion(const char (&region)[N]) {
- mozilla::Range<const char> range(region, N - 1);
- MOZ_ASSERT(IsStructurallyValidRegionTag(range));
- region_.set(range);
+ mozilla::Span<const char> span(region, N - 1);
+ MOZ_ASSERT(IsStructurallyValidRegionTag(span));
+ region_.set(span);
}
/**
- * Set the region subtag. The input must be a valid, case-normalized
- * region subtag or the empty string.
+ * Set the region subtag. The input must be a valid region subtag or the empty
+ * string.
*/
void setRegion(const RegionSubtag& region) {
- MOZ_ASSERT(region.length() == 0 ||
- IsStructurallyValidRegionTag(region.range()));
- region_.set(region.range());
+ MOZ_ASSERT(region.missing() || IsStructurallyValidRegionTag(region.span()));
+ region_.set(region.span());
}
/**
@@ -308,8 +316,8 @@ class MOZ_STACK_CLASS LanguageTag final {
void clearVariants() { variants_.clearAndFree(); }
/**
- * Set the Unicode extension subtag. The input must be a valid,
- * case-normalized Unicode extension subtag.
+ * Set the Unicode extension subtag. The input must be a valid Unicode
+ * extension subtag.
*/
bool setUnicodeExtension(JS::UniqueChars extension);
@@ -319,8 +327,8 @@ class MOZ_STACK_CLASS LanguageTag final {
void clearUnicodeExtension();
/**
- * Set the private-use subtag. The input must be a valid, case-normalized
- * private-use subtag or the empty string.
+ * Set the private-use subtag. The input must be a valid private-use subtag
+ * or nullptr.
*/
void setPrivateuse(JS::UniqueChars privateuse) {
MOZ_ASSERT(!privateuse ||
@@ -462,10 +470,10 @@ class MOZ_STACK_CLASS LanguageTagParser final {
size_t length = tok.length();
if (locale_.is<const JS::Latin1Char*>()) {
using T = const JS::Latin1Char;
- subtag.set(mozilla::Range<T>(locale_.as<T*>() + index, length));
+ subtag.set(mozilla::MakeSpan(locale_.as<T*>() + index, length));
} else {
using T = const char16_t;
- subtag.set(mozilla::Range<T>(locale_.as<T*>() + index, length));
+ subtag.set(mozilla::MakeSpan(locale_.as<T*>() + index, length));
}
}
@@ -477,10 +485,15 @@ class MOZ_STACK_CLASS LanguageTagParser final {
return chars(cx, tok.index(), tok.length());
}
- Token nextToken();
-
JS::UniqueChars extension(JSContext* cx, const Token& start,
- const Token& end) const;
+ const Token& end) const {
+ MOZ_ASSERT(start.index() < end.index());
+
+ size_t length = end.index() - 1 - start.index();
+ return chars(cx, start.index(), length);
+ }
+
+ Token nextToken();
// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
//
@@ -513,8 +526,7 @@ class MOZ_STACK_CLASS LanguageTagParser final {
// Always returns the lower case form of an alphabetical character.
char singletonKey(const Token& tok) const {
MOZ_ASSERT(tok.length() == 1);
- char c = charAt(tok.index());
- return mozilla::IsAsciiUppercaseAlpha(c) ? (c | 0x20) : c;
+ return AsciiToLowerCase(charAt(tok.index()));
}
// extensions = unicode_locale_extensions |
@@ -581,23 +593,18 @@ class MOZ_STACK_CLASS LanguageTagParser final {
return 1 <= tok.length() && tok.length() <= 8;
}
- enum class BaseNameParsing : bool { Normal, WithinTransformExtension };
-
// Helper function for use in |parseBaseName| and
// |parseTlangInTransformExtension|. Do not use this directly!
static JS::Result<bool> internalParseBaseName(JSContext* cx,
LanguageTagParser& ts,
- LanguageTag& tag, Token& tok,
- BaseNameParsing parseType);
+ LanguageTag& tag, Token& tok);
// Parse the `unicode_language_id` production, i.e. the
- // language/script/region/variants portion of a language tag, into |tag|,
- // which will be filled with canonical-cased components (lowercase language,
- // titlecase script, uppercase region, lowercased and alphabetized and
- // deduplicated variants). |tok| must be the current token.
+ // language/script/region/variants portion of a language tag, into |tag|.
+ // |tok| must be the current token.
static JS::Result<bool> parseBaseName(JSContext* cx, LanguageTagParser& ts,
LanguageTag& tag, Token& tok) {
- return internalParseBaseName(cx, ts, tag, tok, BaseNameParsing::Normal);
+ return internalParseBaseName(cx, ts, tag, tok);
}
// Parse the `tlang` production within a parsed 't' transform extension.
@@ -611,17 +618,14 @@ class MOZ_STACK_CLASS LanguageTagParser final {
// Return an error on internal failure. Otherwise, return a success value. If
// there was no `tlang`, then |tag.language().missing()|. But if there was a
// `tlang`, then |tag| is filled with subtags exactly as they appeared in the
- // parse input: fully lowercase, variants in alphabetical order without
- // duplicates.
+ // parse input.
static JS::Result<JS::Ok> parseTlangInTransformExtension(
JSContext* cx, LanguageTagParser& ts, LanguageTag& tag, Token& tok) {
MOZ_ASSERT(ts.isLanguage(tok));
- return internalParseBaseName(cx, ts, tag, tok,
- BaseNameParsing::WithinTransformExtension)
- .map([](bool parsed) {
- MOZ_ASSERT(parsed);
- return JS::Ok();
- });
+ return internalParseBaseName(cx, ts, tag, tok).map([](bool parsed) {
+ MOZ_ASSERT(parsed);
+ return JS::Ok();
+ });
}
friend class LanguageTag;
@@ -650,14 +654,14 @@ class MOZ_STACK_CLASS LanguageTagParser final {
// `tlang` and `tfield` components. Data in |tag| is lowercase, consistent
// with |extension|.
static JS::Result<bool> parseTransformExtension(
- JSContext* cx, mozilla::Range<const char> extension, LanguageTag& tag,
+ JSContext* cx, mozilla::Span<const char> extension, LanguageTag& tag,
TFieldVector& fields);
// Parse |extension|, which must be a validated, fully lowercase
// `unicode_locale_extensions` subtag, and fill |attributes| and |keywords|
// from the `attribute` and `keyword` components.
static JS::Result<bool> parseUnicodeExtension(
- JSContext* cx, mozilla::Range<const char> extension,
+ JSContext* cx, mozilla::Span<const char> extension,
AttributesVector& attributes, KeywordsVector& keywords);
public:
@@ -673,11 +677,11 @@ class MOZ_STACK_CLASS LanguageTagParser final {
// Parse the input string as the base-name parts (language, script, region,
// variants) of a language tag. Ignores any trailing characters.
- static bool parseBaseName(JSContext* cx, mozilla::Range<const char> locale,
+ static bool parseBaseName(JSContext* cx, mozilla::Span<const char> locale,
LanguageTag& tag);
// Return true iff |extension| can be parsed as a Unicode extension subtag.
- static bool canParseUnicodeExtension(mozilla::Range<const char> extension);
+ static bool canParseUnicodeExtension(mozilla::Span<const char> extension);
// Return true iff |unicodeType| can be parsed as a Unicode extension type.
static bool canParseUnicodeExtensionType(JSLinearString* unicodeType);
@@ -687,24 +691,21 @@ MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(LanguageTagParser::TokenKind)
/**
* Parse a string as a standalone |language| tag. If |str| is a standalone
- * language tag, store it in case-normalized form in |result| and return true.
- * Otherwise return false.
+ * language tag, store it in |result| and return true. Otherwise return false.
*/
-MOZ_MUST_USE bool ParseStandaloneLanguagTag(JS::Handle<JSLinearString*> str,
- LanguageSubtag& result);
+MOZ_MUST_USE bool ParseStandaloneLanguageTag(JS::Handle<JSLinearString*> str,
+ LanguageSubtag& result);
/**
* Parse a string as a standalone |script| tag. If |str| is a standalone script
- * tag, store it in case-normalized form in |result| and return true. Otherwise
- * return false.
+ * tag, store it in |result| and return true. Otherwise return false.
*/
MOZ_MUST_USE bool ParseStandaloneScriptTag(JS::Handle<JSLinearString*> str,
ScriptSubtag& result);
/**
* Parse a string as a standalone |region| tag. If |str| is a standalone region
- * tag, store it in case-normalized form in |result| and return true. Otherwise
- * return false.
+ * tag, store it in |result| and return true. Otherwise return false.
*/
MOZ_MUST_USE bool ParseStandaloneRegionTag(JS::Handle<JSLinearString*> str,
RegionSubtag& result);
diff --git a/js/src/builtin/intl/LanguageTagGenerated.cpp b/js/src/builtin/intl/LanguageTagGenerated.cpp
index 8952286976..6255861141 100644
--- a/js/src/builtin/intl/LanguageTagGenerated.cpp
+++ b/js/src/builtin/intl/LanguageTagGenerated.cpp
@@ -3,7 +3,7 @@
// URL: https://unicode.org/Public/cldr/35.1/core.zip
#include "mozilla/Assertions.h"
-#include "mozilla/Range.h"
+#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"
#include <algorithm>
@@ -18,7 +18,6 @@
#include "builtin/intl/LanguageTag.h"
using namespace js::intl::LanguageTagLimits;
-using ConstCharRange = mozilla::Range<const char>;
template <size_t Length, size_t TagLength, size_t SubtagLength>
static inline bool HasReplacement(
@@ -27,7 +26,7 @@ static inline bool HasReplacement(
MOZ_ASSERT(subtag.length() == TagLength - 1,
"subtag must have the same length as the list of subtags");
- const char* ptr = subtag.range().begin().get();
+ const char* ptr = subtag.span().data();
return std::binary_search(std::begin(subtags), std::end(subtags), ptr,
[](const char* a, const char* b) {
return memcmp(a, b, TagLength - 1) < 0;
@@ -42,7 +41,7 @@ static inline const char* SearchReplacement(
MOZ_ASSERT(subtag.length() == TagLength - 1,
"subtag must have the same length as the list of subtags");
- const char* ptr = subtag.range().begin().get();
+ const char* ptr = subtag.span().data();
auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr,
[](const char* a, const char* b) {
return memcmp(a, b, TagLength - 1) < 0;
@@ -53,11 +52,40 @@ static inline const char* SearchReplacement(
return nullptr;
}
+#ifdef DEBUG
+static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
+ // Tell the analysis the |std::all_of| function can't GC.
+ JS::AutoSuppressGCAnalysis nogc;
+
+ return std::all_of(span.begin(), span.end(), mozilla::IsAsciiLowercaseAlpha<char>);
+}
+
+static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
+ // Tell the analysis the |std::all_of| function can't GC.
+ JS::AutoSuppressGCAnalysis nogc;
+
+ return std::all_of(span.begin(), span.end(), mozilla::IsAsciiUppercaseAlpha<char>) ||
+ std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>);
+}
+
+static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
+ auto isAsciiLowercaseAlphaOrDigit = [](char c) {
+ return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
+ };
+
+ // Tell the analysis the |std::all_of| function can't GC.
+ JS::AutoSuppressGCAnalysis nogc;
+
+ return std::all_of(span.begin(), span.end(), isAsciiLowercaseAlphaOrDigit);
+}
+#endif
+
// Mappings from language subtags to preferred values.
// Derived from CLDR Supplemental Data, version 35.1.
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
- MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.span()));
if (language.length() == 2) {
static const char languages[9][3] = {
@@ -68,7 +96,7 @@ bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
};
if (const char* replacement = SearchReplacement(languages, aliases, language)) {
- language.set(ConstCharRange(replacement, strlen(replacement)));
+ language.set(mozilla::MakeCStringSpan(replacement));
return true;
}
return false;
@@ -149,7 +177,7 @@ bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
};
if (const char* replacement = SearchReplacement(languages, aliases, language)) {
- language.set(ConstCharRange(replacement, strlen(replacement)));
+ language.set(mozilla::MakeCStringSpan(replacement));
return true;
}
return false;
@@ -162,7 +190,8 @@ bool js::intl::LanguageTag::languageMapping(LanguageSubtag& language) {
// Derived from CLDR Supplemental Data, version 35.1.
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& language) {
- MOZ_ASSERT(IsStructurallyValidLanguageTag(language.range()));
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.span()));
if (language.length() == 2) {
return language.equalTo("sh");
@@ -183,7 +212,8 @@ bool js::intl::LanguageTag::complexLanguageMapping(const LanguageSubtag& languag
// Derived from CLDR Supplemental Data, version 35.1.
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
- MOZ_ASSERT(IsStructurallyValidRegionTag(region.range()));
+ MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
+ MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span()));
if (region.length() == 2) {
static const char regions[23][3] = {
@@ -198,7 +228,7 @@ bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
};
if (const char* replacement = SearchReplacement(regions, aliases, region)) {
- region.set(ConstCharRange(replacement, strlen(replacement)));
+ region.set(mozilla::MakeCStringSpan(replacement));
return true;
}
return false;
@@ -271,7 +301,7 @@ bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
};
if (const char* replacement = SearchReplacement(regions, aliases, region)) {
- region.set(ConstCharRange(replacement, strlen(replacement)));
+ region.set(mozilla::MakeCStringSpan(replacement));
return true;
}
return false;
@@ -282,7 +312,8 @@ bool js::intl::LanguageTag::regionMapping(RegionSubtag& region) {
// Derived from CLDR Supplemental Data, version 35.1.
// https://unicode.org/Public/cldr/35.1/core.zip
bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
- MOZ_ASSERT(IsStructurallyValidRegionTag(region.range()));
+ MOZ_ASSERT(IsStructurallyValidRegionTag(region.span()));
+ MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.span()));
if (region.length() == 2) {
return region.equalTo("AN") ||
@@ -304,11 +335,12 @@ bool js::intl::LanguageTag::complexRegionMapping(const RegionSubtag& region) {
// Derived from CLDR Supplemental Data, version 35.1.
// https://unicode.org/Public/cldr/35.1/core.zip
void js::intl::LanguageTag::performComplexLanguageMappings() {
- MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
if (language().equalTo("cnr")) {
setLanguage("sr");
- if (region().length() == 0) {
+ if (region().missing()) {
setRegion("ME");
}
}
@@ -316,20 +348,20 @@ void js::intl::LanguageTag::performComplexLanguageMappings() {
language().equalTo("prs") ||
language().equalTo("tnf")) {
setLanguage("fa");
- if (region().length() == 0) {
+ if (region().missing()) {
setRegion("AF");
}
}
else if (language().equalTo("hbs") ||
language().equalTo("sh")) {
setLanguage("sr");
- if (script().length() == 0) {
+ if (script().missing()) {
setScript("Latn");
}
}
else if (language().equalTo("swc")) {
setLanguage("sw");
- if (region().length() == 0) {
+ if (region().missing()) {
setRegion("CD");
}
}
@@ -339,8 +371,10 @@ void js::intl::LanguageTag::performComplexLanguageMappings() {
// Derived from CLDR Supplemental Data, version 35.1.
// https://unicode.org/Public/cldr/35.1/core.zip
void js::intl::LanguageTag::performComplexRegionMappings() {
- MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
- MOZ_ASSERT(IsStructurallyValidRegionTag(region().range()));
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
+ MOZ_ASSERT(IsStructurallyValidRegionTag(region().span()));
+ MOZ_ASSERT(IsCanonicallyCasedRegionTag(region().span()));
if (region().equalTo("172")) {
if (language().equalTo("hy") ||
@@ -562,14 +596,17 @@ bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
// no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag
// that |unicode_locale_id| doesn't support.)
// * No RG tag contains |extensions| or |pu_extensions|.
- if (script().length() != 0 ||
- region().length() != 0 ||
+ if (script().present() ||
+ region().present() ||
variants().length() != 1 ||
extensions().length() != 0 ||
privateuse()) {
return true;
}
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
+ MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeCStringSpan(variants()[0].get())));
+
auto variantEqualTo = [this](const char* variant) {
return strcmp(variants()[0].get(), variant) == 0;
};
@@ -619,34 +656,34 @@ bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
}
template <size_t Length>
-static inline bool IsUnicodeKey(const ConstCharRange& key,
+static inline bool IsUnicodeKey(mozilla::Span<const char> key,
const char (&str)[Length]) {
static_assert(Length == UnicodeKeyLength + 1,
"Unicode extension key is two characters long");
- return memcmp(key.begin().get(), str, Length - 1) == 0;
+ return memcmp(key.data(), str, Length - 1) == 0;
}
template <size_t Length>
-static inline bool IsUnicodeType(const ConstCharRange& type,
+static inline bool IsUnicodeType(mozilla::Span<const char> type,
const char (&str)[Length]) {
static_assert(Length > UnicodeKeyLength + 1,
"Unicode extension type contains more than two characters");
- return type.length() == (Length - 1) &&
- memcmp(type.begin().get(), str, Length - 1) == 0;
+ return type.size() == (Length - 1) &&
+ memcmp(type.data(), str, Length - 1) == 0;
}
-static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
+static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) {
#ifdef DEBUG
auto isNull = [](char c) {
return c == '\0';
};
#endif
- MOZ_ASSERT(std::none_of(b.begin().get(), b.end().get(), isNull),
+ MOZ_ASSERT(std::none_of(b.begin(), b.end(), isNull),
"unexpected null-character in string");
using UnsignedChar = unsigned char;
- for (size_t i = 0; i < b.length(); i++) {
+ for (size_t i = 0; i < b.size(); i++) {
// |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
// we've reached the end of |a|, the below if-statement will always be true.
// That ensures we don't read past the end of |a|.
@@ -657,13 +694,13 @@ static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
// Return zero if both strings are equal or a negative number if |b| is a
// prefix of |a|.
- return -int32_t(UnsignedChar(a[b.length()]));
+ return -int32_t(UnsignedChar(a[b.size()]));
};
template <size_t Length>
static inline const char* SearchReplacement(const char* (&types)[Length],
const char* (&aliases)[Length],
- const ConstCharRange& type) {
+ mozilla::Span<const char> type) {
auto p = std::lower_bound(std::begin(types), std::end(types), type,
[](const auto& a, const auto& b) {
@@ -682,7 +719,7 @@ static inline const char* SearchReplacement(const char* (&types)[Length],
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
*/
const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
- const ConstCharRange& key, const ConstCharRange& type) {
+ mozilla::Span<const char> key, mozilla::Span<const char> type) {
#ifdef DEBUG
static auto isAsciiLowercaseAlphanumeric = [](char c) {
return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
@@ -693,12 +730,12 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
};
#endif
- MOZ_ASSERT(key.length() == UnicodeKeyLength);
- MOZ_ASSERT(std::all_of(key.begin().get(), key.end().get(),
+ MOZ_ASSERT(key.size() == UnicodeKeyLength);
+ MOZ_ASSERT(std::all_of(key.begin(), key.end(),
isAsciiLowercaseAlphanumeric));
- MOZ_ASSERT(type.length() > UnicodeKeyLength);
- MOZ_ASSERT(std::all_of(type.begin().get(), type.end().get(),
+ MOZ_ASSERT(type.size() > UnicodeKeyLength);
+ MOZ_ASSERT(std::all_of(type.begin(), type.end(),
isAsciiLowercaseAlphanumericOrDash));
if (IsUnicodeKey(key, "ca")) {
diff --git a/js/src/builtin/intl/Locale.cpp b/js/src/builtin/intl/Locale.cpp
index 7e26add298..5d55fad2a1 100644
--- a/js/src/builtin/intl/Locale.cpp
+++ b/js/src/builtin/intl/Locale.cpp
@@ -12,7 +12,7 @@
#include "mozilla/Assertions.h"
#include "mozilla/Casting.h"
#include "mozilla/Maybe.h"
-#include "mozilla/Range.h"
+#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"
#include <algorithm>
@@ -56,10 +56,10 @@ static inline bool IsLocale(HandleValue v) {
// Return the length of the base-name subtags.
static size_t BaseNameLength(const LanguageTag& tag) {
size_t baseNameLength = tag.language().length();
- if (tag.script().length() > 0) {
+ if (tag.script().present()) {
baseNameLength += 1 + tag.script().length();
}
- if (tag.region().length() > 0) {
+ if (tag.region().present()) {
baseNameLength += 1 + tag.region().length();
}
for (const auto& variant : tag.variants()) {
@@ -75,7 +75,7 @@ struct IndexAndLength {
IndexAndLength(size_t index, size_t length) : index(index), length(length){};
template <typename T>
- mozilla::Range<const T> rangeOf(const T* ptr) const {
+ mozilla::Span<const T> spanOf(const T* ptr) const {
return {ptr + index, length};
}
};
@@ -85,6 +85,9 @@ static mozilla::Maybe<IndexAndLength> UnicodeExtensionPosition(
const LanguageTag& tag) {
size_t index = 0;
for (const auto& extension : tag.extensions()) {
+ MOZ_ASSERT(!mozilla::IsAsciiUppercaseAlpha(extension[0]),
+ "extensions are case normalized to lowercase");
+
size_t extensionLength = strlen(extension.get());
if (extension[0] == 'u') {
return mozilla::Some(IndexAndLength{index, extensionLength});
@@ -287,7 +290,7 @@ static bool ApplyOptionsToTag(JSContext* cx, LanguageTag& tag,
// Step 4.
intl::LanguageSubtag language;
- if (option && !intl::ParseStandaloneLanguagTag(option, language)) {
+ if (option && !intl::ParseStandaloneLanguageTag(option, language)) {
if (UniqueChars str = StringToNewUTF8CharsZ(cx, *option)) {
JS_ReportErrorNumberUTF8(cx, js::GetErrorMessage, nullptr,
JSMSG_INVALID_OPTION_VALUE, "language",
@@ -329,19 +332,19 @@ static bool ApplyOptionsToTag(JSContext* cx, LanguageTag& tag,
// Step 9 (Already performed in caller).
// Skip steps 10-13 when no subtags were modified.
- if (language.length() > 0 || script.length() > 0 || region.length() > 0) {
+ if (language.present() || script.present() || region.present()) {
// Step 10.
- if (language.length() > 0) {
+ if (language.present()) {
tag.setLanguage(language);
}
// Step 11.
- if (script.length() > 0) {
+ if (script.present()) {
tag.setScript(script);
}
// Step 12.
- if (region.length() > 0) {
+ if (region.present()) {
tag.setRegion(region);
}
@@ -378,16 +381,11 @@ static bool ApplyUnicodeExtensionToTag(JSContext* cx, LanguageTag& tag,
return false;
}
- // Check if there's an existing Unicode extension subtag. (The extension
- // subtags aren't necessarily sorted, so we can't use binary search here.)
- const UniqueChars* existingUnicodeExtension =
- std::find_if(tag.extensions().begin(), tag.extensions().end(),
- [](const auto& extension) { return extension[0] == 'u'; });
+ // Check if there's an existing Unicode extension subtag.
const char* unicodeExtensionEnd = nullptr;
const char* unicodeExtensionKeywords = nullptr;
- if (existingUnicodeExtension != tag.extensions().end()) {
- const char* unicodeExtension = existingUnicodeExtension->get();
+ if (const char* unicodeExtension = tag.unicodeExtension()) {
unicodeExtensionEnd = unicodeExtension + strlen(unicodeExtension);
SepKeywordIterator<char> iter(unicodeExtension, unicodeExtensionEnd);
@@ -423,8 +421,6 @@ static bool ApplyUnicodeExtensionToTag(JSContext* cx, LanguageTag& tag,
// keyword with the same key is detected as a duplicate when canonicalizing
// the Unicode extension subtag and gets discarded.
- size_t startNewKeywords = newExtension.length();
-
if (calendar) {
if (!appendKeyword("-ca-", calendar)) {
return false;
@@ -456,12 +452,6 @@ static bool ApplyUnicodeExtensionToTag(JSContext* cx, LanguageTag& tag,
}
}
- // Normalize the case of the new keywords.
- std::transform(newExtension.begin() + startNewKeywords, newExtension.end(),
- newExtension.begin() + startNewKeywords, [](char c) {
- return mozilla::IsAsciiUppercaseAlpha(c) ? (c | 0x20) : c;
- });
-
// Append the remaining keywords from the previous Unicode extension subtag.
if (unicodeExtensionKeywords) {
if (!newExtension.append(unicodeExtensionKeywords, unicodeExtensionEnd)) {
@@ -847,18 +837,18 @@ static BaseNamePartsResult BaseNameParts(const CharT* baseName, size_t length) {
}
IndexAndLength language{0, languageLength};
- MOZ_ASSERT(intl::IsStructurallyValidLanguageTag(language.rangeOf(baseName)));
+ MOZ_ASSERT(intl::IsStructurallyValidLanguageTag(language.spanOf(baseName)));
mozilla::Maybe<IndexAndLength> script{};
if (scriptIndex) {
script.emplace(scriptIndex, ScriptLength);
- MOZ_ASSERT(intl::IsStructurallyValidScriptTag(script->rangeOf(baseName)));
+ MOZ_ASSERT(intl::IsStructurallyValidScriptTag(script->spanOf(baseName)));
}
mozilla::Maybe<IndexAndLength> region{};
if (regionIndex) {
region.emplace(regionIndex, regionLength);
- MOZ_ASSERT(intl::IsStructurallyValidRegionTag(region->rangeOf(baseName)));
+ MOZ_ASSERT(intl::IsStructurallyValidRegionTag(region->spanOf(baseName)));
}
return {language, script, region};
diff --git a/js/src/builtin/intl/SharedIntlData.cpp b/js/src/builtin/intl/SharedIntlData.cpp
index 01db1d38cb..6aeea0ad93 100644
--- a/js/src/builtin/intl/SharedIntlData.cpp
+++ b/js/src/builtin/intl/SharedIntlData.cpp
@@ -31,9 +31,7 @@ template<typename Char>
static constexpr Char
ToUpperASCII(Char c)
{
- return ('a' <= c && c <= 'z')
- ? (c & ~0x20)
- : c;
+ return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c;
}
static_assert(ToUpperASCII('a') == 'A', "verifying 'a' uppercases correctly");
diff --git a/js/src/builtin/intl/make_intl_data.py b/js/src/builtin/intl/make_intl_data.py
index 670a46357b..0370d422d9 100644
--- a/js/src/builtin/intl/make_intl_data.py
+++ b/js/src/builtin/intl/make_intl_data.py
@@ -68,8 +68,8 @@ def writeMappingsVar(println, mapping, name, description, source, url):
println(u' "{0}": "{1}",'.format(key, value))
println(u"};")
-def writeMappingsBinarySearch(println, fn_name, type_name, name, validate_fn, mappings,
- tag_maxlength, description, source, url):
+def writeMappingsBinarySearch(println, fn_name, type_name, name, validate_fn, validate_case_fn,
+ mappings, tag_maxlength, description, source, url):
""" Emit code to perform a binary search on language tag subtags.
Uses the contents of |mapping|, which can either be a dictionary or set,
@@ -79,8 +79,9 @@ def writeMappingsBinarySearch(println, fn_name, type_name, name, validate_fn, ma
writeMappingHeader(println, description, source, url)
println(u"""
bool js::intl::LanguageTag::{0}({1} {2}) {{
- MOZ_ASSERT({3}({2}.range()));
-""".format(fn_name, type_name, name, validate_fn).strip())
+ MOZ_ASSERT({3}({2}.span()));
+ MOZ_ASSERT({4}({2}.span()));
+""".format(fn_name, type_name, name, validate_fn, validate_case_fn).strip())
def write_array(subtags, name, length, fixed):
if fixed:
@@ -162,7 +163,7 @@ bool js::intl::LanguageTag::{0}({1} {2}) {{
println(u"""
if (const char* replacement = SearchReplacement({0}s, aliases, {0})) {{
- {0}.set(ConstCharRange(replacement, strlen(replacement)));
+ {0}.set(mozilla::MakeCStringSpan(replacement));
return true;
}}
return false;
@@ -190,7 +191,8 @@ def writeComplexLanguageTagMappings(println, complex_language_mappings,
writeMappingHeader(println, description, source, url)
println(u"""
void js::intl::LanguageTag::performComplexLanguageMappings() {
- MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
""".lstrip())
# Merge duplicate language entries.
@@ -227,12 +229,12 @@ void js::intl::LanguageTag::performComplexLanguageMappings() {
if script is not None:
println(u"""
- if (script().length() == 0) {{
+ if (script().missing()) {{
setScript("{}");
}}""".format(script).strip("\n"))
if region is not None:
println(u"""
- if (region().length() == 0) {{
+ if (region().missing()) {{
setRegion("{}");
}}""".format(region).strip("\n"))
println(u"""
@@ -249,8 +251,10 @@ def writeComplexRegionTagMappings(println, complex_region_mappings,
writeMappingHeader(println, description, source, url)
println(u"""
void js::intl::LanguageTag::performComplexRegionMappings() {
- MOZ_ASSERT(IsStructurallyValidLanguageTag(language().range()));
- MOZ_ASSERT(IsStructurallyValidRegionTag(region().range()));
+ MOZ_ASSERT(IsStructurallyValidLanguageTag(language().span()));
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
+ MOZ_ASSERT(IsStructurallyValidRegionTag(region().span()));
+ MOZ_ASSERT(IsCanonicallyCasedRegionTag(region().span()));
""".lstrip())
# |non_default_replacements| is a list and hence not hashable. Convert it
@@ -360,14 +364,17 @@ bool js::intl::LanguageTag::updateGrandfatheredMappings(JSContext* cx) {
// no-nyn, zh-min, and zh-min-nan require BCP47's extlang subtag
// that |unicode_locale_id| doesn't support.)
// * No RG tag contains |extensions| or |pu_extensions|.
- if (script().length() != 0 ||
- region().length() != 0 ||
+ if (script().present() ||
+ region().present() ||
variants().length() != 1 ||
extensions().length() != 0 ||
privateuse()) {
return true;
}
+ MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language().span()));
+ MOZ_ASSERT(IsCanonicallyCasedVariantTag(mozilla::MakeCStringSpan(variants()[0].get())));
+
auto variantEqualTo = [this](const char* variant) {
return strcmp(variants()[0].get(), variant) == 0;
};""")
@@ -870,7 +877,7 @@ def writeCLDRLanguageTagData(println, data, url):
println(u"""
#include "mozilla/Assertions.h"
-#include "mozilla/Range.h"
+#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"
#include <algorithm>
@@ -885,7 +892,6 @@ def writeCLDRLanguageTagData(println, data, url):
#include "builtin/intl/LanguageTag.h"
using namespace js::intl::LanguageTagLimits;
-using ConstCharRange = mozilla::Range<const char>;
template <size_t Length, size_t TagLength, size_t SubtagLength>
static inline bool HasReplacement(
@@ -894,7 +900,7 @@ static inline bool HasReplacement(
MOZ_ASSERT(subtag.length() == TagLength - 1,
"subtag must have the same length as the list of subtags");
- const char* ptr = subtag.range().begin().get();
+ const char* ptr = subtag.span().data();
return std::binary_search(std::begin(subtags), std::end(subtags), ptr,
[](const char* a, const char* b) {
return memcmp(a, b, TagLength - 1) < 0;
@@ -909,7 +915,7 @@ static inline const char* SearchReplacement(
MOZ_ASSERT(subtag.length() == TagLength - 1,
"subtag must have the same length as the list of subtags");
- const char* ptr = subtag.range().begin().get();
+ const char* ptr = subtag.span().data();
auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr,
[](const char* a, const char* b) {
return memcmp(a, b, TagLength - 1) < 0;
@@ -919,6 +925,34 @@ static inline const char* SearchReplacement(
}
return nullptr;
}
+
+#ifdef DEBUG
+static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) {
+ // Tell the analysis the |std::all_of| function can't GC.
+ JS::AutoSuppressGCAnalysis nogc;
+
+ return std::all_of(span.begin(), span.end(), mozilla::IsAsciiLowercaseAlpha<char>);
+}
+
+static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) {
+ // Tell the analysis the |std::all_of| function can't GC.
+ JS::AutoSuppressGCAnalysis nogc;
+
+ return std::all_of(span.begin(), span.end(), mozilla::IsAsciiUppercaseAlpha<char>) ||
+ std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>);
+}
+
+static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) {
+ auto isAsciiLowercaseAlphaOrDigit = [](char c) {
+ return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
+ };
+
+ // Tell the analysis the |std::all_of| function can't GC.
+ JS::AutoSuppressGCAnalysis nogc;
+
+ return std::all_of(span.begin(), span.end(), isAsciiLowercaseAlphaOrDigit);
+}
+#endif
""".rstrip())
source = u"CLDR Supplemental Data, version {}".format(data["version"])
@@ -938,21 +972,25 @@ static inline const char* SearchReplacement(
writeMappingsBinarySearch(println, "languageMapping",
"LanguageSubtag&", "language",
"IsStructurallyValidLanguageTag",
+ "IsCanonicallyCasedLanguageTag",
language_mappings, language_maxlength,
"Mappings from language subtags to preferred values.", source, url)
writeMappingsBinarySearch(println, "complexLanguageMapping",
"const LanguageSubtag&", "language",
"IsStructurallyValidLanguageTag",
+ "IsCanonicallyCasedLanguageTag",
complex_language_mappings.keys(), language_maxlength,
"Language subtags with complex mappings.", source, url)
writeMappingsBinarySearch(println, "regionMapping",
"RegionSubtag&", "region",
"IsStructurallyValidRegionTag",
+ "IsCanonicallyCasedRegionTag",
region_mappings, region_maxlength,
"Mappings from region subtags to preferred values.", source, url)
writeMappingsBinarySearch(println, "complexRegionMapping",
"const RegionSubtag&", "region",
"IsStructurallyValidRegionTag",
+ "IsCanonicallyCasedRegionTag",
complex_region_mappings.keys(), region_maxlength,
"Region subtags with complex mappings.", source, url)
@@ -1863,34 +1901,34 @@ def updateTzdata(topsrcdir, args):
def writeUnicodeExtensionsMappings(println, mapping):
println(u"""
template <size_t Length>
-static inline bool IsUnicodeKey(const ConstCharRange& key,
+static inline bool IsUnicodeKey(mozilla::Span<const char> key,
const char (&str)[Length]) {
static_assert(Length == UnicodeKeyLength + 1,
"Unicode extension key is two characters long");
- return memcmp(key.begin().get(), str, Length - 1) == 0;
+ return memcmp(key.data(), str, Length - 1) == 0;
}
template <size_t Length>
-static inline bool IsUnicodeType(const ConstCharRange& type,
+static inline bool IsUnicodeType(mozilla::Span<const char> type,
const char (&str)[Length]) {
static_assert(Length > UnicodeKeyLength + 1,
"Unicode extension type contains more than two characters");
- return type.length() == (Length - 1) &&
- memcmp(type.begin().get(), str, Length - 1) == 0;
+ return type.size() == (Length - 1) &&
+ memcmp(type.data(), str, Length - 1) == 0;
}
-static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
+static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) {
#ifdef DEBUG
auto isNull = [](char c) {
return c == '\\0';
};
#endif
- MOZ_ASSERT(std::none_of(b.begin().get(), b.end().get(), isNull),
+ MOZ_ASSERT(std::none_of(b.begin(), b.end(), isNull),
"unexpected null-character in string");
using UnsignedChar = unsigned char;
- for (size_t i = 0; i < b.length(); i++) {
+ for (size_t i = 0; i < b.size(); i++) {
// |a| is zero-terminated and |b| doesn't contain a null-terminator. So if
// we've reached the end of |a|, the below if-statement will always be true.
// That ensures we don't read past the end of |a|.
@@ -1901,13 +1939,13 @@ static int32_t CompareUnicodeType(const char* a, const ConstCharRange& b) {
// Return zero if both strings are equal or a negative number if |b| is a
// prefix of |a|.
- return -int32_t(UnsignedChar(a[b.length()]));
+ return -int32_t(UnsignedChar(a[b.size()]));
};
template <size_t Length>
static inline const char* SearchReplacement(const char* (&types)[Length],
const char* (&aliases)[Length],
- const ConstCharRange& type) {
+ mozilla::Span<const char> type) {
auto p = std::lower_bound(std::begin(types), std::end(types), type,
[](const auto& a, const auto& b) {
@@ -1926,7 +1964,7 @@ static inline const char* SearchReplacement(const char* (&types)[Length],
* Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files
*/
const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
- const ConstCharRange& key, const ConstCharRange& type) {
+ mozilla::Span<const char> key, mozilla::Span<const char> type) {
#ifdef DEBUG
static auto isAsciiLowercaseAlphanumeric = [](char c) {
return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c);
@@ -1937,12 +1975,12 @@ const char* js::intl::LanguageTag::replaceUnicodeExtensionType(
};
#endif
- MOZ_ASSERT(key.length() == UnicodeKeyLength);
- MOZ_ASSERT(std::all_of(key.begin().get(), key.end().get(),
+ MOZ_ASSERT(key.size() == UnicodeKeyLength);
+ MOZ_ASSERT(std::all_of(key.begin(), key.end(),
isAsciiLowercaseAlphanumeric));
- MOZ_ASSERT(type.length() > UnicodeKeyLength);
- MOZ_ASSERT(std::all_of(type.begin().get(), type.end().get(),
+ MOZ_ASSERT(type.size() > UnicodeKeyLength);
+ MOZ_ASSERT(std::all_of(type.begin(), type.end(),
isAsciiLowercaseAlphanumericOrDash));
""")
diff --git a/js/src/js.msg b/js/src/js.msg
index 1b77cf6a31..a2a1e3f3d2 100644
--- a/js/src/js.msg
+++ b/js/src/js.msg
@@ -485,6 +485,7 @@ MSG_DEF(JSMSG_TRACELOGGER_ENABLE_FAIL, 1, JSEXN_ERR, "enabling tracelogger faile
// Intl
MSG_DEF(JSMSG_DATE_NOT_FINITE, 2, JSEXN_RANGEERR, "date value is not finite in {0}.{1}()")
+MSG_DEF(JSMSG_DUPLICATE_VARIANT_SUBTAG, 1, JSEXN_RANGEERR, "duplicate variant subtag: {0}")
MSG_DEF(JSMSG_INTERNAL_INTL_ERROR, 0, JSEXN_ERR, "internal error while computing Intl data")
MSG_DEF(JSMSG_INTL_OBJECT_NOT_INITED, 3, JSEXN_TYPEERR, "Intl.{0}.prototype.{1} called on value that's not an object initialized as a {2}")
MSG_DEF(JSMSG_INVALID_CURRENCY_CODE, 1, JSEXN_RANGEERR, "invalid currency code in NumberFormat(): {0}")