summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Martok <martok@martoks-place.de> 2023-06-29 23:05:33 +0200
committer Martok <martok@martoks-place.de> 2023-06-29 23:05:33 +0200
commit f168e0afe965d2d860e9f2ad8e2ca6cf26ec0b41 (patch)
tree f2d89d26004389b9dad51896f19293915773a846
parent 9eb285a9fb89cfd64ca9c9cba77746af4547f0a4 (diff)
download uxp-f168e0afe965d2d860e9f2ad8e2ca6cf26ec0b41.tar.gz
Issue #2259 - Reimplement String.prototype.toLocale{Lower,Upper}Case per ECMAScript Intl specification
- Update make_unicode to output SpecialCasing
- Handle special casing
- Use realloc instead of malloc when resizing a newly created string buffer

Based-on: m-c 1318403, 1431957
-rw-r--r-- config/check_spidermonkey_style.py 1
-rw-r--r-- js/src/builtin/String.js 88
-rw-r--r-- js/src/builtin/intl/CommonFunctions.js 58
-rw-r--r-- js/src/builtin/intl/make_intl_data.py 4
-rw-r--r-- js/src/jsapi.h 4
-rw-r--r-- js/src/jscntxt.h 1
-rw-r--r-- js/src/jsstr.cpp 639
-rw-r--r-- js/src/jsstr.h 27
-rw-r--r-- js/src/vm/SelfHosting.cpp 6
-rw-r--r-- js/src/vm/SpecialCasing.txt 281
-rw-r--r-- js/src/vm/Unicode.cpp 2616
-rw-r--r-- js/src/vm/Unicode.h 57
-rw-r--r-- js/src/vm/UnicodeNonBMP.h 24
-rwxr-xr-x js/src/vm/make_unicode.py 675
14 files changed, 3190 insertions, 1291 deletions
diff --git a/config/check_spidermonkey_style.py b/config/check_spidermonkey_style.py
index cb9e2418f2..5f06e6ad93 100644
--- a/config/check_spidermonkey_style.py
+++ b/config/check_spidermonkey_style.py
@@ -82,6 +82,7 @@ included_inclnames_to_ignore = set([
'unicode/plurrule.h', # ICU
'unicode/timezone.h', # ICU
'unicode/ucal.h', # ICU
+ 'unicode/uchar.h', # ICU
'unicode/uclean.h', # ICU
'unicode/ucol.h', # ICU
'unicode/udat.h', # ICU
diff --git a/js/src/builtin/String.js b/js/src/builtin/String.js
index b0928fe88c..0fab35966a 100644
--- a/js/src/builtin/String.js
+++ b/js/src/builtin/String.js
@@ -731,6 +731,88 @@ function String_localeCompare(that) {
return intl_CompareStrings(collator, S, That);
}
+/**
+ * 13.1.2 String.prototype.toLocaleLowerCase ( [ locales ] )
+ *
+ * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b
+ */
+function String_toLocaleLowerCase() {
+ // Step 1.
+ RequireObjectCoercible(this);
+
+ // Step 2.
+ var string = ToString(this);
+
+ // Handle the common cases (no locales argument or a single string
+ // argument) first.
+ var locales = arguments.length > 0 ? arguments[0] : undefined;
+ var requestedLocale;
+ if (locales === undefined) {
+ // Steps 3, 6.
+ requestedLocale = undefined;
+ } else if (typeof locales === "string") {
+ // Steps 3, 5.
+ requestedLocale = ValidateAndCanonicalizeLanguageTag(locales);
+ } else {
+ // Step 3.
+ var requestedLocales = CanonicalizeLocaleList(locales);
+
+ // Steps 4-6.
+ requestedLocale = requestedLocales.length > 0 ? requestedLocales[0] : undefined;
+ }
+
+ // Trivial case: When the input is empty, directly return the empty string.
+ if (string.length === 0)
+ return "";
+
+ if (requestedLocale === undefined)
+ requestedLocale = DefaultLocale();
+
+ // Steps 7-16.
+ return intl_toLocaleLowerCase(string, requestedLocale);
+}
+
+/**
+ * 13.1.3 String.prototype.toLocaleUpperCase ( [ locales ] )
+ *
+ * ES2017 Intl draft rev 94045d234762ad107a3d09bb6f7381a65f1a2f9b
+ */
+function String_toLocaleUpperCase() {
+ // Step 1.
+ RequireObjectCoercible(this);
+
+ // Step 2.
+ var string = ToString(this);
+
+ // Handle the common cases (no locales argument or a single string
+ // argument) first.
+ var locales = arguments.length > 0 ? arguments[0] : undefined;
+ var requestedLocale;
+ if (locales === undefined) {
+ // Steps 3, 6.
+ requestedLocale = undefined;
+ } else if (typeof locales === "string") {
+ // Steps 3, 5.
+ requestedLocale = ValidateAndCanonicalizeLanguageTag(locales);
+ } else {
+ // Step 3.
+ var requestedLocales = CanonicalizeLocaleList(locales);
+
+ // Steps 4-6.
+ requestedLocale = requestedLocales.length > 0 ? requestedLocales[0] : undefined;
+ }
+
+ // Trivial case: When the input is empty, directly return the empty string.
+ if (string.length === 0)
+ return "";
+
+ if (requestedLocale === undefined)
+ requestedLocale = DefaultLocale();
+
+ // Steps 7-16.
+ return intl_toLocaleUpperCase(string, requestedLocale);
+}
+
/* ES6 Draft May 22, 2014 21.1.2.4 */
function String_static_raw(callSite, ...substitutions) {
// Step 1 (implicit).
@@ -1014,13 +1096,15 @@ _SetCanonicalName(String_static_trimEnd, "trimEnd");
function String_static_toLocaleLowerCase(string) {
if (arguments.length < 1)
ThrowTypeError(JSMSG_MISSING_FUN_ARG, 0, 'String.toLocaleLowerCase');
- return callFunction(std_String_toLocaleLowerCase, string);
+ var locales = arguments.length > 1 ? arguments[1] : undefined;
+ return callFunction(String_toLocaleLowerCase, string, locales);
}
function String_static_toLocaleUpperCase(string) {
if (arguments.length < 1)
ThrowTypeError(JSMSG_MISSING_FUN_ARG, 0, 'String.toLocaleUpperCase');
- return callFunction(std_String_toLocaleUpperCase, string);
+ var locales = arguments.length > 1 ? arguments[1] : undefined;
+ return callFunction(String_toLocaleUpperCase, string, locales);
}
function String_static_normalize(string) {
diff --git a/js/src/builtin/intl/CommonFunctions.js b/js/src/builtin/intl/CommonFunctions.js
index 10e02a5ac6..c1999f001e 100644
--- a/js/src/builtin/intl/CommonFunctions.js
+++ b/js/src/builtin/intl/CommonFunctions.js
@@ -446,6 +446,64 @@ function CanonicalizeLanguageTag(locale) {
return canonical;
}
+
+/**
+ * Returns true if the input contains only ASCII alphabetical characters.
+ */
+function IsASCIIAlphaString(s) {
+ assert(typeof s === "string", "IsASCIIAlphaString");
+
+ for (var i = 0; i < s.length; i++) {
+ var c = callFunction(std_String_charCodeAt, s, i);
+ if (!((0x41 <= c && c <= 0x5A) || (0x61 <= c && c <= 0x7A)))
+ return false
+ }
+ return true;
+}
+
+
+/**
+ * Validates and canonicalizes the given language tag.
+ */
+function ValidateAndCanonicalizeLanguageTag(locale) {
+ assert(typeof locale === "string", "ValidateAndCanonicalizeLanguageTag");
+
+ // Handle the common case (a standalone language) first.
+ // Only the following BCP47 subset is accepted:
+ // Language-Tag = langtag
+ // langtag = language
+ // language = 2*3ALPHA ; shortest ISO 639 code
+ // For three-character strings we need to make sure it's not a
+ // private-use-only language tag, for example "x-x".
+ if (locale.length === 2 || (locale.length === 3 && locale[1] !== "-")) {
+ if (!IsASCIIAlphaString(locale))
+ ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
+ assert(IsStructurallyValidLanguageTag(locale), "2*3ALPHA is a valid language tag");
+
+ // The language subtag is canonicalized to lower case.
+ locale = callFunction(std_String_toLowerCase, locale);
+
+ // langTagMappings doesn't contain any 2*3ALPHA keys, so we don't need
+ // to check for possible replacements in this map.
+ assert(!callFunction(std_Object_hasOwnProperty, langTagMappings, locale),
+ "langTagMappings contains no 2*3ALPHA mappings");
+
+ // Replace deprecated subtags with their preferred values.
+ locale = callFunction(std_Object_hasOwnProperty, langSubtagMappings, locale)
+ ? langSubtagMappings[locale]
+ : locale;
+ assert(locale === CanonicalizeLanguageTag(locale), "expected same canonicalization");
+
+ return locale;
+ }
+
+ if (!IsStructurallyValidLanguageTag(locale))
+ ThrowRangeError(JSMSG_INVALID_LANGUAGE_TAG, locale);
+
+ return CanonicalizeLanguageTag(locale);
+}
+
+
function localeContainsNoUnicodeExtensions(locale) {
// No "-u-", no possible Unicode extension.
if (callFunction(std_String_indexOf, locale, "-u-") === -1)
diff --git a/js/src/builtin/intl/make_intl_data.py b/js/src/builtin/intl/make_intl_data.py
index a81001e0f3..02bf350814 100644
--- a/js/src/builtin/intl/make_intl_data.py
+++ b/js/src/builtin/intl/make_intl_data.py
@@ -151,6 +151,10 @@ def readRegistry(registry):
# Special case for heploc.
langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97"
+ # ValidateAndCanonicalizeLanguageTag in CommonFunctions.js expects
+ # langTagMappings to contain no 2*3ALPHA keys.
+ assert all(len(lang) > 3 for lang in langTagMappings.iterkeys())
+
return {"fileDate": fileDate,
"langTagMappings": langTagMappings,
"langSubtagMappings": langSubtagMappings,
diff --git a/js/src/jsapi.h b/js/src/jsapi.h
index 923aa2bb05..f80d2602e6 100644
--- a/js/src/jsapi.h
+++ b/js/src/jsapi.h
@@ -5327,8 +5327,8 @@ JS_ResetDefaultLocale(JSContext* cx);
* Locale specific string conversion and error message callbacks.
*/
struct JSLocaleCallbacks {
- JSLocaleToUpperCase localeToUpperCase;
- JSLocaleToLowerCase localeToLowerCase;
+ JSLocaleToUpperCase localeToUpperCase; // not used
+ JSLocaleToLowerCase localeToLowerCase; // not used
JSLocaleCompare localeCompare; // not used
JSLocaleToUnicode localeToUnicode;
};
diff --git a/js/src/jscntxt.h b/js/src/jscntxt.h
index 1bc426e14e..c4ef783d3a 100644
--- a/js/src/jscntxt.h
+++ b/js/src/jscntxt.h
@@ -365,6 +365,7 @@ struct JSContext : public js::ExclusiveContext,
using ExclusiveContext::permanentAtoms;
using ExclusiveContext::pod_calloc;
using ExclusiveContext::pod_malloc;
+ using ExclusiveContext::pod_realloc;
using ExclusiveContext::staticStrings;
using ExclusiveContext::updateMallocCounter;
using ExclusiveContext::wellKnownSymbols;
diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp
index 6726da9457..fdee274c32 100644
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -31,10 +31,12 @@
#include "jsutil.h"
#include "builtin/intl/ICUHeader.h"
+#include "builtin/intl/CommonFunctions.h"
#include "builtin/RegExp.h"
#include "jit/InlinableNatives.h"
#include "js/Conversions.h"
#include "js/UniquePtr.h"
+#include "unicode/uchar.h"
#include "unicode/unorm2.h"
#include "vm/GlobalObject.h"
#include "vm/Interpreter.h"
@@ -599,18 +601,209 @@ js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t l
}
template <typename CharT>
+static auto
+ReallocChars(JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t oldLength,
+ size_t newLength)
+ -> decltype(chars)
+{
+ using AnyCharPtr = decltype(chars);
+
+ CharT* oldChars = chars.release();
+ CharT* newChars = cx->pod_realloc<CharT>(oldChars, oldLength, newLength);
+ if (!newChars) {
+ js_free(oldChars);
+ return AnyCharPtr();
+ }
+
+ return AnyCharPtr(newChars);
+}
+
+/**
+ * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
+ * depending on its context:
+ * When it's preceded by a cased character and not followed by another cased
+ * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
+ * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
+ *
+ * Unicode 9.0, §3.13 Default Case Algorithms
+ */
+static char16_t
+Final_Sigma(const char16_t* chars, size_t length, size_t index)
+{
+ MOZ_ASSERT(index < length);
+ MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
+ MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
+ unicode::GREEK_SMALL_LETTER_SIGMA);
+
+ // Tell the analysis the BinaryProperty.contains function pointer called by
+ // u_hasBinaryProperty cannot GC.
+ JS::AutoSuppressGCAnalysis nogc;
+
+ bool precededByCased = false;
+ for (size_t i = index; i > 0; ) {
+ char16_t c = chars[--i];
+ uint32_t codePoint = c;
+ if (unicode::IsTrailSurrogate(c) && i > 0) {
+ char16_t lead = chars[i - 1];
+ if (unicode::IsLeadSurrogate(lead)) {
+ codePoint = unicode::UTF16Decode(lead, c);
+ i--;
+ }
+ }
+
+ // Ignore any characters with the property Case_Ignorable.
+ // NB: We need to skip over all Case_Ignorable characters, even when
+ // they also have the Cased binary property.
+ if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE))
+ continue;
+
+ precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
+ break;
+ }
+ if (!precededByCased)
+ return unicode::GREEK_SMALL_LETTER_SIGMA;
+
+ bool followedByCased = false;
+ for (size_t i = index + 1; i < length; ) {
+ char16_t c = chars[i++];
+ uint32_t codePoint = c;
+ if (unicode::IsLeadSurrogate(c) && i < length) {
+ char16_t trail = chars[i];
+ if (unicode::IsTrailSurrogate(trail)) {
+ codePoint = unicode::UTF16Decode(c, trail);
+ i++;
+ }
+ }
+
+ // Ignore any characters with the property Case_Ignorable.
+ // NB: We need to skip over all Case_Ignorable characters, even when
+ // they also have the Cased binary property.
+ if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE))
+ continue;
+
+ followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
+ break;
+ }
+ if (!followedByCased)
+ return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
+
+ return unicode::GREEK_SMALL_LETTER_SIGMA;
+}
+
+static Latin1Char
+Final_Sigma(const Latin1Char* chars, size_t length, size_t index)
+{
+ MOZ_ASSERT_UNREACHABLE("U+03A3 is not a Latin-1 character");
+ return 0;
+}
+
+// If |srcLength == destLength| is true, the destination buffer was allocated
+// with the same size as the source buffer. When we append characters which
+// have special casing mappings, we test |srcLength == destLength| to decide
+// if we need to back out and reallocate a sufficiently large destination
+// buffer. Otherwise the destination buffer was allocated with the correct
+// size to hold all lower case mapped characters, i.e.
+// |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
+template <typename CharT>
+static size_t
+ToLowerCaseImpl(CharT* destChars, const CharT* srcChars, size_t startIndex, size_t srcLength,
+ size_t destLength)
+{
+ MOZ_ASSERT(startIndex < srcLength);
+ MOZ_ASSERT(srcLength <= destLength);
+ MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), srcLength == destLength);
+
+ size_t j = startIndex;
+ for (size_t i = startIndex; i < srcLength; i++) {
+ char16_t c = srcChars[i];
+ if (!IsSame<CharT, Latin1Char>::value) {
+ if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
+ char16_t trail = srcChars[i + 1];
+ if (unicode::IsTrailSurrogate(trail)) {
+ trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
+ destChars[j++] = c;
+ destChars[j++] = trail;
+ i++;
+ continue;
+ }
+ }
+
+ // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
+ // lowercases to <U+0069 U+0307>.
+ if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+ // Return if the output buffer is too small.
+ if (srcLength == destLength)
+ return i;
+
+ destChars[j++] = CharT('i');
+ destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
+ continue;
+ }
+
+ // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
+ // one of two codepoints depending on context.
+ if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
+ destChars[j++] = Final_Sigma(srcChars, srcLength, i);
+ continue;
+ }
+ }
+
+ c = unicode::ToLowerCase(c);
+ MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
+ destChars[j++] = c;
+ }
+
+ MOZ_ASSERT(j == destLength);
+ destChars[destLength] = '\0';
+
+ return srcLength;
+}
+
+static size_t
+ToLowerCaseLength(const char16_t* chars, size_t startIndex, size_t length)
+{
+ size_t lowerLength = length;
+ for (size_t i = startIndex; i < length; i++) {
+ char16_t c = chars[i];
+
+ // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
+ if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)
+ lowerLength += 1;
+ }
+ return lowerLength;
+}
+
+static size_t
+ToLowerCaseLength(const Latin1Char* chars, size_t startIndex, size_t length)
+{
+ MOZ_ASSERT_UNREACHABLE("never called for Latin-1 strings");
+ return 0;
+}
+
+template <typename CharT>
static JSString*
ToLowerCase(JSContext* cx, JSLinearString* str)
{
- // Unlike toUpperCase, toLowerCase has the nice invariant that if the input
- // is a Latin1 string, the output is also a Latin1 string.
- UniquePtr<CharT[], JS::FreePolicy> newChars;
- size_t length = str->length();
+ // Unlike toUpperCase, toLowerCase has the nice invariant that if the
+ // input is a Latin-1 string, the output is also a Latin-1 string.
+ using AnyCharPtr = UniquePtr<CharT[], JS::FreePolicy>;
+
+ AnyCharPtr newChars;
+ const size_t length = str->length();
+ size_t resultLength;
{
AutoCheckCannotGC nogc;
const CharT* chars = str->chars<CharT>(nogc);
- // Look for the first upper case character.
+ // We don't need extra special casing checks in the loop below,
+ // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
+ // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
+ MOZ_ASSERT(unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
+ "U+0130 has a simple lower case mapping");
+ MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
+ "U+03A3 has a simple lower case mapping");
+
+ // Look for the first character that changes when lowercased.
size_t i = 0;
for (; i < length; i++) {
char16_t c = chars[i];
@@ -630,40 +823,35 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
break;
}
- // If all characters are lower case, return the input string.
+ // If no character needs to change, return the input string.
if (i == length)
return str;
- newChars = cx->make_pod_array<CharT>(length + 1);
+ resultLength = length;
+ newChars = cx->make_pod_array<CharT>(resultLength + 1);
if (!newChars)
return nullptr;
PodCopy(newChars.get(), chars, i);
- for (; i < length; i++) {
- char16_t c = chars[i];
- if (!IsSame<CharT, Latin1Char>::value) {
- if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
- char16_t trail = chars[i + 1];
- if (unicode::IsTrailSurrogate(trail)) {
- trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
- newChars[i] = c;
- newChars[i + 1] = trail;
- i++;
- continue;
- }
- }
- }
+ size_t readChars = ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
+ if (readChars < length) {
+ MOZ_ASSERT((!IsSame<CharT, Latin1Char>::value),
+ "Latin-1 strings don't have special lower case mappings");
+ resultLength = ToLowerCaseLength(chars, readChars, length);
- c = unicode::ToLowerCase(c);
- MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
- newChars[i] = c;
- }
+ AnyCharPtr buf = ReallocChars(cx, Move(newChars), length + 1, resultLength + 1);
+ if (!buf)
+ return nullptr;
- newChars[length] = 0;
+ newChars = Move(buf);
+
+ MOZ_ALWAYS_TRUE(length ==
+ ToLowerCaseImpl(newChars.get(), chars, readChars, length, resultLength));
+ }
}
- JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
+ JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), resultLength);
if (!res)
return nullptr;
@@ -671,104 +859,295 @@ ToLowerCase(JSContext* cx, JSLinearString* str)
return res;
}
-static inline bool
-ToLowerCaseHelper(JSContext* cx, const CallArgs& args)
+JSString*
+js::StringToLowerCase(JSContext* cx, HandleLinearString string)
+{
+ if (string->hasLatin1Chars())
+ return ToLowerCase<Latin1Char>(cx, string);
+ return ToLowerCase<char16_t>(cx, string);
+}
+
+bool
+js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
{
+ CallArgs args = CallArgsFromVp(argc, vp);
+
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
- JSLinearString* linear = str->ensureLinear(cx);
+ RootedLinearString linear(cx, str->ensureLinear(cx));
if (!linear)
return false;
- if (linear->hasLatin1Chars())
- str = ToLowerCase<Latin1Char>(cx, linear);
- else
- str = ToLowerCase<char16_t>(cx, linear);
- if (!str)
+ JSString* result = StringToLowerCase(cx, linear);
+ if (!result)
return false;
- args.rval().setString(str);
+ args.rval().setString(result);
return true;
}
-bool
-js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
+static const char*
+CaseMappingLocale(JSContext* cx, JSString* str)
{
- return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
+ JSLinearString* locale = str->ensureLinear(cx);
+ if (!locale)
+ return nullptr;
+
+ MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
+
+ // Lithuanian, Turkish, and Azeri have language dependent case mappings.
+ static const char languagesWithSpecialCasing[][3] = { "lt", "tr", "az" };
+
+ // All strings in |languagesWithSpecialCasing| are of length two, so we
+ // only need to compare the first two characters to find a matching locale.
+ // ES2017 Intl, §9.2.2 BestAvailableLocale
+ if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') {
+ for (const auto& language : languagesWithSpecialCasing) {
+ if (locale->latin1OrTwoByteChar(0) == language[0] &&
+ locale->latin1OrTwoByteChar(1) == language[1])
+ {
+ return language;
+ }
+ }
+ }
+
+ return ""; // ICU root locale
}
bool
-js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
+js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
+ MOZ_ASSERT(args.length() == 2);
+ MOZ_ASSERT(args[0].isString());
+ MOZ_ASSERT(args[1].isString());
- /*
- * Forcefully ignore the first (or any) argument and return toLowerCase(),
- * ECMA has reserved that argument, presumably for defining the locale.
- */
- if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
- RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
- if (!str)
- return false;
+ RootedLinearString linear(cx, args[0].toString()->ensureLinear(cx));
+ if (!linear)
+ return false;
- RootedValue result(cx);
- if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
+ const char* locale = CaseMappingLocale(cx, args[1].toString());
+ if (!locale)
+ return false;
+
+ // Call String.prototype.toLowerCase() for language independent casing.
+ if (intl::StringsAreEqual(locale, "")) {
+ JSString* str = StringToLowerCase(cx, linear);
+ if (!str)
return false;
- args.rval().set(result);
+ args.rval().setString(str);
return true;
}
- return ToLowerCaseHelper(cx, args);
+ AutoStableStringChars inputChars(cx);
+ if (!inputChars.initTwoByte(cx, linear))
+ return false;
+ mozilla::Range<const char16_t> input = inputChars.twoByteRange();
+
+ // Maximum case mapping length is three characters.
+ static_assert(JSString::MAX_LENGTH < INT32_MAX / 3,
+ "Case conversion doesn't overflow int32_t indices");
+
+ JSString* str = intl::CallICU(cx, [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
+ return u_strToLower(chars, size, Char16ToUChar(input.begin().get()), input.length(),
+ locale, status);
+ });
+ if (!str)
+ return false;
+
+ args.rval().setString(str);
+ return true;
}
-template <typename DestChar, typename SrcChar>
-static void
-ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
+static inline bool
+CanUpperCaseSpecialCasing(Latin1Char charCode)
{
- MOZ_ASSERT(firstLowerCase < length);
+ // Handle U+00DF LATIN SMALL LETTER SHARP S inline; no other Latin-1
+ // character has special casing rules.
+ MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S,
+ !unicode::CanUpperCaseSpecialCasing(charCode));
- for (size_t i = 0; i < firstLowerCase; i++)
- destChars[i] = srcChars[i];
+ return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
+}
+
+static inline bool
+CanUpperCaseSpecialCasing(char16_t charCode)
+{
+ return unicode::CanUpperCaseSpecialCasing(charCode);
+}
+
+static inline size_t
+LengthUpperCaseSpecialCasing(Latin1Char charCode)
+{
+ // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
+ MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
+
+ return 2;
+}
+
+static inline size_t
+LengthUpperCaseSpecialCasing(char16_t charCode)
+{
+ MOZ_ASSERT(CanUpperCaseSpecialCasing(charCode));
+
+ return unicode::LengthUpperCaseSpecialCasing(charCode);
+}
+
+static inline void
+AppendUpperCaseSpecialCasing(char16_t charCode, Latin1Char* elements, size_t* index)
+{
+ // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
+ MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
+ static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
+
+ elements[(*index)++] = 'S';
+ elements[(*index)++] = 'S';
+}
- for (size_t i = firstLowerCase; i < length; i++) {
+static inline void
+AppendUpperCaseSpecialCasing(char16_t charCode, char16_t* elements, size_t* index)
+{
+ unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
+}
+
+// See ToLowerCaseImpl for an explanation of the parameters.
+template <typename DestChar, typename SrcChar>
+static size_t
+ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t startIndex, size_t srcLength,
+ size_t destLength)
+{
+ static_assert(IsSame<SrcChar, Latin1Char>::value || !IsSame<DestChar, Latin1Char>::value,
+ "cannot write non-Latin-1 characters into Latin-1 string");
+ MOZ_ASSERT(startIndex < srcLength);
+ MOZ_ASSERT(srcLength <= destLength);
+
+ size_t j = startIndex;
+ for (size_t i = startIndex; i < srcLength; i++) {
char16_t c = srcChars[i];
if (!IsSame<DestChar, Latin1Char>::value) {
- if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+ if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
char16_t trail = srcChars[i + 1];
if (unicode::IsTrailSurrogate(trail)) {
trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
- destChars[i] = c;
- destChars[i + 1] = trail;
+ destChars[j++] = c;
+ destChars[j++] = trail;
i++;
continue;
}
}
}
+
+ if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) {
+ // Return if the output buffer is too small.
+ if (srcLength == destLength)
+ return i;
+
+ AppendUpperCaseSpecialCasing(c, destChars, &j);
+ continue;
+ }
+
c = unicode::ToUpperCase(c);
MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
- destChars[i] = c;
+ destChars[j++] = c;
+ }
+
+ MOZ_ASSERT(j == destLength);
+ destChars[destLength] = '\0';
+
+ return srcLength;
+}
+
+// Non-template overload so (Latin1Char dest, char16_t src) never hits the static_assert above.
+static bool
+ToUpperCaseImpl(Latin1Char* destChars, const char16_t* srcChars, size_t startIndex,
+ size_t srcLength, size_t destLength)
+{
+ MOZ_ASSERT_UNREACHABLE("cannot write non-Latin-1 characters into Latin-1 string");
+ return false;
+}
+
+template <typename CharT>
+static size_t
+ToUpperCaseLength(const CharT* chars, size_t startIndex, size_t length)
+{
+ size_t upperLength = length;
+ for (size_t i = startIndex; i < length; i++) {
+ char16_t c = chars[i];
+
+ if (c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c)))
+ upperLength += LengthUpperCaseSpecialCasing(static_cast<CharT>(c)) - 1;
+ }
+ return upperLength;
+}
+
+template <typename DestChar, typename SrcChar>
+static inline void
+CopyChars(DestChar* destChars, const SrcChar* srcChars, size_t length)
+{
+ static_assert(!IsSame<DestChar, SrcChar>::value, "PodCopy is used for the same type case");
+ for (size_t i = 0; i < length; i++)
+ destChars[i] = srcChars[i];
+}
+
+template <typename CharT>
+static inline void
+CopyChars(CharT* destChars, const CharT* srcChars, size_t length)
+{
+ PodCopy(destChars, srcChars, length);
+}
+
+template <typename DestChar, typename SrcChar>
+static inline UniquePtr<DestChar[], JS::FreePolicy>
+ToUpperCase(JSContext* cx, const SrcChar* chars, size_t startIndex, size_t length,
+ size_t* resultLength)
+{
+ MOZ_ASSERT(startIndex < length);
+
+ using DestCharPtr = UniquePtr<DestChar[], JS::FreePolicy>;
+
+ *resultLength = length;
+ DestCharPtr buf = cx->make_pod_array<DestChar>(length + 1);
+ if (!buf)
+ return buf;
+
+ CopyChars(buf.get(), chars, startIndex);
+
+ size_t readChars = ToUpperCaseImpl(buf.get(), chars, startIndex, length, length);
+ if (readChars < length) {
+ size_t actualLength = ToUpperCaseLength(chars, readChars, length);
+
+ *resultLength = actualLength;
+ DestCharPtr buf2 = ReallocChars(cx, Move(buf), length + 1, actualLength + 1);
+ if (!buf2)
+ return buf2;
+
+ buf = Move(buf2);
+
+ MOZ_ALWAYS_TRUE(length ==
+ ToUpperCaseImpl(buf.get(), chars, readChars, length, actualLength));
}
- destChars[length] = '\0';
+ return buf;
}
template <typename CharT>
static JSString*
ToUpperCase(JSContext* cx, JSLinearString* str)
{
- typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
- typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr;
+ using Latin1CharPtr = UniquePtr<Latin1Char[], JS::FreePolicy>;
+ using TwoByteCharPtr = UniquePtr<char16_t[], JS::FreePolicy>;
mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
- size_t length = str->length();
+ const size_t length = str->length();
+ size_t resultLength;
{
AutoCheckCannotGC nogc;
const CharT* chars = str->chars<CharT>(nogc);
- // Look for the first lower case character.
+ // Look for the first character that changes when uppercased.
size_t i = 0;
for (; i < length; i++) {
char16_t c = chars[i];
@@ -786,21 +1165,33 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
}
if (unicode::CanUpperCase(c))
break;
+ if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c))))
+ break;
}
- // If all characters are upper case, return the input string.
+ // If no character needs to change, return the input string.
if (i == length)
return str;
- // If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
- // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
- // upper case characters are not in the Latin1 range.
+ // The string changes when uppercased, so we must create a new string.
+ // Can it be Latin-1?
+ //
+ // If the original string is Latin-1, it can -- unless the string
+ // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
+ // the only Latin-1 codepoints that don't uppercase within Latin-1.
+ // Search for those codepoints to decide whether the new string can be
+ // Latin-1.
+ // If the original string is a two-byte string, its uppercase form is
+ // so rarely Latin-1 that we don't even consider creating a new
+ // Latin-1 string.
bool resultIsLatin1;
if (IsSame<CharT, Latin1Char>::value) {
resultIsLatin1 = true;
for (size_t j = i; j < length; j++) {
Latin1Char c = chars[j];
- if (c == 0xb5 || c == 0xff) {
+ if (c == unicode::MICRO_SIGN ||
+ c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)
+ {
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
resultIsLatin1 = false;
break;
@@ -813,31 +1204,29 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
}
if (resultIsLatin1) {
- Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
+ Latin1CharPtr buf = ToUpperCase<Latin1Char>(cx, chars, i, length, &resultLength);
if (!buf)
return nullptr;
- ToUpperCaseImpl(buf.get(), chars, i, length);
newChars.construct<Latin1CharPtr>(Move(buf));
} else {
- TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1);
+ TwoByteCharPtr buf = ToUpperCase<char16_t>(cx, chars, i, length, &resultLength);
if (!buf)
return nullptr;
- ToUpperCaseImpl(buf.get(), chars, i, length);
newChars.construct<TwoByteCharPtr>(Move(buf));
}
}
JSString* res;
if (newChars.constructed<Latin1CharPtr>()) {
- res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
+ res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), resultLength);
if (!res)
return nullptr;
mozilla::Unused << newChars.ref<Latin1CharPtr>().release();
} else {
- res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
+ res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), resultLength);
if (!res)
return nullptr;
@@ -847,57 +1236,79 @@ ToUpperCase(JSContext* cx, JSLinearString* str)
return res;
}
-static bool
-ToUpperCaseHelper(JSContext* cx, const CallArgs& args)
+JSString*
+js::StringToUpperCase(JSContext* cx, HandleLinearString string)
+{
+ if (string->hasLatin1Chars())
+ return ToUpperCase<Latin1Char>(cx, string);
+ return ToUpperCase<char16_t>(cx, string);
+}
+
+bool
+js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
{
+ CallArgs args = CallArgsFromVp(argc, vp);
+
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
- JSLinearString* linear = str->ensureLinear(cx);
+ RootedLinearString linear(cx, str->ensureLinear(cx));
if (!linear)
return false;
- if (linear->hasLatin1Chars())
- str = ToUpperCase<Latin1Char>(cx, linear);
- else
- str = ToUpperCase<char16_t>(cx, linear);
- if (!str)
+ JSString* result = StringToUpperCase(cx, linear);
+ if (!result)
return false;
- args.rval().setString(str);
+ args.rval().setString(result);
return true;
}
bool
-js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
-{
- return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
-}
-
-bool
-js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
+js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
+ MOZ_ASSERT(args.length() == 2);
+ MOZ_ASSERT(args[0].isString());
+ MOZ_ASSERT(args[1].isString());
- /*
- * Forcefully ignore the first (or any) argument and return toUpperCase(),
- * ECMA has reserved that argument, presumably for defining the locale.
- */
- if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
- RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
- if (!str)
- return false;
+ RootedLinearString linear(cx, args[0].toString()->ensureLinear(cx));
+ if (!linear)
+ return false;
+
+ const char* locale = CaseMappingLocale(cx, args[1].toString());
+ if (!locale)
+ return false;
- RootedValue result(cx);
- if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
+ // Call String.prototype.toUpperCase() for language independent casing.
+ if (intl::StringsAreEqual(locale, "")) {
+ JSString* str = StringToUpperCase(cx, linear);
+ if (!str)
return false;
- args.rval().set(result);
+ args.rval().setString(str);
return true;
}
- return ToUpperCaseHelper(cx, args);
+ AutoStableStringChars inputChars(cx);
+ if (!inputChars.initTwoByte(cx, linear))
+ return false;
+ mozilla::Range<const char16_t> input = inputChars.twoByteRange();
+
+ // Maximum case mapping length is three characters.
+ static_assert(JSString::MAX_LENGTH < INT32_MAX / 3,
+ "Case conversion doesn't overflow int32_t indices");
+
+ JSString* str = intl::CallICU(cx, [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
+ return u_strToUpper(chars, size, Char16ToUChar(input.begin().get()), input.length(),
+ locale, status);
+ });
+ if (!str)
+ return false;
+
+ args.rval().setString(str);
+ return true;
}
/* ES2017 21.1.3.12. */
@@ -944,7 +1355,7 @@ js::str_normalize(JSContext* cx, unsigned argc, Value* vp)
if (!linear)
return false;
- // Latin1 strings are already in Normalization Form C.
+ // Latin-1 strings are already in Normalization Form C.
if (form == NFC && linear->hasLatin1Chars()) {
// Step 7.
args.rval().setString(str);
@@ -1359,7 +1770,7 @@ StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t
/*
* For big patterns with large potential overlap we want the SIMD-optimized
* speed of memcmp. For small patterns, a simple loop is faster. We also can't
- * use memcmp if one of the strings is TwoByte and the other is Latin1.
+ * use memcmp if one of the strings is TwoByte and the other is Latin-1.
*
* FIXME: Linux memcmp performance is sad and the manual loop is faster.
*/
@@ -1555,7 +1966,7 @@ RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match)
* need to build the list of leaf nodes. Do both here: iterate over the
* nodes so long as there are not too many.
*
- * We also don't use rope matching if the rope contains both Latin1 and
+ * We also don't use rope matching if the rope contains both Latin-1 and
* TwoByte nodes, to simplify the match algorithm.
*/
{
@@ -2890,8 +3301,8 @@ static const JSFunctionSpec string_methods[] = {
JS_FN("trimStart", str_trimStart, 0,0),
JS_FN("trimRight", str_trimEnd, 0,0),
JS_FN("trimEnd", str_trimEnd, 0,0),
- JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,0),
- JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,0),
+ JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0,0),
+ JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0,0),
JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0),
JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0),
JS_FN("normalize", str_normalize, 0,0),
@@ -3000,7 +3411,7 @@ js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp)
// string (thin or fat) and so we don't need to malloc the chars. (We could
// cover some cases where args.length() goes up to
// JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
- // all Latin1, but it doesn't seem worth the effort.)
+ // all Latin-1, but it doesn't seem worth the effort.)
if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE)
return str_fromCharCode_few_args(cx, args);
@@ -3143,7 +3554,7 @@ js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp)
// string (thin or fat) and so we don't need to malloc the chars. (We could
// cover some cases where |args.length()| goes up to
// JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
- // are all Latin1, but it doesn't seem worth the effort.)
+ // are all Latin-1, but it doesn't seem worth the effort.)
if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2)
return str_fromCodePoint_few_args(cx, args);
diff --git a/js/src/jsstr.h b/js/src/jsstr.h
index 0e31276a86..cd2be4e59b 100644
--- a/js/src/jsstr.h
+++ b/js/src/jsstr.h
@@ -371,11 +371,24 @@ str_trimStart(JSContext* cx, unsigned argc, Value* vp);
extern bool
str_trimEnd(JSContext* cx, unsigned argc, Value* vp);
-extern bool
-str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp);
+/**
+ * Returns the input string converted to lower case based on the language
+ * specific case mappings for the input locale.
+ *
+ * Usage: lowerCase = intl_toLocaleLowerCase(string, locale)
+ */
+extern MOZ_MUST_USE bool
+intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp);
+
+/**
+ * Returns the input string converted to upper case based on the language
+ * specific case mappings for the input locale.
+ *
+ * Usage: upperCase = intl_toLocaleUpperCase(string, locale)
+ */
+extern MOZ_MUST_USE bool
+intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp);
-extern bool
-str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp);
extern bool
str_normalize(JSContext* cx, unsigned argc, Value* vp);
@@ -480,6 +493,12 @@ JSString*
str_replaceAll_string_raw(JSContext* cx, HandleString string, HandleString pattern,
HandleString replacement);
+extern JSString*
+StringToLowerCase(JSContext* cx, HandleLinearString string);
+
+extern JSString*
+StringToUpperCase(JSContext* cx, HandleLinearString string);
+
extern bool
StringConstructor(JSContext* cx, unsigned argc, Value* vp);
diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp
index bc66d6aa1e..0717bfd490 100644
--- a/js/src/vm/SelfHosting.cpp
+++ b/js/src/vm/SelfHosting.cpp
@@ -2207,11 +2207,9 @@ static const JSFunctionSpec intrinsic_functions[] = {
JS_FN("std_String_trimStart", str_trimStart, 0,0),
JS_FN("std_String_trimRight", str_trimEnd, 0,0),
JS_FN("std_String_trimEnd", str_trimEnd, 0,0),
- JS_FN("std_String_toLocaleLowerCase", str_toLocaleLowerCase, 0,0),
- JS_FN("std_String_toLocaleUpperCase", str_toLocaleUpperCase, 0,0),
JS_FN("std_String_normalize", str_normalize, 0,0),
JS_FN("std_String_concat", str_concat, 1,0),
-
+
JS_FN("std_TypedArray_buffer", js::TypedArray_bufferGetter, 1,0),
JS_FN("std_WeakMap_has", WeakMap_has, 1,0),
@@ -2485,6 +2483,8 @@ static const JSFunctionSpec intrinsic_functions[] = {
JS_FN("intl_PluralRules_availableLocales", intl_PluralRules_availableLocales, 0,0),
JS_FN("intl_GetPluralCategories", intl_GetPluralCategories, 2, 0),
JS_FN("intl_SelectPluralRule", intl_SelectPluralRule, 2,0),
+ JS_FN("intl_toLocaleLowerCase", intl_toLocaleLowerCase, 2,0),
+ JS_FN("intl_toLocaleUpperCase", intl_toLocaleUpperCase, 2,0),
JS_FN("intl_RelativeTimeFormat_availableLocales", intl_RelativeTimeFormat_availableLocales, 0,0),
JS_FN("intl_FormatRelativeTime", intl_FormatRelativeTime, 3,0),
diff --git a/js/src/vm/SpecialCasing.txt b/js/src/vm/SpecialCasing.txt
new file mode 100644
index 0000000000..c90d09acb3
--- /dev/null
+++ b/js/src/vm/SpecialCasing.txt
@@ -0,0 +1,281 @@
+# SpecialCasing-11.0.0.txt
+# Date: 2018-02-22, 06:16:47 GMT
+# © 2018 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Special Casing
+#
+# This file is a supplement to the UnicodeData.txt file. It does not define any
+# properties, but rather provides additional information about the casing of
+# Unicode characters, for situations when casing incurs a change in string length
+# or is dependent on context or locale. For compatibility, the UnicodeData.txt
+# file only contains simple case mappings for characters where they are one-to-one
+# and independent of context and language. The data in this file, combined with
+# the simple case mappings in UnicodeData.txt, defines the full case mappings
+# Lowercase_Mapping (lc), Titlecase_Mapping (tc), and Uppercase_Mapping (uc).
+#
+# Note that the preferred mechanism for defining tailored casing operations is
+# the Unicode Common Locale Data Repository (CLDR). For more information, see the
+# discussion of case mappings and case algorithms in the Unicode Standard.
+#
+# All code points not listed in this file that do not have a simple case mappings
+# in UnicodeData.txt map to themselves.
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment>
+#
+# <code>, <lower>, <title>, and <upper> provide the respective full case mappings
+# of <code>, expressed as character values in hex. If there is more than one character,
+# they are separated by spaces. Other than as used to separate elements, spaces are
+# to be ignored.
+#
+# The <condition_list> is optional. Where present, it consists of one or more language IDs
+# or casing contexts, separated by spaces. In these conditions:
+# - A condition list overrides the normal behavior if all of the listed conditions are true.
+# - The casing context is always the context of the characters in the original string,
+# NOT in the resulting string.
+# - Case distinctions in the condition list are not significant.
+# - Conditions preceded by "Not_" represent the negation of the condition.
+# The condition list is not represented in the UCD as a formal property.
+#
+# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
+#
+# A casing context for a character is defined by Section 3.13 Default Case Algorithms
+# of The Unicode Standard.
+#
+# Parsers of this file must be prepared to deal with future additions to this format:
+# * Additional contexts
+# * Additional fields
+# ================================================================================
+
+# ================================================================================
+# Unconditional mappings
+# ================================================================================
+
+# The German es-zed is special--the normal mapping is to SS.
+# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))
+
+00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
+
+# Preserve canonical equivalence for I with dot. Turkic is handled below.
+
+0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+
+# Ligatures
+
+FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
+FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
+FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
+FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
+FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
+FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
+FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
+
+0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
+FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
+FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
+FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
+FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
+FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
+
+# No corresponding uppercase precomposed character
+
+0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
+1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
+1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
+1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
+1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
+1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
+1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+
+# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
+# the result will be incorrect unless the iota-subscript is moved to the end
+# of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
+# This process can be achieved by first transforming the text to NFC before casing.
+# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
+
+# The following cases are already in the UnicodeData.txt file, so are only commented here.
+
+# 0345; 0345; 0399; 0399; # COMBINING GREEK YPOGEGRAMMENI
+
+# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
+# have special uppercases.
+# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
+
+1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+
+# Some characters with YPOGEGRAMMENI also have no corresponding titlecases
+
+1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+
+1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+
+# ================================================================================
+# Conditional Mappings
+# The remainder of this file provides conditional casing data used to produce
+# full case mappings.
+# ================================================================================
+# Language-Insensitive Mappings
+# These are characters whose full case mappings do not depend on language, but do
+# depend on context (which characters come before or after). For more information
+# see the header of this file and the Unicode Standard.
+# ================================================================================
+
+# Special case for final form of sigma
+
+03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
+
+# Note: the following cases for non-final are already in the UnicodeData.txt file.
+
+# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
+# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
+# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
+
+# Note: the following cases are not included, since they would case-fold in lowercasing
+
+# 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA
+# 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
+
+# ================================================================================
+# Language-Sensitive Mappings
+# These are characters whose full case mappings depend on language and perhaps also
+# context (which characters come before or after). For more information
+# see the header of this file and the Unicode Standard.
+# ================================================================================
+
+# Lithuanian
+
+# Lithuanian retains the dot in a lowercase i when followed by accents.
+
+# Remove DOT ABOVE after "i" with upper or titlecase
+
+0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
+
+# Introduce an explicit dot above when lowercasing capital I's and J's
+# whenever there are more accents above.
+# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
+
+0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
+004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
+012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
+00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
+0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
+
+# ================================================================================
+
+# Turkish and Azeri
+
+# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
+# The following rules handle those cases.
+
+0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+
+# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
+# This matches the behavior of the canonically equivalent I-dot_above
+
+0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
+0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
+
+# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
+
+0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
+0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
+
+# When uppercasing, i turns into a dotted capital I
+
+0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
+0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
+
+# Note: the following case is already in the UnicodeData.txt file.
+
+# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
+
+# EOF
+
diff --git a/js/src/vm/Unicode.cpp b/js/src/vm/Unicode.cpp
index bc4566c825..0236f7c649 100644
--- a/js/src/vm/Unicode.cpp
+++ b/js/src/vm/Unicode.cpp
@@ -55,176 +55,176 @@ using namespace js::unicode;
* stop if you found the best shift
*/
const CharacterInfo unicode::js_charinfo[] = {
- {0, 0, 0},
- {0, 0, 1},
- {0, 0, 4},
- {0, 32, 2},
- {65504, 0, 2},
- {0, 0, 2},
- {743, 0, 2},
- {121, 0, 2},
- {0, 1, 2},
- {65535, 0, 2},
- {0, 65337, 2},
- {65304, 0, 2},
- {0, 65415, 2},
- {65236, 0, 2},
- {195, 0, 2},
- {0, 210, 2},
- {0, 206, 2},
- {0, 205, 2},
- {0, 79, 2},
- {0, 202, 2},
- {0, 203, 2},
- {0, 207, 2},
- {97, 0, 2},
- {0, 211, 2},
- {0, 209, 2},
- {163, 0, 2},
- {0, 213, 2},
- {130, 0, 2},
- {0, 214, 2},
- {0, 218, 2},
- {0, 217, 2},
- {0, 219, 2},
- {56, 0, 2},
- {0, 2, 2},
- {65535, 1, 2},
- {65534, 0, 2},
- {65457, 0, 2},
- {0, 65439, 2},
- {0, 65480, 2},
- {0, 65406, 2},
- {0, 10795, 2},
- {0, 65373, 2},
- {0, 10792, 2},
- {10815, 0, 2},
- {0, 65341, 2},
- {0, 69, 2},
- {0, 71, 2},
- {10783, 0, 2},
- {10780, 0, 2},
- {10782, 0, 2},
- {65326, 0, 2},
- {65330, 0, 2},
- {65331, 0, 2},
- {65334, 0, 2},
- {65333, 0, 2},
- {42319, 0, 2},
- {42315, 0, 2},
- {65329, 0, 2},
- {42280, 0, 2},
- {42308, 0, 2},
- {65327, 0, 2},
- {65325, 0, 2},
- {10743, 0, 2},
- {42305, 0, 2},
- {10749, 0, 2},
- {65323, 0, 2},
- {65322, 0, 2},
- {10727, 0, 2},
- {65318, 0, 2},
- {42282, 0, 2},
- {65467, 0, 2},
- {65319, 0, 2},
- {65465, 0, 2},
- {65317, 0, 2},
- {42261, 0, 2},
- {42258, 0, 2},
- {84, 0, 4},
- {0, 116, 2},
- {0, 38, 2},
- {0, 37, 2},
- {0, 64, 2},
- {0, 63, 2},
- {65498, 0, 2},
- {65499, 0, 2},
- {65505, 0, 2},
- {65472, 0, 2},
- {65473, 0, 2},
- {0, 8, 2},
- {65474, 0, 2},
- {65479, 0, 2},
- {65489, 0, 2},
- {65482, 0, 2},
- {65528, 0, 2},
- {65450, 0, 2},
- {65456, 0, 2},
- {7, 0, 2},
- {65420, 0, 2},
- {0, 65476, 2},
- {65440, 0, 2},
- {0, 65529, 2},
- {0, 80, 2},
- {0, 15, 2},
- {65521, 0, 2},
- {0, 48, 2},
- {65488, 0, 2},
- {0, 7264, 2},
- {3008, 0, 2},
- {0, 38864, 2},
- {59282, 0, 2},
- {59283, 0, 2},
- {59292, 0, 2},
- {59294, 0, 2},
- {59293, 0, 2},
- {59300, 0, 2},
- {59355, 0, 2},
- {35266, 0, 2},
- {0, 62528, 2},
- {35332, 0, 2},
- {3814, 0, 2},
- {65477, 0, 2},
- {0, 57921, 2},
- {8, 0, 2},
- {0, 65528, 2},
- {74, 0, 2},
- {86, 0, 2},
- {100, 0, 2},
- {128, 0, 2},
- {112, 0, 2},
- {126, 0, 2},
- {9, 0, 2},
- {0, 65462, 2},
- {0, 65527, 2},
- {58331, 0, 2},
- {0, 65450, 2},
- {0, 65436, 2},
- {0, 65424, 2},
- {0, 65408, 2},
- {0, 65410, 2},
- {0, 58019, 2},
- {0, 57153, 2},
- {0, 57274, 2},
- {0, 28, 2},
- {65508, 0, 2},
- {0, 16, 2},
- {65520, 0, 2},
- {0, 26, 0},
- {65510, 0, 0},
- {0, 54793, 2},
- {0, 61722, 2},
- {0, 54809, 2},
- {54741, 0, 2},
- {54744, 0, 2},
- {0, 54756, 2},
- {0, 54787, 2},
- {0, 54753, 2},
- {0, 54754, 2},
- {0, 54721, 2},
- {58272, 0, 2},
- {0, 30204, 2},
- {0, 23256, 2},
- {0, 23228, 2},
- {0, 23217, 2},
- {0, 23221, 2},
- {0, 23231, 2},
- {0, 23278, 2},
- {0, 23254, 2},
- {0, 23275, 2},
- {0, 928, 2},
- {64608, 0, 2},
- {26672, 0, 2},
+ { 0, 0, 0 },
+ { 0, 0, 1 },
+ { 0, 0, 4 },
+ { 0, 32, 2 },
+ { 65504, 0, 2 },
+ { 0, 0, 2 },
+ { 743, 0, 2 },
+ { 121, 0, 2 },
+ { 0, 1, 2 },
+ { 65535, 0, 2 },
+ { 0, 65337, 2 },
+ { 65304, 0, 2 },
+ { 0, 65415, 2 },
+ { 65236, 0, 2 },
+ { 195, 0, 2 },
+ { 0, 210, 2 },
+ { 0, 206, 2 },
+ { 0, 205, 2 },
+ { 0, 79, 2 },
+ { 0, 202, 2 },
+ { 0, 203, 2 },
+ { 0, 207, 2 },
+ { 97, 0, 2 },
+ { 0, 211, 2 },
+ { 0, 209, 2 },
+ { 163, 0, 2 },
+ { 0, 213, 2 },
+ { 130, 0, 2 },
+ { 0, 214, 2 },
+ { 0, 218, 2 },
+ { 0, 217, 2 },
+ { 0, 219, 2 },
+ { 56, 0, 2 },
+ { 0, 2, 2 },
+ { 65535, 1, 2 },
+ { 65534, 0, 2 },
+ { 65457, 0, 2 },
+ { 0, 65439, 2 },
+ { 0, 65480, 2 },
+ { 0, 65406, 2 },
+ { 0, 10795, 2 },
+ { 0, 65373, 2 },
+ { 0, 10792, 2 },
+ { 10815, 0, 2 },
+ { 0, 65341, 2 },
+ { 0, 69, 2 },
+ { 0, 71, 2 },
+ { 10783, 0, 2 },
+ { 10780, 0, 2 },
+ { 10782, 0, 2 },
+ { 65326, 0, 2 },
+ { 65330, 0, 2 },
+ { 65331, 0, 2 },
+ { 65334, 0, 2 },
+ { 65333, 0, 2 },
+ { 42319, 0, 2 },
+ { 42315, 0, 2 },
+ { 65329, 0, 2 },
+ { 42280, 0, 2 },
+ { 42308, 0, 2 },
+ { 65327, 0, 2 },
+ { 65325, 0, 2 },
+ { 10743, 0, 2 },
+ { 42305, 0, 2 },
+ { 10749, 0, 2 },
+ { 65323, 0, 2 },
+ { 65322, 0, 2 },
+ { 10727, 0, 2 },
+ { 65318, 0, 2 },
+ { 42282, 0, 2 },
+ { 65467, 0, 2 },
+ { 65319, 0, 2 },
+ { 65465, 0, 2 },
+ { 65317, 0, 2 },
+ { 42261, 0, 2 },
+ { 42258, 0, 2 },
+ { 84, 0, 4 },
+ { 0, 116, 2 },
+ { 0, 38, 2 },
+ { 0, 37, 2 },
+ { 0, 64, 2 },
+ { 0, 63, 2 },
+ { 65498, 0, 2 },
+ { 65499, 0, 2 },
+ { 65505, 0, 2 },
+ { 65472, 0, 2 },
+ { 65473, 0, 2 },
+ { 0, 8, 2 },
+ { 65474, 0, 2 },
+ { 65479, 0, 2 },
+ { 65489, 0, 2 },
+ { 65482, 0, 2 },
+ { 65528, 0, 2 },
+ { 65450, 0, 2 },
+ { 65456, 0, 2 },
+ { 7, 0, 2 },
+ { 65420, 0, 2 },
+ { 0, 65476, 2 },
+ { 65440, 0, 2 },
+ { 0, 65529, 2 },
+ { 0, 80, 2 },
+ { 0, 15, 2 },
+ { 65521, 0, 2 },
+ { 0, 48, 2 },
+ { 65488, 0, 2 },
+ { 0, 7264, 2 },
+ { 3008, 0, 2 },
+ { 0, 38864, 2 },
+ { 59282, 0, 2 },
+ { 59283, 0, 2 },
+ { 59292, 0, 2 },
+ { 59294, 0, 2 },
+ { 59293, 0, 2 },
+ { 59300, 0, 2 },
+ { 59355, 0, 2 },
+ { 35266, 0, 2 },
+ { 0, 62528, 2 },
+ { 35332, 0, 2 },
+ { 3814, 0, 2 },
+ { 65477, 0, 2 },
+ { 0, 57921, 2 },
+ { 8, 0, 2 },
+ { 0, 65528, 2 },
+ { 74, 0, 2 },
+ { 86, 0, 2 },
+ { 100, 0, 2 },
+ { 128, 0, 2 },
+ { 112, 0, 2 },
+ { 126, 0, 2 },
+ { 9, 0, 2 },
+ { 0, 65462, 2 },
+ { 0, 65527, 2 },
+ { 58331, 0, 2 },
+ { 0, 65450, 2 },
+ { 0, 65436, 2 },
+ { 0, 65424, 2 },
+ { 0, 65408, 2 },
+ { 0, 65410, 2 },
+ { 0, 58019, 2 },
+ { 0, 57153, 2 },
+ { 0, 57274, 2 },
+ { 0, 28, 2 },
+ { 65508, 0, 2 },
+ { 0, 16, 2 },
+ { 65520, 0, 2 },
+ { 0, 26, 0 },
+ { 65510, 0, 0 },
+ { 0, 54793, 2 },
+ { 0, 61722, 2 },
+ { 0, 54809, 2 },
+ { 54741, 0, 2 },
+ { 54744, 0, 2 },
+ { 0, 54756, 2 },
+ { 0, 54787, 2 },
+ { 0, 54753, 2 },
+ { 0, 54754, 2 },
+ { 0, 54721, 2 },
+ { 58272, 0, 2 },
+ { 0, 30204, 2 },
+ { 0, 23256, 2 },
+ { 0, 23228, 2 },
+ { 0, 23217, 2 },
+ { 0, 23221, 2 },
+ { 0, 23231, 2 },
+ { 0, 23278, 2 },
+ { 0, 23254, 2 },
+ { 0, 23275, 2 },
+ { 0, 928, 2 },
+ { 64608, 0, 2 },
+ { 26672, 0, 2 },
};
const uint8_t unicode::index1[] = {
@@ -928,141 +928,141 @@ const uint8_t unicode::index2[] = {
};
const CodepointsWithSameUpperCaseInfo unicode::js_codepoints_with_same_upper_info[] = {
- {0, 0, 0},
- {32, 0, 0},
- {32, 232, 0},
- {32, 300, 0},
- {0, 200, 0},
- {0, 268, 0},
- {0, 775, 0},
- {1, 0, 0},
- {65336, 0, 0},
- {65415, 0, 0},
- {65268, 0, 0},
- {210, 0, 0},
- {206, 0, 0},
- {205, 0, 0},
- {79, 0, 0},
- {202, 0, 0},
- {203, 0, 0},
- {207, 0, 0},
- {211, 0, 0},
- {209, 0, 0},
- {213, 0, 0},
- {214, 0, 0},
- {218, 0, 0},
- {217, 0, 0},
- {219, 0, 0},
- {1, 2, 0},
- {0, 1, 0},
- {65535, 0, 0},
- {65439, 0, 0},
- {65480, 0, 0},
- {65406, 0, 0},
- {10795, 0, 0},
- {65373, 0, 0},
- {10792, 0, 0},
- {65341, 0, 0},
- {69, 0, 0},
- {71, 0, 0},
- {0, 116, 7289},
- {116, 0, 0},
- {38, 0, 0},
- {37, 0, 0},
- {64, 0, 0},
- {63, 0, 0},
- {32, 62, 0},
- {32, 96, 0},
- {32, 57, 0},
- {65452, 32, 7205},
- {32, 86, 0},
- {64793, 32, 0},
- {32, 54, 0},
- {32, 80, 0},
- {31, 32, 0},
- {32, 47, 0},
- {0, 30, 0},
- {0, 64, 0},
- {0, 25, 0},
- {65420, 0, 7173},
- {0, 54, 0},
- {64761, 0, 0},
- {0, 22, 0},
- {0, 48, 0},
- {0, 15, 0},
- {8, 0, 0},
- {65506, 0, 0},
- {65511, 0, 0},
- {65521, 0, 0},
- {65514, 0, 0},
- {65482, 0, 0},
- {65488, 0, 0},
- {65472, 0, 0},
- {65529, 0, 0},
- {80, 0, 0},
- {32, 6254, 0},
- {32, 6253, 0},
- {32, 6244, 0},
- {32, 6242, 0},
- {32, 6242, 6243},
- {32, 6236, 0},
- {0, 6222, 0},
- {0, 6221, 0},
- {0, 6212, 0},
- {0, 6210, 0},
- {0, 6210, 6211},
- {0, 6204, 0},
- {1, 6181, 0},
- {0, 6180, 0},
- {15, 0, 0},
- {48, 0, 0},
- {7264, 0, 0},
- {38864, 0, 0},
- {59314, 0, 0},
- {59315, 0, 0},
- {59324, 0, 0},
- {59326, 0, 0},
- {59326, 0, 1},
- {59325, 65535, 0},
- {59332, 0, 0},
- {59356, 0, 0},
- {0, 35267, 0},
- {62528, 0, 0},
- {1, 59, 0},
- {0, 58, 0},
- {65478, 0, 0},
- {65528, 0, 0},
- {65462, 0, 0},
- {65527, 0, 0},
- {58247, 58363, 0},
- {65450, 0, 0},
- {65436, 0, 0},
- {65424, 0, 0},
- {65408, 0, 0},
- {65410, 0, 0},
- {28, 0, 0},
- {16, 0, 0},
- {26, 0, 0},
- {54793, 0, 0},
- {61722, 0, 0},
- {54809, 0, 0},
- {54756, 0, 0},
- {54787, 0, 0},
- {54753, 0, 0},
- {54754, 0, 0},
- {54721, 0, 0},
- {30270, 1, 0},
- {30269, 0, 0},
- {30204, 0, 0},
- {23256, 0, 0},
- {23228, 0, 0},
- {23217, 0, 0},
- {23221, 0, 0},
- {23231, 0, 0},
- {23278, 0, 0},
- {23254, 0, 0},
- {23275, 0, 0},
- {928, 0, 0},
+ { 0, 0, 0 },
+ { 32, 0, 0 },
+ { 32, 232, 0 },
+ { 32, 300, 0 },
+ { 0, 200, 0 },
+ { 0, 268, 0 },
+ { 0, 775, 0 },
+ { 1, 0, 0 },
+ { 65336, 0, 0 },
+ { 65415, 0, 0 },
+ { 65268, 0, 0 },
+ { 210, 0, 0 },
+ { 206, 0, 0 },
+ { 205, 0, 0 },
+ { 79, 0, 0 },
+ { 202, 0, 0 },
+ { 203, 0, 0 },
+ { 207, 0, 0 },
+ { 211, 0, 0 },
+ { 209, 0, 0 },
+ { 213, 0, 0 },
+ { 214, 0, 0 },
+ { 218, 0, 0 },
+ { 217, 0, 0 },
+ { 219, 0, 0 },
+ { 1, 2, 0 },
+ { 0, 1, 0 },
+ { 65535, 0, 0 },
+ { 65439, 0, 0 },
+ { 65480, 0, 0 },
+ { 65406, 0, 0 },
+ { 10795, 0, 0 },
+ { 65373, 0, 0 },
+ { 10792, 0, 0 },
+ { 65341, 0, 0 },
+ { 69, 0, 0 },
+ { 71, 0, 0 },
+ { 0, 116, 7289 },
+ { 116, 0, 0 },
+ { 38, 0, 0 },
+ { 37, 0, 0 },
+ { 64, 0, 0 },
+ { 63, 0, 0 },
+ { 32, 62, 0 },
+ { 32, 96, 0 },
+ { 32, 57, 0 },
+ { 65452, 32, 7205 },
+ { 32, 86, 0 },
+ { 64793, 32, 0 },
+ { 32, 54, 0 },
+ { 32, 80, 0 },
+ { 31, 32, 0 },
+ { 32, 47, 0 },
+ { 0, 30, 0 },
+ { 0, 64, 0 },
+ { 0, 25, 0 },
+ { 65420, 0, 7173 },
+ { 0, 54, 0 },
+ { 64761, 0, 0 },
+ { 0, 22, 0 },
+ { 0, 48, 0 },
+ { 0, 15, 0 },
+ { 8, 0, 0 },
+ { 65506, 0, 0 },
+ { 65511, 0, 0 },
+ { 65521, 0, 0 },
+ { 65514, 0, 0 },
+ { 65482, 0, 0 },
+ { 65488, 0, 0 },
+ { 65472, 0, 0 },
+ { 65529, 0, 0 },
+ { 80, 0, 0 },
+ { 32, 6254, 0 },
+ { 32, 6253, 0 },
+ { 32, 6244, 0 },
+ { 32, 6242, 0 },
+ { 32, 6242, 6243 },
+ { 32, 6236, 0 },
+ { 0, 6222, 0 },
+ { 0, 6221, 0 },
+ { 0, 6212, 0 },
+ { 0, 6210, 0 },
+ { 0, 6210, 6211 },
+ { 0, 6204, 0 },
+ { 1, 6181, 0 },
+ { 0, 6180, 0 },
+ { 15, 0, 0 },
+ { 48, 0, 0 },
+ { 7264, 0, 0 },
+ { 38864, 0, 0 },
+ { 59314, 0, 0 },
+ { 59315, 0, 0 },
+ { 59324, 0, 0 },
+ { 59326, 0, 0 },
+ { 59326, 0, 1 },
+ { 59325, 65535, 0 },
+ { 59332, 0, 0 },
+ { 59356, 0, 0 },
+ { 0, 35267, 0 },
+ { 62528, 0, 0 },
+ { 1, 59, 0 },
+ { 0, 58, 0 },
+ { 65478, 0, 0 },
+ { 65528, 0, 0 },
+ { 65462, 0, 0 },
+ { 65527, 0, 0 },
+ { 58247, 58363, 0 },
+ { 65450, 0, 0 },
+ { 65436, 0, 0 },
+ { 65424, 0, 0 },
+ { 65408, 0, 0 },
+ { 65410, 0, 0 },
+ { 28, 0, 0 },
+ { 16, 0, 0 },
+ { 26, 0, 0 },
+ { 54793, 0, 0 },
+ { 61722, 0, 0 },
+ { 54809, 0, 0 },
+ { 54756, 0, 0 },
+ { 54787, 0, 0 },
+ { 54753, 0, 0 },
+ { 54754, 0, 0 },
+ { 54721, 0, 0 },
+ { 30270, 1, 0 },
+ { 30269, 0, 0 },
+ { 30204, 0, 0 },
+ { 23256, 0, 0 },
+ { 23228, 0, 0 },
+ { 23217, 0, 0 },
+ { 23221, 0, 0 },
+ { 23231, 0, 0 },
+ { 23278, 0, 0 },
+ { 23254, 0, 0 },
+ { 23275, 0, 0 },
+ { 928, 0, 0 },
};
const uint8_t unicode::codepoints_with_same_upper_index1[] = {
@@ -1289,220 +1289,220 @@ const uint8_t unicode::codepoints_with_same_upper_index2[] = {
};
const FoldingInfo unicode::js_foldinfo[] = {
- {0, 0, 0, 0},
- {32, 0, 0, 0},
- {32, 8415, 0, 0},
- {32, 300, 0, 0},
- {0, 65504, 0, 0},
- {0, 65504, 8383, 0},
- {0, 65504, 268, 0},
- {775, 743, 0, 0},
- {32, 8294, 0, 0},
- {0, 7615, 0, 0},
- {0, 65504, 8262, 0},
- {0, 121, 0, 0},
- {1, 0, 0, 0},
- {0, 65535, 0, 0},
- {65415, 0, 0, 0},
- {65268, 65236, 0, 0},
- {0, 195, 0, 0},
- {210, 0, 0, 0},
- {206, 0, 0, 0},
- {205, 0, 0, 0},
- {79, 0, 0, 0},
- {202, 0, 0, 0},
- {203, 0, 0, 0},
- {207, 0, 0, 0},
- {0, 97, 0, 0},
- {211, 0, 0, 0},
- {209, 0, 0, 0},
- {0, 163, 0, 0},
- {213, 0, 0, 0},
- {0, 130, 0, 0},
- {214, 0, 0, 0},
- {218, 0, 0, 0},
- {217, 0, 0, 0},
- {219, 0, 0, 0},
- {0, 56, 0, 0},
- {2, 1, 0, 0},
- {1, 65535, 0, 0},
- {0, 65534, 65535, 0},
- {0, 65457, 0, 0},
- {65439, 0, 0, 0},
- {65480, 0, 0, 0},
- {65406, 0, 0, 0},
- {10795, 0, 0, 0},
- {65373, 0, 0, 0},
- {10792, 0, 0, 0},
- {0, 10815, 0, 0},
- {65341, 0, 0, 0},
- {69, 0, 0, 0},
- {71, 0, 0, 0},
- {0, 10783, 0, 0},
- {0, 10780, 0, 0},
- {0, 10782, 0, 0},
- {0, 65326, 0, 0},
- {0, 65330, 0, 0},
- {0, 65331, 0, 0},
- {0, 65334, 0, 0},
- {0, 65333, 0, 0},
- {0, 42319, 0, 0},
- {0, 42315, 0, 0},
- {0, 65329, 0, 0},
- {0, 42280, 0, 0},
- {0, 42308, 0, 0},
- {0, 65327, 0, 0},
- {0, 65325, 0, 0},
- {0, 10743, 0, 0},
- {0, 42305, 0, 0},
- {0, 10749, 0, 0},
- {0, 65323, 0, 0},
- {0, 65322, 0, 0},
- {0, 10727, 0, 0},
- {0, 65318, 0, 0},
- {0, 42282, 0, 0},
- {0, 65467, 0, 0},
- {0, 65319, 0, 0},
- {0, 65465, 0, 0},
- {0, 65317, 0, 0},
- {0, 42261, 0, 0},
- {0, 42258, 0, 0},
- {116, 84, 7289, 0},
- {116, 0, 0, 0},
- {38, 0, 0, 0},
- {37, 0, 0, 0},
- {64, 0, 0, 0},
- {63, 0, 0, 0},
- {32, 62, 0, 0},
- {32, 96, 0, 0},
- {32, 57, 92, 0},
- {32, 65452, 7205, 0},
- {32, 86, 0, 0},
- {32, 64793, 0, 0},
- {32, 54, 0, 0},
- {32, 80, 0, 0},
- {32, 31, 0, 0},
- {32, 47, 0, 0},
- {32, 7549, 0, 0},
- {0, 65498, 0, 0},
- {0, 65499, 0, 0},
- {0, 65504, 30, 0},
- {0, 65504, 64, 0},
- {0, 65504, 25, 60},
- {0, 65420, 65504, 7173},
- {0, 65504, 54, 0},
- {0, 64761, 65504, 0},
- {0, 65504, 22, 0},
- {0, 65504, 48, 0},
- {1, 65505, 0, 0},
- {0, 65504, 65535, 0},
- {0, 65504, 15, 0},
- {0, 65504, 7517, 0},
- {0, 65472, 0, 0},
- {0, 65473, 0, 0},
- {8, 0, 0, 0},
- {65506, 65474, 0, 0},
- {65511, 65479, 35, 0},
- {65521, 65489, 0, 0},
- {65514, 65482, 0, 0},
- {0, 65528, 0, 0},
- {65482, 65450, 0, 0},
- {65488, 65456, 0, 0},
- {0, 7, 0, 0},
- {0, 65420, 0, 0},
- {65476, 65444, 65501, 0},
- {65472, 65440, 0, 0},
- {65529, 0, 0, 0},
- {80, 0, 0, 0},
- {32, 6254, 0, 0},
- {32, 6253, 0, 0},
- {32, 6244, 0, 0},
- {32, 6242, 0, 0},
- {32, 6242, 6243, 0},
- {32, 6236, 0, 0},
- {0, 65504, 6222, 0},
- {0, 65504, 6221, 0},
- {0, 65504, 6212, 0},
- {0, 65504, 6210, 0},
- {0, 65504, 6210, 6211},
- {0, 65504, 6204, 0},
- {0, 65456, 0, 0},
- {1, 6181, 0, 0},
- {0, 65535, 6180, 0},
- {15, 0, 0, 0},
- {0, 65521, 0, 0},
- {48, 0, 0, 0},
- {0, 65488, 0, 0},
- {7264, 0, 0, 0},
- {0, 3008, 0, 0},
- {0, 38864, 0, 0},
- {0, 8, 0, 0},
- {65528, 0, 0, 0},
- {59314, 59282, 0, 0},
- {59315, 59283, 0, 0},
- {59324, 59292, 0, 0},
- {59326, 59294, 0, 0},
- {59326, 59294, 1, 0},
- {59325, 59293, 65535, 0},
- {59332, 59300, 0, 0},
- {59356, 59355, 0, 0},
- {35267, 35266, 0, 0},
- {62528, 0, 0, 0},
- {0, 35332, 0, 0},
- {0, 3814, 0, 0},
- {1, 59, 0, 0},
- {0, 65535, 58, 0},
- {65478, 65477, 0, 0},
- {57921, 0, 0, 0},
- {0, 74, 0, 0},
- {0, 86, 0, 0},
- {0, 100, 0, 0},
- {0, 128, 0, 0},
- {0, 112, 0, 0},
- {0, 126, 0, 0},
- {0, 9, 0, 0},
- {65462, 0, 0, 0},
- {65527, 0, 0, 0},
- {58363, 58247, 58331, 0},
- {65450, 0, 0, 0},
- {65436, 0, 0, 0},
- {65424, 0, 0, 0},
- {65408, 0, 0, 0},
- {65410, 0, 0, 0},
- {58019, 57987, 0, 0},
- {57153, 57121, 0, 0},
- {57274, 57242, 0, 0},
- {28, 0, 0, 0},
- {0, 65508, 0, 0},
- {16, 0, 0, 0},
- {0, 65520, 0, 0},
- {26, 0, 0, 0},
- {0, 65510, 0, 0},
- {54793, 0, 0, 0},
- {61722, 0, 0, 0},
- {54809, 0, 0, 0},
- {0, 54741, 0, 0},
- {0, 54744, 0, 0},
- {54756, 0, 0, 0},
- {54787, 0, 0, 0},
- {54753, 0, 0, 0},
- {54754, 0, 0, 0},
- {54721, 0, 0, 0},
- {0, 58272, 0, 0},
- {1, 30270, 0, 0},
- {0, 30269, 65535, 0},
- {30204, 0, 0, 0},
- {23256, 0, 0, 0},
- {23228, 0, 0, 0},
- {23217, 0, 0, 0},
- {23221, 0, 0, 0},
- {23231, 0, 0, 0},
- {23278, 0, 0, 0},
- {23254, 0, 0, 0},
- {23275, 0, 0, 0},
- {928, 0, 0, 0},
- {0, 64608, 0, 0},
- {26672, 0, 0, 0},
+ { 0, 0, 0, 0 },
+ { 32, 0, 0, 0 },
+ { 32, 8415, 0, 0 },
+ { 32, 300, 0, 0 },
+ { 0, 65504, 0, 0 },
+ { 0, 65504, 8383, 0 },
+ { 0, 65504, 268, 0 },
+ { 775, 743, 0, 0 },
+ { 32, 8294, 0, 0 },
+ { 0, 7615, 0, 0 },
+ { 0, 65504, 8262, 0 },
+ { 0, 121, 0, 0 },
+ { 1, 0, 0, 0 },
+ { 0, 65535, 0, 0 },
+ { 65415, 0, 0, 0 },
+ { 65268, 65236, 0, 0 },
+ { 0, 195, 0, 0 },
+ { 210, 0, 0, 0 },
+ { 206, 0, 0, 0 },
+ { 205, 0, 0, 0 },
+ { 79, 0, 0, 0 },
+ { 202, 0, 0, 0 },
+ { 203, 0, 0, 0 },
+ { 207, 0, 0, 0 },
+ { 0, 97, 0, 0 },
+ { 211, 0, 0, 0 },
+ { 209, 0, 0, 0 },
+ { 0, 163, 0, 0 },
+ { 213, 0, 0, 0 },
+ { 0, 130, 0, 0 },
+ { 214, 0, 0, 0 },
+ { 218, 0, 0, 0 },
+ { 217, 0, 0, 0 },
+ { 219, 0, 0, 0 },
+ { 0, 56, 0, 0 },
+ { 2, 1, 0, 0 },
+ { 1, 65535, 0, 0 },
+ { 0, 65534, 65535, 0 },
+ { 0, 65457, 0, 0 },
+ { 65439, 0, 0, 0 },
+ { 65480, 0, 0, 0 },
+ { 65406, 0, 0, 0 },
+ { 10795, 0, 0, 0 },
+ { 65373, 0, 0, 0 },
+ { 10792, 0, 0, 0 },
+ { 0, 10815, 0, 0 },
+ { 65341, 0, 0, 0 },
+ { 69, 0, 0, 0 },
+ { 71, 0, 0, 0 },
+ { 0, 10783, 0, 0 },
+ { 0, 10780, 0, 0 },
+ { 0, 10782, 0, 0 },
+ { 0, 65326, 0, 0 },
+ { 0, 65330, 0, 0 },
+ { 0, 65331, 0, 0 },
+ { 0, 65334, 0, 0 },
+ { 0, 65333, 0, 0 },
+ { 0, 42319, 0, 0 },
+ { 0, 42315, 0, 0 },
+ { 0, 65329, 0, 0 },
+ { 0, 42280, 0, 0 },
+ { 0, 42308, 0, 0 },
+ { 0, 65327, 0, 0 },
+ { 0, 65325, 0, 0 },
+ { 0, 10743, 0, 0 },
+ { 0, 42305, 0, 0 },
+ { 0, 10749, 0, 0 },
+ { 0, 65323, 0, 0 },
+ { 0, 65322, 0, 0 },
+ { 0, 10727, 0, 0 },
+ { 0, 65318, 0, 0 },
+ { 0, 42282, 0, 0 },
+ { 0, 65467, 0, 0 },
+ { 0, 65319, 0, 0 },
+ { 0, 65465, 0, 0 },
+ { 0, 65317, 0, 0 },
+ { 0, 42261, 0, 0 },
+ { 0, 42258, 0, 0 },
+ { 116, 84, 7289, 0 },
+ { 116, 0, 0, 0 },
+ { 38, 0, 0, 0 },
+ { 37, 0, 0, 0 },
+ { 64, 0, 0, 0 },
+ { 63, 0, 0, 0 },
+ { 32, 62, 0, 0 },
+ { 32, 96, 0, 0 },
+ { 32, 57, 92, 0 },
+ { 32, 65452, 7205, 0 },
+ { 32, 86, 0, 0 },
+ { 32, 64793, 0, 0 },
+ { 32, 54, 0, 0 },
+ { 32, 80, 0, 0 },
+ { 32, 31, 0, 0 },
+ { 32, 47, 0, 0 },
+ { 32, 7549, 0, 0 },
+ { 0, 65498, 0, 0 },
+ { 0, 65499, 0, 0 },
+ { 0, 65504, 30, 0 },
+ { 0, 65504, 64, 0 },
+ { 0, 65504, 25, 60 },
+ { 0, 65420, 65504, 7173 },
+ { 0, 65504, 54, 0 },
+ { 0, 64761, 65504, 0 },
+ { 0, 65504, 22, 0 },
+ { 0, 65504, 48, 0 },
+ { 1, 65505, 0, 0 },
+ { 0, 65504, 65535, 0 },
+ { 0, 65504, 15, 0 },
+ { 0, 65504, 7517, 0 },
+ { 0, 65472, 0, 0 },
+ { 0, 65473, 0, 0 },
+ { 8, 0, 0, 0 },
+ { 65506, 65474, 0, 0 },
+ { 65511, 65479, 35, 0 },
+ { 65521, 65489, 0, 0 },
+ { 65514, 65482, 0, 0 },
+ { 0, 65528, 0, 0 },
+ { 65482, 65450, 0, 0 },
+ { 65488, 65456, 0, 0 },
+ { 0, 7, 0, 0 },
+ { 0, 65420, 0, 0 },
+ { 65476, 65444, 65501, 0 },
+ { 65472, 65440, 0, 0 },
+ { 65529, 0, 0, 0 },
+ { 80, 0, 0, 0 },
+ { 32, 6254, 0, 0 },
+ { 32, 6253, 0, 0 },
+ { 32, 6244, 0, 0 },
+ { 32, 6242, 0, 0 },
+ { 32, 6242, 6243, 0 },
+ { 32, 6236, 0, 0 },
+ { 0, 65504, 6222, 0 },
+ { 0, 65504, 6221, 0 },
+ { 0, 65504, 6212, 0 },
+ { 0, 65504, 6210, 0 },
+ { 0, 65504, 6210, 6211 },
+ { 0, 65504, 6204, 0 },
+ { 0, 65456, 0, 0 },
+ { 1, 6181, 0, 0 },
+ { 0, 65535, 6180, 0 },
+ { 15, 0, 0, 0 },
+ { 0, 65521, 0, 0 },
+ { 48, 0, 0, 0 },
+ { 0, 65488, 0, 0 },
+ { 7264, 0, 0, 0 },
+ { 0, 3008, 0, 0 },
+ { 0, 38864, 0, 0 },
+ { 0, 8, 0, 0 },
+ { 65528, 0, 0, 0 },
+ { 59314, 59282, 0, 0 },
+ { 59315, 59283, 0, 0 },
+ { 59324, 59292, 0, 0 },
+ { 59326, 59294, 0, 0 },
+ { 59326, 59294, 1, 0 },
+ { 59325, 59293, 65535, 0 },
+ { 59332, 59300, 0, 0 },
+ { 59356, 59355, 0, 0 },
+ { 35267, 35266, 0, 0 },
+ { 62528, 0, 0, 0 },
+ { 0, 35332, 0, 0 },
+ { 0, 3814, 0, 0 },
+ { 1, 59, 0, 0 },
+ { 0, 65535, 58, 0 },
+ { 65478, 65477, 0, 0 },
+ { 57921, 0, 0, 0 },
+ { 0, 74, 0, 0 },
+ { 0, 86, 0, 0 },
+ { 0, 100, 0, 0 },
+ { 0, 128, 0, 0 },
+ { 0, 112, 0, 0 },
+ { 0, 126, 0, 0 },
+ { 0, 9, 0, 0 },
+ { 65462, 0, 0, 0 },
+ { 65527, 0, 0, 0 },
+ { 58363, 58247, 58331, 0 },
+ { 65450, 0, 0, 0 },
+ { 65436, 0, 0, 0 },
+ { 65424, 0, 0, 0 },
+ { 65408, 0, 0, 0 },
+ { 65410, 0, 0, 0 },
+ { 58019, 57987, 0, 0 },
+ { 57153, 57121, 0, 0 },
+ { 57274, 57242, 0, 0 },
+ { 28, 0, 0, 0 },
+ { 0, 65508, 0, 0 },
+ { 16, 0, 0, 0 },
+ { 0, 65520, 0, 0 },
+ { 26, 0, 0, 0 },
+ { 0, 65510, 0, 0 },
+ { 54793, 0, 0, 0 },
+ { 61722, 0, 0, 0 },
+ { 54809, 0, 0, 0 },
+ { 0, 54741, 0, 0 },
+ { 0, 54744, 0, 0 },
+ { 54756, 0, 0, 0 },
+ { 54787, 0, 0, 0 },
+ { 54753, 0, 0, 0 },
+ { 54754, 0, 0, 0 },
+ { 54721, 0, 0, 0 },
+ { 0, 58272, 0, 0 },
+ { 1, 30270, 0, 0 },
+ { 0, 30269, 65535, 0 },
+ { 30204, 0, 0, 0 },
+ { 23256, 0, 0, 0 },
+ { 23228, 0, 0, 0 },
+ { 23217, 0, 0, 0 },
+ { 23221, 0, 0, 0 },
+ { 23231, 0, 0, 0 },
+ { 23278, 0, 0, 0 },
+ { 23254, 0, 0, 0 },
+ { 23275, 0, 0, 0 },
+ { 928, 0, 0, 0 },
+ { 0, 64608, 0, 0 },
+ { 26672, 0, 0, 0 },
};
const uint8_t unicode::folding_index1[] = {
@@ -1756,439 +1756,439 @@ const uint8_t unicode::folding_index2[] = {
bool
js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint)
{
- if (codePoint >= 0x10000 && codePoint <= 0x1000b)
+ if (codePoint >= 0x10000 && codePoint <= 0x1000B) // LINEAR B SYLLABLE B008 A .. LINEAR B SYLLABLE B046 JE
return true;
- if (codePoint >= 0x1000d && codePoint <= 0x10026)
+ if (codePoint >= 0x1000D && codePoint <= 0x10026) // LINEAR B SYLLABLE B036 JO .. LINEAR B SYLLABLE B032 QO
return true;
- if (codePoint >= 0x10028 && codePoint <= 0x1003a)
+ if (codePoint >= 0x10028 && codePoint <= 0x1003A) // LINEAR B SYLLABLE B060 RA .. LINEAR B SYLLABLE B042 WO
return true;
- if (codePoint >= 0x1003c && codePoint <= 0x1003d)
+ if (codePoint >= 0x1003C && codePoint <= 0x1003D) // LINEAR B SYLLABLE B017 ZA .. LINEAR B SYLLABLE B074 ZE
return true;
- if (codePoint >= 0x1003f && codePoint <= 0x1004d)
+ if (codePoint >= 0x1003F && codePoint <= 0x1004D) // LINEAR B SYLLABLE B020 ZO .. LINEAR B SYLLABLE B091 TWO
return true;
- if (codePoint >= 0x10050 && codePoint <= 0x1005d)
+ if (codePoint >= 0x10050 && codePoint <= 0x1005D) // LINEAR B SYMBOL B018 .. LINEAR B SYMBOL B089
return true;
- if (codePoint >= 0x10080 && codePoint <= 0x100fa)
+ if (codePoint >= 0x10080 && codePoint <= 0x100FA) // LINEAR B IDEOGRAM B100 MAN .. LINEAR B IDEOGRAM VESSEL B305
return true;
- if (codePoint >= 0x10140 && codePoint <= 0x10174)
+ if (codePoint >= 0x10140 && codePoint <= 0x10174) // GREEK ACROPHONIC ATTIC ONE QUARTER .. GREEK ACROPHONIC STRATIAN FIFTY MNAS
return true;
- if (codePoint >= 0x10280 && codePoint <= 0x1029c)
+ if (codePoint >= 0x10280 && codePoint <= 0x1029C) // LYCIAN LETTER A .. LYCIAN LETTER X
return true;
- if (codePoint >= 0x102a0 && codePoint <= 0x102d0)
+ if (codePoint >= 0x102A0 && codePoint <= 0x102D0) // CARIAN LETTER A .. CARIAN LETTER UUU3
return true;
- if (codePoint >= 0x10300 && codePoint <= 0x1031f)
+ if (codePoint >= 0x10300 && codePoint <= 0x1031F) // OLD ITALIC LETTER A .. OLD ITALIC LETTER ESS
return true;
- if (codePoint >= 0x1032d && codePoint <= 0x1034a)
+ if (codePoint >= 0x1032D && codePoint <= 0x1034A) // OLD ITALIC LETTER YE .. GOTHIC LETTER NINE HUNDRED
return true;
- if (codePoint >= 0x10350 && codePoint <= 0x10375)
+ if (codePoint >= 0x10350 && codePoint <= 0x10375) // OLD PERMIC LETTER AN .. OLD PERMIC LETTER IA
return true;
- if (codePoint >= 0x10380 && codePoint <= 0x1039d)
+ if (codePoint >= 0x10380 && codePoint <= 0x1039D) // UGARITIC LETTER ALPA .. UGARITIC LETTER SSU
return true;
- if (codePoint >= 0x103a0 && codePoint <= 0x103c3)
+ if (codePoint >= 0x103A0 && codePoint <= 0x103C3) // OLD PERSIAN SIGN A .. OLD PERSIAN SIGN HA
return true;
- if (codePoint >= 0x103c8 && codePoint <= 0x103cf)
+ if (codePoint >= 0x103C8 && codePoint <= 0x103CF) // OLD PERSIAN SIGN AURAMAZDAA .. OLD PERSIAN SIGN BUUMISH
return true;
- if (codePoint >= 0x103d1 && codePoint <= 0x103d5)
+ if (codePoint >= 0x103D1 && codePoint <= 0x103D5) // OLD PERSIAN NUMBER ONE .. OLD PERSIAN NUMBER HUNDRED
return true;
- if (codePoint >= 0x10400 && codePoint <= 0x1049d)
+ if (codePoint >= 0x10400 && codePoint <= 0x1049D) // DESERET CAPITAL LETTER LONG I .. OSMANYA LETTER OO
return true;
- if (codePoint >= 0x104b0 && codePoint <= 0x104d3)
+ if (codePoint >= 0x104B0 && codePoint <= 0x104D3) // OSAGE CAPITAL LETTER A .. OSAGE CAPITAL LETTER ZHA
return true;
- if (codePoint >= 0x104d8 && codePoint <= 0x104fb)
+ if (codePoint >= 0x104D8 && codePoint <= 0x104FB) // OSAGE SMALL LETTER A .. OSAGE SMALL LETTER ZHA
return true;
- if (codePoint >= 0x10500 && codePoint <= 0x10527)
+ if (codePoint >= 0x10500 && codePoint <= 0x10527) // ELBASAN LETTER A .. ELBASAN LETTER KHE
return true;
- if (codePoint >= 0x10530 && codePoint <= 0x10563)
+ if (codePoint >= 0x10530 && codePoint <= 0x10563) // CAUCASIAN ALBANIAN LETTER ALT .. CAUCASIAN ALBANIAN LETTER KIW
return true;
- if (codePoint >= 0x10600 && codePoint <= 0x10736)
+ if (codePoint >= 0x10600 && codePoint <= 0x10736) // LINEAR A SIGN AB001 .. LINEAR A SIGN A664
return true;
- if (codePoint >= 0x10740 && codePoint <= 0x10755)
+ if (codePoint >= 0x10740 && codePoint <= 0x10755) // LINEAR A SIGN A701 A .. LINEAR A SIGN A732 JE
return true;
- if (codePoint >= 0x10760 && codePoint <= 0x10767)
+ if (codePoint >= 0x10760 && codePoint <= 0x10767) // LINEAR A SIGN A800 .. LINEAR A SIGN A807
return true;
- if (codePoint >= 0x10800 && codePoint <= 0x10805)
+ if (codePoint >= 0x10800 && codePoint <= 0x10805) // CYPRIOT SYLLABLE A .. CYPRIOT SYLLABLE JA
return true;
- if (codePoint >= 0x10808 && codePoint <= 0x10808)
+ if (codePoint >= 0x10808 && codePoint <= 0x10808) // CYPRIOT SYLLABLE JO .. CYPRIOT SYLLABLE JO
return true;
- if (codePoint >= 0x1080a && codePoint <= 0x10835)
+ if (codePoint >= 0x1080A && codePoint <= 0x10835) // CYPRIOT SYLLABLE KA .. CYPRIOT SYLLABLE WO
return true;
- if (codePoint >= 0x10837 && codePoint <= 0x10838)
+ if (codePoint >= 0x10837 && codePoint <= 0x10838) // CYPRIOT SYLLABLE XA .. CYPRIOT SYLLABLE XE
return true;
- if (codePoint >= 0x1083c && codePoint <= 0x1083c)
+ if (codePoint >= 0x1083C && codePoint <= 0x1083C) // CYPRIOT SYLLABLE ZA .. CYPRIOT SYLLABLE ZA
return true;
- if (codePoint >= 0x1083f && codePoint <= 0x10855)
+ if (codePoint >= 0x1083F && codePoint <= 0x10855) // CYPRIOT SYLLABLE ZO .. IMPERIAL ARAMAIC LETTER TAW
return true;
- if (codePoint >= 0x10860 && codePoint <= 0x10876)
+ if (codePoint >= 0x10860 && codePoint <= 0x10876) // PALMYRENE LETTER ALEPH .. PALMYRENE LETTER TAW
return true;
- if (codePoint >= 0x10880 && codePoint <= 0x1089e)
+ if (codePoint >= 0x10880 && codePoint <= 0x1089E) // NABATAEAN LETTER FINAL ALEPH .. NABATAEAN LETTER TAW
return true;
- if (codePoint >= 0x108e0 && codePoint <= 0x108f2)
+ if (codePoint >= 0x108E0 && codePoint <= 0x108F2) // HATRAN LETTER ALEPH .. HATRAN LETTER QOPH
return true;
- if (codePoint >= 0x108f4 && codePoint <= 0x108f5)
+ if (codePoint >= 0x108F4 && codePoint <= 0x108F5) // HATRAN LETTER SHIN .. HATRAN LETTER TAW
return true;
- if (codePoint >= 0x10900 && codePoint <= 0x10915)
+ if (codePoint >= 0x10900 && codePoint <= 0x10915) // PHOENICIAN LETTER ALF .. PHOENICIAN LETTER TAU
return true;
- if (codePoint >= 0x10920 && codePoint <= 0x10939)
+ if (codePoint >= 0x10920 && codePoint <= 0x10939) // LYDIAN LETTER A .. LYDIAN LETTER C
return true;
- if (codePoint >= 0x10980 && codePoint <= 0x109b7)
+ if (codePoint >= 0x10980 && codePoint <= 0x109B7) // MEROITIC HIEROGLYPHIC LETTER A .. MEROITIC CURSIVE LETTER DA
return true;
- if (codePoint >= 0x109be && codePoint <= 0x109bf)
+ if (codePoint >= 0x109BE && codePoint <= 0x109BF) // MEROITIC CURSIVE LOGOGRAM RMT .. MEROITIC CURSIVE LOGOGRAM IMN
return true;
- if (codePoint >= 0x10a00 && codePoint <= 0x10a00)
+ if (codePoint >= 0x10A00 && codePoint <= 0x10A00) // KHAROSHTHI LETTER A .. KHAROSHTHI LETTER A
return true;
- if (codePoint >= 0x10a10 && codePoint <= 0x10a13)
+ if (codePoint >= 0x10A10 && codePoint <= 0x10A13) // KHAROSHTHI LETTER KA .. KHAROSHTHI LETTER GHA
return true;
- if (codePoint >= 0x10a15 && codePoint <= 0x10a17)
+ if (codePoint >= 0x10A15 && codePoint <= 0x10A17) // KHAROSHTHI LETTER CA .. KHAROSHTHI LETTER JA
return true;
- if (codePoint >= 0x10a19 && codePoint <= 0x10a35)
+ if (codePoint >= 0x10A19 && codePoint <= 0x10A35) // KHAROSHTHI LETTER NYA .. KHAROSHTHI LETTER VHA
return true;
- if (codePoint >= 0x10a60 && codePoint <= 0x10a7c)
+ if (codePoint >= 0x10A60 && codePoint <= 0x10A7C) // OLD SOUTH ARABIAN LETTER HE .. OLD SOUTH ARABIAN LETTER THETH
return true;
- if (codePoint >= 0x10a80 && codePoint <= 0x10a9c)
+ if (codePoint >= 0x10A80 && codePoint <= 0x10A9C) // OLD NORTH ARABIAN LETTER HEH .. OLD NORTH ARABIAN LETTER ZAH
return true;
- if (codePoint >= 0x10ac0 && codePoint <= 0x10ac7)
+ if (codePoint >= 0x10AC0 && codePoint <= 0x10AC7) // MANICHAEAN LETTER ALEPH .. MANICHAEAN LETTER WAW
return true;
- if (codePoint >= 0x10ac9 && codePoint <= 0x10ae4)
+ if (codePoint >= 0x10AC9 && codePoint <= 0x10AE4) // MANICHAEAN LETTER ZAYIN .. MANICHAEAN LETTER TAW
return true;
- if (codePoint >= 0x10b00 && codePoint <= 0x10b35)
+ if (codePoint >= 0x10B00 && codePoint <= 0x10B35) // AVESTAN LETTER A .. AVESTAN LETTER HE
return true;
- if (codePoint >= 0x10b40 && codePoint <= 0x10b55)
+ if (codePoint >= 0x10B40 && codePoint <= 0x10B55) // INSCRIPTIONAL PARTHIAN LETTER ALEPH .. INSCRIPTIONAL PARTHIAN LETTER TAW
return true;
- if (codePoint >= 0x10b60 && codePoint <= 0x10b72)
+ if (codePoint >= 0x10B60 && codePoint <= 0x10B72) // INSCRIPTIONAL PAHLAVI LETTER ALEPH .. INSCRIPTIONAL PAHLAVI LETTER TAW
return true;
- if (codePoint >= 0x10b80 && codePoint <= 0x10b91)
+ if (codePoint >= 0x10B80 && codePoint <= 0x10B91) // PSALTER PAHLAVI LETTER ALEPH .. PSALTER PAHLAVI LETTER TAW
return true;
- if (codePoint >= 0x10c00 && codePoint <= 0x10c48)
+ if (codePoint >= 0x10C00 && codePoint <= 0x10C48) // OLD TURKIC LETTER ORKHON A .. OLD TURKIC LETTER ORKHON BASH
return true;
- if (codePoint >= 0x10c80 && codePoint <= 0x10cb2)
+ if (codePoint >= 0x10C80 && codePoint <= 0x10CB2) // OLD HUNGARIAN CAPITAL LETTER A .. OLD HUNGARIAN CAPITAL LETTER US
return true;
- if (codePoint >= 0x10cc0 && codePoint <= 0x10cf2)
+ if (codePoint >= 0x10CC0 && codePoint <= 0x10CF2) // OLD HUNGARIAN SMALL LETTER A .. OLD HUNGARIAN SMALL LETTER US
return true;
- if (codePoint >= 0x10d00 && codePoint <= 0x10d23)
+ if (codePoint >= 0x10D00 && codePoint <= 0x10D23) // HANIFI ROHINGYA LETTER A .. HANIFI ROHINGYA MARK NA KHONNA
return true;
- if (codePoint >= 0x10f00 && codePoint <= 0x10f1c)
+ if (codePoint >= 0x10F00 && codePoint <= 0x10F1C) // OLD SOGDIAN LETTER ALEPH .. OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
return true;
- if (codePoint >= 0x10f27 && codePoint <= 0x10f27)
+ if (codePoint >= 0x10F27 && codePoint <= 0x10F27) // OLD SOGDIAN LIGATURE AYIN-DALETH .. OLD SOGDIAN LIGATURE AYIN-DALETH
return true;
- if (codePoint >= 0x10f30 && codePoint <= 0x10f45)
+ if (codePoint >= 0x10F30 && codePoint <= 0x10F45) // SOGDIAN LETTER ALEPH .. SOGDIAN INDEPENDENT SHIN
return true;
- if (codePoint >= 0x11003 && codePoint <= 0x11037)
+ if (codePoint >= 0x11003 && codePoint <= 0x11037) // BRAHMI SIGN JIHVAMULIYA .. BRAHMI LETTER OLD TAMIL NNNA
return true;
- if (codePoint >= 0x11083 && codePoint <= 0x110af)
+ if (codePoint >= 0x11083 && codePoint <= 0x110AF) // KAITHI LETTER A .. KAITHI LETTER HA
return true;
- if (codePoint >= 0x110d0 && codePoint <= 0x110e8)
+ if (codePoint >= 0x110D0 && codePoint <= 0x110E8) // SORA SOMPENG LETTER SAH .. SORA SOMPENG LETTER MAE
return true;
- if (codePoint >= 0x11103 && codePoint <= 0x11126)
+ if (codePoint >= 0x11103 && codePoint <= 0x11126) // CHAKMA LETTER AA .. CHAKMA LETTER HAA
return true;
- if (codePoint >= 0x11144 && codePoint <= 0x11144)
+ if (codePoint >= 0x11144 && codePoint <= 0x11144) // CHAKMA LETTER LHAA .. CHAKMA LETTER LHAA
return true;
- if (codePoint >= 0x11150 && codePoint <= 0x11172)
+ if (codePoint >= 0x11150 && codePoint <= 0x11172) // MAHAJANI LETTER A .. MAHAJANI LETTER RRA
return true;
- if (codePoint >= 0x11176 && codePoint <= 0x11176)
+ if (codePoint >= 0x11176 && codePoint <= 0x11176) // MAHAJANI LIGATURE SHRI .. MAHAJANI LIGATURE SHRI
return true;
- if (codePoint >= 0x11183 && codePoint <= 0x111b2)
+ if (codePoint >= 0x11183 && codePoint <= 0x111B2) // SHARADA LETTER A .. SHARADA LETTER HA
return true;
- if (codePoint >= 0x111c1 && codePoint <= 0x111c4)
+ if (codePoint >= 0x111C1 && codePoint <= 0x111C4) // SHARADA SIGN AVAGRAHA .. SHARADA OM
return true;
- if (codePoint >= 0x111da && codePoint <= 0x111da)
+ if (codePoint >= 0x111DA && codePoint <= 0x111DA) // SHARADA EKAM .. SHARADA EKAM
return true;
- if (codePoint >= 0x111dc && codePoint <= 0x111dc)
+ if (codePoint >= 0x111DC && codePoint <= 0x111DC) // SHARADA HEADSTROKE .. SHARADA HEADSTROKE
return true;
- if (codePoint >= 0x11200 && codePoint <= 0x11211)
+ if (codePoint >= 0x11200 && codePoint <= 0x11211) // KHOJKI LETTER A .. KHOJKI LETTER JJA
return true;
- if (codePoint >= 0x11213 && codePoint <= 0x1122b)
+ if (codePoint >= 0x11213 && codePoint <= 0x1122B) // KHOJKI LETTER NYA .. KHOJKI LETTER LLA
return true;
- if (codePoint >= 0x11280 && codePoint <= 0x11286)
+ if (codePoint >= 0x11280 && codePoint <= 0x11286) // MULTANI LETTER A .. MULTANI LETTER GA
return true;
- if (codePoint >= 0x11288 && codePoint <= 0x11288)
+ if (codePoint >= 0x11288 && codePoint <= 0x11288) // MULTANI LETTER GHA .. MULTANI LETTER GHA
return true;
- if (codePoint >= 0x1128a && codePoint <= 0x1128d)
+ if (codePoint >= 0x1128A && codePoint <= 0x1128D) // MULTANI LETTER CA .. MULTANI LETTER JJA
return true;
- if (codePoint >= 0x1128f && codePoint <= 0x1129d)
+ if (codePoint >= 0x1128F && codePoint <= 0x1129D) // MULTANI LETTER NYA .. MULTANI LETTER BA
return true;
- if (codePoint >= 0x1129f && codePoint <= 0x112a8)
+ if (codePoint >= 0x1129F && codePoint <= 0x112A8) // MULTANI LETTER BHA .. MULTANI LETTER RHA
return true;
- if (codePoint >= 0x112b0 && codePoint <= 0x112de)
+ if (codePoint >= 0x112B0 && codePoint <= 0x112DE) // KHUDAWADI LETTER A .. KHUDAWADI LETTER HA
return true;
- if (codePoint >= 0x11305 && codePoint <= 0x1130c)
+ if (codePoint >= 0x11305 && codePoint <= 0x1130C) // GRANTHA LETTER A .. GRANTHA LETTER VOCALIC L
return true;
- if (codePoint >= 0x1130f && codePoint <= 0x11310)
+ if (codePoint >= 0x1130F && codePoint <= 0x11310) // GRANTHA LETTER EE .. GRANTHA LETTER AI
return true;
- if (codePoint >= 0x11313 && codePoint <= 0x11328)
+ if (codePoint >= 0x11313 && codePoint <= 0x11328) // GRANTHA LETTER OO .. GRANTHA LETTER NA
return true;
- if (codePoint >= 0x1132a && codePoint <= 0x11330)
+ if (codePoint >= 0x1132A && codePoint <= 0x11330) // GRANTHA LETTER PA .. GRANTHA LETTER RA
return true;
- if (codePoint >= 0x11332 && codePoint <= 0x11333)
+ if (codePoint >= 0x11332 && codePoint <= 0x11333) // GRANTHA LETTER LA .. GRANTHA LETTER LLA
return true;
- if (codePoint >= 0x11335 && codePoint <= 0x11339)
+ if (codePoint >= 0x11335 && codePoint <= 0x11339) // GRANTHA LETTER VA .. GRANTHA LETTER HA
return true;
- if (codePoint >= 0x1133d && codePoint <= 0x1133d)
+ if (codePoint >= 0x1133D && codePoint <= 0x1133D) // GRANTHA SIGN AVAGRAHA .. GRANTHA SIGN AVAGRAHA
return true;
- if (codePoint >= 0x11350 && codePoint <= 0x11350)
+ if (codePoint >= 0x11350 && codePoint <= 0x11350) // GRANTHA OM .. GRANTHA OM
return true;
- if (codePoint >= 0x1135d && codePoint <= 0x11361)
+ if (codePoint >= 0x1135D && codePoint <= 0x11361) // GRANTHA SIGN PLUTA .. GRANTHA LETTER VOCALIC LL
return true;
- if (codePoint >= 0x11400 && codePoint <= 0x11434)
+ if (codePoint >= 0x11400 && codePoint <= 0x11434) // NEWA LETTER A .. NEWA LETTER HA
return true;
- if (codePoint >= 0x11447 && codePoint <= 0x1144a)
+ if (codePoint >= 0x11447 && codePoint <= 0x1144A) // NEWA SIGN AVAGRAHA .. NEWA SIDDHI
return true;
- if (codePoint >= 0x11480 && codePoint <= 0x114af)
+ if (codePoint >= 0x11480 && codePoint <= 0x114AF) // TIRHUTA ANJI .. TIRHUTA LETTER HA
return true;
- if (codePoint >= 0x114c4 && codePoint <= 0x114c5)
+ if (codePoint >= 0x114C4 && codePoint <= 0x114C5) // TIRHUTA SIGN AVAGRAHA .. TIRHUTA GVANG
return true;
- if (codePoint >= 0x114c7 && codePoint <= 0x114c7)
+ if (codePoint >= 0x114C7 && codePoint <= 0x114C7) // TIRHUTA OM .. TIRHUTA OM
return true;
- if (codePoint >= 0x11580 && codePoint <= 0x115ae)
+ if (codePoint >= 0x11580 && codePoint <= 0x115AE) // SIDDHAM LETTER A .. SIDDHAM LETTER HA
return true;
- if (codePoint >= 0x115d8 && codePoint <= 0x115db)
+ if (codePoint >= 0x115D8 && codePoint <= 0x115DB) // SIDDHAM LETTER THREE-CIRCLE ALTERNATE I .. SIDDHAM LETTER ALTERNATE U
return true;
- if (codePoint >= 0x11600 && codePoint <= 0x1162f)
+ if (codePoint >= 0x11600 && codePoint <= 0x1162F) // MODI LETTER A .. MODI LETTER LLA
return true;
- if (codePoint >= 0x11644 && codePoint <= 0x11644)
+ if (codePoint >= 0x11644 && codePoint <= 0x11644) // MODI SIGN HUVA .. MODI SIGN HUVA
return true;
- if (codePoint >= 0x11680 && codePoint <= 0x116aa)
+ if (codePoint >= 0x11680 && codePoint <= 0x116AA) // TAKRI LETTER A .. TAKRI LETTER RRA
return true;
- if (codePoint >= 0x11700 && codePoint <= 0x1171a)
+ if (codePoint >= 0x11700 && codePoint <= 0x1171A) // AHOM LETTER KA .. AHOM LETTER ALTERNATE BA
return true;
- if (codePoint >= 0x11800 && codePoint <= 0x1182b)
+ if (codePoint >= 0x11800 && codePoint <= 0x1182B) // DOGRA LETTER A .. DOGRA LETTER RRA
return true;
- if (codePoint >= 0x118a0 && codePoint <= 0x118df)
+ if (codePoint >= 0x118A0 && codePoint <= 0x118DF) // WARANG CITI CAPITAL LETTER NGAA .. WARANG CITI SMALL LETTER VIYO
return true;
- if (codePoint >= 0x118ff && codePoint <= 0x118ff)
+ if (codePoint >= 0x118FF && codePoint <= 0x118FF) // WARANG CITI OM .. WARANG CITI OM
return true;
- if (codePoint >= 0x11a00 && codePoint <= 0x11a00)
+ if (codePoint >= 0x11A00 && codePoint <= 0x11A00) // ZANABAZAR SQUARE LETTER A .. ZANABAZAR SQUARE LETTER A
return true;
- if (codePoint >= 0x11a0b && codePoint <= 0x11a32)
+ if (codePoint >= 0x11A0B && codePoint <= 0x11A32) // ZANABAZAR SQUARE LETTER KA .. ZANABAZAR SQUARE LETTER KSSA
return true;
- if (codePoint >= 0x11a3a && codePoint <= 0x11a3a)
+ if (codePoint >= 0x11A3A && codePoint <= 0x11A3A) // ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA .. ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
return true;
- if (codePoint >= 0x11a50 && codePoint <= 0x11a50)
+ if (codePoint >= 0x11A50 && codePoint <= 0x11A50) // SOYOMBO LETTER A .. SOYOMBO LETTER A
return true;
- if (codePoint >= 0x11a5c && codePoint <= 0x11a83)
+ if (codePoint >= 0x11A5C && codePoint <= 0x11A83) // SOYOMBO LETTER KA .. SOYOMBO LETTER KSSA
return true;
- if (codePoint >= 0x11a86 && codePoint <= 0x11a89)
+ if (codePoint >= 0x11A86 && codePoint <= 0x11A89) // SOYOMBO CLUSTER-INITIAL LETTER RA .. SOYOMBO CLUSTER-INITIAL LETTER SA
return true;
- if (codePoint >= 0x11a9d && codePoint <= 0x11a9d)
+ if (codePoint >= 0x11A9D && codePoint <= 0x11A9D) // SOYOMBO MARK PLUTA .. SOYOMBO MARK PLUTA
return true;
- if (codePoint >= 0x11ac0 && codePoint <= 0x11af8)
+ if (codePoint >= 0x11AC0 && codePoint <= 0x11AF8) // PAU CIN HAU LETTER PA .. PAU CIN HAU GLOTTAL STOP FINAL
return true;
- if (codePoint >= 0x11c00 && codePoint <= 0x11c08)
+ if (codePoint >= 0x11C00 && codePoint <= 0x11C08) // BHAIKSUKI LETTER A .. BHAIKSUKI LETTER VOCALIC L
return true;
- if (codePoint >= 0x11c0a && codePoint <= 0x11c2e)
+ if (codePoint >= 0x11C0A && codePoint <= 0x11C2E) // BHAIKSUKI LETTER E .. BHAIKSUKI LETTER HA
return true;
- if (codePoint >= 0x11c40 && codePoint <= 0x11c40)
+ if (codePoint >= 0x11C40 && codePoint <= 0x11C40) // BHAIKSUKI SIGN AVAGRAHA .. BHAIKSUKI SIGN AVAGRAHA
return true;
- if (codePoint >= 0x11c72 && codePoint <= 0x11c8f)
+ if (codePoint >= 0x11C72 && codePoint <= 0x11C8F) // MARCHEN LETTER KA .. MARCHEN LETTER A
return true;
- if (codePoint >= 0x11d00 && codePoint <= 0x11d06)
+ if (codePoint >= 0x11D00 && codePoint <= 0x11D06) // MASARAM GONDI LETTER A .. MASARAM GONDI LETTER E
return true;
- if (codePoint >= 0x11d08 && codePoint <= 0x11d09)
+ if (codePoint >= 0x11D08 && codePoint <= 0x11D09) // MASARAM GONDI LETTER AI .. MASARAM GONDI LETTER O
return true;
- if (codePoint >= 0x11d0b && codePoint <= 0x11d30)
+ if (codePoint >= 0x11D0B && codePoint <= 0x11D30) // MASARAM GONDI LETTER AU .. MASARAM GONDI LETTER TRA
return true;
- if (codePoint >= 0x11d46 && codePoint <= 0x11d46)
+ if (codePoint >= 0x11D46 && codePoint <= 0x11D46) // MASARAM GONDI REPHA .. MASARAM GONDI REPHA
return true;
- if (codePoint >= 0x11d60 && codePoint <= 0x11d65)
+ if (codePoint >= 0x11D60 && codePoint <= 0x11D65) // GUNJALA GONDI LETTER A .. GUNJALA GONDI LETTER UU
return true;
- if (codePoint >= 0x11d67 && codePoint <= 0x11d68)
+ if (codePoint >= 0x11D67 && codePoint <= 0x11D68) // GUNJALA GONDI LETTER EE .. GUNJALA GONDI LETTER AI
return true;
- if (codePoint >= 0x11d6a && codePoint <= 0x11d89)
+ if (codePoint >= 0x11D6A && codePoint <= 0x11D89) // GUNJALA GONDI LETTER OO .. GUNJALA GONDI LETTER SA
return true;
- if (codePoint >= 0x11d98 && codePoint <= 0x11d98)
+ if (codePoint >= 0x11D98 && codePoint <= 0x11D98) // GUNJALA GONDI OM .. GUNJALA GONDI OM
return true;
- if (codePoint >= 0x11ee0 && codePoint <= 0x11ef2)
+ if (codePoint >= 0x11EE0 && codePoint <= 0x11EF2) // MAKASAR LETTER KA .. MAKASAR ANGKA
return true;
- if (codePoint >= 0x12000 && codePoint <= 0x12399)
+ if (codePoint >= 0x12000 && codePoint <= 0x12399) // CUNEIFORM SIGN A .. CUNEIFORM SIGN U U
return true;
- if (codePoint >= 0x12400 && codePoint <= 0x1246e)
+ if (codePoint >= 0x12400 && codePoint <= 0x1246E) // CUNEIFORM NUMERIC SIGN TWO ASH .. CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
return true;
- if (codePoint >= 0x12480 && codePoint <= 0x12543)
+ if (codePoint >= 0x12480 && codePoint <= 0x12543) // CUNEIFORM SIGN AB TIMES NUN TENU .. CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
return true;
- if (codePoint >= 0x13000 && codePoint <= 0x1342e)
+ if (codePoint >= 0x13000 && codePoint <= 0x1342E) // EGYPTIAN HIEROGLYPH A001 .. EGYPTIAN HIEROGLYPH AA032
return true;
- if (codePoint >= 0x14400 && codePoint <= 0x14646)
+ if (codePoint >= 0x14400 && codePoint <= 0x14646) // ANATOLIAN HIEROGLYPH A001 .. ANATOLIAN HIEROGLYPH A530
return true;
- if (codePoint >= 0x16800 && codePoint <= 0x16a38)
+ if (codePoint >= 0x16800 && codePoint <= 0x16A38) // BAMUM LETTER PHASE-A NGKUE MFON .. BAMUM LETTER PHASE-F VUEQ
return true;
- if (codePoint >= 0x16a40 && codePoint <= 0x16a5e)
+ if (codePoint >= 0x16A40 && codePoint <= 0x16A5E) // MRO LETTER TA .. MRO LETTER TEK
return true;
- if (codePoint >= 0x16ad0 && codePoint <= 0x16aed)
+ if (codePoint >= 0x16AD0 && codePoint <= 0x16AED) // BASSA VAH LETTER ENNI .. BASSA VAH LETTER I
return true;
- if (codePoint >= 0x16b00 && codePoint <= 0x16b2f)
+ if (codePoint >= 0x16B00 && codePoint <= 0x16B2F) // PAHAWH HMONG VOWEL KEEB .. PAHAWH HMONG CONSONANT CAU
return true;
- if (codePoint >= 0x16b40 && codePoint <= 0x16b43)
+ if (codePoint >= 0x16B40 && codePoint <= 0x16B43) // PAHAWH HMONG SIGN VOS SEEV .. PAHAWH HMONG SIGN IB YAM
return true;
- if (codePoint >= 0x16b63 && codePoint <= 0x16b77)
+ if (codePoint >= 0x16B63 && codePoint <= 0x16B77) // PAHAWH HMONG SIGN VOS LUB .. PAHAWH HMONG SIGN CIM NRES TOS
return true;
- if (codePoint >= 0x16b7d && codePoint <= 0x16b8f)
+ if (codePoint >= 0x16B7D && codePoint <= 0x16B8F) // PAHAWH HMONG CLAN SIGN TSHEEJ .. PAHAWH HMONG CLAN SIGN VWJ
return true;
- if (codePoint >= 0x16e40 && codePoint <= 0x16e7f)
+ if (codePoint >= 0x16E40 && codePoint <= 0x16E7F) // MEDEFAIDRIN CAPITAL LETTER M .. MEDEFAIDRIN SMALL LETTER Y
return true;
- if (codePoint >= 0x16f00 && codePoint <= 0x16f44)
+ if (codePoint >= 0x16F00 && codePoint <= 0x16F44) // MIAO LETTER PA .. MIAO LETTER HHA
return true;
- if (codePoint >= 0x16f50 && codePoint <= 0x16f50)
+ if (codePoint >= 0x16F50 && codePoint <= 0x16F50) // MIAO LETTER NASALIZATION .. MIAO LETTER NASALIZATION
return true;
- if (codePoint >= 0x16f93 && codePoint <= 0x16f9f)
+ if (codePoint >= 0x16F93 && codePoint <= 0x16F9F) // MIAO LETTER TONE-2 .. MIAO LETTER REFORMED TONE-8
return true;
- if (codePoint >= 0x16fe0 && codePoint <= 0x16fe1)
+ if (codePoint >= 0x16FE0 && codePoint <= 0x16FE1) // TANGUT ITERATION MARK .. NUSHU ITERATION MARK
return true;
- if (codePoint >= 0x17000 && codePoint <= 0x187f1)
+ if (codePoint >= 0x17000 && codePoint <= 0x187F1) // Tangut Ideograph .. Tangut Ideograph
return true;
- if (codePoint >= 0x18800 && codePoint <= 0x18af2)
+ if (codePoint >= 0x18800 && codePoint <= 0x18AF2) // TANGUT COMPONENT-001 .. TANGUT COMPONENT-755
return true;
- if (codePoint >= 0x1b000 && codePoint <= 0x1b11e)
+ if (codePoint >= 0x1B000 && codePoint <= 0x1B11E) // KATAKANA LETTER ARCHAIC E .. HENTAIGANA LETTER N-MU-MO-2
return true;
- if (codePoint >= 0x1b170 && codePoint <= 0x1b2fb)
+ if (codePoint >= 0x1B170 && codePoint <= 0x1B2FB) // NUSHU CHARACTER-1B170 .. NUSHU CHARACTER-1B2FB
return true;
- if (codePoint >= 0x1bc00 && codePoint <= 0x1bc6a)
+ if (codePoint >= 0x1BC00 && codePoint <= 0x1BC6A) // DUPLOYAN LETTER H .. DUPLOYAN LETTER VOCALIC M
return true;
- if (codePoint >= 0x1bc70 && codePoint <= 0x1bc7c)
+ if (codePoint >= 0x1BC70 && codePoint <= 0x1BC7C) // DUPLOYAN AFFIX LEFT HORIZONTAL SECANT .. DUPLOYAN AFFIX ATTACHED TANGENT HOOK
return true;
- if (codePoint >= 0x1bc80 && codePoint <= 0x1bc88)
+ if (codePoint >= 0x1BC80 && codePoint <= 0x1BC88) // DUPLOYAN AFFIX HIGH ACUTE .. DUPLOYAN AFFIX HIGH VERTICAL
return true;
- if (codePoint >= 0x1bc90 && codePoint <= 0x1bc99)
+ if (codePoint >= 0x1BC90 && codePoint <= 0x1BC99) // DUPLOYAN AFFIX LOW ACUTE .. DUPLOYAN AFFIX LOW ARROW
return true;
- if (codePoint >= 0x1d400 && codePoint <= 0x1d454)
+ if (codePoint >= 0x1D400 && codePoint <= 0x1D454) // MATHEMATICAL BOLD CAPITAL A .. MATHEMATICAL ITALIC SMALL G
return true;
- if (codePoint >= 0x1d456 && codePoint <= 0x1d49c)
+ if (codePoint >= 0x1D456 && codePoint <= 0x1D49C) // MATHEMATICAL ITALIC SMALL I .. MATHEMATICAL SCRIPT CAPITAL A
return true;
- if (codePoint >= 0x1d49e && codePoint <= 0x1d49f)
+ if (codePoint >= 0x1D49E && codePoint <= 0x1D49F) // MATHEMATICAL SCRIPT CAPITAL C .. MATHEMATICAL SCRIPT CAPITAL D
return true;
- if (codePoint >= 0x1d4a2 && codePoint <= 0x1d4a2)
+ if (codePoint >= 0x1D4A2 && codePoint <= 0x1D4A2) // MATHEMATICAL SCRIPT CAPITAL G .. MATHEMATICAL SCRIPT CAPITAL G
return true;
- if (codePoint >= 0x1d4a5 && codePoint <= 0x1d4a6)
+ if (codePoint >= 0x1D4A5 && codePoint <= 0x1D4A6) // MATHEMATICAL SCRIPT CAPITAL J .. MATHEMATICAL SCRIPT CAPITAL K
return true;
- if (codePoint >= 0x1d4a9 && codePoint <= 0x1d4ac)
+ if (codePoint >= 0x1D4A9 && codePoint <= 0x1D4AC) // MATHEMATICAL SCRIPT CAPITAL N .. MATHEMATICAL SCRIPT CAPITAL Q
return true;
- if (codePoint >= 0x1d4ae && codePoint <= 0x1d4b9)
+ if (codePoint >= 0x1D4AE && codePoint <= 0x1D4B9) // MATHEMATICAL SCRIPT CAPITAL S .. MATHEMATICAL SCRIPT SMALL D
return true;
- if (codePoint >= 0x1d4bb && codePoint <= 0x1d4bb)
+ if (codePoint >= 0x1D4BB && codePoint <= 0x1D4BB) // MATHEMATICAL SCRIPT SMALL F .. MATHEMATICAL SCRIPT SMALL F
return true;
- if (codePoint >= 0x1d4bd && codePoint <= 0x1d4c3)
+ if (codePoint >= 0x1D4BD && codePoint <= 0x1D4C3) // MATHEMATICAL SCRIPT SMALL H .. MATHEMATICAL SCRIPT SMALL N
return true;
- if (codePoint >= 0x1d4c5 && codePoint <= 0x1d505)
+ if (codePoint >= 0x1D4C5 && codePoint <= 0x1D505) // MATHEMATICAL SCRIPT SMALL P .. MATHEMATICAL FRAKTUR CAPITAL B
return true;
- if (codePoint >= 0x1d507 && codePoint <= 0x1d50a)
+ if (codePoint >= 0x1D507 && codePoint <= 0x1D50A) // MATHEMATICAL FRAKTUR CAPITAL D .. MATHEMATICAL FRAKTUR CAPITAL G
return true;
- if (codePoint >= 0x1d50d && codePoint <= 0x1d514)
+ if (codePoint >= 0x1D50D && codePoint <= 0x1D514) // MATHEMATICAL FRAKTUR CAPITAL J .. MATHEMATICAL FRAKTUR CAPITAL Q
return true;
- if (codePoint >= 0x1d516 && codePoint <= 0x1d51c)
+ if (codePoint >= 0x1D516 && codePoint <= 0x1D51C) // MATHEMATICAL FRAKTUR CAPITAL S .. MATHEMATICAL FRAKTUR CAPITAL Y
return true;
- if (codePoint >= 0x1d51e && codePoint <= 0x1d539)
+ if (codePoint >= 0x1D51E && codePoint <= 0x1D539) // MATHEMATICAL FRAKTUR SMALL A .. MATHEMATICAL DOUBLE-STRUCK CAPITAL B
return true;
- if (codePoint >= 0x1d53b && codePoint <= 0x1d53e)
+ if (codePoint >= 0x1D53B && codePoint <= 0x1D53E) // MATHEMATICAL DOUBLE-STRUCK CAPITAL D .. MATHEMATICAL DOUBLE-STRUCK CAPITAL G
return true;
- if (codePoint >= 0x1d540 && codePoint <= 0x1d544)
+ if (codePoint >= 0x1D540 && codePoint <= 0x1D544) // MATHEMATICAL DOUBLE-STRUCK CAPITAL I .. MATHEMATICAL DOUBLE-STRUCK CAPITAL M
return true;
- if (codePoint >= 0x1d546 && codePoint <= 0x1d546)
+ if (codePoint >= 0x1D546 && codePoint <= 0x1D546) // MATHEMATICAL DOUBLE-STRUCK CAPITAL O .. MATHEMATICAL DOUBLE-STRUCK CAPITAL O
return true;
- if (codePoint >= 0x1d54a && codePoint <= 0x1d550)
+ if (codePoint >= 0x1D54A && codePoint <= 0x1D550) // MATHEMATICAL DOUBLE-STRUCK CAPITAL S .. MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
return true;
- if (codePoint >= 0x1d552 && codePoint <= 0x1d6a5)
+ if (codePoint >= 0x1D552 && codePoint <= 0x1D6A5) // MATHEMATICAL DOUBLE-STRUCK SMALL A .. MATHEMATICAL ITALIC SMALL DOTLESS J
return true;
- if (codePoint >= 0x1d6a8 && codePoint <= 0x1d6c0)
+ if (codePoint >= 0x1D6A8 && codePoint <= 0x1D6C0) // MATHEMATICAL BOLD CAPITAL ALPHA .. MATHEMATICAL BOLD CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d6c2 && codePoint <= 0x1d6da)
+ if (codePoint >= 0x1D6C2 && codePoint <= 0x1D6DA) // MATHEMATICAL BOLD SMALL ALPHA .. MATHEMATICAL BOLD SMALL OMEGA
return true;
- if (codePoint >= 0x1d6dc && codePoint <= 0x1d6fa)
+ if (codePoint >= 0x1D6DC && codePoint <= 0x1D6FA) // MATHEMATICAL BOLD EPSILON SYMBOL .. MATHEMATICAL ITALIC CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d6fc && codePoint <= 0x1d714)
+ if (codePoint >= 0x1D6FC && codePoint <= 0x1D714) // MATHEMATICAL ITALIC SMALL ALPHA .. MATHEMATICAL ITALIC SMALL OMEGA
return true;
- if (codePoint >= 0x1d716 && codePoint <= 0x1d734)
+ if (codePoint >= 0x1D716 && codePoint <= 0x1D734) // MATHEMATICAL ITALIC EPSILON SYMBOL .. MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d736 && codePoint <= 0x1d74e)
+ if (codePoint >= 0x1D736 && codePoint <= 0x1D74E) // MATHEMATICAL BOLD ITALIC SMALL ALPHA .. MATHEMATICAL BOLD ITALIC SMALL OMEGA
return true;
- if (codePoint >= 0x1d750 && codePoint <= 0x1d76e)
+ if (codePoint >= 0x1D750 && codePoint <= 0x1D76E) // MATHEMATICAL BOLD ITALIC EPSILON SYMBOL .. MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d770 && codePoint <= 0x1d788)
+ if (codePoint >= 0x1D770 && codePoint <= 0x1D788) // MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA .. MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
return true;
- if (codePoint >= 0x1d78a && codePoint <= 0x1d7a8)
+ if (codePoint >= 0x1D78A && codePoint <= 0x1D7A8) // MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL .. MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d7aa && codePoint <= 0x1d7c2)
+ if (codePoint >= 0x1D7AA && codePoint <= 0x1D7C2) // MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA .. MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
return true;
- if (codePoint >= 0x1d7c4 && codePoint <= 0x1d7cb)
+ if (codePoint >= 0x1D7C4 && codePoint <= 0x1D7CB) // MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL .. MATHEMATICAL BOLD SMALL DIGAMMA
return true;
- if (codePoint >= 0x1e800 && codePoint <= 0x1e8c4)
+ if (codePoint >= 0x1E800 && codePoint <= 0x1E8C4) // MENDE KIKAKUI SYLLABLE M001 KI .. MENDE KIKAKUI SYLLABLE M060 NYON
return true;
- if (codePoint >= 0x1e900 && codePoint <= 0x1e943)
+ if (codePoint >= 0x1E900 && codePoint <= 0x1E943) // ADLAM CAPITAL LETTER ALIF .. ADLAM SMALL LETTER SHA
return true;
- if (codePoint >= 0x1ee00 && codePoint <= 0x1ee03)
+ if (codePoint >= 0x1EE00 && codePoint <= 0x1EE03) // ARABIC MATHEMATICAL ALEF .. ARABIC MATHEMATICAL DAL
return true;
- if (codePoint >= 0x1ee05 && codePoint <= 0x1ee1f)
+ if (codePoint >= 0x1EE05 && codePoint <= 0x1EE1F) // ARABIC MATHEMATICAL WAW .. ARABIC MATHEMATICAL DOTLESS QAF
return true;
- if (codePoint >= 0x1ee21 && codePoint <= 0x1ee22)
+ if (codePoint >= 0x1EE21 && codePoint <= 0x1EE22) // ARABIC MATHEMATICAL INITIAL BEH .. ARABIC MATHEMATICAL INITIAL JEEM
return true;
- if (codePoint >= 0x1ee24 && codePoint <= 0x1ee24)
+ if (codePoint >= 0x1EE24 && codePoint <= 0x1EE24) // ARABIC MATHEMATICAL INITIAL HEH .. ARABIC MATHEMATICAL INITIAL HEH
return true;
- if (codePoint >= 0x1ee27 && codePoint <= 0x1ee27)
+ if (codePoint >= 0x1EE27 && codePoint <= 0x1EE27) // ARABIC MATHEMATICAL INITIAL HAH .. ARABIC MATHEMATICAL INITIAL HAH
return true;
- if (codePoint >= 0x1ee29 && codePoint <= 0x1ee32)
+ if (codePoint >= 0x1EE29 && codePoint <= 0x1EE32) // ARABIC MATHEMATICAL INITIAL YEH .. ARABIC MATHEMATICAL INITIAL QAF
return true;
- if (codePoint >= 0x1ee34 && codePoint <= 0x1ee37)
+ if (codePoint >= 0x1EE34 && codePoint <= 0x1EE37) // ARABIC MATHEMATICAL INITIAL SHEEN .. ARABIC MATHEMATICAL INITIAL KHAH
return true;
- if (codePoint >= 0x1ee39 && codePoint <= 0x1ee39)
+ if (codePoint >= 0x1EE39 && codePoint <= 0x1EE39) // ARABIC MATHEMATICAL INITIAL DAD .. ARABIC MATHEMATICAL INITIAL DAD
return true;
- if (codePoint >= 0x1ee3b && codePoint <= 0x1ee3b)
+ if (codePoint >= 0x1EE3B && codePoint <= 0x1EE3B) // ARABIC MATHEMATICAL INITIAL GHAIN .. ARABIC MATHEMATICAL INITIAL GHAIN
return true;
- if (codePoint >= 0x1ee42 && codePoint <= 0x1ee42)
+ if (codePoint >= 0x1EE42 && codePoint <= 0x1EE42) // ARABIC MATHEMATICAL TAILED JEEM .. ARABIC MATHEMATICAL TAILED JEEM
return true;
- if (codePoint >= 0x1ee47 && codePoint <= 0x1ee47)
+ if (codePoint >= 0x1EE47 && codePoint <= 0x1EE47) // ARABIC MATHEMATICAL TAILED HAH .. ARABIC MATHEMATICAL TAILED HAH
return true;
- if (codePoint >= 0x1ee49 && codePoint <= 0x1ee49)
+ if (codePoint >= 0x1EE49 && codePoint <= 0x1EE49) // ARABIC MATHEMATICAL TAILED YEH .. ARABIC MATHEMATICAL TAILED YEH
return true;
- if (codePoint >= 0x1ee4b && codePoint <= 0x1ee4b)
+ if (codePoint >= 0x1EE4B && codePoint <= 0x1EE4B) // ARABIC MATHEMATICAL TAILED LAM .. ARABIC MATHEMATICAL TAILED LAM
return true;
- if (codePoint >= 0x1ee4d && codePoint <= 0x1ee4f)
+ if (codePoint >= 0x1EE4D && codePoint <= 0x1EE4F) // ARABIC MATHEMATICAL TAILED NOON .. ARABIC MATHEMATICAL TAILED AIN
return true;
- if (codePoint >= 0x1ee51 && codePoint <= 0x1ee52)
+ if (codePoint >= 0x1EE51 && codePoint <= 0x1EE52) // ARABIC MATHEMATICAL TAILED SAD .. ARABIC MATHEMATICAL TAILED QAF
return true;
- if (codePoint >= 0x1ee54 && codePoint <= 0x1ee54)
+ if (codePoint >= 0x1EE54 && codePoint <= 0x1EE54) // ARABIC MATHEMATICAL TAILED SHEEN .. ARABIC MATHEMATICAL TAILED SHEEN
return true;
- if (codePoint >= 0x1ee57 && codePoint <= 0x1ee57)
+ if (codePoint >= 0x1EE57 && codePoint <= 0x1EE57) // ARABIC MATHEMATICAL TAILED KHAH .. ARABIC MATHEMATICAL TAILED KHAH
return true;
- if (codePoint >= 0x1ee59 && codePoint <= 0x1ee59)
+ if (codePoint >= 0x1EE59 && codePoint <= 0x1EE59) // ARABIC MATHEMATICAL TAILED DAD .. ARABIC MATHEMATICAL TAILED DAD
return true;
- if (codePoint >= 0x1ee5b && codePoint <= 0x1ee5b)
+ if (codePoint >= 0x1EE5B && codePoint <= 0x1EE5B) // ARABIC MATHEMATICAL TAILED GHAIN .. ARABIC MATHEMATICAL TAILED GHAIN
return true;
- if (codePoint >= 0x1ee5d && codePoint <= 0x1ee5d)
+ if (codePoint >= 0x1EE5D && codePoint <= 0x1EE5D) // ARABIC MATHEMATICAL TAILED DOTLESS NOON .. ARABIC MATHEMATICAL TAILED DOTLESS NOON
return true;
- if (codePoint >= 0x1ee5f && codePoint <= 0x1ee5f)
+ if (codePoint >= 0x1EE5F && codePoint <= 0x1EE5F) // ARABIC MATHEMATICAL TAILED DOTLESS QAF .. ARABIC MATHEMATICAL TAILED DOTLESS QAF
return true;
- if (codePoint >= 0x1ee61 && codePoint <= 0x1ee62)
+ if (codePoint >= 0x1EE61 && codePoint <= 0x1EE62) // ARABIC MATHEMATICAL STRETCHED BEH .. ARABIC MATHEMATICAL STRETCHED JEEM
return true;
- if (codePoint >= 0x1ee64 && codePoint <= 0x1ee64)
+ if (codePoint >= 0x1EE64 && codePoint <= 0x1EE64) // ARABIC MATHEMATICAL STRETCHED HEH .. ARABIC MATHEMATICAL STRETCHED HEH
return true;
- if (codePoint >= 0x1ee67 && codePoint <= 0x1ee6a)
+ if (codePoint >= 0x1EE67 && codePoint <= 0x1EE6A) // ARABIC MATHEMATICAL STRETCHED HAH .. ARABIC MATHEMATICAL STRETCHED KAF
return true;
- if (codePoint >= 0x1ee6c && codePoint <= 0x1ee72)
+ if (codePoint >= 0x1EE6C && codePoint <= 0x1EE72) // ARABIC MATHEMATICAL STRETCHED MEEM .. ARABIC MATHEMATICAL STRETCHED QAF
return true;
- if (codePoint >= 0x1ee74 && codePoint <= 0x1ee77)
+ if (codePoint >= 0x1EE74 && codePoint <= 0x1EE77) // ARABIC MATHEMATICAL STRETCHED SHEEN .. ARABIC MATHEMATICAL STRETCHED KHAH
return true;
- if (codePoint >= 0x1ee79 && codePoint <= 0x1ee7c)
+ if (codePoint >= 0x1EE79 && codePoint <= 0x1EE7C) // ARABIC MATHEMATICAL STRETCHED DAD .. ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
return true;
- if (codePoint >= 0x1ee7e && codePoint <= 0x1ee7e)
+ if (codePoint >= 0x1EE7E && codePoint <= 0x1EE7E) // ARABIC MATHEMATICAL STRETCHED DOTLESS FEH .. ARABIC MATHEMATICAL STRETCHED DOTLESS FEH
return true;
- if (codePoint >= 0x1ee80 && codePoint <= 0x1ee89)
+ if (codePoint >= 0x1EE80 && codePoint <= 0x1EE89) // ARABIC MATHEMATICAL LOOPED ALEF .. ARABIC MATHEMATICAL LOOPED YEH
return true;
- if (codePoint >= 0x1ee8b && codePoint <= 0x1ee9b)
+ if (codePoint >= 0x1EE8B && codePoint <= 0x1EE9B) // ARABIC MATHEMATICAL LOOPED LAM .. ARABIC MATHEMATICAL LOOPED GHAIN
return true;
- if (codePoint >= 0x1eea1 && codePoint <= 0x1eea3)
+ if (codePoint >= 0x1EEA1 && codePoint <= 0x1EEA3) // ARABIC MATHEMATICAL DOUBLE-STRUCK BEH .. ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
return true;
- if (codePoint >= 0x1eea5 && codePoint <= 0x1eea9)
+ if (codePoint >= 0x1EEA5 && codePoint <= 0x1EEA9) // ARABIC MATHEMATICAL DOUBLE-STRUCK WAW .. ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
return true;
- if (codePoint >= 0x1eeab && codePoint <= 0x1eebb)
+ if (codePoint >= 0x1EEAB && codePoint <= 0x1EEBB) // ARABIC MATHEMATICAL DOUBLE-STRUCK LAM .. ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
return true;
- if (codePoint >= 0x20000 && codePoint <= 0x2a6d6)
+ if (codePoint >= 0x20000 && codePoint <= 0x2A6D6) // CJK Ideograph Extension B .. CJK Ideograph Extension B
return true;
- if (codePoint >= 0x2a700 && codePoint <= 0x2b734)
+ if (codePoint >= 0x2A700 && codePoint <= 0x2B734) // CJK Ideograph Extension C .. CJK Ideograph Extension C
return true;
- if (codePoint >= 0x2b740 && codePoint <= 0x2b81d)
+ if (codePoint >= 0x2B740 && codePoint <= 0x2B81D) // CJK Ideograph Extension D .. CJK Ideograph Extension D
return true;
- if (codePoint >= 0x2b820 && codePoint <= 0x2cea1)
+ if (codePoint >= 0x2B820 && codePoint <= 0x2CEA1) // CJK Ideograph Extension E .. CJK Ideograph Extension E
return true;
- if (codePoint >= 0x2ceb0 && codePoint <= 0x2ebe0)
+ if (codePoint >= 0x2CEB0 && codePoint <= 0x2EBE0) // CJK Ideograph Extension F .. CJK Ideograph Extension F
return true;
- if (codePoint >= 0x2f800 && codePoint <= 0x2fa1d)
+ if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) // CJK COMPATIBILITY IDEOGRAPH-2F800 .. CJK COMPATIBILITY IDEOGRAPH-2FA1D
return true;
return false;
}
@@ -2196,549 +2196,1149 @@ js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint)
bool
js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint)
{
- if (codePoint >= 0x10000 && codePoint <= 0x1000b)
+ if (codePoint >= 0x10000 && codePoint <= 0x1000B) // LINEAR B SYLLABLE B008 A .. LINEAR B SYLLABLE B046 JE
return true;
- if (codePoint >= 0x1000d && codePoint <= 0x10026)
+ if (codePoint >= 0x1000D && codePoint <= 0x10026) // LINEAR B SYLLABLE B036 JO .. LINEAR B SYLLABLE B032 QO
return true;
- if (codePoint >= 0x10028 && codePoint <= 0x1003a)
+ if (codePoint >= 0x10028 && codePoint <= 0x1003A) // LINEAR B SYLLABLE B060 RA .. LINEAR B SYLLABLE B042 WO
return true;
- if (codePoint >= 0x1003c && codePoint <= 0x1003d)
+ if (codePoint >= 0x1003C && codePoint <= 0x1003D) // LINEAR B SYLLABLE B017 ZA .. LINEAR B SYLLABLE B074 ZE
return true;
- if (codePoint >= 0x1003f && codePoint <= 0x1004d)
+ if (codePoint >= 0x1003F && codePoint <= 0x1004D) // LINEAR B SYLLABLE B020 ZO .. LINEAR B SYLLABLE B091 TWO
return true;
- if (codePoint >= 0x10050 && codePoint <= 0x1005d)
+ if (codePoint >= 0x10050 && codePoint <= 0x1005D) // LINEAR B SYMBOL B018 .. LINEAR B SYMBOL B089
return true;
- if (codePoint >= 0x10080 && codePoint <= 0x100fa)
+ if (codePoint >= 0x10080 && codePoint <= 0x100FA) // LINEAR B IDEOGRAM B100 MAN .. LINEAR B IDEOGRAM VESSEL B305
return true;
- if (codePoint >= 0x10140 && codePoint <= 0x10174)
+ if (codePoint >= 0x10140 && codePoint <= 0x10174) // GREEK ACROPHONIC ATTIC ONE QUARTER .. GREEK ACROPHONIC STRATIAN FIFTY MNAS
return true;
- if (codePoint >= 0x101fd && codePoint <= 0x101fd)
+ if (codePoint >= 0x101FD && codePoint <= 0x101FD) // PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE .. PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
return true;
- if (codePoint >= 0x10280 && codePoint <= 0x1029c)
+ if (codePoint >= 0x10280 && codePoint <= 0x1029C) // LYCIAN LETTER A .. LYCIAN LETTER X
return true;
- if (codePoint >= 0x102a0 && codePoint <= 0x102d0)
+ if (codePoint >= 0x102A0 && codePoint <= 0x102D0) // CARIAN LETTER A .. CARIAN LETTER UUU3
return true;
- if (codePoint >= 0x102e0 && codePoint <= 0x102e0)
+ if (codePoint >= 0x102E0 && codePoint <= 0x102E0) // COPTIC EPACT THOUSANDS MARK .. COPTIC EPACT THOUSANDS MARK
return true;
- if (codePoint >= 0x10300 && codePoint <= 0x1031f)
+ if (codePoint >= 0x10300 && codePoint <= 0x1031F) // OLD ITALIC LETTER A .. OLD ITALIC LETTER ESS
return true;
- if (codePoint >= 0x1032d && codePoint <= 0x1034a)
+ if (codePoint >= 0x1032D && codePoint <= 0x1034A) // OLD ITALIC LETTER YE .. GOTHIC LETTER NINE HUNDRED
return true;
- if (codePoint >= 0x10350 && codePoint <= 0x1037a)
+ if (codePoint >= 0x10350 && codePoint <= 0x1037A) // OLD PERMIC LETTER AN .. COMBINING OLD PERMIC LETTER SII
return true;
- if (codePoint >= 0x10380 && codePoint <= 0x1039d)
+ if (codePoint >= 0x10380 && codePoint <= 0x1039D) // UGARITIC LETTER ALPA .. UGARITIC LETTER SSU
return true;
- if (codePoint >= 0x103a0 && codePoint <= 0x103c3)
+ if (codePoint >= 0x103A0 && codePoint <= 0x103C3) // OLD PERSIAN SIGN A .. OLD PERSIAN SIGN HA
return true;
- if (codePoint >= 0x103c8 && codePoint <= 0x103cf)
+ if (codePoint >= 0x103C8 && codePoint <= 0x103CF) // OLD PERSIAN SIGN AURAMAZDAA .. OLD PERSIAN SIGN BUUMISH
return true;
- if (codePoint >= 0x103d1 && codePoint <= 0x103d5)
+ if (codePoint >= 0x103D1 && codePoint <= 0x103D5) // OLD PERSIAN NUMBER ONE .. OLD PERSIAN NUMBER HUNDRED
return true;
- if (codePoint >= 0x10400 && codePoint <= 0x1049d)
+ if (codePoint >= 0x10400 && codePoint <= 0x1049D) // DESERET CAPITAL LETTER LONG I .. OSMANYA LETTER OO
return true;
- if (codePoint >= 0x104a0 && codePoint <= 0x104a9)
+ if (codePoint >= 0x104A0 && codePoint <= 0x104A9) // OSMANYA DIGIT ZERO .. OSMANYA DIGIT NINE
return true;
- if (codePoint >= 0x104b0 && codePoint <= 0x104d3)
+ if (codePoint >= 0x104B0 && codePoint <= 0x104D3) // OSAGE CAPITAL LETTER A .. OSAGE CAPITAL LETTER ZHA
return true;
- if (codePoint >= 0x104d8 && codePoint <= 0x104fb)
+ if (codePoint >= 0x104D8 && codePoint <= 0x104FB) // OSAGE SMALL LETTER A .. OSAGE SMALL LETTER ZHA
return true;
- if (codePoint >= 0x10500 && codePoint <= 0x10527)
+ if (codePoint >= 0x10500 && codePoint <= 0x10527) // ELBASAN LETTER A .. ELBASAN LETTER KHE
return true;
- if (codePoint >= 0x10530 && codePoint <= 0x10563)
+ if (codePoint >= 0x10530 && codePoint <= 0x10563) // CAUCASIAN ALBANIAN LETTER ALT .. CAUCASIAN ALBANIAN LETTER KIW
return true;
- if (codePoint >= 0x10600 && codePoint <= 0x10736)
+ if (codePoint >= 0x10600 && codePoint <= 0x10736) // LINEAR A SIGN AB001 .. LINEAR A SIGN A664
return true;
- if (codePoint >= 0x10740 && codePoint <= 0x10755)
+ if (codePoint >= 0x10740 && codePoint <= 0x10755) // LINEAR A SIGN A701 A .. LINEAR A SIGN A732 JE
return true;
- if (codePoint >= 0x10760 && codePoint <= 0x10767)
+ if (codePoint >= 0x10760 && codePoint <= 0x10767) // LINEAR A SIGN A800 .. LINEAR A SIGN A807
return true;
- if (codePoint >= 0x10800 && codePoint <= 0x10805)
+ if (codePoint >= 0x10800 && codePoint <= 0x10805) // CYPRIOT SYLLABLE A .. CYPRIOT SYLLABLE JA
return true;
- if (codePoint >= 0x10808 && codePoint <= 0x10808)
+ if (codePoint >= 0x10808 && codePoint <= 0x10808) // CYPRIOT SYLLABLE JO .. CYPRIOT SYLLABLE JO
return true;
- if (codePoint >= 0x1080a && codePoint <= 0x10835)
+ if (codePoint >= 0x1080A && codePoint <= 0x10835) // CYPRIOT SYLLABLE KA .. CYPRIOT SYLLABLE WO
return true;
- if (codePoint >= 0x10837 && codePoint <= 0x10838)
+ if (codePoint >= 0x10837 && codePoint <= 0x10838) // CYPRIOT SYLLABLE XA .. CYPRIOT SYLLABLE XE
return true;
- if (codePoint >= 0x1083c && codePoint <= 0x1083c)
+ if (codePoint >= 0x1083C && codePoint <= 0x1083C) // CYPRIOT SYLLABLE ZA .. CYPRIOT SYLLABLE ZA
return true;
- if (codePoint >= 0x1083f && codePoint <= 0x10855)
+ if (codePoint >= 0x1083F && codePoint <= 0x10855) // CYPRIOT SYLLABLE ZO .. IMPERIAL ARAMAIC LETTER TAW
return true;
- if (codePoint >= 0x10860 && codePoint <= 0x10876)
+ if (codePoint >= 0x10860 && codePoint <= 0x10876) // PALMYRENE LETTER ALEPH .. PALMYRENE LETTER TAW
return true;
- if (codePoint >= 0x10880 && codePoint <= 0x1089e)
+ if (codePoint >= 0x10880 && codePoint <= 0x1089E) // NABATAEAN LETTER FINAL ALEPH .. NABATAEAN LETTER TAW
return true;
- if (codePoint >= 0x108e0 && codePoint <= 0x108f2)
+ if (codePoint >= 0x108E0 && codePoint <= 0x108F2) // HATRAN LETTER ALEPH .. HATRAN LETTER QOPH
return true;
- if (codePoint >= 0x108f4 && codePoint <= 0x108f5)
+ if (codePoint >= 0x108F4 && codePoint <= 0x108F5) // HATRAN LETTER SHIN .. HATRAN LETTER TAW
return true;
- if (codePoint >= 0x10900 && codePoint <= 0x10915)
+ if (codePoint >= 0x10900 && codePoint <= 0x10915) // PHOENICIAN LETTER ALF .. PHOENICIAN LETTER TAU
return true;
- if (codePoint >= 0x10920 && codePoint <= 0x10939)
+ if (codePoint >= 0x10920 && codePoint <= 0x10939) // LYDIAN LETTER A .. LYDIAN LETTER C
return true;
- if (codePoint >= 0x10980 && codePoint <= 0x109b7)
+ if (codePoint >= 0x10980 && codePoint <= 0x109B7) // MEROITIC HIEROGLYPHIC LETTER A .. MEROITIC CURSIVE LETTER DA
return true;
- if (codePoint >= 0x109be && codePoint <= 0x109bf)
+ if (codePoint >= 0x109BE && codePoint <= 0x109BF) // MEROITIC CURSIVE LOGOGRAM RMT .. MEROITIC CURSIVE LOGOGRAM IMN
return true;
- if (codePoint >= 0x10a00 && codePoint <= 0x10a03)
+ if (codePoint >= 0x10A00 && codePoint <= 0x10A03) // KHAROSHTHI LETTER A .. KHAROSHTHI VOWEL SIGN VOCALIC R
return true;
- if (codePoint >= 0x10a05 && codePoint <= 0x10a06)
+ if (codePoint >= 0x10A05 && codePoint <= 0x10A06) // KHAROSHTHI VOWEL SIGN E .. KHAROSHTHI VOWEL SIGN O
return true;
- if (codePoint >= 0x10a0c && codePoint <= 0x10a13)
+ if (codePoint >= 0x10A0C && codePoint <= 0x10A13) // KHAROSHTHI VOWEL LENGTH MARK .. KHAROSHTHI LETTER GHA
return true;
- if (codePoint >= 0x10a15 && codePoint <= 0x10a17)
+ if (codePoint >= 0x10A15 && codePoint <= 0x10A17) // KHAROSHTHI LETTER CA .. KHAROSHTHI LETTER JA
return true;
- if (codePoint >= 0x10a19 && codePoint <= 0x10a35)
+ if (codePoint >= 0x10A19 && codePoint <= 0x10A35) // KHAROSHTHI LETTER NYA .. KHAROSHTHI LETTER VHA
return true;
- if (codePoint >= 0x10a38 && codePoint <= 0x10a3a)
+ if (codePoint >= 0x10A38 && codePoint <= 0x10A3A) // KHAROSHTHI SIGN BAR ABOVE .. KHAROSHTHI SIGN DOT BELOW
return true;
- if (codePoint >= 0x10a3f && codePoint <= 0x10a3f)
+ if (codePoint >= 0x10A3F && codePoint <= 0x10A3F) // KHAROSHTHI VIRAMA .. KHAROSHTHI VIRAMA
return true;
- if (codePoint >= 0x10a60 && codePoint <= 0x10a7c)
+ if (codePoint >= 0x10A60 && codePoint <= 0x10A7C) // OLD SOUTH ARABIAN LETTER HE .. OLD SOUTH ARABIAN LETTER THETH
return true;
- if (codePoint >= 0x10a80 && codePoint <= 0x10a9c)
+ if (codePoint >= 0x10A80 && codePoint <= 0x10A9C) // OLD NORTH ARABIAN LETTER HEH .. OLD NORTH ARABIAN LETTER ZAH
return true;
- if (codePoint >= 0x10ac0 && codePoint <= 0x10ac7)
+ if (codePoint >= 0x10AC0 && codePoint <= 0x10AC7) // MANICHAEAN LETTER ALEPH .. MANICHAEAN LETTER WAW
return true;
- if (codePoint >= 0x10ac9 && codePoint <= 0x10ae6)
+ if (codePoint >= 0x10AC9 && codePoint <= 0x10AE6) // MANICHAEAN LETTER ZAYIN .. MANICHAEAN ABBREVIATION MARK BELOW
return true;
- if (codePoint >= 0x10b00 && codePoint <= 0x10b35)
+ if (codePoint >= 0x10B00 && codePoint <= 0x10B35) // AVESTAN LETTER A .. AVESTAN LETTER HE
return true;
- if (codePoint >= 0x10b40 && codePoint <= 0x10b55)
+ if (codePoint >= 0x10B40 && codePoint <= 0x10B55) // INSCRIPTIONAL PARTHIAN LETTER ALEPH .. INSCRIPTIONAL PARTHIAN LETTER TAW
return true;
- if (codePoint >= 0x10b60 && codePoint <= 0x10b72)
+ if (codePoint >= 0x10B60 && codePoint <= 0x10B72) // INSCRIPTIONAL PAHLAVI LETTER ALEPH .. INSCRIPTIONAL PAHLAVI LETTER TAW
return true;
- if (codePoint >= 0x10b80 && codePoint <= 0x10b91)
+ if (codePoint >= 0x10B80 && codePoint <= 0x10B91) // PSALTER PAHLAVI LETTER ALEPH .. PSALTER PAHLAVI LETTER TAW
return true;
- if (codePoint >= 0x10c00 && codePoint <= 0x10c48)
+ if (codePoint >= 0x10C00 && codePoint <= 0x10C48) // OLD TURKIC LETTER ORKHON A .. OLD TURKIC LETTER ORKHON BASH
return true;
- if (codePoint >= 0x10c80 && codePoint <= 0x10cb2)
+ if (codePoint >= 0x10C80 && codePoint <= 0x10CB2) // OLD HUNGARIAN CAPITAL LETTER A .. OLD HUNGARIAN CAPITAL LETTER US
return true;
- if (codePoint >= 0x10cc0 && codePoint <= 0x10cf2)
+ if (codePoint >= 0x10CC0 && codePoint <= 0x10CF2) // OLD HUNGARIAN SMALL LETTER A .. OLD HUNGARIAN SMALL LETTER US
return true;
- if (codePoint >= 0x10d00 && codePoint <= 0x10d27)
+ if (codePoint >= 0x10D00 && codePoint <= 0x10D27) // HANIFI ROHINGYA LETTER A .. HANIFI ROHINGYA SIGN TASSI
return true;
- if (codePoint >= 0x10d30 && codePoint <= 0x10d39)
+ if (codePoint >= 0x10D30 && codePoint <= 0x10D39) // HANIFI ROHINGYA DIGIT ZERO .. HANIFI ROHINGYA DIGIT NINE
return true;
- if (codePoint >= 0x10f00 && codePoint <= 0x10f1c)
+ if (codePoint >= 0x10F00 && codePoint <= 0x10F1C) // OLD SOGDIAN LETTER ALEPH .. OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
return true;
- if (codePoint >= 0x10f27 && codePoint <= 0x10f27)
+ if (codePoint >= 0x10F27 && codePoint <= 0x10F27) // OLD SOGDIAN LIGATURE AYIN-DALETH .. OLD SOGDIAN LIGATURE AYIN-DALETH
return true;
- if (codePoint >= 0x10f30 && codePoint <= 0x10f50)
+ if (codePoint >= 0x10F30 && codePoint <= 0x10F50) // SOGDIAN LETTER ALEPH .. SOGDIAN COMBINING STROKE BELOW
return true;
- if (codePoint >= 0x11000 && codePoint <= 0x11046)
+ if (codePoint >= 0x11000 && codePoint <= 0x11046) // BRAHMI SIGN CANDRABINDU .. BRAHMI VIRAMA
return true;
- if (codePoint >= 0x11066 && codePoint <= 0x1106f)
+ if (codePoint >= 0x11066 && codePoint <= 0x1106F) // BRAHMI DIGIT ZERO .. BRAHMI DIGIT NINE
return true;
- if (codePoint >= 0x1107f && codePoint <= 0x110ba)
+ if (codePoint >= 0x1107F && codePoint <= 0x110BA) // BRAHMI NUMBER JOINER .. KAITHI SIGN NUKTA
return true;
- if (codePoint >= 0x110d0 && codePoint <= 0x110e8)
+ if (codePoint >= 0x110D0 && codePoint <= 0x110E8) // SORA SOMPENG LETTER SAH .. SORA SOMPENG LETTER MAE
return true;
- if (codePoint >= 0x110f0 && codePoint <= 0x110f9)
+ if (codePoint >= 0x110F0 && codePoint <= 0x110F9) // SORA SOMPENG DIGIT ZERO .. SORA SOMPENG DIGIT NINE
return true;
- if (codePoint >= 0x11100 && codePoint <= 0x11134)
+ if (codePoint >= 0x11100 && codePoint <= 0x11134) // CHAKMA SIGN CANDRABINDU .. CHAKMA MAAYYAA
return true;
- if (codePoint >= 0x11136 && codePoint <= 0x1113f)
+ if (codePoint >= 0x11136 && codePoint <= 0x1113F) // CHAKMA DIGIT ZERO .. CHAKMA DIGIT NINE
return true;
- if (codePoint >= 0x11144 && codePoint <= 0x11146)
+ if (codePoint >= 0x11144 && codePoint <= 0x11146) // CHAKMA LETTER LHAA .. CHAKMA VOWEL SIGN EI
return true;
- if (codePoint >= 0x11150 && codePoint <= 0x11173)
+ if (codePoint >= 0x11150 && codePoint <= 0x11173) // MAHAJANI LETTER A .. MAHAJANI SIGN NUKTA
return true;
- if (codePoint >= 0x11176 && codePoint <= 0x11176)
+ if (codePoint >= 0x11176 && codePoint <= 0x11176) // MAHAJANI LIGATURE SHRI .. MAHAJANI LIGATURE SHRI
return true;
- if (codePoint >= 0x11180 && codePoint <= 0x111c4)
+ if (codePoint >= 0x11180 && codePoint <= 0x111C4) // SHARADA SIGN CANDRABINDU .. SHARADA OM
return true;
- if (codePoint >= 0x111c9 && codePoint <= 0x111cc)
+ if (codePoint >= 0x111C9 && codePoint <= 0x111CC) // SHARADA SANDHI MARK .. SHARADA EXTRA SHORT VOWEL MARK
return true;
- if (codePoint >= 0x111d0 && codePoint <= 0x111da)
+ if (codePoint >= 0x111D0 && codePoint <= 0x111DA) // SHARADA DIGIT ZERO .. SHARADA EKAM
return true;
- if (codePoint >= 0x111dc && codePoint <= 0x111dc)
+ if (codePoint >= 0x111DC && codePoint <= 0x111DC) // SHARADA HEADSTROKE .. SHARADA HEADSTROKE
return true;
- if (codePoint >= 0x11200 && codePoint <= 0x11211)
+ if (codePoint >= 0x11200 && codePoint <= 0x11211) // KHOJKI LETTER A .. KHOJKI LETTER JJA
return true;
- if (codePoint >= 0x11213 && codePoint <= 0x11237)
+ if (codePoint >= 0x11213 && codePoint <= 0x11237) // KHOJKI LETTER NYA .. KHOJKI SIGN SHADDA
return true;
- if (codePoint >= 0x1123e && codePoint <= 0x1123e)
+ if (codePoint >= 0x1123E && codePoint <= 0x1123E) // KHOJKI SIGN SUKUN .. KHOJKI SIGN SUKUN
return true;
- if (codePoint >= 0x11280 && codePoint <= 0x11286)
+ if (codePoint >= 0x11280 && codePoint <= 0x11286) // MULTANI LETTER A .. MULTANI LETTER GA
return true;
- if (codePoint >= 0x11288 && codePoint <= 0x11288)
+ if (codePoint >= 0x11288 && codePoint <= 0x11288) // MULTANI LETTER GHA .. MULTANI LETTER GHA
return true;
- if (codePoint >= 0x1128a && codePoint <= 0x1128d)
+ if (codePoint >= 0x1128A && codePoint <= 0x1128D) // MULTANI LETTER CA .. MULTANI LETTER JJA
return true;
- if (codePoint >= 0x1128f && codePoint <= 0x1129d)
+ if (codePoint >= 0x1128F && codePoint <= 0x1129D) // MULTANI LETTER NYA .. MULTANI LETTER BA
return true;
- if (codePoint >= 0x1129f && codePoint <= 0x112a8)
+ if (codePoint >= 0x1129F && codePoint <= 0x112A8) // MULTANI LETTER BHA .. MULTANI LETTER RHA
return true;
- if (codePoint >= 0x112b0 && codePoint <= 0x112ea)
+ if (codePoint >= 0x112B0 && codePoint <= 0x112EA) // KHUDAWADI LETTER A .. KHUDAWADI SIGN VIRAMA
return true;
- if (codePoint >= 0x112f0 && codePoint <= 0x112f9)
+ if (codePoint >= 0x112F0 && codePoint <= 0x112F9) // KHUDAWADI DIGIT ZERO .. KHUDAWADI DIGIT NINE
return true;
- if (codePoint >= 0x11300 && codePoint <= 0x11303)
+ if (codePoint >= 0x11300 && codePoint <= 0x11303) // GRANTHA SIGN COMBINING ANUSVARA ABOVE .. GRANTHA SIGN VISARGA
return true;
- if (codePoint >= 0x11305 && codePoint <= 0x1130c)
+ if (codePoint >= 0x11305 && codePoint <= 0x1130C) // GRANTHA LETTER A .. GRANTHA LETTER VOCALIC L
return true;
- if (codePoint >= 0x1130f && codePoint <= 0x11310)
+ if (codePoint >= 0x1130F && codePoint <= 0x11310) // GRANTHA LETTER EE .. GRANTHA LETTER AI
return true;
- if (codePoint >= 0x11313 && codePoint <= 0x11328)
+ if (codePoint >= 0x11313 && codePoint <= 0x11328) // GRANTHA LETTER OO .. GRANTHA LETTER NA
return true;
- if (codePoint >= 0x1132a && codePoint <= 0x11330)
+ if (codePoint >= 0x1132A && codePoint <= 0x11330) // GRANTHA LETTER PA .. GRANTHA LETTER RA
return true;
- if (codePoint >= 0x11332 && codePoint <= 0x11333)
+ if (codePoint >= 0x11332 && codePoint <= 0x11333) // GRANTHA LETTER LA .. GRANTHA LETTER LLA
return true;
- if (codePoint >= 0x11335 && codePoint <= 0x11339)
+ if (codePoint >= 0x11335 && codePoint <= 0x11339) // GRANTHA LETTER VA .. GRANTHA LETTER HA
return true;
- if (codePoint >= 0x1133b && codePoint <= 0x11344)
+ if (codePoint >= 0x1133B && codePoint <= 0x11344) // COMBINING BINDU BELOW .. GRANTHA VOWEL SIGN VOCALIC RR
return true;
- if (codePoint >= 0x11347 && codePoint <= 0x11348)
+ if (codePoint >= 0x11347 && codePoint <= 0x11348) // GRANTHA VOWEL SIGN EE .. GRANTHA VOWEL SIGN AI
return true;
- if (codePoint >= 0x1134b && codePoint <= 0x1134d)
+ if (codePoint >= 0x1134B && codePoint <= 0x1134D) // GRANTHA VOWEL SIGN OO .. GRANTHA SIGN VIRAMA
return true;
- if (codePoint >= 0x11350 && codePoint <= 0x11350)
+ if (codePoint >= 0x11350 && codePoint <= 0x11350) // GRANTHA OM .. GRANTHA OM
return true;
- if (codePoint >= 0x11357 && codePoint <= 0x11357)
+ if (codePoint >= 0x11357 && codePoint <= 0x11357) // GRANTHA AU LENGTH MARK .. GRANTHA AU LENGTH MARK
return true;
- if (codePoint >= 0x1135d && codePoint <= 0x11363)
+ if (codePoint >= 0x1135D && codePoint <= 0x11363) // GRANTHA SIGN PLUTA .. GRANTHA VOWEL SIGN VOCALIC LL
return true;
- if (codePoint >= 0x11366 && codePoint <= 0x1136c)
+ if (codePoint >= 0x11366 && codePoint <= 0x1136C) // COMBINING GRANTHA DIGIT ZERO .. COMBINING GRANTHA DIGIT SIX
return true;
- if (codePoint >= 0x11370 && codePoint <= 0x11374)
+ if (codePoint >= 0x11370 && codePoint <= 0x11374) // COMBINING GRANTHA LETTER A .. COMBINING GRANTHA LETTER PA
return true;
- if (codePoint >= 0x11400 && codePoint <= 0x1144a)
+ if (codePoint >= 0x11400 && codePoint <= 0x1144A) // NEWA LETTER A .. NEWA SIDDHI
return true;
- if (codePoint >= 0x11450 && codePoint <= 0x11459)
+ if (codePoint >= 0x11450 && codePoint <= 0x11459) // NEWA DIGIT ZERO .. NEWA DIGIT NINE
return true;
- if (codePoint >= 0x1145e && codePoint <= 0x1145e)
+ if (codePoint >= 0x1145E && codePoint <= 0x1145E) // NEWA SANDHI MARK .. NEWA SANDHI MARK
return true;
- if (codePoint >= 0x11480 && codePoint <= 0x114c5)
+ if (codePoint >= 0x11480 && codePoint <= 0x114C5) // TIRHUTA ANJI .. TIRHUTA GVANG
return true;
- if (codePoint >= 0x114c7 && codePoint <= 0x114c7)
+ if (codePoint >= 0x114C7 && codePoint <= 0x114C7) // TIRHUTA OM .. TIRHUTA OM
return true;
- if (codePoint >= 0x114d0 && codePoint <= 0x114d9)
+ if (codePoint >= 0x114D0 && codePoint <= 0x114D9) // TIRHUTA DIGIT ZERO .. TIRHUTA DIGIT NINE
return true;
- if (codePoint >= 0x11580 && codePoint <= 0x115b5)
+ if (codePoint >= 0x11580 && codePoint <= 0x115B5) // SIDDHAM LETTER A .. SIDDHAM VOWEL SIGN VOCALIC RR
return true;
- if (codePoint >= 0x115b8 && codePoint <= 0x115c0)
+ if (codePoint >= 0x115B8 && codePoint <= 0x115C0) // SIDDHAM VOWEL SIGN E .. SIDDHAM SIGN NUKTA
return true;
- if (codePoint >= 0x115d8 && codePoint <= 0x115dd)
+ if (codePoint >= 0x115D8 && codePoint <= 0x115DD) // SIDDHAM LETTER THREE-CIRCLE ALTERNATE I .. SIDDHAM VOWEL SIGN ALTERNATE UU
return true;
- if (codePoint >= 0x11600 && codePoint <= 0x11640)
+ if (codePoint >= 0x11600 && codePoint <= 0x11640) // MODI LETTER A .. MODI SIGN ARDHACANDRA
return true;
- if (codePoint >= 0x11644 && codePoint <= 0x11644)
+ if (codePoint >= 0x11644 && codePoint <= 0x11644) // MODI SIGN HUVA .. MODI SIGN HUVA
return true;
- if (codePoint >= 0x11650 && codePoint <= 0x11659)
+ if (codePoint >= 0x11650 && codePoint <= 0x11659) // MODI DIGIT ZERO .. MODI DIGIT NINE
return true;
- if (codePoint >= 0x11680 && codePoint <= 0x116b7)
+ if (codePoint >= 0x11680 && codePoint <= 0x116B7) // TAKRI LETTER A .. TAKRI SIGN NUKTA
return true;
- if (codePoint >= 0x116c0 && codePoint <= 0x116c9)
+ if (codePoint >= 0x116C0 && codePoint <= 0x116C9) // TAKRI DIGIT ZERO .. TAKRI DIGIT NINE
return true;
- if (codePoint >= 0x11700 && codePoint <= 0x1171a)
+ if (codePoint >= 0x11700 && codePoint <= 0x1171A) // AHOM LETTER KA .. AHOM LETTER ALTERNATE BA
return true;
- if (codePoint >= 0x1171d && codePoint <= 0x1172b)
+ if (codePoint >= 0x1171D && codePoint <= 0x1172B) // AHOM CONSONANT SIGN MEDIAL LA .. AHOM SIGN KILLER
return true;
- if (codePoint >= 0x11730 && codePoint <= 0x11739)
+ if (codePoint >= 0x11730 && codePoint <= 0x11739) // AHOM DIGIT ZERO .. AHOM DIGIT NINE
return true;
- if (codePoint >= 0x11800 && codePoint <= 0x1183a)
+ if (codePoint >= 0x11800 && codePoint <= 0x1183A) // DOGRA LETTER A .. DOGRA SIGN NUKTA
return true;
- if (codePoint >= 0x118a0 && codePoint <= 0x118e9)
+ if (codePoint >= 0x118A0 && codePoint <= 0x118E9) // WARANG CITI CAPITAL LETTER NGAA .. WARANG CITI DIGIT NINE
return true;
- if (codePoint >= 0x118ff && codePoint <= 0x118ff)
+ if (codePoint >= 0x118FF && codePoint <= 0x118FF) // WARANG CITI OM .. WARANG CITI OM
return true;
- if (codePoint >= 0x11a00 && codePoint <= 0x11a3e)
+ if (codePoint >= 0x11A00 && codePoint <= 0x11A3E) // ZANABAZAR SQUARE LETTER A .. ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
return true;
- if (codePoint >= 0x11a47 && codePoint <= 0x11a47)
+ if (codePoint >= 0x11A47 && codePoint <= 0x11A47) // ZANABAZAR SQUARE SUBJOINER .. ZANABAZAR SQUARE SUBJOINER
return true;
- if (codePoint >= 0x11a50 && codePoint <= 0x11a83)
+ if (codePoint >= 0x11A50 && codePoint <= 0x11A83) // SOYOMBO LETTER A .. SOYOMBO LETTER KSSA
return true;
- if (codePoint >= 0x11a86 && codePoint <= 0x11a99)
+ if (codePoint >= 0x11A86 && codePoint <= 0x11A99) // SOYOMBO CLUSTER-INITIAL LETTER RA .. SOYOMBO SUBJOINER
return true;
- if (codePoint >= 0x11a9d && codePoint <= 0x11a9d)
+ if (codePoint >= 0x11A9D && codePoint <= 0x11A9D) // SOYOMBO MARK PLUTA .. SOYOMBO MARK PLUTA
return true;
- if (codePoint >= 0x11ac0 && codePoint <= 0x11af8)
+ if (codePoint >= 0x11AC0 && codePoint <= 0x11AF8) // PAU CIN HAU LETTER PA .. PAU CIN HAU GLOTTAL STOP FINAL
return true;
- if (codePoint >= 0x11c00 && codePoint <= 0x11c08)
+ if (codePoint >= 0x11C00 && codePoint <= 0x11C08) // BHAIKSUKI LETTER A .. BHAIKSUKI LETTER VOCALIC L
return true;
- if (codePoint >= 0x11c0a && codePoint <= 0x11c36)
+ if (codePoint >= 0x11C0A && codePoint <= 0x11C36) // BHAIKSUKI LETTER E .. BHAIKSUKI VOWEL SIGN VOCALIC L
return true;
- if (codePoint >= 0x11c38 && codePoint <= 0x11c40)
+ if (codePoint >= 0x11C38 && codePoint <= 0x11C40) // BHAIKSUKI VOWEL SIGN E .. BHAIKSUKI SIGN AVAGRAHA
return true;
- if (codePoint >= 0x11c50 && codePoint <= 0x11c59)
+ if (codePoint >= 0x11C50 && codePoint <= 0x11C59) // BHAIKSUKI DIGIT ZERO .. BHAIKSUKI DIGIT NINE
return true;
- if (codePoint >= 0x11c72 && codePoint <= 0x11c8f)
+ if (codePoint >= 0x11C72 && codePoint <= 0x11C8F) // MARCHEN LETTER KA .. MARCHEN LETTER A
return true;
- if (codePoint >= 0x11c92 && codePoint <= 0x11ca7)
+ if (codePoint >= 0x11C92 && codePoint <= 0x11CA7) // MARCHEN SUBJOINED LETTER KA .. MARCHEN SUBJOINED LETTER ZA
return true;
- if (codePoint >= 0x11ca9 && codePoint <= 0x11cb6)
+ if (codePoint >= 0x11CA9 && codePoint <= 0x11CB6) // MARCHEN SUBJOINED LETTER YA .. MARCHEN SIGN CANDRABINDU
return true;
- if (codePoint >= 0x11d00 && codePoint <= 0x11d06)
+ if (codePoint >= 0x11D00 && codePoint <= 0x11D06) // MASARAM GONDI LETTER A .. MASARAM GONDI LETTER E
return true;
- if (codePoint >= 0x11d08 && codePoint <= 0x11d09)
+ if (codePoint >= 0x11D08 && codePoint <= 0x11D09) // MASARAM GONDI LETTER AI .. MASARAM GONDI LETTER O
return true;
- if (codePoint >= 0x11d0b && codePoint <= 0x11d36)
+ if (codePoint >= 0x11D0B && codePoint <= 0x11D36) // MASARAM GONDI LETTER AU .. MASARAM GONDI VOWEL SIGN VOCALIC R
return true;
- if (codePoint >= 0x11d3a && codePoint <= 0x11d3a)
+ if (codePoint >= 0x11D3A && codePoint <= 0x11D3A) // MASARAM GONDI VOWEL SIGN E .. MASARAM GONDI VOWEL SIGN E
return true;
- if (codePoint >= 0x11d3c && codePoint <= 0x11d3d)
+ if (codePoint >= 0x11D3C && codePoint <= 0x11D3D) // MASARAM GONDI VOWEL SIGN AI .. MASARAM GONDI VOWEL SIGN O
return true;
- if (codePoint >= 0x11d3f && codePoint <= 0x11d47)
+ if (codePoint >= 0x11D3F && codePoint <= 0x11D47) // MASARAM GONDI VOWEL SIGN AU .. MASARAM GONDI RA-KARA
return true;
- if (codePoint >= 0x11d50 && codePoint <= 0x11d59)
+ if (codePoint >= 0x11D50 && codePoint <= 0x11D59) // MASARAM GONDI DIGIT ZERO .. MASARAM GONDI DIGIT NINE
return true;
- if (codePoint >= 0x11d60 && codePoint <= 0x11d65)
+ if (codePoint >= 0x11D60 && codePoint <= 0x11D65) // GUNJALA GONDI LETTER A .. GUNJALA GONDI LETTER UU
return true;
- if (codePoint >= 0x11d67 && codePoint <= 0x11d68)
+ if (codePoint >= 0x11D67 && codePoint <= 0x11D68) // GUNJALA GONDI LETTER EE .. GUNJALA GONDI LETTER AI
return true;
- if (codePoint >= 0x11d6a && codePoint <= 0x11d8e)
+ if (codePoint >= 0x11D6A && codePoint <= 0x11D8E) // GUNJALA GONDI LETTER OO .. GUNJALA GONDI VOWEL SIGN UU
return true;
- if (codePoint >= 0x11d90 && codePoint <= 0x11d91)
+ if (codePoint >= 0x11D90 && codePoint <= 0x11D91) // GUNJALA GONDI VOWEL SIGN EE .. GUNJALA GONDI VOWEL SIGN AI
return true;
- if (codePoint >= 0x11d93 && codePoint <= 0x11d98)
+ if (codePoint >= 0x11D93 && codePoint <= 0x11D98) // GUNJALA GONDI VOWEL SIGN OO .. GUNJALA GONDI OM
return true;
- if (codePoint >= 0x11da0 && codePoint <= 0x11da9)
+ if (codePoint >= 0x11DA0 && codePoint <= 0x11DA9) // GUNJALA GONDI DIGIT ZERO .. GUNJALA GONDI DIGIT NINE
return true;
- if (codePoint >= 0x11ee0 && codePoint <= 0x11ef6)
+ if (codePoint >= 0x11EE0 && codePoint <= 0x11EF6) // MAKASAR LETTER KA .. MAKASAR VOWEL SIGN O
return true;
- if (codePoint >= 0x12000 && codePoint <= 0x12399)
+ if (codePoint >= 0x12000 && codePoint <= 0x12399) // CUNEIFORM SIGN A .. CUNEIFORM SIGN U U
return true;
- if (codePoint >= 0x12400 && codePoint <= 0x1246e)
+ if (codePoint >= 0x12400 && codePoint <= 0x1246E) // CUNEIFORM NUMERIC SIGN TWO ASH .. CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
return true;
- if (codePoint >= 0x12480 && codePoint <= 0x12543)
+ if (codePoint >= 0x12480 && codePoint <= 0x12543) // CUNEIFORM SIGN AB TIMES NUN TENU .. CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
return true;
- if (codePoint >= 0x13000 && codePoint <= 0x1342e)
+ if (codePoint >= 0x13000 && codePoint <= 0x1342E) // EGYPTIAN HIEROGLYPH A001 .. EGYPTIAN HIEROGLYPH AA032
return true;
- if (codePoint >= 0x14400 && codePoint <= 0x14646)
+ if (codePoint >= 0x14400 && codePoint <= 0x14646) // ANATOLIAN HIEROGLYPH A001 .. ANATOLIAN HIEROGLYPH A530
return true;
- if (codePoint >= 0x16800 && codePoint <= 0x16a38)
+ if (codePoint >= 0x16800 && codePoint <= 0x16A38) // BAMUM LETTER PHASE-A NGKUE MFON .. BAMUM LETTER PHASE-F VUEQ
return true;
- if (codePoint >= 0x16a40 && codePoint <= 0x16a5e)
+ if (codePoint >= 0x16A40 && codePoint <= 0x16A5E) // MRO LETTER TA .. MRO LETTER TEK
return true;
- if (codePoint >= 0x16a60 && codePoint <= 0x16a69)
+ if (codePoint >= 0x16A60 && codePoint <= 0x16A69) // MRO DIGIT ZERO .. MRO DIGIT NINE
return true;
- if (codePoint >= 0x16ad0 && codePoint <= 0x16aed)
+ if (codePoint >= 0x16AD0 && codePoint <= 0x16AED) // BASSA VAH LETTER ENNI .. BASSA VAH LETTER I
return true;
- if (codePoint >= 0x16af0 && codePoint <= 0x16af4)
+ if (codePoint >= 0x16AF0 && codePoint <= 0x16AF4) // BASSA VAH COMBINING HIGH TONE .. BASSA VAH COMBINING HIGH-LOW TONE
return true;
- if (codePoint >= 0x16b00 && codePoint <= 0x16b36)
+ if (codePoint >= 0x16B00 && codePoint <= 0x16B36) // PAHAWH HMONG VOWEL KEEB .. PAHAWH HMONG MARK CIM TAUM
return true;
- if (codePoint >= 0x16b40 && codePoint <= 0x16b43)
+ if (codePoint >= 0x16B40 && codePoint <= 0x16B43) // PAHAWH HMONG SIGN VOS SEEV .. PAHAWH HMONG SIGN IB YAM
return true;
- if (codePoint >= 0x16b50 && codePoint <= 0x16b59)
+ if (codePoint >= 0x16B50 && codePoint <= 0x16B59) // PAHAWH HMONG DIGIT ZERO .. PAHAWH HMONG DIGIT NINE
return true;
- if (codePoint >= 0x16b63 && codePoint <= 0x16b77)
+ if (codePoint >= 0x16B63 && codePoint <= 0x16B77) // PAHAWH HMONG SIGN VOS LUB .. PAHAWH HMONG SIGN CIM NRES TOS
return true;
- if (codePoint >= 0x16b7d && codePoint <= 0x16b8f)
+ if (codePoint >= 0x16B7D && codePoint <= 0x16B8F) // PAHAWH HMONG CLAN SIGN TSHEEJ .. PAHAWH HMONG CLAN SIGN VWJ
return true;
- if (codePoint >= 0x16e40 && codePoint <= 0x16e7f)
+ if (codePoint >= 0x16E40 && codePoint <= 0x16E7F) // MEDEFAIDRIN CAPITAL LETTER M .. MEDEFAIDRIN SMALL LETTER Y
return true;
- if (codePoint >= 0x16f00 && codePoint <= 0x16f44)
+ if (codePoint >= 0x16F00 && codePoint <= 0x16F44) // MIAO LETTER PA .. MIAO LETTER HHA
return true;
- if (codePoint >= 0x16f50 && codePoint <= 0x16f7e)
+ if (codePoint >= 0x16F50 && codePoint <= 0x16F7E) // MIAO LETTER NASALIZATION .. MIAO VOWEL SIGN NG
return true;
- if (codePoint >= 0x16f8f && codePoint <= 0x16f9f)
+ if (codePoint >= 0x16F8F && codePoint <= 0x16F9F) // MIAO TONE RIGHT .. MIAO LETTER REFORMED TONE-8
return true;
- if (codePoint >= 0x16fe0 && codePoint <= 0x16fe1)
+ if (codePoint >= 0x16FE0 && codePoint <= 0x16FE1) // TANGUT ITERATION MARK .. NUSHU ITERATION MARK
return true;
- if (codePoint >= 0x17000 && codePoint <= 0x187f1)
+ if (codePoint >= 0x17000 && codePoint <= 0x187F1) // Tangut Ideograph .. Tangut Ideograph
return true;
- if (codePoint >= 0x18800 && codePoint <= 0x18af2)
+ if (codePoint >= 0x18800 && codePoint <= 0x18AF2) // TANGUT COMPONENT-001 .. TANGUT COMPONENT-755
return true;
- if (codePoint >= 0x1b000 && codePoint <= 0x1b11e)
+ if (codePoint >= 0x1B000 && codePoint <= 0x1B11E) // KATAKANA LETTER ARCHAIC E .. HENTAIGANA LETTER N-MU-MO-2
return true;
- if (codePoint >= 0x1b170 && codePoint <= 0x1b2fb)
+ if (codePoint >= 0x1B170 && codePoint <= 0x1B2FB) // NUSHU CHARACTER-1B170 .. NUSHU CHARACTER-1B2FB
return true;
- if (codePoint >= 0x1bc00 && codePoint <= 0x1bc6a)
+ if (codePoint >= 0x1BC00 && codePoint <= 0x1BC6A) // DUPLOYAN LETTER H .. DUPLOYAN LETTER VOCALIC M
return true;
- if (codePoint >= 0x1bc70 && codePoint <= 0x1bc7c)
+ if (codePoint >= 0x1BC70 && codePoint <= 0x1BC7C) // DUPLOYAN AFFIX LEFT HORIZONTAL SECANT .. DUPLOYAN AFFIX ATTACHED TANGENT HOOK
return true;
- if (codePoint >= 0x1bc80 && codePoint <= 0x1bc88)
+ if (codePoint >= 0x1BC80 && codePoint <= 0x1BC88) // DUPLOYAN AFFIX HIGH ACUTE .. DUPLOYAN AFFIX HIGH VERTICAL
return true;
- if (codePoint >= 0x1bc90 && codePoint <= 0x1bc99)
+ if (codePoint >= 0x1BC90 && codePoint <= 0x1BC99) // DUPLOYAN AFFIX LOW ACUTE .. DUPLOYAN AFFIX LOW ARROW
return true;
- if (codePoint >= 0x1bc9d && codePoint <= 0x1bc9e)
+ if (codePoint >= 0x1BC9D && codePoint <= 0x1BC9E) // DUPLOYAN THICK LETTER SELECTOR .. DUPLOYAN DOUBLE MARK
return true;
- if (codePoint >= 0x1d165 && codePoint <= 0x1d169)
+ if (codePoint >= 0x1D165 && codePoint <= 0x1D169) // MUSICAL SYMBOL COMBINING STEM .. MUSICAL SYMBOL COMBINING TREMOLO-3
return true;
- if (codePoint >= 0x1d16d && codePoint <= 0x1d172)
+ if (codePoint >= 0x1D16D && codePoint <= 0x1D172) // MUSICAL SYMBOL COMBINING AUGMENTATION DOT .. MUSICAL SYMBOL COMBINING FLAG-5
return true;
- if (codePoint >= 0x1d17b && codePoint <= 0x1d182)
+ if (codePoint >= 0x1D17B && codePoint <= 0x1D182) // MUSICAL SYMBOL COMBINING ACCENT .. MUSICAL SYMBOL COMBINING LOURE
return true;
- if (codePoint >= 0x1d185 && codePoint <= 0x1d18b)
+ if (codePoint >= 0x1D185 && codePoint <= 0x1D18B) // MUSICAL SYMBOL COMBINING DOIT .. MUSICAL SYMBOL COMBINING TRIPLE TONGUE
return true;
- if (codePoint >= 0x1d1aa && codePoint <= 0x1d1ad)
+ if (codePoint >= 0x1D1AA && codePoint <= 0x1D1AD) // MUSICAL SYMBOL COMBINING DOWN BOW .. MUSICAL SYMBOL COMBINING SNAP PIZZICATO
return true;
- if (codePoint >= 0x1d242 && codePoint <= 0x1d244)
+ if (codePoint >= 0x1D242 && codePoint <= 0x1D244) // COMBINING GREEK MUSICAL TRISEME .. COMBINING GREEK MUSICAL PENTASEME
return true;
- if (codePoint >= 0x1d400 && codePoint <= 0x1d454)
+ if (codePoint >= 0x1D400 && codePoint <= 0x1D454) // MATHEMATICAL BOLD CAPITAL A .. MATHEMATICAL ITALIC SMALL G
return true;
- if (codePoint >= 0x1d456 && codePoint <= 0x1d49c)
+ if (codePoint >= 0x1D456 && codePoint <= 0x1D49C) // MATHEMATICAL ITALIC SMALL I .. MATHEMATICAL SCRIPT CAPITAL A
return true;
- if (codePoint >= 0x1d49e && codePoint <= 0x1d49f)
+ if (codePoint >= 0x1D49E && codePoint <= 0x1D49F) // MATHEMATICAL SCRIPT CAPITAL C .. MATHEMATICAL SCRIPT CAPITAL D
return true;
- if (codePoint >= 0x1d4a2 && codePoint <= 0x1d4a2)
+ if (codePoint >= 0x1D4A2 && codePoint <= 0x1D4A2) // MATHEMATICAL SCRIPT CAPITAL G .. MATHEMATICAL SCRIPT CAPITAL G
return true;
- if (codePoint >= 0x1d4a5 && codePoint <= 0x1d4a6)
+ if (codePoint >= 0x1D4A5 && codePoint <= 0x1D4A6) // MATHEMATICAL SCRIPT CAPITAL J .. MATHEMATICAL SCRIPT CAPITAL K
return true;
- if (codePoint >= 0x1d4a9 && codePoint <= 0x1d4ac)
+ if (codePoint >= 0x1D4A9 && codePoint <= 0x1D4AC) // MATHEMATICAL SCRIPT CAPITAL N .. MATHEMATICAL SCRIPT CAPITAL Q
return true;
- if (codePoint >= 0x1d4ae && codePoint <= 0x1d4b9)
+ if (codePoint >= 0x1D4AE && codePoint <= 0x1D4B9) // MATHEMATICAL SCRIPT CAPITAL S .. MATHEMATICAL SCRIPT SMALL D
return true;
- if (codePoint >= 0x1d4bb && codePoint <= 0x1d4bb)
+ if (codePoint >= 0x1D4BB && codePoint <= 0x1D4BB) // MATHEMATICAL SCRIPT SMALL F .. MATHEMATICAL SCRIPT SMALL F
return true;
- if (codePoint >= 0x1d4bd && codePoint <= 0x1d4c3)
+ if (codePoint >= 0x1D4BD && codePoint <= 0x1D4C3) // MATHEMATICAL SCRIPT SMALL H .. MATHEMATICAL SCRIPT SMALL N
return true;
- if (codePoint >= 0x1d4c5 && codePoint <= 0x1d505)
+ if (codePoint >= 0x1D4C5 && codePoint <= 0x1D505) // MATHEMATICAL SCRIPT SMALL P .. MATHEMATICAL FRAKTUR CAPITAL B
return true;
- if (codePoint >= 0x1d507 && codePoint <= 0x1d50a)
+ if (codePoint >= 0x1D507 && codePoint <= 0x1D50A) // MATHEMATICAL FRAKTUR CAPITAL D .. MATHEMATICAL FRAKTUR CAPITAL G
return true;
- if (codePoint >= 0x1d50d && codePoint <= 0x1d514)
+ if (codePoint >= 0x1D50D && codePoint <= 0x1D514) // MATHEMATICAL FRAKTUR CAPITAL J .. MATHEMATICAL FRAKTUR CAPITAL Q
return true;
- if (codePoint >= 0x1d516 && codePoint <= 0x1d51c)
+ if (codePoint >= 0x1D516 && codePoint <= 0x1D51C) // MATHEMATICAL FRAKTUR CAPITAL S .. MATHEMATICAL FRAKTUR CAPITAL Y
return true;
- if (codePoint >= 0x1d51e && codePoint <= 0x1d539)
+ if (codePoint >= 0x1D51E && codePoint <= 0x1D539) // MATHEMATICAL FRAKTUR SMALL A .. MATHEMATICAL DOUBLE-STRUCK CAPITAL B
return true;
- if (codePoint >= 0x1d53b && codePoint <= 0x1d53e)
+ if (codePoint >= 0x1D53B && codePoint <= 0x1D53E) // MATHEMATICAL DOUBLE-STRUCK CAPITAL D .. MATHEMATICAL DOUBLE-STRUCK CAPITAL G
return true;
- if (codePoint >= 0x1d540 && codePoint <= 0x1d544)
+ if (codePoint >= 0x1D540 && codePoint <= 0x1D544) // MATHEMATICAL DOUBLE-STRUCK CAPITAL I .. MATHEMATICAL DOUBLE-STRUCK CAPITAL M
return true;
- if (codePoint >= 0x1d546 && codePoint <= 0x1d546)
+ if (codePoint >= 0x1D546 && codePoint <= 0x1D546) // MATHEMATICAL DOUBLE-STRUCK CAPITAL O .. MATHEMATICAL DOUBLE-STRUCK CAPITAL O
return true;
- if (codePoint >= 0x1d54a && codePoint <= 0x1d550)
+ if (codePoint >= 0x1D54A && codePoint <= 0x1D550) // MATHEMATICAL DOUBLE-STRUCK CAPITAL S .. MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
return true;
- if (codePoint >= 0x1d552 && codePoint <= 0x1d6a5)
+ if (codePoint >= 0x1D552 && codePoint <= 0x1D6A5) // MATHEMATICAL DOUBLE-STRUCK SMALL A .. MATHEMATICAL ITALIC SMALL DOTLESS J
return true;
- if (codePoint >= 0x1d6a8 && codePoint <= 0x1d6c0)
+ if (codePoint >= 0x1D6A8 && codePoint <= 0x1D6C0) // MATHEMATICAL BOLD CAPITAL ALPHA .. MATHEMATICAL BOLD CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d6c2 && codePoint <= 0x1d6da)
+ if (codePoint >= 0x1D6C2 && codePoint <= 0x1D6DA) // MATHEMATICAL BOLD SMALL ALPHA .. MATHEMATICAL BOLD SMALL OMEGA
return true;
- if (codePoint >= 0x1d6dc && codePoint <= 0x1d6fa)
+ if (codePoint >= 0x1D6DC && codePoint <= 0x1D6FA) // MATHEMATICAL BOLD EPSILON SYMBOL .. MATHEMATICAL ITALIC CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d6fc && codePoint <= 0x1d714)
+ if (codePoint >= 0x1D6FC && codePoint <= 0x1D714) // MATHEMATICAL ITALIC SMALL ALPHA .. MATHEMATICAL ITALIC SMALL OMEGA
return true;
- if (codePoint >= 0x1d716 && codePoint <= 0x1d734)
+ if (codePoint >= 0x1D716 && codePoint <= 0x1D734) // MATHEMATICAL ITALIC EPSILON SYMBOL .. MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d736 && codePoint <= 0x1d74e)
+ if (codePoint >= 0x1D736 && codePoint <= 0x1D74E) // MATHEMATICAL BOLD ITALIC SMALL ALPHA .. MATHEMATICAL BOLD ITALIC SMALL OMEGA
return true;
- if (codePoint >= 0x1d750 && codePoint <= 0x1d76e)
+ if (codePoint >= 0x1D750 && codePoint <= 0x1D76E) // MATHEMATICAL BOLD ITALIC EPSILON SYMBOL .. MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d770 && codePoint <= 0x1d788)
+ if (codePoint >= 0x1D770 && codePoint <= 0x1D788) // MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA .. MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
return true;
- if (codePoint >= 0x1d78a && codePoint <= 0x1d7a8)
+ if (codePoint >= 0x1D78A && codePoint <= 0x1D7A8) // MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL .. MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
return true;
- if (codePoint >= 0x1d7aa && codePoint <= 0x1d7c2)
+ if (codePoint >= 0x1D7AA && codePoint <= 0x1D7C2) // MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA .. MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
return true;
- if (codePoint >= 0x1d7c4 && codePoint <= 0x1d7cb)
+ if (codePoint >= 0x1D7C4 && codePoint <= 0x1D7CB) // MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL .. MATHEMATICAL BOLD SMALL DIGAMMA
return true;
- if (codePoint >= 0x1d7ce && codePoint <= 0x1d7ff)
+ if (codePoint >= 0x1D7CE && codePoint <= 0x1D7FF) // MATHEMATICAL BOLD DIGIT ZERO .. MATHEMATICAL MONOSPACE DIGIT NINE
return true;
- if (codePoint >= 0x1da00 && codePoint <= 0x1da36)
+ if (codePoint >= 0x1DA00 && codePoint <= 0x1DA36) // SIGNWRITING HEAD RIM .. SIGNWRITING AIR SUCKING IN
return true;
- if (codePoint >= 0x1da3b && codePoint <= 0x1da6c)
+ if (codePoint >= 0x1DA3B && codePoint <= 0x1DA6C) // SIGNWRITING MOUTH CLOSED NEUTRAL .. SIGNWRITING EXCITEMENT
return true;
- if (codePoint >= 0x1da75 && codePoint <= 0x1da75)
+ if (codePoint >= 0x1DA75 && codePoint <= 0x1DA75) // SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS .. SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS
return true;
- if (codePoint >= 0x1da84 && codePoint <= 0x1da84)
+ if (codePoint >= 0x1DA84 && codePoint <= 0x1DA84) // SIGNWRITING LOCATION HEAD NECK .. SIGNWRITING LOCATION HEAD NECK
return true;
- if (codePoint >= 0x1da9b && codePoint <= 0x1da9f)
+ if (codePoint >= 0x1DA9B && codePoint <= 0x1DA9F) // SIGNWRITING FILL MODIFIER-2 .. SIGNWRITING FILL MODIFIER-6
return true;
- if (codePoint >= 0x1daa1 && codePoint <= 0x1daaf)
+ if (codePoint >= 0x1DAA1 && codePoint <= 0x1DAAF) // SIGNWRITING ROTATION MODIFIER-2 .. SIGNWRITING ROTATION MODIFIER-16
return true;
- if (codePoint >= 0x1e000 && codePoint <= 0x1e006)
+ if (codePoint >= 0x1E000 && codePoint <= 0x1E006) // COMBINING GLAGOLITIC LETTER AZU .. COMBINING GLAGOLITIC LETTER ZHIVETE
return true;
- if (codePoint >= 0x1e008 && codePoint <= 0x1e018)
+ if (codePoint >= 0x1E008 && codePoint <= 0x1E018) // COMBINING GLAGOLITIC LETTER ZEMLJA .. COMBINING GLAGOLITIC LETTER HERU
return true;
- if (codePoint >= 0x1e01b && codePoint <= 0x1e021)
+ if (codePoint >= 0x1E01B && codePoint <= 0x1E021) // COMBINING GLAGOLITIC LETTER SHTA .. COMBINING GLAGOLITIC LETTER YATI
return true;
- if (codePoint >= 0x1e023 && codePoint <= 0x1e024)
+ if (codePoint >= 0x1E023 && codePoint <= 0x1E024) // COMBINING GLAGOLITIC LETTER YU .. COMBINING GLAGOLITIC LETTER SMALL YUS
return true;
- if (codePoint >= 0x1e026 && codePoint <= 0x1e02a)
+ if (codePoint >= 0x1E026 && codePoint <= 0x1E02A) // COMBINING GLAGOLITIC LETTER YO .. COMBINING GLAGOLITIC LETTER FITA
return true;
- if (codePoint >= 0x1e800 && codePoint <= 0x1e8c4)
+ if (codePoint >= 0x1E800 && codePoint <= 0x1E8C4) // MENDE KIKAKUI SYLLABLE M001 KI .. MENDE KIKAKUI SYLLABLE M060 NYON
return true;
- if (codePoint >= 0x1e8d0 && codePoint <= 0x1e8d6)
+ if (codePoint >= 0x1E8D0 && codePoint <= 0x1E8D6) // MENDE KIKAKUI COMBINING NUMBER TEENS .. MENDE KIKAKUI COMBINING NUMBER MILLIONS
return true;
- if (codePoint >= 0x1e900 && codePoint <= 0x1e94a)
+ if (codePoint >= 0x1E900 && codePoint <= 0x1E94A) // ADLAM CAPITAL LETTER ALIF .. ADLAM NUKTA
return true;
- if (codePoint >= 0x1e950 && codePoint <= 0x1e959)
+ if (codePoint >= 0x1E950 && codePoint <= 0x1E959) // ADLAM DIGIT ZERO .. ADLAM DIGIT NINE
return true;
- if (codePoint >= 0x1ee00 && codePoint <= 0x1ee03)
+ if (codePoint >= 0x1EE00 && codePoint <= 0x1EE03) // ARABIC MATHEMATICAL ALEF .. ARABIC MATHEMATICAL DAL
return true;
- if (codePoint >= 0x1ee05 && codePoint <= 0x1ee1f)
+ if (codePoint >= 0x1EE05 && codePoint <= 0x1EE1F) // ARABIC MATHEMATICAL WAW .. ARABIC MATHEMATICAL DOTLESS QAF
return true;
- if (codePoint >= 0x1ee21 && codePoint <= 0x1ee22)
+ if (codePoint >= 0x1EE21 && codePoint <= 0x1EE22) // ARABIC MATHEMATICAL INITIAL BEH .. ARABIC MATHEMATICAL INITIAL JEEM
return true;
- if (codePoint >= 0x1ee24 && codePoint <= 0x1ee24)
+ if (codePoint >= 0x1EE24 && codePoint <= 0x1EE24) // ARABIC MATHEMATICAL INITIAL HEH .. ARABIC MATHEMATICAL INITIAL HEH
return true;
- if (codePoint >= 0x1ee27 && codePoint <= 0x1ee27)
+ if (codePoint >= 0x1EE27 && codePoint <= 0x1EE27) // ARABIC MATHEMATICAL INITIAL HAH .. ARABIC MATHEMATICAL INITIAL HAH
return true;
- if (codePoint >= 0x1ee29 && codePoint <= 0x1ee32)
+ if (codePoint >= 0x1EE29 && codePoint <= 0x1EE32) // ARABIC MATHEMATICAL INITIAL YEH .. ARABIC MATHEMATICAL INITIAL QAF
return true;
- if (codePoint >= 0x1ee34 && codePoint <= 0x1ee37)
+ if (codePoint >= 0x1EE34 && codePoint <= 0x1EE37) // ARABIC MATHEMATICAL INITIAL SHEEN .. ARABIC MATHEMATICAL INITIAL KHAH
return true;
- if (codePoint >= 0x1ee39 && codePoint <= 0x1ee39)
+ if (codePoint >= 0x1EE39 && codePoint <= 0x1EE39) // ARABIC MATHEMATICAL INITIAL DAD .. ARABIC MATHEMATICAL INITIAL DAD
return true;
- if (codePoint >= 0x1ee3b && codePoint <= 0x1ee3b)
+ if (codePoint >= 0x1EE3B && codePoint <= 0x1EE3B) // ARABIC MATHEMATICAL INITIAL GHAIN .. ARABIC MATHEMATICAL INITIAL GHAIN
return true;
- if (codePoint >= 0x1ee42 && codePoint <= 0x1ee42)
+ if (codePoint >= 0x1EE42 && codePoint <= 0x1EE42) // ARABIC MATHEMATICAL TAILED JEEM .. ARABIC MATHEMATICAL TAILED JEEM
return true;
- if (codePoint >= 0x1ee47 && codePoint <= 0x1ee47)
+ if (codePoint >= 0x1EE47 && codePoint <= 0x1EE47) // ARABIC MATHEMATICAL TAILED HAH .. ARABIC MATHEMATICAL TAILED HAH
return true;
- if (codePoint >= 0x1ee49 && codePoint <= 0x1ee49)
+ if (codePoint >= 0x1EE49 && codePoint <= 0x1EE49) // ARABIC MATHEMATICAL TAILED YEH .. ARABIC MATHEMATICAL TAILED YEH
return true;
- if (codePoint >= 0x1ee4b && codePoint <= 0x1ee4b)
+ if (codePoint >= 0x1EE4B && codePoint <= 0x1EE4B) // ARABIC MATHEMATICAL TAILED LAM .. ARABIC MATHEMATICAL TAILED LAM
return true;
- if (codePoint >= 0x1ee4d && codePoint <= 0x1ee4f)
+ if (codePoint >= 0x1EE4D && codePoint <= 0x1EE4F) // ARABIC MATHEMATICAL TAILED NOON .. ARABIC MATHEMATICAL TAILED AIN
return true;
- if (codePoint >= 0x1ee51 && codePoint <= 0x1ee52)
+ if (codePoint >= 0x1EE51 && codePoint <= 0x1EE52) // ARABIC MATHEMATICAL TAILED SAD .. ARABIC MATHEMATICAL TAILED QAF
return true;
- if (codePoint >= 0x1ee54 && codePoint <= 0x1ee54)
+ if (codePoint >= 0x1EE54 && codePoint <= 0x1EE54) // ARABIC MATHEMATICAL TAILED SHEEN .. ARABIC MATHEMATICAL TAILED SHEEN
return true;
- if (codePoint >= 0x1ee57 && codePoint <= 0x1ee57)
+ if (codePoint >= 0x1EE57 && codePoint <= 0x1EE57) // ARABIC MATHEMATICAL TAILED KHAH .. ARABIC MATHEMATICAL TAILED KHAH
return true;
- if (codePoint >= 0x1ee59 && codePoint <= 0x1ee59)
+ if (codePoint >= 0x1EE59 && codePoint <= 0x1EE59) // ARABIC MATHEMATICAL TAILED DAD .. ARABIC MATHEMATICAL TAILED DAD
return true;
- if (codePoint >= 0x1ee5b && codePoint <= 0x1ee5b)
+ if (codePoint >= 0x1EE5B && codePoint <= 0x1EE5B) // ARABIC MATHEMATICAL TAILED GHAIN .. ARABIC MATHEMATICAL TAILED GHAIN
return true;
- if (codePoint >= 0x1ee5d && codePoint <= 0x1ee5d)
+ if (codePoint >= 0x1EE5D && codePoint <= 0x1EE5D) // ARABIC MATHEMATICAL TAILED DOTLESS NOON .. ARABIC MATHEMATICAL TAILED DOTLESS NOON
return true;
- if (codePoint >= 0x1ee5f && codePoint <= 0x1ee5f)
+ if (codePoint >= 0x1EE5F && codePoint <= 0x1EE5F) // ARABIC MATHEMATICAL TAILED DOTLESS QAF .. ARABIC MATHEMATICAL TAILED DOTLESS QAF
return true;
- if (codePoint >= 0x1ee61 && codePoint <= 0x1ee62)
+ if (codePoint >= 0x1EE61 && codePoint <= 0x1EE62) // ARABIC MATHEMATICAL STRETCHED BEH .. ARABIC MATHEMATICAL STRETCHED JEEM
return true;
- if (codePoint >= 0x1ee64 && codePoint <= 0x1ee64)
+ if (codePoint >= 0x1EE64 && codePoint <= 0x1EE64) // ARABIC MATHEMATICAL STRETCHED HEH .. ARABIC MATHEMATICAL STRETCHED HEH
return true;
- if (codePoint >= 0x1ee67 && codePoint <= 0x1ee6a)
+ if (codePoint >= 0x1EE67 && codePoint <= 0x1EE6A) // ARABIC MATHEMATICAL STRETCHED HAH .. ARABIC MATHEMATICAL STRETCHED KAF
return true;
- if (codePoint >= 0x1ee6c && codePoint <= 0x1ee72)
+ if (codePoint >= 0x1EE6C && codePoint <= 0x1EE72) // ARABIC MATHEMATICAL STRETCHED MEEM .. ARABIC MATHEMATICAL STRETCHED QAF
return true;
- if (codePoint >= 0x1ee74 && codePoint <= 0x1ee77)
+ if (codePoint >= 0x1EE74 && codePoint <= 0x1EE77) // ARABIC MATHEMATICAL STRETCHED SHEEN .. ARABIC MATHEMATICAL STRETCHED KHAH
return true;
- if (codePoint >= 0x1ee79 && codePoint <= 0x1ee7c)
+ if (codePoint >= 0x1EE79 && codePoint <= 0x1EE7C) // ARABIC MATHEMATICAL STRETCHED DAD .. ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
return true;
- if (codePoint >= 0x1ee7e && codePoint <= 0x1ee7e)
+ if (codePoint >= 0x1EE7E && codePoint <= 0x1EE7E) // ARABIC MATHEMATICAL STRETCHED DOTLESS FEH .. ARABIC MATHEMATICAL STRETCHED DOTLESS FEH
return true;
- if (codePoint >= 0x1ee80 && codePoint <= 0x1ee89)
+ if (codePoint >= 0x1EE80 && codePoint <= 0x1EE89) // ARABIC MATHEMATICAL LOOPED ALEF .. ARABIC MATHEMATICAL LOOPED YEH
return true;
- if (codePoint >= 0x1ee8b && codePoint <= 0x1ee9b)
+ if (codePoint >= 0x1EE8B && codePoint <= 0x1EE9B) // ARABIC MATHEMATICAL LOOPED LAM .. ARABIC MATHEMATICAL LOOPED GHAIN
return true;
- if (codePoint >= 0x1eea1 && codePoint <= 0x1eea3)
+ if (codePoint >= 0x1EEA1 && codePoint <= 0x1EEA3) // ARABIC MATHEMATICAL DOUBLE-STRUCK BEH .. ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
return true;
- if (codePoint >= 0x1eea5 && codePoint <= 0x1eea9)
+ if (codePoint >= 0x1EEA5 && codePoint <= 0x1EEA9) // ARABIC MATHEMATICAL DOUBLE-STRUCK WAW .. ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
return true;
- if (codePoint >= 0x1eeab && codePoint <= 0x1eebb)
+ if (codePoint >= 0x1EEAB && codePoint <= 0x1EEBB) // ARABIC MATHEMATICAL DOUBLE-STRUCK LAM .. ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
return true;
- if (codePoint >= 0x20000 && codePoint <= 0x2a6d6)
+ if (codePoint >= 0x20000 && codePoint <= 0x2A6D6) // CJK Ideograph Extension B .. CJK Ideograph Extension B
return true;
- if (codePoint >= 0x2a700 && codePoint <= 0x2b734)
+ if (codePoint >= 0x2A700 && codePoint <= 0x2B734) // CJK Ideograph Extension C .. CJK Ideograph Extension C
return true;
- if (codePoint >= 0x2b740 && codePoint <= 0x2b81d)
+ if (codePoint >= 0x2B740 && codePoint <= 0x2B81D) // CJK Ideograph Extension D .. CJK Ideograph Extension D
return true;
- if (codePoint >= 0x2b820 && codePoint <= 0x2cea1)
+ if (codePoint >= 0x2B820 && codePoint <= 0x2CEA1) // CJK Ideograph Extension E .. CJK Ideograph Extension E
return true;
- if (codePoint >= 0x2ceb0 && codePoint <= 0x2ebe0)
+ if (codePoint >= 0x2CEB0 && codePoint <= 0x2EBE0) // CJK Ideograph Extension F .. CJK Ideograph Extension F
return true;
- if (codePoint >= 0x2f800 && codePoint <= 0x2fa1d)
+ if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) // CJK COMPATIBILITY IDEOGRAPH-2F800 .. CJK COMPATIBILITY IDEOGRAPH-2FA1D
return true;
- if (codePoint >= 0xe0100 && codePoint <= 0xe01ef)
+ if (codePoint >= 0xE0100 && codePoint <= 0xE01EF) // VARIATION SELECTOR-17 .. VARIATION SELECTOR-256
return true;
return false;
}
+
+bool
+js::unicode::CanUpperCaseSpecialCasing(char16_t ch)
+{
+    // Reports whether |ch| is one of the UTF-16 code units that
+    // LengthUpperCaseSpecialCasing / AppendUpperCaseSpecialCasing accept,
+    // i.e. a code unit with a special-cased upper-case expansion.
+    //
+    // The candidates are tested in ascending order so that every gap between
+    // two groups can be rejected with a single comparison.
+
+    // Everything outside [U+00DF, U+FB17] is rejected immediately.
+    if (ch < 0x00DF || ch > 0xFB17)
+        return false;
+
+    // Isolated Latin, Greek and Armenian letters up to U+0587.
+    if (ch <= 0x0587) {
+        switch (ch) {
+          case 0x00DF: // LATIN SMALL LETTER SHARP S
+          case 0x0149: // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+          case 0x01F0: // LATIN SMALL LETTER J WITH CARON
+          case 0x0390: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+          case 0x03B0: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+          case 0x0587: // ARMENIAN SMALL LIGATURE ECH YIWN
+            return true;
+          default:
+            return false;
+        }
+    }
+
+    // Gap: U+0588 .. U+1E95.
+    if (ch < 0x1E96)
+        return false;
+
+    // U+1E96 LATIN SMALL LETTER H WITH LINE BELOW ..
+    // U+1E9A LATIN SMALL LETTER A WITH RIGHT HALF RING
+    if (ch <= 0x1E9A)
+        return true;
+
+    // Gap: U+1E9B .. U+1F4F.
+    if (ch < 0x1F50)
+        return false;
+
+    // Greek Extended block, U+1F50 .. U+1FFC.
+    if (ch <= 0x1FFC) {
+        // GREEK SMALL LETTER UPSILON WITH PSILI, alone or with
+        // VARIA / OXIA / PERISPOMENI.
+        if (ch < 0x1F80)
+            return ch == 0x1F50 || ch == 0x1F52 || ch == 0x1F54 || ch == 0x1F56;
+
+        // U+1F80 .. U+1FAF: alpha / eta / omega with YPOGEGRAMMENI or
+        // PROSGEGRAMMENI; the whole run qualifies.
+        if (ch <= 0x1FAF)
+            return true;
+
+        switch (ch) {
+          // GREEK SMALL LETTER ALPHA ..., GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+          case 0x1FB2: case 0x1FB3: case 0x1FB4:
+          case 0x1FB6: case 0x1FB7:
+          case 0x1FBC:
+          // GREEK SMALL LETTER ETA ..., GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+          case 0x1FC2: case 0x1FC3: case 0x1FC4:
+          case 0x1FC6: case 0x1FC7:
+          case 0x1FCC:
+          // GREEK SMALL LETTER IOTA ...
+          case 0x1FD2: case 0x1FD3:
+          case 0x1FD6: case 0x1FD7:
+          // GREEK SMALL LETTER UPSILON ..., GREEK SMALL LETTER RHO WITH PSILI
+          case 0x1FE2: case 0x1FE3: case 0x1FE4:
+          case 0x1FE6: case 0x1FE7:
+          // GREEK SMALL LETTER OMEGA ..., GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+          case 0x1FF2: case 0x1FF3: case 0x1FF4:
+          case 0x1FF6: case 0x1FF7:
+          case 0x1FFC:
+            return true;
+          default:
+            return false;
+        }
+    }
+
+    // Gap: U+1FFD .. U+FAFF.
+    if (ch < 0xFB00)
+        return false;
+
+    // Here |ch| lies in [U+FB00, U+FB17]; only the sub-range U+FB07 .. U+FB12
+    // has to be excluded.
+    // U+FB00 LATIN SMALL LIGATURE FF .. U+FB06 LATIN SMALL LIGATURE ST
+    // U+FB13 ARMENIAN SMALL LIGATURE MEN NOW .. U+FB17 ARMENIAN SMALL LIGATURE MEN XEH
+    return ch <= 0xFB06 || ch >= 0xFB13;
+}
+
+size_t
+js::unicode::LengthUpperCaseSpecialCasing(char16_t ch)
+{
+    // Returns how many UTF-16 code units the special-cased upper-case
+    // expansion of |ch| occupies (always 2 or 3). Passing any code unit that
+    // is not listed below trips the assertion at the end and yields 0.
+    switch (ch) {
+      // --- Expansions that are three code units long ---
+      case 0x0390: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+      case 0x03B0: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+      case 0x1F52: // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+      case 0x1F54: // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+      case 0x1F56: // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+      case 0x1FB7: // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+      case 0x1FC7: // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+      case 0x1FD2: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+      case 0x1FD3: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+      case 0x1FD7: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+      case 0x1FE2: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+      case 0x1FE3: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+      case 0x1FE7: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+      case 0x1FF7: // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+      case 0xFB03: // LATIN SMALL LIGATURE FFI
+      case 0xFB04: // LATIN SMALL LIGATURE FFL
+        return 3;
+
+      // --- Expansions that are two code units long ---
+      case 0x00DF: // LATIN SMALL LETTER SHARP S
+      case 0x0149: // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+      case 0x01F0: // LATIN SMALL LETTER J WITH CARON
+      case 0x0587: // ARMENIAN SMALL LIGATURE ECH YIWN
+      // LATIN SMALL LETTER H WITH LINE BELOW .. LATIN SMALL LETTER A WITH RIGHT HALF RING
+      case 0x1E96: case 0x1E97: case 0x1E98: case 0x1E99: case 0x1E9A:
+      case 0x1F50: // GREEK SMALL LETTER UPSILON WITH PSILI
+      // GREEK SMALL / CAPITAL LETTER ALPHA WITH ... YPOGEGRAMMENI / PROSGEGRAMMENI
+      case 0x1F80: case 0x1F81: case 0x1F82: case 0x1F83:
+      case 0x1F84: case 0x1F85: case 0x1F86: case 0x1F87:
+      case 0x1F88: case 0x1F89: case 0x1F8A: case 0x1F8B:
+      case 0x1F8C: case 0x1F8D: case 0x1F8E: case 0x1F8F:
+      // GREEK SMALL / CAPITAL LETTER ETA WITH ... YPOGEGRAMMENI / PROSGEGRAMMENI
+      case 0x1F90: case 0x1F91: case 0x1F92: case 0x1F93:
+      case 0x1F94: case 0x1F95: case 0x1F96: case 0x1F97:
+      case 0x1F98: case 0x1F99: case 0x1F9A: case 0x1F9B:
+      case 0x1F9C: case 0x1F9D: case 0x1F9E: case 0x1F9F:
+      // GREEK SMALL / CAPITAL LETTER OMEGA WITH ... YPOGEGRAMMENI / PROSGEGRAMMENI
+      case 0x1FA0: case 0x1FA1: case 0x1FA2: case 0x1FA3:
+      case 0x1FA4: case 0x1FA5: case 0x1FA6: case 0x1FA7:
+      case 0x1FA8: case 0x1FA9: case 0x1FAA: case 0x1FAB:
+      case 0x1FAC: case 0x1FAD: case 0x1FAE: case 0x1FAF:
+      case 0x1FB2: // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+      case 0x1FB3: // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+      case 0x1FB4: // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+      case 0x1FB6: // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+      case 0x1FBC: // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+      case 0x1FC2: // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+      case 0x1FC3: // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+      case 0x1FC4: // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+      case 0x1FC6: // GREEK SMALL LETTER ETA WITH PERISPOMENI
+      case 0x1FCC: // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+      case 0x1FD6: // GREEK SMALL LETTER IOTA WITH PERISPOMENI
+      case 0x1FE4: // GREEK SMALL LETTER RHO WITH PSILI
+      case 0x1FE6: // GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+      case 0x1FF2: // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+      case 0x1FF3: // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+      case 0x1FF4: // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+      case 0x1FF6: // GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+      case 0x1FFC: // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+      case 0xFB00: // LATIN SMALL LIGATURE FF
+      case 0xFB01: // LATIN SMALL LIGATURE FI
+      case 0xFB02: // LATIN SMALL LIGATURE FL
+      case 0xFB05: // LATIN SMALL LIGATURE LONG S T
+      case 0xFB06: // LATIN SMALL LIGATURE ST
+      // ARMENIAN SMALL LIGATURE MEN NOW .. ARMENIAN SMALL LIGATURE MEN XEH
+      case 0xFB13: case 0xFB14: case 0xFB15: case 0xFB16: case 0xFB17:
+        return 2;
+    }
+
+    // No case matched: the caller passed a code unit without a special-cased
+    // upper-case expansion.
+    MOZ_ASSERT_UNREACHABLE("Bad character input.");
+    return 0;
+}
+
+void
+js::unicode::AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index)
+{
+ switch(ch) {
+ case 0x00DF: // LATIN SMALL LETTER SHARP S
+ elements[(*index)++] = 0x0053; // LATIN CAPITAL LETTER S
+ elements[(*index)++] = 0x0053; // LATIN CAPITAL LETTER S
+ return;
+ case 0x0149: // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE (LATIN SMALL LETTER APOSTROPHE N)
+ elements[(*index)++] = 0x02BC; // MODIFIER LETTER APOSTROPHE
+ elements[(*index)++] = 0x004E; // LATIN CAPITAL LETTER N
+ return;
+ case 0x01F0: // LATIN SMALL LETTER J WITH CARON (LATIN SMALL LETTER J HACEK)
+ elements[(*index)++] = 0x004A; // LATIN CAPITAL LETTER J
+ elements[(*index)++] = 0x030C; // COMBINING CARON (NON-SPACING HACEK)
+ return;
+ case 0x0390: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS (GREEK SMALL LETTER IOTA DIAERESIS TONOS)
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE)
+ return;
+ case 0x03B0: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS (GREEK SMALL LETTER UPSILON DIAERESIS TONOS)
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE)
+ return;
+ case 0x0587: // ARMENIAN SMALL LIGATURE ECH YIWN
+ elements[(*index)++] = 0x0535; // ARMENIAN CAPITAL LETTER ECH
+ elements[(*index)++] = 0x0552; // ARMENIAN CAPITAL LETTER YIWN
+ return;
+ case 0x1E96: // LATIN SMALL LETTER H WITH LINE BELOW
+ elements[(*index)++] = 0x0048; // LATIN CAPITAL LETTER H
+ elements[(*index)++] = 0x0331; // COMBINING MACRON BELOW (NON-SPACING MACRON BELOW)
+ return;
+ case 0x1E97: // LATIN SMALL LETTER T WITH DIAERESIS
+ elements[(*index)++] = 0x0054; // LATIN CAPITAL LETTER T
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ return;
+ case 0x1E98: // LATIN SMALL LETTER W WITH RING ABOVE
+ elements[(*index)++] = 0x0057; // LATIN CAPITAL LETTER W
+ elements[(*index)++] = 0x030A; // COMBINING RING ABOVE (NON-SPACING RING ABOVE)
+ return;
+ case 0x1E99: // LATIN SMALL LETTER Y WITH RING ABOVE
+ elements[(*index)++] = 0x0059; // LATIN CAPITAL LETTER Y
+ elements[(*index)++] = 0x030A; // COMBINING RING ABOVE (NON-SPACING RING ABOVE)
+ return;
+ case 0x1E9A: // LATIN SMALL LETTER A WITH RIGHT HALF RING
+ elements[(*index)++] = 0x0041; // LATIN CAPITAL LETTER A
+ elements[(*index)++] = 0x02BE; // MODIFIER LETTER RIGHT HALF RING
+ return;
+ case 0x1F50: // GREEK SMALL LETTER UPSILON WITH PSILI
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE)
+ return;
+ case 0x1F52: // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE)
+ elements[(*index)++] = 0x0300; // COMBINING GRAVE ACCENT (NON-SPACING GRAVE)
+ return;
+ case 0x1F54: // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE)
+ elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE)
+ return;
+ case 0x1F56: // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE)
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ return;
+ case 0x1F80: // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F08; // GREEK CAPITAL LETTER ALPHA WITH PSILI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F81: // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F09; // GREEK CAPITAL LETTER ALPHA WITH DASIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F82: // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F0A; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F83: // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F0B; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F84: // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F0C; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F85: // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F0D; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F86: // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F0E; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F87: // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F0F; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F88: // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F08; // GREEK CAPITAL LETTER ALPHA WITH PSILI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F89: // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F09; // GREEK CAPITAL LETTER ALPHA WITH DASIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F8A: // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F0A; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F8B: // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F0B; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F8C: // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F0C; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F8D: // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F0D; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F8E: // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F0E; // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F8F: // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F0F; // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F90: // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F28; // GREEK CAPITAL LETTER ETA WITH PSILI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F91: // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F29; // GREEK CAPITAL LETTER ETA WITH DASIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F92: // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F2A; // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F93: // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F2B; // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F94: // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F2C; // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F95: // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F2D; // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F96: // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F2E; // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F97: // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F2F; // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F98: // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F28; // GREEK CAPITAL LETTER ETA WITH PSILI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F99: // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F29; // GREEK CAPITAL LETTER ETA WITH DASIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F9A: // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F2A; // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F9B: // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F2B; // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F9C: // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F2C; // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F9D: // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F2D; // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F9E: // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F2E; // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1F9F: // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F2F; // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA0: // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F68; // GREEK CAPITAL LETTER OMEGA WITH PSILI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA1: // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F69; // GREEK CAPITAL LETTER OMEGA WITH DASIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA2: // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F6A; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA3: // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F6B; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA4: // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F6C; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA5: // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F6D; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA6: // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F6E; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA7: // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1F6F; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA8: // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F68; // GREEK CAPITAL LETTER OMEGA WITH PSILI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FA9: // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F69; // GREEK CAPITAL LETTER OMEGA WITH DASIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FAA: // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F6A; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FAB: // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F6B; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FAC: // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F6C; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FAD: // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F6D; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FAE: // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F6E; // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FAF: // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+ elements[(*index)++] = 0x1F6F; // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FB2: // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1FBA; // GREEK CAPITAL LETTER ALPHA WITH VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FB3: // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+ elements[(*index)++] = 0x0391; // GREEK CAPITAL LETTER ALPHA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FB4: // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x0386; // GREEK CAPITAL LETTER ALPHA WITH TONOS (GREEK CAPITAL LETTER ALPHA TONOS)
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FB6: // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+ elements[(*index)++] = 0x0391; // GREEK CAPITAL LETTER ALPHA
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ return;
+ case 0x1FB7: // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x0391; // GREEK CAPITAL LETTER ALPHA
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FBC: // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+ elements[(*index)++] = 0x0391; // GREEK CAPITAL LETTER ALPHA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FC2: // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1FCA; // GREEK CAPITAL LETTER ETA WITH VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FC3: // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+ elements[(*index)++] = 0x0397; // GREEK CAPITAL LETTER ETA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FC4: // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x0389; // GREEK CAPITAL LETTER ETA WITH TONOS (GREEK CAPITAL LETTER ETA TONOS)
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FC6: // GREEK SMALL LETTER ETA WITH PERISPOMENI
+ elements[(*index)++] = 0x0397; // GREEK CAPITAL LETTER ETA
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ return;
+ case 0x1FC7: // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x0397; // GREEK CAPITAL LETTER ETA
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FCC: // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+ elements[(*index)++] = 0x0397; // GREEK CAPITAL LETTER ETA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FD2: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ elements[(*index)++] = 0x0300; // COMBINING GRAVE ACCENT (NON-SPACING GRAVE)
+ return;
+ case 0x1FD3: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE)
+ return;
+ case 0x1FD6: // GREEK SMALL LETTER IOTA WITH PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ return;
+ case 0x1FD7: // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ return;
+ case 0x1FE2: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ elements[(*index)++] = 0x0300; // COMBINING GRAVE ACCENT (NON-SPACING GRAVE)
+ return;
+ case 0x1FE3: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ elements[(*index)++] = 0x0301; // COMBINING ACUTE ACCENT (NON-SPACING ACUTE)
+ return;
+ case 0x1FE4: // GREEK SMALL LETTER RHO WITH PSILI
+ elements[(*index)++] = 0x03A1; // GREEK CAPITAL LETTER RHO
+ elements[(*index)++] = 0x0313; // COMBINING COMMA ABOVE (NON-SPACING COMMA ABOVE)
+ return;
+ case 0x1FE6: // GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ return;
+ case 0x1FE7: // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+ elements[(*index)++] = 0x03A5; // GREEK CAPITAL LETTER UPSILON
+ elements[(*index)++] = 0x0308; // COMBINING DIAERESIS (NON-SPACING DIAERESIS)
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ return;
+ case 0x1FF2: // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x1FFA; // GREEK CAPITAL LETTER OMEGA WITH VARIA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FF3: // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+ elements[(*index)++] = 0x03A9; // GREEK CAPITAL LETTER OMEGA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FF4: // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x038F; // GREEK CAPITAL LETTER OMEGA WITH TONOS (GREEK CAPITAL LETTER OMEGA TONOS)
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FF6: // GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+ elements[(*index)++] = 0x03A9; // GREEK CAPITAL LETTER OMEGA
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ return;
+ case 0x1FF7: // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+ elements[(*index)++] = 0x03A9; // GREEK CAPITAL LETTER OMEGA
+ elements[(*index)++] = 0x0342; // COMBINING GREEK PERISPOMENI
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0x1FFC: // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+ elements[(*index)++] = 0x03A9; // GREEK CAPITAL LETTER OMEGA
+ elements[(*index)++] = 0x0399; // GREEK CAPITAL LETTER IOTA
+ return;
+ case 0xFB00: // LATIN SMALL LIGATURE FF
+ elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F
+ elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F
+ return;
+ case 0xFB01: // LATIN SMALL LIGATURE FI
+ elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F
+ elements[(*index)++] = 0x0049; // LATIN CAPITAL LETTER I
+ return;
+ case 0xFB02: // LATIN SMALL LIGATURE FL
+ elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F
+ elements[(*index)++] = 0x004C; // LATIN CAPITAL LETTER L
+ return;
+ case 0xFB03: // LATIN SMALL LIGATURE FFI
+ elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F
+ elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F
+ elements[(*index)++] = 0x0049; // LATIN CAPITAL LETTER I
+ return;
+ case 0xFB04: // LATIN SMALL LIGATURE FFL
+ elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F
+ elements[(*index)++] = 0x0046; // LATIN CAPITAL LETTER F
+ elements[(*index)++] = 0x004C; // LATIN CAPITAL LETTER L
+ return;
+ case 0xFB05: // LATIN SMALL LIGATURE LONG S T
+ elements[(*index)++] = 0x0053; // LATIN CAPITAL LETTER S
+ elements[(*index)++] = 0x0054; // LATIN CAPITAL LETTER T
+ return;
+ case 0xFB06: // LATIN SMALL LIGATURE ST
+ elements[(*index)++] = 0x0053; // LATIN CAPITAL LETTER S
+ elements[(*index)++] = 0x0054; // LATIN CAPITAL LETTER T
+ return;
+ case 0xFB13: // ARMENIAN SMALL LIGATURE MEN NOW
+ elements[(*index)++] = 0x0544; // ARMENIAN CAPITAL LETTER MEN
+ elements[(*index)++] = 0x0546; // ARMENIAN CAPITAL LETTER NOW
+ return;
+ case 0xFB14: // ARMENIAN SMALL LIGATURE MEN ECH
+ elements[(*index)++] = 0x0544; // ARMENIAN CAPITAL LETTER MEN
+ elements[(*index)++] = 0x0535; // ARMENIAN CAPITAL LETTER ECH
+ return;
+ case 0xFB15: // ARMENIAN SMALL LIGATURE MEN INI
+ elements[(*index)++] = 0x0544; // ARMENIAN CAPITAL LETTER MEN
+ elements[(*index)++] = 0x053B; // ARMENIAN CAPITAL LETTER INI
+ return;
+ case 0xFB16: // ARMENIAN SMALL LIGATURE VEW NOW
+ elements[(*index)++] = 0x054E; // ARMENIAN CAPITAL LETTER VEW
+ elements[(*index)++] = 0x0546; // ARMENIAN CAPITAL LETTER NOW
+ return;
+ case 0xFB17: // ARMENIAN SMALL LIGATURE MEN XEH
+ elements[(*index)++] = 0x0544; // ARMENIAN CAPITAL LETTER MEN
+ elements[(*index)++] = 0x053D; // ARMENIAN CAPITAL LETTER XEH
+ return;
+ }
+
+ MOZ_ASSERT_UNREACHABLE("Bad character input.");
+ return;
+}
diff --git a/js/src/vm/Unicode.h b/js/src/vm/Unicode.h
index d8807a4deb..3f87e47fb6 100644
--- a/js/src/vm/Unicode.h
+++ b/js/src/vm/Unicode.h
@@ -62,8 +62,16 @@ namespace CharFlag {
const uint8_t UNICODE_ID_CONTINUE = UNICODE_ID_START + UNICODE_ID_CONTINUE_ONLY;
}
+const char16_t NO_BREAK_SPACE = 0x00A0;
+const char16_t MICRO_SIGN = 0x00B5;
+const char16_t LATIN_SMALL_LETTER_SHARP_S = 0x00DF;
+const char16_t LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = 0x00FF;
+const char16_t LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = 0x0130;
+const char16_t COMBINING_DOT_ABOVE = 0x0307;
+const char16_t GREEK_CAPITAL_LETTER_SIGMA = 0x03A3;
+const char16_t GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2;
+const char16_t GREEK_SMALL_LETTER_SIGMA = 0x03C3;
const char16_t BYTE_ORDER_MARK2 = 0xFFFE;
-const char16_t NO_BREAK_SPACE = 0x00A0;
const char16_t LeadSurrogateMin = 0xD800;
const char16_t LeadSurrogateMax = 0xDBFF;
@@ -239,6 +247,10 @@ IsSpaceOrBOM2(char16_t ch)
return CharInfo(ch).isSpace();
}
+/*
+ * Returns the simple upper case mapping (see CanUpperCaseSpecialCasing for
+ * details) of the given UTF-16 code unit.
+ */
inline char16_t
ToUpperCase(char16_t ch)
{
@@ -253,6 +265,10 @@ ToUpperCase(char16_t ch)
return uint16_t(ch) + info.upperCase;
}
+/*
+ * Returns the simple lower case mapping (see CanUpperCaseSpecialCasing for
+ * details) of the given UTF-16 code unit.
+ */
inline char16_t
ToLowerCase(char16_t ch)
{
@@ -330,6 +346,43 @@ ToLowerCaseNonBMPTrail(char16_t lead, char16_t trail)
}
/*
+ * Returns true if the given UTF-16 code unit has a language-independent,
+ * unconditional or conditional special upper case mapping.
+ *
+ * Unicode defines two case mapping modes:
+ * 1. "simple case mappings" for one-to-one mappings which are independent of
+ * context and language (defined in UnicodeData.txt).
+ * 2. "special case mappings" for mappings which can increase or decrease the
+ * string length; or are dependent on context or locale (defined in
+ * SpecialCasing.txt).
+ *
+ * The CanUpperCase() method defined above only supports simple case mappings.
+ * In order to support the full case mappings of all Unicode characters,
+ * callers need to check this method in addition to CanUpperCase().
+ *
+ * NOTE: All special upper case mappings are unconditional in Unicode 9.
+ */
+bool
+CanUpperCaseSpecialCasing(char16_t ch);
+
+/*
+ * Returns the length of the upper case mapping of |ch|.
+ *
+ * This function asserts if |ch| doesn't have a special upper case mapping.
+ */
+size_t
+LengthUpperCaseSpecialCasing(char16_t ch);
+
+/*
+ * Appends the upper case mapping of |ch| to the given output buffer,
+ * starting at the provided index.
+ *
+ * This function asserts if |ch| doesn't have a special upper case mapping.
+ */
+void
+AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index);
+
+/*
* For a codepoint C, CodepointsWithSameUpperCaseInfo stores three offsets
* from C to up to three codepoints with same uppercase (no codepoint in
* UnicodeData.txt has more than three such codepoints).
@@ -504,7 +557,7 @@ UTF16Encode(uint32_t codePoint, char16_t* lead, char16_t* trail)
*trail = TrailSurrogate(codePoint);
}
-static inline void
+inline void
UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
{
if (!IsSupplementary(codePoint)) {
diff --git a/js/src/vm/UnicodeNonBMP.h b/js/src/vm/UnicodeNonBMP.h
index 687c1851e6..8b9fb2a4f2 100644
--- a/js/src/vm/UnicodeNonBMP.h
+++ b/js/src/vm/UnicodeNonBMP.h
@@ -19,6 +19,12 @@
// DIFF: the difference between the code point in the range and
// converted code point
+// U+10400 DESERET CAPITAL LETTER LONG I .. U+10427 DESERET CAPITAL LETTER EW
+// U+104B0 OSAGE CAPITAL LETTER A .. U+104D3 OSAGE CAPITAL LETTER ZHA
+// U+10C80 OLD HUNGARIAN CAPITAL LETTER A .. U+10CB2 OLD HUNGARIAN CAPITAL LETTER US
+// U+118A0 WARANG CITI CAPITAL LETTER NGAA .. U+118BF WARANG CITI CAPITAL LETTER VIYO
+// U+16E40 MEDEFAIDRIN CAPITAL LETTER M .. U+16E5F MEDEFAIDRIN CAPITAL LETTER Y
+// U+1E900 ADLAM CAPITAL LETTER ALIF .. U+1E921 ADLAM CAPITAL LETTER SHA
#define FOR_EACH_NON_BMP_LOWERCASE(macro) \
macro(0x10400, 0x10427, 0xd801, 0xdc00, 0xdc27, 40) \
macro(0x104b0, 0x104d3, 0xd801, 0xdcb0, 0xdcd3, 40) \
@@ -27,6 +33,12 @@
macro(0x16e40, 0x16e5f, 0xd81b, 0xde40, 0xde5f, 32) \
macro(0x1e900, 0x1e921, 0xd83a, 0xdd00, 0xdd21, 34)
+// U+10428 DESERET SMALL LETTER LONG I .. U+1044F DESERET SMALL LETTER EW
+// U+104D8 OSAGE SMALL LETTER A .. U+104FB OSAGE SMALL LETTER ZHA
+// U+10CC0 OLD HUNGARIAN SMALL LETTER A .. U+10CF2 OLD HUNGARIAN SMALL LETTER US
+// U+118C0 WARANG CITI SMALL LETTER NGAA .. U+118DF WARANG CITI SMALL LETTER VIYO
+// U+16E60 MEDEFAIDRIN SMALL LETTER M .. U+16E7F MEDEFAIDRIN SMALL LETTER Y
+// U+1E922 ADLAM SMALL LETTER ALIF .. U+1E943 ADLAM SMALL LETTER SHA
#define FOR_EACH_NON_BMP_UPPERCASE(macro) \
macro(0x10428, 0x1044f, 0xd801, 0xdc28, 0xdc4f, -40) \
macro(0x104d8, 0x104fb, 0xd801, 0xdcd8, 0xdcfb, -40) \
@@ -35,6 +47,12 @@
macro(0x16e60, 0x16e7f, 0xd81b, 0xde60, 0xde7f, -32) \
macro(0x1e922, 0x1e943, 0xd83a, 0xdd22, 0xdd43, -34)
+// U+10400 DESERET CAPITAL LETTER LONG I .. U+10427 DESERET CAPITAL LETTER EW
+// U+104B0 OSAGE CAPITAL LETTER A .. U+104D3 OSAGE CAPITAL LETTER ZHA
+// U+10C80 OLD HUNGARIAN CAPITAL LETTER A .. U+10CB2 OLD HUNGARIAN CAPITAL LETTER US
+// U+118A0 WARANG CITI CAPITAL LETTER NGAA .. U+118BF WARANG CITI CAPITAL LETTER VIYO
+// U+16E40 MEDEFAIDRIN CAPITAL LETTER M .. U+16E5F MEDEFAIDRIN CAPITAL LETTER Y
+// U+1E900 ADLAM CAPITAL LETTER ALIF .. U+1E921 ADLAM CAPITAL LETTER SHA
#define FOR_EACH_NON_BMP_CASE_FOLDING(macro) \
macro(0x10400, 0x10427, 0xd801, 0xdc00, 0xdc27, 40) \
macro(0x104b0, 0x104d3, 0xd801, 0xdcb0, 0xdcd3, 40) \
@@ -43,6 +61,12 @@
macro(0x16e40, 0x16e5f, 0xd81b, 0xde40, 0xde5f, 32) \
macro(0x1e900, 0x1e921, 0xd83a, 0xdd00, 0xdd21, 34)
+// U+10428 DESERET SMALL LETTER LONG I .. U+1044F DESERET SMALL LETTER EW
+// U+104D8 OSAGE SMALL LETTER A .. U+104FB OSAGE SMALL LETTER ZHA
+// U+10CC0 OLD HUNGARIAN SMALL LETTER A .. U+10CF2 OLD HUNGARIAN SMALL LETTER US
+// U+118C0 WARANG CITI SMALL LETTER NGAA .. U+118DF WARANG CITI SMALL LETTER VIYO
+// U+16E60 MEDEFAIDRIN SMALL LETTER M .. U+16E7F MEDEFAIDRIN SMALL LETTER Y
+// U+1E922 ADLAM SMALL LETTER ALIF .. U+1E943 ADLAM SMALL LETTER SHA
#define FOR_EACH_NON_BMP_REV_CASE_FOLDING(macro) \
macro(0x10428, 0x1044f, 0xd801, 0xdc28, 0xdc4f, -40) \
macro(0x104d8, 0x104fb, 0xd801, 0xdcd8, 0xdcfb, -40) \
diff --git a/js/src/vm/make_unicode.py b/js/src/vm/make_unicode.py
index b55b1940e4..8568ccb64c 100755
--- a/js/src/vm/make_unicode.py
+++ b/js/src/vm/make_unicode.py
@@ -26,6 +26,18 @@ import re
import os
import sys
from contextlib import closing
+from functools import partial
+from itertools import chain, groupby, ifilter, imap, izip_longest, tee
+from operator import is_not, itemgetter
+
+class codepoint_dict(dict):
+    """Dictionary keyed by code point whose values are 4-tuples ending in (name, alias)."""
+
+    def name(self, code_point):
+        """Return the character name, followed by ' (alias)' when an alias is present."""
+        (_, _, label, old_name) = self[code_point]
+        suffix = ' (' + old_name + ')' if old_name else ''
+        return '{}{}'.format(label, suffix)
+
+    def full_name(self, code_point):
+        """Return the same text as name(), prefixed with the 'U+XXXX' notation."""
+        return 'U+{:04X} {}'.format(code_point, self.name(code_point))
# ECMAScript 2016
# §11.2 White Space
@@ -132,10 +144,32 @@ def read_derived_core_properties(derived_core_properties):
for char in range(int(start, 16), int(end, 16) + 1):
yield (char, char_property)
+def read_special_casing(special_casing):
+    """Parse the lines of SpecialCasing.txt and yield one tuple per data line.
+
+    |special_casing| is an iterable of text lines. Every yielded tuple has the
+    shape (code, lower, upper, languages, contexts):
+      - code: the source code point,
+      - lower / upper: lists of code points (empty when the field is empty),
+      - languages: condition entries starting with a lower case letter,
+      - contexts: all remaining condition entries.
+    The title case field is not used.
+    """
+    # Format:
+    # <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment>
+    for line in special_casing:
+        # Skip comment lines and blank lines regardless of the line ending
+        # (a bare '\r\n' or whitespace-only line must not reach int()).
+        stripped = line.strip()
+        if not stripped or stripped.startswith('#'):
+            continue
+        row = line.split('#')[0].split(';')
+        code = int(row[0].strip(), 16)
+        # split() without an argument ignores leading, trailing and repeated
+        # blanks, so an empty field simply produces an empty list.
+        lower = [int(c, 16) for c in row[1].split()]
+        upper = [int(c, 16) for c in row[3].split()]
+        languages = []
+        contexts = []
+        for cond in row[4].split():
+            if cond[0].islower():
+                languages.append(cond)
+            else:
+                contexts.append(cond)
+        yield (code, lower, upper, languages, contexts)
+
def int_ranges(ints):
""" Yields consecutive ranges (inclusive) from integer values. """
- from itertools import tee, izip_longest
-
(a, b) = tee(sorted(ints))
start = next(b)
for (curr, succ) in izip_longest(a, b):
@@ -153,7 +187,7 @@ def utf16_encode(code):
return lead, trail
-def make_non_bmp_convert_macro(out_file, name, convert_map):
+def make_non_bmp_convert_macro(out_file, name, convert_map, codepoint_table):
# Find continuous range in convert_map.
convert_list = []
entry = None
@@ -179,6 +213,7 @@ def make_non_bmp_convert_macro(out_file, name, convert_map):
# Generate macro call for each range.
lines = []
+ comment = []
for entry in convert_list:
from_code = entry['code']
to_code = entry['code'] + entry['length'] - 1
@@ -190,29 +225,15 @@ def make_non_bmp_convert_macro(out_file, name, convert_map):
lines.append(' macro(0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, {:d})'.format(
from_code, to_code, lead, from_trail, to_trail, diff))
+ comment.append('// {} .. {}'.format(codepoint_table.full_name(from_code),
+ codepoint_table.full_name(to_code)))
+ out_file.write('\n'.join(comment))
+ out_file.write('\n')
out_file.write('#define FOR_EACH_NON_BMP_{}(macro) \\\n'.format(name))
out_file.write(' \\\n'.join(lines))
out_file.write('\n')
-def for_each_non_bmp_group(group_set):
- # Find continuous range in group_set.
- group_list = []
- entry = None
- for code in sorted(group_set.keys()):
- if entry and code == entry['code'] + entry['length']:
- entry['length'] += 1
- continue
-
- entry = {
- 'code': code,
- 'length': 1
- }
- group_list.append(entry)
-
- for entry in group_list:
- yield (entry['code'], entry['code'] + entry['length'] - 1)
-
def process_derived_core_properties(derived_core_properties):
id_start = set()
id_continue = set()
@@ -236,7 +257,7 @@ def process_unicode_data(unicode_data, derived_core_properties):
same_upper_cache = {same_upper_dummy: 0}
same_upper_index = [0] * (MAX_BMP + 1)
- test_table = {}
+ codepoint_table = codepoint_dict()
test_space_table = []
non_bmp_lower_map = {}
@@ -254,15 +275,9 @@ def process_unicode_data(unicode_data, derived_core_properties):
alias = row[-5]
uppercase = row[-3]
lowercase = row[-2]
- flags = 0
if uppercase:
upper = int(uppercase, 16)
-
- if upper not in same_upper_map:
- same_upper_map[upper] = [code]
- else:
- same_upper_map[upper].append(code)
else:
upper = code
@@ -271,6 +286,8 @@ def process_unicode_data(unicode_data, derived_core_properties):
else:
lower = code
+ codepoint_table[code] = (upper, lower, name, alias)
+
if code > MAX_BMP:
if code != lower:
non_bmp_lower_map[code] = lower
@@ -285,6 +302,16 @@ def process_unicode_data(unicode_data, derived_core_properties):
non_bmp_id_cont_set[code] = 1
continue
+ assert lower <= MAX_BMP and upper <= MAX_BMP
+
+ if code != upper:
+ if upper not in same_upper_map:
+ same_upper_map[upper] = [code]
+ else:
+ same_upper_map[upper].append(code)
+
+ flags = 0
+
# we combine whitespace and lineterminators because in pratice we don't need them separated
if category == 'Zs' or code in whitespace or code in line_terminator:
flags |= FLAG_SPACE
@@ -298,8 +325,6 @@ def process_unicode_data(unicode_data, derived_core_properties):
elif code in id_continue or code in compatibility_identifier_part:
flags |= FLAG_UNICODE_ID_CONTINUE_ONLY
- test_table[code] = (upper, lower, name, alias)
-
up_d = upper - code
low_d = lower - code
@@ -319,12 +344,12 @@ def process_unicode_data(unicode_data, derived_core_properties):
index[code] = i
for code in range(0, MAX_BMP + 1):
- entry = test_table.get(code)
+ entry = codepoint_table.get(code)
if not entry:
continue
- (upper, lower, name, alias) = entry
+ (upper, _, _, _) = entry
if upper not in same_upper_map:
continue
@@ -354,7 +379,7 @@ def process_unicode_data(unicode_data, derived_core_properties):
non_bmp_lower_map, non_bmp_upper_map,
non_bmp_space_set,
non_bmp_id_start_set, non_bmp_id_cont_set,
- test_table, test_space_table,
+ codepoint_table, test_space_table,
)
def process_case_folding(case_folding):
@@ -438,9 +463,149 @@ def process_case_folding(case_folding):
folding_tests
)
+def process_special_casing(special_casing, table, index):
+    """Classify the SpecialCasing.txt mappings and validate built-in assumptions.
+
+    Entries are read through read_special_casing(special_casing). For each
+    code point, |table[index[code]]| supplies (upper, lower, flags), where
+    |upper| and |lower| are deltas that are added to the code point (modulo
+    2**16) to obtain the simple case mapping. A mapping is recorded as
+    "special" only when it differs from that simple one-to-one mapping.
+
+    The entries are sorted into unconditional, context-conditional and
+    language-conditional dictionaries, and a series of assertions then checks
+    that the data still matches the special cases that are inlined elsewhere.
+
+    Returns the pair (unconditional_tolower, unconditional_toupper); both map
+    a code point to the list of code points of its special mapping.
+    """
+    # Unconditional special casing.
+    unconditional_tolower = {}
+    unconditional_toupper = {}
+
+    # Conditional special casing, language independent.
+    conditional_tolower = {}
+    conditional_toupper = {}
+
+    # Conditional special casing, language dependent.
+    lang_conditional_tolower = {}
+    lang_conditional_toupper = {}
+
+    def caseInfo(code):
+        # Returns the simple (lower, upper) mapping of |code| as 16-bit values.
+        (upper, lower, flags) = table[index[code]]
+        return ((code + lower) & 0xffff, (code + upper) & 0xffff)
+
+    for (code, lower, upper, languages, contexts) in read_special_casing(special_casing):
+        assert code <= MAX_BMP, 'Unexpected character outside of BMP: %s' % code
+        assert len(languages) <= 1, 'Expected zero or one language ids: %s' % languages
+        # Report the offending contexts (not the languages) when this fails.
+        assert len(contexts) <= 1, 'Expected zero or one casing contexts: %s' % contexts
+
+        (default_lower, default_upper) = caseInfo(code)
+        special_lower = len(lower) != 1 or lower[0] != default_lower
+        special_upper = len(upper) != 1 or upper[0] != default_upper
+
+        # Invariant: If |code| has casing per UnicodeData.txt, then it also has
+        # casing rules in SpecialCasing.txt.
+        assert code == default_lower or len(lower) != 1 or code != lower[0]
+        assert code == default_upper or len(upper) != 1 or code != upper[0]
+
+        language = languages[0] if languages else None
+        context = contexts[0] if contexts else None
+
+        if not language and not context:
+            if special_lower:
+                unconditional_tolower[code] = lower
+            if special_upper:
+                unconditional_toupper[code] = upper
+        elif not language and context:
+            if special_lower:
+                conditional_tolower[code] = (lower, context)
+            if special_upper:
+                conditional_toupper[code] = (upper, context)
+        else:
+            # Language-dependent entry; |context| may or may not be set.
+            if language not in lang_conditional_tolower:
+                lang_conditional_tolower[language] = {}
+                lang_conditional_toupper[language] = {}
+            if special_lower:
+                lang_conditional_tolower[language][code] = (lower, context)
+            if special_upper:
+                lang_conditional_toupper[language][code] = (upper, context)
+
+    # Certain special casing rules are inlined in jsstr.cpp, ensure these cases
+    # still match the current SpecialCasing.txt file.
+    def lowerCase(code):
+        (lower, _) = caseInfo(code)
+        return lower
+
+    def upperCase(code):
+        (_, upper) = caseInfo(code)
+        return upper
+
+    def ascii(char_dict):
+        return ifilter(lambda ch: ch <= 0x7f, char_dict.iterkeys())
+
+    def latin1(char_dict):
+        return ifilter(lambda ch: ch <= 0xff, char_dict.iterkeys())
+
+    def is_empty(iterable):
+        return not any(True for _ in iterable)
+
+    def is_equals(iter1, iter2):
+        # izip_longest pads the shorter input with None, so a length mismatch
+        # makes the comparison fail.
+        return all(x == y for (x, y) in izip_longest(iter1, iter2))
+
+    # Ensure no ASCII characters have special case mappings.
+    assert is_empty(ascii(unconditional_tolower))
+    assert is_empty(ascii(unconditional_toupper))
+    assert is_empty(ascii(conditional_tolower))
+    assert is_empty(ascii(conditional_toupper))
+
+    # Ensure no Latin1 characters have special lower case mappings.
+    assert is_empty(latin1(unconditional_tolower))
+    assert is_empty(latin1(conditional_tolower))
+
+    # Ensure no Latin1 characters have conditional special upper case mappings.
+    assert is_empty(latin1(conditional_toupper))
+
+    # Ensure U+00DF is the only Latin1 character with a special upper case mapping.
+    assert is_equals([0x00DF], latin1(unconditional_toupper))
+
+    # Ensure U+0130 is the only character with a special lower case mapping.
+    assert is_equals([0x0130], unconditional_tolower)
+
+    # Ensure no characters have language independent conditional upper case mappings.
+    assert is_empty(conditional_toupper)
+
+    # Ensure U+03A3 is the only character with language independent conditional lower case mapping.
+    assert is_equals([0x03A3], conditional_tolower)
+
+    # Verify U+0130 and U+03A3 have simple lower case mappings.
+    assert all(ch != lowerCase(ch) for ch in [0x0130, 0x03A3])
+
+    # Ensure Azeri, Lithuanian, and Turkish are the only languages with conditional case mappings.
+    assert is_equals(["az", "lt", "tr"], sorted(lang_conditional_tolower.iterkeys()))
+    assert is_equals(["az", "lt", "tr"], sorted(lang_conditional_toupper.iterkeys()))
+
+    # Maximum case mapping length is three characters.
+    itervals = lambda d: d.itervalues()
+    assert max(imap(len, chain(
+        itervals(unconditional_tolower),
+        itervals(unconditional_toupper),
+        imap(itemgetter(0), itervals(conditional_tolower)),
+        imap(itemgetter(0), itervals(conditional_toupper)),
+        imap(itemgetter(0), chain.from_iterable(imap(itervals, itervals(lang_conditional_tolower)))),
+        imap(itemgetter(0), chain.from_iterable(imap(itervals, itervals(lang_conditional_toupper)))),
+    ))) <= 3
+
+    # Ensure all case mapping contexts are known (see Unicode 9.0, §3.13 Default Case Algorithms).
+    assert set([
+        'After_I', 'After_Soft_Dotted', 'Final_Sigma', 'More_Above', 'Not_Before_Dot',
+    ]).issuperset(set(ifilter(partial(is_not, None), chain(
+        imap(itemgetter(1), itervals(conditional_tolower)),
+        imap(itemgetter(1), itervals(conditional_toupper)),
+        imap(itemgetter(1), chain.from_iterable(imap(itervals, itervals(lang_conditional_tolower)))),
+        imap(itemgetter(1), chain.from_iterable(imap(itervals, itervals(lang_conditional_toupper)))),
+    ))))
+
+    # Special casing for U+00DF (LATIN SMALL LETTER SHARP S).
+    assert upperCase(0x00DF) == 0x00DF and unconditional_toupper[0x00DF] == [0x0053, 0x0053]
+
+    # Special casing for U+0130 (LATIN CAPITAL LETTER I WITH DOT ABOVE).
+    assert unconditional_tolower[0x0130] == [0x0069, 0x0307]
+
+    # Special casing for U+03A3 (GREEK CAPITAL LETTER SIGMA).
+    assert lowerCase(0x03A3) == 0x03C3 and conditional_tolower[0x03A3] == ([0x03C2], 'Final_Sigma')
+
+    return (unconditional_tolower, unconditional_toupper)
+
def make_non_bmp_file(version,
non_bmp_lower_map, non_bmp_upper_map,
- non_bmp_folding_map, non_bmp_rev_folding_map):
+ non_bmp_folding_map, non_bmp_rev_folding_map,
+ codepoint_table):
file_name = 'UnicodeNonBMP.h';
with io.open(file_name, mode='wb') as non_bmp_file:
non_bmp_file.write(mpl_license)
@@ -463,77 +628,277 @@ def make_non_bmp_file(version,
""")
- make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map)
+ make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map, codepoint_table)
non_bmp_file.write('\n')
- make_non_bmp_convert_macro(non_bmp_file, 'UPPERCASE', non_bmp_upper_map)
+ make_non_bmp_convert_macro(non_bmp_file, 'UPPERCASE', non_bmp_upper_map, codepoint_table)
non_bmp_file.write('\n')
- make_non_bmp_convert_macro(non_bmp_file, 'CASE_FOLDING', non_bmp_folding_map)
+ make_non_bmp_convert_macro(non_bmp_file, 'CASE_FOLDING', non_bmp_folding_map, codepoint_table)
non_bmp_file.write('\n')
- make_non_bmp_convert_macro(non_bmp_file, 'REV_CASE_FOLDING', non_bmp_rev_folding_map)
+ make_non_bmp_convert_macro(non_bmp_file, 'REV_CASE_FOLDING', non_bmp_rev_folding_map, codepoint_table)
non_bmp_file.write("""
#endif /* vm_UnicodeNonBMP_h */
""")
-def make_bmp_mapping_test(version, test_table):
+def write_special_casing_methods(unconditional_toupper, codepoint_table, println):
+    """Emit the C++ definitions of the three js::unicode special casing functions.
+
+    Writes CanUpperCaseSpecialCasing, LengthUpperCaseSpecialCasing and
+    AppendUpperCaseSpecialCasing, in that order, separated by an empty line.
+
+    |unconditional_toupper| maps a code unit to the list of code units of its
+    special upper case mapping and must not be empty. |codepoint_table|
+    provides name() and full_name() for the generated comments. |println|
+    writes one output line; it is called with several string fragments
+    (indentation, then text), presumably concatenated without a separator --
+    confirm against the caller.
+
+    The emitted code uses four spaces per nesting level, with |case| labels
+    indented by two extra spaces inside a |switch|.
+    """
+    def hexlit(n):
+        """ Returns C++ hex-literal for |n|. """
+        return '0x{:04X}'.format(n)
+
+    def describe_range(ranges, depth):
+        # Emits one comment line per (start, end) range at nesting level |depth|.
+        indent = depth * '    '
+        for (start, end) in ranges:
+            if start == end:
+                println(indent, '// {}'.format(codepoint_table.full_name(start)))
+            else:
+                println(indent, '// {} .. {}'.format(codepoint_table.full_name(start),
+                                                     codepoint_table.full_name(end)))
+
+    def out_range(start, end):
+        """ Tests if the input character isn't a member of the set {x | start <= x <= end}. """
+        if (start == end):
+            return 'ch != {}'.format(hexlit(start))
+        return 'ch < {} || ch > {}'.format(hexlit(start), hexlit(end))
+
+    def in_range(start, end, parenthesize=False):
+        """ Tests if the input character is in the set {x | start <= x <= end}. """
+        if (start == end):
+            return 'ch == {}'.format(hexlit(start))
+        (left, right) = ('(', ')') if parenthesize else ('', '')
+        return '{}ch >= {} && ch <= {}{}'.format(left, hexlit(start), hexlit(end), right)
+
+    def in_any_range(ranges, spaces):
+        """ Tests if the input character is included in any of the given ranges. """
+        # Pack the sub-expressions into lines of less than 100 columns; |spaces|
+        # is the indentation placed in front of every continuation line.
+        lines = [[]]
+        for (start, end) in ranges:
+            expr = in_range(start, end, parenthesize=True)
+            line = ' || '.join(lines[-1] + [expr])
+            if len(line) < (100 - len(spaces) - len(' ||')):
+                lines[-1].append(expr)
+            else:
+                lines.append([expr])
+        return ' ||\n{}'.format(spaces).join(imap(lambda t: ' || '.join(t), lines))
+
+    def write_range_accept(parent_list, child_list, depth):
+        """ Accepts the input character if it matches any code unit in |child_list|. """
+        (min_parent, max_parent) = (parent_list[0], parent_list[-1])
+        (min_child, max_child) = (child_list[0], child_list[-1])
+        assert min_child >= min_parent
+        assert max_child <= max_parent
+        indent = depth * '    '
+
+        child_ranges = list(int_ranges(child_list))
+        has_successor = max_child != max_parent
+
+        # If |child_list| is a contiguous list of code units, emit a simple
+        # range check: |min_child <= input <= max_child|.
+        if len(child_ranges) == 1:
+            describe_range(child_ranges, depth)
+            if has_successor:
+                println(indent, 'if (ch <= {})'.format(hexlit(max_child)))
+                println(indent, '    return ch >= {};'.format(hexlit(min_child)))
+            else:
+                println(indent, 'return {};'.format(in_range(min_child, max_child)))
+            return
+
+        # Otherwise create a disjunction over the subranges in |child_ranges|.
+        # Continuation lines are aligned with the expression that follows the
+        # emitted |return| keyword.
+        if not has_successor:
+            spaces = indent + len('return ') * ' '
+        else:
+            spaces = indent + len('    return ') * ' '
+        range_test_expr = in_any_range(child_ranges, spaces)
+
+        if min_child != min_parent:
+            println(indent, 'if (ch < {})'.format(hexlit(min_child)))
+            println(indent, '    return false;')
+
+        # If there's no successor block, we can omit the |input <= max_child| check,
+        # because it was already checked when we emitted the parent range test.
+        if not has_successor:
+            describe_range(child_ranges, depth)
+            println(indent, 'return {};'.format(range_test_expr))
+        else:
+            println(indent, 'if (ch <= {}) {{'.format(hexlit(max_child)))
+            describe_range(child_ranges, depth + 1)
+            println(indent, '    return {};'.format(range_test_expr))
+            println(indent, '}')
+
+    def write_CanUpperCaseSpecialCasing():
+        """ Checks if the input has a special upper case mapping. """
+        println('bool')
+        println('js::unicode::CanUpperCaseSpecialCasing(char16_t ch)')
+        println('{')
+
+        assert unconditional_toupper, "|unconditional_toupper| is not empty"
+
+        # Sorted list of code units with special upper case mappings.
+        code_list = sorted(unconditional_toupper.iterkeys())
+
+        # Fail-fast if the input character isn't a special casing character.
+        println('    if ({})'.format(out_range(code_list[0], code_list[-1])))
+        println('        return false;')
+
+        for i in range(0, 16):
+            # Check if the input character is in the range:
+            # |start_point <= input < end_point|.
+            start_point = i << 12
+            end_point = (i + 1) << 12
+            matches = [cu for cu in code_list if start_point <= cu < end_point]
+
+            # Skip empty ranges.
+            if not matches:
+                continue
+
+            # If |matches| consists of only a few characters, directly check
+            # the input against the characters in |matches|.
+            if len(matches) <= 8:
+                write_range_accept(code_list, matches, depth=1)
+                continue
+
+            # Otherwise split into further subranges.
+
+            # Only enter the if-block if the input is less-or-equals to the
+            # largest value in the current range.
+            is_last_block = matches[-1] == code_list[-1]
+            if not is_last_block:
+                println('    if (ch <= {}) {{'.format(hexlit(matches[-1])))
+            else:
+                println('    if (ch < {})'.format(hexlit(matches[0])))
+                println('        return false;')
+
+            for j in range(0, 16):
+                inner_start = start_point + (j << 8)
+                inner_end = start_point + ((j + 1) << 8)
+                inner_matches = [cu for cu in matches if inner_start <= cu < inner_end]
+
+                if inner_matches:
+                    # The last block is not wrapped in an if-block, so its
+                    # sub-ranges stay at the function body's nesting level.
+                    d = 1 if is_last_block else 2
+                    write_range_accept(matches, inner_matches, depth=d)
+
+            if not is_last_block:
+                println('    }')
+
+        println('}')
+
+    def write_LengthUpperCaseSpecialCasing():
+        """ Slow case: Special casing character was found, returns its mapping length. """
+        println('size_t')
+        println('js::unicode::LengthUpperCaseSpecialCasing(char16_t ch)')
+        println('{')
+
+        println('    switch(ch) {')
+        for (code, converted) in sorted(unconditional_toupper.iteritems(), key=itemgetter(0)):
+            println('      case {}: return {}; // {}'.format(hexlit(code), len(converted),
+                                                              codepoint_table.name(code)))
+        println('    }')
+        println('')
+        println('    MOZ_ASSERT_UNREACHABLE("Bad character input.");')
+        println('    return 0;')
+
+        println('}')
+
+    def write_AppendUpperCaseSpecialCasing():
+        """ Slow case: Special casing character was found, append its mapping characters. """
+        println('void')
+        println('js::unicode::AppendUpperCaseSpecialCasing(char16_t ch, char16_t* elements, size_t* index)')
+        println('{')
+
+        println('    switch(ch) {')
+        for (code, converted) in sorted(unconditional_toupper.iteritems(), key=itemgetter(0)):
+            println('      case {}: // {}'.format(hexlit(code), codepoint_table.name(code)))
+            for ch in converted:
+                println('        elements[(*index)++] = {}; // {}'.format(hexlit(ch),
+                                                                        codepoint_table.name(ch)))
+            println('        return;')
+        println('    }')
+        println('')
+        println('    MOZ_ASSERT_UNREACHABLE("Bad character input.");')
+        println('    return;')
+
+        println('}')
+
+    write_CanUpperCaseSpecialCasing()
+    println('')
+    write_LengthUpperCaseSpecialCasing()
+    println('')
+    write_AppendUpperCaseSpecialCasing()
+
+def make_bmp_mapping_test(version, codepoint_table, unconditional_tolower, unconditional_toupper):
+ def unicodeEsc(n):
+ return '\u{:04X}'.format(n)
+
file_name = '../tests/ecma_5/String/string-upper-lower-mapping.js'
- with io.open(file_name, mode='wb') as test_mapping:
- test_mapping.write(warning_message)
- test_mapping.write(unicode_version_message.format(version))
- test_mapping.write(public_domain)
- test_mapping.write('var mapping = [\n')
+ with io.open(file_name, mode='wb') as output:
+ write = partial(print, file=output, sep='', end='')
+ println = partial(print, file=output, sep='', end='\n')
+
+ write(warning_message)
+ write(unicode_version_message.format(version))
+ write(public_domain)
+ println('var mapping = [')
for code in range(0, MAX_BMP + 1):
- entry = test_table.get(code)
+ entry = codepoint_table.get(code)
if entry:
- (upper, lower, name, alias) = entry
- test_mapping.write(' [' + hex(upper) + ', ' + hex(lower) + '], /* ' +
- name + (' (' + alias + ')' if alias else '') + ' */\n')
+ (upper, lower, _, _) = entry
+ upper = unconditional_toupper[code] if code in unconditional_toupper else [upper]
+ lower = unconditional_tolower[code] if code in unconditional_tolower else [lower]
+ println(' ["{}", "{}"], /* {} */'.format("".join(imap(unicodeEsc, upper)),
+ "".join(imap(unicodeEsc, lower)),
+ codepoint_table.name(code)))
else:
- test_mapping.write(' [' + hex(code) + ', ' + hex(code) + '],\n')
- test_mapping.write('];')
- test_mapping.write("""
+ println(' ["{0}", "{0}"],'.format(unicodeEsc(code)))
+ println('];')
+ write("""
assertEq(mapping.length, 0x10000);
for (var i = 0; i <= 0xffff; i++) {
var char = String.fromCharCode(i);
var info = mapping[i];
- assertEq(char.toUpperCase().charCodeAt(0), info[0]);
- assertEq(char.toLowerCase().charCodeAt(0), info[1]);
+ assertEq(char.toUpperCase(), info[0]);
+ assertEq(char.toLowerCase(), info[1]);
}
if (typeof reportCompare === "function")
reportCompare(true, true);
""")
-def make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map):
+def make_non_bmp_mapping_test(version, non_bmp_upper_map, non_bmp_lower_map, codepoint_table):
file_name = '../tests/ecma_6/String/string-code-point-upper-lower-mapping.js'
with io.open(file_name, mode='wb') as test_non_bmp_mapping:
test_non_bmp_mapping.write(warning_message)
test_non_bmp_mapping.write(unicode_version_message.format(version))
test_non_bmp_mapping.write(public_domain)
+
for code in sorted(non_bmp_upper_map.keys()):
test_non_bmp_mapping.write("""\
-assertEq(String.fromCodePoint(0x{:x}).toUpperCase().codePointAt(0), 0x{:x});
-""".format(code, non_bmp_upper_map[code]))
+assertEq(String.fromCodePoint(0x{:04X}).toUpperCase().codePointAt(0), 0x{:04X}); // {}, {}
+""".format(code, non_bmp_upper_map[code],
+ codepoint_table.name(code), codepoint_table.name(non_bmp_upper_map[code])))
+
for code in sorted(non_bmp_lower_map.keys()):
test_non_bmp_mapping.write("""\
-assertEq(String.fromCodePoint(0x{:x}).toLowerCase().codePointAt(0), 0x{:x});
-""".format(code, non_bmp_lower_map[code]))
+assertEq(String.fromCodePoint(0x{:04X}).toLowerCase().codePointAt(0), 0x{:04X}); // {}, {}
+""".format(code, non_bmp_lower_map[code],
+ codepoint_table.name(code), codepoint_table.name(non_bmp_lower_map[code])))
test_non_bmp_mapping.write("""
if (typeof reportCompare === "function")
reportCompare(true, true);
""")
-def make_space_test(version, test_space_table):
+def make_space_test(version, test_space_table, codepoint_table):
+ def hex_and_name(c):
+ return ' 0x{:04X} /* {} */'.format(c, codepoint_table.name(c))
+
file_name = '../tests/ecma_5/String/string-space-trim.js'
with io.open(file_name, mode='wb') as test_space:
test_space.write(warning_message)
test_space.write(unicode_version_message.format(version))
test_space.write(public_domain)
- test_space.write('var onlySpace = String.fromCharCode(' +
- ', '.join(map(lambda c: hex(c), test_space_table)) + ');\n')
+ test_space.write('var onlySpace = String.fromCharCode(\n')
+ test_space.write(',\n'.join(map(hex_and_name, test_space_table)))
+ test_space.write('\n);\n')
test_space.write("""
assertEq(onlySpace.trim(), "");
assertEq((onlySpace + 'aaaa').trim(), 'aaaa');
@@ -544,7 +909,10 @@ if (typeof reportCompare === "function")
reportCompare(true, true);
""")
-def make_icase_test(version, folding_tests):
+def make_icase_test(version, folding_tests, codepoint_table):
+ def char_hex(c):
+ return '0x{:04X}'.format(c)
+
file_name = '../tests/ecma_6/RegExp/unicode-ignoreCase.js'
with io.open(file_name, mode='wb') as test_icase:
test_icase.write(warning_message)
@@ -565,7 +933,8 @@ function test(code, ...equivs) {
}
""")
for args in folding_tests:
- test_icase.write('test(' + ','.join([hex(c) for c in args]) + ');\n')
+ test_icase.write('test({}); // {}\n'.format(', '.join(map(char_hex, args)),
+ ', '.join(map(codepoint_table.name, args))))
test_icase.write("""
if (typeof reportCompare === "function")
reportCompare(true, true);
@@ -576,7 +945,9 @@ def make_unicode_file(version,
same_upper_table, same_upper_index,
folding_table, folding_index,
non_bmp_space_set,
- non_bmp_id_start_set, non_bmp_id_cont_set):
+ non_bmp_id_start_set, non_bmp_id_cont_set,
+ unconditional_toupper,
+ codepoint_table):
index1, index2, shift = splitbins(index)
# Don't forget to update CharInfo in Unicode.h if you need to change this
@@ -665,8 +1036,8 @@ def make_unicode_file(version,
* stop if you found the best shift
*/
"""
- def dump(data, name, file):
- file.write('const uint8_t unicode::' + name + '[] = {\n')
+ def dump(data, name, println):
+ println('const uint8_t unicode::{}[] = {{'.format(name))
line = pad = ' ' * 4
lines = []
@@ -682,93 +1053,79 @@ def make_unicode_file(version,
line = line + s + ', '
lines.append(line.rstrip())
- file.write('\n'.join(lines))
- file.write('\n};\n')
+ println('\n'.join(lines))
+ println('};')
+
+ def write_table(data_type, name, tbl, idx1_name, idx1, idx2_name, idx2, println):
+ println('const {} unicode::{}[] = {{'.format(data_type, name))
+ for d in tbl:
+ println(' {{ {} }},'.format(', '.join(str(e) for e in d)))
+ println('};')
+ println('')
+
+ dump(idx1, idx1_name, println)
+ println('')
+ dump(idx2, idx2_name, println)
+ println('')
+
+ def write_supplemental_identifier_method(name, group_set, println):
+ println('bool')
+ println('js::unicode::{}(uint32_t codePoint)'.format(name))
+ println('{')
+ for (from_code, to_code) in int_ranges(group_set.keys()):
+ println(' if (codePoint >= 0x{:X} && codePoint <= 0x{:X}) // {} .. {}'.format(from_code,
+ to_code,
+ codepoint_table.name(from_code),
+ codepoint_table.name(to_code)))
+ println(' return true;')
+ println(' return false;')
+ println('}')
+ println('')
file_name = 'Unicode.cpp'
with io.open(file_name, 'wb') as data_file:
- data_file.write(warning_message)
- data_file.write(unicode_version_message.format(version))
- data_file.write(public_domain)
- data_file.write('#include "vm/Unicode.h"\n\n')
- data_file.write('using namespace js;\n')
- data_file.write('using namespace js::unicode;\n')
- data_file.write(comment)
- data_file.write('const CharacterInfo unicode::js_charinfo[] = {\n')
- for d in table:
- data_file.write(' {')
- data_file.write(', '.join((str(e) for e in d)))
- data_file.write('},\n')
- data_file.write('};\n')
- data_file.write('\n')
-
- dump(index1, 'index1', data_file)
- data_file.write('\n')
- dump(index2, 'index2', data_file)
- data_file.write('\n')
-
- data_file.write('const CodepointsWithSameUpperCaseInfo unicode::js_codepoints_with_same_upper_info[] = {\n')
- for d in same_upper_table:
- data_file.write(' {')
- data_file.write(', '.join((str(e) for e in d)))
- data_file.write('},\n')
- data_file.write('};\n')
- data_file.write('\n')
-
- dump(same_upper_index1, 'codepoints_with_same_upper_index1', data_file)
- data_file.write('\n')
- dump(same_upper_index2, 'codepoints_with_same_upper_index2', data_file)
- data_file.write('\n')
-
- data_file.write('const FoldingInfo unicode::js_foldinfo[] = {\n')
- for d in folding_table:
- data_file.write(' {')
- data_file.write(', '.join((str(e) for e in d)))
- data_file.write('},\n')
- data_file.write('};\n')
- data_file.write('\n')
-
- dump(folding_index1, 'folding_index1', data_file)
- data_file.write('\n')
- dump(folding_index2, 'folding_index2', data_file)
- data_file.write('\n')
+ write = partial(print, file=data_file, sep='', end='')
+ println = partial(print, file=data_file, sep='', end='\n')
+
+ write(warning_message)
+ write(unicode_version_message.format(version))
+ write(public_domain)
+ println('#include "vm/Unicode.h"')
+ println('')
+ println('using namespace js;')
+ println('using namespace js::unicode;')
+ write(comment)
+
+ write_table('CharacterInfo',
+ 'js_charinfo', table,
+ 'index1', index1,
+ 'index2', index2,
+ println)
+
+ write_table('CodepointsWithSameUpperCaseInfo',
+ 'js_codepoints_with_same_upper_info', same_upper_table,
+ 'codepoints_with_same_upper_index1', same_upper_index1,
+ 'codepoints_with_same_upper_index2', same_upper_index2,
+ println)
+
+ write_table('FoldingInfo',
+ 'js_foldinfo', folding_table,
+ 'folding_index1', folding_index1,
+ 'folding_index2', folding_index2,
+ println)
# If the following assert fails, it means space character is added to
# non-BMP area. In that case the following code should be uncommented
# and the corresponding code should be added to frontend.
assert len(non_bmp_space_set.keys()) == 0
- data_file.write("""\
-bool
-js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint)
-{
-""")
-
- for (from_code, to_code) in for_each_non_bmp_group(non_bmp_id_start_set):
- data_file.write("""\
- if (codePoint >= 0x{:x} && codePoint <= 0x{:x})
- return true;
-""".format(from_code, to_code))
-
- data_file.write("""\
- return false;
-}
-
-bool
-js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint)
-{
-""")
+ write_supplemental_identifier_method('IsIdentifierStartNonBMP', non_bmp_id_start_set,
+ println)
- for (from_code, to_code) in for_each_non_bmp_group(non_bmp_id_cont_set):
- data_file.write("""\
- if (codePoint >= 0x{:x} && codePoint <= 0x{:x})
- return true;
-""".format(from_code, to_code))
+ write_supplemental_identifier_method('IsIdentifierPartNonBMP', non_bmp_id_cont_set,
+ println)
- data_file.write("""\
- return false;
-}
-""")
+ write_special_casing_methods(unconditional_toupper, codepoint_table, println)
def getsize(data):
""" return smallest possible integer size for the given array """
@@ -842,10 +1199,8 @@ def splitbins(t):
def make_irregexp_tables(version,
table, index,
folding_table, folding_index,
- test_table):
+ codepoint_table):
import string
- from functools import partial
- from itertools import chain, ifilter, imap
MAX_ASCII = 0x7F
MAX_LATIN1 = 0xFF
@@ -894,13 +1249,13 @@ def make_irregexp_tables(version,
def char_name(code):
assert 0 <= code and code <= MAX_BMP
- if code not in test_table:
+ if code not in codepoint_table:
return '<Unused>'
if code == LEAD_SURROGATE_MIN:
return '<Lead Surrogate Min>'
if code == TRAIL_SURROGATE_MAX:
return '<Trail Surrogate Max>'
- (_, _, name, alias) = test_table[code]
+ (_, _, name, alias) = codepoint_table[code]
return name if not name.startswith('<') else alias
def write_character_range(println, name, characters):
@@ -1080,7 +1435,8 @@ def update_unicode(args):
with download_or_open('UnicodeData.txt') as unicode_data, \
download_or_open('CaseFolding.txt') as case_folding, \
- download_or_open('DerivedCoreProperties.txt') as derived_core_properties:
+ download_or_open('DerivedCoreProperties.txt') as derived_core_properties, \
+ download_or_open('SpecialCasing.txt') as special_casing:
unicode_version = version_from_file(derived_core_properties, 'DerivedCoreProperties')
print('Processing...')
@@ -1090,13 +1446,16 @@ def update_unicode(args):
non_bmp_lower_map, non_bmp_upper_map,
non_bmp_space_set,
non_bmp_id_start_set, non_bmp_id_cont_set,
- test_table, test_space_table
+ codepoint_table, test_space_table
) = process_unicode_data(unicode_data, derived_core_properties)
(
folding_table, folding_index,
non_bmp_folding_map, non_bmp_rev_folding_map,
folding_tests
) = process_case_folding(case_folding)
+ (
+ unconditional_tolower, unconditional_toupper
+ ) = process_special_casing(special_casing, table, index)
print('Generating...')
make_unicode_file(unicode_version,
@@ -1104,19 +1463,23 @@ def update_unicode(args):
same_upper_table, same_upper_index,
folding_table, folding_index,
non_bmp_space_set,
- non_bmp_id_start_set, non_bmp_id_cont_set)
+ non_bmp_id_start_set, non_bmp_id_cont_set,
+ unconditional_toupper,
+ codepoint_table)
make_non_bmp_file(unicode_version,
non_bmp_lower_map, non_bmp_upper_map,
- non_bmp_folding_map, non_bmp_rev_folding_map)
+ non_bmp_folding_map, non_bmp_rev_folding_map,
+ codepoint_table)
make_irregexp_tables(unicode_version,
table, index,
folding_table, folding_index,
- test_table)
+ codepoint_table)
- make_bmp_mapping_test(unicode_version, test_table)
- make_non_bmp_mapping_test(unicode_version, non_bmp_upper_map, non_bmp_lower_map)
- make_space_test(unicode_version, test_space_table)
- make_icase_test(unicode_version, folding_tests)
+ make_bmp_mapping_test(unicode_version,
+ codepoint_table, unconditional_tolower, unconditional_toupper)
+ make_non_bmp_mapping_test(unicode_version, non_bmp_upper_map, non_bmp_lower_map, codepoint_table)
+ make_space_test(unicode_version, test_space_table, codepoint_table)
+ make_icase_test(unicode_version, folding_tests, codepoint_table)
if __name__ == '__main__':
import argparse