diff options
author | wolfbeast <mcwerewolf@gmail.com> | 2018-03-18 17:57:11 +0100 |
---|---|---|
committer | wolfbeast <mcwerewolf@gmail.com> | 2018-03-18 17:58:37 +0100 |
commit | b2331d76221b67bffb5cb5a2344c8b150a305dc7 (patch) | |
tree | 5d9cae4cdfc502b568f436fd66d77e0b16476395 | |
parent | 122938a4398ae8db07060dca3561ff46f93b5925 (diff) | |
parent | 427e7346629c76a1676a3f92320c3d2575d01b85 (diff) | |
download | uxp-b2331d76221b67bffb5cb5a2344c8b150a305dc7.tar.gz |
Correctly tokenize valid JS names when using code points outside of BMP range.
This resolves #72.
Merged remote-tracking branch 'janek/js_variable_unicode_1'
-rw-r--r-- | js/src/frontend/TokenStream.cpp | 149 | ||||
-rw-r--r-- | js/src/frontend/TokenStream.h | 1 | ||||
-rw-r--r-- | js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js | 18 | ||||
-rw-r--r-- | js/src/vm/Unicode.cpp | 879 | ||||
-rw-r--r-- | js/src/vm/Unicode.h | 28 | ||||
-rw-r--r-- | js/src/vm/UnicodeNonBMP.h | 10 | ||||
-rwxr-xr-x | js/src/vm/make_unicode.py | 111 |
7 files changed, 1159 insertions, 37 deletions
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp index c166ed4145..179a7c2447 100644 --- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -118,13 +118,57 @@ IsIdentifier(const CharT* chars, size_t length) return true; } +static uint32_t +GetSingleCodePoint(const char16_t** p, const char16_t* end) +{ + uint32_t codePoint; + if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(**p)) && *p + 1 < end) { + char16_t lead = **p; + char16_t maybeTrail = *(*p + 1); + if (unicode::IsTrailSurrogate(maybeTrail)) { + *p += 2; + return unicode::UTF16Decode(lead, maybeTrail); + } + } + + codePoint = **p; + (*p)++; + return codePoint; +} + +static bool +IsIdentifierMaybeNonBMP(const char16_t* chars, size_t length) +{ + if (IsIdentifier(chars, length)) + return true; + + if (length == 0) + return false; + + const char16_t* p = chars; + const char16_t* end = chars + length; + uint32_t codePoint; + + codePoint = GetSingleCodePoint(&p, end); + if (!unicode::IsIdentifierStart(codePoint)) + return false; + + while (p < end) { + codePoint = GetSingleCodePoint(&p, end); + if (!unicode::IsIdentifierPart(codePoint)) + return false; + } + + return true; +} + bool frontend::IsIdentifier(JSLinearString* str) { JS::AutoCheckCannotGC nogc; return str->hasLatin1Chars() ? ::IsIdentifier(str->latin1Chars(nogc), str->length()) - : ::IsIdentifier(str->twoByteChars(nogc), str->length()); + : ::IsIdentifierMaybeNonBMP(str->twoByteChars(nogc), str->length()); } bool @@ -993,6 +1037,21 @@ IsTokenSane(Token* tp) #endif bool +TokenStream::matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint) +{ + int32_t maybeTrail = getCharIgnoreEOL(); + if (!unicode::IsTrailSurrogate(maybeTrail)) { + ungetCharIgnoreEOL(maybeTrail); + return false; + } + + if (trail) + *trail = maybeTrail; + *codePoint = unicode::UTF16Decode(lead, maybeTrail); + return true; +} + +bool TokenStream::putIdentInTokenbuf(const char16_t* identStart) { int32_t c; @@ -1003,11 +1062,39 @@ TokenStream::putIdentInTokenbuf(const char16_t* identStart) tokenbuf.clear(); for (;;) { c = getCharIgnoreEOL(); + + if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) { + char16_t trail; + uint32_t codePoint; + if (matchTrailForLeadSurrogate(c, &trail, &codePoint)) { + if (!unicode::IsIdentifierPart(codePoint)) + break; + + if (!tokenbuf.append(c) || !tokenbuf.append(trail)) { + userbuf.setAddressOfNextRawChar(tmp); + return false; + } + continue; + } + } + if (!unicode::IsIdentifierPart(char16_t(c))) { if (c != '\\' || !matchUnicodeEscapeIdent(&qc)) break; + + if (MOZ_UNLIKELY(unicode::IsSupplementary(qc))) { + char16_t lead, trail; + unicode::UTF16Encode(qc, &lead, &trail); + if (!tokenbuf.append(lead) || !tokenbuf.append(trail)) { + userbuf.setAddressOfNextRawChar(tmp); + return false; + } + continue; + } + c = qc; } + if (!tokenbuf.append(c)) { userbuf.setAddressOfNextRawChar(tmp); return false; @@ -1168,12 +1255,23 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) static_assert('_' < 128, "IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), " "ensure that '_' is never handled here"); - if (unicode::IsUnicodeIDStart(c)) { + if (unicode::IsUnicodeIDStart(char16_t(c))) { identStart = userbuf.addressOfNextRawChar() - 1; hadUnicodeEscape = false; goto identifier; } + if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) { + uint32_t codePoint; + if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) && + unicode::IsUnicodeIDStart(codePoint)) + { + identStart = userbuf.addressOfNextRawChar() - 2; + hadUnicodeEscape = false; + goto identifier; + } + } + goto badchar; } @@ -1224,6 +1322,17 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) c = getCharIgnoreEOL(); if (c == EOF) break; + + if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) { + uint32_t codePoint; + if (matchTrailForLeadSurrogate(c, nullptr, &codePoint)) { + if (!unicode::IsIdentifierPart(codePoint)) + break; + + continue; + } + } + if (!unicode::IsIdentifierPart(char16_t(c))) { if (c != '\\' || !matchUnicodeEscapeIdent(&qc)) break; @@ -1318,9 +1427,21 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) } ungetCharIgnoreEOL(c); - if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) { - reportError(JSMSG_IDSTART_AFTER_NUMBER); - goto error; + if (c != EOF) { + if (unicode::IsIdentifierStart(char16_t(c))) { + reportError(JSMSG_IDSTART_AFTER_NUMBER); + goto error; + } + + if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) { + uint32_t codePoint; + if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) && + unicode::IsIdentifierStart(codePoint)) + { + reportError(JSMSG_IDSTART_AFTER_NUMBER); + goto error; + } + } } // Unlike identifiers and strings, numbers cannot contain escaped @@ -1425,9 +1546,21 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) } ungetCharIgnoreEOL(c); - if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) { - reportError(JSMSG_IDSTART_AFTER_NUMBER); - goto error; + if (c != EOF) { + if (unicode::IsIdentifierStart(char16_t(c))) { + reportError(JSMSG_IDSTART_AFTER_NUMBER); + goto error; + } + + if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) { + uint32_t codePoint; + if (matchTrailForLeadSurrogate(c, nullptr, &codePoint) && + unicode::IsIdentifierStart(codePoint)) + { + reportError(JSMSG_IDSTART_AFTER_NUMBER); + goto error; + } + } } double dval; diff --git a/js/src/frontend/TokenStream.h b/js/src/frontend/TokenStream.h index 29dcead62e..5d6b4b795a 100644 --- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -952,6 +952,7 @@ class MOZ_STACK_CLASS TokenStream uint32_t peekExtendedUnicodeEscape(uint32_t* codePoint); uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint); bool matchUnicodeEscapeIdent(uint32_t* codePoint); + bool matchTrailForLeadSurrogate(char16_t lead, char16_t* trail, uint32_t* codePoint); bool peekChars(int n, char16_t* cp); MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated); diff --git a/js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js b/js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js index 8e0a05fb50..e4b5f46029 100644 --- a/js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js +++ b/js/src/tests/ecma_6/Syntax/identifiers-with-extended-unicode-escape.js @@ -98,7 +98,7 @@ const otherIdContinue = [ 0x19DA, // NEW TAI LUE THAM DIGIT ONE, Gc=No ]; -for (let ident of [...idStart, ...otherIdStart]) { +for (let ident of [...idStart, ...otherIdStart, ...idStartSupplemental]) { for (let count of leadingZeros) { let zeros = "0".repeat(count); eval(` @@ -108,20 +108,13 @@ for (let ident of [...idStart, ...otherIdStart]) { } } -// Move this to the loop above when Bug 1197230 is fixed. -for (let ident of [...idStartSupplemental]) { - for (let zeros of leadingZeros) { - assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError); - } -} - for (let ident of [...idContinue, ...idContinueSupplemental, ...otherIdContinue]) { for (let zeros of leadingZeros) { assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError); } } -for (let ident of [...idStart, ...otherIdStart, ...idContinue, ...otherIdContinue]) { +for (let ident of [...idStart, ...otherIdStart, ...idContinue, ...otherIdContinue, ...idStartSupplemental, ...idContinueSupplemental]) { for (let zeros of leadingZeros) { eval(` let A\\u{${zeros}${ident.toString(16)}} = 123; @@ -130,13 +123,6 @@ for (let ident of [...idStart, ...otherIdStart, ...idContinue, ...otherIdContinu } } -// Move this to the loop above when Bug 1197230 is fixed. -for (let ident of [...idStartSupplemental, ...idContinueSupplemental]) { - for (let zeros of leadingZeros) { - assertThrowsInstanceOf(() => eval(`\\u{${zeros}${ident.toString(16)}}`), SyntaxError); - } -} - const notIdentifiers = [ 0x0000, // NULL, Gc=Cc diff --git a/js/src/vm/Unicode.cpp b/js/src/vm/Unicode.cpp index f4acf8f31f..82541c2312 100644 --- a/js/src/vm/Unicode.cpp +++ b/js/src/vm/Unicode.cpp @@ -1748,3 +1748,882 @@ const uint8_t unicode::folding_index2[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +bool +js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint) +{ + if (codePoint >= 0x10000 && codePoint <= 0x1000b) + return true; + if (codePoint >= 0x1000d && codePoint <= 0x10026) + return true; + if (codePoint >= 0x10028 && codePoint <= 0x1003a) + return true; + if (codePoint >= 0x1003c && codePoint <= 0x1003d) + return true; + if (codePoint >= 0x1003f && codePoint <= 0x1004d) + return true; + if (codePoint >= 0x10050 && codePoint <= 0x1005d) + return true; + if (codePoint >= 0x10080 && codePoint <= 0x100fa) + return true; + if (codePoint >= 0x10140 && codePoint <= 0x10174) + return true; + if (codePoint >= 0x10280 && codePoint <= 0x1029c) + return true; + if (codePoint >= 0x102a0 && codePoint <= 0x102d0) + return true; + if (codePoint >= 0x10300 && codePoint <= 0x1031f) + return true; + if (codePoint >= 0x10330 && codePoint <= 0x1034a) + return true; + if (codePoint >= 0x10350 && codePoint <= 0x10375) + return true; + if (codePoint >= 0x10380 && codePoint <= 0x1039d) + return true; + if (codePoint >= 0x103a0 && codePoint <= 0x103c3) + return true; + if (codePoint >= 0x103c8 && codePoint <= 0x103cf) + return true; + if (codePoint >= 0x103d1 && codePoint <= 0x103d5) + return true; + if (codePoint >= 0x10400 && codePoint <= 0x1049d) + return true; + if (codePoint >= 0x104b0 && codePoint <= 0x104d3) + return true; + if (codePoint >= 0x104d8 && codePoint <= 0x104fb) + return true; + if (codePoint >= 0x10500 && codePoint <= 0x10527) + return true; + if (codePoint >= 0x10530 && codePoint <= 0x10563) + return true; + if (codePoint >= 0x10600 && codePoint <= 0x10736) + return true; + if (codePoint >= 0x10740 && codePoint <= 0x10755) + return true; + if (codePoint >= 0x10760 && codePoint <= 0x10767) + return true; + if (codePoint >= 0x10800 && codePoint <= 0x10805) + return true; + if (codePoint >= 0x10808 && codePoint <= 0x10808) + return true; + if (codePoint >= 0x1080a && codePoint <= 0x10835) + return true; + if (codePoint >= 0x10837 && codePoint <= 0x10838) + return true; + if (codePoint >= 0x1083c && codePoint <= 0x1083c) + return true; + if (codePoint >= 0x1083f && codePoint <= 0x10855) + return true; + if (codePoint >= 0x10860 && codePoint <= 0x10876) + return true; + if (codePoint >= 0x10880 && codePoint <= 0x1089e) + return true; + if (codePoint >= 0x108e0 && codePoint <= 0x108f2) + return true; + if (codePoint >= 0x108f4 && codePoint <= 0x108f5) + return true; + if (codePoint >= 0x10900 && codePoint <= 0x10915) + return true; + if (codePoint >= 0x10920 && codePoint <= 0x10939) + return true; + if (codePoint >= 0x10980 && codePoint <= 0x109b7) + return true; + if (codePoint >= 0x109be && codePoint <= 0x109bf) + return true; + if (codePoint >= 0x10a00 && codePoint <= 0x10a00) + return true; + if (codePoint >= 0x10a10 && codePoint <= 0x10a13) + return true; + if (codePoint >= 0x10a15 && codePoint <= 0x10a17) + return true; + if (codePoint >= 0x10a19 && codePoint <= 0x10a33) + return true; + if (codePoint >= 0x10a60 && codePoint <= 0x10a7c) + return true; + if (codePoint >= 0x10a80 && codePoint <= 0x10a9c) + return true; + if (codePoint >= 0x10ac0 && codePoint <= 0x10ac7) + return true; + if (codePoint >= 0x10ac9 && codePoint <= 0x10ae4) + return true; + if (codePoint >= 0x10b00 && codePoint <= 0x10b35) + return true; + if (codePoint >= 0x10b40 && codePoint <= 0x10b55) + return true; + if (codePoint >= 0x10b60 && codePoint <= 0x10b72) + return true; + if (codePoint >= 0x10b80 && codePoint <= 0x10b91) + return true; + if (codePoint >= 0x10c00 && codePoint <= 0x10c48) + return true; + if (codePoint >= 0x10c80 && codePoint <= 0x10cb2) + return true; + if (codePoint >= 0x10cc0 && codePoint <= 0x10cf2) + return true; + if (codePoint >= 0x11003 && codePoint <= 0x11037) + return true; + if (codePoint >= 0x11083 && codePoint <= 0x110af) + return true; + if (codePoint >= 0x110d0 && codePoint <= 0x110e8) + return true; + if (codePoint >= 0x11103 && codePoint <= 0x11126) + return true; + if (codePoint >= 0x11150 && codePoint <= 0x11172) + return true; + if (codePoint >= 0x11176 && codePoint <= 0x11176) + return true; + if (codePoint >= 0x11183 && codePoint <= 0x111b2) + return true; + if (codePoint >= 0x111c1 && codePoint <= 0x111c4) + return true; + if (codePoint >= 0x111da && codePoint <= 0x111da) + return true; + if (codePoint >= 0x111dc && codePoint <= 0x111dc) + return true; + if (codePoint >= 0x11200 && codePoint <= 0x11211) + return true; + if (codePoint >= 0x11213 && codePoint <= 0x1122b) + return true; + if (codePoint >= 0x11280 && codePoint <= 0x11286) + return true; + if (codePoint >= 0x11288 && codePoint <= 0x11288) + return true; + if (codePoint >= 0x1128a && codePoint <= 0x1128d) + return true; + if (codePoint >= 0x1128f && codePoint <= 0x1129d) + return true; + if (codePoint >= 0x1129f && codePoint <= 0x112a8) + return true; + if (codePoint >= 0x112b0 && codePoint <= 0x112de) + return true; + if (codePoint >= 0x11305 && codePoint <= 0x1130c) + return true; + if (codePoint >= 0x1130f && codePoint <= 0x11310) + return true; + if (codePoint >= 0x11313 && codePoint <= 0x11328) + return true; + if (codePoint >= 0x1132a && codePoint <= 0x11330) + return true; + if (codePoint >= 0x11332 && codePoint <= 0x11333) + return true; + if (codePoint >= 0x11335 && codePoint <= 0x11339) + return true; + if (codePoint >= 0x1133d && codePoint <= 0x1133d) + return true; + if (codePoint >= 0x11350 && codePoint <= 0x11350) + return true; + if (codePoint >= 0x1135d && codePoint <= 0x11361) + return true; + if (codePoint >= 0x11400 && codePoint <= 0x11434) + return true; + if (codePoint >= 0x11447 && codePoint <= 0x1144a) + return true; + if (codePoint >= 0x11480 && codePoint <= 0x114af) + return true; + if (codePoint >= 0x114c4 && codePoint <= 0x114c5) + return true; + if (codePoint >= 0x114c7 && codePoint <= 0x114c7) + return true; + if (codePoint >= 0x11580 && codePoint <= 0x115ae) + return true; + if (codePoint >= 0x115d8 && codePoint <= 0x115db) + return true; + if (codePoint >= 0x11600 && codePoint <= 0x1162f) + return true; + if (codePoint >= 0x11644 && codePoint <= 0x11644) + return true; + if (codePoint >= 0x11680 && codePoint <= 0x116aa) + return true; + if (codePoint >= 0x11700 && codePoint <= 0x11719) + return true; + if (codePoint >= 0x118a0 && codePoint <= 0x118df) + return true; + if (codePoint >= 0x118ff && codePoint <= 0x118ff) + return true; + if (codePoint >= 0x11ac0 && codePoint <= 0x11af8) + return true; + if (codePoint >= 0x11c00 && codePoint <= 0x11c08) + return true; + if (codePoint >= 0x11c0a && codePoint <= 0x11c2e) + return true; + if (codePoint >= 0x11c40 && codePoint <= 0x11c40) + return true; + if (codePoint >= 0x11c72 && codePoint <= 0x11c8f) + return true; + if (codePoint >= 0x12000 && codePoint <= 0x12399) + return true; + if (codePoint >= 0x12400 && codePoint <= 0x1246e) + return true; + if (codePoint >= 0x12480 && codePoint <= 0x12543) + return true; + if (codePoint >= 0x13000 && codePoint <= 0x1342e) + return true; + if (codePoint >= 0x14400 && codePoint <= 0x14646) + return true; + if (codePoint >= 0x16800 && codePoint <= 0x16a38) + return true; + if (codePoint >= 0x16a40 && codePoint <= 0x16a5e) + return true; + if (codePoint >= 0x16ad0 && codePoint <= 0x16aed) + return true; + if (codePoint >= 0x16b00 && codePoint <= 0x16b2f) + return true; + if (codePoint >= 0x16b40 && codePoint <= 0x16b43) + return true; + if (codePoint >= 0x16b63 && codePoint <= 0x16b77) + return true; + if (codePoint >= 0x16b7d && codePoint <= 0x16b8f) + return true; + if (codePoint >= 0x16f00 && codePoint <= 0x16f44) + return true; + if (codePoint >= 0x16f50 && codePoint <= 0x16f50) + return true; + if (codePoint >= 0x16f93 && codePoint <= 0x16f9f) + return true; + if (codePoint >= 0x16fe0 && codePoint <= 0x16fe0) + return true; + if (codePoint >= 0x17000 && codePoint <= 0x187ec) + return true; + if (codePoint >= 0x18800 && codePoint <= 0x18af2) + return true; + if (codePoint >= 0x1b000 && codePoint <= 0x1b001) + return true; + if (codePoint >= 0x1bc00 && codePoint <= 0x1bc6a) + return true; + if (codePoint >= 0x1bc70 && codePoint <= 0x1bc7c) + return true; + if (codePoint >= 0x1bc80 && codePoint <= 0x1bc88) + return true; + if (codePoint >= 0x1bc90 && codePoint <= 0x1bc99) + return true; + if (codePoint >= 0x1d400 && codePoint <= 0x1d454) + return true; + if (codePoint >= 0x1d456 && codePoint <= 0x1d49c) + return true; + if (codePoint >= 0x1d49e && codePoint <= 0x1d49f) + return true; + if (codePoint >= 0x1d4a2 && codePoint <= 0x1d4a2) + return true; + if (codePoint >= 0x1d4a5 && codePoint <= 0x1d4a6) + return true; + if (codePoint >= 0x1d4a9 && codePoint <= 0x1d4ac) + return true; + if (codePoint >= 0x1d4ae && codePoint <= 0x1d4b9) + return true; + if (codePoint >= 0x1d4bb && codePoint <= 0x1d4bb) + return true; + if (codePoint >= 0x1d4bd && codePoint <= 0x1d4c3) + return true; + if (codePoint >= 0x1d4c5 && codePoint <= 0x1d505) + return true; + if (codePoint >= 0x1d507 && codePoint <= 0x1d50a) + return true; + if (codePoint >= 0x1d50d && codePoint <= 0x1d514) + return true; + if (codePoint >= 0x1d516 && codePoint <= 0x1d51c) + return true; + if (codePoint >= 0x1d51e && codePoint <= 0x1d539) + return true; + if (codePoint >= 0x1d53b && codePoint <= 0x1d53e) + return true; + if (codePoint >= 0x1d540 && codePoint <= 0x1d544) + return true; + if (codePoint >= 0x1d546 && codePoint <= 0x1d546) + return true; + if (codePoint >= 0x1d54a && codePoint <= 0x1d550) + return true; + if (codePoint >= 0x1d552 && codePoint <= 0x1d6a5) + return true; + if (codePoint >= 0x1d6a8 && codePoint <= 0x1d6c0) + return true; + if (codePoint >= 0x1d6c2 && codePoint <= 0x1d6da) + return true; + if (codePoint >= 0x1d6dc && codePoint <= 0x1d6fa) + return true; + if (codePoint >= 0x1d6fc && codePoint <= 0x1d714) + return true; + if (codePoint >= 0x1d716 && codePoint <= 0x1d734) + return true; + if (codePoint >= 0x1d736 && codePoint <= 0x1d74e) + return true; + if (codePoint >= 0x1d750 && codePoint <= 0x1d76e) + return true; + if (codePoint >= 0x1d770 && codePoint <= 0x1d788) + return true; + if (codePoint >= 0x1d78a && codePoint <= 0x1d7a8) + return true; + if (codePoint >= 0x1d7aa && codePoint <= 0x1d7c2) + return true; + if (codePoint >= 0x1d7c4 && codePoint <= 0x1d7cb) + return true; + if (codePoint >= 0x1e800 && codePoint <= 0x1e8c4) + return true; + if (codePoint >= 0x1e900 && codePoint <= 0x1e943) + return true; + if (codePoint >= 0x1ee00 && codePoint <= 0x1ee03) + return true; + if (codePoint >= 0x1ee05 && codePoint <= 0x1ee1f) + return true; + if (codePoint >= 0x1ee21 && codePoint <= 0x1ee22) + return true; + if (codePoint >= 0x1ee24 && codePoint <= 0x1ee24) + return true; + if (codePoint >= 0x1ee27 && codePoint <= 0x1ee27) + return true; + if (codePoint >= 0x1ee29 && codePoint <= 0x1ee32) + return true; + if (codePoint >= 0x1ee34 && codePoint <= 0x1ee37) + return true; + if (codePoint >= 0x1ee39 && codePoint <= 0x1ee39) + return true; + if (codePoint >= 0x1ee3b && codePoint <= 0x1ee3b) + return true; + if (codePoint >= 0x1ee42 && codePoint <= 0x1ee42) + return true; + if (codePoint >= 0x1ee47 && codePoint <= 0x1ee47) + return true; + if (codePoint >= 0x1ee49 && codePoint <= 0x1ee49) + return true; + if (codePoint >= 0x1ee4b && codePoint <= 0x1ee4b) + return true; + if (codePoint >= 0x1ee4d && codePoint <= 0x1ee4f) + return true; + if (codePoint >= 0x1ee51 && codePoint <= 0x1ee52) + return true; + if (codePoint >= 0x1ee54 && codePoint <= 0x1ee54) + return true; + if (codePoint >= 0x1ee57 && codePoint <= 0x1ee57) + return true; + if (codePoint >= 0x1ee59 && codePoint <= 0x1ee59) + return true; + if (codePoint >= 0x1ee5b && codePoint <= 0x1ee5b) + return true; + if (codePoint >= 0x1ee5d && codePoint <= 0x1ee5d) + return true; + if (codePoint >= 0x1ee5f && codePoint <= 0x1ee5f) + return true; + if (codePoint >= 0x1ee61 && codePoint <= 0x1ee62) + return true; + if (codePoint >= 0x1ee64 && codePoint <= 0x1ee64) + return true; + if (codePoint >= 0x1ee67 && codePoint <= 0x1ee6a) + return true; + if (codePoint >= 0x1ee6c && codePoint <= 0x1ee72) + return true; + if (codePoint >= 0x1ee74 && codePoint <= 0x1ee77) + return true; + if (codePoint >= 0x1ee79 && codePoint <= 0x1ee7c) + return true; + if (codePoint >= 0x1ee7e && codePoint <= 0x1ee7e) + return true; + if (codePoint >= 0x1ee80 && codePoint <= 0x1ee89) + return true; + if (codePoint >= 0x1ee8b && codePoint <= 0x1ee9b) + return true; + if (codePoint >= 0x1eea1 && codePoint <= 0x1eea3) + return true; + if (codePoint >= 0x1eea5 && codePoint <= 0x1eea9) + return true; + if (codePoint >= 0x1eeab && codePoint <= 0x1eebb) + return true; + if (codePoint >= 0x20000 && codePoint <= 0x2a6d6) + return true; + if (codePoint >= 0x2a700 && codePoint <= 0x2b734) + return true; + if (codePoint >= 0x2b740 && codePoint <= 0x2b81d) + return true; + if (codePoint >= 0x2b820 && codePoint <= 0x2cea1) + return true; + if (codePoint >= 0x2f800 && codePoint <= 0x2fa1d) + return true; + return false; +} + +bool +js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint) +{ + if (codePoint >= 0x10000 && codePoint <= 0x1000b) + return true; + if (codePoint >= 0x1000d && codePoint <= 0x10026) + return true; + if (codePoint >= 0x10028 && codePoint <= 0x1003a) + return true; + if (codePoint >= 0x1003c && codePoint <= 0x1003d) + return true; + if (codePoint >= 0x1003f && codePoint <= 0x1004d) + return true; + if (codePoint >= 0x10050 && codePoint <= 0x1005d) + return true; + if (codePoint >= 0x10080 && codePoint <= 0x100fa) + return true; + if (codePoint >= 0x10140 && codePoint <= 0x10174) + return true; + if (codePoint >= 0x101fd && codePoint <= 0x101fd) + return true; + if (codePoint >= 0x10280 && codePoint <= 0x1029c) + return true; + if (codePoint >= 0x102a0 && codePoint <= 0x102d0) + return true; + if (codePoint >= 0x102e0 && codePoint <= 0x102e0) + return true; + if (codePoint >= 0x10300 && codePoint <= 0x1031f) + return true; + if (codePoint >= 0x10330 && codePoint <= 0x1034a) + return true; + if (codePoint >= 0x10350 && codePoint <= 0x1037a) + return true; + if (codePoint >= 0x10380 && codePoint <= 0x1039d) + return true; + if (codePoint >= 0x103a0 && codePoint <= 0x103c3) + return true; + if (codePoint >= 0x103c8 && codePoint <= 0x103cf) + return true; + if (codePoint >= 0x103d1 && codePoint <= 0x103d5) + return true; + if (codePoint >= 0x10400 && codePoint <= 0x1049d) + return true; + if (codePoint >= 0x104a0 && codePoint <= 0x104a9) + return true; + if (codePoint >= 0x104b0 && codePoint <= 0x104d3) + return true; + if (codePoint >= 0x104d8 && codePoint <= 0x104fb) + return true; + if (codePoint >= 0x10500 && codePoint <= 0x10527) + return true; + if (codePoint >= 0x10530 && codePoint <= 0x10563) + return true; + if (codePoint >= 0x10600 && codePoint <= 0x10736) + return true; + if (codePoint >= 0x10740 && codePoint <= 0x10755) + return true; + if (codePoint >= 0x10760 && codePoint <= 0x10767) + return true; + if (codePoint >= 0x10800 && codePoint <= 0x10805) + return true; + if (codePoint >= 0x10808 && codePoint <= 0x10808) + return true; + if (codePoint >= 0x1080a && codePoint <= 0x10835) + return true; + if (codePoint >= 0x10837 && codePoint <= 0x10838) + return true; + if (codePoint >= 0x1083c && codePoint <= 0x1083c) + return true; + if (codePoint >= 0x1083f && codePoint <= 0x10855) + return true; + if (codePoint >= 0x10860 && codePoint <= 0x10876) + return true; + if (codePoint >= 0x10880 && codePoint <= 0x1089e) + return true; + if (codePoint >= 0x108e0 && codePoint <= 0x108f2) + return true; + if (codePoint >= 0x108f4 && codePoint <= 0x108f5) + return true; + if (codePoint >= 0x10900 && codePoint <= 0x10915) + return true; + if (codePoint >= 0x10920 && codePoint <= 0x10939) + return true; + if (codePoint >= 0x10980 && codePoint <= 0x109b7) + return true; + if (codePoint >= 0x109be && codePoint <= 0x109bf) + return true; + if (codePoint >= 0x10a00 && codePoint <= 0x10a03) + return true; + if (codePoint >= 0x10a05 && codePoint <= 0x10a06) + return true; + if (codePoint >= 0x10a0c && codePoint <= 0x10a13) + return true; + if (codePoint >= 0x10a15 && codePoint <= 0x10a17) + return true; + if (codePoint >= 0x10a19 && codePoint <= 0x10a33) + return true; + if (codePoint >= 0x10a38 && codePoint <= 0x10a3a) + return true; + if (codePoint >= 0x10a3f && codePoint <= 0x10a3f) + return true; + if (codePoint >= 0x10a60 && codePoint <= 0x10a7c) + return true; + if (codePoint >= 0x10a80 && codePoint <= 0x10a9c) + return true; + if (codePoint >= 0x10ac0 && codePoint <= 0x10ac7) + return true; + if (codePoint >= 0x10ac9 && codePoint <= 0x10ae6) + return true; + if (codePoint >= 0x10b00 && codePoint <= 0x10b35) + return true; + if (codePoint >= 0x10b40 && codePoint <= 0x10b55) + return true; + if (codePoint >= 0x10b60 && codePoint <= 0x10b72) + return true; + if (codePoint >= 0x10b80 && codePoint <= 0x10b91) + return true; + if (codePoint >= 0x10c00 && codePoint <= 0x10c48) + return true; + if (codePoint >= 0x10c80 && codePoint <= 0x10cb2) + return true; + if (codePoint >= 0x10cc0 && codePoint <= 0x10cf2) + return true; + if (codePoint >= 0x11000 && codePoint <= 0x11046) + return true; + if (codePoint >= 0x11066 && codePoint <= 0x1106f) + return true; + if (codePoint >= 0x1107f && codePoint <= 0x110ba) + return true; + if (codePoint >= 0x110d0 && codePoint <= 0x110e8) + return true; + if (codePoint >= 0x110f0 && codePoint <= 0x110f9) + return true; + if (codePoint >= 0x11100 && codePoint <= 0x11134) + return true; + if (codePoint >= 0x11136 && codePoint <= 0x1113f) + return true; + if (codePoint >= 0x11150 && codePoint <= 0x11173) + return true; + if (codePoint >= 0x11176 && codePoint <= 0x11176) + return true; + if (codePoint >= 0x11180 && codePoint <= 0x111c4) + return true; + if (codePoint >= 0x111ca && codePoint <= 0x111cc) + return true; + if (codePoint >= 0x111d0 && codePoint <= 0x111da) + return true; + if (codePoint >= 0x111dc && codePoint <= 0x111dc) + return true; + if (codePoint >= 0x11200 && codePoint <= 0x11211) + return true; + if (codePoint >= 0x11213 && codePoint <= 0x11237) + return true; + if (codePoint >= 0x1123e && codePoint <= 0x1123e) + return true; + if (codePoint >= 0x11280 && codePoint <= 0x11286) + return true; + if (codePoint >= 0x11288 && codePoint <= 0x11288) + return true; + if (codePoint >= 0x1128a && codePoint <= 0x1128d) + return true; + if (codePoint >= 0x1128f && codePoint <= 0x1129d) + return true; + if (codePoint >= 0x1129f && codePoint <= 0x112a8) + return true; + if (codePoint >= 0x112b0 && codePoint <= 0x112ea) + return true; + if (codePoint >= 0x112f0 && codePoint <= 0x112f9) + return true; + if (codePoint >= 0x11300 && codePoint <= 0x11303) + return true; + if (codePoint >= 0x11305 && codePoint <= 0x1130c) + return true; + if (codePoint >= 0x1130f && codePoint <= 0x11310) + return true; + if (codePoint >= 0x11313 && codePoint <= 0x11328) + return true; + if (codePoint >= 0x1132a && codePoint <= 0x11330) + return true; + if (codePoint >= 0x11332 && codePoint <= 0x11333) + return true; + if (codePoint >= 0x11335 && codePoint <= 0x11339) + return true; + if (codePoint >= 0x1133c && codePoint <= 0x11344) + return true; + if (codePoint >= 0x11347 && codePoint <= 0x11348) + return true; + if (codePoint >= 0x1134b && codePoint <= 0x1134d) + return true; + if (codePoint >= 0x11350 && codePoint <= 0x11350) + return true; + if (codePoint >= 0x11357 && codePoint <= 0x11357) + return true; + if (codePoint >= 0x1135d && codePoint <= 0x11363) + return true; + if (codePoint >= 0x11366 && codePoint <= 0x1136c) + return true; + if (codePoint >= 0x11370 && codePoint <= 0x11374) + return true; + if (codePoint >= 0x11400 && codePoint <= 0x1144a) + return true; + if (codePoint >= 0x11450 && codePoint <= 0x11459) + return true; + if (codePoint >= 0x11480 && codePoint <= 0x114c5) + return true; + if (codePoint >= 0x114c7 && codePoint <= 0x114c7) + return true; + if (codePoint >= 0x114d0 && codePoint <= 0x114d9) + return true; + if (codePoint >= 0x11580 && codePoint <= 0x115b5) + return true; + if (codePoint >= 0x115b8 && codePoint <= 0x115c0) + return true; + if (codePoint >= 0x115d8 && codePoint <= 0x115dd) + return true; + if (codePoint >= 0x11600 && codePoint <= 0x11640) + return true; + if (codePoint >= 0x11644 && codePoint <= 0x11644) + return true; + if (codePoint >= 0x11650 && codePoint <= 0x11659) + return true; + if (codePoint >= 0x11680 && codePoint <= 0x116b7) + return true; + if (codePoint >= 0x116c0 && codePoint <= 0x116c9) + return true; + if (codePoint >= 0x11700 && codePoint <= 0x11719) + return true; + if (codePoint >= 0x1171d && codePoint <= 0x1172b) + return true; + if (codePoint >= 0x11730 && codePoint <= 0x11739) + return true; + if (codePoint >= 0x118a0 && codePoint <= 0x118e9) + return true; + if (codePoint >= 0x118ff && codePoint <= 0x118ff) + return true; + if (codePoint >= 0x11ac0 && codePoint <= 0x11af8) + return true; + if (codePoint >= 0x11c00 && codePoint <= 0x11c08) + return true; + if (codePoint >= 0x11c0a && codePoint <= 0x11c36) + return true; + if (codePoint >= 0x11c38 && codePoint <= 0x11c40) + return true; + if (codePoint >= 0x11c50 && codePoint <= 0x11c59) + return true; + if (codePoint >= 0x11c72 && codePoint <= 0x11c8f) + return true; + if (codePoint >= 0x11c92 && codePoint <= 0x11ca7) + return true; + if (codePoint >= 0x11ca9 && codePoint <= 0x11cb6) + return true; + if (codePoint >= 0x12000 && codePoint <= 0x12399) + return true; + if (codePoint >= 0x12400 && codePoint <= 0x1246e) + return true; + if (codePoint >= 0x12480 && codePoint <= 0x12543) + return true; + if (codePoint >= 0x13000 && codePoint <= 0x1342e) + return true; + if (codePoint >= 0x14400 && codePoint <= 0x14646) + return true; + if (codePoint >= 0x16800 && codePoint <= 0x16a38) + return true; + if (codePoint >= 0x16a40 && codePoint <= 0x16a5e) + return true; + if (codePoint >= 0x16a60 && codePoint <= 0x16a69) + return true; + if (codePoint >= 0x16ad0 && codePoint <= 0x16aed) + return true; + if (codePoint >= 0x16af0 && codePoint <= 0x16af4) + return true; + if (codePoint >= 0x16b00 && codePoint <= 0x16b36) + return true; + if (codePoint >= 0x16b40 && codePoint <= 0x16b43) + return true; + if (codePoint >= 0x16b50 && codePoint <= 0x16b59) + return true; + if (codePoint >= 0x16b63 && codePoint <= 0x16b77) + return true; + if (codePoint >= 0x16b7d && codePoint <= 0x16b8f) + return true; + if (codePoint >= 0x16f00 && codePoint <= 0x16f44) + return true; + if (codePoint >= 0x16f50 && codePoint <= 0x16f7e) + return true; + if (codePoint >= 0x16f8f && codePoint <= 0x16f9f) + return true; + if (codePoint >= 0x16fe0 && codePoint <= 0x16fe0) + return true; + if (codePoint >= 0x17000 && codePoint <= 0x187ec) + return true; + if (codePoint >= 0x18800 && codePoint <= 0x18af2) + return true; + if (codePoint >= 0x1b000 && codePoint <= 0x1b001) + return true; + if (codePoint >= 0x1bc00 && codePoint <= 0x1bc6a) + return true; + if (codePoint >= 0x1bc70 && codePoint <= 0x1bc7c) + return true; + if (codePoint >= 0x1bc80 && codePoint <= 0x1bc88) + return true; + if (codePoint >= 0x1bc90 && codePoint <= 0x1bc99) + return true; + if (codePoint >= 0x1bc9d && codePoint <= 0x1bc9e) + return true; + if (codePoint >= 0x1d165 && codePoint <= 0x1d169) + return true; + if (codePoint >= 0x1d16d && codePoint <= 0x1d172) + return true; + if (codePoint >= 0x1d17b && codePoint <= 0x1d182) + return true; + if (codePoint >= 0x1d185 && codePoint <= 0x1d18b) + return true; + if (codePoint >= 0x1d1aa && codePoint <= 0x1d1ad) + return true; + if (codePoint >= 0x1d242 && codePoint <= 0x1d244) + return true; + if (codePoint >= 0x1d400 && codePoint <= 0x1d454) + return true; + if (codePoint >= 0x1d456 && codePoint <= 0x1d49c) + return true; + if (codePoint >= 0x1d49e && codePoint <= 0x1d49f) + return true; + if (codePoint >= 0x1d4a2 && codePoint <= 0x1d4a2) + return true; + if (codePoint >= 0x1d4a5 && codePoint <= 0x1d4a6) + return true; + if (codePoint >= 0x1d4a9 && codePoint <= 0x1d4ac) + return true; + if (codePoint >= 0x1d4ae && codePoint <= 0x1d4b9) + return true; + if (codePoint >= 0x1d4bb && codePoint <= 0x1d4bb) + return true; + if (codePoint >= 0x1d4bd && codePoint <= 0x1d4c3) + return true; + if (codePoint >= 0x1d4c5 && codePoint <= 0x1d505) + return true; + if (codePoint >= 0x1d507 && codePoint <= 0x1d50a) + return true; + if (codePoint >= 0x1d50d && codePoint <= 0x1d514) + return true; + if (codePoint >= 0x1d516 && codePoint <= 0x1d51c) + return true; + if (codePoint >= 0x1d51e && codePoint <= 0x1d539) + return true; + if (codePoint >= 0x1d53b && codePoint <= 0x1d53e) + return true; + if (codePoint >= 0x1d540 && codePoint <= 0x1d544) + return true; + if (codePoint >= 0x1d546 && codePoint <= 0x1d546) + return true; + if (codePoint >= 0x1d54a && codePoint <= 0x1d550) + return true; + if (codePoint >= 0x1d552 && codePoint <= 0x1d6a5) + return true; + if (codePoint >= 0x1d6a8 && codePoint <= 0x1d6c0) + return true; + if (codePoint >= 0x1d6c2 && codePoint <= 0x1d6da) + return true; + if (codePoint >= 0x1d6dc && codePoint <= 0x1d6fa) + return true; + if (codePoint >= 0x1d6fc && codePoint <= 0x1d714) + return true; + if (codePoint >= 0x1d716 && codePoint <= 0x1d734) + return true; + if (codePoint >= 0x1d736 && codePoint <= 0x1d74e) + return true; + if (codePoint >= 0x1d750 && codePoint <= 0x1d76e) + return true; + if (codePoint >= 0x1d770 && codePoint <= 0x1d788) + return true; + if (codePoint >= 0x1d78a && codePoint <= 0x1d7a8) + return true; + if (codePoint >= 0x1d7aa && codePoint <= 0x1d7c2) + return true; + if (codePoint >= 0x1d7c4 && codePoint <= 0x1d7cb) + return true; + if (codePoint >= 0x1d7ce && codePoint <= 0x1d7ff) + return true; + if (codePoint >= 0x1da00 && codePoint <= 0x1da36) + return true; + if (codePoint >= 0x1da3b && codePoint <= 0x1da6c) + return true; + if (codePoint >= 0x1da75 && codePoint <= 0x1da75) + return true; + if (codePoint >= 0x1da84 && codePoint <= 0x1da84) + return true; + if (codePoint >= 0x1da9b && codePoint <= 0x1da9f) + return true; + if (codePoint >= 0x1daa1 && codePoint <= 0x1daaf) + return true; + if (codePoint >= 0x1e000 && codePoint <= 0x1e006) + return true; + if (codePoint >= 0x1e008 && codePoint <= 0x1e018) + return true; + if (codePoint >= 0x1e01b && codePoint <= 0x1e021) + return true; + if (codePoint >= 0x1e023 && codePoint <= 0x1e024) + return true; + if (codePoint >= 0x1e026 && codePoint <= 0x1e02a) + return true; + if (codePoint >= 0x1e800 && codePoint <= 0x1e8c4) + return true; + if (codePoint >= 0x1e8d0 && codePoint <= 0x1e8d6) + return true; + if (codePoint >= 0x1e900 && codePoint <= 0x1e94a) + return true; + if (codePoint >= 0x1e950 && codePoint <= 0x1e959) + return true; + if (codePoint >= 0x1ee00 && codePoint <= 0x1ee03) + return true; + if (codePoint >= 0x1ee05 && codePoint <= 0x1ee1f) + return true; + if (codePoint >= 0x1ee21 && codePoint <= 0x1ee22) + return true; + if (codePoint >= 0x1ee24 && codePoint <= 0x1ee24) + return true; + if (codePoint >= 0x1ee27 && codePoint <= 0x1ee27) + return true; + if (codePoint >= 0x1ee29 && codePoint <= 0x1ee32) + return true; + if (codePoint >= 0x1ee34 && codePoint <= 0x1ee37) + return true; + if (codePoint >= 0x1ee39 && codePoint <= 0x1ee39) + return true; + if (codePoint >= 0x1ee3b && codePoint <= 0x1ee3b) + return true; + if (codePoint >= 0x1ee42 && codePoint <= 0x1ee42) + return true; + if (codePoint >= 0x1ee47 && codePoint <= 0x1ee47) + return true; + if (codePoint >= 0x1ee49 && codePoint <= 0x1ee49) + return true; + if (codePoint >= 0x1ee4b && codePoint <= 0x1ee4b) + return true; + if (codePoint >= 0x1ee4d && codePoint <= 0x1ee4f) + return true; + if (codePoint >= 0x1ee51 && codePoint <= 0x1ee52) + return true; + if (codePoint >= 0x1ee54 && codePoint <= 0x1ee54) + return true; + if (codePoint >= 0x1ee57 && codePoint <= 0x1ee57) + return true; + if (codePoint >= 0x1ee59 && codePoint <= 0x1ee59) + return true; + if (codePoint >= 0x1ee5b && codePoint <= 0x1ee5b) + return true; + if (codePoint >= 0x1ee5d && codePoint <= 0x1ee5d) + return true; + if (codePoint >= 0x1ee5f && codePoint <= 0x1ee5f) + return true; + if (codePoint >= 0x1ee61 && codePoint <= 0x1ee62) + return true; + if (codePoint >= 0x1ee64 && codePoint <= 0x1ee64) + return true; + if (codePoint >= 0x1ee67 && codePoint <= 0x1ee6a) + return true; + if (codePoint >= 0x1ee6c && codePoint <= 0x1ee72) + return true; + if (codePoint >= 0x1ee74 && codePoint <= 0x1ee77) + return true; + if (codePoint >= 0x1ee79 && codePoint <= 0x1ee7c) + return true; + if (codePoint >= 0x1ee7e && codePoint <= 0x1ee7e) + return true; + if (codePoint >= 0x1ee80 && codePoint <= 0x1ee89) + return true; + if (codePoint >= 0x1ee8b && codePoint <= 0x1ee9b) + return true; + if (codePoint >= 0x1eea1 && codePoint <= 0x1eea3) + return true; + if (codePoint >= 0x1eea5 && codePoint <= 0x1eea9) + return true; + if (codePoint >= 0x1eeab && codePoint <= 0x1eebb) + return true; + if (codePoint >= 0x20000 && codePoint <= 0x2a6d6) + return true; + if (codePoint >= 0x2a700 && codePoint <= 0x2b734) + return true; + if (codePoint >= 0x2b740 && codePoint <= 0x2b81d) + return true; + if (codePoint >= 0x2b820 && codePoint <= 0x2cea1) + return true; + if (codePoint >= 0x2f800 && codePoint <= 0x2fa1d) + return true; + if (codePoint >= 0xe0100 && codePoint <= 0xe01ef) + return true; + return false; +} diff --git a/js/src/vm/Unicode.h b/js/src/vm/Unicode.h index 8b538d06de..bdac848fbd 100644 --- a/js/src/vm/Unicode.h +++ b/js/src/vm/Unicode.h @@ -143,11 +143,15 @@ IsIdentifierStart(char16_t ch) return CharInfo(ch).isUnicodeIDStart(); } +bool +IsIdentifierStartNonBMP(uint32_t codePoint); + inline bool IsIdentifierStart(uint32_t codePoint) { - // TODO: Supplemental code points not yet supported (bug 1197230). - return codePoint <= UTF16Max && IsIdentifierStart(char16_t(codePoint)); + if (MOZ_UNLIKELY(codePoint > UTF16Max)) + return IsIdentifierStartNonBMP(codePoint); + return IsIdentifierStart(char16_t(codePoint)); } inline bool @@ -170,11 +174,16 @@ IsIdentifierPart(char16_t ch) return CharInfo(ch).isUnicodeIDContinue(); } + +bool +IsIdentifierPartNonBMP(uint32_t codePoint); + inline bool IsIdentifierPart(uint32_t codePoint) { - // TODO: Supplemental code points not yet supported (bug 1197230). - return codePoint <= UTF16Max && IsIdentifierPart(char16_t(codePoint)); + if (MOZ_UNLIKELY(codePoint > UTF16Max)) + return IsIdentifierPartNonBMP(codePoint); + return IsIdentifierPart(char16_t(codePoint)); } inline bool @@ -183,6 +192,17 @@ IsUnicodeIDStart(char16_t ch) return CharInfo(ch).isUnicodeIDStart(); } +bool +IsUnicodeIDStartNonBMP(uint32_t codePoint); + +inline bool +IsUnicodeIDStart(uint32_t codePoint) +{ + if (MOZ_UNLIKELY(codePoint > UTF16Max)) + return IsIdentifierStartNonBMP(codePoint); + return IsUnicodeIDStart(char16_t(codePoint)); +} + inline bool IsSpace(char16_t ch) { diff --git a/js/src/vm/UnicodeNonBMP.h b/js/src/vm/UnicodeNonBMP.h index 6cc64cde4a..f99e227cdd 100644 --- a/js/src/vm/UnicodeNonBMP.h +++ b/js/src/vm/UnicodeNonBMP.h @@ -10,6 +10,16 @@ #ifndef vm_UnicodeNonBMP_h #define vm_UnicodeNonBMP_h +// |macro| receives the following arguments +// macro(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) +// FROM: code point where the range starts +// TO: code point where the range ends +// LEAD: common lead surrogate of FROM and TO +// TRAIL_FROM: trail surrogate of FROM +// TRAIL_FROM: trail surrogate of TO +// DIFF: the difference between the code point in the range and +// converted code point + #define FOR_EACH_NON_BMP_LOWERCASE(macro) \ macro(0x10400, 0x10427, 0xd801, 0xdc00, 0xdc27, 40) \ macro(0x104b0, 0x104d3, 0xd801, 0xdcb0, 0xdcd3, 40) \ diff --git a/js/src/vm/make_unicode.py b/js/src/vm/make_unicode.py index 73c090ac91..83f0d004b3 100755 --- a/js/src/vm/make_unicode.py +++ b/js/src/vm/make_unicode.py @@ -155,37 +155,65 @@ def utf16_encode(code): return lead, trail def make_non_bmp_convert_macro(out_file, name, convert_map): + # Find continuous range in convert_map. convert_list = [] entry = None for code in sorted(convert_map.keys()): + lead, trail = utf16_encode(code) converted = convert_map[code] diff = converted - code - if entry and code == entry['code'] + entry['length'] and diff == entry['diff']: + if (entry and code == entry['code'] + entry['length'] and + diff == entry['diff'] and lead == entry['lead']): + entry['length'] += 1 continue - entry = { 'code': code, 'diff': diff, 'length': 1 } + entry = { + 'code': code, + 'diff': diff, + 'length': 1, + 'lead': lead, + 'trail': trail, + } convert_list.append(entry) + # Generate macro call for each range. lines = [] for entry in convert_list: from_code = entry['code'] to_code = entry['code'] + entry['length'] - 1 diff = entry['diff'] - from_lead, from_trail = utf16_encode(from_code) - to_lead, to_trail = utf16_encode(to_code) - - assert from_lead == to_lead + lead = entry['lead'] + from_trail = entry['trail'] + to_trail = entry['trail'] + entry['length'] - 1 lines.append(' macro(0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, 0x{:x}, {:d})'.format( - from_code, to_code, from_lead, from_trail, to_trail, diff)) + from_code, to_code, lead, from_trail, to_trail, diff)) out_file.write('#define FOR_EACH_NON_BMP_{}(macro) \\\n'.format(name)) out_file.write(' \\\n'.join(lines)) out_file.write('\n') +def for_each_non_bmp_group(group_set): + # Find continuous range in group_set. + group_list = [] + entry = None + for code in sorted(group_set.keys()): + if entry and code == entry['code'] + entry['length']: + entry['length'] += 1 + continue + + entry = { + 'code': code, + 'length': 1 + } + group_list.append(entry) + + for entry in group_list: + yield (entry['code'], entry['code'] + entry['length'] - 1) + def process_derived_core_properties(derived_core_properties): id_start = set() id_continue = set() @@ -214,6 +242,9 @@ def process_unicode_data(unicode_data, derived_core_properties): non_bmp_lower_map = {} non_bmp_upper_map = {} + non_bmp_id_start_set = {} + non_bmp_id_cont_set = {} + non_bmp_space_set = {} (id_start, id_continue) = process_derived_core_properties(derived_core_properties) @@ -246,6 +277,13 @@ def process_unicode_data(unicode_data, derived_core_properties): non_bmp_lower_map[code] = lower if code != upper: non_bmp_upper_map[code] = upper + if category == 'Zs': + non_bmp_space_set[code] = 1 + test_space_table.append(code) + if code in id_start: + non_bmp_id_start_set[code] = 1 + if code in id_continue: + non_bmp_id_cont_set[code] = 1 continue # we combine whitespace and lineterminators because in pratice we don't need them separated @@ -315,6 +353,8 @@ def process_unicode_data(unicode_data, derived_core_properties): table, index, same_upper_table, same_upper_index, non_bmp_lower_map, non_bmp_upper_map, + non_bmp_space_set, + non_bmp_id_start_set, non_bmp_id_cont_set, test_table, test_space_table, ) @@ -412,6 +452,16 @@ def make_non_bmp_file(version, #ifndef vm_UnicodeNonBMP_h #define vm_UnicodeNonBMP_h +// |macro| receives the following arguments +// macro(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) +// FROM: code point where the range starts +// TO: code point where the range ends +// LEAD: common lead surrogate of FROM and TO +// TRAIL_FROM: trail surrogate of FROM +// TRAIL_FROM: trail surrogate of TO +// DIFF: the difference between the code point in the range and +// converted code point + """) make_non_bmp_convert_macro(non_bmp_file, 'LOWERCASE', non_bmp_lower_map) @@ -525,7 +575,9 @@ if (typeof reportCompare === "function") def make_unicode_file(version, table, index, same_upper_table, same_upper_index, - folding_table, folding_index): + folding_table, folding_index, + non_bmp_space_set, + non_bmp_id_start_set, non_bmp_id_cont_set): index1, index2, shift = splitbins(index) # Don't forget to update CharInfo in Unicode.h if you need to change this @@ -682,6 +734,43 @@ def make_unicode_file(version, dump(folding_index2, 'folding_index2', data_file) data_file.write('\n') + # If the following assert fails, it means space character is added to + # non-BMP area. In that case the following code should be uncommented + # and the corresponding code should be added to frontend. + assert len(non_bmp_space_set.keys()) == 0 + + data_file.write("""\ +bool +js::unicode::IsIdentifierStartNonBMP(uint32_t codePoint) +{ +""") + + for (from_code, to_code) in for_each_non_bmp_group(non_bmp_id_start_set): + data_file.write("""\ + if (codePoint >= 0x{:x} && codePoint <= 0x{:x}) + return true; +""".format(from_code, to_code)) + + data_file.write("""\ + return false; +} + +bool +js::unicode::IsIdentifierPartNonBMP(uint32_t codePoint) +{ +""") + + for (from_code, to_code) in for_each_non_bmp_group(non_bmp_id_cont_set): + data_file.write("""\ + if (codePoint >= 0x{:x} && codePoint <= 0x{:x}) + return true; +""".format(from_code, to_code)) + + data_file.write("""\ + return false; +} +""") + def getsize(data): """ return smallest possible integer size for the given array """ maxdata = max(data) @@ -1000,6 +1089,8 @@ def update_unicode(args): table, index, same_upper_table, same_upper_index, non_bmp_lower_map, non_bmp_upper_map, + non_bmp_space_set, + non_bmp_id_start_set, non_bmp_id_cont_set, test_table, test_space_table ) = process_unicode_data(unicode_data, derived_core_properties) ( @@ -1012,7 +1103,9 @@ def update_unicode(args): make_unicode_file(unicode_version, table, index, same_upper_table, same_upper_index, - folding_table, folding_index) + folding_table, folding_index, + non_bmp_space_set, + non_bmp_id_start_set, non_bmp_id_cont_set) make_non_bmp_file(unicode_version, non_bmp_lower_map, non_bmp_upper_map, non_bmp_folding_map, non_bmp_rev_folding_map) |