summary | refs | log | tree | commit | diff
path: root/js/src
diff options
context:
space:
mode:
author: Moonchild <moonchild@palemoon.org> 2021-02-11 13:55:24 +0000
committer: Moonchild <moonchild@palemoon.org> 2021-02-11 13:55:24 +0000
commit: 9ca880e20012f560af27ae624aaf37a3b19eba99 (patch)
tree: f4105248670f38fc13557dd4c69e6431a9ba8007 /js/src
parent: 2c72b8859a959629462a58b1385408e25bb89bad (diff)
download: uxp-9ca880e20012f560af27ae624aaf37a3b19eba99.tar.gz
Issue #1738 - Part 2: Implement well-formed JSON stringify
This implements the ES2019 spec for JSON stringification, including lower-case hex digits in Unicode escapes, proper escaping of lone surrogates, etc.
Diffstat (limited to 'js/src')
-rw-r--r-- js/src/json.cpp 69
-rw-r--r-- js/src/vm/Unicode.h 13
2 files changed, 68 insertions, 14 deletions
diff --git a/js/src/json.cpp b/js/src/json.cpp
index 73e37e2370..e32994e908 100644
--- a/js/src/json.cpp
+++ b/js/src/json.cpp
@@ -66,26 +66,67 @@ InfallibleQuote(RangedPtr<const SrcCharT> srcBegin, RangedPtr<const SrcCharT> sr
/* Step 1. */
*dstPtr++ = '"';
+ // XXX: This is a rather ugly in-line definition. Move it somewhere better?
+ auto ToLowerHex = [](uint8_t u) {
+ MOZ_ASSERT(u <= 0xF);
+ return "0123456789abcdef"[u];
+ };
+
/* Step 2. */
while (srcBegin != srcEnd) {
- SrcCharT c = *srcBegin++;
- size_t escapeIndex = c % sizeof(escapeLookup);
- Latin1Char escaped = escapeLookup[escapeIndex];
- if (MOZ_LIKELY((escapeIndex != size_t(c)) || !escaped)) {
+ const SrcCharT c = *srcBegin++;
+
+ // Handle the Latin-1 cases.
+ if (MOZ_LIKELY(c < sizeof(escapeLookup))) {
+ Latin1Char escaped = escapeLookup[c];
+
+ // Directly copy non-escaped code points.
+ if (escaped == 0) {
+ *dstPtr++ = c;
+ continue;
+ }
+
+ // Escape the rest, elaborating Unicode escapes when needed.
+ *dstPtr++ = '\\';
+ *dstPtr++ = escaped;
+ if (escaped == 'u') {
+ *dstPtr++ = '0';
+ *dstPtr++ = '0';
+
+ uint8_t x = c >> 4;
+ MOZ_ASSERT(x < 10);
+ *dstPtr++ = '0' + x;
+
+ *dstPtr++ = ToLowerHex(c & 0xF);
+ }
+
+ continue;
+ }
+
+ // Non-ASCII non-surrogates are directly copied.
+ if (!unicode::IsSurrogate(c)) {
*dstPtr++ = c;
continue;
}
- *dstPtr++ = '\\';
- *dstPtr++ = escaped;
- if (escaped == 'u') {
- MOZ_ASSERT(c < ' ');
- MOZ_ASSERT((c >> 4) < 10);
- uint8_t x = c >> 4, y = c % 16;
- *dstPtr++ = '0';
- *dstPtr++ = '0';
- *dstPtr++ = '0' + x;
- *dstPtr++ = y < 10 ? '0' + y : 'a' + (y - 10);
+
+ // So too for complete surrogate pairs.
+ if (MOZ_LIKELY(unicode::IsLeadSurrogate(c) &&
+ srcBegin < srcEnd &&
+ unicode::IsTrailSurrogate(*srcBegin)))
+ {
+ *dstPtr++ = c;
+ *dstPtr++ = *srcBegin++;
+ continue;
}
+
+ // But lone surrogates are Unicode-escaped.
+ char32_t as32 = char32_t(c);
+ *dstPtr++ = '\\';
+ *dstPtr++ = 'u';
+ *dstPtr++ = ToLowerHex(as32 >> 12);
+ *dstPtr++ = ToLowerHex((as32 >> 8) & 0xF);
+ *dstPtr++ = ToLowerHex((as32 >> 4) & 0xF);
+ *dstPtr++ = ToLowerHex(as32 & 0xF);
}
/* Steps 3-4. */
diff --git a/js/src/vm/Unicode.h b/js/src/vm/Unicode.h
index e470f43418..b1e3e17c61 100644
--- a/js/src/vm/Unicode.h
+++ b/js/src/vm/Unicode.h
@@ -466,6 +466,19 @@ IsTrailSurrogate(uint32_t codePoint)
return codePoint >= TrailSurrogateMin && codePoint <= TrailSurrogateMax;
}
+/**
+ * Returns true if codePoint lies in the inclusive range
+ * [LeadSurrogateMin, TrailSurrogateMax], i.e. it is a UTF-16 surrogate (lead
+ * or trail), assuming those constants bound one contiguous range.
+ *
+ * Takes uint32_t rather than char16_t so 32-bit values can be tested directly.
+ */
+inline bool
+IsSurrogate(uint32_t codePoint)
+{
+ return LeadSurrogateMin <= codePoint && codePoint <= TrailSurrogateMax;
+}
+
inline char16_t
LeadSurrogate(uint32_t codePoint)
{