summary | refs | log | tree | commit | diff
path: root/js/src/jsstr.cpp
diff options
context:
space:
mode:
author: Moonchild <moonchild@palemoon.org> 2022-09-02 19:29:43 +0000
committer: Moonchild <moonchild@palemoon.org> 2022-09-02 19:29:43 +0000
commit: 74124f150b7167b69f0f4ae6657489c5db556ad3 (patch)
tree: d1954e5cc29052abbbab1bb49c20a7e882c56e50 /js/src/jsstr.cpp
parent: 4078c2dd1e3798a283594b84de68b3ded1e69898 (diff)
download: uxp-74124f150b7167b69f0f4ae6657489c5db556ad3.tar.gz
Issue #1999 - Switch to the unorm2 API for String.normalize()
Resolves #1999
Diffstat (limited to 'js/src/jsstr.cpp')
-rw-r--r-- js/src/jsstr.cpp 115
1 files changed, 88 insertions, 27 deletions
diff --git a/js/src/jsstr.cpp b/js/src/jsstr.cpp
index 4167d78741..b9e10b61b9 100644
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -35,7 +35,7 @@
#include "jit/InlinableNatives.h"
#include "js/Conversions.h"
#include "js/UniquePtr.h"
-#include "unicode/unorm.h"
+#include "unicode/unorm2.h"
#include "vm/GlobalObject.h"
#include "vm/Interpreter.h"
#include "vm/Opcodes.h"
@@ -900,79 +900,140 @@ js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
return ToUpperCaseHelper(cx, args);
}
-/* ES6 20140210 draft 21.1.3.12. */
+/* ES2017 21.1.3.12. */
bool
js::str_normalize(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
- // Steps 1-3.
+ // Steps 1-2.
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
if (!str)
return false;
- // Step 4.
- UNormalizationMode form;
+ enum NormalizationForm {
+ NFC, NFD, NFKC, NFKD
+ };
+
+ NormalizationForm form;
if (!args.hasDefined(0)) {
- form = UNORM_NFC;
+ // Step 3.
+ form = NFC;
} else {
- // Steps 5-6.
+ // Step 4.
RootedLinearString formStr(cx, ArgToRootedString(cx, args, 0));
if (!formStr)
return false;
- // Step 7.
+ // Step 5.
if (EqualStrings(formStr, cx->names().NFC)) {
- form = UNORM_NFC;
+ form = NFC;
} else if (EqualStrings(formStr, cx->names().NFD)) {
- form = UNORM_NFD;
+ form = NFD;
} else if (EqualStrings(formStr, cx->names().NFKC)) {
- form = UNORM_NFKC;
+ form = NFKC;
} else if (EqualStrings(formStr, cx->names().NFKD)) {
- form = UNORM_NFKD;
+ form = NFKD;
} else {
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INVALID_NORMALIZE_FORM);
return false;
}
}
- // Step 8.
+ JSLinearString* linear = str->ensureLinear(cx);
+ if (!linear)
+ return false;
+
+ // Latin1 strings are already in Normalization Form C.
+ if (form == NFC && linear->hasLatin1Chars()) {
+ // Step 7.
+ args.rval().setString(str);
+ return true;
+ }
+
+ // Step 6.
AutoStableStringChars stableChars(cx);
- if (!str->ensureFlat(cx) || !stableChars.initTwoByte(cx, str))
+ if (!stableChars.initTwoByte(cx, linear))
+ return false;
+
+ mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
+
+ // The unorm2_getXXXInstance() methods return a shared instance which must
+ // not be deleted.
+ UErrorCode status = U_ZERO_ERROR;
+ const UNormalizer2* normalizer;
+ if (form == NFC) {
+ normalizer = unorm2_getNFCInstance(&status);
+ } else if (form == NFD) {
+ normalizer = unorm2_getNFDInstance(&status);
+ } else if (form == NFKC) {
+ normalizer = unorm2_getNFKCInstance(&status);
+ } else {
+ MOZ_ASSERT(form == NFKD);
+ normalizer = unorm2_getNFKDInstance(&status);
+ }
+ if (U_FAILURE(status)) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
return false;
+ }
+
+ int32_t spanLength = unorm2_spanQuickCheckYes(normalizer,
+ Char16ToUChar(srcChars.begin().get()),
+ srcChars.length(), &status);
+ if (U_FAILURE(status)) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
+ return false;
+ }
+ MOZ_ASSERT(0 <= spanLength && size_t(spanLength) <= srcChars.length());
+
+ // Return if the input string is already normalized.
+ if (size_t(spanLength) == srcChars.length()) {
+ // Step 7.
+ args.rval().setString(str);
+ return true;
+ }
static const size_t INLINE_CAPACITY = 32;
- const UChar* srcChars = Char16ToUChar(stableChars.twoByteRange().begin().get());
- int32_t srcLen = AssertedCast<int32_t>(str->length());
Vector<char16_t, INLINE_CAPACITY> chars(cx);
- if (!chars.resize(INLINE_CAPACITY))
+ if (!chars.resize(Max(INLINE_CAPACITY, srcChars.length())))
return false;
- UErrorCode status = U_ZERO_ERROR;
- int32_t size = unorm_normalize(srcChars, srcLen, form, 0,
- Char16ToUChar(chars.begin()), INLINE_CAPACITY,
- &status);
+ // Copy the already normalized prefix.
+ if (spanLength > 0)
+ PodCopy(chars.begin(), srcChars.begin().get(), size_t(spanLength));
+
+ mozilla::RangedPtr<const char16_t> remainingStart = srcChars.begin() + spanLength;
+ size_t remainingLength = srcChars.length() - size_t(spanLength);
+
+ int32_t size = unorm2_normalizeSecondAndAppend(normalizer, Char16ToUChar(chars.begin()),
+ spanLength, chars.length(),
+ Char16ToUChar(remainingStart.get()),
+ remainingLength, &status);
if (status == U_BUFFER_OVERFLOW_ERROR) {
+ MOZ_ASSERT(size >= 0);
if (!chars.resize(size))
return false;
status = U_ZERO_ERROR;
#ifdef DEBUG
int32_t finalSize =
#endif
- unorm_normalize(srcChars, srcLen, form, 0,
- Char16ToUChar(chars.begin()), size,
- &status);
- MOZ_ASSERT(size == finalSize || U_FAILURE(status), "unorm_normalize behaved inconsistently");
+ unorm2_normalizeSecondAndAppend(normalizer, Char16ToUChar(chars.begin()), spanLength,
+ chars.length(), Char16ToUChar(remainingStart.get()),
+ remainingLength, &status);
+ MOZ_ASSERT_IF(!U_FAILURE(status), size == finalSize);
}
- if (U_FAILURE(status))
+ if (U_FAILURE(status)) {
+ JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
return false;
+ }
+ MOZ_ASSERT(size >= 0);
JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
if (!ns)
return false;
- // Step 9.
+ // Step 7.
args.rval().setString(ns);
return true;
}