From 93bd7fc61efac5acb6641d1b168fb7203250843f Mon Sep 17 00:00:00 2001
From: Martok
Date: Tue, 20 Jun 2023 19:17:45 +0200
Subject: Issue #2259 - Add missing IsAscii* helper functions in mozilla/TextUtils.h

---
 mfbt/TextUtils.h | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 145 insertions(+), 4 deletions(-)

diff --git a/mfbt/TextUtils.h b/mfbt/TextUtils.h
index 84889b5ef4..66442a0d0b 100644
--- a/mfbt/TextUtils.h
+++ b/mfbt/TextUtils.h
@@ -35,6 +35,86 @@ public:
 
 } // namespace detail
 
+// The overloads below are not templated in order to make
+// implicit conversions to span work as expected for the Span
+// overloads.
+
+/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
+inline constexpr bool IsAscii(unsigned char aChar) { return aChar < 0x80; }
+
+/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
+inline constexpr bool IsAscii(signed char aChar) {
+  return IsAscii(static_cast<unsigned char>(aChar));
+}
+
+/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
+inline constexpr bool IsAscii(char aChar) {
+  return IsAscii(static_cast<unsigned char>(aChar));
+}
+
+/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
+inline constexpr bool IsAscii(char16_t aChar) { return aChar < 0x80; }
+
+/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
+inline constexpr bool IsAscii(char32_t aChar) { return aChar < 0x80; }
+
+/**
+ * Returns true iff every character in the null-terminated string pointed to by
+ * |aChar| is ASCII, i.e. in the range [0, 0x80).
+ */
+template <typename Char>
+constexpr bool IsAsciiNullTerminated(const Char* aChar) {
+  while (Char c = *aChar++) {
+    if (!IsAscii(c)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * Returns true iff |aChar| matches Ascii Whitespace.
+ *
+ * This function is intended to match the Infra standard
+ * (https://infra.spec.whatwg.org/#ascii-whitespace)
+ */
+template <typename Char>
+constexpr bool IsAsciiWhitespace(Char aChar) {
+  using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+  auto uc = static_cast<UnsignedChar>(aChar);
+  return uc == 0x9 || uc == 0xA || uc == 0xC || uc == 0xD || uc == 0x20;
+}
+
+/**
+ * Returns true iff |aChar| matches [a-z].
+ *
+ * This function is basically what you thought islower was, except its behavior
+ * doesn't depend on the user's current locale.
+ */
+template<typename Char>
+constexpr bool
+IsAsciiLowercaseAlpha(Char aChar)
+{
+  using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+  auto uc = static_cast<UnsignedChar>(aChar);
+  return 'a' <= uc && uc <= 'z';
+}
+
+/**
+ * Returns true iff |aChar| matches [A-Z].
+ *
+ * This function is basically what you thought isupper was, except its behavior
+ * doesn't depend on the user's current locale.
+ */
+template<typename Char>
+constexpr bool
+IsAsciiUppercaseAlpha(Char aChar)
+{
+  using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+  auto uc = static_cast<UnsignedChar>(aChar);
+  return 'A' <= uc && uc <= 'Z';
+}
+
 /**
  * Returns true iff |aChar| matches [a-zA-Z].
  *
@@ -44,12 +124,73 @@ public:
 template<typename Char>
 constexpr bool
 IsAsciiAlpha(Char aChar)
+{
+  return IsAsciiLowercaseAlpha(aChar) || IsAsciiUppercaseAlpha(aChar);
+}
+
+/**
+ * Returns true iff |aChar| matches [0-9].
+ *
+ * This function is basically what you thought isdigit was, except its behavior
+ * doesn't depend on the user's current locale.
+ */
+template<typename Char>
+constexpr bool
+IsAsciiDigit(Char aChar)
 {
   using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
-  return ('a' <= static_cast<UnsignedChar>(aChar) &&
-          static_cast<UnsignedChar>(aChar) <= 'z') ||
-         ('A' <= static_cast<UnsignedChar>(aChar) &&
-          static_cast<UnsignedChar>(aChar) <= 'Z');
+  auto uc = static_cast<UnsignedChar>(aChar);
+  return '0' <= uc && uc <= '9';
+}
+
+/**
+ * Returns true iff |aChar| matches [0-9a-fA-F].
+ *
+ * This function is basically isxdigit, but guaranteed to be only for ASCII.
+ */
+template <typename Char>
+constexpr bool IsAsciiHexDigit(Char aChar) {
+  using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+  auto uc = static_cast<UnsignedChar>(aChar);
+  return ('0' <= uc && uc <= '9') || ('a' <= uc && uc <= 'f') ||
+         ('A' <= uc && uc <= 'F');
+}
+
+/**
+ * Returns true iff |aChar| matches [a-zA-Z0-9].
+ *
+ * This function is basically what you thought isalnum was, except its behavior
+ * doesn't depend on the user's current locale.
+ */
+template <typename Char>
+constexpr bool IsAsciiAlphanumeric(Char aChar) {
+  return IsAsciiDigit(aChar) || IsAsciiAlpha(aChar);
+}
+
+/**
+ * Converts an ASCII alphanumeric digit [0-9a-zA-Z] to number as if in base-36.
+ * (This function therefore works for decimal, hexadecimal, etc.).
+ */
+template <typename Char>
+uint8_t AsciiAlphanumericToNumber(Char aChar) {
+  using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+  auto uc = static_cast<UnsignedChar>(aChar);
+
+  if ('0' <= uc && uc <= '9') {
+    return uc - '0';
+  }
+
+  if ('A' <= uc && uc <= 'Z') {
+    return uc - 'A' + 10;
+  }
+
+  // Ideally this function would be constexpr, but unfortunately gcc at least as
+  // of 6.4 forbids non-constexpr function calls in unevaluated constexpr
+  // function calls.  See bug 1453456.  So for now, just assert and leave the
+  // entire function non-constexpr.
+  MOZ_ASSERT('a' <= uc && uc <= 'z',
+             "non-ASCII alphanumeric character can't be converted to number");
+  return uc - 'a' + 10;
 }
 
 } // namespace mozilla
-- 
cgit v1.2.3