diff options
author | Martok <martok@martoks-place.de> | 2023-06-20 19:17:45 +0200 |
---|---|---|
committer | Martok <martok@martoks-place.de> | 2023-06-29 22:18:07 +0200 |
commit | 93bd7fc61efac5acb6641d1b168fb7203250843f (patch) | |
tree | 8d12a3619cd99b7f2e6e88bc90a4356ab210e735 /mfbt | |
parent | 1ec5d757e64c86b45145d8a388d4befb74810776 (diff) | |
download | uxp-93bd7fc61efac5acb6641d1b168fb7203250843f.tar.gz |
Issue #2259 - Add missing IsAscii* helper functions in mozilla/TextUtils.h
Diffstat (limited to 'mfbt')
-rw-r--r-- | mfbt/TextUtils.h | 149 |
1 files changed, 145 insertions, 4 deletions
diff --git a/mfbt/TextUtils.h b/mfbt/TextUtils.h index 84889b5ef4..66442a0d0b 100644 --- a/mfbt/TextUtils.h +++ b/mfbt/TextUtils.h @@ -35,6 +35,86 @@ public: } // namespace detail +// The overloads below are not templated in order to make +// implicit conversions to span work as expected for the Span +// overloads. + +/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */ +inline constexpr bool IsAscii(unsigned char aChar) { return aChar < 0x80; } + +/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */ +inline constexpr bool IsAscii(signed char aChar) { + return IsAscii(static_cast<unsigned char>(aChar)); +} + +/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */ +inline constexpr bool IsAscii(char aChar) { + return IsAscii(static_cast<unsigned char>(aChar)); +} + +/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */ +inline constexpr bool IsAscii(char16_t aChar) { return aChar < 0x80; } + +/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */ +inline constexpr bool IsAscii(char32_t aChar) { return aChar < 0x80; } + +/** + * Returns true iff every character in the null-terminated string pointed to by + * |aChar| is ASCII, i.e. in the range [0, 0x80). + */ +template <typename Char> +constexpr bool IsAsciiNullTerminated(const Char* aChar) { + while (Char c = *aChar++) { + if (!IsAscii(c)) { + return false; + } + } + return true; +} + +/** + * Returns true iff |aChar| matches Ascii Whitespace. + * + * This function is intended to match the Infra standard + * (https://infra.spec.whatwg.org/#ascii-whitespace) + */ +template <typename Char> +constexpr bool IsAsciiWhitespace(Char aChar) { + using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type; + auto uc = static_cast<UnsignedChar>(aChar); + return uc == 0x9 || uc == 0xA || uc == 0xC || uc == 0xD || uc == 0x20; +} + +/** + * Returns true iff |aChar| matches [a-z]. + * + * This function is basically what you thought islower was, except its behavior + * doesn't depend on the user's current locale. + */ +template<typename Char> +constexpr bool +IsAsciiLowercaseAlpha(Char aChar) +{ + using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type; + auto uc = static_cast<UnsignedChar>(aChar); + return 'a' <= uc && uc <= 'z'; +} + +/** + * Returns true iff |aChar| matches [A-Z]. + * + * This function is basically what you thought isupper was, except its behavior + * doesn't depend on the user's current locale. + */ +template<typename Char> +constexpr bool +IsAsciiUppercaseAlpha(Char aChar) +{ + using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type; + auto uc = static_cast<UnsignedChar>(aChar); + return 'A' <= uc && uc <= 'Z'; +} + /** * Returns true iff |aChar| matches [a-zA-Z]. * @@ -45,11 +125,72 @@ template<typename Char> constexpr bool IsAsciiAlpha(Char aChar) { + return IsAsciiLowercaseAlpha(aChar) || IsAsciiUppercaseAlpha(aChar); +} + +/** + * Returns true iff |aChar| matches [0-9]. + * + * This function is basically what you thought isdigit was, except its behavior + * doesn't depend on the user's current locale. + */ +template<typename Char> +constexpr bool +IsAsciiDigit(Char aChar) +{ using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type; - return ('a' <= static_cast<UnsignedChar>(aChar) && - static_cast<UnsignedChar>(aChar) <= 'z') || - ('A' <= static_cast<UnsignedChar>(aChar) && - static_cast<UnsignedChar>(aChar) <= 'Z'); + auto uc = static_cast<UnsignedChar>(aChar); + return '0' <= uc && uc <= '9'; +} + +/** + * Returns true iff |aChar| matches [0-9a-fA-F]. + * + * This function is basically isxdigit, but guaranteed to be only for ASCII. + */ +template <typename Char> +constexpr bool IsAsciiHexDigit(Char aChar) { + using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type; + auto uc = static_cast<UnsignedChar>(aChar); + return ('0' <= uc && uc <= '9') || ('a' <= uc && uc <= 'f') || + ('A' <= uc && uc <= 'F'); +} + +/** + * Returns true iff |aChar| matches [a-zA-Z0-9]. + * + * This function is basically what you thought isalnum was, except its behavior + * doesn't depend on the user's current locale. + */ +template <typename Char> +constexpr bool IsAsciiAlphanumeric(Char aChar) { + return IsAsciiDigit(aChar) || IsAsciiAlpha(aChar); +} + +/** + * Converts an ASCII alphanumeric digit [0-9a-zA-Z] to number as if in base-36. + * (This function therefore works for decimal, hexadecimal, etc.). + */ +template <typename Char> +uint8_t AsciiAlphanumericToNumber(Char aChar) { + using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type; + auto uc = static_cast<UnsignedChar>(aChar); + + if ('0' <= uc && uc <= '9') { + return uc - '0'; + } + + if ('A' <= uc && uc <= 'Z') { + return uc - 'A' + 10; + } + + // Ideally this function would be constexpr, but unfortunately gcc at least as + // of 6.4 forbids non-constexpr function calls in unevaluated constexpr + // function calls. See bug 1453456. So for now, just assert and leave the + // entire function non-constexpr. + MOZ_ASSERT('a' <= uc && uc <= 'z', + "non-ASCII alphanumeric character can't be converted to number"); + return uc - 'a' + 10; } } // namespace mozilla |