summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Martok <martok@martoks-place.de> 2023-06-20 19:17:45 +0200
committer Martok <martok@martoks-place.de> 2023-06-29 22:18:07 +0200
commit 93bd7fc61efac5acb6641d1b168fb7203250843f (patch)
tree 8d12a3619cd99b7f2e6e88bc90a4356ab210e735
parent 1ec5d757e64c86b45145d8a388d4befb74810776 (diff)
download uxp-93bd7fc61efac5acb6641d1b168fb7203250843f.tar.gz
Issue #2259 - Add missing IsAscii* helper functions in mozilla/TextUtils.h
-rw-r--r-- mfbt/TextUtils.h 149
1 files changed, 145 insertions, 4 deletions
diff --git a/mfbt/TextUtils.h b/mfbt/TextUtils.h
index 84889b5ef4..66442a0d0b 100644
--- a/mfbt/TextUtils.h
+++ b/mfbt/TextUtils.h
@@ -35,6 +35,86 @@ public:
} // namespace detail
+// The IsAscii overloads below are deliberately not templated, in order to
+// make implicit conversions to Span work as expected for the Span
+// overloads.
+
+/**
+ * Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80).
+ *
+ * This is the overload that the |signed char| and |char| overloads below
+ * forward to after converting their argument to |unsigned char|.
+ */
+inline constexpr bool IsAscii(unsigned char aChar) { return aChar < 0x80; }
+
+/**
+ * Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80).
+ *
+ * The argument is converted to |unsigned char| first, so a negative value
+ * becomes a value of 0x80 or above and is reported as non-ASCII.
+ */
+inline constexpr bool IsAscii(signed char aChar) {
+ return IsAscii(static_cast<unsigned char>(aChar));
+}
+
+/**
+ * Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80).
+ *
+ * Whether plain |char| is signed is implementation-defined; converting to
+ * |unsigned char| makes the check behave the same either way.
+ */
+inline constexpr bool IsAscii(char aChar) {
+ return IsAscii(static_cast<unsigned char>(aChar));
+}
+
+/**
+ * Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80).
+ *
+ * |aChar| is a single UTF-16 code unit; only the upper bound is tested.
+ */
+inline constexpr bool IsAscii(char16_t aChar) { return aChar < 0x80; }
+
+/**
+ * Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80).
+ *
+ * |aChar| is a single 32-bit character value; only the upper bound is tested.
+ */
+inline constexpr bool IsAscii(char32_t aChar) { return aChar < 0x80; }
+
+/**
+ * Returns true iff every character in the null-terminated string pointed to by
+ * |aChar| is ASCII, i.e. in the range [0, 0x80).
+ *
+ * The scan stops at the first non-ASCII character or at the terminating null,
+ * whichever comes first. The terminator itself is not passed to IsAscii, so an
+ * empty string yields true.
+ *
+ * |aChar| is dereferenced unconditionally and therefore must not be null, and
+ * the string must actually be null-terminated. |Char| must be a character type
+ * for which a call to IsAscii is valid.
+ */
+template <typename Char>
+constexpr bool IsAsciiNullTerminated(const Char* aChar) {
+ while (Char c = *aChar++) {  // a zero value (the terminator) ends the loop
+ if (!IsAscii(c)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * Returns true iff |aChar| matches Ascii Whitespace.
+ *
+ * This function is intended to match the Infra standard
+ * (https://infra.spec.whatwg.org/#ascii-whitespace)
+ *
+ * The accepted values are exactly 0x09 (TAB), 0x0A (LF), 0x0C (FF),
+ * 0x0D (CR) and 0x20 (SPACE). Note that 0x0B (vertical tab) is not
+ * accepted.
+ *
+ * |aChar| is converted to detail::MakeUnsignedChar<Char>::Type (declared
+ * earlier in this header) before being compared.
+ */
+template <typename Char>
+constexpr bool IsAsciiWhitespace(Char aChar) {
+ using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+ auto uc = static_cast<UnsignedChar>(aChar);
+ return uc == 0x9 || uc == 0xA || uc == 0xC || uc == 0xD || uc == 0x20;  // TAB, LF, FF, CR, SPACE
+}
+
+/**
+ * Returns true iff |aChar| matches [a-z].
+ *
+ * This function is basically what you thought islower was, except its behavior
+ * doesn't depend on the user's current locale.
+ *
+ * |aChar| is converted to detail::MakeUnsignedChar<Char>::Type (declared
+ * earlier in this header) and then range-checked against the character
+ * literals 'a' and 'z'. The check therefore relies on those letters being
+ * contiguous in the execution character set, as they are in ASCII.
+ */
+template<typename Char>
+constexpr bool
+IsAsciiLowercaseAlpha(Char aChar)
+{
+ using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+ auto uc = static_cast<UnsignedChar>(aChar);
+ return 'a' <= uc && uc <= 'z';
+}
+
+/**
+ * Returns true iff |aChar| matches [A-Z].
+ *
+ * This function is basically what you thought isupper was, except its behavior
+ * doesn't depend on the user's current locale.
+ *
+ * |aChar| is converted to detail::MakeUnsignedChar<Char>::Type (declared
+ * earlier in this header) and then range-checked against the character
+ * literals 'A' and 'Z'. The check therefore relies on those letters being
+ * contiguous in the execution character set, as they are in ASCII.
+ */
+template<typename Char>
+constexpr bool
+IsAsciiUppercaseAlpha(Char aChar)
+{
+ using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+ auto uc = static_cast<UnsignedChar>(aChar);
+ return 'A' <= uc && uc <= 'Z';
+}
+
/**
* Returns true iff |aChar| matches [a-zA-Z].
*
@@ -45,11 +125,72 @@ template<typename Char>
constexpr bool
IsAsciiAlpha(Char aChar)
{
+ return IsAsciiLowercaseAlpha(aChar) || IsAsciiUppercaseAlpha(aChar);
+}
+
+/**
+ * Returns true iff |aChar| matches [0-9].
+ *
+ * This function is basically what you thought isdigit was, except its behavior
+ * doesn't depend on the user's current locale.
+ *
+ * |aChar| is converted to detail::MakeUnsignedChar<Char>::Type (declared
+ * earlier in this header) and then range-checked against the character
+ * literals '0' and '9'.
+ */
+template<typename Char>
+constexpr bool
+IsAsciiDigit(Char aChar)
+{
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
- return ('a' <= static_cast<UnsignedChar>(aChar) &&
- static_cast<UnsignedChar>(aChar) <= 'z') ||
- ('A' <= static_cast<UnsignedChar>(aChar) &&
- static_cast<UnsignedChar>(aChar) <= 'Z');
+ auto uc = static_cast<UnsignedChar>(aChar);
+ return '0' <= uc && uc <= '9';
+}
+
+/**
+ * Returns true iff |aChar| matches [0-9a-fA-F].
+ *
+ * This function is basically isxdigit, but guaranteed to be only for ASCII.
+ *
+ * Both lowercase and uppercase hex letters are accepted. |aChar| is converted
+ * to detail::MakeUnsignedChar<Char>::Type (declared earlier in this header)
+ * before the three range checks are applied.
+ */
+template <typename Char>
+constexpr bool IsAsciiHexDigit(Char aChar) {
+ using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+ auto uc = static_cast<UnsignedChar>(aChar);
+ return ('0' <= uc && uc <= '9') || ('a' <= uc && uc <= 'f') ||
+ ('A' <= uc && uc <= 'F');
+}
+
+/**
+ * Returns true iff |aChar| matches [a-zA-Z0-9].
+ *
+ * This function is basically what you thought isalnum was, except its behavior
+ * doesn't depend on the user's current locale.
+ *
+ * Implemented as the union of IsAsciiDigit and IsAsciiAlpha; the digit test is
+ * evaluated first and short-circuits the letter test.
+ */
+template <typename Char>
+constexpr bool IsAsciiAlphanumeric(Char aChar) {
+ return IsAsciiDigit(aChar) || IsAsciiAlpha(aChar);
+}
+
+/**
+ * Converts an ASCII alphanumeric digit [0-9a-zA-Z] to number as if in base-36.
+ * (This function therefore works for decimal, hexadecimal, etc.).
+ *
+ * '0'..'9' map to 0..9. Letters map to 10..35 regardless of case, so 'a' and
+ * 'A' both yield 10 and 'z' and 'Z' both yield 35.
+ *
+ * |aChar| must be an ASCII alphanumeric character: any other input reaches
+ * the MOZ_ASSERT at the end of the function. NOTE(review): if MOZ_ASSERT is
+ * compiled out in some build configurations (presumably non-debug builds --
+ * confirm), such input is not rejected and the returned value is meaningless,
+ * so callers should validate with IsAsciiAlphanumeric first.
+ */
+template <typename Char>
+uint8_t AsciiAlphanumericToNumber(Char aChar) {
+ using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
+ auto uc = static_cast<UnsignedChar>(aChar);
+
+ if ('0' <= uc && uc <= '9') {
+ return uc - '0';  // decimal digits: 0..9
+ }
+
+ if ('A' <= uc && uc <= 'Z') {
+ return uc - 'A' + 10;  // uppercase letters: 10..35
+ }
+
+ // Ideally this function would be constexpr, but unfortunately gcc at least as
+ // of 6.4 forbids non-constexpr function calls in unevaluated constexpr
+ // function calls. See bug 1453456. So for now, just assert and leave the
+ // entire function non-constexpr.
+ MOZ_ASSERT('a' <= uc && uc <= 'z',
+ "non-ASCII alphanumeric character can't be converted to number");
+ return uc - 'a' + 10;  // lowercase letters: 10..35, same values as uppercase
}
} // namespace mozilla