summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- gfx/thebes/gfxFont.cpp 9
-rwxr-xr-x intl/unicharutil/tools/genUnicodePropertyData.pl 164
-rw-r--r-- intl/unicharutil/util/nsUnicodeProperties.cpp 2
-rw-r--r-- intl/unicharutil/util/nsUnicodeProperties.h 31
-rw-r--r-- netwerk/dns/nsIDNService.cpp 66
5 files changed, 174 insertions, 98 deletions
diff --git a/gfx/thebes/gfxFont.cpp b/gfx/thebes/gfxFont.cpp
index f79c5cbd72..8ac64bc1b4 100644
--- a/gfx/thebes/gfxFont.cpp
+++ b/gfx/thebes/gfxFont.cpp
@@ -725,10 +725,9 @@ gfxShapedText::SetGlyphs(uint32_t aIndex, CompressedGlyph aGlyph,
#define ZWNJ 0x200C
#define ZWJ 0x200D
static inline bool
-IsDefaultIgnorable(uint32_t aChar)
+IsIgnorable(uint32_t aChar)
{
- return GetIdentifierModification(aChar) == XIDMOD_DEFAULT_IGNORABLE ||
- aChar == ZWNJ || aChar == ZWJ;
+ return (IsDefaultIgnorable(aChar)) || aChar == ZWNJ || aChar == ZWJ;
}
void
@@ -744,7 +743,7 @@ gfxShapedText::SetMissingGlyph(uint32_t aIndex, uint32_t aChar, gfxFont *aFont)
DetailedGlyph *details = AllocateDetailedGlyphs(aIndex, 1);
details->mGlyphID = aChar;
- if (IsDefaultIgnorable(aChar)) {
+ if (IsIgnorable(aChar)) {
// Setting advance width to zero will prevent drawing the hexbox
details->mAdvance = 0;
} else {
@@ -762,7 +761,7 @@ gfxShapedText::SetMissingGlyph(uint32_t aIndex, uint32_t aChar, gfxFont *aFont)
bool
gfxShapedText::FilterIfIgnorable(uint32_t aIndex, uint32_t aCh)
{
- if (IsDefaultIgnorable(aCh)) {
+ if (IsIgnorable(aCh)) {
// There are a few default-ignorables of Letter category (currently,
// just the Hangul filler characters) that we'd better not discard
// if they're followed by additional characters in the same cluster.
diff --git a/intl/unicharutil/tools/genUnicodePropertyData.pl b/intl/unicharutil/tools/genUnicodePropertyData.pl
index 8b247e83c6..bd86076eac 100755
--- a/intl/unicharutil/tools/genUnicodePropertyData.pl
+++ b/intl/unicharutil/tools/genUnicodePropertyData.pl
@@ -23,6 +23,7 @@
# - HangulSyllableType.txt
# - LineBreak.txt
# - EastAsianWidth.txt
+# - DerivedCoreProperties.txt
# - ReadMe.txt (to record version/date of the UCD)
# - Unihan_Variants.txt (from Unihan.zip)
# though this may change if we find a need for additional properties.
@@ -30,12 +31,13 @@
# The Unicode data files listed above should be together in one directory.
#
# We also require the files
-# http://www.unicode.org/Public/security/latest/xidmodifications.txt
+# http://www.unicode.org/Public/security/latest/IdentifierStatus.txt
+# http://www.unicode.org/Public/security/latest/IdentifierType.txt
# These files should be in a sub-directory "security" immediately below the
# directory containing the other Unicode data files.
#
-# We also require the latest data file for UTR50, currently revision-13:
-# http://www.unicode.org/Public/vertical/revision-13/VerticalOrientation-13.txt
+# We also require the latest data file for UTR50, currently revision-15:
+# http://www.unicode.org/Public/vertical/revision-15/VerticalOrientation-15.txt
# This file should be in a sub-directory "vertical" immediately below the
# directory containing the other Unicode data files.
#
@@ -140,20 +142,35 @@ sub readIcuHeader
die "didn't find ICU script codes\n" if $sc == -1;
-my %xidmodCode = (
-'Recommended' => 0,
-'Inclusion' => 1,
-'Uncommon_Use' => 2,
-'Technical' => 3,
-'Obsolete' => 4,
-'Aspirational' => 5,
-'Limited_Use' => 6,
-'Exclusion' => 7,
-'Not_XID' => 8,
-'Not_NFKC' => 9,
-'Default_Ignorable' => 10,
-'Deprecated' => 11,
-'not-chars' => 12
+# We don't currently store these values; %idType is used only to check that
+# properties listed in the IdentifierType.txt file are recognized. We record
+# only the %mappedIdType values that are used by nsIDNService::isLabelSafe.
+# In practice, it would be sufficient for us to read only the last value in
+# IdentifierType.txt, but we check that all values are known so that we'll get
+# a warning if future updates introduce new ones, and can consider whether
+# they need to be taken into account.
+my %idType = (
+ "Not_Character" => 0,
+ "Recommended" => 1,
+ "Inclusion" => 2,
+ "Uncommon_Use" => 3,
+ "Technical" => 4,
+ "Obsolete" => 5,
+ "Aspirational" => 6,
+ "Limited_Use" => 7,
+ "Exclusion" => 8,
+ "Not_XID" => 9,
+ "Not_NFKC" => 10,
+ "Default_Ignorable" => 11,
+ "Deprecated" => 12
+);
+
+# These match the IdentifierType enum in nsUnicodeProperties.h.
+my %mappedIdType = (
+ "Restricted" => 0,
+ "Allowed" => 1,
+ "Aspirational" => 2 # for Aspirational characters that are not excluded
+ # by another attribute.
);
my %bidicategoryCode = (
@@ -229,7 +246,10 @@ my %lineBreakCode = ( # ordering matches ICU's ULineBreak enum
"CP" => 36,
"CJ" => 37,
"HL" => 38,
- "RI" => 39
+ "RI" => 39,
+ "EB" => 40,
+ "EM" => 41,
+ "ZWJ" => 42
);
my %eastAsianWidthCode = (
@@ -249,7 +269,7 @@ my @mirror;
my @pairedBracketType;
my @hangul;
my @casemap;
-my @xidmod;
+my @idtype;
my @numericvalue;
my @hanVariant;
my @bidicategory;
@@ -258,13 +278,14 @@ my @fullWidthInverse;
my @verticalOrientation;
my @lineBreak;
my @eastAsianWidthFWH;
+my @defaultIgnorable;
for (my $i = 0; $i < 0x110000; ++$i) {
$script[$i] = $scriptCode{"UNKNOWN"};
$category[$i] = $catCode{"UNASSIGNED"};
$combining[$i] = 0;
$pairedBracketType[$i] = 0;
$casemap[$i] = 0;
- $xidmod[$i] = $xidmodCode{"not-chars"};
+ $idtype[$i] = $mappedIdType{'Restricted'};
$numericvalue[$i] = -1;
$hanVariant[$i] = 0;
$bidicategory[$i] = $bidicategoryCode{"L"};
@@ -273,6 +294,7 @@ for (my $i = 0; $i < 0x110000; ++$i) {
$verticalOrientation[$i] = 1; # default for unlisted codepoints is 'R'
$lineBreak[$i] = $lineBreakCode{"XX"};
$eastAsianWidthFWH[$i] = 0;
+ $defaultIgnorable[$i] = 0;
}
# blocks where the default for bidi category is not L
@@ -557,25 +579,72 @@ while (<FH>) {
}
close FH;
-# read xidmodifications.txt
-open FH, "< $UNICODE/security/xidmodifications.txt" or die "can't open UCD file xidmodifications.txt\n";
+# read DerivedCoreProperties.txt (for Default-Ignorables)
+open FH, "< $UNICODE/DerivedCoreProperties.txt" or die "can't open UCD file DerivedCoreProperties.txt\n";
push @versionInfo, "";
+
while (<FH>) {
- chomp;
- unless (/\xef\xbb\xbf/) {
+ chomp;
push @versionInfo, $_;
- }
- last if /Generated:/;
+ last if /Date:/;
+}
+while (<FH>) {
+ s/#.*//;
+ if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*Default_Ignorable_Code_Point/) {
+ my $start = hex "0x$1";
+ my $end = (defined $2) ? hex "0x$2" : $start;
+ for (my $i = $start; $i <= $end; ++$i) {
+ $defaultIgnorable[$i] = 1;
+ }
+ }
}
+close FH;
+
+# read IdentifierStatus.txt
+open FH, "< $UNICODE/security/IdentifierStatus.txt" or die "can't open UCD file IdentifierStatus.txt\n";
+push @versionInfo, "";
while (<FH>) {
- if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+[^ ]+\s+;\s+([^ ]+)/) {
- my $xidmod = $3;
- warn "unknown Identifier Modification $xidmod" unless exists $xidmodCode{$xidmod};
- $xidmod = $xidmodCode{$xidmod};
+ chomp;
+ s/\xef\xbb\xbf//;
+ push @versionInfo, $_;
+ last if /Date:/;
+
+}
+
+while (<FH>) {
+ if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+Allowed/) {
my $start = hex "0x$1";
my $end = (defined $2) ? hex "0x$2" : $start;
for (my $i = $start; $i <= $end; ++$i) {
- $xidmod[$i] = $xidmod;
+ $idtype[$i] = $mappedIdType{'Allowed'};
+ }
+
+ }
+}
+close FH;
+
+# read IdentifierType.txt, to find Aspirational characters
+open FH, "< $UNICODE/security/IdentifierType.txt" or die "can't open UCD file IdentifierType.txt\n";
+push @versionInfo, "";
+while (<FH>) {
+ chomp;
+ s/\xef\xbb\xbf//;
+ push @versionInfo, $_;
+ last if /Date:/;
+}
+while (<FH>) {
+ if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+([^#]+)/) {
+ my $idtype = $3;
+ foreach (split(/ /, $idtype)) {
+ warn "unknown Identifier Type $_" unless exists $idType{$_};
+ }
+ my $start = hex "0x$1";
+ my $end = (defined $2) ? hex "0x$2" : $start;
+ if ($idtype =~ /Aspirational/ and (not $idtype =~ /Exclusion|Not_XID|Not_NFKC/)) {
+
+ for (my $i = $start; $i <= $end; ++$i) {
+ $idtype[$i] = $mappedIdType{'Aspirational'};
+ }
}
}
}
@@ -617,8 +686,8 @@ while (<FH>) {
}
close FH;
-# read VerticalOrientation-13.txt
-open FH, "< $UNICODE/vertical/VerticalOrientation-13.txt" or die "can't open UTR50 data file VerticalOrientation-13.txt\n";
+# read VerticalOrientation-15.txt
+open FH, "< $UNICODE/vertical/VerticalOrientation-15.txt" or die "can't open UTR50 data file VerticalOrientation-15.txt\n";
push @versionInfo, "";
while (<FH>) {
chomp;
@@ -738,14 +807,15 @@ sub sprintCharProps2_short
{
my $usv = shift;
return sprintf("{%d,%d},",
- $verticalOrientation[$usv], $xidmod[$usv]);
+ $verticalOrientation[$usv], $idtype[$usv]);
}
$type = q|
struct nsCharProps2 {
- // Currently only 6 bits are defined here, so 2 more could be added without
- // affecting the storage requirements for this struct.
+ // Currently only 4 bits are defined here, so 4 more could be added without
+ // affecting the storage requirements for this struct. Or we could pack two
+ // records per byte, at the cost of a slightly more complex accessor.
unsigned char mVertOrient:2;
- unsigned char mXidmod:4;
+ unsigned char mIdType:2;
};
|;
&genTables("#if ENABLE_INTL_API", "#endif",
@@ -754,23 +824,31 @@ struct nsCharProps2 {
sub sprintCharProps2_full
{
my $usv = shift;
- return sprintf("{%d,%d,%d,%d,%d,%d,%d,%d,%d},",
+ return sprintf("{%d,%d,%d,%d,%d,%d,%d,%d,%d,%d},",
$script[$usv], $pairedBracketType[$usv],
$eastAsianWidthFWH[$usv], $category[$usv],
- $bidicategory[$usv], $xidmod[$usv], $numericvalue[$usv],
- $verticalOrientation[$usv], $lineBreak[$usv]);
+ $idtype[$usv], $defaultIgnorable[$usv], $bidicategory[$usv],
+ $verticalOrientation[$usv], $lineBreak[$usv],
+ $numericvalue[$usv]);
}
$type = q|
+// This struct currently requires 5 bytes. We try to ensure that whole-byte
+// fields will not straddle byte boundaries, to optimize access to them.
struct nsCharProps2 {
unsigned char mScriptCode:8;
+ // -- byte boundary --
unsigned char mPairedBracketType:2;
unsigned char mEastAsianWidthFWH:1;
unsigned char mCategory:5;
+ // -- byte boundary --
+ unsigned char mIdType:2;
+ unsigned char mDefaultIgnorable:1;
unsigned char mBidiCategory:5;
- unsigned char mXidmod:4;
- signed char mNumericValue:5;
+ // -- byte boundary --
unsigned char mVertOrient:2;
- unsigned char mLineBreak; // only 6 bits actually needed
+ unsigned char mLineBreak:6;
+ // -- byte boundary --
+ signed char mNumericValue; // only 5 bits are actually needed here
};
|;
&genTables("#if !ENABLE_INTL_API", "#endif",
diff --git a/intl/unicharutil/util/nsUnicodeProperties.cpp b/intl/unicharutil/util/nsUnicodeProperties.cpp
index 71f684f0e9..99a87bc958 100644
--- a/intl/unicharutil/util/nsUnicodeProperties.cpp
+++ b/intl/unicharutil/util/nsUnicodeProperties.cpp
@@ -31,7 +31,7 @@ GetCharProps2(uint32_t aCh)
using namespace mozilla::unicode;
static const nsCharProps2 undefined = {
VERTICAL_ORIENTATION_R,
- XIDMOD_NOT_CHARS
+ 0 // IdentifierType
};
return undefined;
}
diff --git a/intl/unicharutil/util/nsUnicodeProperties.h b/intl/unicharutil/util/nsUnicodeProperties.h
index ee1d77252c..2ff69d19a5 100644
--- a/intl/unicharutil/util/nsUnicodeProperties.h
+++ b/intl/unicharutil/util/nsUnicodeProperties.h
@@ -38,20 +38,13 @@ enum PairedBracketType {
PAIRED_BRACKET_TYPE_CLOSE = 2
};
-enum XidmodType {
- XIDMOD_RECOMMENDED,
- XIDMOD_INCLUSION,
- XIDMOD_UNCOMMON_USE,
- XIDMOD_TECHNICAL,
- XIDMOD_OBSOLETE,
- XIDMOD_ASPIRATIONAL,
- XIDMOD_LIMITED_USE,
- XIDMOD_EXCLUSION,
- XIDMOD_NOT_XID,
- XIDMOD_NOT_NFKC,
- XIDMOD_DEFAULT_IGNORABLE,
- XIDMOD_DEPRECATED,
- XIDMOD_NOT_CHARS
+/* Flags for Unicode security IdentifierType.txt attributes. Only a subset
+ of these are currently checked by Gecko, so we only define flags for the
+ ones we need. */
+enum IdentifierType {
+ IDTYPE_RESTRICTED = 0,
+ IDTYPE_ALLOWED = 1,
+ IDTYPE_ASPIRATIONAL = 2,
};
enum EmojiPresentation {
@@ -179,6 +172,12 @@ IsEastAsianWidthFWH(uint32_t aCh)
return false;
}
+inline bool
+IsDefaultIgnorable(uint32_t aCh)
+{
+ return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT);
+}
+
inline EmojiPresentation
GetEmojiPresentation(uint32_t aCh)
{
@@ -201,8 +200,8 @@ inline VerticalOrientation GetVerticalOrientation(uint32_t aCh) {
return VerticalOrientation(GetCharProps2(aCh).mVertOrient);
}
-inline XidmodType GetIdentifierModification(uint32_t aCh) {
- return XidmodType(GetCharProps2(aCh).mXidmod);
+inline IdentifierType GetIdentifierType(uint32_t aCh) {
+ return IdentifierType(GetCharProps2(aCh).mIdType);
}
uint32_t GetFullWidth(uint32_t aCh);
diff --git a/netwerk/dns/nsIDNService.cpp b/netwerk/dns/nsIDNService.cpp
index 9210e20b5f..70e255ed15 100644
--- a/netwerk/dns/nsIDNService.cpp
+++ b/netwerk/dns/nsIDNService.cpp
@@ -314,42 +314,42 @@ nsresult nsIDNService::ACEtoUTF8(const nsACString & input, nsACString & _retval,
return NS_OK;
}
-
-/**
- * Returns |true| if |aString| contains only ASCII characters according
- * to our CRT.
- *
- * @param aString an 8-bit wide string to scan
- */
-inline bool IsAsciiString(mozilla::Span<const char> aString) {
- for (char c : aString) {
- if (!nsCRT::IsAscii(c)) {
- return false;
- }
- }
- return true;
-}
+
+/**
+ * Returns |true| if |aString| contains only ASCII characters according
+ * to our CRT.
+ *
+ * @param aString an 8-bit wide string to scan
+ */
+inline bool IsAsciiString(mozilla::Span<const char> aString) {
+ for (char c : aString) {
+ if (!nsCRT::IsAscii(c)) {
+ return false;
+ }
+ }
+ return true;
+}
NS_IMETHODIMP nsIDNService::IsACE(const nsACString & input, bool *_retval)
{
// look for the ACE prefix in the input string. it may occur
// at the beginning of any segment in the domain name. for
// example: "www.xn--ENCODED.com"
- if (!IsAsciiString(input)) {
- *_retval = false;
- return NS_OK;
- }
- auto stringContains = [](const nsACString& haystack,
- const nsACString& needle) {
- return std::search(haystack.BeginReading(), haystack.EndReading(),
- needle.BeginReading(),
- needle.EndReading()) != haystack.EndReading();
- };
-
- *_retval = StringBeginsWith(input, NS_LITERAL_CSTRING("xn--")) ||
- (!input.IsEmpty() && input[0] != '.' &&
- stringContains(input, NS_LITERAL_CSTRING(".xn--")));
- return NS_OK;
+ if (!IsAsciiString(input)) {
+ *_retval = false;
+ return NS_OK;
+ }
+ auto stringContains = [](const nsACString& haystack,
+ const nsACString& needle) {
+ return std::search(haystack.BeginReading(), haystack.EndReading(),
+ needle.BeginReading(),
+ needle.EndReading()) != haystack.EndReading();
+ };
+
+ *_retval = StringBeginsWith(input, NS_LITERAL_CSTRING("xn--")) ||
+ (!input.IsEmpty() && input[0] != '.' &&
+ stringContains(input, NS_LITERAL_CSTRING(".xn--")));
+ return NS_OK;
}
NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input,
@@ -730,11 +730,11 @@ bool nsIDNService::isLabelSafe(const nsAString &label)
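// Check for restricted characters; aspirational scripts are NOT permitted,
// in anticipation of the category being merged into Limited-Use scripts
// in the upcoming (Unicode 10.0-based) revision of UAX #31.
// NOTE(review): the check below rejects only IDTYPE_RESTRICTED, so
// IDTYPE_ASPIRATIONAL characters pass it, unlike what this comment says;
// confirm which behavior is intended.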
- XidmodType xm = GetIdentifierModification(ch);
- if (xm != XIDMOD_RECOMMENDED &&
- xm != XIDMOD_INCLUSION) {
+ IdentifierType idType = GetIdentifierType(ch);
+ if (idType == IDTYPE_RESTRICTED) {
return false;
}
+ MOZ_ASSERT(idType == IDTYPE_ALLOWED || idType == IDTYPE_ASPIRATIONAL);
// Check for mixed script
Script script = GetScriptCode(ch);