From fcc171ddb76d8fc70074854a7b05ed720002e8cc Mon Sep 17 00:00:00 2001 From: Job Bautista Date: Thu, 23 Jun 2022 15:44:03 +0800 Subject: Issue #326 - Part 5: Simplify the genUnicodePropertyData.pl tool to generate only the data we need to supplement ICU properties Backported from Mozilla bug 1402271. Some removals were already done by Part 1b. --- intl/unicharutil/tools/genUnicodePropertyData.pl | 434 +---------------------- 1 file changed, 14 insertions(+), 420 deletions(-) diff --git a/intl/unicharutil/tools/genUnicodePropertyData.pl b/intl/unicharutil/tools/genUnicodePropertyData.pl index e17b6cb76a..8c7437f82d 100755 --- a/intl/unicharutil/tools/genUnicodePropertyData.pl +++ b/intl/unicharutil/tools/genUnicodePropertyData.pl @@ -9,6 +9,10 @@ # read from the Unicode Character Database and compiled into multi-level arrays # for efficient lookup. # +# Note that for most properties, we now rely on ICU; this tool and the tables +# it generates are used only for a couple of properties not readily exposed +# via ICU APIs. +# # To regenerate the tables in nsUnicodePropertyData.cpp: # # (1) Download the current Unicode data files from @@ -17,13 +21,6 @@ # # NB: not all the files are actually needed; currently, we require # - UnicodeData.txt -# - Scripts.txt -# - BidiMirroring.txt -# - BidiBrackets.txt -# - HangulSyllableType.txt -# - LineBreak.txt -# - EastAsianWidth.txt -# - DerivedCoreProperties.txt # - ReadMe.txt (to record version/date of the UCD) # - Unihan_Variants.txt (from Unihan.zip) # though this may change if we find a need for additional properties. @@ -44,7 +41,6 @@ # (2) Run this tool using a command line of the form # # perl genUnicodePropertyData.pl \ -# /path/to/harfbuzz/src \ # /path/to/icu/common/unicode \ # /path/to/UCD-directory # @@ -58,17 +54,15 @@ use strict; use List::Util qw(first); -if ($#ARGV != 2) { +if ($#ARGV != 1) { print <<__EOT; # Run this tool using a command line of the form # # perl genUnicodePropertyData.pl \\ -# /path/to/harfbuzz/src \\ # /path/to/icu/common/unicode \\ # /path/to/UCD-directory # -# where harfbuzz/src is the directory containing harfbuzz .cc and .hh files, -# icu/common/unicode is the directory containing ICU 'common' public headers, +# where icu/common/unicode is the directory containing ICU 'common' headers, # and UCD-directory is a directory containing the current Unicode Character # Database files (UnicodeData.txt, etc), available from # http://www.unicode.org/Public/UNIDATA/, with additional resources as @@ -84,35 +78,11 @@ __EOT exit 0; } -my $HARFBUZZ = $ARGV[0]; -my $ICU = $ARGV[1]; -my $UNICODE = $ARGV[2]; - -# load HB_Category constants - -my $cc = -1; -my %catCode; +my $ICU = $ARGV[0]; +my $UNICODE = $ARGV[1]; -sub readHarfBuzzHeader -{ - my $file = shift; - open FH, "< $HARFBUZZ/$file" or die "can't open harfbuzz header $HARFBUZZ/$file\n"; - while () { - if (m/HB_UNICODE_GENERAL_CATEGORY_([A-Z_]+)/) { - $cc++; - $catCode{$1} = $cc; - } - } - close FH; -} - -&readHarfBuzzHeader("hb-unicode.h"); - -die "didn't find HarfBuzz category codes\n" if $cc == -1; - -my %scriptCode; -my @scriptCodeToTag; my @scriptCodeToName; +my @idtype; my $sc = -1; @@ -129,8 +99,6 @@ sub readIcuHeader s/SIGN_WRITING/SIGNWRITING/; if (m|USCRIPT_([A-Z_]+)\s*=\s*([0-9]+),\s*/\*\s*([A-Z][a-z]{3})\s*\*/|) { $sc = $2; - $scriptCode{$1} = $sc; - $scriptCodeToTag[$sc] = $3; $scriptCodeToName[$sc] = $1; } } @@ -170,32 +138,6 @@ my %mappedIdType = ( "Allowed" => 1 ); -my %bidicategoryCode = ( - "L" => 0, # Left-to-Right - "R" => 1, # Right-to-Left - "EN" => 2, # European Number - "ES" => 3, # European Number Separator - "ET" => 4, # European Number Terminator - "AN" => 5, # Arabic Number - "CS" => 6, # Common Number Separator - "B" => 7, # Paragraph Separator - "S" => 8, # Segment Separator - "WS" => 9, # Whitespace - "ON" => 10, # Other Neutrals - "LRE" => 11, # Left-to-Right Embedding - "LRO" => 12, # Left-to-Right Override - "AL" => 13, # Right-to-Left Arabic - "RLE" => 14, # Right-to-Left Embedding - "RLO" => 15, # Right-to-Left Override - "PDF" => 16, # Pop Directional Format - "NSM" => 17, # Non-Spacing Mark - "BN" => 18, # Boundary Neutral - "FSI" => 19, # First Strong Isolate - "LRI" => 20, # Left-to-Right Isolate - "RLI" => 21, # Right-to-left Isolate - "PDI" => 22 # Pop Direcitonal Isolate -); - my %verticalOrientationCode = ( 'U' => 0, # U - Upright, the same orientation as in the code charts 'R' => 1, # R - Rotated 90 degrees clockwise compared to the code charts @@ -203,141 +145,18 @@ my %verticalOrientationCode = ( 'Tr' => 3 # Tr - Transformed typographically, with fallback to Rotated ); -my %lineBreakCode = ( # ordering matches ICU's ULineBreak enum - "XX" => 0, - "AI" => 1, - "AL" => 2, - "B2" => 3, - "BA" => 4, - "BB" => 5, - "BK" => 6, - "CB" => 7, - "CL" => 8, - "CM" => 9, - "CR" => 10, - "EX" => 11, - "GL" => 12, - "HY" => 13, - "ID" => 14, - "IN" => 15, - "IS" => 16, - "LF" => 17, - "NS" => 18, - "NU" => 19, - "OP" => 20, - "PO" => 21, - "PR" => 22, - "QU" => 23, - "SA" => 24, - "SG" => 25, - "SP" => 26, - "SY" => 27, - "ZW" => 28, - "NL" => 29, - "WJ" => 30, - "H2" => 31, - "H3" => 32, - "JL" => 33, - "JT" => 34, - "JV" => 35, - "CP" => 36, - "CJ" => 37, - "HL" => 38, - "RI" => 39, - "EB" => 40, - "EM" => 41, - "ZWJ" => 42 -); - -my %eastAsianWidthCode = ( - "N" => 0, - "A" => 1, - "H" => 2, - "W" => 3, - "F" => 4, - "Na" => 5 -); - # initialize default properties -my @script; -my @category; -my @combining; -my @mirror; -my @pairedBracketType; -my @hangul; -my @casemap; -my @idtype; -my @numericvalue; my @hanVariant; -my @bidicategory; my @fullWidth; my @fullWidthInverse; my @verticalOrientation; -my @lineBreak; -my @eastAsianWidthFWH; -my @defaultIgnorable; for (my $i = 0; $i < 0x110000; ++$i) { - $script[$i] = $scriptCode{"UNKNOWN"}; - $category[$i] = $catCode{"UNASSIGNED"}; - $combining[$i] = 0; - $pairedBracketType[$i] = 0; - $casemap[$i] = 0; - $idtype[$i] = $mappedIdType{'Restricted'}; - $numericvalue[$i] = -1; $hanVariant[$i] = 0; - $bidicategory[$i] = $bidicategoryCode{"L"}; $fullWidth[$i] = 0; $fullWidthInverse[$i] = 0; $verticalOrientation[$i] = 1; # default for unlisted codepoints is 'R' - $lineBreak[$i] = $lineBreakCode{"XX"}; - $eastAsianWidthFWH[$i] = 0; - $defaultIgnorable[$i] = 0; } -# blocks where the default for bidi category is not L -for my $i (0x0600..0x07BF, 0x08A0..0x08FF, 0xFB50..0xFDCF, 0xFDF0..0xFDFF, 0xFE70..0xFEFF, 0x1EE00..0x0001EEFF) { - $bidicategory[$i] = $bidicategoryCode{"AL"}; -} -for my $i (0x0590..0x05FF, 0x07C0..0x089F, 0xFB1D..0xFB4F, 0x00010800..0x00010FFF, 0x0001E800..0x0001EDFF, 0x0001EF00..0x0001EFFF) { - $bidicategory[$i] = $bidicategoryCode{"R"}; -} -for my $i (0x20A0..0x20CF) { - $bidicategory[$i] = $bidicategoryCode{"ET"}; -} - -my %ucd2hb = ( -'Cc' => 'CONTROL', -'Cf' => 'FORMAT', -'Cn' => 'UNASSIGNED', -'Co' => 'PRIVATE_USE', -'Cs' => 'SURROGATE', -'Ll' => 'LOWERCASE_LETTER', -'Lm' => 'MODIFIER_LETTER', -'Lo' => 'OTHER_LETTER', -'Lt' => 'TITLECASE_LETTER', -'Lu' => 'UPPERCASE_LETTER', -'Mc' => 'SPACING_MARK', -'Me' => 'ENCLOSING_MARK', -'Mn' => 'NON_SPACING_MARK', -'Nd' => 'DECIMAL_NUMBER', -'Nl' => 'LETTER_NUMBER', -'No' => 'OTHER_NUMBER', -'Pc' => 'CONNECT_PUNCTUATION', -'Pd' => 'DASH_PUNCTUATION', -'Pe' => 'CLOSE_PUNCTUATION', -'Pf' => 'FINAL_PUNCTUATION', -'Pi' => 'INITIAL_PUNCTUATION', -'Po' => 'OTHER_PUNCTUATION', -'Ps' => 'OPEN_PUNCTUATION', -'Sc' => 'CURRENCY_SYMBOL', -'Sk' => 'MODIFIER_SYMBOL', -'Sm' => 'MATH_SYMBOL', -'So' => 'OTHER_SYMBOL', -'Zl' => 'LINE_SEPARATOR', -'Zp' => 'PARAGRAPH_SEPARATOR', -'Zs' => 'SPACE_SEPARATOR' -); - # read ReadMe.txt my @versionInfo; open FH, "< $UNICODE/ReadMe.txt" or die "can't open Unicode ReadMe.txt file\n"; @@ -347,12 +166,6 @@ while () { } close FH; -my $kTitleToUpper = 0x80000000; -my $kUpperToLower = 0x40000000; -my $kLowerToTitle = 0x20000000; -my $kLowerToUpper = 0x10000000; -my $kCaseMapCharMask = 0x001fffff; - # read UnicodeData.txt open FH, "< $UNICODE/UnicodeData.txt" or die "can't open UCD file UnicodeData.txt\n"; while () { @@ -365,12 +178,6 @@ while () { if ($fields[1] =~ /Last/) { my $last = hex "0x$fields[0]"; do { - $category[$first] = $catCode{$ucd2hb{$fields[2]}}; - $combining[$first] = $fields[3]; - $bidicategory[$first] = $bidicategoryCode{$fields[4]}; - unless (length($fields[7]) == 0) { - $numericvalue[$first] = $fields[7]; - } if ($fields[1] =~ /CJK/) { @hanVariant[$first] = 3; } @@ -381,33 +188,6 @@ while () { } } else { my $usv = hex "0x$fields[0]"; - $category[$usv] = $catCode{$ucd2hb{$fields[2]}}; - $combining[$usv] = $fields[3]; - my $upper = hex $fields[12]; - my $lower = hex $fields[13]; - my $title = hex $fields[14]; - # we only store one mapping for each character, - # but also record what kind of mapping it is - if ($upper && $lower) { - $casemap[$usv] |= $kTitleToUpper; - $casemap[$usv] |= ($usv ^ $upper); - } - elsif ($lower) { - $casemap[$usv] |= $kUpperToLower; - $casemap[$usv] |= ($usv ^ $lower); - } - elsif ($title && ($title != $upper)) { - $casemap[$usv] |= $kLowerToTitle; - $casemap[$usv] |= ($usv ^ $title); - } - elsif ($upper) { - $casemap[$usv] |= $kLowerToUpper; - $casemap[$usv] |= ($usv ^ $upper); - } - $bidicategory[$usv] = $bidicategoryCode{$fields[4]}; - unless (length($fields[7]) == 0) { - $numericvalue[$usv] = $fields[7]; - } if ($fields[1] =~ /CJK/) { @hanVariant[$usv] = 3; } @@ -427,180 +207,6 @@ while () { } close FH; -# read Scripts.txt -open FH, "< $UNICODE/Scripts.txt" or die "can't open UCD file Scripts.txt\n"; -push @versionInfo, ""; -while () { - chomp; - push @versionInfo, $_; - last if /Date:/; -} -while () { - if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+([^ ]+)/) { - my $script = uc($3); - unless (exists $scriptCode{$script}) { - warn "unknown ICU script $script"; - $scriptCode{$script} = $scriptCode{"UNKNOWN"}; - } - $script = $scriptCode{$script}; - my $script = $scriptCode{$script}; - my $start = hex "0x$1"; - my $end = (defined $2) ? hex "0x$2" : $start; - for (my $i = $start; $i <= $end; ++$i) { - $script[$i] = $script; - } - } -} -close FH; - -# read BidiMirroring.txt -my @offsets = (); -push @offsets, 0; - -open FH, "< $UNICODE/BidiMirroring.txt" or die "can't open UCD file BidiMirroring.txt\n"; -push @versionInfo, ""; -while () { - chomp; - push @versionInfo, $_; - last if /Date:/; -} -while () { - s/#.*//; - if (m/([0-9A-F]{4,6});\s*([0-9A-F]{4,6})/) { - my $mirrorOffset = hex("0x$2") - hex("0x$1"); - my $offsetIndex = first { $offsets[$_] eq $mirrorOffset } 0..$#offsets; - if ($offsetIndex == undef) { - die "too many offset codes\n" if scalar @offsets == 31; - push @offsets, $mirrorOffset; - $offsetIndex = $#offsets; - } - $mirror[hex "0x$1"] = $offsetIndex; - } -} -close FH; - -# read BidiBrackets.txt -my %pairedBracketTypeCode = ( - 'N' => 0, - 'O' => 1, - 'C' => 2 -); -open FH, "< $UNICODE/BidiBrackets.txt" or die "can't open UCD file BidiBrackets.txt\n"; -push @versionInfo, ""; -while () { - chomp; - push @versionInfo, $_; - last if /Date:/; -} -while () { - s/#.*//; - if (m/([0-9A-F]{4,6});\s*([0-9A-F]{4,6});\s*(.)/) { - my $mirroredChar = $offsets[$mirror[hex "0x$1"]] + hex "0x$1"; - die "bidi bracket does not match mirrored char\n" unless $mirroredChar == hex "0x$2"; - my $pbt = uc($3); - warn "unknown Bidi Bracket type" unless exists $pairedBracketTypeCode{$pbt}; - $pairedBracketType[hex "0x$1"] = $pairedBracketTypeCode{$pbt}; - } -} -close FH; - -# read HangulSyllableType.txt -my %hangulType = ( - 'L' => 0x01, - 'V' => 0x02, - 'T' => 0x04, - 'LV' => 0x03, - 'LVT' => 0x07 -); -open FH, "< $UNICODE/HangulSyllableType.txt" or die "can't open UCD file HangulSyllableType.txt\n"; -push @versionInfo, ""; -while () { - chomp; - push @versionInfo, $_; - last if /Date:/; -} -while () { - s/#.*//; - if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/) { - my $hangul = uc($3); - warn "unknown Hangul syllable type" unless exists $hangulType{$hangul}; - $hangul = $hangulType{$hangul}; - my $start = hex "0x$1"; - my $end = (defined $2) ? hex "0x$2" : $start; - for (my $i = $start; $i <= $end; ++$i) { - $hangul[$i] = $hangul; - } - } -} -close FH; - -# read LineBreak.txt -open FH, "< $UNICODE/LineBreak.txt" or die "can't open UCD file LineBreak.txt\n"; -push @versionInfo, ""; -while () { - chomp; - push @versionInfo, $_; - last if /Date:/; -} -while () { - s/#.*//; - if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/) { - my $lb = uc($3); - warn "unknown LineBreak class" unless exists $lineBreakCode{$lb}; - $lb = $lineBreakCode{$lb}; - my $start = hex "0x$1"; - my $end = (defined $2) ? hex "0x$2" : $start; - for (my $i = $start; $i <= $end; ++$i) { - $lineBreak[$i] = $lb; - } - } -} -close FH; - -# read EastAsianWidth.txt -open FH, "< $UNICODE/EastAsianWidth.txt" or die "can't open UCD file EastAsianWidth.txt\n"; -push @versionInfo, ""; -while () { - chomp; - push @versionInfo, $_; - last if /Date:/; -} -while () { - s/#.*//; - if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/) { - my $start = hex "0x$1"; - my $end = (defined $2) ? hex "0x$2" : $start; - my $eaw = $3; - warn "unknown EastAsianWidth class" unless exists $eastAsianWidthCode{$eaw}; - my $isFWH = ($eaw =~ m/^[FWH]$/) ? 1 : 0; - for (my $i = $start; $i <= $end; ++$i) { - $eastAsianWidthFWH[$i] = $isFWH; - } - } -} -close FH; - -# read DerivedCoreProperties.txt (for Default-Ignorables) -open FH, "< $UNICODE/DerivedCoreProperties.txt" or die "can't open UCD file DerivedCoreProperties.txt\n"; -push @versionInfo, ""; - -while () { - chomp; - push @versionInfo, $_; - last if /Date:/; -} -while () { - s/#.*//; - if (m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*Default_Ignorable_Code_Point/) { - my $start = hex "0x$1"; - my $end = (defined $2) ? hex "0x$2" : $start; - for (my $i = $start; $i <= $end; ++$i) { - $defaultIgnorable[$i] = 1; - } - } -} -close FH; - # read IdentifierStatus.txt open FH, "< $UNICODE/security/IdentifierStatus.txt" or die "can't open UCD file IdentifierStatus.txt\n"; push @versionInfo, ""; @@ -759,8 +365,7 @@ struct nsCharProps2 { unsigned char mIdType:2; }; |; -&genTables("", "", - "CharProp2", $type, "nsCharProps2", 9, 7, \&sprintCharProps2, 16, 1, 1); +&genTables("CharProp2", $type, "nsCharProps2", 9, 7, \&sprintCharProps2, 16, 1, 1); print HEADER "#pragma pack()\n\n"; @@ -776,42 +381,32 @@ sub sprintHanVariants return sprintf("0x%02x,", $val); } ## Han Variant data currently unused but may be needed in future, see bug 857481 -## &genTables("", "", "HanVariant", "", "uint8_t", 9, 7, \&sprintHanVariants, 2, 1, 4); +## &genTables("HanVariant", "", "uint8_t", 9, 7, \&sprintHanVariants, 2, 1, 4); sub sprintFullWidth { my $usv = shift; return sprintf("0x%04x,", $fullWidth[$usv]); } -&genTables("", "", "FullWidth", "", "uint16_t", 10, 6, \&sprintFullWidth, 0, 2, 1); +&genTables("FullWidth", "", "uint16_t", 10, 6, \&sprintFullWidth, 0, 2, 1); sub sprintFullWidthInverse { my $usv = shift; return sprintf("0x%04x,", $fullWidthInverse[$usv]); } -&genTables("", "", "FullWidthInverse", "", "uint16_t", 10, 6, \&sprintFullWidthInverse, 0, 2, 1); +&genTables("FullWidthInverse", "", "uint16_t", 10, 6, \&sprintFullWidthInverse, 0, 2, 1); print STDERR "Total data = $totalData\n"; -printf DATA_TABLES "const uint32_t kTitleToUpper = 0x%08x;\n", $kTitleToUpper; -printf DATA_TABLES "const uint32_t kUpperToLower = 0x%08x;\n", $kUpperToLower; -printf DATA_TABLES "const uint32_t kLowerToTitle = 0x%08x;\n", $kLowerToTitle; -printf DATA_TABLES "const uint32_t kLowerToUpper = 0x%08x;\n", $kLowerToUpper; -printf DATA_TABLES "const uint32_t kCaseMapCharMask = 0x%08x;\n\n", $kCaseMapCharMask; - sub genTables { - my ($guardBegin, $guardEnd, - $prefix, $typedef, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_; + my ($prefix, $typedef, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_; if ($typedef ne '') { - print HEADER "$guardBegin\n"; print HEADER "$typedef\n"; - print HEADER "$guardEnd\n\n"; } - print DATA_TABLES "\n$guardBegin\n"; print DATA_TABLES "#define k${prefix}MaxPlane $maxPlane\n"; print DATA_TABLES "#define k${prefix}IndexBits $indexBits\n"; print DATA_TABLES "#define k${prefix}CharBits $charBits\n"; @@ -880,7 +475,6 @@ sub genTables print DATA_TABLES $i < $#char ? "},\n" : "}\n"; } print DATA_TABLES "};\n"; - print DATA_TABLES "$guardEnd\n"; my $dataSize = $pmCount * $indexLen * $pmBits/8 + $chCount * $pageLen * $bytesPerEntry + -- cgit v1.2.3 From 2c92077debfbe17eaa29302307dcc5d257e3de87 Mon Sep 17 00:00:00 2001 From: Job Bautista Date: Thu, 23 Jun 2022 15:45:09 +0800 Subject: Issue #326 - Part 6: Regenerate Unicode data tables with the stripped-down tool --- intl/unicharutil/util/nsUnicodePropertyData.cpp | 38 +------------------------ intl/unicharutil/util/nsUnicodeScriptCodes.h | 26 +---------------- 2 files changed, 2 insertions(+), 62 deletions(-) diff --git a/intl/unicharutil/util/nsUnicodePropertyData.cpp b/intl/unicharutil/util/nsUnicodePropertyData.cpp index 6adc950e20..dccf14bcd2 100644 --- a/intl/unicharutil/util/nsUnicodePropertyData.cpp +++ b/intl/unicharutil/util/nsUnicodePropertyData.cpp @@ -11,7 +11,7 @@ */ /* - * Created on Thu Jun 23 05:27:27 2022 from UCD data files with version info: + * Created on Thu Jun 23 07:44:34 2022 from UCD data files with version info: * # Unicode Character Database @@ -29,27 +29,6 @@ This directory contains the final data files for the Unicode Character Database, for Version 10.0.0 of the Unicode Standard. -# Scripts-10.0.0.txt -# Date: 2017-03-11, 06:40:37 GMT - -# BidiMirroring-10.0.0.txt -# Date: 2017-04-12, 17:30:00 GMT [KW, LI] - -# BidiBrackets-10.0.0.txt -# Date: 2017-04-12, 17:30:00 GMT [AG, LI, KW] - -# HangulSyllableType-10.0.0.txt -# Date: 2017-02-14, 04:26:11 GMT - -# LineBreak-10.0.0.txt -# Date: 2017-03-08, 02:00:00 GMT [KW, LI] - -# EastAsianWidth-10.0.0.txt -# Date: 2017-03-08, 02:00:00 GMT [KW, LI] - -# DerivedCoreProperties-10.0.0.txt -# Date: 2017-03-19, 00:05:15 GMT - # IdentifierStatus.txt # Date: 2017-04-08, 16:13:41 GMT @@ -67,8 +46,6 @@ for the Unicode Character Database, for Version 10.0.0 of the Unicode Standard. #include #include "harfbuzz/hb.h" - - #define kCharProp2MaxPlane 16 #define kCharProp2IndexBits 9 #define kCharProp2CharBits 7 @@ -180,9 +157,6 @@ static const nsCharProps2 sCharProp2Values[96][128] = { {{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,1},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}, {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{1,0},{1,0}} }; - - - #define kFullWidthMaxPlane 0 #define kFullWidthIndexBits 10 #define kFullWidthCharBits 6 @@ -201,9 +175,6 @@ static const uint16_t sFullWidthValues[9][64] = { {0x30bf,0x30c1,0x30c4,0x30c6,0x30c8,0x30ca,0x30cb,0x30cc,0x30cd,0x30ce,0x30cf,0x30d2,0x30d5,0x30d8,0x30db,0x30de,0x30df,0x30e0,0x30e1,0x30e2,0x30e4,0x30e6,0x30e8,0x30e9,0x30ea,0x30eb,0x30ec,0x30ed,0x30ef,0x30f3,0x3099,0x309a,0x3164,0x3131,0x3132,0x3133,0x3134,0x3135,0x3136,0x3137,0x3138,0x3139,0x313a,0x313b,0x313c,0x313d,0x313e,0x313f,0x3140,0x3141,0x3142,0x3143,0x3144,0x3145,0x3146,0x3147,0x3148,0x3149,0x314a,0x314b,0x314c,0x314d,0x314e,0x0000}, {0x0000,0x0000,0x314f,0x3150,0x3151,0x3152,0x3153,0x3154,0x0000,0x0000,0x3155,0x3156,0x3157,0x3158,0x3159,0x315a,0x0000,0x0000,0x315b,0x315c,0x315d,0x315e,0x315f,0x3160,0x0000,0x0000,0x3161,0x3162,0x3163,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x2502,0x2190,0x2191,0x2192,0x2193,0x25a0,0x25cb,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000} }; - - - #define kFullWidthInverseMaxPlane 0 #define kFullWidthInverseIndexBits 10 #define kFullWidthInverseCharBits 6 @@ -226,13 +197,6 @@ static const uint16_t sFullWidthInverseValues[13][64] = { {0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006a,0x006b,0x006c,0x006d,0x006e,0x006f,0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007a,0x007b,0x007c,0x007d,0x007e,0x2985,0x2986,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000}, {0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x00a2,0x00a3,0x00ac,0x00af,0x00a6,0x00a5,0x20a9,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000} }; - -const uint32_t kTitleToUpper = 0x80000000; -const uint32_t kUpperToLower = 0x40000000; -const uint32_t kLowerToTitle = 0x20000000; -const uint32_t kLowerToUpper = 0x10000000; -const uint32_t kCaseMapCharMask = 0x001fffff; - /* * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * * */ diff --git a/intl/unicharutil/util/nsUnicodeScriptCodes.h b/intl/unicharutil/util/nsUnicodeScriptCodes.h index a1c4aa462b..b69148386e 100644 --- a/intl/unicharutil/util/nsUnicodeScriptCodes.h +++ b/intl/unicharutil/util/nsUnicodeScriptCodes.h @@ -11,7 +11,7 @@ */ /* - * Created on Thu Jun 23 05:27:27 2022 from UCD data files with version info: + * Created on Thu Jun 23 07:44:34 2022 from UCD data files with version info: * # Unicode Character Database @@ -29,27 +29,6 @@ This directory contains the final data files for the Unicode Character Database, for Version 10.0.0 of the Unicode Standard. -# Scripts-10.0.0.txt -# Date: 2017-03-11, 06:40:37 GMT - -# BidiMirroring-10.0.0.txt -# Date: 2017-04-12, 17:30:00 GMT [KW, LI] - -# BidiBrackets-10.0.0.txt -# Date: 2017-04-12, 17:30:00 GMT [AG, LI, KW] - -# HangulSyllableType-10.0.0.txt -# Date: 2017-02-14, 04:26:11 GMT - -# LineBreak-10.0.0.txt -# Date: 2017-03-08, 02:00:00 GMT [KW, LI] - -# EastAsianWidth-10.0.0.txt -# Date: 2017-03-08, 02:00:00 GMT [KW, LI] - -# DerivedCoreProperties-10.0.0.txt -# Date: 2017-03-19, 00:05:15 GMT - # IdentifierStatus.txt # Date: 2017-04-08, 16:13:41 GMT @@ -70,7 +49,6 @@ for the Unicode Character Database, for Version 10.0.0 of the Unicode Standard. #pragma pack(1) - struct nsCharProps2 { // Currently only 4 bits are defined here, so 4 more could be added without // affecting the storage requirements for this struct. Or we could pack two @@ -79,8 +57,6 @@ struct nsCharProps2 { unsigned char mIdType:2; }; - - #pragma pack() namespace mozilla { -- cgit v1.2.3