diff options
author | Matt A. Tobin <email@mattatobin.com> | 2021-11-29 16:07:07 -0500 |
---|---|---|
committer | Matt A. Tobin <email@mattatobin.com> | 2021-11-29 16:07:07 -0500 |
commit | 66ffcf201a2023f3e38d05a9c2bf6d050986acca (patch) | |
tree | 8066dc9a24da2f32228eff6a89af9ad9616d9d64 /intl/uconv/tools/mkjpconv.pl | |
parent | 4e0c9d42eac353bf644f729c09a4642c41734a5e (diff) | |
download | aura-central-66ffcf201a2023f3e38d05a9c2bf6d050986acca.tar.gz |
Issue %3005 - Move intl to system/
Diffstat (limited to 'intl/uconv/tools/mkjpconv.pl')
-rwxr-xr-x | intl/uconv/tools/mkjpconv.pl | 323 |
1 files changed, 0 insertions, 323 deletions
#!/usr/bin/perl
use strict;
use warnings;

# Identification string embedded in the generated tables.  It is built
# before @ARGV is shifted, so it records the full command line.
my $ID = "mkjpconv.pl @ARGV (Time-stamp: <2001-08-08 18:54:54 shom>)";

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

#
# based on CP932.TXT from unicode.org
# additional information from SHIFTJIS.TXT from unicode.org
#
# mapping policy:
#   jis0208 to unicode : based on CP932
#   unicode to jis0208 : based on CP932
#       the lowest code is used for dual mapping to jis0208
#   ascii region       : based on ISO8859-1 ( same as CP932 ) IGNORE?
#   kana region        : based on CP932
#   IBM Ext(0xFxxx>)   : premap to NEC region ( mappable to JIS )
#
# Inputs read from the current directory: SHIFTJIS.TXT, NPL.header and the
# mapping file(s) named on the command line.  Outputs: a set of *.map files,
# a comment file, "<INFILE>.d/IBMNEC.map", "<INFILE>.d/jis0208.ump", and the
# *.uf files produced by running ./umaptable.  A summary table is printed on
# standard output.
#

if (@ARGV < 2 || $ARGV[0] eq "") {
    print STDERR "usage: mkjpconv.pl SHIFTJIS.TXT <INFILE(ex:CP932.TXT)> [Another check]\n";
    exit 1;
}

# ---------------------------------------------------------------------------
# Collect the Shift_JIS codes listed in SHIFTJIS.TXT.
# NOTE: the file name is fixed; the first command-line argument is only
# checked for presence and then discarded.
# ---------------------------------------------------------------------------
my %defined;
my $si = open_or_die('<', "SHIFTJIS.TXT");
while (my $line = <$si>) {
    my ($hi, $lo) = $line =~ /^0x(..)?(..)\s/;
    next unless defined $lo;            # not a mapping line
    $hi = " " unless defined $hi;       # single-byte code
    $defined{"0x$hi$lo"} = 1;
}
close($si);

shift(@ARGV);

my $src    = $ARGV[0];
my $gendir = "$src.d";
if (!-d $gendir) {
    mkdir($gendir, 0777)
        or die "mkjpconv.pl: cannot create directory $gendir: $!\n";
}

my $sufile      = "sjis2ucs-$src.map";
my $usfile      = "ucs2sjis-$src.map";
my $jufile      = "jis2ucs-$src.map";
my $jeufile     = "jisext2ucs-$src.map";
my $jaufile     = "jisasc2ucs-$src.map";
my $jrkufile    = "jiskana2ucs-$src.map";
my $ujfile      = "ucs2jis-$src.map";
my $ujefile     = "ucs2jisext-$src.map";
my $ujafile     = "ucs2jisasc-$src.map";
my $ujrkfile    = "ucs2jiskana-$src.map";
my $ibmnecfile  = "$gendir/IBMNEC.map";
my $jdxfile     = "$gendir/jis0208.ump";
my $commentfile = "comment-$src.txt";

# License header copied verbatim to the top of the generated tables.
my $NPL = "";
my $npl_fh = open_or_die('<', "NPL.header");
while (my $line = <$npl_fh>) {
    $NPL .= $line;
}
close($npl_fh);

# ---------------------------------------------------------------------------
# Read the mapping file(s).
#   %fromto : source code -> Unicode (first mapping seen wins)
#   %tofrom : Unicode -> comma-separated source codes, in input order
# Only the first file contributes to %tofrom; the remaining files are used
# to report mappings that differ from the first one.
# ---------------------------------------------------------------------------
my (@fromlist, %fromto, %tofrom, %commentbody, %commentseq);
my $checkanother = 0;

foreach my $infile (@ARGV) {
    my $in = open_or_die('<', $infile);

    while (my $line = <$in>) {
        my ($from, $to, $seq, undef, $comment) = $line =~
            /^\s*(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)(\+0x\S+)?(\s+\#\s*(\S.*))?$/;
        next unless defined $from;      # not a mapping line

        $seq     = "" unless defined $seq;
        $comment = "" unless defined $comment;

        if ($seq ne "") {
            print "Warning: Unicode Seq:\t$from\t$to$seq\t# $comment\n";
        }

        # Two-digit (single-byte) codes are stored with a leading blank.
        if ($from =~ /0x(..)$/) {
            $from = " 0x$1";
        }

        if (!exists $fromto{$from}) {
            push(@fromlist, $from);
            $fromto{$from}      = $to;
            $commentbody{$from} = $comment;
            $commentseq{$from}  = $seq;
        }
        elsif ($fromto{$from} ne $to) {
            # another mapping SJIS:UCS2 = 1:N
            print "Another map in $infile\t$from\t$fromto{$from},$to\n";
        }

        next if $checkanother;

        if (!exists $tofrom{$to}) {
            $tofrom{$to} = $from;
        }
        else {
            # Append $from only if it is not already in the list.  (The
            # original used the list as a regex against $from, which could
            # not detect an entry inside a multi-element list.)
            my $known = grep { $_ eq $from } split(/,/, $tofrom{$to});
            if (!$known) {
                $tofrom{$to} = "$tofrom{$to},$from";
            }
        }
    }

    close($in);

    # Was "$checkanother == 1;" - a comparison with no effect.
    $checkanother = 1;
}

my @sorted_from = sort(@fromlist);

my $comment_fh = open_or_die('>', $commentfile);
foreach my $from (@sorted_from) {
    print {$comment_fh} "$from\t$fromto{$from}$commentseq{$from}\t$commentbody{$from}\n";
}
close_or_die($comment_fh, $commentfile);

# ---------------------------------------------------------------------------
# Write the forward maps and collect the reverse maps.
# ---------------------------------------------------------------------------
my $su     = open_or_die('>', $sufile);
my $us     = open_or_die('>', $usfile);
my $ju     = open_or_die('>', $jufile);
my $jeu    = open_or_die('>', $jeufile);
my $jau    = open_or_die('>', $jaufile);
my $jrku   = open_or_die('>', $jrkufile);
my $uj     = open_or_die('>', $ujfile);
my $uje    = open_or_die('>', $ujefile);
my $uja    = open_or_die('>', $ujafile);
my $ujrk   = open_or_die('>', $ujrkfile);
my $ibmnec_fh = open_or_die('>', $ibmnecfile);

print "Generated from $src\n";
print "Command: mkjpconv.pl @ARGV\n";
print "SJIS(JIS)\tUCS2\tSJIS\tS:U:S\tSJIS lower\n";

my (%ibmnec, %jisucs, %jisextucs);
my (@uslist, @ujlist, @ujelist, @ujalist, @ujrklist);

foreach my $from (@sorted_from) {
    my $sjis = $from;
    $sjis =~ s/\s+//;                   # drop the leading blank, if any
    my $jis  = sjistojis($sjis);
    my $ucs  = $fromto{$from};
    my $back = defined $tofrom{$ucs} ? $tofrom{$ucs} : "";

    print "$from($jis)\t$ucs\t$back";

    if ($from eq $back) {
        print "\t1:1:1";
        print "\t$from";
    }
    else {
        print "\t1:1:N";
        my @tolist = split(/,/, $back);
        my $lowest = defined $tolist[0] ? $tolist[0] : "";
        print "\t$lowest";
        if ($sjis =~ /0xF[A-D]../) {
            # IBM extension code: remember the first code sharing its UCS.
            $ibmnec{$sjis} = $lowest;
        }
    }

    print {$su} "$sjis\t$ucs\n";
    push(@uslist, "$ucs\t$sjis\n");

    if ($jis ne "") {
        if ($sjis =~ /0x..../ && !$defined{$sjis}) {
            # not listed in SHIFTJIS.TXT: treated as an extension
            print {$jeu} "$jis\t$ucs\n";
            push(@ujelist, "$ucs\t$jis\n");
            $jisextucs{$jis} = $ucs;
        }
        else {
            print {$ju} "$jis\t$ucs\n";
            push(@ujlist, "$ucs\t$jis\n");
            $jisucs{$jis} = $ucs;
        }
    }
    elsif ($sjis =~ /\s*0x([8-9A-D].)/) {
        my $code = $1;
        print {$jrku} "0x00$code\t$ucs\n";
        push(@ujrklist, "$ucs\t0x00$code\n");
    }
    elsif ($sjis =~ /\s*0x([0-7].)/) {
        my $code = $1;
        print {$jau} "0x00$code\t$ucs\n";
        push(@ujalist, "$ucs\t0x00$code\n");
    }
    print "\n";
}

print {$us}   sort(@uslist);
print {$uj}   sort(@ujlist);
print {$uje}  sort(@ujelist);
print {$uja}  sort(@ujalist);
print {$ujrk} sort(@ujrklist);

# ---------------------------------------------------------------------------
# make ibmnec mapping
# ---------------------------------------------------------------------------
print {$ibmnec_fh} $NPL;
print {$ibmnec_fh} "/* generated by $ID */\n";
print {$ibmnec_fh} "/* IBM ext codes to NEC sel (in CP932) */\n\n";

foreach my $lead (0xFA, 0xFB, 0xFC) {
    for (my $trail = (($lead == 0xFA) ? 0x40 : 0x00); $trail <= 0xFF; $trail++) {
        my $ibm = sprintf("0x%02X%02X", $lead, $trail);
        my $raw = substr($ibm, 2, 6);
        # String test: the original numeric "==" was true for every value,
        # because hex strings such as "0x8754" numify to 0.
        if (!defined $ibmnec{$ibm} || $ibmnec{$ibm} eq "") {
            print {$ibmnec_fh} "/* ${raw}:UNDEF */ 0, \n";
        }
        else {
            print {$ibmnec_fh} "/* $raw */ $ibmnec{$ibm}, \n";
        }
    }
}

close_or_die($ibmnec_fh, $ibmnecfile);

# ---------------------------------------------------------------------------
# make jdx
# ---------------------------------------------------------------------------
my $jdx = open_or_die('>', $jdxfile);

print {$jdx} $NPL;
print {$jdx} "/* generated by $ID */\n";
print {$jdx} "/* JIS X 0208 (with CP932 ext) to Unicode mapping */\n";

for (my $row = 0; $row < 94; $row++) {
    printf {$jdx} "/* 0x%2XXX */\n", ($row + 0x21);
    print  {$jdx} " ";
    my $col;
    for ($col = 0; $col < 94; $col++) {
        my $jis = sprintf("0x%02X%02X", ($row + 0x21), $col + 0x21);
        # get JIS
        my $ucs = $jisucs{$jis};
        if (!defined $ucs || $ucs eq "") {
            # try CP932 ext / jis ext
            $ucs = $jisextucs{$jis};
        }
        if (!defined $ucs || $ucs eq "") {
            # undefined
            print {$jdx} "0xFFFD,";
        }
        else {
            print {$jdx} "$ucs,";
        }
        if (7 == (($col + 1) % 8)) {
            printf {$jdx} "/* 0x%2X%1X%1X*/\n",
                $row + 0x21, 2 + int($col / 16), (6 == ($col % 16)) ? 0 : 8;
        }
    }
    # $col is 94 here; label the final, partial group of the row.
    printf {$jdx} " /* 0x%2X%1X%1X*/\n",
        $row + 0x21, 2 + int($col / 16), (6 == ($col % 16)) ? 0 : 8;
}

close_or_die($jdx, $jdxfile);

# The map files must be complete on disk before umaptable reads them.
close_or_die($su,   $sufile);
close_or_die($us,   $usfile);
close_or_die($ju,   $jufile);
close_or_die($jeu,  $jeufile);
close_or_die($jau,  $jaufile);
close_or_die($jrku, $jrkufile);
close_or_die($uj,   $ujfile);
close_or_die($uje,  $ujefile);
close_or_die($uja,  $ujafile);
close_or_die($ujrk, $ujrkfile);

# ---------------------------------------------------------------------------
# generate uf files
# ---------------------------------------------------------------------------
genuf($sufile, "sjis.uf");
genuf($jufile, "jis0208.uf");
if (@ujelist > 0) {
    # Was "$#ujelist > 0", which skipped the case of exactly one entry.
    genuf($jeufile, "jis0208ext.uf");
}
else {
    print "Extension is not found. jis0208ext.uf is not generated.\n";
}
genuf("$jaufile $jrkufile", "jis0201.uf");
# genuf($jaufile, "jis0201.uf");
# genuf($jrkufile, "jis0201gl.uf");

# generate test page

exit;

# Run ./umaptable over one or more map files and store the result in
# $gendir/$outfile.  $infile is passed to the shell unquoted, so it may
# hold several space-separated file names.
sub genuf {
    my ($infile, $outfile) = @_;
    my $com = "cat $infile | ./umaptable -uf > $gendir/$outfile";
    print "Executing $com\n";
    system($com) == 0
        or warn "mkjpconv.pl: command failed: $com\n";
}

# Convert a Shift_JIS code written as "0xHHLL" into the corresponding
# "0xHHLL" JIS code string.  Returns "" when the argument is not "0x" plus
# four characters, or when the computed row falls outside the 94 rows.
sub sjistojis {
    my ($sjis) = @_;

    if ($sjis !~ /^0x....$/) {
        return "";
    }

    my $first  = hex(substr($sjis, 2, 2));
    my $second = hex(substr($sjis, 4, 2));

    # Number of trail-byte positions counted per lead byte (188).
    my $per_lead = (0xfd - 0x80) + (0x7f - 0x40);

    my $jnum;
    if ($first < 0xE0) {
        $jnum = ($first - 0x81) * $per_lead;
    }
    else {
        # Lead bytes from 0xE0 continue the numbering after 0x9F.
        $jnum = ($first - 0xe0 + (0xa0 - 0x81)) * $per_lead;
    }

    if ($second >= 0x80) {
        $jnum += $second - 0x80 + (0x7f - 0x40);
    }
    else {
        $jnum += $second - 0x40;
    }

    if (($jnum / 94) < 94) {
        return sprintf "0x%02X%02X", (int($jnum / 94) + 0x21), (($jnum % 94) + 0x21);
    }
    return "";
}

# Open $path with the given mode and return the handle; die with the
# system error message on failure.
sub open_or_die {
    my ($mode, $path) = @_;
    open(my $fh, $mode, $path)
        or die "mkjpconv.pl: cannot open $path: $!\n";
    return $fh;
}

# Close a written file, dying if buffered output could not be flushed.
sub close_or_die {
    my ($fh, $path) = @_;
    close($fh)
        or die "mkjpconv.pl: cannot close $path: $!\n";
    return;
}