+++ /dev/null
-#! /usr/bin/perl
-
-# Copyright (C) 2000, 2001 Free Software Foundation
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# gen-table.pl - Generate tables for gcj from Unicode data.
-# Usage: perl gen-table.pl DATA-FILE
-#
-# You can find the Unicode data file here:
-# ftp://www.unicode.org/Public/3.0-Update1/UnicodeData-3.0.1.txt
-# Please update this URL when this program is used with a more
-# recent version of the table. Note that this table cannot be
-# distributed with gcc.
-# This program should not be re-run indiscriminately. Care must be
-# taken that what it generates is in sync with the Java specification.
-
-# Names of fields in Unicode data table.
-$CODE = 0;
-$NAME = 1;
-$CATEGORY = 2;
-$COMBINING_CLASSES = 3;
-$BIDI_CATEGORY = 4;
-$DECOMPOSITION = 5;
-$DECIMAL_VALUE = 6;
-$DIGIT_VALUE = 7;
-$NUMERIC_VALUE = 8;
-$MIRRORED = 9;
-$OLD_NAME = 10;
-$COMMENT = 11;
-$UPPER = 12;
-$LOWER = 13;
-$TITLE = 14;
-
-# Start of special-cased gaps in Unicode data table.
-%gaps = (
- 0x4e00 => "CJK",
- 0xac00 => "Hangul",
- 0xd800 => "Unassigned High Surrogate",
- 0xdb80 => "Private Use High Surrogate",
- 0xdc00 => "Low Surrogate",
- 0xe000 => "Private Use"
- );
-
-# This lists control characters which are also considered whitespace.
-# This is a somewhat odd list, taken from the JCL definition of
-# Character.isIdentifierIgnorable.
-%whitespace_controls =
- (
- 0x0009 => 1,
- 0x000a => 1,
- 0x000b => 1,
- 0x000c => 1,
- 0x000d => 1,
- 0x001c => 1,
- 0x001d => 1,
- 0x001e => 1,
- 0x001f => 1
- );
-
-open (INPUT, "< $ARGV[0]") || exit 1;
-
-$last_code = -1;
-while (<INPUT>)
-{
- chop;
- @fields = split (';', $_, 30);
- if ($#fields != 14)
- {
- print STDERR "Entry for $fields[$CODE] has wrong number of fields\n";
- }
-
- $code = hex ($fields[$CODE]);
- last if $code > 0xffff;
- if ($code > $last_code + 1)
- {
- # Found a gap.
- if (defined $gaps{$code})
- {
- # Fill the gap with the last character read.
- @gfields = @fields;
- }
- else
- {
- # The gap represents undefined characters. Only the type
- # matters.
- @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
- '', '', '', '');
- }
- for (++$last_code; $last_code < $code; ++$last_code)
- {
- $gfields{$CODE} = sprintf ("%04x", $last_code);
- &process_one ($last_code, @gfields);
- }
- }
- &process_one ($code, @fields);
- $last_code = $code;
-}
-
-close (INPUT);
-
-@gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
- '', '', '', '');
-for (++$last_code; $last_code < 0x10000; ++$last_code)
-{
- $gfields{$CODE} = sprintf ("%04x", $last_code);
- &process_one ($last_code, @gfields);
-}
---$last_code; # Want last to be 0xFFFF.
-
-&print_tables ($last_code);
-
-exit 0;
-
-# Process a single character.
-sub process_one
-{
- my ($code, @fields) = @_;
-
- my $value = '';
- my $type = $fields[$CATEGORY];
-
- # See if the character is a valid identifier start.
- if ($type =~ /L./ # Letter
- || $type eq 'Pc' # Connecting punctuation
- || $type eq 'Sc') # Currency symbol
- {
- $value = 'LETTER_START';
- }
-
- # See if the character is a valid identifier member.
- if ($type =~ /L./ # Letter
- || $type eq 'Pc' # Connecting punctuation
- || $type eq 'Sc' # Currency symbol
- || $type =~ /N[dl]/ # Number: decimal or letter
- || $type =~ /M[nc]/ # Mark: non-spacing or combining
- || ($type eq 'Cc' # Certain controls
- && ! defined $whitespace_controls{$code})
- || ($code >= 0x200c # Join controls
- && $code <= 0x200f)
- || ($code >= 0x202a # Bidi controls -- note that there
- # is a typo in the JCL where these are
- # concerned.
- && $code <= 0x202e)
- || ($code >= 0x206a # Format controls
- && $code <= 0x206f)
- || $code == 0xfeff) # ZWNBSP
- {
- if ($value eq '')
- {
- $value = 'LETTER_PART';
- }
- else
- {
- $value = 'LETTER_PART | ' . $value;
- }
- }
-
- if ($value eq '')
- {
- $value = '0';
- }
- else
- {
- $value = '(' . $value . ')';
- }
-
- $map[$code] = $value;
-}
-
-sub print_tables
-{
- my ($last) = @_;
-
- local ($bytes_out) = 0;
-
- open (OUT, "> chartables.h");
-
- print OUT "/* This file is automatically generated. DO NOT EDIT!\n";
- print OUT " Instead, edit gen-table.pl and re-run. */\n\n";
-
- print OUT "#ifndef GCC_CHARTABLES_H\n";
- print OUT "#define GCC_CHARTABLES_H\n\n";
-
- print OUT "#define LETTER_START 1\n";
- print OUT "#define LETTER_PART 2\n\n";
-
- for ($count = 0; $count <= $last; $count += 256)
- {
- $row[$count / 256] = &print_row ($count, '(char *) ', 'const char', 1,
- 'page');
- }
-
- print OUT "static const char *const type_table[256] = {\n";
- for ($count = 0; $count <= $last; $count += 256)
- {
- print OUT ",\n" if $count > 0;
- print OUT " ", $row[$count / 256];
- $bytes_out += 4;
- }
- print OUT "\n};\n\n";
-
- print OUT "#endif /* ! GCC_CHARTABLES_H */\n";
-
- close (OUT);
-
- printf "Generated %d bytes\n", $bytes_out;
-}
-
-# Print a single "row" of a two-level table.
-sub print_row
-{
- my ($start, $def_pfx, $typname, $typsize, $name) = @_;
-
- my ($i);
- my (@values);
- my ($flag) = 1;
- my ($off);
- for ($off = 0; $off < 256; ++$off)
- {
- $values[$off] = $map[$off + $start];
- if ($values[$off] ne $values[0])
- {
- $flag = 0;
- }
- }
- if ($flag)
- {
- return $def_pfx . $values[0];
- }
-
- printf OUT "static %s %s%d[256] = {\n ", $typname, $name, $start / 256;
- my ($column) = 2;
- for ($i = $start; $i < $start + 256; ++$i)
- {
- print OUT ", "
- if $i > $start;
- my ($text) = $values[$i - $start];
- if (length ($text) + $column + 2 > 78)
- {
- print OUT "\n ";
- $column = 2;
- }
- print OUT $text;
- $column += length ($text) + 2;
- }
- print OUT "\n};\n\n";
-
- $bytes_out += 256 * $typsize;
-
- return sprintf "%s%d", $name, $start / 256;
-}