+++ /dev/null
-/* Language lexer for the GNU compiler for the Java(TM) language.
- Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
- Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.
-
-Java and all Java-based marks are trademarks or registered trademarks
-of Sun Microsystems, Inc. in the United States and other countries.
-The Free Software Foundation is independent of Sun Microsystems, Inc. */
-
-/* It defines java_lex (yylex) that reads a Java ASCII source file
- possibly containing Unicode escape sequences or UTF-8 encoded
- characters and returns a token for everything found except comments,
- white space and line terminators. When necessary, it also fills
- the java_lval (yylval) union. It's implemented to be called by a
- re-entrant parser generated by Bison.
-
- The lexical analysis conforms to the Java grammar described in "The
- Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
- Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
-
-#include "keyword.h"
-#include "flags.h"
-#include "chartables.h"
-
-/* Function declarations.  Each file-local helper is declared exactly
-   once; the ones guarded by JC1_LITE are only needed by the full
-   compiler front end.  */
-static char *java_sprint_unicode PARAMS ((struct java_line *, int));
-static void java_unicode_2_utf8 PARAMS ((unicode_t));
-static void java_lex_error PARAMS ((const char *, int));
-#ifndef JC1_LITE
-static int java_is_eol PARAMS ((FILE *, int));
-static tree build_wfl_node PARAMS ((tree));
-#endif
-static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
-static int java_parse_escape_sequence PARAMS ((void));
-static int java_start_char_p PARAMS ((unicode_t));
-static int java_part_char_p PARAMS ((unicode_t));
-static int java_parse_doc_section PARAMS ((int));
-static void java_parse_end_comment PARAMS ((int));
-static int java_get_unicode PARAMS ((void));
-static int java_read_unicode PARAMS ((java_lexer *, int *));
-static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
-                                                             int *));
-static int java_read_char PARAMS ((java_lexer *));
-static void java_allocate_new_line PARAMS ((void));
-static void java_unget_unicode PARAMS ((void));
-static unicode_t java_sneak_unicode PARAMS ((void));
-#ifndef JC1_LITE
-static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
-#endif
-
-/* Not static: the lexer constructor is visible outside this file.  */
-java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
-#ifndef JC1_LITE
-static void error_if_numeric_overflow PARAMS ((tree));
-#endif
-
-#ifdef HAVE_ICONV
-/* This is nonzero once java_new_lexer has run its one-time byte-order
-   probe, i.e. once `need_byteswap' holds its final value. */
-static int byteswap_init = 0;
-
-/* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
-   big-endian order -- not native endian order. We handle this by
-   doing a conversion once at startup and seeing what happens. This
-   flag holds the results of this determination; java_new_lexer copies
-   it into the `byte_swap' field of every iconv-backed lexer. */
-static int need_byteswap = 0;
-#endif
-
-/* Prepare the current parser context (ctxp) for lexing a new source
-   file read from FINPUT and decoded according to ENCODING.
-
-   Unless JC1_LITE is defined, this also creates on first use the
-   identifiers and wfl nodes cached in file-scope/global variables,
-   pushes `java.lang' onto the context's import-on-demand list and
-   clears the per-file parser state.  In every configuration it resets
-   the line bookkeeping and installs a fresh lexer in ctxp->lexer.  */
-
-void
-java_init_lex (finput, encoding)
-     FILE *finput;
-     const char *encoding;
-{
-#ifndef JC1_LITE
-  /* Lazily created nodes: each one is built only while still unset.  */
-  if (!java_lang_id)
-    java_lang_id = get_identifier ("java.lang");
-  if (!java_lang_cloneable)
-    java_lang_cloneable = get_identifier ("java.lang.Cloneable");
-  if (!java_io_serializable)
-    java_io_serializable = get_identifier ("java.io.Serializable");
-  if (!inst_id)
-    inst_id = get_identifier ("inst$");
-  if (!wpv_id)
-    wpv_id = get_identifier ("write_parm_value$");
-
-  /* Put `java.lang' at the head of this context's import-on-demand
-     list.  This is done on every call: the push used to be guarded by
-     a local flag that was always zero when tested, so the guard never
-     prevented anything and has been dropped.  */
-  {
-    tree node = build_tree_list
-      (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
-    read_import_dir (TREE_PURPOSE (node));
-    TREE_CHAIN (node) = ctxp->import_demand_list;
-    ctxp->import_demand_list = node;
-  }
-
-  /* NOTE(review): ctxp->filename is only assigned further down, so the
-     first time through this uses whatever the context already holds --
-     confirm that this is intended.  */
-  if (!wfl_operator)
-    wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
-  if (!label_id)
-    label_id = get_identifier ("$L");
-  if (!wfl_append)
-    wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
-  if (!wfl_string_buffer)
-    wfl_string_buffer =
-      build_expr_wfl (get_identifier (flag_emit_class_files
-                                      ? "java.lang.StringBuffer"
-                                      : "gnu.gcj.runtime.StringBuffer"),
-                      NULL, 0, 0);
-  if (!wfl_to_string)
-    wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
-
-  /* Forget everything recorded about the previously parsed file.  */
-  CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
-    CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
-
-  memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
-  memset ((PTR) current_jcf, 0, sizeof (JCF));
-  ctxp->current_parsed_class = NULL;
-  ctxp->package = NULL_TREE;
-#endif
-
-  /* Line bookkeeping starts from scratch; no line has been read yet.  */
-  ctxp->filename = input_filename;
-  ctxp->lineno = lineno = 0;
-  ctxp->p_line = NULL;
-  ctxp->c_line = NULL;
-  ctxp->java_error_flag = 0;
-  ctxp->lexer = java_new_lexer (finput, encoding);
-}
-
-/* Return a printable rendition of the character at index I of LINE:
-   the character itself when it is plain ASCII (0..127) and was not
-   written as a Unicode escape, otherwise its `\uXXXX' spelling.
-
-   The result lives in a static buffer that the next call overwrites.
-   The buffer size assumes unicode_t holds at most 16 bits, so that the
-   escape never needs more than four hex digits -- TODO confirm.  */
-
-static char *
-java_sprint_unicode (line, i)
-     struct java_line *line;
-     int i;
-{
-  static char buffer [10];
-
-  /* 128 (0x80) is the first non-ASCII value, so it must take the
-     escaped form as well; the comparison is deliberately `>='.  */
-  if (line->unicode_escape_p [i] || line->line [i] >= 128)
-    sprintf (buffer, "\\u%04x", (unsigned int) line->line [i]);
-  else
-    {
-      buffer [0] = line->line [i];
-      buffer [1] = '\0';
-    }
-  return buffer;
-}
-
-/* Return the character sitting at the read position of the current
-   line without advancing that position.  */
-
-static unicode_t
-java_sneak_unicode ()
-{
-  struct java_line *cur = ctxp->c_line;
-
-  return cur->line [cur->current];
-}
-
-/* Step the read position of the current line back by one character
-   and take that character's width off the column counter.  Aborts if
-   the read position is already at the start of the line.  */
-
-static void
-java_unget_unicode ()
-{
-  struct java_line *cur = ctxp->c_line;
-
-  /* Nothing has been handed out from this line: can't unget.  */
-  if (cur->current == 0)
-    abort ();
-
-  /* Move back first, then measure: the column delta is evaluated after
-     the position has been decremented, exactly as before.
-     (JAVA_COLUMN_DELTA presumably inspects the character at the
-     current position -- verify against its definition.)  */
-  cur->current--;
-  cur->char_col -= JAVA_COLUMN_DELTA (0);
-}
-
-/* Make ctxp->c_line an empty line buffer ready to receive the next
-   source line.
-
-   A current line that contained something other than white space is
-   kept as ctxp->p_line (the line previously held there is freed) and a
-   new buffer is allocated; a white-space-only line buffer is simply
-   reused.  A pending look-ahead character of the old line, if any,
-   becomes the first character of the new one.  The global line
-   counter is incremented and recorded in the new line.  */
-
-static void
-java_allocate_new_line ()
-{
-  struct java_line *cur = ctxp->c_line;
-  unicode_t ahead = '\0';
-  char ahead_escape_p = 0;
-
-  if (cur)
-    {
-      /* Remember the look-ahead before the buffer is recycled.  */
-      ahead = cur->ahead[0];
-      ahead_escape_p = cur->unicode_escape_ahead_p;
-
-      if (!cur->white_space_only)
-        {
-          struct java_line *old = ctxp->p_line;
-
-          if (old)
-            {
-              free (old->unicode_escape_p);
-              free (old->line);
-              free (old);
-            }
-          ctxp->p_line = cur;
-          cur = NULL;           /* Forces a fresh allocation below.  */
-        }
-    }
-
-  if (!cur)
-    {
-      cur = (struct java_line *) xmalloc (sizeof (struct java_line));
-      cur->max = JAVA_LINE_MAX;
-      cur->line = (unicode_t *) xmalloc (sizeof (unicode_t) * cur->max);
-      cur->unicode_escape_p = (char *) xmalloc (sizeof (char) * cur->max);
-      cur->white_space_only = 0;
-    }
-  ctxp->c_line = cur;
-
-  /* Start from an empty line ...  */
-  cur->size = 0;
-  cur->line [0] = 0;
-  cur->current = 0;
-  cur->char_col = 0;
-
-  /* ... re-inject the carried-over look-ahead character, if any ...  */
-  if (ahead)
-    {
-      cur->line [cur->size] = ahead;
-      cur->unicode_escape_p [cur->size] = ahead_escape_p;
-      cur->size++;
-    }
-
-  /* ... and reset the per-line bookkeeping.  */
-  cur->ahead [0] = 0;
-  cur->unicode_escape_ahead_p = 0;
-  cur->lineno = ++lineno;
-  cur->white_space_only = 1;
-}
-
-/* Create a new lexer object.
-
-   FINPUT is the stream the lexer will read; ENCODING names the
-   character encoding of its contents.  When iconv support is compiled
-   in, a converter from ENCODING to UCS-2 is opened.  If that fails, or
-   if iconv is not available, the built-in UTF-8 decoder is used
-   instead -- but only when ENCODING is DEFAULT_ENCODING or "646"; any
-   other encoding is reported through fatal_error.
-
-   Returns the xmalloc'ed lexer; java_destroy_lexer releases it.  */
-
-java_lexer *
-java_new_lexer (finput, encoding)
-     FILE *finput;
-     const char *encoding;
-{
-  java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
-  int enc_error = 0;
-
-  lex->finput = finput;
-  lex->bs_count = 0;
-  lex->unget_value = 0;
-  lex->hit_eof = 0;
-
-#ifdef HAVE_ICONV
-  lex->handle = iconv_open ("UCS-2", encoding);
-  if (lex->handle != (iconv_t) -1)
-    {
-      /* -1 marks the input and output buffers as never filled.  */
-      lex->first = -1;
-      lex->last = -1;
-      lex->out_first = -1;
-      lex->out_last = -1;
-      lex->read_anything = 0;
-      lex->use_fallback = 0;
-
-      /* Work around broken iconv() implementations by doing checking at
-         runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
-         then all UCS-2 encoders will be broken.  Perhaps not a valid
-         assumption.  */
-      if (! byteswap_init)
-        {
-          iconv_t handle;
-
-          byteswap_init = 1;
-
-          handle = iconv_open ("UCS-2", "UTF-8");
-          if (handle != (iconv_t) -1)
-            {
-              unicode_t result;
-              unsigned char in[3];
-              char *inp, *outp;
-              size_t inc, outc, r;
-
-              /* This is the UTF-8 encoding of \ufeff.  */
-              in[0] = 0xef;
-              in[1] = 0xbb;
-              in[2] = 0xbf;
-
-              /* iconv works on `char *'; the bytes are built as
-                 unsigned char, so the conversion is made explicit.  */
-              inp = (char *) in;
-              inc = 3;
-              outp = (char *) &result;
-              outc = 2;
-
-              r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
-                         &outp, &outc);
-              iconv_close (handle);
-              /* Conversion must be complete for us to use the result.
-                 If the code unit does not read back as 0xfeff, iconv
-                 delivers the opposite byte order from the host's.  */
-              if (r != (size_t) -1 && inc == 0 && outc == 0)
-                need_byteswap = (result != 0xfeff);
-            }
-        }
-
-      lex->byte_swap = need_byteswap;
-    }
-  else
-#endif /* HAVE_ICONV */
-    {
-      /* If iconv failed, use the internal decoder if the default
-         encoding was requested.  This code is used on platforms where
-         iconv exists but is insufficient for our needs.  For
-         instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
-
-         On Solaris the default encoding, as returned by nl_langinfo(),
-         is `646' (aka ASCII), but the Solaris iconv_open() doesn't
-         understand that.  We work around that by pretending
-         `646' to be the same as UTF-8.  */
-      if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
-        enc_error = 1;
-#ifdef HAVE_ICONV
-      else
-        lex->use_fallback = 1;
-#endif /* HAVE_ICONV */
-    }
-
-  if (enc_error)
-    fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
-
-  return lex;
-}
-
-/* Release LEX.  When iconv support is compiled in and the lexer is
-   not using the fallback decoder, its conversion handle is closed
-   before the object itself is freed.  */
-
-void
-java_destroy_lexer (lex)
-     java_lexer *lex;
-{
-#ifdef HAVE_ICONV
-  /* A fallback lexer never obtained a usable conversion handle.  */
-  if (lex->use_fallback)
-    ;
-  else
-    iconv_close (lex->handle);
-#endif
-
-  free (lex);
-}
-
-/* Return the next character of LEX's input as a UCS-2 code unit, or
-   UEOF at end of input or once an undecodable sequence has been
-   reported through java_lex_error.
-
-   A character pushed back in lex->unget_value is returned first.
-   Otherwise the input is decoded either by iconv, going through
-   lex->buffer (raw bytes) and lex->out_buffer (converted UCS-2), or --
-   when lex->use_fallback is set or iconv support is compiled out -- by
-   the built-in UTF-8 decoder at the end of this function.  */
-
-static int
-java_read_char (lex)
-     java_lexer *lex;
-{
-  /* A pending pushed-back character takes priority.  Zero doubles as
-     "nothing pending".  */
-  if (lex->unget_value)
-    {
-      unicode_t r = lex->unget_value;
-      lex->unget_value = 0;
-      return r;
-    }
-
-#ifdef HAVE_ICONV
-  if (! lex->use_fallback)
-    {
-      size_t ir, inbytesleft, in_save, out_count, out_save;
-      char *inp, *outp;
-      unicode_t result;
-
-      /* If there is data which has already been converted, use it.
-         Otherwise refill the output buffer.  */
-      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
-        {
-          lex->out_first = 0;
-          lex->out_last = 0;
-
-          while (1)
-            {
-              /* See if we need to read more data.  If FIRST == 0 then
-                 the previous conversion attempt ended in the middle of
-                 a character at the end of the buffer.  Otherwise we
-                 only have to read if the buffer is empty.  */
-              if (lex->first == 0 || lex->first >= lex->last)
-                {
-                  int r;
-
-                  if (lex->first >= lex->last)
-                    {
-                      lex->first = 0;
-                      lex->last = 0;
-                    }
-                  if (feof (lex->finput))
-                    return UEOF;
-                  r = fread (&lex->buffer[lex->last], 1,
-                             sizeof (lex->buffer) - lex->last,
-                             lex->finput);
-                  lex->last += r;
-                }
-
-              inbytesleft = lex->last - lex->first;
-              out_count = sizeof (lex->out_buffer) - lex->out_last;
-
-              if (inbytesleft == 0)
-                {
-                  /* We've tried to read and there is nothing left.  */
-                  return UEOF;
-                }
-
-              /* Remember the counts so that the amount consumed and
-                 produced by iconv can be computed afterwards.  */
-              in_save = inbytesleft;
-              out_save = out_count;
-              inp = &lex->buffer[lex->first];
-              outp = &lex->out_buffer[lex->out_last];
-              ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
-                          &inbytesleft, &outp, &out_count);
-
-              /* If we haven't read any bytes, then look to see if we
-                 have read a BOM.  */
-              if (! lex->read_anything && out_save - out_count >= 2)
-                {
-                  unicode_t uc;
-
-                  /* Copy the code unit out instead of dereferencing a
-                     cast pointer: the byte buffer need not be aligned
-                     for unicode_t, and reading it through a unicode_t
-                     lvalue is not a valid access.  */
-                  memcpy (&uc, &lex->out_buffer[0], sizeof uc);
-                  if (uc == 0xfeff)
-                    {
-                      lex->byte_swap = 0;
-                      lex->out_first += 2;
-                    }
-                  else if (uc == 0xfffe)
-                    {
-                      lex->byte_swap = 1;
-                      lex->out_first += 2;
-                    }
-                  lex->read_anything = 1;
-                }
-
-              /* Swap each freshly converted byte pair in place.  */
-              if (lex->byte_swap)
-                {
-                  unsigned int i;
-                  for (i = 0; i < out_save - out_count; i += 2)
-                    {
-                      char t = lex->out_buffer[lex->out_last + i];
-                      lex->out_buffer[lex->out_last + i]
-                        = lex->out_buffer[lex->out_last + i + 1];
-                      lex->out_buffer[lex->out_last + i + 1] = t;
-                    }
-                }
-
-              lex->first += in_save - inbytesleft;
-              lex->out_last += out_save - out_count;
-
-              /* If we converted anything at all, move along.  */
-              if (out_count != out_save)
-                break;
-
-              if (ir == (size_t) -1)
-                {
-                  if (errno == EINVAL)
-                    {
-                      /* This is ok.  This means that the end of our buffer
-                         is in the middle of a character sequence.  We just
-                         move the valid part of the buffer to the beginning
-                         to force a read.  */
-                      memmove (&lex->buffer[0], &lex->buffer[lex->first],
-                               lex->last - lex->first);
-                      lex->last -= lex->first;
-                      lex->first = 0;
-                    }
-                  else
-                    {
-                      /* A more serious error.  */
-                      java_lex_error ("unrecognized character in input stream",
-                                      0);
-                      return UEOF;
-                    }
-                }
-            }
-        }
-
-      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
-        {
-          /* Don't have any data.  */
-          return UEOF;
-        }
-
-      /* Success.  Each converted character occupies two bytes of the
-         output buffer; see the BOM check above for why it is copied
-         out rather than read through a cast pointer.  */
-      memcpy (&result, &lex->out_buffer[lex->out_first], sizeof result);
-      lex->out_first += 2;
-      return result;
-    }
-  else
-#endif /* HAVE_ICONV */
-    {
-      /* Built-in UTF-8 decoder for 1-, 2- and 3-byte sequences.  */
-      int c, c1, c2;
-      c = getc (lex->finput);
-
-      if (c == EOF)
-        return UEOF;
-      if (c < 128)
-        return (unicode_t) c;
-      else
-        {
-          if ((c & 0xe0) == 0xc0)
-            {
-              c1 = getc (lex->finput);
-              if ((c1 & 0xc0) == 0x80)
-                {
-                  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
-                  /* Check for valid 2-byte characters.  We explicitly
-                     allow \0 because this encoding is common in the
-                     Java world.  */
-                  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
-                    return r;
-                }
-            }
-          else if ((c & 0xf0) == 0xe0)
-            {
-              c1 = getc (lex->finput);
-              if ((c1 & 0xc0) == 0x80)
-                {
-                  c2 = getc (lex->finput);
-                  if ((c2 & 0xc0) == 0x80)
-                    {
-                      unicode_t r = (unicode_t)(((c & 0xf) << 12) +
-                                                (( c1 & 0x3f) << 6)
-                                                + (c2 & 0x3f));
-                      /* Check for valid 3-byte characters.
-                         Don't allow surrogate, \ufffe or \uffff.  */
-                      if (r >= 0x800 && r <= 0xffff
-                          && ! (r >= 0xd800 && r <= 0xdfff)
-                          && r != 0xfffe && r != 0xffff)
-                        return r;
-                    }
-                }
-            }
-
-          /* We simply don't support invalid characters.  We also
-             don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
-             cannot be valid Java characters.  */
-          java_lex_error ("malformed UTF-8 character", 0);
-        }
-    }
-
-  /* We only get here on error.  */
-  return UEOF;
-}
-
-/* Append character C to line L, recording in the parallel
-   unicode_escape_p array whether it came from a Unicode escape
-   (UNICODE_ESCAPE_P).  Both arrays grow by JAVA_LINE_MAX entries
-   whenever the line is full.  */
-
-static void
-java_store_unicode (l, c, unicode_escape_p)
-     struct java_line *l;
-     unicode_t c;
-     int unicode_escape_p;
-{
-  if (l->size == l->max)
-    {
-      /* Out of room: enlarge the two arrays in lock step.  */
-      l->max += JAVA_LINE_MAX;
-      l->line = (unicode_t *) xrealloc (l->line,
-                                        l->max * sizeof (unicode_t));
-      l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
-                                               l->max * sizeof (char));
-    }
-
-  l->line [l->size] = c;
-  l->unicode_escape_p [l->size] = unicode_escape_p;
-  l->size++;
-}
-
-/* Read one character from LEX, translating a Unicode escape (a
-   backslash, one or more `u's and four hex digits) into the character
-   it denotes.  *UNICODE_ESCAPE_P is set to 1 when the result came from
-   such an escape and to 0 otherwise.  Only a backslash preceded by an
-   even number of backslashes can start an escape; lex->bs_count keeps
-   the running count.  Returns UEOF if the input ends inside an
-   escape.  */
-
-static int
-java_read_unicode (lex, unicode_escape_p)
-     java_lexer *lex;
-     int *unicode_escape_p;
-{
-  unicode_t unicode = 0;
-  int remaining;
-  int c = java_read_char (lex);
-
-  *unicode_escape_p = 0;
-
-  /* Anything but a backslash is returned as is and ends the run.  */
-  if (c != '\\')
-    {
-      lex->bs_count = 0;
-      return c;
-    }
-
-  ++lex->bs_count;
-
-  /* Even number of \ seen: this one is a plain backslash.  */
-  if ((lex->bs_count) % 2 != 1)
-    return (unicode_t) '\\';
-
-  /* Odd number of \ seen: an escape only if a `u' follows.  */
-  c = java_read_char (lex);
-  if (c != 'u')
-    {
-      lex->unget_value = c;
-      return (unicode_t) '\\';
-    }
-
-  /* Recognize any number of `u's in \u.  */
-  do
-    c = java_read_char (lex);
-  while (c == 'u');
-
-  /* Unget the most recent character as it is not a `u'.  */
-  if (c == UEOF)
-    return UEOF;
-  lex->unget_value = c;
-
-  /* Next should be 4 hex digits, otherwise it's an error.  The digits
-     are accumulated most significant first.  */
-  for (remaining = 4; remaining > 0; remaining--)
-    {
-      c = java_read_char (lex);
-      if (c == UEOF)
-        return UEOF;
-      if (hex_p (c))
-        unicode |= (unicode_t)(hex_value (c) << (4 * (remaining - 1)));
-      else
-        java_lex_error ("Non hex digit in Unicode escape sequence", 0);
-    }
-
-  lex->bs_count = 0;
-  *unicode_escape_p = 1;
-  return unicode;
-}
-
-/* Like java_read_unicode, but report every line terminator as a
-   single '\n': a lone '\r' and the pair "\r\n" both come back as one
-   newline.  */
-
-static int
-java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
-     java_lexer *lex;
-     int *unicode_escape_p;
-{
-  int escape_ignored;
-  int c = java_read_unicode (lex, unicode_escape_p);
-
-  if (c != '\r')
-    return c;
-
-  /* We have to read ahead to see if we got \r\n.  Whatever follows the
-     '\r' is pushed back unless it is the '\n' completing the pair.  */
-  c = java_read_unicode (lex, &escape_ignored);
-  if (c != '\n')
-    lex->unget_value = c;
-
-  /* In either case we must return a newline.  */
-  return '\n';
-}
-
-/* Return the next character of the current line and advance the
-   read position and column counter.  When the current line is
-   exhausted (or none exists yet) a complete new line, up to and
-   including its '\n', is first read into ctxp->c_line.  Returns UEOF
-   once the input is exhausted.  */
-
-static int
-java_get_unicode ()
-{
-  /* It's time to read a line when...  */
-  if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
-    {
-      if (ctxp->lexer->hit_eof)
-        return UEOF;
-
-      java_allocate_new_line ();
-
-      /* A carried-over newline already makes up a complete line.  */
-      if (ctxp->c_line->line[0] != '\n')
-        {
-          int stored_any = 0;
-          int c;
-
-          do
-            {
-              int escaped;
-
-              c = java_read_unicode_collapsing_terminators (ctxp->lexer,
-                                                            &escaped);
-              if (c == UEOF)
-                break;
-
-              stored_any = 1;
-              java_store_unicode (ctxp->c_line, c, escaped);
-              if (ctxp->c_line->white_space_only
-                  && !JAVA_WHITE_SPACE_P (c)
-                  && c != '\n')
-                ctxp->c_line->white_space_only = 0;
-            }
-          while (c != '\n');
-
-          /* End of input with nothing read in this call: remember it so
-             that later calls return UEOF right away.  */
-          if (c == UEOF && ! stored_any)
-            {
-              ctxp->lexer->hit_eof = 1;
-              return UEOF;
-            }
-        }
-    }
-
-  /* The column is updated before the position moves, as before.  */
-  ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
-  JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
-  return ctxp->c_line->line [ctxp->c_line->current++];
-}
-
-/* Skip the remainder of a C style comment, up to and including the
-   closing star-slash pair.  C is the first character following the
-   '/' and '*' that opened the comment.  An error is reported if the
-   input ends before the comment does.  */
-static void
-java_parse_end_comment (c)
-     int c;
-{
-  for (;;)
-    {
-      if (c == UEOF)
-        {
-          java_lex_error ("Comment not terminated at end of input", 0);
-          return;
-        }
-
-      if (c == '*')
-        {
-          c = java_get_unicode ();
-          if (c == UEOF)
-            {
-              java_lex_error ("Comment not terminated at end of input", 0);
-              return;
-            }
-          if (c == '/')
-            return;
-          /* A second '*' may itself start the terminator: reparse it.  */
-          if (c == '*')
-            java_unget_unicode ();
-        }
-
-      c = java_get_unicode ();
-    }
-}
-
-/* Parse the documentation section.  C is the current character of a
-   documentation comment.  Leading white space, `*' characters and
-   newlines are skipped; a tag is only looked for once a newline has
-   been skipped, i.e. at the beginning of a comment line.  The only
-   tag acted upon is `@deprecated', which sets ctxp->deprecated.
-
-   Returns 1 if the comment was closed right after a skipped `*';
-   otherwise pushes the last character read back and returns 0.  */
-
-static int
-java_parse_doc_section (c)
-     int c;
-{
-  int saw_newline = 0;
-  int after_star = 0;
-
-  while (JAVA_WHITE_SPACE_P (c) || c == '*' || c == '\n')
-    {
-      if (c == '\n') /* ULT */
-        saw_newline = 1;
-      /* Only a `*' that is the most recently skipped character can
-         pair with a following `/'.  */
-      after_star = (c == '*');
-      c = java_get_unicode ();
-    }
-
-  if (c == UEOF)
-    java_lex_error ("Comment not terminated at end of input", 0);
-
-  if (after_star && c == '/')
-    return 1;                   /* Goto step1 in caller.  */
-
-  /* We're parsing `@deprecated'.  */
-  if (saw_newline && c == '@')
-    {
-      char tag [11];
-      int len;
-
-      /* Collect at most ten characters following the `@'; the
-         character that stops the scan is stored as well.  */
-      for (len = 0;
-           len < 10 && c != UEOF && c != ' ' && c != '\n';
-           len++)
-        {
-          c = java_get_unicode ();
-          tag [len] = c;
-        }
-
-      if (c == UEOF)
-        java_lex_error ("Comment not terminated at end of input", 0);
-      tag [len] = '\0';
-
-      if (strcmp (tag, "deprecated") == 0)
-        ctxp->deprecated = 1;
-    }
-
-  java_unget_unicode ();
-  return 0;
-}
-
-/* Return true if C is a valid start character for a Java identifier.
-   This is only called if C >= 128 -- smaller values are handled
-   inline.  However, this function handles all values anyway.  */
-static int
-java_start_char_p (c)
-     unicode_t c;
-{
-  /* One type_table entry covers the 256 characters sharing C's high
-     byte.  */
-  const char *const page = type_table[c / 256];
-  unsigned long encoded = (unsigned long) page;
-  int flags;
-
-  /* An entry with no bits set outside the two flag bits is itself the
-     flag value for the whole page; any other entry is a real pointer
-     to per-character flags indexed by C's low byte.  */
-  if ((encoded & ~ (LETTER_PART | LETTER_START)) == 0)
-    flags = encoded;
-  else
-    flags = page[c & 255];
-
-  return flags & LETTER_START;
-}
-
-/* Return true if C is a valid part character for a Java identifier.
-   This is only called if C >= 128 -- smaller values are handled
-   inline.  However, this function handles all values anyway.  */
-static int
-java_part_char_p (c)
-     unicode_t c;
-{
-  /* One type_table entry covers the 256 characters sharing C's high
-     byte.  */
-  const char *const page = type_table[c / 256];
-  unsigned long encoded = (unsigned long) page;
-  int flags;
-
-  /* An entry with no bits set outside the two flag bits is itself the
-     flag value for the whole page; any other entry is a real pointer
-     to per-character flags indexed by C's low byte.  */
-  if ((encoded & ~ (LETTER_PART | LETTER_START)) == 0)
-    flags = encoded;
-  else
-    flags = page[c & 255];
-
-  return flags & LETTER_PART;
-}
-
-/* Parse the part of an escape sequence that follows the backslash
-   and return the character it denotes.  Handles the single-letter
-   escapes and octal escapes of up to three digits.  Reports an error
-   and returns JAVA_CHAR_ERROR for anything else.  */
-
-static int
-java_parse_escape_sequence ()
-{
-  int c = java_get_unicode ();
-
-  switch (c)
-    {
-    case 'b':
-      return (unicode_t)0x8;
-    case 't':
-      return (unicode_t)0x9;
-    case 'n':
-      return (unicode_t)0xa;
-    case 'f':
-      return (unicode_t)0xc;
-    case 'r':
-      return (unicode_t)0xd;
-    case '"':
-      return (unicode_t)0x22;
-    case '\'':
-      return (unicode_t)0x27;
-    case '\\':
-      return (unicode_t)0x5c;
-
-    case '0': case '1': case '2': case '3':
-    case '4': case '5': case '6': case '7':
-      {
-        unicode_t value = 0;
-        int ndigits = 0;
-        /* According to the grammar, `\477' has a well-defined
-           meaning -- it is `\47' followed by `7'.  So a leading digit
-           above 3 allows only two digits in total.  */
-        int limit = (c > '3') ? 2 : 3;
-
-        /* Accumulate the digits, most significant first.  */
-        while (ndigits < limit && RANGE (c, '0', '7'))
-          {
-            value = value * 8 + (c - '0');
-            ndigits++;
-            c = java_get_unicode ();
-          }
-
-        /* The character that ended the scan is not part of the
-           escape.  */
-        java_unget_unicode ();
-        return value;
-      }
-
-    default:
-      java_lex_error ("Invalid character in escape sequence", 0);
-      return JAVA_CHAR_ERROR;
-    }
-}
-
-/* Isolate the code which may raise an arithmetic exception in its
- own function. */
-
-#ifndef JC1_LITE
-struct jpa_args /* Arguments handed to java_perform_atof through one PTR. */
-{
- YYSTYPE *java_lval; /* Copied to a local of the same name before the literal node is built. */
- char *literal_token; /* NUL-terminated text of the FP literal to convert. */
- int fflag; /* Nonzero selects FLOAT_TYPE_NODE, zero DOUBLE_TYPE_NODE. */
- int number_beginning; /* Line position made current while reporting an underflow. */
-};
-
-/* True if the real value X compares equal to zero.  */
-#ifdef REAL_ARITHMETIC
-#define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
-#else
-#define IS_ZERO(X) ((X) == 0)
-#endif
-
-static void java_perform_atof PARAMS ((PTR));
-
-/* Convert the floating point literal described by AV (really a
-   struct jpa_args *) and build the matching real constant node.
-   An infinite or NaN result is reported as a range error and replaced
-   by zero.  A zero result whose text contains a nonzero mantissa digit
-   is reported as an underflow.  */
-
-static void
-java_perform_atof (av)
-     PTR av;
-{
-  struct jpa_args *a = (struct jpa_args *) av;
-  /* These two locals keep their names on purpose: the error and lval
-     macros used below presumably refer to them implicitly -- verify
-     against the macro definitions before renaming.  */
-  YYSTYPE *java_lval = a->java_lval;
-  int number_beginning = a->number_beginning;
-  REAL_VALUE_TYPE value;
-  tree type;
-
-  if (a->fflag)
-    type = FLOAT_TYPE_NODE;
-  else
-    type = DOUBLE_TYPE_NODE;
-
-  SET_REAL_VALUE_ATOF (value,
-                       REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
-
-  if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
-    {
-      JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
-      value = DCONST0;
-    }
-  else if (IS_ZERO (value))
-    {
-      /* We check to see if the value is really 0 or if we've found an
-         underflow.  We do this in the most primitive imaginable way:
-         any mantissa character other than `0' or `.' means that the
-         literal was not zero.  */
-      int underflow = 0;
-      const char *p = a->literal_token;
-
-      if (*p == '-')
-        ++p;
-      for (; *p && *p != 'e' && *p != 'E'; ++p)
-        if (*p != '0' && *p != '.')
-          {
-            underflow = 1;
-            break;
-          }
-
-      if (underflow)
-        {
-          /* Point the error at the start of the number, then restore
-             the read position.  */
-          int saved = ctxp->c_line->current;
-          ctxp->c_line->current = number_beginning;
-          java_lex_error ("Floating point literal underflow", 0);
-          ctxp->c_line->current = saved;
-        }
-    }
-
-  SET_LVAL_NODE_TYPE (build_real (type, value), type);
-}
-#endif
-
-static int yylex PARAMS ((YYSTYPE *));
-
-static int
-#ifdef JC1_LITE
-yylex (java_lval)
-#else
-java_lex (java_lval)
-#endif
- YYSTYPE *java_lval;
-{
- int c;
- unicode_t first_unicode;
- int ascii_index, all_ascii;
- char *string;
-
- /* Translation of the Unicode escape in the raw stream of Unicode
- characters. Takes care of line terminator. */
- step1:
- /* Skip white spaces: SP, TAB and FF or ULT. */
- for (c = java_get_unicode ();
- c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
- if (c == '\n')
- {
- ctxp->elc.line = ctxp->c_line->lineno;
- ctxp->elc.col = ctxp->c_line->char_col-2;
- }
-
- ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
-
- if (c == 0x1a) /* CTRL-Z. */
- {
- if ((c = java_get_unicode ()) == UEOF)
- return 0; /* Ok here. */
- else
- java_unget_unicode (); /* Caught later, at the end of the
- function. */
- }
- /* Handle EOF here. */
- if (c == UEOF) /* Should probably do something here... */
- return 0;
-
- /* Take care of eventual comments. */
- if (c == '/')
- {
- switch (c = java_get_unicode ())
- {
- case '/':
- for (;;)
- {
- c = java_get_unicode ();
- if (c == UEOF)
- {
- /* It is ok to end a `//' comment with EOF, unless
- we're being pedantic. */
- if (pedantic)
- java_lex_error ("Comment not terminated at end of input",
- 0);
- return 0;
- }
- if (c == '\n') /* ULT */
- goto step1;
- }
- break;
-
- case '*':
- if ((c = java_get_unicode ()) == '*')
- {
- if ((c = java_get_unicode ()) == '/')
- goto step1; /* Empty documentation comment. */
- else if (java_parse_doc_section (c))
- goto step1;
- }
-
- java_parse_end_comment ((c = java_get_unicode ()));
- goto step1;
- break;
- default:
- java_unget_unicode ();
- c = '/';
- break;
- }
- }
-
- ctxp->elc.line = ctxp->c_line->lineno;
- ctxp->elc.prev_col = ctxp->elc.col;
- ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
- if (ctxp->elc.col < 0)
- abort ();
-
- /* Numeric literals. */
- if (JAVA_ASCII_DIGIT (c) || (c == '.'))
- {
- /* This section of code is borrowed from gcc/c-lex.c. */
-#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
- int parts[TOTAL_PARTS];
- HOST_WIDE_INT high, low;
- /* End borrowed section. */
- char literal_token [256];
- int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
- int found_hex_digits = 0, found_non_octal_digits = 0;
- int i;
-#ifndef JC1_LITE
- int number_beginning = ctxp->c_line->current;
- tree value;
-#endif
-
- /* We might have a . separator instead of a FP like .[0-9]*. */
- if (c == '.')
- {
- unicode_t peep = java_sneak_unicode ();
-
- if (!JAVA_ASCII_DIGIT (peep))
- {
- JAVA_LEX_SEP('.');
- BUILD_OPERATOR (DOT_TK);
- }
- }
-
- for (i = 0; i < TOTAL_PARTS; i++)
- parts [i] = 0;
-
- if (c == '0')
- {
- c = java_get_unicode ();
- if (c == 'x' || c == 'X')
- {
- radix = 16;
- c = java_get_unicode ();
- }
- else if (JAVA_ASCII_DIGIT (c))
- radix = 8;
- else if (c == '.')
- {
- /* Push the '.' back and prepare for a FP parsing... */
- java_unget_unicode ();
- c = '0';
- }
- else
- {
- /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
- JAVA_LEX_LIT ("0", 10);
- switch (c)
- {
- case 'L': case 'l':
- SET_LVAL_NODE (long_zero_node);
- return (INT_LIT_TK);
- case 'f': case 'F':
- SET_LVAL_NODE (float_zero_node);
- return (FP_LIT_TK);
- case 'd': case 'D':
- SET_LVAL_NODE (double_zero_node);
- return (FP_LIT_TK);
- default:
- java_unget_unicode ();
- SET_LVAL_NODE (integer_zero_node);
- return (INT_LIT_TK);
- }
- }
- }
- /* Parse the first part of the literal, until we find something
- which is not a number. */
- while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
- JAVA_ASCII_DIGIT (c))
- {
- /* We store in a string (in case it turns out to be a FP) and in
- PARTS if we have to process a integer literal. */
- int numeric = hex_value (c);
- int count;
-
- /* Remember when we find a valid hexadecimal digit. */
- if (radix == 16)
- found_hex_digits = 1;
- /* Remember when we find an invalid octal digit. */
- else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
- found_non_octal_digits = 1;
-
- literal_token [literal_index++] = c;
- /* This section of code if borrowed from gcc/c-lex.c. */
- for (count = 0; count < TOTAL_PARTS; count++)
- {
- parts[count] *= radix;
- if (count)
- {
- parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
- parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
- }
- else
- parts[0] += numeric;
- }
- if (parts [TOTAL_PARTS-1] != 0)
- overflow = 1;
- /* End borrowed section. */
- c = java_get_unicode ();
- }
-
- /* If we have something from the FP char set but not a digit, parse
- a FP literal. */
- if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
- {
- int stage = 0;
- int seen_digit = (literal_index ? 1 : 0);
- int seen_exponent = 0;
- int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
- double unless specified. */
-
- /* It is ok if the radix is 8 because this just means we've
- seen a leading `0'. However, radix==16 is invalid. */
- if (radix == 16)
- java_lex_error ("Can't express non-decimal FP literal", 0);
- radix = 10;
-
- for (;;)
- {
- if (c == '.')
- {
- if (stage < 1)
- {
- stage = 1;
- literal_token [literal_index++ ] = c;
- c = java_get_unicode ();
- }
- else
- java_lex_error ("Invalid character in FP literal", 0);
- }
-
- if (c == 'e' || c == 'E')
- {
- if (stage < 2)
- {
- /* {E,e} must have seen at least a digit. */
- if (!seen_digit)
- java_lex_error
- ("Invalid FP literal, mantissa must have digit", 0);
- seen_digit = 0;
- seen_exponent = 1;
- stage = 2;
- literal_token [literal_index++] = c;
- c = java_get_unicode ();
- }
- else
- java_lex_error ("Invalid character in FP literal", 0);
- }
- if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
- {
- fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
- stage = 4; /* So we fall through. */
- }
-
- if ((c=='-' || c =='+') && stage == 2)
- {
- stage = 3;
- literal_token [literal_index++] = c;
- c = java_get_unicode ();
- }
-
- if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
- (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
- (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
- (stage == 3 && JAVA_ASCII_DIGIT (c)))
- {
- if (JAVA_ASCII_DIGIT (c))
- seen_digit = 1;
- if (stage == 2)
- stage = 3;
- literal_token [literal_index++ ] = c;
- c = java_get_unicode ();
- }
- else
- {
-#ifndef JC1_LITE
- struct jpa_args a;
-#endif
- if (stage != 4) /* Don't push back fF/dD. */
- java_unget_unicode ();
-
- /* An exponent (if any) must have seen a digit. */
- if (seen_exponent && !seen_digit)
- java_lex_error
- ("Invalid FP literal, exponent must have digit", 0);
-
- literal_token [literal_index] = '\0';
- JAVA_LEX_LIT (literal_token, radix);
-
-#ifndef JC1_LITE
- a.literal_token = literal_token;
- a.fflag = fflag;
- a.java_lval = java_lval;
- a.number_beginning = number_beginning;
- if (do_float_handler (java_perform_atof, (PTR) &a))
- return FP_LIT_TK;
-
- JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
-#else
- return FP_LIT_TK;
-#endif
- }
- }
- } /* JAVA_ASCII_FPCHAR (c) */
-
- /* Here we get back to converting the integral literal. */
- if (radix == 16 && ! found_hex_digits)
- java_lex_error
- ("0x must be followed by at least one hexadecimal digit", 0);
- else if (radix == 8 && found_non_octal_digits)
- java_lex_error ("Octal literal contains digit out of range", 0);
- else if (c == 'L' || c == 'l')
- long_suffix = 1;
- else
- java_unget_unicode ();
-
-#ifdef JAVA_LEX_DEBUG
- literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
- JAVA_LEX_LIT (literal_token, radix);
-#endif
- /* This section of code is borrowed from gcc/c-lex.c. */
- if (!overflow)
- {
- bytes = GET_TYPE_PRECISION (long_type_node);
- for (i = bytes; i < TOTAL_PARTS; i++)
- if (parts [i])
- {
- overflow = 1;
- break;
- }
- }
- high = low = 0;
- for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
- {
- high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
- / HOST_BITS_PER_CHAR)]
- << (i * HOST_BITS_PER_CHAR));
- low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
- }
- /* End borrowed section. */
-
- /* Range checking. */
- if (long_suffix)
- {
- /* 9223372036854775808L is valid if operand of a '-'. Otherwise
- 9223372036854775807L is the biggest `long' literal that can be
- expressed using a 10 radix. For other radices, everything that
- fits withing 64 bits is OK. */
- int hb = (high >> 31);
- if (overflow || (hb && low && radix == 10)
- || (hb && high & 0x7fffffff && radix == 10))
- JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
- }
- else
- {
- /* 2147483648 is valid if operand of a '-'. Otherwise,
- 2147483647 is the biggest `int' literal that can be
- expressed using a 10 radix. For other radices, everything
- that fits within 32 bits is OK. As all literals are
- signed, we sign extend here. */
- int hb = (low >> 31) & 0x1;
- if (overflow || high || (hb && low & 0x7fffffff && radix == 10))
- JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
- high = -hb;
- }
-#ifndef JC1_LITE
- value = build_int_2 (low, high);
- JAVA_RADIX10_FLAG (value) = radix == 10;
- SET_LVAL_NODE_TYPE (value, long_suffix ? long_type_node : int_type_node);
-#else
- SET_LVAL_NODE_TYPE (build_int_2 (low, high),
- long_suffix ? long_type_node : int_type_node);
-#endif
- return INT_LIT_TK;
- }
-
- /* Character literals. */
- if (c == '\'')
- {
- int char_lit;
- if ((c = java_get_unicode ()) == '\\')
- char_lit = java_parse_escape_sequence ();
- else
- {
- if (c == '\n' || c == '\'')
- java_lex_error ("Invalid character literal", 0);
- char_lit = c;
- }
-
- c = java_get_unicode ();
-
- if ((c == '\n') || (c == UEOF))
- java_lex_error ("Character literal not terminated at end of line", 0);
- if (c != '\'')
- java_lex_error ("Syntax error in character literal", 0);
-
- if (char_lit == JAVA_CHAR_ERROR)
- char_lit = 0; /* We silently convert it to zero. */
-
- JAVA_LEX_CHAR_LIT (char_lit);
- SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
- return CHAR_LIT_TK;
- }
-
- /* String literals. */
- if (c == '"')
- {
- int no_error;
- char *string;
-
- for (no_error = 1, c = java_get_unicode ();
- c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
- {
- if (c == '\\')
- c = java_parse_escape_sequence ();
- if (c == JAVA_CHAR_ERROR)
- {
- no_error = 0;
- c = 0; /* We silently convert it to zero. */
- }
- java_unicode_2_utf8 (c);
- }
- if (c == '\n' || c == UEOF) /* ULT. */
- {
- lineno--; /* Refer to the line where the terminator was seen. */
- java_lex_error ("String not terminated at end of line", 0);
- lineno++;
- }
-
- obstack_1grow (&temporary_obstack, '\0');
- string = obstack_finish (&temporary_obstack);
-#ifndef JC1_LITE
- if (!no_error || (c != '"'))
- java_lval->node = error_mark_node; /* FIXME: Requires futher
- testing. */
- else
- java_lval->node = build_string (strlen (string), string);
-#endif
- obstack_free (&temporary_obstack, string);
- return STRING_LIT_TK;
- }
-
- /* Separator. */
- switch (c)
- {
- case '(':
- JAVA_LEX_SEP (c);
- BUILD_OPERATOR (OP_TK);
- case ')':
- JAVA_LEX_SEP (c);
- return CP_TK;
- case '{':
- JAVA_LEX_SEP (c);
- if (ctxp->ccb_indent == 1)
- ctxp->first_ccb_indent1 = lineno;
- ctxp->ccb_indent++;
- BUILD_OPERATOR (OCB_TK);
- case '}':
- JAVA_LEX_SEP (c);
- ctxp->ccb_indent--;
- if (ctxp->ccb_indent == 1)
- ctxp->last_ccb_indent1 = lineno;
- BUILD_OPERATOR (CCB_TK);
- case '[':
- JAVA_LEX_SEP (c);
- BUILD_OPERATOR (OSB_TK);
- case ']':
- JAVA_LEX_SEP (c);
- return CSB_TK;
- case ';':
- JAVA_LEX_SEP (c);
- return SC_TK;
- case ',':
- JAVA_LEX_SEP (c);
- return C_TK;
- case '.':
- JAVA_LEX_SEP (c);
- BUILD_OPERATOR (DOT_TK);
- /* return DOT_TK; */
- }
-
- /* Operators. */
- switch (c)
- {
- case '=':
- if ((c = java_get_unicode ()) == '=')
- {
- BUILD_OPERATOR (EQ_TK);
- }
- else
- {
- /* Equals is used in two different locations. In the
- variable_declarator: rule, it has to be seen as '=' as opposed
- to being seen as an ordinary assignment operator in
- assignment_operators: rule. */
- java_unget_unicode ();
- BUILD_OPERATOR (ASSIGN_TK);
- }
-
- case '>':
- switch ((c = java_get_unicode ()))
- {
- case '=':
- BUILD_OPERATOR (GTE_TK);
- case '>':
- switch ((c = java_get_unicode ()))
- {
- case '>':
- if ((c = java_get_unicode ()) == '=')
- {
- BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
- }
- else
- {
- java_unget_unicode ();
- BUILD_OPERATOR (ZRS_TK);
- }
- case '=':
- BUILD_OPERATOR2 (SRS_ASSIGN_TK);
- default:
- java_unget_unicode ();
- BUILD_OPERATOR (SRS_TK);
- }
- default:
- java_unget_unicode ();
- BUILD_OPERATOR (GT_TK);
- }
-
- case '<':
- switch ((c = java_get_unicode ()))
- {
- case '=':
- BUILD_OPERATOR (LTE_TK);
- case '<':
- if ((c = java_get_unicode ()) == '=')
- {
- BUILD_OPERATOR2 (LS_ASSIGN_TK);
- }
- else
- {
- java_unget_unicode ();
- BUILD_OPERATOR (LS_TK);
- }
- default:
- java_unget_unicode ();
- BUILD_OPERATOR (LT_TK);
- }
-
- case '&':
- switch ((c = java_get_unicode ()))
- {
- case '&':
- BUILD_OPERATOR (BOOL_AND_TK);
- case '=':
- BUILD_OPERATOR2 (AND_ASSIGN_TK);
- default:
- java_unget_unicode ();
- BUILD_OPERATOR (AND_TK);
- }
-
- case '|':
- switch ((c = java_get_unicode ()))
- {
- case '|':
- BUILD_OPERATOR (BOOL_OR_TK);
- case '=':
- BUILD_OPERATOR2 (OR_ASSIGN_TK);
- default:
- java_unget_unicode ();
- BUILD_OPERATOR (OR_TK);
- }
-
- case '+':
- switch ((c = java_get_unicode ()))
- {
- case '+':
- BUILD_OPERATOR (INCR_TK);
- case '=':
- BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
- default:
- java_unget_unicode ();
- BUILD_OPERATOR (PLUS_TK);
- }
-
- case '-':
- switch ((c = java_get_unicode ()))
- {
- case '-':
- BUILD_OPERATOR (DECR_TK);
- case '=':
- BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
- default:
- java_unget_unicode ();
- BUILD_OPERATOR (MINUS_TK);
- }
-
- case '*':
- if ((c = java_get_unicode ()) == '=')
- {
- BUILD_OPERATOR2 (MULT_ASSIGN_TK);
- }
- else
- {
- java_unget_unicode ();
- BUILD_OPERATOR (MULT_TK);
- }
-
- case '/':
- if ((c = java_get_unicode ()) == '=')
- {
- BUILD_OPERATOR2 (DIV_ASSIGN_TK);
- }
- else
- {
- java_unget_unicode ();
- BUILD_OPERATOR (DIV_TK);
- }
-
- case '^':
- if ((c = java_get_unicode ()) == '=')
- {
- BUILD_OPERATOR2 (XOR_ASSIGN_TK);
- }
- else
- {
- java_unget_unicode ();
- BUILD_OPERATOR (XOR_TK);
- }
-
- case '%':
- if ((c = java_get_unicode ()) == '=')
- {
- BUILD_OPERATOR2 (REM_ASSIGN_TK);
- }
- else
- {
- java_unget_unicode ();
- BUILD_OPERATOR (REM_TK);
- }
-
- case '!':
- if ((c = java_get_unicode()) == '=')
- {
- BUILD_OPERATOR (NEQ_TK);
- }
- else
- {
- java_unget_unicode ();
- BUILD_OPERATOR (NEG_TK);
- }
-
- case '?':
- JAVA_LEX_OP ("?");
- BUILD_OPERATOR (REL_QM_TK);
- case ':':
- JAVA_LEX_OP (":");
- BUILD_OPERATOR (REL_CL_TK);
- case '~':
- BUILD_OPERATOR (NOT_TK);
- }
-
- /* Keyword, boolean literal or null literal. */
- for (first_unicode = c, all_ascii = 1, ascii_index = 0;
- JAVA_PART_CHAR_P (c); c = java_get_unicode ())
- {
- java_unicode_2_utf8 (c);
- if (all_ascii && c >= 128)
- all_ascii = 0;
- ascii_index++;
- }
-
- obstack_1grow (&temporary_obstack, '\0');
- string = obstack_finish (&temporary_obstack);
- java_unget_unicode ();
-
- /* If we have something all ascii, we consider a keyword, a boolean
- literal, a null literal or an all ASCII identifier. Otherwise,
- this is an identifier (possibly not respecting formation rule). */
- if (all_ascii)
- {
- const struct java_keyword *kw;
- if ((kw=java_keyword (string, ascii_index)))
- {
- JAVA_LEX_KW (string);
- switch (kw->token)
- {
- case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
- case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
- case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
- case PRIVATE_TK: case STRICT_TK:
- SET_MODIFIER_CTX (kw->token);
- return MODIFIER_TK;
- case FLOAT_TK:
- SET_LVAL_NODE (float_type_node);
- return FP_TK;
- case DOUBLE_TK:
- SET_LVAL_NODE (double_type_node);
- return FP_TK;
- case BOOLEAN_TK:
- SET_LVAL_NODE (boolean_type_node);
- return BOOLEAN_TK;
- case BYTE_TK:
- SET_LVAL_NODE (byte_type_node);
- return INTEGRAL_TK;
- case SHORT_TK:
- SET_LVAL_NODE (short_type_node);
- return INTEGRAL_TK;
- case INT_TK:
- SET_LVAL_NODE (int_type_node);
- return INTEGRAL_TK;
- case LONG_TK:
- SET_LVAL_NODE (long_type_node);
- return INTEGRAL_TK;
- case CHAR_TK:
- SET_LVAL_NODE (char_type_node);
- return INTEGRAL_TK;
-
- /* Keyword based literals. */
- case TRUE_TK:
- case FALSE_TK:
- SET_LVAL_NODE ((kw->token == TRUE_TK ?
- boolean_true_node : boolean_false_node));
- return BOOL_LIT_TK;
- case NULL_TK:
- SET_LVAL_NODE (null_pointer_node);
- return NULL_TK;
-
- /* Some keyword we want to retain information on the location
- they where found. */
- case CASE_TK:
- case DEFAULT_TK:
- case SUPER_TK:
- case THIS_TK:
- case RETURN_TK:
- case BREAK_TK:
- case CONTINUE_TK:
- case TRY_TK:
- case CATCH_TK:
- case THROW_TK:
- case INSTANCEOF_TK:
- BUILD_OPERATOR (kw->token);
-
- default:
- return kw->token;
- }
- }
- }
-
- /* We may have an ID here. */
- if (JAVA_START_CHAR_P (first_unicode))
- {
- JAVA_LEX_ID (string);
- java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
- return ID_TK;
- }
-
- /* Everything else is an invalid character in the input. */
- {
- char lex_error_buffer [128];
- sprintf (lex_error_buffer, "Invalid character `%s' in input",
- java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
- java_lex_error (lex_error_buffer, 1);
- }
- return 0;
-}
-
-#ifndef JC1_LITE
-/* This is called by the parser to see if an error should be generated
- due to numeric overflow. This function only handles the particular
- case of the largest negative value, and is only called in the case
- where this value is not preceded by `-'. */
-static void
-error_if_numeric_overflow (value)
- tree value;
-{
- if (TREE_CODE (value) == INTEGER_CST && JAVA_RADIX10_FLAG (value))
- {
- unsigned HOST_WIDE_INT lo, hi;
-
- lo = TREE_INT_CST_LOW (value);
- hi = TREE_INT_CST_HIGH (value);
- if (TREE_TYPE (value) == long_type_node)
- {
- int hb = (hi >> 31);
- if (hb && !(hi & 0x7fffffff))
- java_lex_error ("Numeric overflow for `long' literal", 0);
- }
- else
- {
- int hb = (lo >> 31) & 0x1;
- if (hb && !(lo & 0x7fffffff))
- java_lex_error ("Numeric overflow for `int' literal", 0);
- }
- }
-}
-#endif /* JC1_LITE */
-
-static void
-java_unicode_2_utf8 (unicode)
- unicode_t unicode;
-{
- if (RANGE (unicode, 0x01, 0x7f))
- obstack_1grow (&temporary_obstack, (char)unicode);
- else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
- {
- obstack_1grow (&temporary_obstack,
- (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
- obstack_1grow (&temporary_obstack,
- (unsigned char)(0x80 | (unicode & 0x3f)));
- }
- else /* Range 0x800-0xffff. */
- {
- obstack_1grow (&temporary_obstack,
- (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
- obstack_1grow (&temporary_obstack,
- (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
- obstack_1grow (&temporary_obstack,
- (unsigned char)(0x80 | (unicode & 0x003f)));
- }
-}
-
-#ifndef JC1_LITE
-static tree
-build_wfl_node (node)
- tree node;
-{
- node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
- /* Prevent java_complete_lhs from short-circuiting node (if constant). */
- TREE_TYPE (node) = NULL_TREE;
- return node;
-}
-#endif
-
-static void
-java_lex_error (msg, forward)
- const char *msg ATTRIBUTE_UNUSED;
- int forward ATTRIBUTE_UNUSED;
-{
-#ifndef JC1_LITE
- ctxp->elc.line = ctxp->c_line->lineno;
- ctxp->elc.col = ctxp->c_line->char_col-1+forward;
-
- /* Might be caught in the middle of some error report. */
- ctxp->java_error_flag = 0;
- java_error (NULL);
- java_error (msg);
-#endif
-}
-
-#ifndef JC1_LITE
-static int
-java_is_eol (fp, c)
- FILE *fp;
- int c;
-{
- int next;
- switch (c)
- {
- case '\r':
- next = getc (fp);
- if (next != '\n' && next != EOF)
- ungetc (next, fp);
- return 1;
- case '\n':
- return 1;
- default:
- return 0;
- }
-}
-#endif
-
-char *
-java_get_line_col (filename, line, col)
- const char *filename ATTRIBUTE_UNUSED;
- int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
-{
-#ifdef JC1_LITE
- return 0;
-#else
- /* Dumb implementation. Doesn't try to cache or optimize things. */
- /* First line of the file is line 1, first column is 1. */
-
- /* COL == -1 means, at the CR/LF in LINE. */
- /* COL == -2 means, at the first non space char in LINE. */
-
- FILE *fp;
- int c, ccol, cline = 1;
- int current_line_col = 0;
- int first_non_space = 0;
- char *base;
-
- if (!(fp = fopen (filename, "r")))
- fatal_io_error ("can't open %s", filename);
-
- while (cline != line)
- {
- c = getc (fp);
- if (c == EOF)
- {
- static const char msg[] = "<<file too short - unexpected EOF>>";
- obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
- goto have_line;
- }
- if (java_is_eol (fp, c))
- cline++;
- }
-
- /* Gather the chars of the current line in a buffer. */
- for (;;)
- {
- c = getc (fp);
- if (c < 0 || java_is_eol (fp, c))
- break;
- if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
- first_non_space = current_line_col;
- obstack_1grow (&temporary_obstack, c);
- current_line_col++;
- }
- have_line:
-
- obstack_1grow (&temporary_obstack, '\n');
-
- if (col == -1)
- {
- col = current_line_col;
- first_non_space = 0;
- }
- else if (col == -2)
- col = first_non_space;
- else
- first_non_space = 0;
-
- /* Place the '^' a the right position. */
- base = obstack_base (&temporary_obstack);
- for (ccol = 1; ccol <= col+3; ccol++)
- {
- /* Compute \t when reaching first_non_space. */
- char c = (first_non_space ?
- (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
- obstack_1grow (&temporary_obstack, c);
- }
- obstack_grow0 (&temporary_obstack, "^", 1);
-
- fclose (fp);
- return obstack_finish (&temporary_obstack);
-#endif
-}
-
-#ifndef JC1_LITE
-static int
-utf8_cmp (str, length, name)
- const unsigned char *str;
- int length;
- const char *name;
-{
- const unsigned char *limit = str + length;
- int i;
-
- for (i = 0; name[i]; ++i)
- {
- int ch = UTF8_GET (str, limit);
- if (ch != name[i])
- return ch - name[i];
- }
-
- return str == limit ? 0 : 1;
-}
-
-/* A sorted list of all C++ keywords. */
-
-static const char *const cxx_keywords[] =
-{
- "_Complex",
- "__alignof",
- "__alignof__",
- "__asm",
- "__asm__",
- "__attribute",
- "__attribute__",
- "__builtin_va_arg",
- "__complex",
- "__complex__",
- "__const",
- "__const__",
- "__extension__",
- "__imag",
- "__imag__",
- "__inline",
- "__inline__",
- "__label__",
- "__null",
- "__real",
- "__real__",
- "__restrict",
- "__restrict__",
- "__signed",
- "__signed__",
- "__typeof",
- "__typeof__",
- "__volatile",
- "__volatile__",
- "and",
- "and_eq",
- "asm",
- "auto",
- "bitand",
- "bitor",
- "bool",
- "break",
- "case",
- "catch",
- "char",
- "class",
- "compl",
- "const",
- "const_cast",
- "continue",
- "default",
- "delete",
- "do",
- "double",
- "dynamic_cast",
- "else",
- "enum",
- "explicit",
- "export",
- "extern",
- "false",
- "float",
- "for",
- "friend",
- "goto",
- "if",
- "inline",
- "int",
- "long",
- "mutable",
- "namespace",
- "new",
- "not",
- "not_eq",
- "operator",
- "or",
- "or_eq",
- "private",
- "protected",
- "public",
- "register",
- "reinterpret_cast",
- "return",
- "short",
- "signed",
- "sizeof",
- "static",
- "static_cast",
- "struct",
- "switch",
- "template",
- "this",
- "throw",
- "true",
- "try",
- "typedef",
- "typeid",
- "typename",
- "typeof",
- "union",
- "unsigned",
- "using",
- "virtual",
- "void",
- "volatile",
- "wchar_t",
- "while",
- "xor",
- "xor_eq"
-};
-
-/* Return true if NAME is a C++ keyword. */
-
-int
-cxx_keyword_p (name, length)
- const char *name;
- int length;
-{
- int last = ARRAY_SIZE (cxx_keywords);
- int first = 0;
- int mid = (last + first) / 2;
- int old = -1;
-
- for (mid = (last + first) / 2;
- mid != old;
- old = mid, mid = (last + first) / 2)
- {
- int kwl = strlen (cxx_keywords[mid]);
- int min_length = kwl > length ? length : kwl;
- int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
-
- if (r == 0)
- {
- int i;
- /* We've found a match if all the remaining characters are `$'. */
- for (i = min_length; i < length && name[i] == '$'; ++i)
- ;
- if (i == length)
- return 1;
- r = 1;
- }
-
- if (r < 0)
- last = mid;
- else
- first = mid;
- }
- return 0;
-}
-#endif /* JC1_LITE */