(* * * Copyright (c) 2001-2002, * George C. Necula * Scott McPeak * Wes Weimer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. The names of the contributors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * *) (* A simple lexical analyzer for constructing CIL based on format strings *) { open Formatparse exception Eof exception InternalError of string module H = Hashtbl module E = Errormsg (* ** Keyword hashtable *) let keywords = H.create 211 (* ** Useful primitives *) let scan_ident id = try H.find keywords id with Not_found -> IDENT id (* default to variable name *) (* ** Buffer processor *) let init ~(prog: string) : Lexing.lexbuf = H.clear keywords; Lexerhack.currentPattern := prog; List.iter (fun (key, token) -> H.add keywords key token) [ ("const", CONST); ("__const", CONST); ("__const__", CONST); ("static", STATIC); ("extern", EXTERN); ("long", LONG); ("short", SHORT); ("signed", SIGNED); ("unsigned", UNSIGNED); ("volatile", VOLATILE); ("char", CHAR); ("int", INT); ("float", FLOAT); ("double", DOUBLE); ("void", VOID); ("enum", ENUM); ("struct", STRUCT); ("typedef", TYPEDEF); ("union", UNION); ("break", BREAK); ("continue", CONTINUE); ("goto", GOTO); ("return", RETURN); ("switch", SWITCH); ("case", CASE); ("default", DEFAULT); ("while", WHILE); ("do", DO); ("for", FOR); ("if", IF); ("else", ELSE); ("__attribute__", ATTRIBUTE); ("__attribute", ATTRIBUTE); ("__int64", INT64); ("__builtin_va_arg", BUILTIN_VA_ARG); ]; E.startParsingFromString prog let finish () = E.finishParsing () (*** Error handling ***) let error msg = E.parse_error msg (*** escape character management ***) let scan_escape str = match str with "n" -> "\n" | "r" -> "\r" | "t" -> "\t" | "b" -> "\b" | "f" -> "\012" (* ASCII code 12 *) | "v" -> "\011" (* ASCII code 11 *) | "a" -> "\007" (* ASCII code 7 *) | "e" -> "\027" (* ASCII code 27. This is a GCC extension *) | _ -> str let get_value chr = match chr with '0'..'9' -> (Char.code chr) - (Char.code '0') | 'a'..'z' -> (Char.code chr) - (Char.code 'a') + 10 | 'A'..'Z' -> (Char.code chr) - (Char.code 'A') + 10 | _ -> 0 let scan_hex_escape str = String.make 1 (Char.chr ( (get_value (String.get str 0)) * 16 + (get_value (String.get str 1)) )) let scan_oct_escape str = (* weimer: wide-character constants like L'\400' may be bigger than * 256 (in fact, may be up to 511), so Char.chr cannot be used directly *) let the_value = (get_value (String.get str 0)) * 64 + (get_value (String.get str 1)) * 8 + (get_value (String.get str 2)) in if the_value < 256 then String.make 1 (Char.chr the_value ) else (String.make 1 (Char.chr (the_value / 256))) ^ (String.make 1 (Char.chr (the_value mod 256))) (* ISO standard locale-specific function to convert a wide character * into a sequence of normal characters. Here we work on strings. * We convert L"Hi" to "H\000i\000" *) let wbtowc wstr = let len = String.length wstr in let dest = String.make (len * 2) '\000' in for i = 0 to len-1 do dest.[i*2] <- wstr.[i] ; done ; dest (* This function converst the "Hi" in L"Hi" to { L'H', L'i', L'\0' } *) let wstr_to_warray wstr = let len = String.length wstr in let res = ref "{ " in for i = 0 to len-1 do res := !res ^ (Printf.sprintf "L'%c', " wstr.[i]) done ; res := !res ^ "}" ; !res let getArgName (l: Lexing.lexbuf) (prefixlen: int) = let lexeme = Lexing.lexeme l in let ll = String.length lexeme in if ll > prefixlen then String.sub lexeme (prefixlen + 1) (ll - prefixlen - 1) else "" } let decdigit = ['0'-'9'] let octdigit = ['0'-'7'] let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] let letter = ['a'- 'z' 'A'-'Z'] let floatsuffix = ['f' 'F' 'l' 'L'] let usuffix = ['u' 'U'] let lsuffix = "l"|"L"|"ll"|"LL" let intsuffix = lsuffix | usuffix | usuffix lsuffix | lsuffix usuffix let intnum = decdigit+ intsuffix? let octnum = '0' octdigit+ intsuffix? let hexnum = '0' ['x' 'X'] hexdigit+ intsuffix? let exponent = ['e' 'E']['+' '-']? decdigit+ let fraction = '.' decdigit+ let floatraw = (intnum? fraction) |(intnum exponent) |(intnum? fraction exponent) |(intnum '.') |(intnum '.' exponent) let floatnum = floatraw floatsuffix? let ident = (letter|'_')(letter|decdigit|'_')* let attribident = (letter|'_')(letter|decdigit|'_'|':') let blank = [' ' '\t' '\012' '\r'] let escape = '\\' _ let hex_escape = '\\' ['x' 'X'] hexdigit hexdigit let oct_escape = '\\' octdigit octdigit octdigit (* The arguments are of the form %l:foo *) let argname = ':' ident rule initial = parse blank { initial lexbuf} | "/*" { let _ = comment lexbuf in initial lexbuf} | "//" { endline lexbuf } | "\n" { E.newline (); initial lexbuf} | floatnum {CST_FLOAT (Lexing.lexeme lexbuf)} | hexnum {CST_INT (Lexing.lexeme lexbuf)} | octnum {CST_INT (Lexing.lexeme lexbuf)} | intnum {CST_INT (Lexing.lexeme lexbuf)} | "<<=" {INF_INF_EQ} | ">>=" {SUP_SUP_EQ} | "*=" {STAR_EQ} | "/=" {SLASH_EQ} | "&=" {AND_EQ} | "|=" {PIPE_EQ} | "^=" {CIRC_EQ} | "%=" {PERCENT_EQ} | "..." {ELLIPSIS} | "-=" {MINUS_EQ} | "+=" {PLUS_EQ} | "*=" {STAR_EQ} | "<<" {INF_INF} | ">>" {SUP_SUP} | "==" {EQ_EQ} | "!=" {EXCLAM_EQ} | "<=" {INF_EQ} | ">=" {SUP_EQ} | "=" {EQ} | "<" {INF} | ">" {SUP} | "++" {PLUS_PLUS} | "--" {MINUS_MINUS} | "->" {ARROW} | '+' {PLUS} | '-' {MINUS} | '*' {STAR} | '/' {SLASH} | '!' {EXCLAM} | '&' {AND} | '|' {PIPE} | '^' {CIRC} | '~' {TILDE} | '[' {LBRACKET} | ']' {RBRACKET} | '{' {LBRACE} | '}' {RBRACE} | '(' {LPAREN} | ')' {RPAREN} | ';' {SEMICOLON} | ',' {COMMA} | '.' {DOT} | ':' {COLON} | '?' {QUEST} | "sizeof" {SIZEOF} | "%eo" argname {ARG_eo (getArgName lexbuf 3) } | "%e" argname {ARG_e (getArgName lexbuf 2) } | "%E" argname {ARG_E (getArgName lexbuf 2) } | "%u" argname {ARG_u (getArgName lexbuf 2) } | "%b" argname {ARG_b (getArgName lexbuf 2) } | "%t" argname {ARG_t (getArgName lexbuf 2) } | "%d" argname {ARG_d (getArgName lexbuf 2) } | "%lo" argname {ARG_lo (getArgName lexbuf 3) } | "%l" argname {ARG_l (getArgName lexbuf 2) } | "%i" argname {ARG_i (getArgName lexbuf 2) } | "%I" argname {ARG_I (getArgName lexbuf 2) } | "%o" argname {ARG_o (getArgName lexbuf 2) } | "%va" argname {ARG_va (getArgName lexbuf 3) } | "%v" argname {ARG_v (getArgName lexbuf 2) } | "%k" argname {ARG_k (getArgName lexbuf 2) } | "%f" argname {ARG_f (getArgName lexbuf 2) } | "%F" argname {ARG_F (getArgName lexbuf 2) } | "%p" argname {ARG_p (getArgName lexbuf 2) } | "%P" argname {ARG_P (getArgName lexbuf 2) } | "%s" argname {ARG_s (getArgName lexbuf 2) } | "%S" argname {ARG_S (getArgName lexbuf 2) } | "%g" argname {ARG_g (getArgName lexbuf 2) } | "%A" argname {ARG_A (getArgName lexbuf 2) } | "%c" argname {ARG_c (getArgName lexbuf 2) } | '%' {PERCENT} | ident {scan_ident (Lexing.lexeme lexbuf)} | eof {EOF} | _ {E.parse_error "Formatlex: Invalid symbol" } and comment = parse "*/" { () } | '\n' { E.newline (); comment lexbuf } | _ { comment lexbuf } and endline = parse '\n' { E.newline (); initial lexbuf} | _ { endline lexbuf}