(**********************************************************************)
(* (c) Greg Morrisett, Neal Glew,                                     *)
(*     June 1998, all rights reserved.                                *)
(**********************************************************************)

(* tallex.mll
 *
 * A basic lexer for tal format MASM files.
 *
 * TODO: really should make a more efficient symbol table as the three hash
 *       table lookups with exception handlers ain't great!
 *)

{
open Tal;;
open Talparser;;

let line = ref 1;;

let process_newline () =
  incr line
;;

(* Unfortunately we need to scan some filenames and they look too much like
 * identifiers.  The only way around this is to use context information to
 * know when to look for them.  The rw_filename table contains keywords that
 * require a filename on the same line.
 *)

let rw_filename = Hashtbl.create 13;;

let _ =
  List.iter (fun (s,f) -> Hashtbl.add rw_filename s f)
    ["include",(fun s->Tinclude s); "tal_export",(fun s->Ttal_export s);
      "tal_import",(fun s->Ttal_import s)]
;;

(* MASM reserved words are not case sensitive, we use two tables for reserved
 * words: those that are case sensitive (some of the TAL keywords), and those
 * that aren't (some TAL keywords, all MASM reserved words)
 *)

let rw_cis = Hashtbl.create 157;;
let rw_cs = Hashtbl.create 157;;

let _ =
  List.iter
    (fun (s,t) -> Hashtbl.add rw_cis s t)
    ["adc",Tadc; "add",Tadd; "al",Tal;
      "alen",Talen; "and",Tand; "asub",Tasub;
      "aupd",Taupd; "bexn",Tbexn; "bl",Tbl; "bswap",Tbswap; "btagi",Tbtagi;
      "btagvar",Tbtagvar; "call",Tcall; "cbw",Tcbw; "cdq",Tcdq; "cl",Tcl;
      "clc",Tclc; "cmc",Tcmc;
      "cmova",Tcmova; "cmovae",Tcmovae; "cmovb",Tcmovb; "cmovbe",Tcmovbe;
      "cmovc",Tcmovc; "cmove",Tcmove; "cmovg",Tcmovg; "cmovge",Tcmovge;
      "cmovl",Tcmovl; "cmovle",Tcmovle; "cmovna",Tcmovna; "cmovnae",Tcmovnae;
      "cmovnb",Tcmovnb; "cmovnbe",Tcmovnbe; "cmovnc",Tcmovnc; "cmovne",Tcmovne;
      "cmovng",Tcmovng; "cmovnge",Tcmovnge; "cmovnl",Tcmovnl;
      "cmovnle",Tcmovnle; "cmovno",Tcmovno; "cmovnp",Tcmovnp; "cmovns",Tcmovns;
      "cmovnz",Tcmovnz; "cmovo",Tcmovo; "cmovp",Tcmovp; "cmovpe",Tcmovpe;
      "cmovpo",Tcmovpo; "cmovs",Tcmovs; "cmovz",Tcmovz;
      "cmp",Tcmp; "code",Tcode; "coerce",Tcoerce; "cwd",Tcwd; "cwde",Tcwde;
      "data",Tdata; "db",Tdb; "dd",Tdd;
      "dec",Tdec; "div",Tdiv; "dl",Tdl; "dw",Tdw; "dword",Tdword;
      "eax",Teax; "ebp",Tebp; "ebx",Tebx; "ecx",Tecx; "edi",Tedi; "edx",Tedx;
      "end",Tend; "entry",Tentry; "esi",Tesi; "esp",Tesp; "fallthru",Tfallthru;
      "idiv",Tidiv; "imul",Timul; "inc",Tinc; "int",Tint;
      "into",Tinto; 
      "ja",Tja; "jae",Tjae; "jb",Tjb; "jbe",Tjbe; "jc",Tjc; "je",Tje;
      "jecxz",Tjecxz; "jg",Tjg; "jge",Tjge; "jl",Tjl; "jle",Tjle; "jmp",Tjmp;
      "jna",Tjna; "jnae",Tjnae; "jnb",Tjnb; "jnbe",Tjnbe;
      "jnc",Tjnc; "jne",Tjne; "jng",Tjng; "jnge",Tjnge;
      "jnl",Tjnl; "jnle",Tjnle; "jno",Tjno; "jnp",Tjnp;
      "jns",Tjns; "jnz",Tjnz; "jo",Tjo; "jp",Tjp;
      "jpe",Tjpe; "jpo",Tjpo; "js",Tjs; "jz",Tjz;
      "label",Tlabel; "labeltype",Tlabeltype; "lahf",Tlahf; "lea",Tlea;
      "loopd",Tloopd; "looped",Tlooped; "loopned",Tloopned;
      "malloc",Tmalloc; "mov",Tmov; "movsx",Tmovsx; "movzx",Tmovzx;
      "mul",Tmul; "neg",Tneg; "nop",Tnop; "not",Tnot; "or",Tor; "pop",Tpop;
      "popad",Tpopad; "popfd",Tpopfd; "ptr",Tptr;
      "push",Tpush; "pushad",Tpushad;
      "pushfd",Tpushfd; "rcl",Trcl; "rcr",Trcr; "retn",Tretn; "rol",Trol;
      "ror",Tror; "sahf",Tsahf; "sal",Tsal; 
      "sar",Tsar; "sbb",Tsbb;
      "seta",Tseta; "setae",Tsetae; "setb",Tsetb; "setbe",Tsetbe; "setc",Tsetc;
      "sete",Tsete; "setg",Tsetg; "setge",Tsetge; "setl",Tsetl; "setle",Tsetle;
      "setna",Tsetna; "setnae",Tsetnae; "setnb",Tsetnb; "setnbe",Tsetnbe;
      "setnc",Tsetnc; "setne",Tsetne; "setng",Tsetng; "setnge",Tsetnge;
      "setnl",Tsetnl; "setnle",Tsetnle; "setno",Tsetno; "setnp",Tsetnp;
      "setns",Tsetns; "setnz",Tsetnz; "seto",Tseto; "setp",Tsetp;
      "setpe",Tsetpe; "setpo",Tsetpo; "sets",Tsets; "setz",Tsetz;
      "shl",Tshl; "shld",Tshld; "shr",Tshr; "shrd",Tshrd; "stc",Tstc;
      "sub",Tsub; "test",Ttest; "type",Ttype;
      "unpack",Tunpack; "val",Tval; "xchg",Txchg; "xor",Txor
    ]
;;

let _ =
  List.iter
    (fun (s,t) -> Hashtbl.add rw_cs s t) 
    ["?", Tquestion; "_begin_TAL",T_begin_TAL; "_end_TAL",T_end_TAL;
      "All",TAll;
      "array",Tarray; "B1",TB Byte1; "B2",TB Byte2; "B4",TB Byte4;
      "B8",TB Byte8; "BA1",TBA Byte1;
      "BA2",TBA Byte2; "BA4",TBA Byte4; "BA8",TBA Byte8;
      "BV1",TBV Byte1; "BV2",TBV Byte2; "BV4",TBV Byte4;
      "BV8",TBV Byte8; "Exist",TExist;
      "exn",Texn; "exnname",Texnname; "fn",Tfn;
      "junk",Tjunk;
      "of",Tof; "pack",Tpack;
      "R",TR; "real",Treal; "rec",Trec; "RL",TRL; "roll",Troll;
      "S",TS; "se",Tse; "slot",Tslot; "sptr",Tsptr; "sum",Tsum;
      "rollsum",Trollsum; "T",TT; "T4",TT4; "tapp",Ttapp;
      "Ts",TTs; "unroll",Tunroll; "vector",Tvector
    ] 
;;

let process_identifier s filename lexbuf =
  let lower = String.lowercase s in
  try Hashtbl.find rw_cis lower
  with Not_found ->
    try Hashtbl.find rw_cs s
    with Not_found ->
      try
	let t = (Hashtbl.find rw_filename lower) in
	t (filename lexbuf)
      with Not_found ->
      	Tident s
;;

let process_number s =
  let l = String.length s in
  if l=0 then invalid_arg "Tallex.process_number";
  match s.[l-1] with
    'b' | 'y' | 'o' | 'q' | 'h' ->
      failwith "Tallex.process_number - nondecimal unimplemented"
  | 'd' | 't' -> Tnumber (int_of_string (String.sub s 0 (l-1)))
  | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ->
      Tnumber (int_of_string s)
  | _ -> invalid_arg "Tallex.process_number"
;;

let process_string s =
  Tstring (String.sub s 1 (String.length s - 2))
;;

let process_cap1 s =
  let cap = if s.[1]='r' then Tal.Read else Tal.ReadWrite in
  let init =
    if (String.length s)>2 & s.[2]='u' then Tal.Uninit else Tal.Init in
  Tfieldmod (cap,init)
;;

let process_cap2 s =
  let cap =
    if (String.length s)>2 & s.[2]='r' then Tal.Read else Tal.ReadWrite in
  let init = if s.[1]='u' then Tal.Uninit else Tal.Init in
  Tfieldmod (cap,init)
;;

let reset_lexer () = line:=1;;
} 

rule main = parse
(* Whitespace, Comments *)
  [' ' '\009' '\011' '\012']+ {main lexbuf}
| '\010' | '\013' | "\010\013" | "\013\010"
    {process_newline (); Teol}
| ';' {comment lexbuf; Teol}
(* Identifiers *)
| ['a'-'z' 'A'-'Z' '_' '$' '?']['a'-'z' 'A'-'Z' '_' '$' '?' '0'-'9']*
    {process_identifier (Lexing.lexeme lexbuf) filename lexbuf}
(* Literals *)
| '-'? ['0'-'9']+['d' 't']?
| ['0'-'9']+['b' 'y' 'o' 'q']
| ['0'-'9']['0'-'9' 'a'-'f' 'A'-'F']*'h'
    {process_number (Lexing.lexeme lexbuf)}
| '\'' [' '-'&' '\040'-'~']* '\''
| '\034' ['\032' '\033' '\035'-'~']* '\034'
    {process_string (Lexing.lexeme lexbuf)}
(* Punctuation *)
| '<' {Tlab}
| '>' {Trab}            | '@' {Tat}
| '(' {Tlb}             | ',' {Tcomma}
| ')' {Trb}		| '.' {Tdot}  
| '[' {Tlsb}		| '*' {Tstar} 
| ']' {Trsb}		| '+' {Tplus} 
| '{' {Tlcb}		| ':' {Tcolon}
| '}' {Trcb}		| '=' {Tequal}
| '|' {Tbar}            | '`' {Tbackquote}
(* Special *)
| "-!>" {Tarrow}
| "::" {Tcons}
| '^' ['r' 'w'] ['i' 'u']? {process_cap1 (Lexing.lexeme lexbuf)}
| '^' ['i' 'u'] ['r' 'w']? {process_cap2 (Lexing.lexeme lexbuf)}
| '^' {failwith "Illegal field modifier"}
| eof {Teof}
(* Ununsed *)
| ['~' '\\' '/' '-' '&' '%' '#' '!' '\'']
    {failwith "Current unused character"}
| ['\000'-'\008' '\014'-'\031' '\127'-'\255'] {failwith "Illegal Character"}
| _ {failwith ("should not be here! "^
	       (Char.escaped (Lexing.lexeme_char lexbuf 0)))}
(* Comments *)
and comment = parse
  '\010' | '\013' | "\010\013" | "\013\010" {process_newline ()}
| [^ '\010' '\013' ]+ {comment lexbuf}

(* Filenames *)
and filename = parse
  [' ' '\009' '\011' '\012']+ {filename lexbuf}
| ['\033'-'\126']+ {Lexing.lexeme lexbuf}
| _ {failwith "Illegal Character in filename"}

(* EOF: tallex.mll *)
