(**********************************************************************)
(* (c) Greg Morrisett, Neal Glew, David Walker,                       *)
(*     June 1998, all rights reserved.                                *)
(**********************************************************************)

(* x86tal.mli
 * 
 * This is a fairly complete specification of the abstract syntax for
 * a typed version of 32-bit (flat-model) iNTEL 80386, Pentium, Pentium Pro,
 * and/or Pentium II assembly language.  
 *
 * TODO:  1. floating point
 *
 *)

(**********************************************************************)
(* Miscellanous stuff *)

open Utilities;;
open Numtypes;;
open Identifier;;

type scale = Byte1 | Byte2 | Byte4 | Byte8
let scale_to_int32 s =
   match s with
     Byte1 -> i32_1
   | Byte2 -> i32_2
   | Byte4 -> i32_4
   | Byte8 -> i32_8
;;

type reg = Eax | Ebx | Ecx | Edx | Esi | Edi | Ebp | Esp | Virt of identifier
let compare_regs r1 r2 =
  match r1 with
    Virt i1 ->
      (match r2 with
	Virt i2 ->
	  id_compare i1 i2
      |	_ -> 1)
  | _ ->
      (match r2 with
	Virt _ -> -1
      |	_ -> compare r1 r2)
;;

(* For part word stuff: e = 32bit, x = 16bit, h = "high" 8bit, l = low 8bit *)
type reg_part = RPe | RPx | RPh | RPl;;

(**********************************************************************)
(* Kinds *)

(* Kbyte <= Ktype, Kmemi <= Kmem *)
type kind = 
    Kbyte of scale	  (* describes types of 8/16/32/64 bit values *)
  | Ktype   		  (* describes types of all values *)
  | Kmemi of int32        (* types for memory of size i *)
  | Kmem                  (* types for memory or heap blocks *)
  | Kstack  		  (* describes types of the stack & pointers into it *)
  | Kint                  (* integer "sort" for array indices *)
  | Karrow of kind * kind (* functions from constructors to constructors *)
  | Kprod of kind list    (* tuples of constructors *)
;;

let k4byte = Kbyte Byte4;; (* 32 bit values *)

(**********************************************************************)
(* Type Constructors *)

(* primitive constructors *)
type primcon = 
    PCbytes of scale      (* : Kbyte s *)
  | PCjunk of int32       (* : Uninitialized junk on the stack or in memory *)
  | PCexn                 (* : K4byte *)
  | PCexnname             (* : Kmem -> K4byte *)
  | PCint of int32

(* fields in tuples & arrays have variances:
     readonly, writeonly, readwrite, or uninitialised. *)
type variance = Read | Write | ReadWrite | Uninit;;

type rcon = 
(* the language portion of con's *)
    Cvar of identifier
  | Clam of identifier * kind * con
  | Capp of con * con
  | Ctuple of con list
  | Cproj of int * con
(* the "type" portion of con's *)
  | Clab of identifier
  | Cprim of primcon  (* see above *)
  | Crec of (identifier * kind * con) list
  | Cforall of identifier * kind * con
  | Cexist of identifier * kind * con
  | Ccode of register_state
  | Chptr of int32 list*con option
  | Cfield of con*variance
  | Cprod of con list
  | Csum of con list
  | Carray of con*con
  | Csing of con
(* the "stack" portion of con's *)
  | Csptr of con
  | Cempty
  | Ccons of con * con
  | Cappend of con * con
and con = 
  { mutable rcon     : rcon;   (* "raw" constructor *)
    mutable isnorm   : bool;   (* true only if rcon is in (full) normal form *)
    mutable freevars : identifier Set.set option} (* free vars of rcon *)

and register_state = {
    rs_regs : (reg,con) Dict.dict;
    rs_tla : con option;
    mutable rs_cc : ccinfo;
    rs_save_cc : ccinfo;
  }
and ccinfo =
    CCnoinfo
  | CCcmp of con*con
  | CCtest of con*con
;;

let ccinfo_map f cc =
  match cc with
    CCnoinfo -> cc
  | CCcmp (c1,c2) -> CCcmp (f c1,f c2)
  | CCtest (c1,c2) -> CCtest (f c1,f c2)
;;
let ccinfo_app f cc =
  match cc with
    CCnoinfo -> ()
  | CCcmp (c1,c2) -> f c1; f c2; ()
  | CCtest (c1,c2) -> f c1; f c2; ()
;;

let rs_empty =
  { rs_regs=Dict.empty compare_regs; rs_tla=None;
    rs_cc=CCnoinfo; rs_save_cc=CCnoinfo }
;;
let rs_map f rs =
  { rs_regs = Dict.map_dict f rs.rs_regs;
    rs_tla = (match rs.rs_tla with None -> None | Some c -> Some (f c));
    rs_cc = ccinfo_map f rs.rs_cc;
    rs_save_cc = ccinfo_map f rs.rs_save_cc
  } 
;;
let rs_app f rs =
  Dict.app_dict (fun r c -> f c) rs.rs_regs;
  (match rs.rs_tla with None -> () | Some c -> f c);
  ccinfo_app f rs.rs_cc
;;
let rs_get_reg rs r = Dict.lookup rs.rs_regs r;;
let rs_set_reg rs r c = { rs with rs_regs=Dict.insert rs.rs_regs r c };;
let rs_set_regs rs rcs = { rs with rs_regs=Dict.inserts rs.rs_regs rcs };;
let rs_del_reg rs r = { rs with rs_regs=Dict.delete rs.rs_regs r };;
let rs_del_regs rs rl =
  { rs with rs_regs=List.fold_left Dict.delete rs.rs_regs rl }
;;
let rs_map_reg f rs = { rs with rs_regs=Dict.map_dict f rs.rs_regs };;
let rs_app_reg f rs = Dict.app_dict f rs.rs_regs;;
let rs_fold_reg f rs a = Dict.fold_dict f rs.rs_regs a;;

let rs_get_tla rs = rs.rs_tla;;
let rs_set_tla rs c = { rs with rs_tla = Some c };;
let rs_del_tla rs = { rs with rs_tla = None };;

let rs_get_cc rs = rs.rs_cc;;
let rs_set_cc rs cc = { rs with rs_cc=cc; rs_save_cc=rs.rs_cc };;
let rs_restore_cc rs = rs.rs_cc <- rs.rs_save_cc;;

let defcon rc = {rcon = rc; isnorm = false; freevars = None};;
let prcon rc =
  {rcon = rc; isnorm = true; freevars = Some (Set.empty id_compare)}
;;

let pcbytes s = prcon (Cprim (PCbytes s));;
let cbyte4 = pcbytes Byte4;;
let pcjunk i = prcon (Cprim (PCjunk i));;
let pcexn = prcon (Cprim PCexn);;
let pcexnname = prcon (Cprim PCexnname);;
let pcint i = prcon (Cprim (PCint i));;

let cvar v =
  {rcon = Cvar v; isnorm = true; freevars = Some (Set.singleton id_compare v)}
;;
let clam v k c = defcon (Clam (v,k,c));;
let capp c1 c2 = defcon (Capp (c1,c2));;
let ctuple cs = defcon (Ctuple cs);;
let cproj c i = defcon (Cproj (i,c));;
let clab l = prcon (Clab l);;
let crec vkcs = defcon (Crec vkcs);;
let cforall i k c = defcon (Cforall (i,k,c));;
let cexist i k c = defcon (Cexist (i,k,c));;
let ccode rs = defcon (Ccode rs);;
let ccode_l rcs = ccode (rs_set_regs rs_empty rcs);;
let ccode_l_tla rcs c = ccode (rs_set_regs (rs_set_tla rs_empty c) rcs);;
let chptr is co = defcon (Chptr (is,co));;
let cptr c = defcon (Chptr ([],Some c));;
let cfield c v = defcon (Cfield (c,v));;
let cprod cs = defcon (Cprod cs);;
let cprod_b cs = cptr (cprod cs);;
let csum cs = defcon (Csum cs);;
let carray cl ce = defcon (Carray (cl,ce));;
let csing c = defcon (Csing c);;
let carray_s v ce =
  let cv = cvar v in
  cexist v Kint (cprod_b [cfield (csing cv) Read;carray cv ce])
;;
let csptr c = defcon (Csptr c);;
let cempty = prcon Cempty;;
let ccons c1 c2 = defcon (Ccons (c1,c2));;
let cappend c1 c2 = defcon (Cappend (c1,c2));;

let min_pointer_integer = int_to_int32 4096;;
let is_non_pointer_integer i =
  (land32 i i32_3 <>$ i32_0) or i<$min_pointer_integer
;;

(**********************************************************************)
(* Instructions *)

(* various coercions that only affect the type of a value/reg/path/etc *)
type coercion =
    Pack of con * con  	 (* abstract a type: first con is hidden,
	      		    second con is existential *)
  | Tapp of con        	 (* instantiate type var *)
  | Roll of con        	 (* introduce recursive type *)
  | Unroll             	 (* eliminate recursive type *)
  | Tosum of con       	 (* coerce record/tag to a sum *)
  | Fromsum            	 (* coerce a unary-sum to a record *)
  | RollTosum of con   	 (* combined Tosum followed by Roll *)
  | Toexn              	 (* coerce a record to an exn *)
  | Toarray of int32*int*con
                         (* coerce record to an array/vector
                            (offset,depth,element type) *)
  | Slot of int32*int32  (* coerce stack slot to junk *)
  | Subsume of con       (* subsumption *)

type 'a coerce = 'a * coercion list

(* Operands for most instructions *)
type genop =
    Immed of int32
  | Reg of reg
  | Addr of identifier
  | Prjr of reg coerce * int32
  | Prjl of identifier coerce * int32

type condition = 
    Above | AboveEq | Below | BelowEq | Eq | Greater | GreaterEq | Less
  | LessEq | NotEq | NotOverflow | NotSign | Overflow | ParityEven
  | ParityOdd | Sign

let negate_condition c = 
  match c with
    Above -> BelowEq
  | AboveEq -> Below
  | Below -> AboveEq
  | BelowEq -> Above
  | Eq -> NotEq
  | Greater -> LessEq
  | GreaterEq -> Less
  | Less -> GreaterEq
  | LessEq -> Greater
  | NotEq -> Eq
  | NotOverflow -> Overflow
  | NotSign -> Sign
  | Overflow -> NotOverflow
  | ParityEven -> ParityOdd
  | ParityOdd -> ParityEven
  | Sign -> NotSign
;;

type arithbin = Adc | Add | And | Imul2 | Or | Sbb | Sub | Xor
type arithun = Dec | Inc | Neg | Not
type arithmd = Div | Idiv | Imul1 | Mul
type arithsr = Rcl | Rcr | Rol | Ror | Sal | Sar | Shl | Shr

type conv = Cbw | Cdq | Cwd | Cwde;;

(* Malloc allocates a peice of memory that is either a nested set or products
 * of fields or an exnname.  For conciseness we provide bytearrays as an
 * alternative to a product of n byte fields and a later Toarray coercion.
 *)
type mallocarg = 
    Mprod of mallocarg list
  | Mfield of con 
  | Mbytearray of scale * int32
  | Mexnname of con
let malloc_prod cs = Mprod (List.map (fun c -> Mfield c) cs);;

(* This is a subset of the x86 32-bit instructions that we might want to
 * cover.  Does not include floating-point support yet.
 *)
type instruction = 
    ArithBin of arithbin * genop * genop
	                        (* binary arithmetic operation *)
  | ArithUn of arithun * genop  (* unary arithmetic operation *)
  | ArithMD of arithmd * genop  (* multiply/division *)
  | ArithSR of arithsr * genop * int32 option (* None = ECX, shift/rotate *)
  | Bswap of reg                (* toggle endianess *)
  | Call of genop coerce        (* push return addr, jump to label *)
  | Clc                      	(* clear carry flag *)
  | Cmc                      	(* toggle carry flag *)
  | Cmovcc of condition * reg * genop coerce
	                        (* conditional move *)
  | Cmp of genop * genop     	(* compare *)
  | Conv of conv                (* various 8/16/32 -> 16/32/64 ops *)
  | Imul3 of reg * genop * int32(* signed multiply 3 arg form *)
  | Int of int8               	(* interrupt:  system call *)
  | Into                        (* interrupt if overflow set *)
  | Jcc of condition * identifier coerce
	                        (* jump on condition *)
  | Jecxz of identifier coerce  (* jump if ECX is zero *)
  | Jmp of genop coerce      	(* jump *)
  | Lahf                     	(* move flags into Eax (exc. overflow) *)
  | Lea of reg * genop          (* move effective address into register *)
  | Loopd of identifier coerce * bool option
                                (* decrement ECX and if result nonzero jump
				   if bool present jump only if
				     nonzero Z flag equals the boolean *)
  | Mov of genop * (genop coerce)
                                (* move, load, store *)
  | Movpart of bool * genop * reg_part * genop * reg_part
	                        (* Move/zero extend/sign/extend/trunc part word
                                   to another part word.  One genop must be a
                                   register. *)
  | Nop                      	(* no-op *)
  | Pop of genop             	(* stack pop *)
  | Popad                    	(* pop all registers (32-bit) *)
  | Popfd                    	(* pop eflags *)
  | Push of genop coerce     	(* push onto stack *)
  | Pushad                   	(* push all registers (32-bit) *)
  | Pushfd                   	(* push eflags *)
  | Retn of int32 option       	(* return "near" (i.e., doesn't touch CS) *)
  | Sahf                     	(* move ah into flags (exc. overflow) *)
  | Setcc of condition * genop	(* set dest=1/0 if condition is true/false *)
  | Shld of genop * reg * int32 option (* None = ECX, shift 64 *)
  | Shrd of genop * reg * int32 option (* None = ECX, shift 64 *)
  | Stc                      	(* set carry flag *)
  | Test of genop * genop    	(* test *)
  | Xchg of genop * reg         (* exchange *)
(* operations specific to x86tal *)
  | Asub of reg * genop * int32 * reg * genop
    (* r1:=genop1[r2] where genop1 is an array of size genop2 and elt size i *)
  | Aupd of genop * int32 * reg * reg * genop
    (* genop1[r1]:=r2 where genop1 is an array of size genop2 and elt size i *)
  | Bexn of reg * genop * identifier coerce
                                (* r.exname != op branch to label *)
  | Btagi of reg * int32 * identifier coerce * condition
	                        (* Compare tag and branch on condition *)
  | Btagvar of reg * int32 * int32 * identifier coerce * condition
	                        (* Comp tag in rec and branch on condition *)
  | Coerce of reg coerce        (* coerce register to new type *)
  | Comment of string
  | Fallthru of con list  
       (* only valid when preceeding a label L.  effectively, 
	* a jmp L[c1,...,cn]. *)
  | Malloc of int32 * mallocarg   (* allocate tuple,exn_name,bytearray *)
  | Unpack of identifier * reg * genop coerce
                                (* effectively a move *)
    (* read the thread-local-area at offset i, coercing the operand and 
     * placing result into reg *)
  | Gettla of reg * (int32 coerce)
    (* write the thread-local-area at offset i with contents of reg *)
  | Settla of int32 * (reg coerce)


(* Notes on incompleteness:

+ No floating point support.
+ No BCD ops - who wants them?
+ No string ops - ditto.
+ No ENTER/LEAVE - ditto.
+ No BOUND - ditto.
+ No system ops - we're supporting user space application only for now.
+ No concurrentcy ops (xadd, cmpxchg, lock, ...)
   - not supporting concurrency yet.
+ No bit ops - not sure if these are useful.
+ No XLAT - ditto.
+ No CPUID.
+ No far ops - supporting flat model only.

*)

type code_block = identifier * con option * instruction vector

(**********************************************************************)
(* Static Data *)

(* There are some other things we could add here such as exnnames *)

type data_item =
    Dlabel of identifier coerce
  | Dbytes of string
  | D2bytes of int16
  | D4bytes of int32 coerce
  | Djunk
  | Dexnname of con
  | Dup
  | Ddown
;;

type data_block = identifier * con option * (data_item list) coerce

(**********************************************************************)
(* Compilation Units *)

type int_ref = string

type con_abbrev = identifier * con;;

type int_con = identifier * kind * int_con_def
and int_con_def = AbsCon | BoundCon of con | ConcCon of con;;

type tal_int =
    { int_abbrevs : con_abbrev vector;
      int_cons : int_con vector;
      int_vals : (identifier * con) vector
    }
;;

type tal_int_type = {
    it_cons : int_con list;
    it_vals : (identifier * con) list
  } 
;;

type con_block = (identifier * kind * con);;

type tal_imp =
    { imports : int_ref vector;
      exports : int_ref vector;
      imp_abbrevs : con_abbrev vector;
      con_blocks : con_block vector;
      code_blocks : code_block vector;
      data_blocks : data_block vector
    } 
;;

(* EOF: x86tal.ml *)
