(**********************************************************************)
(* (c) Greg Morrisett, Neal Glew, David Walker,                       *)
(*     June 1998, all rights reserved.                                *)
(**********************************************************************)

(* x86tal.mli
 * 
 * This is a fairly complete specification of the abstract syntax for
 * a typed version of 32-bit (flat-model) iNTEL 80386, Pentium, Pentium Pro,
 * and/or Pentium II assembly language.  
 *
 * TODO:  1. floating point
 *
 *)

(**********************************************************************)
(* Miscellanous stuff *)

open Utilities;;
open Identifier;;

type scale = Byte1 | Byte2 | Byte4 | Byte8
type reg = Eax | Ebx | Ecx | Edx | Esi | Edi | Ebp | Esp | Virt of identifier
let compare_regs r1 r2 =
  match r1 with
    Virt i1 ->
      (match r2 with
	Virt i2 ->
	  id_compare i1 i2
      |	_ -> 1)
  | _ ->
      (match r2 with
	Virt _ -> -1
      |	_ -> compare r1 r2)
;;

(**********************************************************************)
(* Kinds *)

(* K4byte <= Ktype *)
type kind = 
    K4byte  		  (* describes types of 32-bit values *)
  | Ktype   		  (* describes types of all values *)
  | Kstack  		  (* describes types of the stack & pointers into it *)
  | Karrow of kind * kind (* functions from constructors to constructors *)
  | Kprod of kind list    (* tuples of constructors *)

(**********************************************************************)
(* Type Constructors *)

(* primitive constructors *)
type primcon = 
    PCbytes of scale      (* : Ktype except for PCbytes Byte4 *)
  | PCjunk                (* : Uninitialized junk on the stack *)
  | PCtag of int          (* : K4byte, i.e., Singleton(i) *)
  | PCreal                (* : Ktype *)
  | PCexn                 (* : K4byte *)
  | PCexnname             (* : K4byte -> K4byte *)
  | PCarray               (* : K4byte -> K4byte *)
  | PCvector              (* : K4byte -> K4byte *)
  | PCbytearray of scale  (* : K4byte *)
  | PCbytevector of scale (* : K4byte *)
  | PCstackptr            (* : Kstack -> K4byte *)

(* fields in tuples or the stack can be initialized or uninitialized 
 * and read-only or read/write *)
type init_flag = Uninit | Init
type capability = Read | ReadWrite

(* a field in a record, sum, or the stack comes with an init flag *)
type field = con * capability * init_flag
and fields = field list
(* a variant (i,fs) is equivalent to a Cprod(Cprim(PCtag i)::fs).  That is,
 * it's a tuple where the first field is guaranteed to be i. *)
and variant = int * fields
(* variants or tuple allows for a pointer to a tuple or a pointer to 
 * one of a possible number of variants.  We assume that pointers of this
 * form cannot have values in the range of 0..255 so that we may 
 * distinguish them from tags. *)
and variants_or_tuple = Variants of variant list | Tuple of fields
(* values of a sum type are either tags or a (variant or a tuple). 
 * Note that the tags must be unique, and the tags on the variants
 * must be unique in order for the sum to be well-formed.
 *)
and sum = {sum_tags : int list; sum_vt : variants_or_tuple option}

and rcon = 
(* the language portion of con's *)
    Cvar of identifier
  | Clam of identifier * kind * con
  | Capp of con * con
  | Ctuple of con list
  | Cproj of int * con
(* the "type" portion of con's *)
  | Clab of identifier
  | Cprim of primcon  (* see above *)
  | Crec of (identifier * kind * con) list
  | Cforall of identifier * kind * con
  | Cexist of identifier * kind * con
  | Cprod of fields
  | Csum of sum
  | Ccode of register_state
(* the "stack" portion of con's *)
  | Cempty
  | Ccons of con * con
  | Cappend of con * con
and con = 
  { mutable rcon     : rcon; 
    mutable isnorm   : bool; 
    mutable freevars : identifier Set.set option}

and register_state = (reg,con) Dict.dict

let defcon rc = {rcon = rc; isnorm = false; freevars = None};;

(**********************************************************************)
(* Instructions *)

(* various coercions that only affect the type of a value/reg/path/etc *)
type coercion =
    Pack of con * con  (* abstract a type: first con is hidden,
	      		  second con is existential *)
  | Tapp of con        (* instantiate type var *)
  | Roll of con        (* introduce recursive type *)
  | Unroll             (* eliminate recursive type *)
  | Tosum of con       (* coerce record/tag to a sum *)
  | Fromsum            (* coerce a unary-sum to a record *)
  | RollTosum of con   (* combined Tosum followed by Roll *)
  | Toexn              (* coerce a record to an exn *)
  | Toarray            (* coerce a record to an array/vector *)
  | Slot of int        (* slot i of stack type => Junk *)
type 'a coerce = 'a * coercion list

(* Operands for most instructions *)
type genop =
    Immed of int
  | Tag of int                    (* tag introduction *)
  | Reg of reg
  | Addr of identifier
  | Prjr of reg coerce * int
  | Prjl of identifier coerce * int

(* Note: Above/Below are unsigned, Greater/Less are signed *)
type condition = 
    Above | AboveEq | Below | BelowEq | Eq | Greater | GreaterEq | Less
  | LessEq | NotEq | NotOverflow | NotSign | Overflow | ParityEven
  | ParityOdd | Sign

type arithbin = Adc | Add | And | Imul2 | Or | Sbb | Sub | Xor
type arithun = Dec | Inc | Neg | Not
type arithmd = Div | Idiv | Imul1 | Mul
type arithsr = Rcl | Rcr | Rol | Ror | Sal | Sar | Shl | Shr

(* note: the move sign/zero extend ops can only do 8/16 -> 16/32 moves *)
type conv =
    Cbw | Cdq | Cwd | Cwde
  | Movsx of reg*scale*genop*scale (* move with sign extend *)
  | Movzx of reg*scale*genop*scale (* move with zero extend *)

(* we can malloc either a tuple, bytearray, or exception name *)
type mallocarg = 
    Mprod of (con*capability) list | Mbytearray of scale | Mexnname of con

(* This is a subset of the x86 32-bit instructions that we might want to
 * cover.  Does not include floating-point support yet.
 *)
type instruction = 
    ArithBin of arithbin * genop * genop
	                        (* binary arithmetic operation *)
  | ArithUn of arithun * genop  (* unary arithmetic operation *)
  | ArithMD of arithmd * genop  (* multiply/division *)
  | ArithSR of arithsr * genop * int option (* None = ECX, shift/rotate *)
  | Bswap of reg                (* toggle endianess *)
  | Call of genop coerce        (* push return addr, jump to label *)
  | Clc                      	(* clear carry flag *)
  | Cmc                      	(* toggle carry flag *)
  | Cmovcc of condition * reg * genop coerce
	                        (* conditional move *)
  | Cmp of genop * genop     	(* compare *)
  | Conv of conv                (* various 8/16/32 -> 16/32/64 ops *)
  | Imul3 of reg * genop * int  (* signed multiply 3 arg form *)
  | Int of int               	(* interrupt:  system call *)
  | Into                        (* interrupt if overflow set *)
  | Jcc of condition * identifier coerce
	                        (* jump on condition *)
  | Jecxz of identifier coerce  (* jump if ECX is zero *)
  | Jmp of genop coerce      	(* jump *)
  | Lahf                     	(* move flags into Eax (exc. overflow) *)
  | Lea of reg * genop          (* move effective address into register *)
  | Loopd of identifier coerce * bool option
                                (* decrement ECX and if result nonzero jump
				   if bool present jump only if
				     nonzero Z flag equals the boolean *)
  | Mov of genop * (genop coerce)
                                (* move, load, store *)
  | Nop                      	(* no-op *)
  | Pop of genop             	(* stack pop *)
  | Popad                    	(* pop all registers (32-bit) *)
  | Popfd                    	(* pop eflags *)
  | Push of genop coerce     	(* push onto stack *)
  | Pushad                   	(* push all registers (32-bit) *)
  | Pushfd                   	(* push eflags *)
  | Retn of int option       	(* return "near" (i.e., doesn't touch CS) *)
  | Sahf                     	(* move ah into flags (exc. overflow) *)
  | Setcc of condition * genop	(* set dest=1/0 if condition is true/false *)
  | Shld of genop * reg * int option (* None = ECX, shift 64 *)
  | Shrd of genop * reg * int option (* None = ECX, shift 64 *)
  | Stc                      	(* set carry flag *)
  | Test of genop * genop    	(* test *)
  | Xchg of genop * reg         (* exchange *)
(* operations specific to x86tal *)
  | Alen of scale * reg * genop         (* Array length *)
  | Asub of scale * reg * genop * reg   (* r1 := genop[r2] *)
  | Aupd of scale * genop * reg * reg   (* genop[r1] := r2 *)
  | Bexn of reg * genop * identifier coerce
                                (* r.exname != op branch to label *)
  | Btagi of reg * int * identifier coerce * condition
	                        (* Compare tag and branch on condition *)
  | Btagvar of reg * int * identifier coerce * condition
	                        (* Comp tag in rec and branch on condition *)
  | Coerce of reg coerce        (* coerce register to new type *)
  | Comment of string
  | Fallthru of con list  
       (* only valid when preceeding a label L.  effectively, 
	* a jmp L[c1,...,cn]. *)
  | Malloc of reg * int * mallocarg   (* allocate tuple,exn_name,bytearray *)
  | Unpack of identifier * reg * genop coerce
                                (* effectively a move *)

(* Notes on incompleteness:

+ No floating point support.
+ No BCD ops - who wants them?
+ No string ops - ditto.
+ No ENTER/LEAVE - ditto.
+ No BOUND - ditto.
+ No system ops - we're supporting user space application only for now.
+ No concurrentcy ops (xadd, cmpxchg, lock, ...)
   - not supporting concurrency yet.
+ No bit ops - not sure if these are useful.
+ No XLAT - ditto.
+ No CPUID.
+ No far ops - supporting flat model only.

*)

type code_block = identifier * con * instruction vector

(**********************************************************************)
(* Static Data *)

(* There are some other things we could add here such as exnnames *)

type data_item =
    Dlabel of identifier coerce
  | Dtag of int coerce
  | Dbytes of string
  | D2bytes of int
  | D4bytes of int
  | Djunk
  | Dexnname of con
;;

type data_block = identifier * con option * (data_item list) coerce

(**********************************************************************)
(* Compilation Units *)

type int_ref = string

type int_con = identifier * kind * int_con_def
and int_con_def = AbsCon | ConcCon of con;;

type tal_int =
    { int_includes : int_ref vector;
      int_cons : int_con vector;
      int_vals : (identifier * con) vector
    }
;;

type tal_int_type = {
    it_cons : int_con list;
    it_vals : (identifier * con) list
  } 
;;

type con_block = (identifier * kind * con);;

type tal_imp =
    { imports : int_ref vector;
      exports : int_ref vector;
      con_blocks : con_block vector;
      code_blocks : code_block vector;
      data_blocks : data_block vector
    } 
;;

(* EOF: x86tal.ml *)
