(**********************************************************************)
(* (c) Greg Morrisett, Neal Glew, David Walker, Dan Grossman          *)
(*     June 1998, all rights reserved.                                *)
(**********************************************************************)

(* tal.mli
 * 
 * This is a fairly complete specification of the abstract syntax for
 * a typed version of 32-bit (flat-model) iNTEL 80386, Pentium, Pentium Pro,
 * and/or Pentium II assembly language.  
 *
 * TODO:  1. floating point
 *
 *)

(**********************************************************************)
(* Miscellanous stuff *)

open Utilities;;
open Numtypes;;
open Identifier;;

type scale = Byte1 | Byte2 | Byte4 | Byte8
val scale_to_int32 : scale -> int32
type reg = Eax | Ebx | Ecx | Edx | Esi | Edi | Ebp | Esp | Virt of identifier
val compare_regs : reg -> reg -> int

(* For part word stuff: e = 32bit, x = 16bit, h = "high" 8bit, l = low 8bit *)
type reg_part = RPe | RPx | RPh | RPl;;

(**********************************************************************)
(* Kinds *)

(* Kbyte <= Ktype, Kmemi <= Kmem *)
type kind = 
    Kbyte of scale	  (* describes types of 8/16/32/64 bit values *)
  | Ktype   		  (* describes types of all values *)
  | Kmemi of int32        (* types for memory of size i *)
  | Kmem                  (* types for memory or heap blocks *)
  | Kstack  		  (* describes types of the stack & pointers into it *)
  | Kint                  (* integer "sort" for array indices *)
  | Karrow of kind * kind (* functions from constructors to constructors *)
  | Kprod of kind list    (* tuples of constructors *)
(* Cyclone *)
  | Ktstack               (* stack of codegen regions *)
(* End Cyclone *)
;;

val k4byte : kind;; (* 32 bit values *)

(**********************************************************************)
(* Type Constructors *)

(* primitive constructors *)
type primcon = 
    PCbytes of scale      (* : Kbyte s *)
  | PCjunk of int32       (* : Uninitialized junk on the stack or in memory *)
  | PCint of int32        (* : Kint *)

(* fields in tuples & arrays have variances:
     readonly, writeonly, readwrite, or uninitialised. *)
type variance = Read | Write | ReadWrite | Uninit;;

type con_state = NotNorm | Normalized | WeakHead
type rcon = 
(* the language portion of con's *)
    Cvar of identifier
  | Clam of identifier * kind * con
  | Capp of con * con
  | Ctuple of con list
  | Cproj of int * con
(* the "type" portion of con's *)
  | Clab of identifier
  | Cprim of primcon                    (* see above *)
  | Crec of (identifier * kind * con) list
  | Cforall of identifier * kind * con
  | Cexist of identifier * kind * con
  | Ccode of register_state
  | Chptr of int32 list*con option*(con*variance) option
                                        (* Kmem,_ -> K4byte *)
  | Cfield of con*variance              (* Kbyte i -> Kmem i *)
  | Cprod of con list
  | Csum of con list
  | Carray of con*con        		(* Kint,Kmem -> Kmem *)
  | Csing of con             		(* Kint -> K4byte *)
(* the "stack" portion of con's *)
  | Csptr of con                        (* Kstack -> K4byte *)
  | Cempty
  | Ccons of con * con                  (* Kmem/Kbyte,Kstack -> Kstack *)
  | Cappend of con * con                (* Kstack,Kstack -> Kstack *)
(* Cyclone *)
  | Ctmpl of con * con option
        * (identifier * con) list * (identifier * con) list
  | Ctptr of identifier
  | Ctrgn of con * con option
        * (identifier * (identifier * con) list * (identifier * con) list) list
  | Ctcons of con * con
  | Ctempty
(* End Cyclone *)
(* an explicit substitution *)
  | Csubst of con * esubst
(* Enil - empty substitution, Es replace var with con, Eo left-to-composition*)
and esubst = Enil | Es of identifier * con | Eo of esubst * esubst
and con = 
  { mutable rcon     : rcon;   (* "raw" constructor *)
    mutable con_state : con_state;   
    mutable freevars : identifier Set.set option} (* free vars of rcon *)

and register_state
and ccinfo =
    CCnoinfo
  | CCcmp of con*con
  | CCtest of con*con
;;

val defcon : rcon -> con

val pcbytes : scale -> con
val cbyte8 : con
val cbyte4 : con
val cbyte2 : con
val cbyte1 : con

val pcjunk : int32 -> con
val pcint : int32 -> con

val cvar : identifier -> con
val clam : identifier -> kind -> con -> con
val capp : con -> con -> con
val ctuple : con list -> con
val cproj : con -> int -> con
val clab : identifier -> con
val crec : (identifier * kind * con) list -> con
val cforall : identifier -> kind -> con -> con
val cexist : identifier -> kind -> con -> con
val ccode : register_state -> con
val ccode_l : (reg * con) list -> con
val ccode_l_tla : (reg * con) list -> con -> con
val chptr : int32 list -> con option -> (con*variance) option -> con
val cptr : con -> con
val cfield : con -> variance -> con
val cprod : con list -> con
val cprod_b : con list -> con   (* Boxed product *)
val csum : con list -> con
val carray : con -> con -> con
val carray_s : identifier -> con -> con  (* E[v:Sint].^*[S(v)^r,array(v,c)] *)
val csing : con -> con
val csptr : con -> con
val cempty : con
val ccons : con -> con -> con
val cappend : con -> con -> con
val csubst : con -> esubst -> con
val ctmpl : (con * con option * (identifier * con) list * (identifier * con) list) -> con
val ctptr : identifier -> con
val ctrgn : (con * con option * (identifier * (identifier * con) list * (identifier * con) list) list) -> con
val ctcons : con -> con -> con
val ctempty : con
val csubst : con -> esubst -> con

val rs_empty : register_state;;
val rs_map : (con -> con) -> register_state -> register_state;;
val rs_app : (con -> unit) -> register_state -> unit;;

(* the register portion of the register state *)
val rs_get_reg : register_state -> reg -> con;; (* throws Dict.Absent *)
val rs_set_reg : register_state -> reg -> con -> register_state;;
val rs_set_regs : register_state -> (reg * con) list -> register_state;;
val rs_del_reg : register_state -> reg -> register_state;;
val rs_del_regs : register_state -> reg list -> register_state;;
val rs_map_reg : (con -> con) -> register_state -> register_state;;
val rs_app_reg : (reg -> con -> 'a) -> register_state -> unit;;
val rs_fold_reg : (reg -> con -> 'a -> 'a) -> register_state -> 'a -> 'a;;

(* the thread-local portion of the register state *)
val rs_get_tla : register_state -> con option;;
val rs_set_tla : register_state -> con -> register_state;;
val rs_del_tla : register_state -> register_state;;

(* The condition code portion of the register state *)
val rs_get_cc : register_state -> ccinfo;;
val rs_set_cc : register_state -> ccinfo -> register_state
    (* rs_set_cc saves the old ccinfo which can be restored by rs_restore_cc *)
;;
val rs_restore_cc : register_state -> unit;; (* destructive *)

(* Pointer integers *)
val min_pointer_integer : int32;;
val is_non_pointer_integer : int32 -> bool;;

(**********************************************************************)
(* Instructions *)
type annotate = (* Added by Dan *)
    Con        of con
  | AReg       of reg
  | StackTail  of reg * int
  | StackSlice of reg * int * int * con


(* various coercions that only affect the type of a value/reg/path/etc *)
type coercion =
    Pack of con * con  	 (* abstract a type: first con is hidden,
	      		    second con is existential *)
  | Tapp of annotate     (* instantiate type var *)
  | Roll of con        	 (* introduce recursive type *)
  | Unroll             	 (* eliminate recursive type *)
  | Tosum of con       	 (* coerce record/tag to a sum *)
  | Fromsum            	 (* coerce a unary-sum to a record *)
  | RollTosum of con   	 (* combined Tosum followed by Roll *)
  | Toarray of int32*int*con
                         (* coerce record to an array/vector
                            (offset,depth,element type) *)
  | Slot of int32*int32  (* coerce stack slot to junk *)
  | Subsume of con       (* subsumption *)

type 'a coerce = 'a * coercion list (* (r,[c1;...;cn]) is c1(...cn(r)...) *)

(* Operands for most instructions *)
type genop =
    Immed of int32
  | Reg of reg
  | Addr of identifier
  | Prjr of reg coerce * int32
  | Prjl of identifier coerce * int32

(* Note: Above/Below are unsigned, Greater/Less are signed *)
type condition = 
    Above | AboveEq | Below | BelowEq | Eq | Greater | GreaterEq | Less
  | LessEq | NotEq | NotOverflow | NotSign | Overflow | ParityEven
  | ParityOdd | Sign

val negate_condition : condition -> condition;;

type arithbin = 
    Adc 
  | Add   (* (Un)signed Add *)
  | And   (* Bitwise And *)
  | Imul2 (* Signed Multiply, 2 operands *)
  | Or    (* Bitwise Or *)
  | Sbb   
  | Sub   (* (Un)signed Subtract *)
  | Xor   (* Bitwise Xor *)
type arithun = 
    Dec    
  | Inc 
  | Neg   (* 0 - op *)
  | Not   (* 1's complement *)
type arithmd = 
    Div   (* Unsigned *)
  | Idiv  (* Signed divide. Quotient -> Eax, Remainder -> Edx *) 
  | Imul1 (* Signed multiply. Result -> Edx Eax *)
  | Mul   (* Unsigned *)
type arithsr = 
    Rcl 
  | Rcr 
  | Rol 
  | Ror 
  | Sal   (* Signed *) 
  | Sar   (* Signed *)
  | Shl   (* Unsigned *)
  | Shr   (* Unsigned *)

type conv = Cbw | Cdq | Cwd | Cwde;;

(* Malloc allocates a peice of memory that is either a nested set or products
 * of fields or an exnname.  For conciseness we provide bytearrays as an
 * alternative to a product of n byte fields and a later Toarray coercion.
 *)
type mallocarg = 
    Mprod of mallocarg list
  | Mfield of con 
  | Mbytearray of scale * int32
val malloc_prod : con list -> mallocarg;;

(* This is a subset of the x86 32-bit instructions that we might want to
 * cover.  Does not include floating-point support yet.
 *)
type instruction = 
    ArithBin of arithbin * genop * genop
	                        (* binary arithmetic operation *)
  | ArithUn of arithun * genop  (* unary arithmetic operation *)
  | ArithMD of arithmd * genop  (* multiply/division *)
  | ArithSR of arithsr * genop * int32 option (* NONE = ECX, shift/rotate *)
  | Bswap of reg                (* toggle endianess *)
  | Call of genop coerce        (* push return addr, jump to label *)
  | Clc                      	(* clear carry flag *)
  | Cmc                      	(* toggle carry flag *)
  | Cmovcc of condition * reg * genop coerce
	                        (* conditional move *)
  | Cmp of genop * genop     	(* compare *)
  | Conv of conv                (* various 8/16/32 -> 16/32/64 ops *)
  | Imul3 of reg * genop * int32(* signed multiply 3 arg form *)
  | Int of int8                	(* interrupt:  system call *)
  | Into                        (* interrupt if overflow set *)
  | Jcc of condition * identifier coerce
	                        (* jump on condition *)
  | Jecxz of identifier coerce  (* jump if ECX is zero *)
  | Jmp of genop coerce      	(* jump *)
  | Lahf                     	(* move flags into Eax (exc. overflow) *)
  | Lea of reg * genop          (* move effective address into register *)
  | Loopd of identifier coerce * bool option
                                (* decrement ECX and if result nonzero jump
				   if bool present jump only if
				     nonzero Z flag equals the boolean *)
  | Mov of genop * (genop coerce)
                                (* move, load, store *)
  | Movpart of bool * genop * reg_part * genop * reg_part
	                        (* Move (zero-extend(t)/sign-extend(f))
				   part word to another part word. 
				   if part1=part2 then just move.
				   One genop must be a register. *)
  | Nop                      	(* no-op *)
  | Pop of genop             	(* stack pop *)
  | Popad                    	(* pop all registers (32-bit) *)
  | Popfd                    	(* pop eflags *)
  | Push of genop coerce     	(* push onto stack *)
  | Pushad                   	(* push all registers (32-bit) *)
  | Pushfd                   	(* push eflags *)
  | Retn of int32 option       	(* return "near" (i.e., doesn't touch CS) *)
  | Sahf                     	(* move ah into flags (exc. overflow) *)
  | Setcc of condition * genop	(* set dest=1/0 if condition is true/false *)
  | Shld of genop * reg * int32 option (* NONE = ECX, shift 64 *)
  | Shrd of genop * reg * int32 option (* NONE = ECX, shift 64 *)
  | Stc                      	(* set carry flag *)
  | Test of genop * genop    	(* test *)
  | Xchg of genop * reg         (* exchange *)
(* operations specific to x86tal *)
  | Asub of reg * genop * int32 * reg * genop
    (* r1:=genop1[r2] where genop1 is an array of size genop2 and elt size i *)
  | Aupd of genop * int32 * reg * reg * genop
    (* genop1[r1]:=r2 where genop1 is an array of size genop2 and elt size i *)
  | Btagi of reg * int32 * identifier coerce * condition
	                        (* Compare tag and branch on condition *)
  | Btagvar of reg * int32 * int32 * identifier coerce * condition
	                        (* Comp tag in rec and branch on condition *)
  | Coerce of reg coerce        (* Coerce register *)
  | Comment of string
  | Fallthru of con list  
       (* only valid when preceeding a label L.  effectively, 
	* a jmp L[c1,...,cn]. *)
  | Malloc of int32 * mallocarg * con option
        (* allocate tuple,exn_name,bytearray of size i into register eax
         * new pointer is tag for third argument *)
  | Unpack of identifier * reg * genop coerce
                                (* effectively a move *)
    (* read the thread-local-area at offset i, placing result into reg *)
  | Gettla of reg * (int32 coerce)
    (* write the thread-local-area at offset i with contents of reg *)
  | Settla of int32 * (reg coerce)
(* Cyclone *)
  | CgStart of con
  | CgDump of identifier * reg * identifier
  | CgHole of reg * identifier * identifier
  | CgHoleBtagi of reg * int32 * identifier * identifier coerce * condition
  | CgHoleBtagvar of reg * int32 * int32 * identifier * identifier coerce * condition
  | CgHoleJmp of identifier * identifier coerce
  | CgHoleJcc of condition * identifier * identifier coerce
  | CgFill of reg * identifier * identifier * reg
  | CgFillBtag of reg * identifier * identifier * reg * identifier * identifier
  | CgFillJmp of reg * identifier * identifier * reg * identifier * identifier
  | CgFillJcc of reg * identifier * identifier * reg * identifier * identifier
  | CgForget of identifier
  | CgAbort of int32
  | CgEnd of reg

val tla_field_cyclone : int32
val get_cyclone_field : register_state -> (con * variance) option
val set_cyclone_field : register_state -> con -> variance -> register_state
(* End Cyclone *)

(* Notes on incompleteness:

+ No floating point support.
+ No BCD ops - who wants them?
+ No string ops - ditto.
+ No ENTER/LEAVE - ditto.
+ No BOUND - ditto.
+ No system ops - we're supporting user space applications only for now.
+ No concurrentcy ops (xadd, cmpxchg, lock, ...)
   - not supporting concurrency yet.
+ No bit ops - not sure if these are useful.
+ No XLAT - ditto.
+ No CPUID.
+ No far ops - supporting flat model only.
*)

type code_block = identifier * con option * instruction vector

(* Cyclone *)
type template = identifier * identifier * con * code_block list
(* End Cyclone *)

(**********************************************************************)
(* Static Data *)

type data_item =
    Dlabel of identifier coerce
  | Dbytes of string
  | D2bytes of int16
  | D4bytes of int32 coerce
  | Djunk
  | Dup
  | Ddown
;;

(* A datablock is one of the following:
 *   Exnname:
 *     Dexnname
 *     Cannot coerce
 *   Tuple:
 *     (Dlabel | Dbytes | D2bytes | D4bytes | Djunk)^*
 *     Which may then be coerced.
 *)

type data_block = identifier * con option * (data_item list) coerce

(**********************************************************************)
(* Compilation Units *)

type int_ref = string

type con_abbrev = identifier * con;;

type int_con = identifier * kind * int_con_def
and int_con_def = AbsCon | BoundCon of con | ConcCon of con;;

type tal_int =
    { int_abbrevs : con_abbrev vector;
      int_cons : int_con vector;
      int_vals : (identifier * con) vector
    }
;;

type tal_int_type = {
    it_cons : int_con list;
    it_vals : (identifier * con) list
  } 
;;

type con_block = identifier * kind * con;;

type tal_imp =
    { imports : int_ref vector;
      exports : int_ref vector;
      imp_abbrevs : con_abbrev vector;
      con_blocks : con_block vector;
      code_blocks : code_block vector;
      data_blocks : data_block vector
(* Cyclone *)
        ;
      templates : template vector
(* End Cyclone *)
    } 
;;

(* EOF: x86tal.mli *)
