(**********************************************************************)
(* (c) Greg Morrisett, Neal Glew, Richard Samuels                     *)
(*     April 1999, all rights reserved.                               *)
(**********************************************************************)

(* annot.ml
 * Annotations for storing type information in object files. 
 *)

open Format;;
open Numtypes;;
open Identifier;;
open Tal;;
open Talpp;;


let termchar = '\000';;

let coercion_termination_char = '\000';;
let comment_termination_char = '\000';;
let con_termination_char = '\000';;

let genop_coercion_chars = ("(",",",")");;
let instr_coercion_chars = ("[",";","]");;
let conlist_chars = ("<",";",">");;


type c_inner = coercion list;;		(* Inner coercion, as in Prjr/Prjl *)
type c_outer = coercion list;;		(* Outer (explicit) coercion *)
type c_both = (c_outer * c_inner);;	(* Both, either can be [] *)

type genop_coercion = c_both;;		(* inner * outer. Either can be []. *)


(* An instruction can have any number of genop coercions. *)
type instr_coercion = genop_coercion list;;


(***** Annotation types *****)

type annotation = 
 | Instr_annot of int * instr_coercion	(* instr code * coercion *)
 | Coerce_annot of reg * coercion list
 | Malloc_annot of mallocarg * con option
 | Unpack_annot of identifier * reg * bool * genop_coercion
					(* bool is true if src reg = dst reg *)
 | Fallthru_annot of con list
 | Comment_annot of string
 | Label_annot of con option * coercion list(* Used for data & code labels *)
;;


(***** Macro/Instruction Codes *****)

let malloc_code = 0x9
and fallthru_code = 0x8
and comment_code = 0x7
and label_code = 0xF
and coerce_code = 0x6
and unpack_code = 0xA;;

let encode_instr instr = 
   match instr with
    | Asub _ -> 0x2
    | Aupd _ -> 0x3
    | Btagi _ -> 0x4
    | Btagvar _ -> 0x5
    | Coerce _ -> coerce_code
    | Comment _ -> comment_code
    | Fallthru _ -> fallthru_code
    | Malloc _ -> malloc_code
    | Unpack _ -> unpack_code
    | Gettla _ -> 0xB
    | Settla _ -> 0xC
    | (CgStart _ | CgDump _ | CgHole _ | CgHoleBtagi _ | CgHoleJmp _ |
       CgHoleJcc _ | CgFill _ | CgFillBtag _ | CgFillJmp _ | CgFillJcc _ |
       CgForget _ | CgAbort _ | CgEnd _) -> 0xD
    | _ -> 0x1				(* Normal instruction *)
;;


(***** Instr_coercion constructors *****)

(* get_inner_coercion
   Returns the inner coercion contained in a Prjr or Prjl operand. *)

let get_inner_coercion op = 
   match op with
    | Prjr ((_, crc), _) ->
	 crc
    | Prjl ((_, crc), _) ->
	 crc
    | _ -> []
;;

let ic_inner op =
   [([], get_inner_coercion op)]
;;
   
let ic_2_inner op1 op2 =
   [([], get_inner_coercion op1); ([], get_inner_coercion op2)]
;;

let ic_both op crc =
   [(crc, get_inner_coercion op)]
;;

let ic_outer crc =
   [(crc, [])]
;;


(***** Working with instructions *****)


(* instr_coercions_of
   Returns the coercions contained in a given instruction. *)

let instr_coercions_of instr =
   match instr with
    | ArithBin (_, op1, op2) ->
	 (ic_2_inner op1 op2)
    | ArithMD (_, op) ->
	 (ic_inner op)
    | ArithSR (_, op, _) ->
	 (ic_inner op)
    | ArithUn (_, op) ->
	 (ic_inner op)

    (* Bswap *)	 

    | Call (op, crc) ->
	 (ic_both op crc)
    (* Clc, Cmc *)
    | Cmovcc (_, _, (op, crc)) ->
	 (ic_both op crc)
    | Cmp (op1, op2) ->
	 (ic_2_inner op1 op2)
    (* Conv *)

    | Imul3 (_, op, _) ->
	 (ic_inner op)
    (* Int, Into *)

    | Jcc (_, (_, crc)) ->
	 (ic_outer crc)
    | Jecxz (_, crc) ->
	 (ic_outer crc)
    | Jmp (op, crc) ->
	 (ic_both op crc)

    (* Lahf *)
    | Lea (_, op) ->
	 (ic_inner op)
    | Loopd ((_, crc), _) -> 
	 (ic_outer crc)
	 
    | Mov (op1, (op2, crc2)) ->
(**)	 [([], get_inner_coercion op1);
			(crc2, get_inner_coercion op2)]
    | Movpart (_,op1,_,op2,_) ->
	 (ic_2_inner op1 op2)

    (* Nop *)
	 
    | Pop (op) -> 
	 (ic_inner op)
    (* Popad, Popfd, Pushad, Pushfd *)
    | Push (op, crc) -> 
	 (ic_both op crc)

    (* Retn, Sahf *)

    | Setcc (_, op) ->
	 (ic_inner op)
    | Shld (op, _, _) ->
	 (ic_inner op)
    | Shrd (op, _, _) ->
	 (ic_inner op)
    (* Stc *)
	 
    | Test (op1, op2) ->
	 (ic_2_inner op1 op2)
	 
    | Xchg (op, _) ->
	 (ic_inner op)
	 
   (***** Macros *****)
   | Asub (dstreg, arrayop, eltsize, indexreg, sizeop) ->
	(ic_2_inner arrayop sizeop)
   | Aupd (arrayop, eltsize, indexreg, srcreg, sizeop) ->
	(ic_2_inner arrayop sizeop)
   | Btagi (reg, tag, (ident, labl_coerce), cond) ->
	(ic_outer labl_coerce)
   | Btagvar (reg, offset, tag, (ident, labl_coerce), cond) ->
	(ic_outer labl_coerce)
(*   | Coerce (reg, reg_coerce) ->
	(ic_outer reg_coerce)
   | Unpack (var, dstreg, (op, crc)) ->
	(ic_both op crc)
*)
   | Gettla (reg, (offset, offset_coerce)) ->
	(ic_outer offset_coerce)
   | Settla (offset, (reg, reg_coerce)) ->
	(ic_outer reg_coerce)

(* Cyclone *)
(*
   | CgStart (arg) -> 
   | CgDump (tv, reg, lab) ->
   | CgHole (reg, labeloftmpl, labelofhole) ->
   | CgHoleBtagi (reg, tag, tmpllab, (holelab,crc), cc) ->
   | CgHoleBtagvar (reg, sum, tag, tmpllab, (holelab,crc), cc) ->
   | CgHoleJmp (tmpllab, (holelab, crc)) ->
   | CgHoleJcc (cc, tmpllab, (holelab,crc)) ->
   | CgFill (reg1, labeloftmpl, labelofhole, reg2) ->
   | CgFillBtag (holereg, htmplab, holelab, targreg, ttmplab, targlab) ->
   | CgFillJmp (holereg, htmplab, holelab, targreg, ttmplab, targlab) ->
   | CgFillJcc (holereg, htmplab, holelab, targreg, ttmplab, targlab) ->
   | CgForget tv -> 
   | CgAbort i ->
   | CgEnd reg ->
*)
(* End Cyclone *)

   | _ -> []
;;



(* annotate
   Returns an annotation for a given instruction. *)

let annotate instr = 
   match instr with
    | Coerce (reg, crcl) ->
	 Coerce_annot (reg, crcl)
    | Comment str ->
	 Comment_annot str
    | Fallthru conlist ->
	 Fallthru_annot conlist
    | Malloc (size, mallocarg, tagging) -> 
	 Malloc_annot (mallocarg, tagging)
    | Unpack (id, srcreg, (Reg dstreg, crcl)) when srcreg = dstreg ->
	 Unpack_annot (id, srcreg, true, (crcl, []))
    | Unpack (id, srcreg, (dstop, crcl)) ->
	 Unpack_annot (id, srcreg, false, (crcl, get_inner_coercion dstop))
    | _ -> 
	 Instr_annot ((encode_instr instr), (instr_coercions_of instr))
;;





(***** Pretty-printing *****)

let annot_printing_options = ref Talpp.std_options;;

let pp_print_byte fmt b = 
   Format.fprintf fmt "%X" b

(*   pp_print_char fmt (Char.chr b) *)
;;



(* print_list
   Given a function print_fn, where (print_fn i) prints item i in the list,
   prints the list where 'pre' is the list prefix string, 'sep' is the list
   separator string, and 'post' terminates the list.
   For OCaml format [a;b;c;...;z], pre="[", sep=";", post="]". *)

let print_list fmt (pre,sep,post) print_fn l =
   let rec print_list_rec l = 
      match l with 
	 [] -> pp_print_string fmt post
       | (lastone :: []) ->
	    (print_fn lastone);
	    print_list_rec [];
       | (head :: tail) ->
	    (print_fn head);
	    pp_print_string fmt sep;
	    print_list_rec tail;
   in
(**)(* When writing a null list, eliminate prefix char (makes reading easier)*)
   if l = [] then
      pp_print_string fmt post
   else begin
      pp_print_string fmt pre;
      print_list_rec l
   end
;;

   
(* print_con_list
   Prints a list of cons using Talpp's pretty-printer. *)

let print_con_list fmt conl = 
   let print_talcon item =
      (Talpp.print_con fmt !annot_printing_options item);
      (pp_print_char fmt con_termination_char)
   in
   print_list fmt conlist_chars print_talcon conl
;;


(* print_coercions
   Prints a list of coercions using Talpp's pretty-printer. 
   Uses "EAX" as a placeholder for the object being coerced. *)

let print_coercions fmt crcl =
   Talpp.print_coerce fmt (fun fmt _ _ -> pp_print_string fmt "EAX") 
      !annot_printing_options ((), crcl);
   pp_print_char fmt coercion_termination_char
;;


(* print_genop_coerce
   Prints inner and outer coercions for a genop, separated with a comma. *)

let print_genop_coerce fmt (outer, inner) =
   match (outer, inner) with
(*    | ([],[]) -> () *)
    | _ -> print_list fmt genop_coercion_chars (print_coercions fmt)
	    [outer; inner]
;;


(* print_instr_coerce
   Prints an instruction coercion (a list of genop coercions). 
   The list is written like an OCaml list [a;b;c;d;...]; null lists are -. *)

let print_instr_coerce fmt coercel =
(*   if (List.exists (fun (o,i) -> o <> [] || i <> []) coercel) then *)
      print_list fmt instr_coercion_chars (print_genop_coerce fmt) coercel
(*   else
      pp_print_string fmt "-" *)
;;


let print_code fmt code =
   pp_print_byte fmt code

(*  ( match code with
    | 0x0 -> "No_info"
    | 0x1 -> "Instr"
    | 0x2 -> "Asub"
    | 0x3 -> "Aupd" 
    | 0x4 -> "Btagi"
    | 0x5 -> "Btagvar"
    | 0x6 -> "Coerce"
    | 0x7 -> "Comment"
    | 0x8 -> "Fallthru"
    | 0x9 -> "Malloc"
    | 0xA -> "Unpack" 
    | 0xB -> "Gettla"
    | 0xC -> "Settla"
    | 0xD -> "Cyclone"
    | 0xF -> "LABEL"
    | _ -> "UNKNOWN" ); 

   pp_print_char fmt ' ' *)
;;


(* print_annot
   Prints an annotation to the formatter fmt. *)

let print_annot fmt annot = 
   match annot with
    | Instr_annot (i, coercel) ->
	 print_code fmt i;(* Instruction code byte *)
	 print_instr_coerce fmt coercel;

    | Coerce_annot (r,c) ->
	 print_code fmt coerce_code;
	 Talpp.print_coerce fmt Talpp.print_reg !annot_printing_options (r,c);
    | Unpack_annot (id, srcreg, b, gencrc) ->
	 print_code fmt unpack_code;
	 (* write id *)
	 Identifier.id_prn fmt id;
	 Format.pp_print_char fmt '\000';

	 (* write srcreg as a genop coerce with no coerce, because we can
	    read genop coerces without having to put in an extra token *)
	 Talpp.print_coerce fmt Talpp.print_reg 
	    !annot_printing_options (srcreg, []);
	 Format.pp_print_char fmt '\000';

	 (* write b as 1 for true and 0 for false 
	    b indicates whether srcreg = dstreg: (not b) <=> exists mov instr*)
	 Format.pp_print_char fmt (if b then '1' else '0');
	 Format.pp_print_char fmt ' ';

	 (* write the coercion for the destination *)
	 print_genop_coerce fmt gencrc;

    | Malloc_annot (mallocarg, None) ->
	 print_code fmt malloc_code;
	 print_con_list fmt [];
    | Malloc_annot (mallocarg, Some malloccon) ->
	 print_code fmt malloc_code;
	 print_con_list fmt [malloccon];
    | Fallthru_annot (conlist) ->
	 print_code fmt fallthru_code;
	 print_con_list fmt conlist;
    | Comment_annot (str) ->
	 print_code fmt comment_code;	 
	 pp_print_string fmt str;
	 pp_print_char fmt comment_termination_char;
	 if String.contains str '\000' then 
	    failwith "print_annot: Comment has null char!"
    | Label_annot (None, crcl) ->
	 print_code fmt label_code;
	 print_con_list fmt [];
	 print_coercions fmt crcl;
    | Label_annot (Some labelcon, crcl) ->
	 print_code fmt label_code;
	 print_con_list fmt [labelcon];
	 print_coercions fmt crcl
;;
   
      



(* annotate_instrs 
   Prints the encoded annotations for a vector of instructions. *)

let annotate_instrs fmt instrv =
   Array.iter (fun i -> (print_annot fmt (annotate i);pp_print_newline fmt ()))
      instrv; (**)(* notice newline *)
;;


(* annotate_code_block
   Prints the encoded annotations for a given code block, including
   the code block's label cons. *)

let annotate_code_block fmt blk = 
   let (ident, conopt, instrv) = blk in
   print_annot fmt (Label_annot (conopt, []));
(**)pp_print_newline fmt ();
   annotate_instrs fmt instrv
;;
	 


(* expected_buf_size
   Returns an initial guess for the size of a buffer for the given Tal.imp. *)

let expected_buf_size imp = 
   let code_block_weight = 20 in 
   let instr_weight = 1 in 
   let sum_code_block_sizes blks = 
      let result = ref 0 in
      for i = 0 to (Array.length blks) - 1 do
	 let (_, _, instrv) = blks.(i) in
	 result := !result + (Array.length instrv);
      done; 
      !result
   in
   (code_block_weight * (Array.length imp.code_blocks))
      + (instr_weight * (sum_code_block_sizes imp.code_blocks))
;;

   



(* annotate_imp
   Returns a buffer containing the annotations for the given tal
   implementation, encoded into a string buffer.

   The buffer initially gets a size equal to the number of 
   instructions + the number of code blocks -- seems to be a reasonable 
   estimate. *)


let annotate_imp imp =
   let buf = Buffer.create (expected_buf_size imp) in
   let fmt = Format.formatter_of_buffer buf in

   Array.iter (annotate_code_block fmt) imp.code_blocks;
(* Array.iter (annotate_data_block fmt) imp.data_blocks; *)

   Format.pp_print_flush fmt ();
   buf
;;
