(**************************************************************)
(*
 *  Ensemble, (Version 0.40)
 *  Copyright 1997 Cornell University
 *  All rights reserved.
 *
 *  See ensemble/doc/license.txt for further information.
 *)
(**************************************************************)
(**************************************************************)
(*
 *  TransisE, (Version 0)
 *  Hebrew University
 *  Copyright Notice
 *
 *  The contents of this file are subject to copyright by
 *  the Hebrew University, which reserves all rights.  Use,
 *  distribution and copying of this material is expressly
 *  prohibited except by prior written permission from
 *  the Hebrew University or from its appropriately authorized 
 *  agents and licensors.
 *)
(**************************************************************)
(* TCAUSAL.ML : Lansis causal ordering protocol *)
(* Author: Ohad Rodeh, 12/96 *)
(**************************************************************)
(* 
General Description 
-------------------
  The layer sends messages reliablely and delivers them upwards, 
in causal order. It does so by adding acknowlegmemnts to messages. 
(1) The layer detects message loss, and completes missing messages, using
Nak messages. 
(2) Failure detection, is achieved using ImAlive messages. 
Every timeout, an ImAlive message is broadcasted. If you fail to 
hear from a process within a timeout, then that process is assumed
failed. 
(3) Garbage collection - The ImAlive messages, are actually empty
regular messages, thus acknowlegments are piggybacked onto them. Thus,
the set of stable messages is computed with the help of periodic ImAlive's. 

Notes
-----
(1) Does not support event type Ack.
(2) Assumes that Bottom discards messages from failed processes.
*)
(**************************************************************)
open Trans
open Layer
open Event
open Util
open View
open Dag
(**************************************************************)
let name = Trace.source_file "TCAUSAL"
(**************************************************************)

type header = NoHdr
  | Data             of seqno * msg_type * Dag.cmsg_id list
  | ImAlive          of seqno * msg_type * Dag.cmsg_id list
  | Retrans_Data     of rank  * seqno * msg_type * Dag.cmsg_id list
  | Retrans_ImAlive  of rank  * seqno * msg_type * Dag.cmsg_id list
  | Nak              of (rank * seqno * seqno) list

type ('a,'b) msg =   
  | Full    of msg_type * ('a,'b)Layer.msg * Iovecl.t
  | Null    of msg_type

type ('a,'b) state = {
    dag		        : ('a, 'b) msg Dag.cdag; (* The data structure *)
    sweep	        : Time.t;       (* Periodic timer *)
    mutable next_gossip : Time.t;       (* Next time a wakeup is due *)
    mutable blocking    : bool;         (* Am I blocked ? *)
    failed	        : bool array;   (* Array of failed members *)
    mutable num_alive   : int           (* The number of alive members *)
  }

(**************************************************************)

let dump (ls,vs) s =
  eprintf "TCAUSAL:dump:%s\n" ls.name;
  Dag.stat s.dag

let nack = false
let rtr = true

(**************************************************************)

let init () (ls,vs) = 
  let n = ls.nmembers in {
  sweep	        = Param.time vs.params "tcausal_sweep" ;
  dag           = Dag.create ls.nmembers;
  next_gossip   = Time.invalid;
  blocking      = false;
  failed        = array_create name ls.nmembers false;
  num_alive     = n
 }

(**************************************************************)

let hdlrs s (ls,vs) {up_out=up;upnm_out=upnm;dn_out=dn;dnlm_out=dnlm;dnnm_out=dnnm} =
  let failwith m = dump (ls,vs) s ; failwith (name^":"^m) in
  let assert a m = if not a then failwith m in
  let log = Trace.log name ls.name in

  (* Compute the holes in the dag - and request them.
     *)
  let send_nacks h = 
    dnlm (create name ECast[Unreliable]) (Nak(h));
    let l = string_of_list  (function
	(r,lo,hi) -> sprintf "(%d,%d,%d)" r lo hi) h in
    if nack then log (fun () -> l)

   (* Got a retransmission request, send as much as possible.
      *)
  and retrans rnk lo hi = 
    if rtr then log (fun () -> sprintf "Retransimitting [%d] -> (%d,%d,%d) num_alive=%d" ls.rank rnk lo hi s.num_alive); 
    let list = Dag.list_of_dag_interval s.dag rnk (lo,hi) in
    List.iter (fun (seqno, (data ,ackl)) ->
      match data with
      | (Full (msg_tp,abv,iov)) -> 
      	  dn (create name ECast[
	      Iov iov;
	      Unreliable
	    ]) abv (Retrans_Data(msg_tp,rnk,seqno,ackl))
      | Null msg_tp -> 
 	  dnlm (create name ECast[
		Unreliable
	      ]) (Retrans_ImAlive (msg_tp,rnk,seqno,ackl))
	  ) list

    (* ECast a message locally. 
       *)
  and up_causal org ack_vct = function
    | Full(msg_tp,abv,Iov) -> 
	up (create name ECast[
      	    Origin org;
	    Iov Iov;
	    (AckVct ack_vct);
	    Up_flags (msg_tp lor xxx_causal)
	  ]) abv
    | Null msg_tp -> 
	  upnm (create name EAlive[
	      	Origin org;
	      	(AckVct ack_vct);
	      	Up_flags (msg_tp lor xxx_causal)
	      ])
  in
  
  (* Send ImAlive messages, and self deliver. 
     *)
  let send_imalive () = 
    let ((_,seq,_,ack_vct),ack_list) = 
      Dag.insert_my_msg s.dag ls.rank (Null xxx_causal) in
    dnlm (castEv name) (ImAlive(xxx_causal,seq,ack_list)); 
    up_causal ls.rank ack_vct (Null xxx_causal) 

    (* ECast a message sent by the application, and self deliver. 
     *)
  and send_regular ev abv dn_iov = 
    let ((_,seq,_,ack_vct),ack_list) = 
      Dag.insert_my_msg s.dag ls.rank (Full(ev.options,abv,dn_iov))
    in dn ev abv (Data (ev.options,seq,ack_list)); 
    up_causal ls.rank ack_vct (Full(ev.options,abv,dn_iov))

    (* (1) Update the dag 
       (2) deliver all new deliverable messages. 
       (3) Check for missing messages, and send quick Nacks, with
       probability ~ 1/s.num_alive
     *)
  and update_dag org seq data ack_list = 
    let l = Dag.missing s.dag ack_list in
    if l <> [] then 
      if Random.int (s.num_alive) < 1 then (
      	send_nacks l;
	if nack then log (fun () -> "Immidiate response");
	);
    let dlvr = Dag.insert s.dag org seq data ack_list in
    List.iter (fun (r,s, data,ack_vct) -> 
      up_causal r ack_vct data) dlvr  
  in
  
  let up_hdlr ev abv hdr = match getType ev, hdr with

    (* ECast:Data: Got a data message from other process.
     *)
  | ECast, Data(msg_tp, seqno, ack_list) ->
      update_dag (getOrigin ev) seqno (Full(msg_tp,abv,(getIov ev))) ack_list

     (* ECast:Retrans: Got a retransmission.  
      *)
  | (ECast|ESend), Retrans_Data(msg_tp, rank,seqno, ack_list) ->
      update_dag rank seqno (Full (msg_tp,abv,(getIov ev))) ack_list

  | _, NoHdr -> up ev abv 
  | _  -> failwith "bad header"


  and uplm_hdlr ev hdr = match getType ev,hdr with

    (* Nak: got a request for retransmission of member [rnk]. If the
     * requested message is from yourself, answer. If you know that
     * [rnk] failed, or you are blocking, reply with probability
     * 1/num_alive. If none of the above hold, reply with probability
     * 1/3num_alive. This covers the case that [rnk] failed, but the
     * layer does not know this yet.  
       *)
  | (ESend|ECast), Nak(nak_list) ->
      List.iter (function 
      |	(rnk,lo,hi) when rnk=ls.rank -> 
	  if rtr then log (fun () -> "mine");
	  retrans rnk lo hi
      |	(rnk,lo,hi) when (s.failed.(rnk) or s.blocking)
    	&& Random.int (s.num_alive) < 1 -> 
	  if rtr then log (fun () -> "failed|blocking");
	  retrans rnk lo hi 
      |	(rnk,lo,hi) when Random.int (3 * s.num_alive) < 1 -> 
	  if rtr then log (fun () -> "random");
	  retrans rnk lo hi 
      |	_ -> ()
	  ) nak_list;
      free name ev 

    (* Received an ImAlive message
       *)
  | ECast, ImAlive(msg_tp, seqno, ack_list) -> 
      update_dag (getOrigin ev) seqno (Null msg_tp) ack_list

    (* Received a retransmission of an ImAlive message
       *)
  | (ECast|ESend), Retrans_ImAlive(msg_tp, rank, seqno, ack_list) -> 
      update_dag rank seqno (Null msg_tp) ack_list
  
  | _ -> failwith "unknown local message"

  and upnm_hdlr ev = match getType ev with

    (* Compute the number of failed processes.
     *)
  | EFail -> 
      let failed = getFailures ev in
      List.iter (fun rank ->
      	s.failed.(rank) <- true
      ) failed ;
      s.num_alive <- ls.nmembers;
      Array.iter (function v -> if v then 
	s.num_alive <- pred s.num_alive) s.failed;
      upnm ev

    (* Init -> request a periodic timer.
     *)
  | EInit -> (
      dnnm (timerAlarm name Time.zero) ;
      upnm ev
      )

    (* ETimer: every so often:
       (1) Send nacks. 
       (2) Garbage collection.
       (3) Send ImAlive. 
       (4) Check for Failures.
       
       Do not always send your list of missing messages. We assume that
       members will miss the same set of messages, in order to 
       avoid requesting the same retransmission list multiple times, a 
       process requests retransmissions with probability ~ 1/num_alive.
     *)

  | ETimer -> (
      let time = getTime ev in
      if time >= s.next_gossip  then (
      	let old = s.next_gossip in
	s.next_gossip <- Time.add time s.sweep ;
	dnnm (timerAlarm name s.next_gossip) ; (* request next gossip *)

      	if old <> Time.invalid then (
	  Dag.gc s.dag; 
	  if ls.nmembers > 1 then (
	    let l = Dag.compute_holes s.dag in 
	    (match l with
	    | [] -> ()
	    | _ when Random.int (s.num_alive) < 1 -> send_nacks l;
	      	if nack then log (fun () -> "ETimer send")
	    | _ when s.blocking 
	      && Random.int (s.num_alive) < 2 -> send_nacks l;
	      	if nack then log (fun () -> "ETimer send blocking")
	    | _ -> ());
	    send_imalive ())
	    )
	);
      upnm ev
      )
  | EDump -> ( dump (ls,vs) s ; upnm ev ) 
  | _ -> upnm ev

  and dn_hdlr ev abv = match getType ev with
  | ECast when not (getUnreliable ev) -> 
      send_regular ev abv (getIov ev)
  | _ -> dn ev abv NoHdr

  and dnnm_hdlr ev = match getType ev with
  | EAck  -> printf "EAck not supported";
      free name ev
  | EBlock -> s.blocking <- true; 
(*      log (fun () -> "Blocking"); *)
      dnnm ev
  | _ -> dnnm ev

in {up_in=up_hdlr;uplm_in=uplm_hdlr;upnm_in=upnm_hdlr;dn_in=dn_hdlr;dnnm_in=dnnm_hdlr}

let l args vs = Layer.hdr_state init hdlrs None NoOpt args vs

let _ = 
  Param.default "tcausal_sweep" (Param.Time (Time.of_float 1.0)) ;
  Layer.install "tcausal" (Layer.init l)

(**************************************************************)
