(**************************************************************)
(*
 *  Ensemble, (Version 0.40)
 *  Copyright 1997 Cornell University
 *  All rights reserved.
 *
 *  See ensemble/doc/license.txt for further information.
 *)
(**************************************************************)
(**************************************************************)
(* PR_STABLE.ML : stability detection protocol *)
(* Author: Mark Hayden, 11/96 *)

(* Note: Sequence numbers begin at 0 in each epoch.
 * Acknowlegdements give the number of messages acknowledged
 * so far in the epoch.  Thus the first messge is
 * acknowledged with '1'.  Similarly, the stability numbers
 * correspond to the number of messages that are stable in
 * the current epoch. *)

(* Note: Protocol can quiet itself by not starting cascades
 * at beginning or end of a round.  *)

(**************************************************************)
open Trans
open Layer
open View
open Event
open Util
open Arrayop
(**************************************************************)
let name = Trace.source_file "PR_STABLE"
(**************************************************************)

type gossip = {
    g_round : seqno ;
    g_failed : bool array ;
    g_repr : bool array ;
    g_st_mins : seqno array ;
    g_st_maxs : seqno array ;
    g_mins : seqno array ;
    g_maxs : seqno array
  } 

type header = NoHdr
| Unrel
| Gossip of gossip

type state = {
  sweep		: Time.t ;
  explicitack	: bool ;
  fanout        : fanout ;
  mutable next_gossip : Time.t ;
  mutable gossiped : bool ;
  mutable block_ok : bool ;
  mutable gossip : int array ;

  recd  	: seqno array ;
  ackd  	: seqno array ;

  mutable round : seqno ;
  failed        : bool array ;
  repr          : bool array ;
  maxs          : seqno array ;
  mins          : seqno array ;
  st_mins       : seqno array ;
  st_maxs       : seqno array ;

  (* Fields used to keep track of tree-based gossiping.
   *)
  optimize      : bool ;
  parent        : rank option ;
  mutable parented : bool ;
  children      : rank list ;
  descend       : rank list
}

(**************************************************************)

let dump (ls,vs) s =
  eprintf "PR_STABLE:dump:%s\n" ls.name ;
  eprintf "  rank=%d, round=%d, block_ok=%b\n" ls.rank s.round s.block_ok ;
  eprintf "  failed   =%s\n" (string_of_bool_array s.failed) ;
  eprintf "  recd     =%s\n" (string_of_int_array s.recd) ;
  eprintf "  my_row   =%s\n" (string_of_int_array s.ackd) ;
  eprintf "  repr     =%s\n" (string_of_bool_array s.repr) ;
  eprintf "  mins     =%s\n" (string_of_int_array s.mins) ;
  eprintf "  maxs     =%s\n" (string_of_int_array s.maxs) ;
  eprintf "  st_mins  =%s\n" (string_of_int_array s.st_mins) ;
  eprintf "  st_maxs  =%s\n" (string_of_int_array s.st_maxs)

(**************************************************************)

let child1 r = pred ((succ r) * 2)
let child2 r = succ (child1 r)

let init () (ls,vs) = 
  let failed = array_create name ls.nmembers false in
  let parent = if ls.rank = 0 then None else Some (pred ((succ ls.rank) / 2)) in
  let children =
    (if child1 ls.rank < ls.nmembers then [child1 ls.rank] else []) @
    (if child2 ls.rank < ls.nmembers then [child2 ls.rank] else [])
  in
  let descend = 
    let rec loop rank =
      if rank >= ls.nmembers then []
      else rank :: ((loop (child1 rank)) @ loop (child2 rank))
    in loop ls.rank
  in
  let array v = array_create name ls.nmembers v in
  let s = {
    sweep	= Param.time vs.params "pr_stable_sweep" ;
    explicitack	= Param.bool vs.params "pr_stable_explicit_ack" ;
    fanout      = Param.int vs.params "pr_stable_fanout" ;

    next_gossip = Time.invalid ;
    gossiped    = true ;
    block_ok    = false ;
    
    recd	= array 0 ;
    ackd	= array 0 ;

    round       = 0 ;
    failed      = failed ;
    gossip      = gossip failed ls.rank ;
    
    repr        = array false ;
    st_mins  	= array 0 ;
    st_maxs  	= array 0 ;
    mins	= array 0 ;
    maxs  	= array 0 ;
    
    optimize    = false (*true*) ;
    parent      = parent ;
    parented    = true ;
    children    = children ;
    descend     = descend
  } in
  s.repr.(ls.rank) <- true ;
  s

(**************************************************************)

let hdlrs s (ls,vs) {up_out=up;upnm_out=upnm;dn_out=dn;dnlm_out=dnlm;dnnm_out=dnnm} =
  let failwith m = dump (ls,vs) s ; failwith (name^":"^m) in

  let do_gossip down period =
    if ls.nmembers > 1 then (
      let dests = 
	if period
	&& s.gossiped 
	then [] else (
	  s.gossiped <- true ;
	  choose s.gossip s.fanout
        )
      in
      
      let dests = 
	if s.optimize then (
	  let children = 
	    if down then s.children else []
	  in
	  let parent =
	    if s.parent <> None
	    && (not s.parented)
	    && List.for_all (fun rank -> s.repr.(rank)) s.descend 
	    then (
	      s.parented <- true ;
	      [some_of s.parent]
	    ) else []
	  in
	  dests @ parent @ children
        ) else dests
      in

      if dests <> [] then (
      	let hdr = Gossip {
	  g_round = s.round ;
	  g_failed = Array.copy s.failed ;
	  g_repr = Array.copy s.repr ;
	  g_mins = Array.copy s.mins ;
	  g_maxs = Array.copy s.maxs ;
	  g_st_mins = Array.copy s.st_mins ;
	  g_st_maxs = Array.copy s.st_maxs
      	} in

      	dnlm (create name ESend[
	  Ranks dests ;
	  Unreliable
        ]) hdr
      )
    )
  in

  let ready repr fail =
    array_for_all2 (||) s.repr s.failed
  in
(*
  let ready repr fail =
    let missing = ref 0 in
    for i = 0 to pred ls.nmembers do
      if not (s.repr.(i) || s.failed.(i)) then
	incr missing 
    done ;

    !missing <= 1
  in
*)
  
  let check_stable () =
    (* Max in my recd.  Always safe to do (and necessary).
     *)
    maxify s.maxs s.recd ;

    (* Check if we have full representation for the group.
     *)
    if ready s.repr s.failed then (
      s.round <- succ s.round ;
      fillify s.repr false ;
      s.repr.(ls.rank) <- true ;

      setify s.st_mins s.mins ;
      setify s.st_maxs s.maxs ;
      
      setify s.mins s.ackd ; 
      setify s.maxs s.recd ;

      (* Send down EStable event.
       *)
      dnnm (create name EStable[
	(Stability s.st_mins) ;
	(NumCasts s.st_maxs)
      ]) ;

      do_gossip true true
    ) ;

    if s.block_ok && s.st_mins = s.st_maxs then (
      upnm (create name EBlockOk[]) ;
      s.block_ok <- false ;
    ) ;
  in

  let up_hdlr ev abv hdr = match getType ev,hdr with
  | ECast, NoHdr ->
      let origin = (getOrigin ev) in
      array_incr s.recd origin ;
      if s.explicitack then (
      	up (set name ev [Ack (RankSeqno(origin,s.recd.(origin)))]) abv
      ) else (
      	s.ackd.(origin) <- s.recd.(origin) ;
      	up ev abv
      )      
   
  | ECast, Unrel ->
      up ev abv

  | _, NoHdr -> up ev abv
  | _, _     -> failwith "bad up event"


  and uplm_hdlr ev hdr = match getType ev,hdr with

    (* Gossip Message: if from a live member, copy into
     * the origins row in my acknowledgement matrix.
     *)
  | (ECast|ESend), Gossip(g) ->
      let origin = (getOrigin ev) in

      if (not g.g_failed.(ls.rank))	(* he doesn't think I'm failed *)
      && (not s.failed.(origin))	(* I don't think he's failed *)
      then (
      	maxify s.maxs    g.g_maxs ;
    	maxify s.st_maxs g.g_maxs ;

	if s.failed = g.g_failed then (
	  (* If he is from an advanced round, then advance my info.
	   *)
	  if s.round < g.g_round then (
	    s.round <- g.g_round ;
	    fillify s.repr false ;
	    s.repr.(ls.rank) <- true ;
	    
	    maxify s.st_mins g.g_st_mins ;
	    maxify s.st_maxs g.g_st_maxs ;
	    
	    setify s.mins s.ackd ; 
	    setify s.maxs s.recd ;	(* No maxify here! *)
	    
	    do_gossip true true ;
	    
	    (* OPT: and deliver EStable event.
	     *)
	  ) ;

	  (* Merge mins, maxs, & repr.
	   *)
	  if s.round = g.g_round then (	(* he is in my round or later *)
	    mergify s.repr g.g_repr ;
      	    maxify s.maxs g.g_maxs ;
	    minify s.mins g.g_mins ;
	  ) ;
	) ;

	do_gossip false true
      ) ;
      free name ev

  | _ -> failwith "unknown local message"

  and upnm_hdlr ev = match getType ev with

    (* EInit: request a first timer alarm to bootstrap things.
     *)
  | EInit ->
      dnnm (timerAlarm name Time.zero) ;
      upnm ev

    (* EFail: mark the failed members and check if any
     * messages are now stable.
     *)
  | EFail ->
      let failed = getFailures ev in
      List.iter (fun rank ->
      	s.failed.(rank) <- true
      ) failed ;
      s.gossip <- gossip s.failed ls.rank ;

      check_stable () ;
      upnm ev

  | EBlockOk ->
      if s.block_ok then
	failwith "2nd EBlockOk" ;
      if ls.nmembers = 1 then (
	upnm ev ;
      ) else (
	s.block_ok <- true ;
	free name ev
      )

    (* ETimer: every so often:
     *   1. recalculate stability and deliver EStable event
     *   2. gossip about my row in the stability matrix
     *)
  | ETimer ->
      let time = getTime ev in
      (*if !verbose then dump (ls,vs) s ;*)
      if time >= s.next_gossip then (
	s.next_gossip <- Time.add time s.sweep ;
	dnnm (timerAlarm name s.next_gossip) ; (* request next gossip *)

	(* Gossip, if haven't done so already.
	 *)
	do_gossip true true ;
	s.gossiped <- false ;
	s.parented <- false ;

	check_stable () ;

	(* Pass down stability info for Mnak layer.
	 *)
	(* BUG: perf *)
      	dnnm (create name EStable[
	  (Stability s.st_mins) ;
	  (NumCasts s.st_maxs)
        ]) ;	

	do_gossip false false ;
(*
	dump (ls,vs) s 
*)
      ) ;
      upnm ev

    (* Lost messages also increment stability info.
     *)
  | ELostMessage ->
      let origin = (getOrigin ev) in
      array_incr s.recd origin ;
      s.ackd.(origin) <- s.recd.(origin) ;
      upnm ev

  | EDump -> dump (ls,vs) s ; upnm ev
  | _ -> upnm ev

  and dn_hdlr ev abv = match getType ev with

    (* ECast: if Unreliable option is not set then
     * increment my recd counter.  Then pass the event down.  
     *)
  | ECast ->
      if getUnreliable ev then (
        dn ev abv Unrel
      ) else (
        array_incr s.recd ls.rank ;
        array_incr s.ackd ls.rank ;
        dn ev abv NoHdr
      )

  | _ -> dn ev abv NoHdr

  and dnnm_hdlr ev = match getType ev with

    (* EAck: update my entry in my row of the acknowledgement
     * matrix.
     *)
  | EAck ->
      begin
	match (getAck ev) with
	| RankSeqno(rank,seqno) -> (
	    if seqno > s.ackd.(rank) then (
	      s.ackd.(rank) <- seqno ;
	    )
	  )
	| _ -> failwith "got unknown type of ack"
      end ;
      free name ev

  | _ -> dnnm ev

in {up_in=up_hdlr;uplm_in=uplm_hdlr;upnm_in=upnm_hdlr;dn_in=dn_hdlr;dnnm_in=dnnm_hdlr}

let l args vf = Layer.hdr init hdlrs None NoOpt args vf

let _ = 
  Param.default "pr_stable_sweep" (Param.Time (Time.of_float 0.5)) ;
  Param.default "pr_stable_explicit_ack" (Param.Bool false) ;
  Param.default "pr_stable_fanout" (Param.Int 3) ;
  Layer.install name (Layer.init l)

(**************************************************************)
