(**************************************************************)
(*
 *  Ensemble, (Version 0.40)
 *  Copyright 1997 Cornell University
 *  All rights reserved.
 *
 *  See ensemble/doc/license.txt for further information.
 *)
(**************************************************************)
(**************************************************************)
(* PBCAST.ML : stability detection protocol *)
(* Author: Mark Hayden, 11/96 *)
(**************************************************************)
open Trans
open Layer
open Event
open Util
open View
(**************************************************************)
let name = Trace.source_file "PBCAST"
(**************************************************************)

type round = seqno

type header = NoHdr
| Data of rank * seqno * bool(*retrans?*)
| Gossip of round * (seqno array)
| Request of round * ((rank * seqno) array)

type 'abv message =
| Msg of round * 'abv * Iovecl.t
| Lost
| Unset					(* for iq *)
| Reset					(* for iq *)

type 'abv state = {
  casts		: 'abv message Iq.t array ;
  sweep		: Time.t ;
  stale         : round ;
  max_entropy   : int ;
  msg_overhead  : int ;
  disseminate   : bool ;

  mutable gossip : int array ;

  mutable round : round ;
  mutable next_gossip : Time.t ;
  recd  	: seqno array ;
  free  	: seqno array ;
  high  	: seqno array ;
  failed        : bool array ;
  mutable acct_recd : seqno ;		(* # delivered messages *)
  mutable acct_lost : seqno ;		(* # lost messages *)
  mutable acct_retrans : seqno ;	(* # retrans messages *)
  mutable acct_bad_ret : seqno		(* # retrans messages not used *)
}

(**************************************************************)

let dump (ls,vs) s =
  eprintf "PBCAST:dump:%s\n" ls.name ;
(*
  eprintf "  rank=%d, round=%d, block_ok=%b\n" ls.rank s.round s.block_ok ;
  eprintf "  failed   =%s\n" (string_of_bool_array s.failed) ;
  eprintf "  recd     =%s\n" (string_of_int_array s.recd) ;
  eprintf "  my_row   =%s\n" (string_of_int_array s.ackd) ;
  eprintf "  repr     =%s\n" (string_of_bool_array s.repr) ;
  eprintf "  mins     =%s\n" (string_of_int_array s.mins) ;
  eprintf "  maxs     =%s\n" (string_of_int_array s.maxs) ;
  eprintf "  st_mins  =%s\n" (string_of_int_array s.st_mins) ;
  eprintf "  st_maxs  =%s\n" (string_of_int_array s.st_maxs)
*)()

(**************************************************************)

let init () (ls,vs) = 
  { sweep	= Param.time vs.params "pbcast_sweep" ;
    stale       = Param.int vs.params "pbcast_stale" ;
    disseminate = Param.bool vs.params "pbcast_disseminate" ;
    max_entropy = Param.int vs.params "pbcast_max_entropy" ;
    msg_overhead = 100 ;
    next_gossip = Time.invalid ;
    gossip      = gossip (array_create name ls.nmembers false) ls.rank ;
    casts       = array_createf ls.nmembers (fun _ -> Iq.empty Unset Reset) ;
    round       = 0 ;
    recd        = array_create name ls.nmembers 0 ;
    free        = array_create name ls.nmembers 0 ;
    high        = array_create name ls.nmembers 0 ;
    failed      = array_create name ls.nmembers false ;
    acct_recd   = 0 ;
    acct_lost   = 0 ;
    acct_retrans = 0 ;
    acct_bad_ret = 0
  }

(**************************************************************)

let hdlrs s (ls,vs) {up_out=up;upnm_out=upnm;dn_out=dn;dnlm_out=dnlm;dnnm_out=dnnm} =
  let failwith m = dump (ls,vs) s ; failwith (name^":"^m) in
  let ack = make_acker name dnnm in
  let log = Trace.log name ls.name in	(* general *)
  let logr = Trace.log (name^"R") ls.name in (* reliability, messages retransmission *)
  let logg = Trace.log (name^"G") ls.name in (* gossipping status *)
  let loga = Trace.log (name^"A") ls.name in (* accounting summaries *)
  let logb = Trace.log "PBCASTB"(*"BUFFER"*) ls.name (*(name^":"^ls.name)*) in

  let check_deliver rank =
    if rank = ls.rank then failwith "sanity" ;
    Iq.read_prefix s.casts.(rank) (fun msg seqno ->
      log (fun () -> sprintf "deliver:(%d,%d)" rank seqno) ;
      array_incr s.recd rank ;
      match msg with
      | Msg(_,abv,iov) ->
	  s.acct_recd <- succ s.acct_recd ;
          up (create name ECast[
      	    Origin rank ;
	    Iov iov
      	  ]) abv
      |	Lost ->
	  s.acct_lost <- succ s.acct_lost ;
(*
	  log (fun () -> sprintf "deliver(ETimer):(%d,%d)" rank seqno) ;
	  log (fun () -> sprintf "lost:(%d,%d)" rank seqno) ;
*)
	  upnm (create name ELostMessage[Origin rank])
      | _ -> failwith "sanity[1]"
    )
  in

  let up_hdlr ev abv hdr = match getType ev,hdr with
  | (ECast|ESend), Data(rank,seqno,retrans) ->
      if rank = ls.rank then 
	failwith "sanity:retransmit of my message" ;
      s.high.(rank) <- max s.high.(rank) seqno ;

      log (fun () -> sprintf "got:(%d,%d)" rank seqno) ;

      if seqno >= s.free.(rank)
      && Iq.assign s.casts.(rank) seqno (Msg(s.round,abv,(getIov ev))) 
      then (
	if retrans then
	  s.acct_retrans <- succ s.acct_retrans ;
	check_deliver rank ;
      ) else (
	s.acct_bad_ret <- succ s.acct_bad_ret ;
	logr (fun () -> sprintf "dropping redundant cast (%d,%d)" rank seqno)
      )

  | _, NoHdr -> up ev abv
  | _, _     -> failwith "bad up event"

  and uplm_hdlr ev hdr = match getType ev,hdr with
  | (ECast|ESend), Gossip(round,avail) ->
      if (getOrigin ev) = ls.rank then failwith "gossip from myself" ;
      let requests = ref [] in
      let nrequests = ref 0 in
      for rank = 0 to pred ls.nmembers do
	if rank <> ls.rank 
	&& avail.(rank) > s.recd.(rank) 
	&& !nrequests < 100 then (
	  s.high.(rank) <- max s.high.(rank) avail.(rank) ;
	  for seqno = s.high.(rank) downto s.recd.(rank) do
	    if !nrequests < 100 then (
	      match Iq.get s.casts.(rank) seqno with
	      | Msg(_,_,_) -> ()
	      |	_ ->
		  requests := (rank,seqno) :: !requests ;
		  incr nrequests
	    )
	  done ;
	) ;
      done ;

      if !nrequests > 0 then (
      	logr (fun () -> sprintf "requesting=%s"
	  (string_of_list (fun (a,b) -> sprintf "(%d,%d)" a b) !requests)) ;
	let requests = Array.of_list !requests in
      	dnlm (sendRank name (getOrigin ev)) (Request(round, requests)) ;
      ) ;
      
      ack ev ; free name ev

  | (ECast|ESend), Request(round,requests) ->
      if (getOrigin ev) = ls.rank then failwith "request from myself" ;
      let origin = (getOrigin ev) in
      if round <> s.round then (
	logg (fun () -> sprintf "rank=%d round=%d origin=%d old-gossip=%d" ls.rank s.round (getOrigin ev) round) ;
      ) else (
	let sent = ref 0 in
	let nrequests = Array.length requests in
	let serviced = ref 0 in
	let rec loop i = 
	  if !sent < s.max_entropy && i < nrequests then (
	    let (rank,seqno) = requests.(i) in
	    begin match Iq.get s.casts.(rank) seqno with
	    | Msg(_,abv,iov) ->
		incr serviced ;
	      	dn (sendRanksIov name [origin] iov) abv (Data(rank,seqno,true)) ;
	      	sent := !sent + succ (Iovecl.len name iov) + s.msg_overhead
	    | _ -> ()
	    end ;
	    loop (succ i)
	  )
	in loop 0 ;
	logr (fun () -> sprintf "Request():(rank=%d) serviced %d/%d" ls.rank !serviced nrequests) ;

(*	
	logg (fun () -> sprintf "recd gossip:rmt:%s from %d" (string_of_int_array recd) origin) ;
	logg (fun () -> sprintf "recd gossip:lcl:%s (I am %d)" (string_of_int_array s.recd) ls.rank) ;
	    
	for rank = 0 to pred ls.nmembers do
	  if rank <> origin && recd.(rank) < s.recd.(rank) then (
	    let recd = max recd.(rank) s.free.(rank) in
	    for i = recd to s.recd.(rank) do
	      if !sent < s.max_entropy then (
		match Iq.get s.casts.(rank) i with
		| Msg(_,abv,iov) ->
		    logr (fun () -> sprintf "transmitting:(%d,%d)->%d" rank i origin) ;
		    dn (sendRanksIov name [origin] iov) abv (Data(rank,i)) ;
		    sent := !sent + succ (Iovecl.len name iov) + s.msg_overhead
		|	_ -> ()
	      )
	    done ;
	  )
	done
*)
      ) ;
      ack ev ; free name ev

  | _ -> failwith "unknown local message"

  and upnm_hdlr ev = match getType ev with

    (* EInit: request a first timer alarm to bootstrap things.
     *)
  | EInit ->
      dnnm (timerAlarm name Time.zero) ;
      upnm ev

    (* EFail: mark the failed members and check if any
     * messages are now stable.
     *)
  | EFail ->
      let failed = getFailures ev in
      List.iter (fun rank ->
      	s.failed.(rank) <- true ;
	s.gossip <- gossip s.failed ls.rank
      ) failed ;
      upnm ev

    (* When timer expires:
     * 1. Increment rounds
     * 2. Gossip.
     * 3. Clean out stale messages.
     *)
  | ETimer ->
      let time = getTime ev in

      (*if !verbose then dump (ls,vs) s ;*)
      if time >= s.next_gossip then (
	s.next_gossip <- Time.add time s.sweep ;
	dnnm (timerAlarm name s.next_gossip) ; (* request next gossip *)
	s.round <- succ s.round ;

	for rank = 0 to pred ls.nmembers do
	  let casts = s.casts.(rank) in

	  (* Figure out the new free pointer for this
           * member.
	   *)
	  let rec loop seqno free =
	    if seqno > s.high.(rank) then (
(*
	      log (fun () -> "grop") ;
*)
	      free 
	    ) else (
	      match Iq.get casts seqno with
	      |	Msg(round,_,_) ->
(*
		  log (fun () -> "pogog:grip") ;
*)
		  if round + s.stale >= s.round then (
(*
		    log (fun () -> "gorp") ;
*)
		    free
		  ) else (
(*
		    log (fun () -> "pogog") ;
*)
		    loop (succ seqno) (succ seqno)
		  )
	      |	Reset -> failwith "got Reset"
	      |	_ ->
		  loop (succ seqno) free
	    )
	  in
	  let free = loop s.free.(rank) s.free.(rank) in

(*
	  log (fun () -> sprintf "free=%d high=%d" free s.high.(rank)) ;
	  let iq = Iq.list_of_iq casts in
	  let iq = List.map fst iq in
	  log (fun () -> sprintf "iq=%s" (string_of_int_list iq)) ;
	  
	  log (fun () -> sprintf "free=%d high=%d" free s.high.(rank)) ;
*)
	  (* Deliver any messages not seen yet.  Others are
	   * considered lost.
	   *)
	  for seqno = s.recd.(rank) to (pred free) do
	    match Iq.get casts seqno with
	    | Msg(_,_,_) -> ()
	    | Unset ->
		Iq.assign casts seqno Lost ; ()
	    | _ -> failwith "sanity[2]"
          done ;

	  if rank <> ls.rank then
	    check_deliver rank ;
	  if s.recd.(rank) < free then
	    failwith "sanity[3]" ;

	  (* Set the new values of the free.  High
	   * should not have changed.
	   *)
	  s.free.(rank) <- free ;
	  Iq.advance_head casts free ;
	  log (fun () -> sprintf "head advanced:(%d,%d)" rank free) ;
	done ;
(*
	log (fun () -> sprintf "round=%d, free=%s, recd=%s, high=%s" 
	  s.round (string_of_int_array s.free) (string_of_int_array s.recd) (string_of_int_array s.high)) ;
*)
	(* Send out gossip for this round.
	 *)
	let dests = choose s.gossip 1 in
	logg (fun () -> sprintf "rank=%d round=%d gossipping to %s" 
	  ls.rank s.round (string_of_int_list dests)) ;
	dnlm (create name ESend[
	  Ranks dests ;
	  Unreliable
	]) (Gossip(s.round,Array.copy(s.recd))) ;
      ) ;
      upnm ev

  | EAccount ->
(*
      loga (fun () -> sprintf "msgs=%s"
        (string_of_int_array (Array.map (fun c -> Iq.tail c - Iq.head c) s.casts))) ;
      loga (fun () -> sprintf "recd=%s" (string_of_int_array s.recd)) ;
      loga (fun () -> sprintf "free=%s" (string_of_int_array s.free)) ;
*)
      loga (fun () -> sprintf "(rank=%d) recd=%d lost=%d retrans=%d redundant=%d" ls.rank s.acct_recd s.acct_lost s.acct_retrans s.acct_bad_ret) ;
      (*
	let iq = Iq.list_of_iq s.casts.(0) in
	let iq = List.map fst iq in
	logb (fun () -> sprintf "iq=%s" (string_of_int_list iq)) ;
	logb (fun () -> sprintf "head=%d read=%d tail=%d" 
	  (Iq.head s.casts.(0))
	  (Iq.read s.casts.(0))
	  (Iq.tail s.casts.(0))) ;
      *)
      upnm ev

  | EDump -> dump (ls,vs) s ; upnm ev
  | _ -> upnm ev

  and dn_hdlr ev abv = match getType ev with
    (* ECast: if unreliable option is not set then
     * increment my recd counter.  Then pass the event down.
     *)
  | ECast ->
      if getUnreliable ev then (
        dn ev abv NoHdr
      ) else (	
	s.acct_recd <- succ s.acct_recd ;
 	Iq.assign s.casts.(ls.rank) s.recd.(ls.rank) (Msg(s.round,abv,(getIov ev))) ;
	if s.disseminate then 
          dn ev abv (Data(ls.rank,s.recd.(ls.rank),false))
	else 
	  free name ev ;
        array_incr s.recd ls.rank ;
	s.high.(ls.rank) <- max s.high.(ls.rank) s.recd.(ls.rank) ;
      )

  | _ -> dn ev abv NoHdr

  and dnnm_hdlr = dnnm

in {up_in=up_hdlr;uplm_in=uplm_hdlr;upnm_in=upnm_hdlr;dn_in=dn_hdlr;dnnm_in=dnnm_hdlr}

let l args vf = Layer.hdr init hdlrs None NoOpt args vf

let _ = 
  Param.default "pbcast_sweep" (Param.Time (Time.of_float 0.100)) ;
  Param.default "pbcast_stale" (Param.Int 20) ;
  Param.default "pbcast_disseminate" (Param.Bool true) ;
  Param.default "pbcast_max_entropy" (Param.Int 30000) ;
  Layer.install name (Layer.init l)

(**************************************************************)
