(**************************************************************)
(*
 *  Ensemble, (Version 0.40)
 *  Copyright 1997 Cornell University
 *  All rights reserved.
 *
 *  See ensemble/doc/license.txt for further information.
 *)
(**************************************************************)
(**************************************************************)
(* MNAK.ML : FIFO, reliable multicast protocol *)
(* Author: Mark Hayden, 12/95 *)
(* Based on code by: Robbert vanRenesse *)
(**************************************************************)
open View
open Event
open Util
open Layer
open Trans
open Compresse
(**************************************************************)
let name = Trace.source_file "MNAK"
let name_DnCast = addinfo "DnCast" name
let name_UpCast = addinfo "UpCast" name
let name_nak = addinfo "nak" name
let name_rc = addinfo "rc" name
let name_stable = addinfo "stable" name
let name_exit = addinfo "exit" name
(**************************************************************)
(* Data(seqno): sent with message number 'seqno'

 * Retrans(seqno): sent with the retransmission of member
 * 'rank's messsage number 'seqno.'

 * Nak(rank,lo,hi): request for retransmission of messages
 * number 'lo' to 'hi'. 
 * BUG: comment should say, inclusive/exclusive.

 * Lost(seqno): lost message: it was already stable.
 *)
type header = NoHdr
  | Data    of seqno
  | Retrans of rank * seqno
  | Nak     of rank * seqno * seqno
  | Lost    of rank * seqno

(**************************************************************)
(* These are for optimizations in the Layer module.
 *)
let detector = function 
  | Data seqno -> Some seqno
  | _ -> None

let constructor seqno = Data seqno

(**************************************************************)

type ('abv,'cps) state = {
  allow_lost    : bool ;
  mutable coord	: rank ;
  buf		: ('abv,'cps) Compresse.t Iq.t array ;
  failed	: bool array ;
  naked		: int array ;

  mutable acct_size  : int ;		(* # bytes buffered *)
  dbg_n		: int array
}

(**************************************************************)

let dump (ls,vs) s =
  eprintf "MNAK:dump:%s\n" ls.name ;
  eprintf "  rank=%d, nmembers=%d\n" ls.rank ls.nmembers ;
  eprintf "  failed =%s\n" (string_of_bool_array s.failed) ;
  eprintf "  cast_lo=%s\n" (string_of_int_array (Array.map Iq.head s.buf)) ;
  eprintf "  cast_hi=%s\n" (string_of_int_array (Array.map Iq.tail s.buf)) ;
  eprintf "  read   =%s\n" (string_of_int_array (Array.map Iq.read s.buf))
(*
  ; eprintf "  dbg_n  =%s\n" (string_of_int_array s.dbg_n)
  ; for i = 0 to pred ls.nmembers do
    eprintf "  buf(%d)=%s\n" i (string_of_int_list (List.map fst (Iq.list_of_iq s.buf.(i))))
  done
*)

(**************************************************************)

let init ack_rate (ls,vs) = {
  allow_lost = Param.bool vs.params "mnak_allow_lost" ;
  coord	     = 0 ;
  buf        = array_createf ls.nmembers (fun _ -> Iq.empty Unset Reset) ;
  failed     = array_create name ls.nmembers false ;
  naked	     = array_create name ls.nmembers 0 ;
  acct_size  = 0 ;
  dbg_n	     = array_create name ls.nmembers 0
}

(**************************************************************)

let hdlrs s (ls,vs) {up_out=up;upnm_out=upnm;dn_out=dn;dnlm_out=dnlm;dnnm_out=dnnm} =
  let log = Trace.log name ls.name in
  let logn = Trace.log (name^"N") ls.name in (* Naks *)
  let logb = Trace.log "BUFFER" (name^":"^ls.name) in
  let logl = Trace.log (name^"L") ls.name in (* for Lost message tracking *)
  let failwith m = dump (ls,vs) s ; failwith (name^":"^m) in

  (* Arguments to two functions below:
   * - rank: rank of member who's cast I've just gotten (or heard about)
   * - seqno: seqno of message from member 'rank'
   *)

  (*
   * CHECK_NAK: checks if missing a message, and sends appropriate
   * Nak. If there is a hole then send a Nak to (in this order):
   * 1. to original owner of message if he is not failed.
   * 2. to coord if I'm not the coord
   * 3. to entire group
   *)
  let check_nak rank is_stable =
    let buf = s.buf.(rank) in
    if_some (Iq.read_hole buf) (fun (lo,hi) ->
      if is_stable || (hi > s.naked.(rank)) then (
(*
	if seqno >= lo then (
*)
	  (* Keep track of highest msg # we've naked.
	   *)
	  s.naked.(rank) <- max s.naked.(rank) hi ;
	  logn (fun () -> sprintf "naking:%d..%d" lo hi) ;

	  if not s.failed.(rank) then (
	    dnlm (sendRank name rank) (Nak(rank,lo,hi))
	  ) else if s.coord <> ls.rank then (
	    dnlm (sendRank name s.coord) (Nak(rank,lo,hi))
	  ) else (
	    (* Don't forget to set the Unreliable option
	     * for the STABLE layer (see note in stable.ml).
	     *)
	    dnlm (create name ECast[Unreliable]) (Nak(rank,lo,hi))
	  )
(*
	)
*)
      )
    )
  in

  (* READ_PREFIX: called to read message from beginning of buffer.
   *)
  let read_prefix rank =
    Iq.read_prefix s.buf.(rank) (fun cps seqno ->
      if cps = LostC then (
(*     	logl (fun () -> sprintf "read_prefix:Lost(%d,%d)" rank seqno) ;*)
	upnm (create name ELostMessage[
	  Origin rank
	]) ;
      ) else (
      	log (fun () -> sprintf "read_prefix:Data(%d,%d)" rank seqno) ;
(*
     	if seqno <> s.dbg_n.(rank) then failwith "sanity[4a]" ;
	s.dbg_n.(rank) <- succ s.dbg_n.(rank) ;
*)
      	let (abv,iov) = uncompress cps in
	Iovecl.ref name iov ;
        up (castOriginIov name rank iov) abv
      )
    ) ;

(*
    if Iq.read s.buf.(rank) >= s.naked.(rank) then*) (
      check_nak rank false
    )
  in

  (*
   * RECV_CAST: called when a cast message is recieved,
   * either as a data message or a retranmission.
   * This function is on the "slow path," the fast
   * path is in the up_hdlr for ECast.
   *)
  let recv_cast rank seqno abv iov =
    (* Add to buffer and check if events are now in order.
     *)
    if Iq.assign s.buf.(rank) seqno (Full(abv,iov)) then (
      Iovecl.ref name_rc iov ;
      log (fun () -> sprintf "recv_cast:slow_recv:assign=%d" seqno) ;
      s.acct_size <- s.acct_size + Iovecl.len name iov ;
      read_prefix rank ;
    ) else (
      log (fun () -> sprintf "dropping redundant msg:seqno=%d" seqno)
    )
  in

  let up_hdlr ev abv hdr = match getType ev, hdr with

    (* ECast:Data: Got a data message from other
     * member.  Check for fast path or call recv_cast.
     *)
  | ECast, Data(seqno) ->
      (* Check for fast-path.
       *)
      let origin = getOrigin ev in
      let iov = getIov ev in

      if Iq.opt_insert_check_doread 
	s.buf.(origin) seqno (Full(abv,iov)) 
      then (
	Iovecl.ref name_UpCast iov ;
	s.acct_size <- s.acct_size + getIovLen ev ;
(*
      	if seqno <> s.dbg_n.(o) then failwith "sanity[4a]" ;
	s.dbg_n.(o) <- succ s.dbg_n.(o) ;
*)   	
	up ev abv
      ) else (
      	recv_cast origin seqno abv (getIov ev) ;
        free name ev
      )

    (* ECast:Retrans: Got a retransmission.  Always
     * use the slow path.
     *)
  | (ECast|ESend), Retrans(rank,seqno) ->
      if rank <> ls.rank then (
(*
      	eprintf "MNAK:Cast:Retrans:seqno=%d\n" seqno ;
	let buf = s.buf.(rank) in
	let l = Iq.list_of_iq buf in
	let l = List.map fst l in
	eprintf "  head=%d,read=%d,tail=%d,%s\n" 
	  (Iq.head buf) (Iq.read buf)
	  (Iq.tail buf) (string_of_int_list l)
*)
        recv_cast rank seqno abv (getIov ev) ;
      ) ;
      free name ev

  | _, NoHdr -> up ev abv
  | _        -> failwith "bad header"

  and uplm_hdlr ev hdr = match getType ev,hdr with

    (* Nak: got a request for retransmission.  Send any
     * messages I have in the requested interval, lo..hi.
     *)
  | (ESend|ECast), Nak(rank,lo,hi) ->
      (* Retransmit any of the messages asked for that I have.
       *)
      (* TODO: check if request is for message from failed member
       * and I'm coordinator and I don't have what is being asked for
       *)
      let origin = getOrigin ev in
      logn (fun () -> sprintf "Nak:(%d,%d..%d)" rank lo hi) ;
      let buf = s.buf.(rank) in
      let lost = ref None in
      for seqno = lo to hi do
	match Iq.get buf seqno with
	| Reset ->
	    lost := Some seqno
	| LostC | Unset ->
	    (* Do nothing...
	     *)
	    logl (fun () -> sprintf "Nak for message I think is Lost or Unset") ;
	| cps ->
      	    let (abv,iov) = uncompress cps in
	    Iovecl.ref name_nak iov ;
            dn (create name ESend[
      	      Ranks [origin] ;
	      Iov iov ;
	      Unreliable	
	    ]) abv (Retrans(rank,seqno))
      done ;
  
      if_some !lost (fun seqno ->
	if s.allow_lost then (
	  logl (fun () -> sprintf "Nak:Lost(%d,%d)" rank seqno) ;
	  dnlm (create name ESend[
	     Ranks [origin] ;
	     Unreliable
	  ]) (Lost(rank,seqno)) ;
	) else (
	  logl (fun () -> sprintf "Nak:ignoring apparent lost message") ;
        )
      ) ;
	  
      free name ev

  | (ESend|ECast), Lost(rank,seqno) ->
      let buf = s.buf.(rank) in
      let it = Iq.get buf seqno in
      if it = Unset then (
	(* Mark all previous messages as Lost.
	 *)
      	logl (fun () -> sprintf "got Lost notice") ;
	for i = Iq.read buf to seqno do
(*     	  logl (fun () -> sprintf "message marked Lost") ;*)
	  if Iq.get buf seqno = Unset then
	    (Iq.assign buf i LostC ; ())
	done ;
	
	(* Read them all...
	 *)
	read_prefix rank ;
      ) ;
      free name ev

  | _ -> failwith "unknown local message"

  and upnm_hdlr ev = match getType ev with
    (* EFail: Mark failed members, find out who is the new
     * coordinator, and pass on up.  
     *)
  | EFail ->
      List.iter (fun rank ->
      	s.failed.(rank) <- true
      ) (getFailures ev) ;

      (* Find out who's the new coordinator.
       *)
      for i = pred ls.nmembers downto 0 do
        if not s.failed.(i) then s.coord <- i
      done ;

      upnm ev

    (* Stability protocol is requesting information about my
     * buffers.  I pass up the seqno of messages that I've
     * read so far.  Note that I could tell about the
     * messages I've received but not delivered yet (because
     * they are out of order), but I suspect that would
     * introduce liveness problems.  
     *)
  | EStableReq ->
      let casts = Array.map Iq.read s.buf in
      upnm (set name ev [NumCasts casts])

    (* EStable: got stability and num casts information.
     * 1. garbage collect now-stable messages.
     * 2. check for any messages I'm missing.
     *)
  | EStable ->
      (* GC any messages that are now stable.
       *)
      let my_next = Iq.tail s.buf.(ls.rank) in
      let mins = getStability ev in

      for rank = 0 to pred ls.nmembers do
	let buf = s.buf.(rank) in
	let min = mins.(rank) in
	if rank <>| ls.rank then (
	  for seqno = Iq.read buf to pred min do
	    if Iq.get buf seqno == Unset then
	      (Iq.assign buf seqno LostC ; ()) ;
	  done ;
	  read_prefix rank ;
	) ;
        Iq.advance_head_gc buf min (fun c -> 
	  let iov = snd (uncompress c) in
	  Iovecl.free name_stable iov ;
	  s.acct_size <- s.acct_size - size c ;
	)
      done ;

      if my_next <>| Iq.tail s.buf.(ls.rank) then (
	eprintf "MNAK:about to fail:my_next=%d\n" my_next ;
        failwith "sanity[1]"
      ) ;

      (* Check if there are some messages that we haven't
       * gotten yet.  
       *)
      (* TODO: when max=stable then we must zap the iq (you remember why...) *)
      let maxs = getNumCasts ev in
      for rank = 0 to pred ls.nmembers do
      	if rank <>| ls.rank then (
	  Iq.advance_tail s.buf.(rank) maxs.(rank) ; (*BUG?*)
	  check_nak rank true
	)
      done ;

      upnm ev

  | EExit ->
      (* GC all buffers.
       *)
      Array.iter (fun buf -> 
	Iq.free buf (fun c ->
	  Iovecl.free name_exit (snd (uncompress c))
	)
      ) s.buf ;
      upnm ev

  | EAccount ->
      (* Dump information about status of buffers.
       *)
      logb (fun () -> sprintf "total bytes=%d" s.acct_size) ;
      logb (fun () -> sprintf "msgs=%s"
        (string_of_int_array (Array.map (fun c -> Iq.tail c - Iq.head c) s.buf))) ;
      upnm ev

  | EDump -> ( dump (ls,vs) s ; upnm ev )
  | _ -> upnm ev

  and dn_hdlr ev abv = match getType ev with

    (* ECast: buffer a copy and send it on.
     *)
  | ECast ->
      if getUnreliable ev then (
        dn ev abv NoHdr
      ) else (
	let buf = s.buf.(ls.rank) in
	let seqno = Iq.tail buf in
	let iov = getIov ev in
      	if Iq.opt_insert_check_doread
 	  buf seqno (Full(abv,iov))
	then (
	  s.acct_size <- s.acct_size + getIovLen ev ;
	  Iovecl.ref name_DnCast iov ;
	  dn ev abv (Data seqno)
	) else failwith "problem inserting message into own buffer"
      )

  | _ -> dn ev abv NoHdr

  and dnnm_hdlr = dnnm

in {up_in=up_hdlr;uplm_in=uplm_hdlr;upnm_in=upnm_hdlr;dn_in=dn_hdlr;dnnm_in=dnnm_hdlr}

let l args vs = Layer.hdr_state init hdlrs None (LocalSeqno(NoHdr,ECast,detector,constructor)) args vs

let _ = 
  Param.default "mnak_allow_lost" (Param.Bool false) ;
  Layer.install name (Layer.init l)

(**************************************************************)
