(**************************************************************)
(*
 *  Ensemble, (Version 0.40)
 *  Copyright 1997 Cornell University
 *  All rights reserved.
 *
 *  See ensemble/doc/license.txt for further information.
 *)
(**************************************************************)
(**************************************************************)
(* TOTEM.ML *)
(* Author: Roy Friedman, 3/96 *)
(* Bug fixes & major changes: Mark Hayden, 10/96 *)
(* Original C code was done by Roy Friedman as a hack on the
 * C Total Later.  This is a hack on Mark Hayden's ML Total
 * Layer.basically, there is a token constantly rotating
 * within the group.   Anyone that wishes to send, must buffer
 * its messages until it gets the token.  Once a member
 * receives the token, it sends everything it has to send,
 * and pass the tokan to the next member in the membership
 * list.  This protocol was originally developed as part of
 * the Totem project.  *)
(**************************************************************)
(* Notes
 * MH: does not observe causality

 * RF: The token is an unbounded counter. To prevent it
 * from wrapping around, a view must be installed every so
 * often. If messages are sent at a rate of 1,000 per
 * second, then at least every 24 days.  If the rate is at
 * least 10,000 per second, then every 2.4 days etc.  
 *)
(**************************************************************)
open Trans
open Layer
open Event
open Util
open View
(**************************************************************)
let name = Trace.source_file "TOTEM"
let failwith s = failwith (Util.failmsg name s)
(*let verbose = ref true*)
(**************************************************************)
(* Headers

 * NoHdr: all non-cast messages

 * TokenSend(token): Assigns next token holder to the next
 * member in the cycle (as determined by the origin of the
 * mesage).  This header type never comes with attached
 * data.  The new token holder will send his first message
 * with sequence number 'token.'

 * Ordered(seqno,has_token): Deliver this message with
 * sequence number 'seqno.'  If has_token is true then the
 * message also passes token to the next member in the group
 * (as determined by the origin).

 * Unordered: (ie., not ordered by a token holder) Sent at
 * the end of the view.  All members deliver unordered
 * messages in fifo order in order of the sending members
 * rank.

 *)

type header = NoHdr
  | TokenSend of seqno
  | Ordered   of seqno * bool
  | Unordered

(**************************************************************)

type 'abv item =
  | Item of rank * 'abv * Iovecl.t
  | Unset				(* for iq *)
  | Reset				(* for iq *)

(**************************************************************)

type 'abv state = {
  prev		: rank ;		(* prev member in cycle *)
  next		: rank ;		(* next member in cycle *)
  mutable got_view : bool ;
  mutable blocking : bool ;		(* are we blocking? *)
  waiting 	: ('abv * Iovecl.t) Queue.t ;(* waiting for token *)
  order 	: 'abv item Iq.t ;	(* ordered by token *)
  unord 	: ('abv * Iovecl.t) Queue.t array ; (* unordered *)
  mutable token	: seqno option
}

(**************************************************************)

let dump (ls,vs) s =
  eprintf "TOTEM:dump:%s\n" ls.name ;
  eprintf " blocking=%b \n" s.blocking ;
  eprintf " waiting=%d\n" (Queue.length s.waiting) ;
  eprintf " order=%d..%d\n" (Iq.head s.order) (Iq.tail s.order) ;
  let l = Iq.list_of_iq s.order in
  let l = string_of_list (function (s,Item(r,_,_)) -> sprintf "(%d,%d)" r s | _ -> failwith "sanity") l in
  eprintf "   =%s\n" l

(**************************************************************)

let init await_ack (ls,vs) = {
  got_view	= false ;
  blocking 	= false ;
  next		= (succ ls.rank) mod ls.nmembers ;
  prev		= (pred ls.rank + ls.nmembers) mod ls.nmembers ;(* Hack! *)
  waiting 	= Queue.create () ;
  order 	= Iq.empty Unset Reset ;
  token		= None ;
  unord 	= array_createf ls.nmembers (fun _ -> Queue.create ())
}

(**************************************************************)

let hdlrs s (ls,vs) {up_out=up;upnm_out=upnm;dn_out=dn;dnlm_out=dnlm;dnnm_out=dnnm} =
  let ack = make_acker name dnnm in
  let failwith m = dump (ls,vs) s ; failwith m in

  (* Check for any requests.  If there are requests and we
   * have no messages buffered, then just send the token on.
   * If we have messages, then them, with the last message
   * being sent as an OrderedTokenSend.  
   *)
  let check_token () =
    if_some s.token (fun token ->
      if Iq.head s.order >= token then (
	s.token <- None ;
	let len = Queue.length s.waiting in
	if len = 0 then (
	  dnlm (sendRank name s.next) (TokenSend(token))
	) else (
	  if !verbose then
      	    eprintf "TOTEM:%d->[%d..%d]\n" ls.rank token (token+len-1) ;
	  for i = 0 to pred len do
	    let abv,iov = Queue.take s.waiting in
	    let pass = (i = pred len) in
	    dn (castIov name iov) abv (Ordered(token + i,pass)) ;
	    up (castOriginIov name ls.rank iov) abv ;
	    if not (Iq.opt_check_update s.order (token + i)) then
	      failwith "sanity"
	  done
	)
      )
    )
  in

  let got_token token =
    if not s.blocking then (
      s.token <- Some token ;
      check_token ()
    )
  in

  let up_hdlr ev abv hdr = match getType ev, hdr with
  | ECast, Ordered(seqno,token) ->
      if s.got_view then
      	failwith "ECast(Ordered) after EView" ;
      if !verbose then
	eprintf "TOTEM:%d:Ordered:seqno=%d\n" ls.rank seqno ;
      if Iq.opt_check_update s.order seqno then (
	if !verbose then
      	  eprintf "  (opt_check_update)\n" ;
        up ev abv
      ) else (
	if not (Iq.assign s.order seqno (Item((getOrigin ev),abv,(getIov ev)))) then
	  failwith "2nd seqno of same seqno" ;
	Iq.get_prefix s.order (fun seqno -> function
	  | Item(origin,abv,iov) ->
	      if !verbose then
		eprintf "  (delivering:%d)\n" seqno ;
	      up (castOriginIov name origin iov) abv
	  | _ -> failwith "iqsanity"
	) ;
        ack ev ; free name ev
      ) ;

      check_token () ;

      (* Check if the token is for me.
       *)
      if token & (getOrigin ev) = s.prev then
        got_token (succ seqno)

  | ECast, Unordered ->
      if s.got_view then
      	failwith "ECast(Unordered) after EView" ;
      Queue.add (abv,(getIov ev)) s.unord.((getOrigin ev)) ;
      ack ev ; free name ev

  | _, NoHdr      -> up ev abv (* all other events have NoHdr *)
  | _             -> failwith "non-NoHdr on non ECast"

  and uplm_hdlr ev hdr = match getType ev,hdr with
  | ESend, TokenSend(token) ->
      got_token token ;
      ack ev ; free name ev

  | _ -> failwith "non-NoHdr on non ECast"

  and upnm_hdlr ev = match getType ev with
  | EInit ->
      upnm ev;
      if ls.rank = 0 && ls.nmembers > 1 then
	dnlm (sendRank name s.next) (TokenSend(0))
  | EView ->
      s.got_view <- true ;

      (* Deliver unordered messages.
       *)
      for i = 0 to pred ls.nmembers do
        queue_clean (fun (abv,iov) ->
	  if !verbose then
	    eprintf "TOTEM:unordered:%d->%d\n" i ls.rank ;
          up (castOriginIov name i iov) abv
        ) s.unord.(i)
      done ;
      upnm ev
  | _ -> upnm ev

  and dn_hdlr ev abv = match getType ev with
  | ECast ->
      if getNoTotal ev then (
      	dn ev abv NoHdr
      ) else if ls.nmembers > 1 then (
	if s.got_view then
	  failwith "ECast after EView" ;
	Queue.add (abv,(getIov ev)) s.waiting ;
        free name ev
      ) else (
	up (castOriginIov name ls.rank (getIov ev)) abv ;
        free name ev
      )

  | _ -> dn ev abv NoHdr

  and dnnm_hdlr ev = match getType ev with
  | EBlockOk ->
      if s.blocking then 
      	failwith "blocking twice" ;
      s.blocking <- true ;
      if !verbose then
	eprintf "TOTEM:sending %d unordered\n" (Queue.length s.waiting) ;
      Util.queue_clean (fun (abv,iov) ->
      	Queue.add (abv,iov) s.unord.(ls.rank) ;
	dn (castIov name iov) abv Unordered
      ) s.waiting ;
      dnnm ev

  | EAck ->
      free name ev

  | _ -> dnnm ev

in {up_in=up_hdlr;uplm_in=uplm_hdlr;upnm_in=upnm_hdlr;dn_in=dn_hdlr;dnnm_in=dnnm_hdlr}

let l args vf = Layer.hdr init hdlrs None NoOpt args vf

let _ = Layer.install name (Layer.init l)

(**************************************************************)
