(**************************************************************)
(*
 *  Ensemble, (Version 0.40)
 *  Copyright 1997 Cornell University
 *  All rights reserved.
 *
 *  See ensemble/doc/license.txt for further information.
 *)
(**************************************************************)
(**************************************************************)
(* TOP.ML *)
(* Top-most layer.  Handles default group behavior *)
(* Author: Mark Hayden, 7/95 *)
(* Based on code by Robbert vanRenesse *)
(**************************************************************)
open Layer
open View
open Event
open Trans
open Util
(**************************************************************)
let name = Trace.source_file "TOP"
(**************************************************************)
(* State transitions for this layer:

  OLD_STATE:
    NEW_STATE: event

  Normal:
    Merging: EBlockOk
    NextView: EView, EBlockOk

  Merging:
    NextView: EView, EMergeFailed, EMergeDenied

  NextView:
    *sink*
*)
(**************************************************************)

type header = NoHdr

type states =
  | Normal
  | Merging
  | NextView

type state = {
  sweep			: Time.t ;	(* gossip sweep interval (if any) *)
  gossip                : bool ;	(* initiate EGossipExt events? *)
  account               : bool ;	(* initiate EAccount events? *)
  dump_fail             : bool ;	(* fail on dump events *)
  mutable state		: states ;	(* state of the protocol *)
  mutable elected	: bool ;	(* have I been elected *)
  mutable next_sweep	: Time.t ;	(* time of next sweep *)
  mutable suspects      : rank list ;	(* suspected members *)
  mutable failed	: rank list ;	(* failed members *)
  mutable mergers	: View.state list ; (* Mergers for next view *)
  mutable dn_block	: bool ;	(* I've done a EBlock *)
  mutable up_prompt     : bool ;        (* I've been prompted *)
  dn_block_abv	        : Once.t ;	(* EBlock was from above! *)
  up_block_ok           : Once.t ;	(* I've recd an EBlockOk *)
  mutable dbg_block_time : Time.t ;	(* time when I blocked *)
  mutable dbg_block_time_first : Time.t	(* time when I blocked *)
}

(**************************************************************)

let string_of_state = function
| Normal	-> "Normal"
| Merging	-> "Merging"
| NextView	-> "NextView"

let dump (ls,vs) s =
  eprintf "TOP:dump:%s: state=%s\n" ls.name (string_of_state s.state) ;
  eprintf "  dn_block=%b elected=%b\n" s.dn_block s.elected ;
  eprintf "  failed=%s\n" (string_of_int_list s.failed) ;
  eprintf "  dbg_block_time_first=%s\n" (Time.to_string s.dbg_block_time_first)

(**************************************************************)

let init () (ls,vs) = {
  sweep         = Param.time vs.params "top_sweep" ;
  gossip        = Param.bool vs.params "top_gossip" ;
  account       = Param.bool vs.params "top_account" ;
  dump_fail     = Param.bool vs.params "top_dump_fail" ;
  state		= Normal ;
  failed	= [] ;
  suspects      = [] ;
  dn_block	= false ;
  dn_block_abv  = Once.create "TOP:dn_block_abv" ;
  up_prompt     = false ;
  up_block_ok   = Once.create "TOP:up_block_ok" ;
  mergers	= [] ;
  next_sweep	= Time.invalid ;
  elected	= false ;
  dbg_block_time = Time.zero ;
  dbg_block_time_first = Time.invalid
}

let hdlrs s (ls,vs) {up_out=up;upnm_out=upnm;dn_out=dn;dnlm_out=dnlm;dnnm_out=dnnm} =
  let log = Trace.log name ls.name in
  let ack = make_acker name dnnm in

  let failwith m = dump (ls,vs) s ; failwith (name^":"^m) in

  let do_block () =
    if s.elected && not s.dn_block then (
      if s.state = Merging then
	failwith "Merging w/o EBlock" ;
      s.dn_block <- true ;
      dnnm (create name EBlock[])
    )
  in

  let do_fail () =
    if s.elected && Lset.subtract s.suspects s.failed <> [] then (
      do_block () ;
      s.failed   <- Lset.union s.failed s.suspects ;
      s.suspects <- s.suspects ;
      dnnm (create name EFail[(Failures s.failed)])
    )
  in

  let do_view () =
    if not s.elected then failwith "do_view when not coord" ;

    (* Remove failed members from the view.
     *)
    let live = array_create name ls.nmembers true in
    List.iter (fun rank -> live.(rank) <- false) s.failed ;
    let members = array_combine vs.view vs.address in
    let members = array_combine live members in
    let members = array_filter fst members in
    let members = Array.map snd members in

    let members = 
      let add =
 	List.map (fun vs -> 
	  array_combine vs.view vs.address
	) s.mergers 
      in
      Array.concat (members :: add)
    in

    let view,address = array_split members in
    if view.(0) <> ls.endpt then
      log (fun () -> sprintf "warning:do_view:coord but not 0th rank") ;
    let ltime = fst vs.view_id in
    let view_id = (succ ltime, ls.endpt) in
    let new_vs = View.set vs [
      Vs_view view ;
      Vs_address address ;
      Vs_view_id view_id
    ] in

    dnnm (create name EView[ViewState new_vs]) ;
    s.state	<- NextView ;
    s.mergers	<- []
  in
  
  let up_hdlr ev abv hdr = failwith "up_hdlr"

  and uplm_hdlr ev hdr = match getType ev with

  | EMergeRequest ->
      if s.state <> NextView then (
	let mergers = getMergers ev in
	let mview = Array.to_list mergers.view in
	let check_disjoint = 
	  List.for_all (fun vs ->
	    let view = Array.to_list vs.view in
	    Lset.disjoint view mview
	  ) s.mergers
	in
	if not check_disjoint then
	  failwith "repeating mergers" ;
	s.mergers <- mergers :: s.mergers ;
	log (fun () -> Event.to_string ev) ;
	do_block () 
      ) ;
      free name ev

  | _ -> failwith "unknown local message"

  and upnm_hdlr ev = (match getType ev with
  | EView ->
      log (fun () -> Event.to_string ev) ;
      s.state <- NextView ;
      upnm ev

    (* Bounce an Exit event down and also pass the leave
     * event up.  
     *)
  | ELeave ->
      dnnm (create name EExit[]) ;
      upnm ev

  | EFail ->
      log (fun () -> Event.to_string ev) ;
      s.failed   <- Lset.union s.failed (getFailures ev) ;
      s.suspects <- Lset.union s.failed s.suspects ;

  | EMergeFailed | EMergeDenied ->
      (* Should do this only if it was the coordinator,
       * otherwise, should just fail the member. [MH: ???]
       *)
      if s.state = Merging then
	do_view ()

  | ESuspect ->
      log (fun () -> Event.to_string ev) ;
      let suspects = getSuspects ev in
      s.suspects <- Lset.union s.suspects suspects ;
      if s.elected then 
	do_fail ()

  | EBlock ->
      dnnm (create name EBlockOk[])

  | EBlockOk ->
      log (fun () -> Event.to_string ev) ;
      Once.set s.up_block_ok ;
      if Once.isset s.dn_block_abv then (
	upnm ev
      ) else (
	if not s.dn_block then 
	  failwith "EBlockOk w/o EBlock" ;
	
	if s.state = Normal then (
	  (* Check if merging.	
	   *)
	  try 
	    let contact,view_id = getContact ev in
	    match view_id, s.failed, s.mergers with
	    | Some(view_id), [], [] ->
	      	dnlm (create name EMergeRequest[
		  Contact(contact,Some view_id)
		]) NoHdr ;
	      	s.state <- Merging
	    | _ -> raise (Failure "escape out")
	  with _ ->
	    do_view ()
        )
      )

  | ETimer ->
      let time = getTime ev in
      if time >= s.next_sweep then (
	s.next_sweep <- Time.add time s.sweep ;
	dnnm (timerAlarm name s.next_sweep) ;
	if ls.am_coord && s.gossip then
	  dnnm (create name EGossipExt []) ;
	if s.account then
	  dnnm (create name EAccount []) ;
      ) ;
	   
      if s.state = Normal then (
	s.dbg_block_time <- time
      ) else (
	if time > Time.add s.dbg_block_time (Time.of_float 200.0) then (
	  if s.dbg_block_time_first = Time.invalid then
	    s.dbg_block_time_first <- s.dbg_block_time ;
	  eprintf "TOP:lingering stack, time=%s\n" (Time.to_string time) ;
	  s.dbg_block_time <- time ;
	  dump (ls,vs) s ;
	  dnnm (create name EDump[])
	)
      )

  | EProtocol ->			(*Just here for view change perf tests*)
      upnm (create name EPrompt[]) ;
      free name ev

  | EPrompt ->
      do_block () ;
      s.up_prompt <- true ;
      upnm ev

  | EElect ->
      s.elected <- true ;
      dnnm (timerAlarm name Time.zero) ;
      
      (* If prompted, then start blocking.
       *)
      if s.up_prompt then
	do_block () ;

      do_fail ()

  | EDump ->
      dump (ls,vs) s ;
      if s.dump_fail then (
	eprintf "TOP:got EDump:exiting\n" ;
	exit 1 ;
      )
(*
  | EStable ->
      eprintf "TOP:stable:%s\n" (string_of_int_array (getStability ev)) ;
*)

  | EAsync 
  | EExit 
  | EStable 
  | EGossipExt 
  | EInit 
  | EAlive 
  | EXferDone 
  | ELostMessage 
  | EAccount 
  | EStableReq
  | ERekey-> ()
  | _ -> eprintf "TOP:dropping:%s\n" (to_string ev )
  ) (*; free name ev: BUG VIEW *)

  and dn_hdlr _ _ = failwith "bad down event"
  and dnnm_hdlr ev = match getType ev with
  | EExit ->
      dnnm ev

  | EFail ->
      let failures = getFailures ev in
      if List.mem ls.rank failures then
	failwith "dnnm:failure of myself" ;
      dnnm ev

  | EBlock ->
      log (fun () -> sprintf "EBlock") ;
      (* This was causing problems with big pbcast tests...
       *)
      if Once.isset s.dn_block_abv then (
	eprintf "TOP:warning: got 2nd EBlock from above, sending EBlockOk (hack!)\n" ;
	upnm (create name EBlockOk[]) ; (* hack! *)
      ) else (
	Once.set s.dn_block_abv ;
        if s.dn_block then 
	  failwith "EBlock from above when blocked" ;
        s.dn_block <- true ;
        dnnm (create name EBlock[]) ;	(*???*)
(*
        if Once.isset s.dn_block_abv then ( (*???*)
	  upnm (create name EBlockOk[]) ;	(*???*)
        )
*)
      )

  | _ -> failwith "unknown dnnm event"

in {up_in=up_hdlr;uplm_in=uplm_hdlr;upnm_in=upnm_hdlr;dn_in=dn_hdlr;dnnm_in=dnnm_hdlr}

let _ = Trace.comment "broken TOP.ml (???)"

let l args vf = Layer.hdr init hdlrs None NoOpt args vf

let _ = 
  Param.default "top_sweep" (Param.Time (Time.of_float 1.0)) ;
  Param.default "top_gossip" (Param.Bool true) ;
  Param.default "top_account" (Param.Bool true) ;
  Param.default "top_dump_fail" (Param.Bool false) ;
  Layer.install name (Layer.init l)

(**************************************************************)
