(**************************************************************)
(*
 *  Ensemble, (Version 0.40)
 *  Copyright 1997 Cornell University
 *  All rights reserved.
 *
 *  See ensemble/doc/license.txt for further information.
 *)
(**************************************************************)
(**************************************************************)
(* PROCSVR.ML: process server *)
(* The back end of a remote execution server.
 * If window from which procsvr is started dies, then procsvr dies also
 * when output is attempted.  To avoid this, redirect output to e.g.,
 * /dev/null.
 *)
(* Author: Takako M. Hickey, 4/97 *)
(* Thanks to Mark Hayden and Robbert van Renesse for many useful
 * discussions.
 *)
(**************************************************************)
open Ensemble
open Util 
open View
open Appl_intf 
open Rpc
open Session
open Process
open Db
open Dbinput
open Env
open Dutil
(**************************************************************)
let name = Trace.source_file "PROCSVR" 
let failwith s = failwith (name^":"^s) 
(**************************************************************)

let heartbeat_rate	= ref 1.0
(* if the following is too frequent for large group, cause flood *)
let gossip_rate		= ref 300.0

let view 		= ref [||]
let rank_of_endpt endpt = array_index endpt !view (* may raise Not_found *)
let endpt_of_rank rank	= !view.(rank) (* may raise Invalid_argument *)

let rpc_events_to_submit = ref []
let clear_rpc_events () =
    rpc_events_to_submit := []
and add_rpc_event event =
    rpc_events_to_submit := !rpc_events_to_submit @ [event] 

let my_state = {
  sttype = SvrProc ;
  stname = Unix.gethostname () ;
  stendpt = None ;
  staticentry = [] ;
  nrunning = 0 ;
  nwaiting = 0 ;
  ncompleted = 0 ;
  gossipid = 0 ;
  stload = 0.0 ;
  stuptime = 0.0 ;
  strestrictions = [] 
}

(**************************************************************)
let proc_done skt = (
  (try
    my_state.nrunning <- my_state.nrunning - 1 ;
    let (handle, status) = Socket.wait_process (Obj.magic skt) in
    let proc = proc_move_to_done handle in
    Util.printf "PROCSVR: done process %s:%s 0x%08x (proc_list size: %d)\n"
      proc.psessname proc.procname (Obj.magic handle) (List.length !proc_exec_list) ;
    (match proc.waitendpt with
     | Some e ->
         let dest = rank_of_endpt e in
         (match proc.waitoptype with
          | Sess ->
  (*
  Util.printf "%s: Sending DSessWaitSuccess for: (%s %s)\n"
  	my_state.stname proc.psessname, proc.procname ;
  *)
              add_rpc_event (Send ([dest], (DSessWaitSuccess(proc.psessname, proc.waitreqid, proc.procname)))) ;
          | Proc ->
  (*
  Util.printf "%s: Sending DProcWaitSuccess: (%s %s)\n"
  	my_state.stname proc.psessname proc.procname ;
  *)
              add_rpc_event (Send ([dest], (DProcWaitSuccess(proc.psessname, proc.waitreqid, proc.procname)))) ;
         ) ;
         my_state.ncompleted <- my_state.ncompleted + 1 ;
         proc_done_remove proc.psessname proc.procname ;
     | None ->
         my_state.nwaiting <- my_state.nwaiting + 1 ;
     ) 
   with exn -> (
     Util.printf "PROCSVR (%s): " my_state.stname ;
     (try Printexc.print (function () -> raise exn) () with _ -> () ;
   )))
)

(**************************************************************)
let intf (ls,vs) procskt =
  let vs		= ref vs
  and ls		= ref ls
  and curtime		= ref 0.0	(* current logical time *)
  and gossip_time 	= ref 0.0	(* next gossip timeout *)
  and msgq 		= Queue.create ()
  and events_to_submit	= ref []
  and ntowait		= ref 0
  and sessvrs		= ref []
  in

  let async = Appl.async (!vs.group, !ls.endpt) in
  let gettime () = Time.to_float (Alarm.gettime (Alarm.get())) in
  let starttime	= (gettime ())
  and clear_events () =
    events_to_submit := []
  and add_event event =
    events_to_submit := !events_to_submit @ [event] ;
  in

  (* Return endpts of pairs with same host as mine from machine list.
   *)
  let check_duplicate s = (
    let compendpt v1 v2 = (match v1 with
     | Some e1 -> (match v2 with
         | Some e2 -> e1 < e2
         | _ -> false
       )
     | _ -> false
    ) in
    let rec loop = function
      | [] -> false
      | h::t ->
          if h.sttype = SvrProc & h.stname = my_state.stname &
            compendpt h.stendpt my_state.stendpt then (
            Util.printf "PROCSVR: leaving group.  another deamon present on %s\n" my_state.stname ;
            true
          )
          else
            loop t
    in
       loop s
  )
  in

  let collect_sessvrs slist = (
    let f st =
      if st.sttype = SvrSess then (
        (match st.stendpt with
         | Some e ->
             sessvrs := !sessvrs @ [(rank_of_endpt e)] ;
         | None -> ())
      )
    in
      sessvrs := [] ;
      List.map f slist ;
  )
  in

  (* Cast load information to sessvrs.
   *)
  let cast_gossip () = (
    my_state.gossipid <- my_state.gossipid + 1 ;
    my_state.stload <- get_load () ;
    my_state.stuptime <- (gettime ()) -. starttime ;
    add_event (Send (!sessvrs, (DGossip(my_state.stname, gettime(), compose_dbentry my_state))))
(*
    add_event (Cast (DGossip(my_state.stname, gettime(), compose_dbentry my_state)))
*)
  )
  in

  let update_load () = (
    my_state.gossipid <- my_state.gossipid + 1 ;
    my_state.stload <- get_load () ;
    my_state.stload
  )
  in

(**************************************************************)
  let exec_program endpt sessname reqid program env ticket = (
    (* TODO: pass enviroment *)
    let args = [| "sh"; "-c"; program |] in
    let rank = rank_of_endpt endpt in
    (try (
Util.printf "PROCSVR: calling spwan %d\n" my_state.ncompleted ;
       let handle = Socket.spawn_process "/bin/sh" args procskt in
Util.printf "PROCSVR: spwan finished %d\n" my_state.ncompleted ;
       my_state.nrunning <- my_state.nrunning + 1 ;
       ntowait := !ntowait + 1 ;
       let suffix = (string_of_int ticket) in
       let proc = proc_exec_create sessname program env !ls.endpt handle suffix in
       Util.printf "PROCSVR: exec process for %s:%s (%s) (proc_list size: %d, nrunnning: %d) 0x%08x\n"
         sessname proc.procname program (List.length !proc_exec_list) my_state.nrunning (Obj.magic handle) ;
       add_event (Send ([rank], (DProcCreateSuccess(sessname, reqid, (toseprocess proc)))))) 
    with exn -> (
       Util.printf "PROCSVR (%s): " my_state.stname ;
       (try Printexc.print (function () -> raise exn) () with _ ->
          add_event (Send ([rank], (DProcOpFailure(sessname, reqid, PrCreate, "sapwn_process failed"))))))
    )
  )
  in

  let sig_proc proc sesssig = (
    Util.printf "PROCSVR: killing process %s\n" proc.procname ;
    (match sesssig with
    | SessSigKill ->
        Socket.terminate_process proc.handle ;
    | _ -> ()
    )
  )
  in

  (* TODO: think about whether to cast load after exec.
   *)
  let recv_msg endpt msg = (match msg with
    | DSessWait(sessname, reqid, procname) ->
        (try
           let p = proc_exec_lookup sessname procname in
           (match p.waitendpt with
            | Some _ ->
              let dest = rank_of_endpt endpt in
              add_event (Send ([dest], (DSessOpFailure(sessname, reqid, SeWait, ("wait already issued on process " ^ sessname ^ ":" ^ procname))))) 
            | None ->
              p.waitreqid <- reqid ;
              p.waitoptype <- Sess ;
              p.waitendpt <- Some endpt
           )
         with Not_found ->
           (try
              let p = proc_done_remove sessname procname in
              let dest = rank_of_endpt endpt in
              my_state.nwaiting <- my_state.nwaiting - 1 ;
              my_state.ncompleted <- my_state.ncompleted + 1 ;
              add_event (Send ([dest], (DSessWaitSuccess(sessname, reqid, procname)))) ;
            with Not_found ->
              let dest = rank_of_endpt endpt in
              add_event (Send ([dest], (DProcOpFailure(sessname, reqid, PrWait, ("wait on unknown process " ^ sessname ^ ":" ^ procname))))) ;
           )
        )

    | DSessSig(sessname, reqid, procname, sesssig) ->
        let dest = rank_of_endpt endpt in
        (try
           let p = proc_exec_lookup sessname procname in
           sig_proc p sesssig;
           add_event (Send ([dest], (DSessSigSuccess(sessname, reqid, procname, sesssig))))
         with
         | Not_found ->
            add_event (Send ([dest], (DSessOpFailure(sessname, reqid, SeWait, ("process not active " ^ sessname ^ ":" ^ procname))))) ;
         | _ ->
            add_event (Send ([dest], (DSessOpFailure(sessname, reqid, SeWait, "signal failed")))) ;
        )
    | DProcCreate(sessname, reqid, program, env, ticket) ->
        exec_program endpt sessname reqid program env ticket

    | DProcWait(sessname, reqid, procname) ->
(*
Util.printf "%s: Got DProcWait for %s:%s (%d) (proc_list size: %d)\n"
my_state.stname sessname procname reqid (List.length !proc_list) ;
             proc_list_print () ;
*)
        (try
           let p = proc_exec_lookup sessname procname in
           (match p.waitendpt with
            | Some _ ->
                let dest = rank_of_endpt endpt in
                add_event (Send ([dest], (DProcOpFailure(sessname, reqid, PrWait, ("wait already issued on process " ^ sessname ^ ":" ^ procname))))) ;
            | None ->
                p.waitreqid <- reqid ;
                p.waitoptype <- Proc ;
                p.waitendpt <- Some endpt
           )
         with Not_found ->
           (try
              let p = proc_done_remove sessname procname in
              let dest = rank_of_endpt endpt in
              my_state.nwaiting <- my_state.nwaiting - 1 ;
              my_state.ncompleted <- my_state.ncompleted + 1 ;
              add_event (Send ([dest], (DProcWaitSuccess(sessname, reqid, procname)))) ;
            with Not_found ->
              let dest = rank_of_endpt endpt in
              add_event (Send ([dest], (DProcOpFailure(sessname, reqid, PrWait, ("wait on unknown process " ^ sessname ^ ":" ^ procname))))) ;
           )
        )
    | DProcSig(sessname, reqid, procname, sesssig, rid) ->
        let dest = rank_of_endpt endpt in
        (try
           let p = proc_exec_lookup sessname procname in
           sig_proc p sesssig;
           add_event (Send ([dest], (DProcSigSuccess(sessname, reqid, procname, sesssig, rid))))
         with
         | Not_found ->
             add_event (Send ([dest], (DProcOpFailure(sessname, reqid, PrSig, ("process not active " ^ sessname ^ ":" ^ procname))))) ;
         | _ ->
             add_event (Send ([dest], (DProcOpFailure(sessname, reqid, PrWait, "signal failed")))) ;
        )

    | DShutdown() ->
        Util.printf "PROCSVR: got shutdown request\n" ;
        kill_all_procs () ;
        add_event Leave
    | DUpQuery() ->
        let time = (gettime ()) -. starttime in
        let load = update_load () in
        let dest = rank_of_endpt endpt in
        add_event (Send ([dest], (DUpAnswer(my_state.sttype, my_state.stname, time, load, my_state.ncompleted))))
    | _ -> ()

  ) ;
  let events = !events_to_submit in
  clear_events () ;
  events
  in

  let recv_cast from msg =
    try
      let endpt = endpt_of_rank from in
      recv_msg endpt msg 
    with Invalid_argument _ -> (
      printf "PROCSVR: msg from unknown member %d\n" from ;
      []
    )
  and recv_send from msg =
    try
      let endpt = endpt_of_rank from in
      recv_msg endpt msg 
    with Invalid_argument _ -> (
      printf "PROCSVR: msg from unknown member %d\n" from ;
      []
    )

  and block () = []

  (* Periodic routine *)
  and heartbeat tick = (
    curtime := (Time.to_float tick) ;
    let empty = ref false in
    while (not !empty) do
      try
        let (endpt, msg) = Queue.take msgq in
        recv_msg endpt msg ;
      with Queue.Empty -> empty := true
    done ;

    let t = gettime() in
    if t > !gossip_time then (
      gossip_time := t +. !gossip_rate ;	
      cast_gossip () ;
    ) ;

    let events = (!events_to_submit @ !rpc_events_to_submit) in
    clear_rpc_events () ;
    clear_events () ;
    events
  )

  (* Save msg for later processing.  Must save endpt instead of rank
   * since rank may change via view change.
   *)
  and block_recv_cast from msg =
    (try
      Queue.add ((endpt_of_rank from), msg) msgq ;
      print_msghdr msg
    with Invalid_argument _ ->
      print_msghdr msg)
  and block_recv_send from msg =
    (try
      Queue.add ((endpt_of_rank from), msg) msgq ;
      print_msghdr msg
    with Invalid_argument _ ->
      print_msghdr msg)

  and block_view (ls,vs) = [ls.rank, [my_state]]
   (* [vs.rank, ()] *)
(*
    if vs.rank = 0 then (
      let l = Util.sequence vs.nmembers in
      let l = List.map (fun rank -> (rank,())) l in
      l
    ) else []
*)
  and block_install_view (ls,vs) s =
    List.fold_left Lset.union [] s 
  and unblock_view (ls',vs') s =
    vs := vs' ;
    ls := ls' ;
    collect_sessvrs s ;
    let i = truncate (gettime()) in
    Random.init i ;

    Util.printf "PROCSVR: got view, nmembers=%d " !ls.nmembers ;
    Util.printf "\n";

    let dup = check_duplicate s in
    if dup then
      [Leave]
    else (
      events_to_submit := [] ;
(*  This probably procude too many messages for a large group.
      cast_gossip () ;
*)
      !events_to_submit
    )

  and exit () =
    exit 0

  in full (*(Appl_intf.debug*) {
    recv_cast           = recv_cast ;
    recv_send           = recv_send ;
    heartbeat           = heartbeat ;
    heartbeat_rate	= Time.of_float !heartbeat_rate ;
    block               = block ;
    block_recv_cast     = block_recv_cast ;
    block_recv_send     = block_recv_send ;
    block_view 		= block_view ;
    block_install_view  = block_install_view ;
    unblock_view        = unblock_view ;
    exit                = exit
  }(*)*) 

(**************************************************************)

let run () =
  Sys.signal Sys.sigpipe Sys.Signal_ignore ;
  let props = Property.Total :: Property.Scale :: Property.vsync in
  let props = List.map Property.string_of_id props in
  let props = String.concat ":" props in
  Arge.set_default Arge.properties props ;

  (*
   * Parse command line arguments.
   *)
  let undoc = "undocumented" in
  Arge.parse [
  ] (Arge.badarg name) "procsvr" ;

  (*
   * Get default transport and alarm info.
   *)
  let (ls,vs) = Appl.default_info "execsvr" in
  let alarm = Alarm.get () in

  (*
   * Initialize the application interface.
   *)
  let addr = (Unix.gethostbyname my_state.stname).Unix.h_addr_list.(0) in
  my_state.staticentry <- read_entry my_state.stname ;
  if my_state.staticentry = [] then
    my_state.staticentry <- [("name", String my_state.stname)] ;
  my_state.staticentry <- my_state.staticentry @
    [("addr", Addr addr) ;
     ("endpt", Endpt ls.endpt)
    ] ;
  my_state.stendpt <- Some ls.endpt ;
  my_state.stload <- get_load () ;
  my_state.strestrictions <- [(DBlt, DBmachine, "nrunning", Int 50)] ;

  (*
   * Initialize RPC
   *)
  Sockio.ensemble_register (Alarm.add_sock alarm) (Alarm.rmv_sock alarm) ;
  let procskt = Socket.process_socket() in
(*
  eprintf "Socket.process_socket: %d\n" (Obj.magic procskt);
*)
  Sockio.register (Obj.magic procskt) proc_done (fun _ -> ()) ;

  let interface = intf (ls,vs) procskt in

  (*
   * Initialize the Horus protocol stack, using the
   * interface, transports, and group endpt chosen above.  
   *)
  Appl.config interface (ls,vs) ;

  (*
   * Enter a main loop.  The argument is the number of non-blocking
   * select before doing blocking select.
   *)
  Appl.main_loop ()
(*
  main_loop 10
*)
 (* end of run function *)


(* Run the application, with exception handlers to catch any
 * problems that might occur.  
 *)
let _ =
  Appl.exec ["procsvr"] run

(**************************************************************)
