(**************************************************************)
(*
 *  Ensemble, (Version 0.40)
 *  Copyright 1997 Cornell University
 *  All rights reserved.
 *
 *  See ensemble/doc/license.txt for further information.
 *)
(**************************************************************)
(* DSH.ML: Distributed shell *)
(* Execute commands on remote machines.  Syntax:
 *
 *      dsh comspec [machspec] [procspec] [-multlimit n]
 *
 * where machspec is 0 or more of:
 *	-name machine_name
 *	-arch machine_type
 *	-os os_type
 *	-data dataname			(has data)
 *	-leastload n			(n least loaded)
 *	-loadbound load			(bound on load)
 *	-random n			(n random)
 *
 * comspec is one of:
 *      -c commands
 *      -s script_filename      (file that contains list of commands)
 *
 * and procspec is:
 *      -p process_properties   (e.g. "project=ensemble;cpu=intensive")
 *
 * Separate multiple commands to be executed in parallel by comma (,).
 * Separate multiple commands to be executed sequentially by colon
 * (:).
 *
 * Machines that do not restrict the specified type of processes are
 * selected according the combination of machspec.  Ties are broken
 * randomly.  If there are not enough machines, no jobs are executed,
 * unless the multlimit flag is on, in which case upto the number
 * of jobs specified by the multlimit flag are executed.
 *
 * example 1: dsh -c "uptime,uptime"
 *   runs two uptime jobs concurrently on two random public machines.
 * example 2: mach spec of "-r 2 -l 1" means "least loaded of random 2".
 *
 * All commands are routed via a contacting sessvr which is determined
 * by the following enviroment variables:
 *
 *   ENS_EXECVR_HOSTS: machine where contacting servers are searched
 *   ENS_EXECSVR_PORT: port where contacting server listens
 *   ENS_DBDEFAULT:    file which contains initial specification for
 *                     machine database (For an example look in
 *                     execsvr/dbdefault)
 *)
(* Author: Takako M. Hickey, 4/97 *)
(* Thanks to Mark Hayden and Robbert van Renesse for many useful
 * discussions.
 *)
(**************************************************************)
open Ensemble
open Rpc
open Str
open Session
open Db
open Unix
(**************************************************************)
let name = Trace.source_file "DSH" 
let failwith s = failwith (name^":"^s) 
(**************************************************************)

and default_execsvr_hosts= "eclair0:eclair1"
let default_execsvr_port= 8123
and server		= (ref None: Rpc.Sockio.rpc_handle option ref)

and barrierdiv          = (regexp "[:]+")
and multdiv             = (regexp "[,]+")
and seqcmds	        = ref [||]
and nseqcmds		= ref 0 
and curseqcmd		= ref 0 
and parcmds	        = ref [||]
and ntocreate           = ref 0 
and ncreated            = ref 0 
and ntowait		= ref 0 
and procprop		= ref []
and machspec	        = ref []
and unlimited           = ref true
and multlimit           = ref 1
and destroy_issued	= ref true 


(**************************************************************)
let init_rpc servername = (
  let get_servernames () =
    let buffer =
      (try
         Sys.getenv "ENS_EXECSVR_HOSTS"
       with _ ->
         default_execsvr_hosts
      )
    in
      Array.of_list (Str.split (regexp "[:]+") buffer)
  in
  let get_serverport () =
    (try
      int_of_string (Sys.getenv "ENS_EXECSVR_PORT")
    with _ ->
      default_execsvr_port
    )
  in
  Sockio.sockio_register () ;
  let s = Sockio.bind_to_service servername in
  let l = (get_servernames ()) in
  let dp = (get_serverport ()) in
  for i = 0 to (Array.length l) - 1 do
    Sockio.add_to_service s l.(i) dp ; 
  done ;
  server := Some s
)

let select_loop interval = (
  try
    while true do
      Sockio.select interval
    done
  with Not_found ->
    Util.printf "got not found while selecting\n"
)


let rec recv_failure reply = 
  Util.printf "rpc failed: %s\n" (Obj.magic reply) 

let rec recv_reply reply = 
  let issue_command sessname =
    (match !server with
    | Some s ->
        if !ntocreate = !ncreated then (
          if (!curseqcmd >= !nseqcmds) then (
            if not !destroy_issued then
              destroy_issued := true ;
              Sockio.rpc s (SessDestroy(sessname)) recv_reply recv_failure ; 
          )
          else (
            parcmds := Array.of_list (Str.split multdiv !seqcmds.(!curseqcmd)) ;
            ntocreate := Array.length !parcmds ;
            Util.printf"DSH: sending %d command(s): %s...\n"
              !ntocreate !parcmds.(0);
            ncreated := 0 ;
            curseqcmd := !curseqcmd + 1 ;

            let c = Array.sub !parcmds !ncreated (Array.length !parcmds - !ncreated) in
            let to_array a = [|Cmd(a)|] in
            let cmds = Array.map to_array c in
            let request = 
            if !unlimited then
              ProcCreate(sessname, cmds, environment (), [|!machspec|], !procprop, Unlimited())
            else
              ProcCreate(sessname, cmds, environment (), [|!machspec|], !procprop, MultLimit(!multlimit))
            in
            (* TODO: check for empty commands *)
            Sockio.rpc s request recv_reply recv_failure ; 
          )
        ) ;
    | None ->
        exit 0
    )
  in

  let print_output sessname procname output =
    if not (output = "") then (
      let sarray = Array.of_list (Util.string_split "\n" output) in
      for i = 0 to (Array.length sarray - 1) do
        if not (sarray.(i) = "") then (
          Util.printf "%s" (procname ^ ": ") ;
          Util.printf "%s" (sarray.(i) ^ "\n")
        )
      done ;
      flush Pervasives.stdout 
    )
  in

  (match (Obj.magic reply) with
  | SessCreateSuccess(hint, sessname) ->
(*
Util.printf "created session: %s\n" sessname ;
*)
      issue_command sessname
  | SessWaitSuccess(sessname) ->
Util.printf "got SessWaitSuccess for: %s\n" sessname ;
      issue_command sessname
  | SessDestroySuccess(sessname) ->
      exit 0 
  | ProcCreateSuccess(sessname, proc) ->
Util.printf "DSH: created %d process(es) under session: %s\n" (Array.length proc) sessname ;
      ntowait := Array.length proc ;
      ncreated := !ncreated + !ntowait ;
      (match !server with
      | Some s ->
          for i = 0 to !ntowait - 1 do
Util.printf "DSH: sending ProcWait for: (%s, %s)\n" sessname proc.(i).seprocname ;
            let request = ProcWait(sessname, proc.(i).seprocname) in
            Sockio.rpc s request recv_reply recv_failure ; 
          done
      | None -> ())
  | ProcWaitSuccess(sessname, procname) -> (
Util.printf "DSH: got ProcWaitSuccess for: (%s, %s)\n" sessname procname;
      ntowait := !ntowait - 1 ;
      if !ntowait = 0 then
        issue_command sessname ;
    )
  | ProcOpFailure(sessname, opname, comment) ->
      (match opname with
      | PrCreate ->
          Util.printf "ProcCreate failed: %s\n" comment ;
          ncreated := !ntocreate ;
          issue_command sessname ;
      | _ -> Util.printf "ProcOpFailure: %s\n" comment ;
      )
  | _ -> ()
  )
  

(**************************************************************)
let run () = (
  let comfile   = ref ""
  and cmdbuf    = ref ""
  and prop  = ref "" in

  Sys.signal Sys.sigpipe Sys.Signal_ignore ;

  let undoc = "undocumented" in
  Arge.parse [
    ("-name",  Arg.String(fun a ->
       (try
         let addr = (gethostbyname a).h_addr_list.(0) in
         machspec := !machspec @ [(DBeq, ("addr", Addr addr))] ;
       with _ ->
         Util.printf "an invalid machine name. defaulting to a random machine.\n"
       )), undoc) ;
    ("-arch",  Arg.String(fun t ->
       machspec := !machspec @ [(DBeq, ("machtype", String t))]), undoc) ;
    ("-os",  Arg.String(fun t ->
       machspec := !machspec @ [(DBeq, ("ostype", String t))]), undoc) ;
    ("-data",  Arg.String(fun data ->
       machspec := !machspec @ [(DBinclude(data), ("data", Noval))]), undoc) ;
    ("-leastload",  Arg.Int(fun n ->
       machspec := !machspec @ [(DBmin(n), ("load", Noval))]), undoc) ;
    ("-loadbound",  Arg.Float(fun load ->
       machspec := !machspec @ [(DBlt, ("load", Float load))]), undoc) ;
    ("-random",  Arg.Int(fun n ->
       machspec := !machspec @ [(DBrandom(n), ("", Noval))]), undoc) ;
    ("-multlimit",  Arg.Int(fun i -> unlimited := false ; multlimit := i), undoc) ;

    ("-c",  Arg.String(fun s -> cmdbuf := s), undoc) ;
    ("-s",  Arg.String(fun s -> comfile := s), undoc) ;
    ("-p",  Arg.String(fun s -> prop := s), undoc) 
  ] (Arge.badarg "DSH") "dsh" ;

  if not (!comfile = "") then (
    let chan = open_in !comfile in
    let len = in_channel_length chan in
    let buffer = String.create len in
    let nchar = input chan buffer 0 len in
    seqcmds := Array.of_list (Str.split (regexp "[\n]+") buffer) ;
    close_in chan 
  )
  else
    seqcmds := Array.of_list (Str.split barrierdiv !cmdbuf) ;
  nseqcmds := Array.length !seqcmds ;

  if not (!prop = "") then (
    let plist = Array.of_list (Str.split (regexp "[;]+") !prop) in
    for i = 0 to (Array.length plist) - 1 do
      let entry = Array.of_list (Str.split (regexp "[=]+") plist.(i)) in
      if Array.length entry = 2 then (
Util.printf "adding proc property %s %s\n" entry.(0) entry.(1) ;
          procprop := !procprop @ [(entry.(0), String entry.(1))]
      ) ;
    done ;
  ) ;

  init_rpc "execsvr" ;
  (match !server with
  | Some s ->
      let request = SessCreate("dsh") in
      Sockio.rpc s request recv_reply recv_failure ; 
      select_loop 2.0 
  | None -> ()
  ) 
)

let _ = run ()
(**************************************************************)
