CS 3110 Recitation 24
Streams and lazy evaluation

Introduction : the if construct

We have already seen that in OCaml, if true then e1 else e2 evaluates to e1, while if false then e1 else e2 evaluates to e2. Actually, in if true then e1 else e2, the value of e2 is never evaluated: this is called a lazy evaluation. We say that if eagerly evaluates condition expression to true or false, and lazily evaluates e1 and e2.

In OCaml, function arguments are eagerly evaluated: the function arguments are evaluated before the function is called. This is not true for every language; for instance in Haskell function arguments are lazily evaluated. Also in OCaml, function bodies are lazily evaluated: fun x->e is considered a value, and no attempt is made to evaluate the value of e: function bodies are not evaluated until the function is applied.

The example below shows that trying to reprogram an if construct using an eager evaluation always fails: the if construct needs a lazy evaluation!

let rec factorial (n : int) : int =
  if n <= 0 then 1 else n * factorial (n - 1)

let my_if ((b, t, f):bool * 'a * 'a) : 'a =
  if b then t else f

(* does not terminate :
   get to factorial2(0), then tries to 
   evaluate factorial2(-1), factorial2(-2), etc. *)
let rec factorial2 (n : int) : int =
  my_if (n <= 0, 1, n * factorial2 (n - 1))

let if_funs ((b, t, f):bool * (unit->'a) * (unit->'a)): 'a=
  if b then t() else f()

(* factorial2 fixed *)
let rec factorial3 (n : int) : int =
  if_funs (n <= 0, (fun () -> 1), (fun () -> n * factorial3 (n - 1)))

Call by value and call by name

Let us now look at let constructs and function evaluations. In an eager language like OCaml, these are evaluated using a call by value semantics: let x=v in e2 --> e2{v/x} and (fun x->e2) v --> e2{v/x}; the value bound to x is evaluated eagerly before the body e2. In a lazy language like Haskell however, they are evaluated using a call by name semantics: let x=e1 in e2 --> e2{e1/x} and (fun x->e2) e1 --> e2{e1/x}: e1 is not evaluated until x is used, and a variable can stand for an unevaluated expression. However a question arises: what if x occurs 10 times in e2? Should we evaluate it 10 times? In Haskell this is solved by a thunk-like mechanism, where x is evaluated only the first time it is used, and then its value is remembered.

Thunks

We already know that let f=e evaluated e right away; on the other hand, let f=fun () -> e evaluates e every time, but not until f is called. Here we introduce thunks, for which if we write let f = Thunk.make (fun ()->e), e is evaluated once, but not until we use it by calling Thunk.apply f.

The implementation here has to use a ref, so that it is possible to do something different the seond time Thunk.apply is called.

module type THUNK =
  sig
    (* A 'a thunk is a lazily
     * evaluated expression e of type
     * 'a. *)
    type 'a thunk
    (* make(fn()=>e) creates a thunk
     * for e *)
    val make : ( unit -> 'a ) -> 'a thunk
    (* apply(t) is the value of t's expression,
     * which is only evaluated once *)
    val apply : 'a thunk -> 'a
  end

module Thunk : THUNK =
  struct
    type 'a thunkPart =
      Done of 'a
    | Undone of (unit -> 'a)

    type 'a thunk =  ('a thunkPart) ref

    let make (f : unit -> 'a) : 'a thunk =
      ref (Undone f)

    let apply (th : 'a thunk) : 'a =
      match !th with
       Done x -> x
     | Undone f ->
          let ans = f()
          in (th := Done ans;
              ans)
end

Following are a few examples using thunks

(* Return a function that prints x and then returns it. *)
let print_and_return (x:int) () : int =
  (print_int x;
   print_string "\n";
   x)

(* prints
   3
   4
   5
   5
   5 
returns 36
*)
let silly_adds () : int =
    let a : int             = print_and_return 3 ()
    and b : int Thunk.thunk = Thunk.make(print_and_return 4)
    and c : unit -> int     = print_and_return 5
    in
    let d : int = c () + (Thunk.apply b) + a
    and e : int = c () + (Thunk.apply b) + a
    and f : int = c () + (Thunk.apply b) + a
  in
    d + e + f

(* A silly way to take a long time to do something. *)
let rec slow_add1 (x: int) =
  let rec slow_id ((x, y):(int * int)) =
    if(y = 0) then x else slow_id(x, y-1)
  in
    (slow_id (x,100000000)) + 1

(* Returns immediately *)
let seven_thunk = Thunk.make (fun () -> slow_add1 6)

Streams

A stream is an infinite list, for example evaluating all the natural numbers or evaluating all the prime numbers. You can ask for the rest of as many times as you like and you'll never get null. However the universe is finite, so a stream must really just act like an infinite list; the idea is to use a function to describe what comes next. Here is a possible signature for a stream:

module type STREAM =
  sig
    type 'a stream
    val make : ('a * ('a -> 'a)) -> 'a stream
    (* next returns the value in the stream and the updated stream *)
    val next : 'a stream -> ('a * 'a stream)
    val make2 : ('b * ('b -> 'a*'b)) -> 'a stream
  end

Streams the wrong way

The following code tries to implement streams in a wrong way. But what is wrong?

(* THE FOLLOWING CODE IS WRONG.
 * FOR DEMONSTRATION PURPOSES ONLY.
 * DO NOT TRY THIS AT HOME.
 *)

open Stream

module Stream : STREAM =
  struct

    type 'a stream = Str of ('a * 'a stream)

    let rec make ((init, f): 'a * ('a -> 'a)) : 'a stream =
      Str(init, make (f init, f))

    let next (Str(th):'a stream) :'a*'a stream =
      th

    let make2 (init,f) = failwith "unimplemented"
  end

It is wrong because of the eagerness of OCaml. The make function will never return, because it tries to to evaluate f init, then f f init etc. right away. Precisely, what we would like is a lazy evaluation of this f init.

Streams the right way

This is done by this correct stream implementation, which uses thunks to make things work:

(* One CORRECT Stream implementation *)
module Stream : STREAM =
  struct
    type 'a stream = Cons of (('a * 'a stream) Thunk.thunk)

    let rec make ((init, f): 'a * ('a -> 'a)) : 'a stream =
      Cons(Thunk.make(fun () -> (init, make (f init, f))))

    let rec make2((init, f) : 'b * ('b -> 'a*'b)): 'a stream =
      Cons(Thunk.make(fun () ->
                      let (next_elem, next_state) = f(init) in
                        (next_elem, make2(next_state, f))
                      ))
    let next (Cons(th) : 'a stream) : 'a * 'a stream =
      Thunk.apply th
  end

We can easily build a few examples of simple streams:

(* Some very simple streams *)
let nats  = Stream.make (0, fun x -> 1 + x)
let evens = Stream.make (0, fun x -> 2 + x)
let alt   = Stream.make (true, not)

Streams of primes

We can now make a stream which generates all the prime numbers, without generating them in advance.

let rec next_prime(n) =
    let n' = n+1 in
    let rec no_divisors_above(m) =
	if m*m > n'
	then true
	else if n' = m*(n' / m)
	     then false
	     else no_divisors_above(m+1)
  in
    if no_divisors_above(2) then n'
    else next_prime(n')

let primes = Stream.make(2, next_prime)