Recitation 15
Implementing ordered sets

We can use red-black trees to implement ordered sets. Here is an ordered set signature that is designed to support implementation of both set and map abstractions. This signature is parameterized with respect to two types, key and elem, which may be the same type.

signature ORDERED_FUNCTIONAL_SET = sig
  (* Overview: a "set" is a set of distinct elements of type
   * "elem". Each element is identified by a unique key, which
   * may be the same as the element itself. Two elements are
   * considered distinct if they have different keys.
   * Keys are a totally ordered set.
   *
   * A set can be used to represent an ordinary set if key = elem.
   * It can be used to represent a mapping if elem = key * value.
   *
   * For example, if key and elem are int, then a set might be
   * {1,-11,0}, {}, or {1001}. If key is string and elem is int,
   * a set could be {("elephant", 2), ("rhino", 25), ("zebra", 2)} *)
  type key
  type elem
  type set

  (* compare(k1,k2) reports the ordering of k1 and k2. *)
  val compare: key * key -> order
  (* keyOf(e) is the key of e. *)
  val keyOf: elem -> key
  (* empty is the empty set. *)
  val empty : unit -> set
  (* Effects: add(s,e) is s union {e}. Returns true
   * if e already in s, false otherwise. *)
  val add: set * elem -> set * bool
  (* remove(s,k) is (s',eo) where s' = s - {k} (set difference)
   * and eo is either SOME e if there is an e in s
   * where k is e's key, or NONE otherwise. *)
  val remove: set * key -> set * elem
  (* lookup(s,k) is SOME e where k = keyOf(e), or NONE if
   * the set contains no such e. *)
  val lookup: set * key -> elem option
  (* size(s) is the number of elements in s. *)
  val size: set -> int

  (* Ordered set operations *)

  (* first(s) is SOME of the element of s with the smallest key,
   * or NONE if s is empty. *)
  val first: set -> elem option
  (* last(s) is SOME of the element of s with the largest key,
   * or NONE if s is empty. *)
  val last: set -> elem
  (* A fold operation on ordered sets takes a key argument
   * that defines the element where the fold starts. *)
  type 'b folder = ((elem*'b)->'b) -> 'b -> key -> set -> 'b
  (* fold over the elements in key order. *)
  val fold_forward: 'b folder
  (* fold over the elements in reverse key order. *)
  val fold_backward: 'b folder
end

We've added some operations to show the added power of ordered sets. The first function gives the first element in the set, and fold_forward iterates over the elements of the set in ascending order. We can similarly implement last and fold_backward from the set signature. 

We have already seen red-black trees, which are one good way to implement ordered sets. Here is an implementation of red-black trees that implements this set signature as a functor:

signature ORDERED_SET_PARAMS = sig
  type key
  type elem
  val keyOf: elem -> key
  val compare: key * key -> order
end


functor RedBlackTree(structure Params : ORDERED_SET_PARAMS) = struct
  type key = Params.key
  type elem = Params.elem
  val compare = Params.compare
  val keyOf = Params.keyOf
  datatype color = Red | Black
  datatype tree =
    Empty
  | Node of {color: color, value: elem, left: tree, right: tree}
  (* Representation invariant:
   * 0. All values in the left subtree are less than "value", and
   *    all values in the right subtree are greater than "value".
   * 1. No red node has a red parent.
   * 2. Every path from the root to an empty node has the
   *    same number of black nodes (the "black height"). *)

  fun lookup(t:tree,k:key) =
    case t
      of Empty => NONE
       | Node {color,value,left,right} =>
        (case compare (k, keyOf(value))
           of EQUAL => SOME value
            | LESS => lookup(left, k)
            | GREATER => lookup(right, k))

  fun add(t: tree, e: elem): tree * bool = let
    (* Definition: a tree t satisfies the "reconstruction invariant"
     * if it is black and satisfies the rep invariant, or if it is
     * red and its children satisfy the rep invariant. *)

    (* makeBlack(t) is a tree that satisfies the rep invariant.
     * Requires: t satisfies the reconstruction invariant
     * Algorithm: Make a tree identical to t but with a black root. *)
    fun makeBlack (t:tree): tree =
      case t
        of Empty => Empty
      | Node {color,value,left,right}
          => Node {color=Black, value=value,
                   left=left, right=right}
    (* Construct the result of a red-black tree rotation. *)
    fun rotate(x: elem, y: elem, z: elem,
               a: tree, b: tree, c:tree, d: tree): tree =
      Node {color=Red, value=y,
            left= Node {color=Black, value=x, left=a, right=b},
            right=Node {color=Black, value=z, left=c, right=d}}
    (* balance(t) is a tree that satisfies the reconstruction
     * invariant and contains all the same values as t.
     * Requires: the children of t satisfy the reconstruction
     * invariant. *)
    fun balance (t:tree): tree =
      case t
        of (*1*) Node {color=Black, value=z,
                       left=Node {color=Red, value=y,
                                  left=Node {color=Red, value=x,
                                             left=a, right=b},
                                  right=c},
                       right=d}
          => rotate(x,y,z,a,b,c,d)
      | (*2*) Node {color=Black, value=z,
                    left=Node {color=Red, value=x,
                               left=a,
                               right=Node {color=Red, value=y,
                                           left=b, right=c}},
                    right=d}
          => rotate(x,y,z,a,b,c,d)
      | (*3*) Node {color=Black, value=x,
                    left=a,
                    right=Node {color=Red, value=z,
                                left=Node {color=Red, value=y,
                                           left=b, right=c},
                                right=d}}
          => rotate(x,y,z,a,b,c,d)
      | (*4*) Node {color=Black, value=x,
                    left=a,
                    right=Node {color=Red, value=y,
                                left=b,
                                right=Node {color=Red, value=z,
                                            left=c, right=d}}}
          => rotate(x,y,z,a,b,c,d)
      | _ => t

    (* Insert x into t, returning (t',b) where t' is a tree that
     * contains all the elements of t, plus e, and satisfies the
     * reconstruction invariant. b is true if t contains e already. *)
    fun walk (t:tree):tree * bool =
      case t
        of Empty => (Node {color=Red, value=e,
                           left=Empty, right=Empty}, false)
         | Node {color,value,left,right} =>
          (case compare (keyOf(value),keyOf(e))
             of EQUAL => (Node {color=color,value=e,
                                left=left,right=right},
                          true)
              | GREATER => let val (t',b) = walk(left) in
               (balance (Node {color=color,
                               value=value,
                               left=t',
                               right=right}),b)
                end
              | LESS => let val (t',b) = walk(right) in
                (balance (Node {color=color,
                                value=value,
                                left=left,
                                right=t'}),b)
                end)
  in
    let val (t',b) = walk(t) in (makeBlack(t'), b) end
  end

  fun first(t: tree): elem option =
    case t of
      Empty => NONE
    | Node{color, value, left, right} =>
        case first(left) of
          NONE => SOME value
        | eo => eo
  fun fold_forward (f: elem*'b->'b) (b:'b) (k:key) (t:tree) =
    case t
      of Empty => b
       | Node {color,value,left,right} =>
        (case compare(keyOf(value), k) of
           EQUAL => fold_forward f (f(value,b)) k right
         | LESS => fold_forward f b k right
         | GREATER => let val lft = fold_forward f b k left in
             fold_forward f (f(value,lft)) k right
           end)
end

Here is how the red-black tree data structure can be packaged up as a set implementation. This implementation represents a set as a red-black tree plus an integer that keeps track of the total number of elements in the set. Otherwise there is no efficient way to implement the size operation.

functor RedBlackSet(structure Params: ORDERED_SET_PARAMS)
  :> ORDERED_FUNCTIONAL_SET where type key = Params.key and
                                  type elem = Params.elem =
  struct
    type key = Params.key
    type elem = Params.elem
    val compare = Params.compare
    val keyOf = Params.keyOf
    structure RBTree = RedBlackTree(structure Params = Params)
    type set = RBTree.tree * int

    fun empty() = (RBTree.Empty, 0)
    fun add((t,n),e) =
      let val (t',b) = RBTree.add(t,e) in
        if b then ((t',n),b) else ((t', n+1),b)
      end
    fun remove(s, k) = raise Fail "Not implemented: remove"
    fun lookup((t,n), k) = RBTree.lookup(t,k)
    fun size((t,n)) = n

    exception Empty
    fun first((t,n)) = RBTree.first(t)
    fun last((t,n)) =
      raise Fail "Not implemented: last"

    type 'b folder = ((elem*'b)->'b) -> 'b -> key -> set -> 'b
    fun fold_forward f b k (t,n) = RBTree.fold_forward f b k t
    fun fold_backward f b k s =
      raise Fail("Not implemented: fold")
  end

Red-black trees are nice because they guarantee O(lg n) insert, lookup, and deletion time, with good constant factors, and they support ordered elements.