(* Binary search tree, each node has value and two children.
*
* For any node with value x, all the values in the left
* subtree are smaller than x, and all those in the right
* subtree are larger than x (using built-in < as comparator).
*)
type 'a tree = TNode of 'a * 'a tree * 'a tree | TLeaf
let rec contains x = function
TLeaf -> false
| TNode (y, l, r) ->
x = y || (x < y && contains x l) || contains x r
let rec add x = function
TLeaf -> TNode (x, TLeaf, TLeaf) (* When get to leaf, put new node there *)
| TNode (y, l, r) as t -> (* Recursively search for value *)
if x = y then t
else if x > y then TNode (y, l, add x r)
else (* x < y *) TNode (y, add x l, r)
let rec depth = function
TLeaf -> 0
| TNode(_, a, b) -> 1 + max (depth a) (depth b)
let rec tree_of_list =
function
[] -> TLeaf
| x :: l -> add x (tree_of_list l)
(* fold_inorder f acc t folds the function f over the tree t,
* going left subtree, current value, right subtree
*)
let rec fold_inorder f acc t =
match t with
TLeaf -> acc
| TNode(v,l,r) ->
let acc = fold_inorder f acc l in
let acc = f acc v in
fold_inorder f acc r
(* Check that a binary search tree is ordered: for a node
* with value x the left subtree has values < x and the
* right subtree has values > x *)
(* Returns the minimum value in t, max_int if t is empty
*)
let findmin_int t =
let f acc v = if v < acc then v else acc in
fold_inorder f max_int t
(* Returns the maximum value in t, min_int if t is empty
*)
let findmax_int t =
let f acc v = if v > acc then v else acc in
fold_inorder f min_int t
(* Check that:
1. Maximum of the left subtree is less than the current value
2. Minimum of the right subtree is greater than the current value
3. Left subtree is a valid BST
4. Right subtree is a valid BST
Note: We implicitly require uniqueness of values
Note: Empty subtrees always satisfy requirements 1 and 2
*)
let rec repOk_int t =
match t with
TLeaf -> true
| TNode(v, l, r) ->
let (lmax, rmin) = (findmax_int l, findmin_int r) in
lmax < v && v < rmin && repOk_int(l) && repOk_int(r)
let t0 = tree_of_list[3; 5];;
let t1 = tree_of_list [3; 9; 5; 7; 11];;
let t2 = tree_of_list [3; 5; 7; 9; 11];;
let t3 = tree_of_list [1;2;3;4;5;6;7;8;9;10];;
repOk_int t0;;
repOk_int t1;;
repOk_int t2;;
repOk_int t3;;
contains 5 t1;;
contains 6 t1;;
depth t3;;
(* General repOk for any ordered type not just ints *)
(* Returns Some v, where v is the minimum value in t
If t is empty, returns None
*)
let findmin_tree t =
let f acc v =
match acc with
None -> Some v
| Some x -> if v < x then Some v else acc in
fold_inorder f None t
(* Returns Some v, where v is the maximum value in t
If t is empty, returns None
*)
let findmax_tree t =
let f acc v =
match acc with
None -> Some v
| Some x -> if v > x then Some v else acc in
fold_inorder f None t
(* "Option less than"
* Returns true if either xo or yo is None
* Otherwise returns the result of comparing the values inside
*)
let olt xo yo =
match (xo,yo) with
None,_ -> true
| _, None -> true
| Some x, Some y -> x < y
(* Check that:
1. Maximum of the left subtree is less than the current value
2. Minimum of the right subtree is greater than the current value
3. Left subtree is a valid BST
4. Right subtree is a valid BST
Note: We implicitly require uniqueness of values
Note: Empty subtrees always satisfy requirements 1 and 2
*)
let rec repOk t =
match t with
TLeaf -> true
| TNode (v, l, r) ->
let (lmax, rmin) = (findmax_tree l, findmin_tree r) in
(olt lmax (Some v)) && (olt (Some v) rmin) && repOk(l) && repOk(r)
let ta = tree_of_list["c"; "e"];;
let tb = tree_of_list ["c"; "i"; "e"; "g"; "k"];;
let tc = tree_of_list ["c"; "e"; "g"; "i"; "k"];;
let td = tree_of_list ["a";"b";"c";"d";"e";"f";"g";"h";"i";"j"];;
repOk ta;;
repOk tb;;
repOk tc;;
repOk td;;
(* Red-Black tree. Binary search tree with properties:
* 1. Nodes are colored either red or black.
* 2. The root is black
* 3. Every leaf is black.
* 4. The children of every red node are black.
* 5. Every path from a node to a descendant leaf has the same
* number of black nodes as every other path.
*
* Longest path from root to leaf can be at most 2x longer than
* shortest path because at most every other node can be red (no
* red node with red parents) and same number of black nodes on
* each path, thus search always O(log n).
*)
type color = Red | Black
type 'a rbtree = Node of color * 'a * 'a rbtree * 'a rbtree | Leaf
let rec mem x = function
Leaf -> false
| Node (_, y, left, right) ->
x = y || (x < y && mem x left) || (x > y && mem x right)
(* Repair violations of the invariant that a red node should have no
* red children. Make this the responsibility of the grandparent of
* the red node having the red parent. There are four cases each of
* which results in a red node with two black children (which may in
* turn violate the constraint above it in the tree. *)
let balance = function
Black, z, Node (Red, y, Node (Red, x, a, b), c), d
| Black, z, Node (Red, x, a, Node (Red, y, b, c)), d
| Black, x, a, Node (Red, z, Node (Red, y, b, c), d)
| Black, x, a, Node (Red, y, b, Node (Red, z, c, d)) ->
Node (Red, y, Node (Black, x, a, b), Node (Black, z, c, d))
| a, b, c, d -> Node (a, b, c, d)
let insert x s =
(* same as for basic binary search tree except 3 things:
* 1. initially replace a leaf with a red node with two leaves as
* children, 2. balance result of each recursive call (because
* inserting red node may have violated red has no red children
* invariant), 3. force root to be black
*
* Note considerably simpler than imperative version of insert, which
* has left and right rotations in addition to operation similar to
* balance's grandparent update. *)
let rec ins = function
Leaf -> Node (Red, x, Leaf, Leaf)
| Node (color, y, a, b) as s ->
if x < y then balance (color, y, ins a, b)
else if x > y then balance (color, y, a, ins b)
else s in
match ins s with
Node (_, y, a, b) ->
Node (Black, y, a, b)
| Leaf -> (* guaranteed to be nonempty *)
raise (Failure "RBT insert failed with ins returning leaf")
let rec rbt_of_list = function
[] -> Leaf
| x :: l -> insert x (rbt_of_list l);;
let rec height = function
Leaf -> 0
| Node(_, _, a, b) -> 1 + max (height a) (height b)
(* Computes number of blacks along every path from root to leaf
* in RBT, and if invariant that this value is equal for all paths
* is violated then returns -1 *)
let rec path_num_blacks = function
Leaf -> 1
| Node(Black, _, a, b) ->
let ba = path_num_blacks a
and bb = path_num_blacks b in
if ba <> bb || ba = -1 || bb = -1
then -1
else 1+ba
| Node(Red, _, a, b) ->
let ba = path_num_blacks a
and bb = path_num_blacks b in
if ba <> bb || ba = -1 || bb = -1
then -1
else ba
let root_black = function
Leaf
| Node (Black, _, _, _) -> true
| _ -> false
let rec red_black_children = function
Node (Red, _, Node (Red, _, _, _), _)
| Node (Red, _, _, Node (Red, _, _, _)) -> false
| Leaf -> true
| Node (_, _, a, b) -> red_black_children a && red_black_children b
let rec fold_rbt f acc t =
match t with
Leaf -> acc
| Node(c,v,l,r) ->
let acc = fold_rbt f acc l in
let acc = f acc v in
fold_rbt f acc r
let findmin_rbt t =
let f acc v =
match acc with
None -> Some(v)
| Some(x) -> if v Some(v)
| Some(x) -> if v>x then Some(v) else acc
in fold_rbt f None t
let rec ordered t =
match t with
Leaf -> true
| Node(c,v,l,r) ->
let (lmax,rmin) = (findmax_rbt l, findmin_rbt r) in
(olt lmax (Some(v))) && (olt (Some(v)) rmin) && ordered(l) && ordered(r)
let repOK t =
ordered t && root_black t && red_black_children t
&& path_num_blacks t > -1
let r0 = rbt_of_list [3;5]
let r1 = rbt_of_list [3; 9; 5; 7; 11];;
let r2 = rbt_of_list [3; 5; 7; 9; 11];;
let r3 = rbt_of_list [1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20];;
let ra = rbt_of_list ["a"; "c"; "e"; "g"; "j"];;
repOK r0;;
repOK r1;;
repOK r2;;
repOK r3;;
repOK ra;;
mem 5 r1;;
mem 6 r1;;
height r1;;
height r2;;
height r3;;
path_num_blacks r3;;