(** * SearchTree: Binary Search Trees *) (** If the type of keys can be totally ordered -- that is, it supports a well-behaved [<=] comparison -- then maps can be implemented with _binary search trees_ (BSTs). Insert and lookup operations on BSTs take time proportional to the height of the tree. If the tree is balanced, the operations therefore take logarithmic time. *) From Coq Require Import String. (* for an example, and manual grading *) From Coq Require Import Logic.FunctionalExtensionality. From VFA Require Import Perm. From VFA Require Import Maps. From VFA Require Import Sort. (* ################################################################# *) (** * BST Implementation *) Definition key := nat. Inductive tree (V : Type) : Type := | E | T (l : tree V) (k : key) (v : V) (r : tree V). Arguments E {V}. Arguments T {V}. (** An example tree: 4 -> "four" / \ / \ 2 -> "two" 5 -> "five" *) Definition ex_tree : tree string := (T (T E 2 "two" E) 4 "four" (T E 5 "five" E))%string. (** [empty_tree] contains no bindings. *) Definition empty_tree {V : Type} : tree V := E. (** [bound k t] is whether [k] is bound in [t]. *) Fixpoint bound {V : Type} (x : key) (t : tree V) := match t with | E => false | T l y v r => if x ? y then bound x r else true end. (** [lookup d k t] is the value bound to [k] in [t], or is default value [d] if [k] is not bound in [t]. *) Fixpoint lookup {V : Type} (d : V) (x : key) (t : tree V) : V := match t with | E => d | T l y v r => if x ? y then lookup d x r else v end. (** [insert k v t] is the map containing all the bindings of [t] along with a binding of [k] to [v]. *) Fixpoint insert {V : Type} (x : key) (v : V) (t : tree V) : tree V := match t with | E => T E x v E | T l y v' r => if x ? y then T l y v' (insert x v r) else T l x v r end. (* ################################################################# *) (** * BST Invariant *) (** The _BST invariant_ is not part of the definition of [tree]. *) Module NotBst. Open Scope string_scope. Definition t : tree string := T (T E 5 "five" E) 4 "four" (T E 2 "two" E). Example not_bst_lookup_wrong : lookup "" 2 t <> "two". Proof. simpl. unfold not. intros contra. discriminate. Qed. End NotBst. (** A formalization of the BST invariant: *) Fixpoint ForallT {V : Type} (P: key -> V -> Prop) (t: tree V) : Prop := match t with | E => True | T l k v r => P k v /\ ForallT P l /\ ForallT P r end. Inductive BST {V : Type} : tree V -> Prop := | BST_E : BST E | BST_T : forall l x v r, ForallT (fun y _ => y < x) l -> ForallT (fun y _ => y > x) r -> BST l -> BST r -> BST (T l x v r). Hint Constructors BST. Example is_BST_ex : BST ex_tree. Proof. unfold ex_tree. repeat (constructor; try omega). Qed. Example not_BST_ex : ~ BST NotBst.t. Proof. unfold NotBst.t. intros contra. inv contra. inv H3. omega. Qed. (** **** Exercise: 1 star, standard (empty_tree_BST) *) Theorem empty_tree_BST : forall (V : Type), BST (@empty_tree V). Proof. (* FILL IN HERE *) Admitted. (** [] *) (** **** Exercise: 4 stars, standard (insert_BST) *) Theorem insert_BST : forall (V : Type) (k : key) (v : V) (t : tree V), BST t -> BST (insert k v t). Proof. (* FILL IN HERE *) Admitted. (** [] *) (** Since [empty_tree] and [insert] are the only operations that create BSTs, we are guaranteed that any [tree] is a BST -- unless it was constructed manually with [T]. It would therefore make sense to limit the use of [T] to only within the tree operations, rather than expose it. Coq, like OCaml and other functional languages, can do this with its module system. See [ADT] for details. *) (* ################################################################# *) (** * Correctness of BST Operations *) (** To prove the correctness of [lookup] and [bound], we need specifications for them. We'll study two different techniques for that in this chapter. *) (** _Algebraic specification_ is a technique based on writing down equations that relate the results of operations. Here is an algebraic specification for [lookup]: lookup d k empty_tree = d lookup d k (insert k v t) = v lookup d k' (insert k v t) = lookup d k' t if k <> k' We can verify these equations. *) Theorem lookup_empty : forall (V : Type) (d : V) (k : key), lookup d k empty_tree = d. Proof. auto. Qed. Theorem lookup_insert_eq : forall (V : Type) (t : tree V) (d : V) (k : key) (v : V), lookup d k (insert k v t) = v. Proof. induction t; intros; simpl. - bdestruct (k bdestruct (X =? Y) | |- context [ if ?X <=? ?Y then _ else _ ] => bdestruct (X <=? Y) | |- context [ if ?X bdestruct (X k' -> lookup d k' (insert k v t) = lookup d k' t. Proof. induction t; intros; bdall. Qed. (* ################################################################# *) (** * Converting a BST to a List *) (** [elements t] is the key--value bindings of [t] as an _association list_: *) Fixpoint elements {V : Type} (t : tree V) : list (key * V) := match t with | E => [] | T l k v r => elements l ++ [(k, v)] ++ elements r end. Example elements_ex : elements ex_tree = [(2, "two"); (4, "four"); (5, "five")]%string. Proof. reflexivity. Qed. (** Here are three desirable properties for [elements]: 1. The list has the same bindings as the tree. 2. The list is sorted by keys. 3. The list contains no duplicates. Let's formally specify and verify them. *) (* ================================================================= *) (** ** Part 1: Same Bindings *) (** We want to show that a binding is in [elements t] iff it's in [t]. We'll prove the two directions of that bi-implication separately: - [elements] is _complete_: if a binding is in [t] then it's in [elements t]. - [elements] is _correct_: if a binding is in [elements t] then it's in [t]. *) Definition elements_complete_spec := forall (V : Type) (k : key) (v d : V) (t : tree V), BST t -> bound k t = true -> lookup d k t = v -> In (k, v) (elements t). (** **** Exercise: 3 stars, standard (elements_complete) *) Theorem elements_complete : elements_complete_spec. Proof. (* FILL IN HERE *) Admitted. (** [] *) Definition elements_correct_spec := forall (V : Type) (k : key) (v d : V) (t : tree V), BST t -> In (k, v) (elements t) -> bound k t = true /\ lookup d k t = v. (** **** Exercise: 4 stars, standard (elements_correct) *) Theorem elements_correct : elements_correct_spec. Proof. (* FILL IN HERE *) Admitted. (** [] *) (** The inverses of completeness and correctness also should hold: - inverse completeness: if a binding is not in [t] then it's not in [elements t]. - inverse correctness: if a binding is not in [elements t] then it's not in [t]. Let's prove that they do. *) (** **** Exercise: 2 stars, advanced (elements_complete_inverse) *) (** This inverse doesn't require induction. Look for a way to use [elements_correct] to quickly prove the result. *) Theorem elements_complete_inverse : forall (V : Type) (k : key) (v : V) (t : tree V), BST t -> bound k t = false -> ~ In (k, v) (elements t). Proof. (* FILL IN HERE *) Admitted. (** [] *) (** **** Exercise: 4 stars, advanced (elements_correct_inverse) *) (** Prove the inverse. First, prove this helper lemma by induction on [t]. *) Lemma bound_value : forall (V : Type) (k : key) (t : tree V), bound k t = true -> exists v, forall d, lookup d k t = v. Proof. (* FILL IN HERE *) Admitted. (** Prove the main result. You don't need induction. *) Theorem elements_correct_inverse : forall (V : Type) (k : key) (t : tree V), BST t -> (forall v, ~ In (k, v) (elements t)) -> bound k t = false. Proof. (* FILL IN HERE *) Admitted. (** [] *) (* ================================================================= *) (** ** Part 2: Sorted (Advanced) *) (** **** Exercise: 4 stars, advanced (sorted_elements) *) Definition list_keys {V : Type} (lst : list (key * V)) := map fst lst. Theorem sorted_elements : forall (V : Type) (t : tree V), BST t -> Sort.sorted (list_keys (elements t)). Proof. (* FILL IN HERE *) Admitted. (** [] *) (* ================================================================= *) (** ** Part 3: No Duplicates (Advanced and Optional) *) Definition disjoint {X:Type} (l1 l2: list X) := forall (x : X), In x l1 -> ~ In x l2. (** **** Exercise: 4 stars, advanced, optional (elements_nodup_keys) *) Theorem elements_nodup_keys : forall (V : Type) (t : tree V), BST t -> NoDup (list_keys (elements t)). Proof. (* FILL IN HERE *) Admitted. (** That concludes the proof of correctness of [elements]. *) (* ################################################################# *) (** * A Faster [elements] Implementation *) (** A tail-recursive implementation of [elements] that runs in linear time: *) Fixpoint fast_elements_tr {V : Type} (t : tree V) (acc : list (key * V)) : list (key * V) := match t with | E => acc | T l k v r => fast_elements_tr l ((k, v) :: fast_elements_tr r acc) end. Definition fast_elements {V : Type} (t : tree V) : list (key * V) := fast_elements_tr t []. (** **** Exercise: 3 stars, standard (fast_elements_eq_elements) *) Lemma fast_elements_eq_elements : forall (V : Type) (t : tree V), fast_elements t = elements t. Proof. (* FILL IN HERE *) Admitted. (** [] *) (** Since the two implementations compute the same function, all the results we proved about the correctness of [elements] also hold for [fast_elements]. For example: *) Corollary fast_elements_correct : forall (V : Type) (k : key) (v d : V) (t : tree V), BST t -> In (k, v) (fast_elements t) -> bound k t = true /\ lookup d k t = v. Proof. intros. rewrite fast_elements_eq_elements in *. apply elements_correct; assumption. Qed. (** This corollary illustrates a general technique: prove the correctness of a simple, slow implementation; then prove that the slow version is functionally equivalent to a fast implementation. The proof of correctness for the fast implementation then comes "for free". *) (* ################################################################# *) (** * An Algebraic Specification of [elements] *) (** The verification of [elements] we did above did not adhere to the algebraic specifcation approach, which would suggest that we look for equations of the form elements empty_tree = ... elements (insert k v t) = ... (elements t) ... The first of these is easy; we can trivially prove the following: *) Lemma elements_empty : forall (V : Type), @elements V empty_tree = []. Proof. intros. simpl. reflexivity. Qed. (** But the second equation gets ugly... *) Fixpoint kvs_insert {V : Type} (k : key) (v : V) (kvs : list (key * V)) := match kvs with | [] => [(k, v)] | (k', v') :: kvs' => if k ? k' then (k', v') :: kvs_insert k v kvs' else (k, v) :: kvs' end. (** **** Exercise: 3 stars, standard, optional (kvs_insert_elements) *) Lemma kvs_insert_elements : forall (V : Type) (t : tree V), BST t -> forall (k : key) (v : V), elements (insert k v t) = kvs_insert k v (elements t). Proof. (* FILL IN HERE *) Admitted. (** [] *) (* ################################################################# *) (** * Model-based Specifications *) (** A different approach to specification and verification: - Write the specification in terms of an _abstract model_ instead of the concrete implementation. For example, partial map operations instead of BST operations. - Define an _abstraction function_ (or relation) that converts concrete representations into abstract values. For example, convert trees into maps. - Verify that the specification holds of the abstracted implementation. *) Fixpoint map_of_list {V : Type} (el : list (key * V)) : partial_map V := match el with | [] => empty | (k, v) :: el' => update (map_of_list el') k v end. Definition Abs {V : Type} (t : tree V) : partial_map V := map_of_list (elements t). (** One small difference between trees and functional maps is that applying the latter returns an [option V] which might be [None], whereas [lookup] returns a default value if key is not bound lookup fails. We can easily provide a function on functional partial maps having the latter behavior. *) Definition find {V : Type} (d : V) (k : key) (m : partial_map V) : V := match m k with | Some v => v | None => d end. (** We also need a [bound] operation on maps. *) Definition map_bound {V : Type} (k : key) (m : partial_map V) : bool := match m k with | Some _ => true | None => false end. (** We now proceed to prove that each operation preserves (or establishes) the abstraction relationship in an appropriate way: concrete abstract -------- -------- empty_tree empty bound map_bound lookup find insert update *) Lemma empty_relate : forall (V : Type), @Abs V empty_tree = empty. Proof. reflexivity. Qed. (** **** Exercise: 3 stars, standard, optional (bound_relate) *) Theorem bound_relate : forall (V : Type) (t : tree V) (k : key), BST t -> map_bound k (Abs t) = bound k t. Proof. (* FILL IN HERE *) Admitted. (** [] *) (** **** Exercise: 3 stars, standard, optional (lookup_relate) *) Lemma lookup_relate : forall (V : Type) (t : tree V) (d : V) (k : key), BST t -> find d k (Abs t) = lookup d k t. Proof. (* FILL IN HERE *) Admitted. (** [] *) (** **** Exercise: 3 stars, standard, optional (insert_relate) *) Lemma insert_relate : forall (V : Type) (t : tree V) (k : key) (v : V), BST t -> Abs (insert k v t) = update (Abs t) k v. Proof. (* TODO: find a direct proof that doesn't rely on [kvs_insert_elements] *) (* FILL IN HERE *) Admitted. (** [] *) (** The previous three lemmas are in essence saying that the following diagrams commute. bound k t -------------------+ | | Abs | | V V m -----------------> b map_bound k lookup d k t -----------------> v | | Abs | | Some V V m -----------------> Some v find d k insert k v t -----------------> t' | | Abs | | Abs V V m -----------------> m' update' k v Where we define: update' k v m = update m k v *) (* ################################################################# *) (** * Efficiency of Search Trees *) (** All the theory we've developed so far has been about correctness. But the reason we use binary search trees is that they are efficient. That is, if there are [N] elements in a (reasonably well balanced) BST, each insertion or lookup takes about [log N] time. What could go wrong? 1. The search tree might not be balanced. In that case, each insertion or lookup will take as much as linear time. - SOLUTION: use an algorithm that ensures the trees stay balanced. We'll do that in [Redblack]. 2. Our keys are natural numbers, and Coq's [nat] type takes linear time per comparison. That is, computing (j