(** * Redblack: Red-Black Trees *) (** Red-black trees are a kind of _balanced_ binary search tree (BST). Keeping the tree balanced ensures that the worst-case running time of operations is logarithmic rather than linear. *) (** This chapter is based on the Coq standard library module [MSetRBT], which can be found at [https://coq.inria.fr/distrib/current/stdlib/Coq.MSets.MSetRBT.html]. The design decisions for that module are described in the following paper: - Efficient Verified Red-Black Trees, by Andrew W. Appel, September 2011. Available from http://www.cs.princeton.edu/~appel/papers/redblack.pdf. *) From Coq Require Import String. From Coq Require Import Logic.FunctionalExtensionality. From VFA Require Import Perm. From VFA Require Import Extract. Open Scope Z_scope. (* ################################################################# *) (** * Implementation *) (** [Section]s can have [Variable]s which are in scope throughout. The variables become arguments as needed. *) Section ValueType. Variable V : Type. Variable default : V. Definition key := int. Inductive color := Red | Black. Inductive tree : Type := | E : tree | T : color -> tree -> key -> V -> tree -> tree. Definition empty_tree : tree := E. (** [lookup] is the same as for BSTs. *) Fixpoint lookup (x: key) (t : tree) : V := match t with | E => default | T _ tl k v tr => if ltb x k then lookup x tl else if ltb k x then lookup x tr else v end. (** [insert] requires balancing the tree. *) Definition balance (rb : color) (t1 : tree) (k : key) (vk : V) (t2 : tree) : tree := match rb with | Red => T Red t1 k vk t2 | _ => match t1 with | T Red (T Red a x vx b) y vy c => T Red (T Black a x vx b) y vy (T Black c k vk t2) | T Red a x vx (T Red b y vy c) => T Red (T Black a x vx b) y vy (T Black c k vk t2) | a => match t2 with | T Red (T Red b y vy c) z vz d => T Red (T Black t1 k vk b) y vy (T Black c z vz d) | T Red b y vy (T Red c z vz d) => T Red (T Black t1 k vk b) y vy (T Black c z vz d) | _ => T Black t1 k vk t2 end end end. Fixpoint ins (x : key) (vx : V) (t : tree) : tree := match t with | E => T Red E x vx E | T c a y vy b => if ltb x y then balance c (ins x vx a) y vy b else if ltb y x then balance c a y vy (ins x vx b) else T c a x vx b end. Definition make_black (t : tree) : tree := match t with | E => E | T _ a x vx b => T Black a x vx b end. Definition insert (x : key) (vx : V) (t : tree) := make_black (ins x vx t). (** [elements] is the same as for BSTs. *) Fixpoint elements_tr (t : tree) (acc: list (key * V)) : list (key * V) := match t with | E => acc | T _ l k v r => elements_tr l ((k, v) :: elements_tr r acc) end. Definition elements (t : tree) : list (key * V) := elements_tr t []. (* ###################################################################### *) (** * Case-Analysis Automation *) (** Proofs about [insert] will need automation. Here's an example. *) Lemma ins_not_E : forall (x : key) (vx : V) (t : tree), ins x vx t <> E. Proof. intros. destruct t; simpl. discriminate. destruct (ltb x k). unfold balance. destruct c. discriminate. destruct (ins x vx t1). destruct t2. discriminate. (* Let's automate. *) match goal with | |- match ?c with Red => _ | Black => _ end <> _ => destruct c | |- match ?t with E => _ | T _ _ _ _ _ => _ end <> _=> destruct t end. repeat match goal with | |- match ?c with Red => _ | Black => _ end <> _ => destruct c | |- match ?t with E => _ | T _ _ _ _ _ => _ end <> _=> destruct t end. discriminate. match goal with | |- T _ _ _ _ _ <> E => discriminate end. Abort. Lemma ins_not_E : forall (x : key) (vx : V) (t : tree), ins x vx t <> E. Proof. intros. destruct t; simpl. - discriminate. - unfold balance. repeat match goal with | |- (if ?x then _ else _) <> _ => destruct x | |- match ?c with Red => _ | Black => _ end <> _=> destruct c | |- match ?t with E => _ | T _ _ _ _ _ => _ end <> _=> destruct t | |- T _ _ _ _ _ <> E => discriminate end. Qed. (* ###################################################################### *) (** * The BST Invariant *) (** Red-black trees must be BSTs. *) Fixpoint ForallT (P: int -> V -> Prop) (t: tree) : Prop := match t with | E => True | T c l k v r => P k v /\ ForallT P l /\ ForallT P r end. Inductive BST : tree -> Prop := | ST_E : BST E | ST_T : forall (c : color) (l : tree) (k : key) (v : V) (r : tree), ForallT (fun k' _ => (Abs k') < (Abs k)) l -> ForallT (fun k' _ => (Abs k') > (Abs k)) r -> BST l -> BST r -> BST (T c l k v r). Lemma empty_tree_BST: BST empty_tree. Proof. unfold empty_tree. constructor. Qed. (** Let's show that [insert] preserves the BST invariant, that is: *) Theorem insert_BST : forall t v k, BST t -> BST (insert k v t). Abort. (** It will take quite a bit of work, but automation will help. *) (** First, we show that if a non-empty tree would be a BST, then the balanced version of it is also a BST: *) Lemma balance_BST: forall (c : color) (l : tree) (k : key) (v : V) (r : tree), ForallT (fun k' _ => (Abs k') < (Abs k)) l -> ForallT (fun k' _ => (Abs k') > (Abs k)) r -> BST l -> BST r -> BST (balance c l k v r). Proof. intros c l k v r PL PR BL BR. unfold balance. repeat match goal with | |- BST (match ?c with Red => _ | Black => _ end) => destruct c | |- BST (match ?t with E => _ | T _ _ _ _ _ => _ end) => destruct t end. (* 58 cases remaining. *) - constructor. assumption. assumption. assumption. assumption. - constructor; auto. - constructor; auto. - (* Now the tree gets bigger, and the proof gets more complicated. *) constructor; auto. + simpl in *. repeat split. (* The intro pattern [?] means to let Coq choose the name. *) destruct PR as [? _]. omega. + simpl in *. repeat split. * inv BR. simpl in *. destruct H5 as [? _]. omega. * inv BR. simpl in *. destruct H5 as [_ [? _]]. auto. * inv BR. simpl in *. destruct H5 as [_ [_ ?]]. auto. + constructor; auto. + inv BR. inv H7. constructor; auto. - constructor; auto. - (* 53 cases remain. This could go on for awhile... *) Abort. (** Let's automate. *) Lemma balance_BST: forall (c : color) (l : tree) (k : key) (v : V) (r : tree), ForallT (fun k' _ => (Abs k') < (Abs k)) l -> ForallT (fun k' _ => (Abs k') > (Abs k)) r -> BST l -> BST r -> BST (balance c l k v r). Proof. intros. unfold balance. repeat (match goal with | |- BST (match ?c with Red => _ | Black => _ end) => destruct c | |- BST (match ?t with E => _ | T _ _ _ _ _ => _ end) => destruct t | |- ForallT _ (T _ _ _ _ _) => repeat split | H: ForallT _ (T _ _ _ _ _) |- _ => destruct H as [? [? ?] ] | H: BST (T _ _ _ _ _) |- _ => inv H end; (try constructor; auto; try omega)). (** 41 cases remain. They're about inequalities. *) Abort. (** To make progress, we can set up some helper lemmas. *) Lemma ForallT_imp : forall (P Q : int -> V -> Prop) t, ForallT P t -> (forall k v, P k v -> Q k v) -> ForallT Q t. Proof. induction t; intros. - auto. - destruct H as [? [? ?]]. repeat split; auto. Qed. Lemma ForallT_greater : forall t k k0, ForallT (fun k' _ => Abs k' > Abs k) t -> Abs k > Abs k0 -> ForallT (fun k' _ => Abs k' > Abs k0) t. Proof. intros. eapply ForallT_imp; eauto. intros. simpl in H1. omega. Qed. Lemma ForallT_less : forall t k k0, ForallT (fun k' _ => Abs k' < Abs k) t -> Abs k < Abs k0 -> ForallT (fun k' _ => Abs k' < Abs k0) t. Proof. intros; eapply ForallT_imp; eauto. intros. simpl in H1. omega. Qed. (** Now we can return to automating the proof. *) Lemma balance_BST: forall (c : color) (l : tree) (k : key) (v : V) (r : tree), ForallT (fun k' _ => (Abs k') < (Abs k)) l -> ForallT (fun k' _ => (Abs k') > (Abs k)) r -> BST l -> BST r -> BST (balance c l k v r). Proof. intros. unfold balance. repeat (match goal with | |- BST (match ?c with Red => _ | Black => _ end) => destruct c | |- BST (match ?s with E => _ | T _ _ _ _ _ => _ end) => destruct s | |- ForallT _ (T _ _ _ _ _) => repeat split | H: ForallT _ (T _ _ _ _ _) |- _ => destruct H as [? [? ?] ] | H: BST (T _ _ _ _ _) |- _ => inv H end; (try constructor; auto; try omega)). (* [all: t] applies [t] to every subgoal. *) all: try eapply ForallT_greater; try eapply ForallT_less; eauto; try omega. Qed. (* ...after several exercises... *) (** **** Exercise: 2 stars, standard (insert_BST) *) Theorem insert_BST : forall t v k, BST t -> BST (insert k v t). Proof. (* FILL IN HERE *) Admitted. (** [] *) (* ###################################################################### *) (** * Verification *) (** We now verify the equational specification of maps holds for red-black trees: lookup k empty_tree = default lookup k (insert k v t) = v lookup k' (insert k v t) = lookup k' t if k <> k' The first equation is trivial to verify. *) Lemma lookup_empty : forall k, lookup k empty_tree = default. Proof. auto. Qed. (** The next two equations are more challenging because of [balance]. *) (** ...after several helper lemmas... *) (** **** Exercise: 3 stars, standard (lookup_insert) *) Theorem lookup_insert_eq : forall (t : tree) (k : key) (v : V), BST t -> lookup k (insert k v t) = v. Proof. (* FILL IN HERE *) Admitted. Theorem lookup_insert_neq: forall (t : tree) (k k' : key) (v : V), BST t -> k <> k' -> lookup k' (insert k v t) = lookup k' t. Proof. (* FILL IN HERE *) Admitted. (** [] *) (* ###################################################################### *) (** * Efficiency *) (** Red-black trees must also satisfy the _red-black invariants_: *) (** - Local Invariant: No red node has a red child. - Global Invariant: Every path from the root to a leaf has the same number of black nodes. *) (** These ensure that the tree remains approximately balanced. *) (** Proposition [RB t c n] holds when [t] satisfies the red-black invariants, assuming that [c] is the color of [t]'s parent, and [n] is the black height that [t] is supposed to have. *) Inductive RB : tree -> color -> nat -> Prop := | RB_leaf: forall (c : color), RB E c 0 | RB_r: forall (l r : tree) (k : key) (v : V) (n : nat), RB l Red n -> RB r Red n -> RB (T Red l k v r) Black n | RB_b: forall (c : color) (l r : tree) (k : key) (v : V) (n : nat), RB l Black n -> RB r Black n -> RB (T Black l k v r) c (S n). (** ...after many helper lemmas... *) (** **** Exercise: 2 stars, standard (insert_RB) *) (** Prove that [insert] maintains the [RB] invariant. *) Lemma insert_RB : forall (t : tree) (k : key) (v : V) (n : nat), RB t Red n -> exists (n' : nat), RB (insert k v t) Red n'. Proof. (* FILL IN HERE *) Admitted. (** [] *) End ValueType. (* ################################################################# *) (** * Performance of Extracted Code *) (** We can extract the red-black tree implementation: *) Extraction "redblack.ml" empty_tree insert lookup elements. (** Run it in the OCaml top level with these commands: #use "redblack.ml";; #use "test_searchtree.ml";; On a recent machine with a 2.9 GHz Intel Core i9 that prints: Insert and lookup 1000000 random integers in 0.860663 seconds. Insert and lookup 20000 random integers in 0.007908 seconds. Insert and lookup 20000 consecutive integers in 0.004668 seconds. That execution uses the bytecode interpreter. The native compiler will have better performance: $ ocamlopt -c redblack.mli redblack.ml $ ocamlopt redblack.cmx -open Redblack test_searchtree.ml -o test_redblack $ ./test_redblack On the same machine that prints, Insert and lookup 1000000 random integers in 0.475669 seconds. Insert and lookup 20000 random integers in 0.00312 seconds. Insert and lookup 20000 consecutive integers in 0.001183 seconds. *) (** The benchmark measurements above (and in [Extract]) demonstrate the following: - On random insertions, red-black trees are about the same as ordinary BSTs. - On consecutive insertions, red-black trees are _much_ faster than ordinary BSTs. - Red-black trees are about as fast on consecutive insertions as on random. *)