Require Import Coq.Program.Equality.
Require Import Coq.Init.Logic.
Require Import List.
Require Import Arith.
Require Import Eqdep.
Unset Automatic Introduction.
Set Implicit Arguments.
Module Type MATCHER_ARG.
Require Import Coq.Init.Logic.
Require Import List.
Require Import Arith.
Require Import Eqdep.
Unset Automatic Introduction.
Set Implicit Arguments.
Module Type MATCHER_ARG.
Our development will be parameterized over a type of characters that supports
decidable equality.
Parameter char_p : Set.
Parameter char_eq : forall (c1 c2:char_p), {c1=c2} + {c1<>c2}.
End MATCHER_ARG.
Module Matcher(MA : MATCHER_ARG).
Import MA.
Parameter char_eq : forall (c1 c2:char_p), {c1=c2} + {c1<>c2}.
End MATCHER_ARG.
Module Matcher(MA : MATCHER_ARG).
Import MA.
Regular expression abstract syntax
Inductive regexp : Set :=
| Any : regexp
| Char (c: char_p) : regexp
| Eps : regexp
| Cat (r1 r2: regexp) : regexp
| Alt (r1 r2: regexp) : regexp
| Zero : regexp
| Star (r: regexp) : regexp.
Definition regexp_eq : forall (r1 r2:regexp), {r1=r2} + {r1<>r2}.
intros.
decide equality. apply char_eq.
Defined.
| Any : regexp
| Char (c: char_p) : regexp
| Eps : regexp
| Cat (r1 r2: regexp) : regexp
| Alt (r1 r2: regexp) : regexp
| Zero : regexp
| Star (r: regexp) : regexp.
Definition regexp_eq : forall (r1 r2:regexp), {r1=r2} + {r1<>r2}.
intros.
decide equality. apply char_eq.
Defined.
A simplification tactic used through the development
Ltac mysimp :=
simpl in * ; intros ;
repeat match goal with
| [ |- context[char_eq ?x ?y] ] => destruct (char_eq x y) ; auto
| [ |- _ /\ _ ] => split
| [ H : existT ?f ?t ?x = existT ?f ?t ?y |- _ ] =>
generalize (inj_pairT2 _ f t x y H) ; clear H ; intro H ; subst
| [ H : _ /\ _ |- _ ] => destruct H
| [ |- context[ _ ++ nil ] ] => rewrite <- app_nil_end
| [ H : exists x, _ |- _ ] => destruct H
| [ H : _ \/ _ |- _] => destruct H
| [ H : _ <-> _ |- _] => destruct H
| [ |- _ <-> _ ] => split
| [ H : _::_ = _::_ |- _] => injection H ; clear H
| _ => idtac
end ; try (firstorder) ; auto.
simpl in * ; intros ;
repeat match goal with
| [ |- context[char_eq ?x ?y] ] => destruct (char_eq x y) ; auto
| [ |- _ /\ _ ] => split
| [ H : existT ?f ?t ?x = existT ?f ?t ?y |- _ ] =>
generalize (inj_pairT2 _ f t x y H) ; clear H ; intro H ; subst
| [ H : _ /\ _ |- _ ] => destruct H
| [ |- context[ _ ++ nil ] ] => rewrite <- app_nil_end
| [ H : exists x, _ |- _ ] => destruct H
| [ H : _ \/ _ |- _] => destruct H
| [ H : _ <-> _ |- _] => destruct H
| [ |- _ <-> _ ] => split
| [ H : _::_ = _::_ |- _] => injection H ; clear H
| _ => idtac
end ; try (firstorder) ; auto.
Simplification followed by substitution.
Ltac s := repeat (mysimp ; subst).
Semantics for regexp
Now we can give a semantic interpretation to the regexps. In essence, we are giving an interpretation as (inductively-generated) sets of strings. The notation is meant to suggest "Oxford" brackets, but is probably an unfortunate choice as it conflicts with the Ltac notation.
Reserved Notation "[[ r ]]" (at level 0).
Inductive in_regexp : regexp -> list char_p -> Prop :=
| Any_i : forall c, [[ Any ]] (c::nil)
| Char_i : forall c, [[ Char c ]] (c::nil)
| Eps_i : [[ Eps ]] nil
| Alt_left_i : forall (r1 r2:regexp) cs,
[[ r1 ]] cs -> [[ Alt r1 r2 ]] cs
| Alt_right_i : forall (r1 r2:regexp) cs,
[[ r2 ]] cs -> [[ Alt r1 r2 ]] cs
| Cat_i : forall (r1 r2:regexp) cs1 cs2,
[[ r1 ]] cs1 -> [[ r2 ]] cs2 -> [[ Cat r1 r2 ]] (cs1++cs2)
| Star_eps_i : forall (r:regexp), [[ Star r ]] nil
| Star_cat_i : forall (r:regexp) cs1 cs2,
[[ r ]] cs1 -> [[ Star r ]] cs2 -> [[ Star r ]] (cs1++cs2)
where "[[ r ]]" := (in_regexp r).
Hint Constructors in_regexp : dfa.
Inductive in_regexp : regexp -> list char_p -> Prop :=
| Any_i : forall c, [[ Any ]] (c::nil)
| Char_i : forall c, [[ Char c ]] (c::nil)
| Eps_i : [[ Eps ]] nil
| Alt_left_i : forall (r1 r2:regexp) cs,
[[ r1 ]] cs -> [[ Alt r1 r2 ]] cs
| Alt_right_i : forall (r1 r2:regexp) cs,
[[ r2 ]] cs -> [[ Alt r1 r2 ]] cs
| Cat_i : forall (r1 r2:regexp) cs1 cs2,
[[ r1 ]] cs1 -> [[ r2 ]] cs2 -> [[ Cat r1 r2 ]] (cs1++cs2)
| Star_eps_i : forall (r:regexp), [[ Star r ]] nil
| Star_cat_i : forall (r:regexp) cs1 cs2,
[[ r ]] cs1 -> [[ Star r ]] cs2 -> [[ Star r ]] (cs1++cs2)
where "[[ r ]]" := (in_regexp r).
Hint Constructors in_regexp : dfa.
Equivalence of regular expression parsers.
Definition reg_eq (r1 r2: regexp) : Prop := forall cs, [[r1]] cs <-> [[r2]] cs.
Infix "[=]" := reg_eq (right associativity, at level 85).
Infix "[=]" := reg_eq (right associativity, at level 85).
Reflexivity
Lemma reg_eq_refl : forall (r:regexp), r [=] r.
unfold reg_eq. tauto.
Qed.
Hint Resolve reg_eq_refl : dfa.
unfold reg_eq. tauto.
Qed.
Hint Resolve reg_eq_refl : dfa.
Transitivity
Lemma reg_eq_trans : forall (r1 r2 r3: regexp), r1 [=] r2 -> r2 [=] r3 -> r1 [=] r3.
unfold reg_eq. mysimp.
Qed.
unfold reg_eq. mysimp.
Qed.
Symmetry
Lemma reg_eq_sym : forall (r1 r2: regexp), r1 [=] r2 -> r2 [=] r1.
unfold reg_eq ; mysimp.
Qed.
Ltac in_inv :=
match goal with
| [ H : in_regexp Any _ |- _] => inversion H ; clear H
| [ H : in_regexp (Char _) _ |- _ ] => inversion H ; clear H ; subst ; mysimp
| [ H : in_regexp Eps _ |- _] => inversion H ; clear H
| [ H : in_regexp Zero _ |- _] => inversion H
| [ H : in_regexp (Cat _ _) _ |- _] => inversion H ; clear H
| [ H : in_regexp (Alt _ _) _ |- _] => inversion H ; clear H
end ; subst.
unfold reg_eq ; mysimp.
Qed.
Ltac in_inv :=
match goal with
| [ H : in_regexp Any _ |- _] => inversion H ; clear H
| [ H : in_regexp (Char _) _ |- _ ] => inversion H ; clear H ; subst ; mysimp
| [ H : in_regexp Eps _ |- _] => inversion H ; clear H
| [ H : in_regexp Zero _ |- _] => inversion H
| [ H : in_regexp (Cat _ _) _ |- _] => inversion H ; clear H
| [ H : in_regexp (Alt _ _) _ |- _] => inversion H ; clear H
end ; subst.
A little tactic that tries to prove in_regexp.
Ltac pv_in :=
try in_inv ;
match goal with
| [ |- in_regexp (Cat _ _) (_ ++ _) ] => apply Cat_i ; auto
| [ |- in_regexp (Cat _ _) nil ] =>
let H := fresh "H" in
assert (H:@nil char_p= nil ++ nil) ; [auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| [ |- in_regexp (Cat _ _) (?c :: ?x ++ ?y) ] =>
let H := fresh "H" in assert (H: c::x++y = (c::x) ++ y) ;
[ auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| _ => auto with dfa
end.
try in_inv ;
match goal with
| [ |- in_regexp (Cat _ _) (_ ++ _) ] => apply Cat_i ; auto
| [ |- in_regexp (Cat _ _) nil ] =>
let H := fresh "H" in
assert (H:@nil char_p= nil ++ nil) ; [auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| [ |- in_regexp (Cat _ _) (?c :: ?x ++ ?y) ] =>
let H := fresh "H" in assert (H: c::x++y = (c::x) ++ y) ;
[ auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| _ => auto with dfa
end.
Cat r Eps is equivalent to r
Lemma cat_eps_r : forall r, (Cat r Eps) [=] r.
Proof.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite (app_nil_end cs). repeat pv_in.
Qed.
Proof.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite (app_nil_end cs). repeat pv_in.
Qed.
Cat Eps r is equivalent to r
Lemma cat_eps_l : forall r, (Cat Eps r) [=] r.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite <- (app_nil_l cs). repeat pv_in.
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite <- (app_nil_l cs). repeat pv_in.
Qed.
Cat r Zero is equivalent to Zero
Lemma cat_zero_r : forall r, (Cat r Zero) [=] Zero.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
Cat Zero r is equivalent to Zero
Lemma cat_zero_l : forall r, (Cat Zero r) [=] Zero.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
An optimized constructor for Cat.
Definition OptCat (r1 r2: regexp) :=
match r1, r2 with
| Zero, _ => Zero
| Eps, r2 => r2
| _, Zero => Zero
| r1, Eps => r1
| r1, r2 => Cat r1 r2
end.
Lemma opt_cat : forall r1 r2, OptCat r1 r2 [=] Cat r1 r2.
intros. apply reg_eq_sym ;
destruct r1 ; intros ; simpl ;
(apply cat_zero_l || apply cat_eps_l || idtac) ;
destruct r2 ; simpl ;
(apply cat_zero_r || apply cat_eps_r || apply reg_eq_refl).
Qed.
match r1, r2 with
| Zero, _ => Zero
| Eps, r2 => r2
| _, Zero => Zero
| r1, Eps => r1
| r1, r2 => Cat r1 r2
end.
Lemma opt_cat : forall r1 r2, OptCat r1 r2 [=] Cat r1 r2.
intros. apply reg_eq_sym ;
destruct r1 ; intros ; simpl ;
(apply cat_zero_l || apply cat_eps_l || idtac) ;
destruct r2 ; simpl ;
(apply cat_zero_r || apply cat_eps_r || apply reg_eq_refl).
Qed.
Alt r Zero is equivalent to r.
Lemma alt_zero_r : forall r, (Alt r Zero) [=] r.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
Alt Zero r is equivalent to r.
Lemma alt_zero_l : forall r, (Alt Zero r) [=] r.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
Alt r r is equivalent to r.
Lemma alt_r_r : forall r, (Alt r r) [=] r.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
Optimized version of Alt.
Definition OptAlt (r1 r2:regexp) : regexp :=
match r1, r2 with
| Zero, r2 => r2
| r1, Zero => r1
| r1, r2 => if regexp_eq r1 r2 then r1 else Alt r1 r2
end.
match r1, r2 with
| Zero, r2 => r2
| r1, Zero => r1
| r1, r2 => if regexp_eq r1 r2 then r1 else Alt r1 r2
end.
OptAlt r1 r2 is equivalent to Alt r1 r2
Lemma opt_alt : forall r1 r2, OptAlt r1 r2 [=] Alt r1 r2.
intros. apply reg_eq_sym.
destruct r1 ; (apply alt_zero_l || idtac) ;
destruct r2 ; (apply alt_zero_r || apply alt_r_r || apply reg_eq_refl || idtac) ;
match goal with
| [ |- _ [=] OptAlt (Char ?c1) (Char ?c2)] =>
simpl ; destruct (regexp_eq (Char c1) (Char c2));
destruct (char_eq c1 c2) ; subst ; simpl ;
try apply alt_r_r ; apply reg_eq_refl
| [ |- _ [=] OptAlt (_ ?r1 ?r2) (_ ?r3 ?r4) ] =>
simpl ; destruct (regexp_eq r1 r3) ; subst ;
try apply reg_eq_refl ;
destruct (regexp_eq r2 r4) ; subst ; simpl ;
try apply alt_r_r ; apply reg_eq_refl
| [ |- _ [=] OptAlt (Star ?r1) (Star ?r2) ] =>
simpl ; destruct (regexp_eq r1 r2) ; subst ;
try apply alt_r_r ; apply reg_eq_refl
end.
Qed.
intros. apply reg_eq_sym.
destruct r1 ; (apply alt_zero_l || idtac) ;
destruct r2 ; (apply alt_zero_r || apply alt_r_r || apply reg_eq_refl || idtac) ;
match goal with
| [ |- _ [=] OptAlt (Char ?c1) (Char ?c2)] =>
simpl ; destruct (regexp_eq (Char c1) (Char c2));
destruct (char_eq c1 c2) ; subst ; simpl ;
try apply alt_r_r ; apply reg_eq_refl
| [ |- _ [=] OptAlt (_ ?r1 ?r2) (_ ?r3 ?r4) ] =>
simpl ; destruct (regexp_eq r1 r3) ; subst ;
try apply reg_eq_refl ;
destruct (regexp_eq r2 r4) ; subst ; simpl ;
try apply alt_r_r ; apply reg_eq_refl
| [ |- _ [=] OptAlt (Star ?r1) (Star ?r2) ] =>
simpl ; destruct (regexp_eq r1 r2) ; subst ;
try apply alt_r_r ; apply reg_eq_refl
end.
Qed.
Now we define the actual derivative-based recognizer.
Fixpoint null (r:regexp) : regexp :=
match r with
| Any => Zero
| Char _ => Zero
| Eps => Eps
| Zero => Zero
| Alt r1 r2 => OptAlt (null r1) (null r2)
| Cat r1 r2 => OptCat (null r1) (null r2)
| Star _ => Eps
end.
Definition accepts_null (r:regexp) := regexp_eq (null r) Eps.
match r with
| Any => Zero
| Char _ => Zero
| Eps => Eps
| Zero => Zero
| Alt r1 r2 => OptAlt (null r1) (null r2)
| Cat r1 r2 => OptCat (null r1) (null r2)
| Star _ => Eps
end.
Definition accepts_null (r:regexp) := regexp_eq (null r) Eps.
This is the heart of the algorithm. It returns a regexp denoting
{ cs | (c::cs) in r }.
Fixpoint deriv (r:regexp) (c:char_p) : regexp :=
match r with
| Any => Eps
| Char c' => if char_eq c c' then Eps else Zero
| Eps => Zero
| Zero => Zero
| Alt r1 r2 => OptAlt (deriv r1 c) (deriv r2 c)
| Cat r1 r2 => OptAlt (OptCat (deriv r1 c) r2) (OptCat (null r1) (deriv r2 c))
| Star r => OptCat (deriv r c) (Star r)
end.
match r with
| Any => Eps
| Char c' => if char_eq c c' then Eps else Zero
| Eps => Zero
| Zero => Zero
| Alt r1 r2 => OptAlt (deriv r1 c) (deriv r2 c)
| Cat r1 r2 => OptAlt (OptCat (deriv r1 c) r2) (OptCat (null r1) (deriv r2 c))
| Star r => OptCat (deriv r c) (Star r)
end.
This calculates the derivative of a regular expression with respect to a string.
Fixpoint derivs (r:regexp) (cs:list char_p) : regexp :=
match cs with
| nil => r
| c::cs' => derivs (deriv r c) cs'
end.
match cs with
| nil => r
| c::cs' => derivs (deriv r c) cs'
end.
To see if cs matches r, calculate the derivative of r with respect
to s, and see if the resulting regexp accepts the empty string.
Definition deriv_parse r cs :=
if accepts_null (derivs r cs) then true else false.
if accepts_null (derivs r cs) then true else false.
Tactic for helping to reason about the optimizing constructors.
Ltac pv_opt :=
match goal with
| [ |- in_regexp (OptAlt ?r1 ?r2) ?cs ] =>
apply (proj2 (opt_alt r1 r2 cs))
| [ |- in_regexp (OptCat ?r1 ?r2) ?cs ] =>
apply (proj2 (opt_cat r1 r2 cs))
| [ H : ?x ++ ?y = nil |- _] =>
generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ H : nil = ?x ++ ?y |- _] =>
generalize (app_eq_nil _ _ (eq_sym H)) ; clear H ; mysimp
| [ H : in_regexp (OptCat ?r1 ?r2) ?cs |- _] =>
generalize (proj1 (opt_cat r1 r2 cs) H) ; clear H ; intro H
| [ H : in_regexp (OptAlt ?r1 ?r2) ?cs |- _] =>
generalize (proj1 (opt_alt r1 r2 cs) H) ; clear H ; intro H
| [ H : (_,_) = (_,_) |- _ ] => injection H ; clear H ; mysimp
| [ H : ?cs1 ++ ?cs2 = ?c :: ?cs |- _ ] =>
destruct cs1 ; simpl in H ; subst ; [ idtac | injection H ; intros ; subst ]
| _ => eauto with dfa
end.
match goal with
| [ |- in_regexp (OptAlt ?r1 ?r2) ?cs ] =>
apply (proj2 (opt_alt r1 r2 cs))
| [ |- in_regexp (OptCat ?r1 ?r2) ?cs ] =>
apply (proj2 (opt_cat r1 r2 cs))
| [ H : ?x ++ ?y = nil |- _] =>
generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ H : nil = ?x ++ ?y |- _] =>
generalize (app_eq_nil _ _ (eq_sym H)) ; clear H ; mysimp
| [ H : in_regexp (OptCat ?r1 ?r2) ?cs |- _] =>
generalize (proj1 (opt_cat r1 r2 cs) H) ; clear H ; intro H
| [ H : in_regexp (OptAlt ?r1 ?r2) ?cs |- _] =>
generalize (proj1 (opt_alt r1 r2 cs) H) ; clear H ; intro H
| [ H : (_,_) = (_,_) |- _ ] => injection H ; clear H ; mysimp
| [ H : ?cs1 ++ ?cs2 = ?c :: ?cs |- _ ] =>
destruct cs1 ; simpl in H ; subst ; [ idtac | injection H ; intros ; subst ]
| _ => eauto with dfa
end.
Useful for some rewriting steps below.
Lemma app_nil_beg : forall A (cs:list A), cs = nil ++ cs.
auto.
Qed.
Lemma app_cons : forall A (x:A) (y z:list A), x :: y ++ z = (x::y) ++ z.
auto.
Qed.
auto.
Qed.
Lemma app_cons : forall A (x:A) (y z:list A), x :: y ++ z = (x::y) ++ z.
auto.
Qed.
r accepts the empty string iff null r accepts the empty string.
Lemma Null1 : forall r, [[r]] nil -> [[null r]] nil.
induction r ; simpl ; intros ; repeat (in_inv ; pv_opt) ; eauto with dfa ;
pv_opt ; subst ; auto with dfa.
Qed.
Hint Resolve Null1.
Lemma Null2 : forall r, [[null r]] nil -> [[r]] nil.
induction r ; simpl ; intros ; repeat in_inv ; repeat pv_opt ; repeat in_inv ;
eauto with dfa ; pv_opt ; subst ; eauto with dfa.
Qed.
Hint Resolve Null2.
induction r ; simpl ; intros ; repeat (in_inv ; pv_opt) ; eauto with dfa ;
pv_opt ; subst ; auto with dfa.
Qed.
Hint Resolve Null1.
Lemma Null2 : forall r, [[null r]] nil -> [[r]] nil.
induction r ; simpl ; intros ; repeat in_inv ; repeat pv_opt ; repeat in_inv ;
eauto with dfa ; pv_opt ; subst ; eauto with dfa.
Qed.
Hint Resolve Null2.
If null r matches cs, then cs must be empty
Lemma NullNil : forall r cs, [[null r]] cs -> cs = nil.
Proof.
induction r ; simpl ; intros ; try in_inv ; auto ; pv_opt ; try in_inv ; repeat
match goal with
| [ IHr : forall cs, in_regexp (null ?r) _ -> _ = _,
H : in_regexp (null ?r) _ |- _] =>
rewrite (IHr _ H) ; clear IHr ; auto
end.
Qed.
Proof.
induction r ; simpl ; intros ; try in_inv ; auto ; pv_opt ; try in_inv ; repeat
match goal with
| [ IHr : forall cs, in_regexp (null ?r) _ -> _ = _,
H : in_regexp (null ?r) _ |- _] =>
rewrite (IHr _ H) ; clear IHr ; auto
end.
Qed.
deriv is correct part 1.
Lemma Deriv1 : forall r c cs, [[r]] (c::cs) -> [[deriv r c]] cs.
Proof.
induction r ; simpl ; intros ; (repeat in_inv) ; (repeat pv_opt) ;
match goal with
| [ H : in_regexp _ nil |- in_regexp (Alt _ _) ?cs ] =>
apply Alt_right_i ; rewrite (app_nil_beg cs)
| [ |- in_regexp (Alt _ _) _ ] => apply Alt_left_i
| [ H : in_regexp (Star _) _ |- _ ] => idtac
end ; repeat pv_opt.
remember (Star r) as r1 ; remember (c::cs) as cs1 ;
generalize Heqr1 Heqcs1 ; clear Heqr1 Heqcs1 ;
induction H ; intros ; try congruence ;
injection Heqr1 ; intros ; subst ; destruct cs1 ; simpl in * ; subst ;
[eapply IHin_regexp2 ; auto | injection Heqcs1 ; intros ; subst; auto with dfa].
Qed.
Proof.
induction r ; simpl ; intros ; (repeat in_inv) ; (repeat pv_opt) ;
match goal with
| [ H : in_regexp _ nil |- in_regexp (Alt _ _) ?cs ] =>
apply Alt_right_i ; rewrite (app_nil_beg cs)
| [ |- in_regexp (Alt _ _) _ ] => apply Alt_left_i
| [ H : in_regexp (Star _) _ |- _ ] => idtac
end ; repeat pv_opt.
remember (Star r) as r1 ; remember (c::cs) as cs1 ;
generalize Heqr1 Heqcs1 ; clear Heqr1 Heqcs1 ;
induction H ; intros ; try congruence ;
injection Heqr1 ; intros ; subst ; destruct cs1 ; simpl in * ; subst ;
[eapply IHin_regexp2 ; auto | injection Heqcs1 ; intros ; subst; auto with dfa].
Qed.
deriv is correct part 2.
Lemma Deriv2 : forall r c cs, [[deriv r c]] cs -> [[r]] (c::cs).
Proof.
induction r ; simpl ; intros ; try in_inv ; auto with dfa ;
repeat match goal with
| [ H : context[char_eq ?c1 ?c2] |- _ ] =>
destruct (char_eq c1 c2) ; subst ; try congruence ; in_inv ; auto with dfa
| [ H1: in_regexp (deriv ?r1 _) _, H2 : in_regexp ?r2 _ |-
in_regexp (Cat ?r1 ?r2) (_ :: _ ++ _) ] => rewrite app_cons ; auto with dfa
| [ H1 : in_regexp (null ?r1) _ |- _ ] =>
generalize (NullNil _ H1) ; intros ; subst ; rewrite app_nil_beg ; auto with dfa
| [ |- in_regexp (Star _) (_::_++_) ] => rewrite app_cons ; eauto with dfa
| _ => pv_opt ; in_inv ; auto with dfa
end.
Qed.
Proof.
induction r ; simpl ; intros ; try in_inv ; auto with dfa ;
repeat match goal with
| [ H : context[char_eq ?c1 ?c2] |- _ ] =>
destruct (char_eq c1 c2) ; subst ; try congruence ; in_inv ; auto with dfa
| [ H1: in_regexp (deriv ?r1 _) _, H2 : in_regexp ?r2 _ |-
in_regexp (Cat ?r1 ?r2) (_ :: _ ++ _) ] => rewrite app_cons ; auto with dfa
| [ H1 : in_regexp (null ?r1) _ |- _ ] =>
generalize (NullNil _ H1) ; intros ; subst ; rewrite app_nil_beg ; auto with dfa
| [ |- in_regexp (Star _) (_::_++_) ] => rewrite app_cons ; eauto with dfa
| _ => pv_opt ; in_inv ; auto with dfa
end.
Qed.
Same thing except for derivatives of strings.
Lemma Derivs1 cs r : [[derivs r cs]] nil -> [[r]] cs.
Proof.
induction cs ; simpl ; intros ; auto ; apply Deriv2 ; auto.
Qed.
Lemma Derivs2 cs r : [[r]] cs -> [[derivs r cs]] nil.
Proof.
induction cs ; simpl ; intros ; auto ; apply IHcs. apply Deriv1 ; auto.
Qed.
Proof.
induction cs ; simpl ; intros ; auto ; apply Deriv2 ; auto.
Qed.
Lemma Derivs2 cs r : [[r]] cs -> [[derivs r cs]] nil.
Proof.
induction cs ; simpl ; intros ; auto ; apply IHcs. apply Deriv1 ; auto.
Qed.
null r returns Eps or Zero
Lemma NullEpsOrZero : forall r, {null r = Eps} + {null r = Zero}.
induction r ; simpl ; try (right ; auto; fail) ; try (left ; auto; fail) ;
destruct IHr1 ; destruct IHr2 ; rewrite e; rewrite e0 ; simpl ; auto.
Qed.
induction r ; simpl ; try (right ; auto; fail) ; try (left ; auto; fail) ;
destruct IHr1 ; destruct IHr2 ; rewrite e; rewrite e0 ; simpl ; auto.
Qed.
If null r = Eps, then r accepts the empty string.
Lemma AccNull : forall r, null r = Eps -> [[r]] nil.
Proof.
induction r ; simpl ; intros ; try congruence ; auto with dfa.
destruct (null r1) ; simpl in * ; try congruence ; destruct (null r2) ;
simpl in * ; try congruence.
rewrite app_nil_beg. auto with dfa.
destruct (NullEpsOrZero r1). apply Alt_left_i. auto.
rewrite e in *. simpl in *. apply Alt_right_i. auto.
Qed.
Proof.
induction r ; simpl ; intros ; try congruence ; auto with dfa.
destruct (null r1) ; simpl in * ; try congruence ; destruct (null r2) ;
simpl in * ; try congruence.
rewrite app_nil_beg. auto with dfa.
destruct (NullEpsOrZero r1). apply Alt_left_i. auto.
rewrite e in *. simpl in *. apply Alt_right_i. auto.
Qed.
Correctness of the derivative matcher part 1
Lemma DerivParse1 : forall r cs, deriv_parse r cs = true -> [[r]] cs.
Proof.
unfold deriv_parse. intros. intros. destruct (accepts_null (derivs r cs)) ;
try congruence. generalize (AccNull _ e). apply Derivs1.
Qed.
Proof.
unfold deriv_parse. intros. intros. destruct (accepts_null (derivs r cs)) ;
try congruence. generalize (AccNull _ e). apply Derivs1.
Qed.
Correctness of the derivative matcher part 2
Lemma DerivParse2 : forall r cs, [[r]] cs -> deriv_parse r cs = true.
Proof.
intros. unfold deriv_parse. generalize (Derivs2 H).
intros. generalize (Null1 H0). unfold accepts_null.
generalize (NullEpsOrZero (derivs r cs)). intros. destruct H1 ;
rewrite e in *. destruct (regexp_eq Eps Eps) ; try congruence ; auto.
inversion H2.
Qed.
Proof.
intros. unfold deriv_parse. generalize (Derivs2 H).
intros. generalize (Null1 H0). unfold accepts_null.
generalize (NullEpsOrZero (derivs r cs)). intros. destruct H1 ;
rewrite e in *. destruct (regexp_eq Eps Eps) ; try congruence ; auto.
inversion H2.
Qed.
From this, we can build a decidable regexp matcher by running
the derivative-based parser.
Definition parse r cs : {[[r]] cs} + {~[[r]] cs}.
intros.
remember (deriv_parse r cs) as b.
generalize (DerivParse1 r cs).
generalize (@DerivParse2 r cs).
intros.
destruct b. left ; auto.
right. rewrite <- Heqb in *. intro. generalize (H H1). intro. congruence.
Qed.
End Matcher.
Set Extraction AccessOpaque.
Extraction Matcher.
intros.
remember (deriv_parse r cs) as b.
generalize (DerivParse1 r cs).
generalize (@DerivParse2 r cs).
intros.
destruct b. left ; auto.
right. rewrite <- Heqb in *. intro. generalize (H H1). intro. congruence.
Qed.
End Matcher.
Set Extraction AccessOpaque.
Extraction Matcher.
Exercises:
- Show that (Star (Star r)) is equivalent to (Star r) and incorporate this
in the optimizations.
- Show that (Star Eps) is equivalent to Eps.
- Following Bob Harper's JFP article, an expression r is "standard" if
whenever it contains a sub-expression (Star r'), then r' does not
accept the empty string. Write a function that converts an arbitrary
regular expression to an equivalent, standard regular-expression.
- An alternative to our derivative-based matcher is to build one based
on the list monad. That is, define an interp function of type
regexp -> list char_p -> list (list char_p) with the intution
that: