
structure Tokens = Tokens

type pos           = int
type svalue        = Tokens.svalue
type ('a,'b) token = ('a,'b) Tokens.token
type lexresult     = (svalue, pos) token


fun mkKW (kw, tk) = (kw, fn (p1:pos, p2:pos) => tk (p1, p2))


val keywords = map mkKW
[
  ("andalso",Tokens.KW_andalso),
  ("bool",   Tokens.KW_bool),
  ("char",   Tokens.KW_char),
  ("div",    Tokens.KW_div),
  ("else",   Tokens.KW_else),
  ("end",    Tokens.KW_end),
  ("fn",     Tokens.KW_fn),
  ("fun",    Tokens.KW_fun),
  ("if",     Tokens.KW_if),
  ("in",     Tokens.KW_in),
  ("int",    Tokens.KW_int),
  ("let",    Tokens.KW_let),
  ("list",   Tokens.KW_list),
  ("mod",    Tokens.KW_mod),
  ("not",    Tokens.KW_not),
  ("of",     Tokens.KW_of),
  ("orelse", Tokens.KW_orelse),
  ("real",   Tokens.KW_real),
  ("ref",    Tokens.KW_ref),
  ("true",   Tokens.KW_true),
  ("false",  Tokens.KW_false),
  ("string", Tokens.KW_string),
  ("then",   Tokens.KW_then),
  ("undefined", Tokens.KW_undefined),
  ("unit",   Tokens.KW_unit),
  ("val",    Tokens.KW_val)
]


fun findKeywords (str: string, pos1: pos, pos2: pos) =
  case List.find (fn (s, _) => s = str) keywords of
    NONE          => Tokens.ID(str, pos1, pos2)
  | SOME((_, tk)) => tk(pos1, pos2)


(* Line number counter. *)
val pos = ref 1

fun error (e,l : int,_) =
     (TextIO.output (TextIO.stdOut,
                     String.concat["line ", (Int.toString l), ": ", e, "\n"]);
      raise Fail "")

fun eof ()          = Tokens.EOF(!pos,!pos)

val charlist        = ref ([] : string list)

val commentNesting  = ref (0)

fun addString (s)   = (charlist :=  s::(!charlist))

fun makeString ()   = (concat(rev(!charlist)) before charlist:=[])


%%

%s STRING CHAR COMMENT;

%header (functor MiniMLLexFun(structure Tokens: MiniML_TOKENS));


eol        = ("\n"|"\013\n"|"\013");
alpha      = [A-Za-z];
caps       = [A-Z];
digit      = [0-9];
ws         = [\ \t \n];
allchar    = ({alpha}|{digit}|"_"|"'");
identifier = {alpha}{allchar}*;
integer    = "~"?{digit}+;
real       = "~"?{digit}*"."{digit}+;


%%

<INITIAL>{eol}          => (pos := (!pos) + 1; continue());
<INITIAL>{ws}+          => (continue());

<INITIAL>{identifier}   => (findKeywords (yytext, !pos, !pos));
<INITIAL>{integer}      => (Tokens.INT(valOf (Int.fromString (yytext)), !pos, !pos));
<INITIAL>{real}         => (Tokens.REAL(valOf (Real.fromString (yytext)), !pos, !pos));

<INITIAL>"#"\"          => (YYBEGIN CHAR; charlist:=[""]; continue ());
<INITIAL>"#"            => (Tokens.HASH(!pos,!pos));
<INITIAL>";"            => (Tokens.SEMICOLON(!pos,!pos));
<INITIAL>","            => (Tokens.COMMA(!pos,!pos));
<INITIAL>"@"            => (Tokens.APPEND(!pos, !pos));
<INITIAL>"::"           => (Tokens.CONS(!pos, !pos));
<INITIAL>":="           => (Tokens.ASSGN(!pos,!pos));
<INITIAL>":"            => (Tokens.COLON(!pos,!pos));
<INITIAL>"!"            => (Tokens.BANG (!pos,!pos));
<INITIAL>"(*"           => (YYBEGIN COMMENT; commentNesting := 0; continue ());
<INITIAL>"("            => (Tokens.LPAREN(!pos,!pos));
<INITIAL>")"            => (Tokens.RPAREN(!pos,!pos));
<INITIAL>"["            => (Tokens.LBRACKET(!pos, !pos));
<INITIAL>"]"            => (Tokens.RBRACKET(!pos, !pos));
<INITIAL>"*"            => (Tokens.TIMES(!pos,!pos));
<INITIAL>"+"            => (Tokens.PLUS(!pos,!pos));
<INITIAL>"/"            => (Tokens.SLASH(!pos, !pos));
<INITIAL>"->"           => (Tokens.ARROW(!pos,!pos));
<INITIAL>"-"            => (Tokens.MINUS(!pos,!pos));
<INITIAL>"=>"           => (Tokens.DARROW(!pos,!pos));
<INITIAL>"="            => (Tokens.EQSIGN(!pos,!pos));
<INITIAL>">"            => (Tokens.GREATER(!pos,!pos));
<INITIAL>">="           => (Tokens.GREATEREQ(!pos, !pos));
<INITIAL>"<"            => (Tokens.LESS(!pos,!pos));
<INITIAL>"<="           => (Tokens.LESSEQ(!pos, !pos));
<INITIAL>"~"            => (Tokens.NEG(!pos,!pos));
<INITIAL>"^"            => (Tokens.CARET(!pos,!pos));
<INITIAL>\"             => (YYBEGIN STRING; charlist := [""]; continue ());

<INITIAL>.              => (error ("bad character(s) " ^ yytext,!pos,!pos);
                            continue());

<CHAR>{eol}             => (YYBEGIN INITIAL;
                             error ("unterminated character constant",!pos,!pos);
                             pos := (!pos)+1;
                             continue ());
<CHAR>\"                => (YYBEGIN INITIAL;
                             let val c = makeString()
                             in
                               if String.size(c) = 1 then
                                   Tokens.CHAR (valOf (Char.fromString (c)),!pos,!pos)
                               else error("invalid character", !pos, !pos)
                             end);
<CHAR>"\\\""            => (addString "\""; continue() );
<CHAR>.                 => (addString (yytext); continue ());


<STRING>{eol}           => (YYBEGIN INITIAL;
                             error ("unterminated string constant",!pos,!pos);
                                    pos := (!pos)+1; (* charlist := "";*)
                                    continue() );
<STRING>\"              => (YYBEGIN INITIAL;
                            Tokens.STRING (makeString(), !pos,!pos));
<STRING>"\\\""          => (addString "\""; continue());
<STRING>"\\n"           => (addString "\n"; continue());
<STRING>"\\t"           => (addString "\t"; continue());
<STRING>"\\r"           => (addString "\r"; continue());
<STRING>.               => (addString (yytext); continue ());
                                                                                                                                                          
                                                                                                                                                          
<COMMENT>"*)"           => (if (!commentNesting)=0
                             then (YYBEGIN INITIAL;
                                   continue ())
                             else (commentNesting := (!commentNesting)-1;
                                   continue ()));
<COMMENT>"(*"           => (commentNesting := (!commentNesting)+1;
                            continue ());
<COMMENT>.              => (continue());

