001    /* $Id $*/
002    /* Copyright 2000, 2001, Compaq Computer Corporation */
003    
004    package javafe.parser;
005    
006    import java.io.IOException;
007    import java.util.Hashtable;
008    
009    import javafe.ast.Identifier;
010    import javafe.ast.PrettyPrint;
011    import javafe.ast.LexicalPragma;
012    import javafe.ast.LexicalPragmaVec;
013    import javafe.ast._SpecialParserInterface;
014    
015    import javafe.util.Assert;
016    import javafe.util.ErrorSet;
017    import javafe.util.CorrelatedReader;
018    import javafe.util.Location;
019    
020    /**
021    
022     A <TT>Lex</TT> object generates a sequence of Java "input elements"
023     (that is, tokens) by converting the sequence of input characters and
024     line terminators generated by an underlying
025     <code>CorrelatedReader</code>.
026    
027     <p> The conversion of input characters occurs according to the lexical
028     rules in Chapter Three of the <cite>Java Language
029     Specification</cite>.  This specification describes three lexical
030     translation steps: the first two steps translate a raw Unicode input
031     stream into a "cooked" one in which Unicode escape characters from the
032     raw stream have been processed and in which line terminators have been
033     identified; the last step translates this cooked stream into a
034     sequence of Java "input elements" (comments, white space, identifiers,
035     tokens, literals, and punctuation).  <code>Lex</code> objects perform
036     the last of these translations steps; the first two are performed by
037     an underlying <code>CorrelatedReader</code> that is given to the
038     <code>Lex</code> object as the source of input characters.
039    
040     <p> Before a newly-created <code>Lex</code> object can be used, its
041     <code>restart</code> method must be called giving a
042     <code>CorrelatedReader</code> to scan.  At any point, a
043     <code>Lex</code> can be restarted on a different underlying reader.
044    
045     <p> The <TT>Lex</TT> class is thread safe, but instances of
046     <TT>Lex</TT> are not.  That is, two different threads may safely
047     access two different instances of <TT>Lex</TT> concurrently, but they
048     may not access the same instance of <TT>Lex</TT> concurrently.
049    
050    
051     <h3> Simple scanning </h3>
052    
053     <p> The <code>getNextToken</code> method of <code>Lex</code> objects
054     returns the translated token sequence, one token at a time.  It
055     discards white space, and it processes comments as described below.
056     In addition, <code>getNextToken</code> fills in the <code>Token</code>
057     fields of <code>this</code> (<code>Lex</code> is a subclass of
058     <code>Token</code>); for example, <code>ttype</code> gets an integer
059     code defining the type of token returned and <code>startingLoc</code>
060     gives the location of the first character making up the token.  If the
061     token is an identifier, <code>identifierVal</code> indicates which
062     one; if the token is a literal, <code>auxVal</code> gives its value.
063    
064     <p> <code>Lex</code> objects report errors by calling both the
065     <code>fatal</code> and <code>error</code> methods of
066     <code>ErrorSet</code>.  The fatal errors are unexpected characters,
067     unterminated comments and string and character literals, and IO errors
068     in the underlying input stream.  Recoverable errors are overflows in
069     literals (including overflows in octal escape sequences), non-octal
070     digits in integer literals, the string <code>0x</code> (interpreted as
071     a malformed integer literal), missing digits in a floating-point
072     exponent, bad escape sequences in character and string literals,
073     character literals containing no or multiple characters, and the
074     character literal <code>'''</code> (interpreted as <code>'\''</code>).
075    
076    
077     <h3> Lookahead </h3>
078    
079     <code>Lex</code> objects allow their clients to peek ahead into the
080     token stream by calling <code>lookahead</code>.  This method returns
081     the token code for the future token, but it does not affect the
082     <code>Token</code> fields of <code>this</code>.
083    
084     <p> If a call to <code>lookahead</code> needs to look past the set of
085     tokens already scanned, and those tokens have errors, then the errors
086     are reported immediately.
087    
088    
089     <h3> Extensibility: Keywords, punctuation </h3>
090    
091     <p> A keyword is a Java identifier with a special token code.
092     Ordinarily, identifiers are associated with the code
093     <code>TagConstants.IDENT</code>.  Keywords, while matching the lexical
094     grammar of identifiers, are associated with different codes.  In fact,
095     each keyword is typically associated with its own code.
096    
097     <p> The set of identifiers that a <code>Lex</code> object recognizes
098     as keywords is extensible.  A keyword is added to a <code>Lex</code>
099     object by calling the <code>addKeyword</code> method.  As a
100     convenience, a boolean given to the <code>Lex</code> constructor
101     indicates whether a newly-constructed <code>Lex</code> object should
102     automatically have all Java keywords added to it.
103    
104     <p> A punctuation string is a string of punctuation characters
105     recognized by a <code>Lex</code> object to be a token.  (Punctuation
106     characters are non-alphanumeric ASCII characters whose ASCII codes are
107     between 33 ('!')  and 126 ('~') inclusive.)  As with keywords, the set
108     of punctuation strings recognized by a <code>Lex</code> object is
109     extensible.  A punctuation string is added to a <code>Lex</code>
110     object by calling the <code>addPunctuation</code> method.  A boolean
111     given to the <code>Lex</code> constructor indicates whether a
112     newly-constructed <code>Lex</code> object should automatically have
113     all Java punctuation strings added to it.
114    
115    
116     <h3> Extensibility: comments and pragmas </h3>
117    
118     <p> The handling of comments is special in two ways: the punctuation
119     strings that start comments is extensible, the text of comments can be
120     parsed for pragmas.
121    
122     <h5> Comment recognition </h5>
123    
124     <p> Ordinarily, keywords and punctuation strings are mapped to token
125     codes that are returned by calls to <code>getNextToken</code>.
126     However, two token codes are treated specially:
127     <code>TagConstants.C_COMMENT</code> and <code>TagConstants.EOL_COMMENT</code>.
128     These codes are used to indicate the start of C-like comments
129     (<code>/*...*</code><code>/</code>) and end-of-line comments
130     (<code>//...</code>), respectively.  When a keyword or punctuation
131     string is mapped to these codes, it is handled like a comment
132     initiator rather than a regular token.
133    
134     <p> For all newly-created <code>Lex</code> objects, <code>/*</code> is
135     mapped to <code>TagConstants.C_COMMENT</code> and <code>//</code> is mapped
136     to <code>TagConstants.EOL_COMMENT</code>.  Other punctuation strings can be
137     made comment initiators by mapping them to comment-initiating codes.
138     This is more useful for <code>TagConstants.EOL_COMMENT</code> than for
139     <code>TagConstants.C_COMMENT</code>, since the string
140     <code>*</code><code>/</code> is hard-wired as the terminator of C-like
141     comments.
142    
143     <h5> Pragma parsing </h5>
144    
145     <code>Lex</code> objects are designed to support annotation of Java
146     programs through pragmas.  A <a href = "../pragma-handling.html">
147     separate document </a> describes our overall aproach to pragmas.  In
148     brief, our front-end supports two kinds of pragmas: control pragmas
149     that can appear anywhere in an input file and are collected in a list
150     apart from the parse tree, and syntax pragmas that can only appear in
151     certain grammatical contexts and become part of the parse tree.
152     Pragmas always appear in Java comments, at most one pragma per
153     comment.  These comments must have one of the following forms:
154    
155     <menu>
156     <li> <code>/*</code><i>tag</i> <i>white-space</i> <i>pragma-text</i><code>*</code><code>/</code>
157     <li> <code>//</code><i>tag</i> <i>white-space-minus-EOL</i> <i>pragma-text</i> <i>EOL</code>
158     </menu>
159    
160     <p> When a <code>Lex</code> object is created, it can optionally be
161     associated with a <code>PragmaParser</code> object.  If a
162     <code>Lex</code> object has no <code>PragmaParser</code>, it discards
163     all comments.  Otherwise, the <code>Lex</code> object passes the first
164     character of the comment (or -1 if the comment is empty) to the
165     <code>checkTag</code> method of the <code>PragmaParser</code> object,
166     which returns <code>false</code> if the comment definitely does not
167     contain any pragmas.  If the comment may contain pragmas, the
168     <code>Lex</code> object bundles the text between the delimiters of a
169     comment into a <code>CorrelatedReader</code> which it passes to the
170     <code>restart</code> method of its <code>PragmaParser</code>.  (This
171     text excludes both the opening <code>/*</code> or <code>//</code> and
172     the closing <code>*</code><code>/</code> or line terminator.)
173    
174     <p> The <code>Lex</code> object then calls <code>getNextPragma</code>
175     to read the pragmas out of the comment one at a time.  The
176     <code>Lex</code> object does this in a lazy manner; that is, it reads
177     a pragma, returns it to the parser, and waits until the parser calls
178     for the next token before it attempts to read another pragma.  The
179     <code>getNextPragma</code> method returns a boolean, returning
180     <code>false</code> if there are no more pragmas to be parsed.  The
181     <code>getNextPragma</code> method takes a <code>Token</code> as an
182     argument, storing information about the pragma parsed into this
183     argument.
184    
185     <p> When <code>PragmaParser.getNextPragma</code> returns a
186     <code>LexicalPragma</code>, the <code>Lex</code> object puts it in an
187     internal list rather than returning it to the parser.  The list of
188     collected lexical pragmas can be retrieved by calling
189     <code>getLexicalPragmas</code>.
190    
191     @see javafe.util.CorrelatedReader
192     @see javafe.parser.TagConstants
193     @see javafe.parser.Token
194     @see javafe.parser.PragmaParser
195    
196     */
197    
198    public class Lex extends Token
199    {
200        //// Underlying input stream
201    
202        /**
203         * Current state of input stream underlying the scanner minus the
204         * first character.  See <code>m_nextchr</code>. <p>
205         *
206         * This is null iff we are closed.
207         */
208    
209        /*@ spec_public @*/ protected CorrelatedReader m_in = null;
210    
211        /** Each call to <code>getNextToken</code> reads ahead one character
212         and leaves the result in <code>m_nextchr</code>.  In other words,
213         between calls to <code>getNextToken</code>, the stream of
214         characters yet-to-scanned consists of the character in
215         <code>m_nextchr</code> followed by the characters remaining in
216         <code>m_in</code>. */
217    
218        protected int m_nextchr;
219    
220    
221        //// Buffer used to hold the actual char's making up a token.
222    
223        // The exact characters making up a token are copied into this array
224        // for all tokens.  Although in some cases this isn't needed (e.g.,
225        // for string literals), we do it anyway in case someday we want to
226        // export that ability of getting a token's exact text rather than
227        // its interpretation, esp. in the case of literals.
228    
229        /** The characters that constitute the current token.  Only the
230         first <TT>textlen</TT> characters are part of the current token;
231         the actual length of <TT>text</TT> may be bigger.  The lexer may
232         occasionally need to resize <TT>text</TT>, so the same array might
233         not be used throughout the lifetime of the lexer.  */
234    
235        //@ invariant text != null;
236        //@ invariant text.length > 4;
237        protected char[] text = new char[64]; // Invariant: text.length > 4
238        // 64 should be large enough that overflows won't happen for most inputs
239    
240        /** The number of characters in the current token.  The "current
241         token" is the one parsed by the previous call to
242         <TT>getNextToken</TT> (there is no "current token" between
243         creation of a lexer and the first call to getNextToken).  */
244    
245        //@ invariant 0 <= textlen &&  textlen <= text.length;
246        protected int textlen = 0;
247    
248        /** Append 'c' to <CODE>text</CODE>, expanding if necessary. */
249        //@ modifies text, textlen;
250        //@ ensures textlen == \old(textlen)+1;
251        protected void append(int c) {
252            try {
253                text[textlen] = (char)c;            //@ nowarn IndexTooBig;  // caught
254            } catch (ArrayIndexOutOfBoundsException e) {
255                //@ assume textlen>=text.length;
256                char[] newtext = new char[textlen + 128];
257                System.arraycopy(text, 0, newtext, 0, textlen);
258                text = newtext;
259                text[textlen] = (char)c;
260            }
261            textlen++;
262        }
263    
264    
265        //// Instance variables for token queue and pragma parsing
266    
267        protected final TokenQueue lookaheadq = new TokenQueue();
268    
269        public /*@ non_null @*/ LexicalPragmaVec lexicalPragmas;
270    
271        //@ invariant inPragma ==> pragmaParser != null;
272        protected PragmaParser pragmaParser;
273        protected boolean inPragma;
274    
275    
276        //// Constructors
277    
278        /**
279         * Creates a lexical analyzer that will tokenize the characters
280         * read from an underlying <code>CorrelatedReader</code>.  Before
281         * the newly-created scanner can be used, its <code>restart</code>
282         * method must be called on a <code>CorrelatedReader</code>.  The
283         * <code>pragmaParser</code> object is used to parse pragmas out
284         * of comments; if it is <code>null</code>, all comments are
285         * discarded.  The <code>isJava</code> flag controls the initial
286         * set of keywords and punctuation strings; if <code>true</code>,
287         * the new scanner will recognize Java's keywords and punctuation
288         * strings, if <code>false</code>, the new scanner will recognize
289         * <em>no</em> keywords or punctuation strings.  If
290         * <code>isJava</code> is true, the token codes used for the
291         * Java's keywords and punctuation strings are those defined by
292         * the <code>TagConstants</code> class. 
293         */
294    
295        //@ ensures m_in==null;
296        public Lex(PragmaParser pragmaParser, boolean isJava) {
297            this.pragmaParser = pragmaParser;
298            lexicalPragmas = LexicalPragmaVec.make();
299            
300            inPragma = false;
301            if (isJava) {
302                addJavaKeywords();
303                addJavaPunctuation();
304            } else {
305                addPunctuation("/*", TagConstants.C_COMMENT);
306                addPunctuation("//", TagConstants.EOL_COMMENT);
307            }
308        }
309    
310    
311        /** Start scaning a new <code>CorrelatedReader</code>.  First closes
312         the old <code>CorrelatedReader</code> associated with
313         <code>this</code> (if there was one), and clears out the set of
314         collected lexical pragms.  In addition to (re)-seting the
315         underlying input stream, this method scans the first token,
316         returning the token kind of the result and setting the
317         <code>Token</code> fields of <code>this</code>.  If a
318         <code>CorrelatedReader</code> is already underlying
319         <code>this</code>, it is closed before the new reader is
320         installed.  Note: The argument <code>in</code> is "captured" in
321         the internal, private state of the resulting scanner and should
322         not be used by other parts of the program. */
323        //@ public normal_behavior
324        //@ modifies m_in, ttype, auxVal, identifierVal, lexicalPragmas;
325        //@ ensures m_in != null;
326        //@ also
327        //@ protected normal_behavior
328        //@ modifies m_nextchr;
329        public int restart(/*@ non_null @*/ CorrelatedReader in) {
330            close();
331            try {
332                m_in = in;
333                m_nextchr = m_in.read();
334            } catch (IOException e) {
335                ErrorSet.fatal(m_in.getLocation(), e.toString());
336                return TagConstants.NULL; // Dummy
337            }
338            return getNextToken();
339        }
340    
341    
342        /** Closes the <code>CorrelatedReader</code> underlying
343         <code>this</code>, clears the set of collected lexical pragmas,
344         and in other ways frees up resources associated with
345         <code>this</code>.  After being closed, a <code>Lex</code> object
346         can be restarted by calling <code>restart</code>.  (An IO
347         exception raised by closing the underlying input stream will be
348         converted into a <code>javafe.util.FatalError</code> runtime
349         exception.) */
350        //@ public normal_behavior
351        //@ modifies m_in, lexicalPragmas;
352        //@ ensures m_in==null;
353        //@ also 
354        //@ protected normal_behavior
355        //@ modifies inPragma;
356        public void close() {
357            if (m_in != null) {
358                m_in.close();
359            }
360            m_in = null;
361            lookaheadq.clear();
362            if (pragmaParser != null) {
363                pragmaParser.close();
364                lexicalPragmas = LexicalPragmaVec.make();
365            }
366            inPragma = false;
367        }
368    
369    
370    
371        //// Public members for scanning
372    
373        public void replaceLookaheadToken(int k, Token t) {
374            lookaheadq.setElementAt(k,t);
375        }
376    
377        /** Scans next token from input stream.  Returns the code of the
378         next token in the token stream and fills in the <code>Token</code>
379         fields of <CODE>this</CODE>.  Note that the
380         <code>startingLoc</code> and <code>endingLoc</code> fields of
381         <code>this</code> are not accurate for the end-of-file token. */
382    
383        //@ public normal_behavior
384        //@ requires m_in != null;
385        //@ modifies ttype, auxVal, identifierVal;
386        //@ also 
387        //@ protected normal_behavior
388        //@ modifies inPragma, m_nextchr;
389        public int getNextToken() {
390            if (lookaheadq.notempty) {
391                lookaheadq.dequeue(this);
392                return ttype;
393            }
394    
395            for(;;) {
396                if (inPragma)
397                    if (! pragmaParser.getNextPragma(this)) {
398                        inPragma = false;
399                    } else if (ttype != TagConstants.LEXICALPRAGMA) {
400                        return ttype;
401                    } else {
402                        lexicalPragmas.addElement((LexicalPragma)this.auxVal);
403                        continue;
404                    }
405    
406                int t = scanToken();
407                if (t != TagConstants.C_COMMENT &&
408                    t != TagConstants.EOL_COMMENT) {
409                    return t;
410                }
411                else { 
412                    scanComment(t);
413                }
414            }
415        }
416    
417        /** Returns token type of the <TT>k</TT>th future token, where k=0
418         is the current token.  If <code>k</code> is past the end of the
419         token stream, <code>TagConstants.EOF</code> is returned. */
420    
421        //@ requires k>=0;
422        //@ requires m_in != null;
423        public int lookahead(int k) {
424            if (k == 0) return ttype;
425            int lookahead_count = lookaheadq.size();
426    
427            if (lookahead_count < k) { // Need to add more tokens in lookahead buffer
428                this.copyInto(savedState);
429                try {
430                    for(int deficit = k - lookahead_count; 0 < deficit; deficit--) {
431                        for(;;) {
432                            if (inPragma)
433                                if (! pragmaParser.getNextPragma(this))
434                                    inPragma = false;
435                                else if (ttype != TagConstants.LEXICALPRAGMA)
436                                    break;
437                                else {
438                                    lexicalPragmas.addElement((LexicalPragma)this.auxVal);
439                                    continue;
440                                }
441    
442                            int t = scanToken();
443                            if (t != TagConstants.C_COMMENT
444                                && t != TagConstants.EOL_COMMENT)
445                                break;
446                            else scanComment(t);
447                        }
448                        lookaheadq.enqueue(this);
449                    }
450                } finally { 
451                    savedState.copyInto(this);
452                    savedState.clear();
453                }
454            }
455            return lookaheadq.elementAt(k-1).ttype;
456        }
457        //@ invariant savedState != null;
458        protected Token savedState = new Token();
459    
460        public Token lookaheadToken(int k) {
461            if (k==0) return this;
462            lookahead(k);
463            return lookaheadq.elementAt(k-1);
464        }
465    
466        /** Returns the set of lexical pragmas collected.  It also clears
467         the set of lexical pragmas so that the next call will not include
468         them.  (If this lexer has no <code>PragmaParser</code>, then an
469         empty vector is returned.) */
470    
471        //@ ensures \result != null;
472        public LexicalPragmaVec getLexicalPragmas() {
473            return lexicalPragmas.copy();
474        }
475    
476        /**
477         * Remove the first LexicalPragma from our set of lexical pragmas
478         * collected, returning it or null if our set is empty.
479         */
480        public LexicalPragma popLexicalPragma() {
481            if (lexicalPragmas.size()>0)
482                return lexicalPragmas.pop();
483    
484            return null;
485        }
486    
487    
488        //// Internal scanning routines
489    
490        /** Returns the code of the next token in the token stream, updating
491         the <code>Token</code> fields of <code>this</code> along the way.
492         Advances underlying stream to the character just past the last
493         character of the token returned, and changes the internal buffer
494         used by <code>getTokenText</code> to contain the text of this
495         token.
496    
497         <p> In most cases, this method leaves <code>m_nextchr</code>
498         holding the character just after the token scanned and
499         <code>m_in</code> pointing to the character after that.  However,
500         if <code>TagConstants.C_COMMENT</code> or
501         <code>TagConstants.EOL_COMMENT</code> is returned, it leaves
502         <code>m_in</code> pointing to the character just after the token
503         scanned and <code>m_nextchr</code> undefined.  This aids in pragma
504         processing. */
505    
506        //@ requires m_in != null;
507        private int scanToken() {
508            // The following "assert" is obvious from the program text, because
509            // the two arrays are non-null and their types do not coincide in any
510            // other way.  ESC/Java can figure this out, too, so therefore the
511            // following "assert" produces no warning.  This property is also
512            // cruicial to figuring out that the updates to "text[...]" in this
513            // body do not destroy the invariants declared in TagConstants about
514            // "punctuationCodes[...]".  However, oddly enough, without being
515            // prompted to first prove this property, ESC/Java isn't able to
516            // prove the necessary thing about the updates to "text[...]".
517            //@ assert (Object)text != (Object)TagConstants.punctuationCodes;
518    
519            try {
520                int nextchr = m_nextchr;
521                textlen = 0;
522    
523                while (Character.isWhitespace((char)nextchr)) {
524                    m_in.mark();
525                    nextchr = m_in.read();
526                }
527                startingLoc = m_in.getLocation();
528    
529                // Inline the identifier-handling code because it's the common case
530                if (Character.isJavaIdentifierStart((char)nextchr)) { 
531                    int h = 0;
532                    do {
533                        try {
534                            text[textlen] = (char)nextchr;
535                            textlen++;
536                        } catch (ArrayIndexOutOfBoundsException e) { this.append(nextchr);}
537                        h = _SpecialParserInterface.HC*h + nextchr;
538                        nextchr = m_in.read();
539                    } while (Character.isJavaIdentifierPart((char)nextchr));
540                    m_nextchr = nextchr;
541                    auxVal = null;
542                    identifierVal = _SpecialParserInterface.intern(text, textlen, h);
543                    if (onlyjavakeywords) {
544                        ttype = _SpecialParserInterface.getTokenType(identifierVal);
545                    } else if (keywords != null) {
546                        Object val = keywords.get(identifierVal);
547                        if (val != null) {
548                            ttype = ((Integer)val).intValue();
549                            // From the Unenforceable invariant on keyword:
550                            /*@ assume ttype != TagConstants.BOOLEANLIT &&
551                             ttype != TagConstants.INTLIT &&
552                             ttype != TagConstants.LONGLIT &&
553                             ttype != TagConstants.FLOATLIT &&
554                             ttype != TagConstants.DOUBLELIT &&
555                             ttype != TagConstants.STRINGLIT &&
556                             ttype != TagConstants.CHARLIT &&
557                             ttype != TagConstants.LEXICALPRAGMA &&
558                             ttype != TagConstants.MODIFIERPRAGMA &&
559                             ttype != TagConstants.STMTPRAGMA &&
560                             ttype != TagConstants.TYPEDECLELEMPRAGMA &&
561                             ttype != TagConstants.TYPEMODIFIERPRAGMA;  */
562                        } else if (javakeywords)
563                            ttype = _SpecialParserInterface.getTokenType(identifierVal);
564                        else ttype = TagConstants.IDENT;
565                    } else ttype = TagConstants.IDENT;
566                    endingLoc = m_in.getLocation(); // Chalin: trying to satisfy Token inv.
567                    return ttype;
568                }
569                ttype = Token.CLEAR;      // keep Token invariants happy
570                identifierVal = null;
571    
572                if (Character.isDigit((char)nextchr))
573                    return scanNumber(nextchr);
574                if (nextchr == '\'' || nextchr == '\"')
575                    return scanCharOrString(nextchr);
576    
577                scanJavaExtensions(nextchr);
578                if (ttype != TagConstants.NULL) return ttype;
579    
580                scanPunctuation(nextchr);
581                if (ttype != TagConstants.NULL) return ttype;
582    
583                if (nextchr == -1) {
584                    m_nextchr = nextchr;
585                    return ttype = TagConstants.EOF;
586                }
587    
588                String s = PrettyPrint.toCanonicalString(TagConstants.CHARLIT, 
589                                                         new Integer(nextchr));
590                ErrorSet.fatal(m_in.getLocation(), "Unexpected character " + s);
591    
592                //@ unreachable;
593                return ttype;
594    
595            } catch (IOException e) {
596                ErrorSet.fatal(m_in.getLocation(), e.toString());
597                return TagConstants.NULL; // Dummy
598            }
599        }
600    
601    
602        /** Handle a comment.  m_in points to the character just after the
603         "//" or "/*".  The mark is set at the last character read.  */
604    
605        //@ requires m_in != null;
606        //@ requires !inPragma;
607        //@ modifies inPragma, m_nextchr;
608        private void scanComment(int commentKind) {
609            try {
610                m_in.mark();
611                int firstchr = m_in.read();
612                // System.out.println("scanComment: firstchr = '"+(char)firstchr+"'");
613    
614                /* Decide if this comment contains a pragma. */
615    
616                if (pragmaParser != null) {
617                    if (pragmaParser.checkTag(firstchr)) {
618                        Assert.notFalse(! inPragma);
619                        inPragma = true;
620                    }
621                }
622    
623                // Scan the comment
624                boolean nonEmptyComment = false; // For empty multi-line comment
625          
626                int locStartComment = m_in.getLocation();
627                int nextchr = firstchr;
628                if (commentKind == TagConstants.EOL_COMMENT) {
629                    nonEmptyComment = true;
630                    while (nextchr != -1 && nextchr != '\n')
631                        nextchr = m_in.read();
632                } else {
633                    for(;;) {
634                        if (nextchr == -1) {
635                            ErrorSet.fatal(
636                               startingLoc,
637                               "Unterminated or improperly nested comment or pragma");
638                        }
639                        int oldchr = nextchr;
640                        nextchr = m_in.read();
641                        if (oldchr == '*' && nextchr == '/') break;
642                        nonEmptyComment = true;
643                    }
644                }
645    
646                // If the comment contains a pragma, set up the pragma parser
647                if (inPragma) {
648                    if(nonEmptyComment) {
649                        boolean eolComment = commentKind == TagConstants.EOL_COMMENT;
650                        int discard = !eolComment ? 2 :
651                            nextchr == '\n' ? 1 : 0;
652                        CorrelatedReader nu = m_in.createReaderFromMark(discard);
653                        pragmaParser.restart(nu, eolComment);
654                    } else {
655                        // Is an empty multi-line comment
656                        inPragma = false;
657                    }
658                }
659                if (!inPragma) {
660                    m_in.clearMark();
661                }
662    
663                // Clean up and return
664                m_nextchr = m_in.read();
665            } catch (IOException e) {
666                m_in.clearMark();
667                ErrorSet.fatal(m_in.getLocation(), "IO error");
668            }
669        }   //@ nowarn Exception;   // ignore Index... from createReaderFromMark
670    
671    
672        // Notes.  The routines below all assume that the input stream to be
673        // scanned consist of their input parameter nextchr plus the characters
674        // in m_in; that is, the nextchr argument is playing the role that
675        // m_nextchr plays between calls to getNextToken.  These routines also
676        // assume that textlen = 0 and startingLoc has already been filled in
677        // with the location of the character in nextchr.
678        // Unless otherwise noted, the routines below fill in all fields except
679        // startingLoc, that is, they fill in m_nextchr, text & textlen,
680        // plus the Token fields ttype, endingLoc, and auxVal.
681        // Errors are handled internally by these routines.  Either the
682        // error is fatal, or the error is recoverable and recovery is done
683        // by the routines.
684    
685        /** Scan a character or string constant. */
686    
687        //@ requires m_in != null;
688        private int scanCharOrString(int nextchr) {
689            try {
690                int endquote = nextchr;
691                this.append(endquote);
692                boolean chr = (endquote == '\'');
693                nextchr = m_in.read();
694                stringLitLen = 0;
695                for(;;) {
696                    if (nextchr != '\\' && nextchr != '\n' && nextchr != -1) {
697                        // Normal case
698                        this.append(nextchr);
699                        if (nextchr == endquote) { nextchr = m_in.read(); break; }
700                        this.stringLitAppend(nextchr);
701                        nextchr = m_in.read();
702                        continue;
703                    }
704    
705                    if (nextchr == -1 || nextchr == '\n') {
706                        if (chr) ErrorSet.fatal(startingLoc,
707                                                "Unterminated character literal");
708                        else ErrorSet.fatal(startingLoc, "Unterminated string literal");
709                        break;
710                    }
711    
712                    // At this point, we know we're at the start of an escape sequence
713                    this.append(nextchr);
714                    nextchr = m_in.read();
715                    if ('0' <= nextchr && nextchr <= '7') { // Octal sequence
716                        int firstdigit = nextchr - '0';
717                        int result = firstdigit;
718                        this.append(nextchr);
719                        nextchr = m_in.read();
720                        if ('0' <= nextchr && nextchr <= '7') {
721                            this.append(nextchr); result = 8*result + (nextchr - '0');
722                            nextchr = m_in.read();
723                            if (0 <= firstdigit && firstdigit <= 3
724                                && '0' <= nextchr && nextchr <= '7') {
725                                this.append(nextchr); result = 8*result + (nextchr - '0');
726                                nextchr = m_in.read();
727                            }
728                        }
729                        if (255 < result) {
730                            ErrorSet.error(m_in.getLocation(),
731                                           "Octal escape sequence overflow");
732                            this.stringLitAppend('\0');
733                        } else this.stringLitAppend(result);
734                    } else {
735                        if (nextchr == 'b') this.stringLitAppend('\b');
736                        else if (nextchr == 't')  this.stringLitAppend('\t');
737                        else if (nextchr == 'n')  this.stringLitAppend('\n');
738                        else if (nextchr == 'f')  this.stringLitAppend('\f');
739                        else if (nextchr == 'r')  this.stringLitAppend('\r');
740                        else if (nextchr == '\"') this.stringLitAppend('\"');
741                        else if (nextchr == '\'') this.stringLitAppend('\'');
742                        else if (nextchr == '\\') this.stringLitAppend('\\');
743                        else {
744                            ErrorSet.error(startingLoc, "Bad escape sequence");
745                            continue;
746                        }
747                        this.append(nextchr);
748                        nextchr = m_in.read();
749                    }
750                }
751    
752                if (chr) {
753                    if (1 < stringLitLen)
754                        ErrorSet.error(startingLoc,
755                                       "Character literal with multiple characters.");
756                    else if (stringLitLen == 0)
757                        if (nextchr == '\'') {
758                            this.append(nextchr);
759                            nextchr = m_in.read();
760                            stringLit[0] = '\'';
761                            ErrorSet.error(startingLoc, "Unquoted \' in character literal.");
762                        } else {
763                            stringLit[0] = '\0';
764                            ErrorSet.error(startingLoc, "Empty character literal.");
765                        }
766                    ttype = TagConstants.CHARLIT;
767                    auxVal = new Integer(stringLit[0]);  // CF
768                } else {
769                    ttype = TagConstants.STRINGLIT;
770                    auxVal = String.valueOf(stringLit, 0, stringLitLen);
771                }
772                m_nextchr = nextchr;
773                endingLoc = m_in.getLocation();
774                return ttype;
775            } catch (IOException e) {
776                ErrorSet.fatal(m_in.getLocation(), e.toString());
777                return TagConstants.NULL; // Dummy
778            }
779        }
780    
781    
782        /** Scans a numeric literal.  Requires <code>nextchr</code> is a
783         decimal digit.  Reads a numeric literal into <code>text</code>.
784         Depending on the kind of literal found, will return one of
785         <code>TagConstants.INTLIT</code>,
786         <code>TagConstants.LONGLIT</code>,
787         <code>TagConstants.FLOATLIT</code> or
788         <code>TagConstants.DOUBLELIT</code>.  If an error is detected, a
789         message is sent to <code>ErrorSet</code>, <code>m_in</code> is
790         advanced to what appears to be the end of the erroneous token, and
791         a legal literal is left in <code>text</code>. */
792    
793        //@ requires m_in != null;
794        //@ modifies endingLoc, ttype, auxVal, m_nextchr, text, textlen;
795        //@ ensures \result==ttype;
796        /*@ ensures \result==TagConstants.INTLIT || \result==TagConstants.LONGLIT ||
797         \result==TagConstants.FLOATLIT || \result==TagConstants.DOUBLELIT ||
798         \result==TagConstants.MAX_INT_PLUS_ONE ||
799         \result==TagConstants.MAX_LONG_PLUS_ONE ; */
800        private int scanNumber(int nextchr) {
801            try {
802                // Get the first chunk of digits
803                int tentativeResult = 0;
804                while (Character.isDigit((char)nextchr)) {
805                    this.append(nextchr);
806                    tentativeResult = 10*tentativeResult + (nextchr - '0');
807                    nextchr = m_in.read();
808                }
809                //@ assume textlen>0; // because know nextchar was initially a digit...
810    
811                // Handle floating point literals in another subtroutine
812                if (nextchr == '.') {
813                    m_in.mark();
814                    int nextnextchr = m_in.read();
815                    if (nextnextchr == '.') {
816                        // Two . in a row - cannot be a double so
817                        // back up 
818                        m_in.reset();
819                        // fall through and let the text so far be handled as
820                        // a integer literal
821                    } else {
822                        this.append(nextchr);
823                        return finishFloatingPointLiteral(nextnextchr);
824                    }
825                } else if (nextchr == 'e' || nextchr == 'E'
826                           || nextchr == 'F' || nextchr == 'f'
827                           || nextchr == 'd' || nextchr == 'D')
828                    return finishFloatingPointLiteral(nextchr);
829    
830                long result = 0;
831                if (text[0] != '0' || (textlen == 1 && nextchr != 'x' && nextchr != 'X'))
832                    // Handle base-ten literal
833                    if (textlen <= 9) result = tentativeResult;
834                    else {
835                        // Parse as negative value to avoid problems w/ -Long.MIN_VALUE
836                        long mullimit = Long.MIN_VALUE / 10;
837                        for(int i = 0; i < textlen; i++) {
838                            int d = text[i] - '0';
839                            long r2 = 10*result;
840                            if (result < mullimit || r2 < Long.MIN_VALUE + d) {
841                                ErrorSet.error(startingLoc, "Integer literal overflow");
842                                result = 0;
843                                break;
844                            } else result = r2 - d;
845                        }
846                        if (nextchr == 'L' || nextchr == 'l') {
847                            if (result == Long.MIN_VALUE) {
848                                this.append(nextchr);
849                                m_nextchr = m_in.read();
850                                endingLoc = m_in.getLocation();
851                                auxVal = null;
852                                return ttype = TagConstants.MAX_LONG_PLUS_ONE;
853                            }
854                        } else if (result == Integer.MIN_VALUE) {
855                            m_nextchr = nextchr;
856                            endingLoc = m_in.getLocation();
857                            auxVal = null;
858                            return ttype = TagConstants.MAX_INT_PLUS_ONE;
859                        }
860                        result = -result;
861                    }
862    
863                else if (nextchr != 'x' && nextchr != 'X') {
864                    // Handle octal literal
865                    for(int i = 1; i < textlen; i++) {
866                        int d = text[i] - '0';
867                        if (0 <= d && d <= 7) {
868                            if ((result >>> 61) != 0) {
869                                ErrorSet.error(startingLoc, "Integer literal overflow");
870                                result = 0;
871                                break;
872                            }
873                            result = (result << 3) + d;
874                        } else {
875                            ErrorSet.error(startingLoc,
876                                           "Non-octal digit found in octal literal.");
877                            result = 0;
878                            break;
879                        }
880                    }
881    
882                } else {
883                    // Handle hex literal
884                    this.append(nextchr);
885                    boolean overflow = false;
886                    for(nextchr = m_in.read(); ; nextchr = m_in.read()) {
887                        int d = Character.digit((char)nextchr, 16);
888                        if (d == -1) break;
889                        this.append(nextchr);
890                        if (! overflow)
891                            if ((result >>> 60) != 0) overflow = true;
892                            else result = (result << 4) + d;
893                    }
894                    if (textlen <= 2)
895                        ErrorSet.error(startingLoc, "Too few digits in a hex literal.");
896                    else if (overflow) {
897                        ErrorSet.error(startingLoc, "Integer literal overflow");
898                        result = 0;
899                    }
900                }
901    
902                if (nextchr == 'L' || nextchr == 'l') {
903                    this.append(nextchr);
904                    m_nextchr = m_in.read();
905                    endingLoc = m_in.getLocation();
906                    auxVal = new Long(result);
907                    return ttype = TagConstants.LONGLIT;
908                }
909                if ((result & 0xffffffff00000000L) != 0) {
910                    ErrorSet.error(startingLoc, "Integer literal overflow");
911                    result = 0;
912                }
913                m_nextchr = nextchr;
914                endingLoc = m_in.getLocation();
915                auxVal = new Integer((int)result);
916                return ttype = TagConstants.INTLIT;
917            } catch (IOException e) {
918                ErrorSet.fatal(m_in.getLocation(), e.toString());
919                return TagConstants.NULL; // Dummy
920            }
921        }
922    
923        /** Finishes scanning a floating-point literal.
924    
925         <p> Requires: <code>text</code> contains a possibly empty sequence
926         of decimal digits followed by an optional <code>'.'</code>; also,
927         <code>text</code> cannot be empty.  Further, let <i>s</i> be the
928         sequence of characters consisting of the characters in
929         <code>text</code> followed by <code>nextchr</code> followed by the
930         characters in <code>m_in</code>.  This routine requires that a
931         prefix of <i>s</i> match the syntax of floating-point literals as
932         defined by the Java language specification.
933    
934         <p> Ensures: Scans the floating-point literal in <i>s</i>.
935         Depending on the type of the literal, returns
936         <code>TagConstants.FLOATLIT</code> or
937         <code>TagConstants.DOUBLELIT</code> and sets sets
938         <code>auxVal</code> to either a <code>Float</code> or
939         <code>Double</code>.  If an error is encountered, a message is
940         sent to <code>ErorrSet</code> and recovery is performed. */
941    
942        //@ requires m_in != null;
943        //@ requires textlen>0;
944        //@ modifies ttype, auxVal, m_nextchr, endingLoc, text, textlen;
945        //@ ensures \result==ttype;
946        //@ ensures \result==TagConstants.FLOATLIT || \result==TagConstants.DOUBLELIT;
947        private int finishFloatingPointLiteral(int nextchr) {
948            try {
949                boolean error = false;
950                boolean zeroMantissa = true; // Used for underflow detection
951    
952                // First, see if there are any non-zero digits in the non-fractional
953                // part of the mantissa
954                for(int i = 0, len = textlen - (text[textlen-1] == '.' ? 1 : 0);
955                    i < len; i++)
956                    if (text[i] != '0') { zeroMantissa = false; break; }
957    
958                // If there's a fractional part, scan it in
959                while (Character.isDigit((char)nextchr)) {
960                    if (nextchr != '0') zeroMantissa = false;
961                    this.append(nextchr);
962                    nextchr = m_in.read();          // throws IOException
963                }
964    
965                // If there's an exponent, scan it in
966                if (nextchr == 'e' || nextchr == 'E') {
967                    this.append(nextchr);
968                    nextchr = m_in.read();
969                    if (nextchr == '+' || nextchr == '-') {
970                        this.append(nextchr);
971                        nextchr = m_in.read();
972                    }
973                    while (Character.isDigit((char)nextchr)) {
974                        this.append(nextchr);
975                        nextchr = m_in.read();
976                    }
977                    if (! Character.isDigit(text[textlen-1])) {
978                        ErrorSet.error(startingLoc, ("Digits required in exponent part"
979                                                     + " of floating-point literal."));
980                        error = true;
981                    }
982                }
983    
984                String s = (error ? "1.0" : String.valueOf(this.text, 0, textlen));
985    
986                // Assert: s has one of the following forms:
987                //   [0-9]+ (. [0-9]*)? (e [+-] [0-9]+)?
988                //   . [0-9]+ (e [+-] [0-9]+)?
989                // Also, zeroMantissa is true iff all digits in the mantissa are zero
990                // (Todo: examine code more closely to see if this is really true.)
991    
992                boolean f = (nextchr == 'F' || nextchr == 'f');
993                if (f || nextchr == 'D' || nextchr == 'd') {
994                    this.append(nextchr);
995                    m_nextchr = m_in.read();
996                } else m_nextchr = nextchr;
997                endingLoc = m_in.getLocation();
998    
999                if (f) {
1000                    Float result = Float.valueOf(s);  // !throw NumberFormatException
1001                    auxVal = result;
1002                    if (result.isInfinite())
1003                        ErrorSet.error(startingLoc, "Floating-point literal overflow");
1004                    else if (! zeroMantissa && result.floatValue() == 0.0)
1005                        ErrorSet.error(startingLoc, "Floating-point literal underflow");
1006                } else {
1007                    Double result = Double.valueOf(s);  // !throw NumberFormatException
1008                    auxVal = result;
1009                    if (result.isInfinite())
1010                        ErrorSet.error(startingLoc, "Floating-point literal overflow");
1011                    else if (! zeroMantissa && result.doubleValue() == 0.0D)
1012                        ErrorSet.error(startingLoc, "Floating-point literal underflow");
1013                }   
1014                return ttype = (f ? TagConstants.FLOATLIT : TagConstants.DOUBLELIT);
1015            } catch (IOException e) {
1016                ErrorSet.fatal(m_in.getLocation(), e.toString());
1017                return TagConstants.NULL; // Dummy
1018            }
1019        }   //@ nowarn Exception;   // NumberFormatException won't be thrown
1020    
1021        /** Scans a punctuation string <em>or</em> a floating-point
1022         number.  If input doesn't match either a floating-point number or
1023         any punctuation, returns <code>TagConstants.NULL</code>.  Assumes
1024         <code>startingLoc</code> already filled in.
1025    
1026         <p>The routine may change the mark arbitrarily.
1027    
1028         <p> This method leaves <code>m_in</code> in a different state than
1029         the previous ones do.  Ordinarily, <code>scanXXX</code> routines
1030         return with <code>m_nextchr</code> holding the character just
1031         after the token scanned and <code>m_in</code> pointing to the
1032         character after that.  <code>scanPunctuation</code> does too, but
1033         only when the value returned is <em>not</em>
1034         <code>TagConstants.C_COMMENT</code> or
1035         <code>TagConstants.EOL_COMMENT</code>; in those two cases, it
1036         returns with <code>m_in</code> pointing to the character just
1037         after the token scanned and <code>m_nextchr</code> undefined.
1038         This aids in pragma processing.  Also, if <code>TagConstants.NULL</code>
1039         is returned, then <code>m_nextchr</code> is undefined and
1040         <code>m_in</code> is where it was on entry.
1041         */
1042    
1043        //@ requires m_in != null;
1044        //@ modifies ttype, auxVal, m_nextchr, textlen, endingLoc, text;
1045        //@ modifies m_in.marked;
1046        //@ ensures \result==ttype;
1047        private int scanPunctuation(int nextchr) {
1048            try {
1049                boolean possibleFloatingPointNumber = (nextchr == '.');
1050                text[0] = (char)nextchr;
1051                textlen = 1;
1052                PunctuationPrefixTree prefix = punctuationTable;
1053                PunctuationPrefixTree lastPunctuation = prefix;
1054                int lastPunctuationLength = 0;
1055                m_in.mark();
1056    
1057                // The following loop, which has been unrolled once so the
1058                // first iteration can be handled specially, has the invariant:
1059                //   prefix == null || depth(prefix) == textlen-1
1060                //   nextchr == text[textlen-1]
1061                //   (nextchr_0 + m_in_0) == (text[0,textlen) + m_in)
1062                //   lastPunctuation != null && lastPunctuation is last valid punct seen
1063                //   lastPunctuationLength == depth(lastPunctuation)
1064                //   (nextchr_0+m_in_0)==(text[0,lastPunctuationLength]+m_in.resetedtext)
1065                // Stating part of this invariant graphically:
1066                //               l      t       l = lastPunctuationLength; t = textlen
1067                //    text:   abcdefghij
1068                // nextchr_0+
1069                //    m_in_0: abcdefghijklmnop
1070                //               r     nm       r = resetted text, n = nextchr, m = m_in
1071    
1072                int index = nextchr - '!';
1073                if (index < 0 || PunctuationPrefixTree.CHILDLEN <= index)
1074                    prefix = null;
1075                else
1076                    prefix = prefix.children[nextchr - '!'];
1077                if (prefix != null && prefix.code != TagConstants.NULL) {
1078                    lastPunctuation = prefix;
1079                    lastPunctuationLength = textlen;
1080                    m_in.mark();
1081                }
1082                nextchr = m_in.read();
1083                if (possibleFloatingPointNumber && Character.isDigit((char)nextchr)) {
1084                    // Deal with special case of floating-point numbers
1085                    m_in.clearMark();
1086                    return finishFloatingPointLiteral(nextchr);
1087                }
1088                this.append(nextchr);
1089                while (prefix != null) {
1090                    index = nextchr - '!';
1091                    if (index < 0 || PunctuationPrefixTree.CHILDLEN <= index)
1092                        prefix = null;
1093                    else
1094                        prefix = prefix.children[nextchr - '!'];
1095                    if (prefix != null && prefix.code != TagConstants.NULL) {
1096                        lastPunctuation = prefix;
1097                        lastPunctuationLength = textlen;
1098                        m_in.mark();
1099                    }
1100                    nextchr = m_in.read();
1101                    this.append(nextchr);
1102                }
1103                // End of unrolled loop
1104    
1105                m_in.reset();
1106                textlen = lastPunctuationLength;
1107                endingLoc = m_in.getLocation();
1108                ttype = lastPunctuation.code;
1109                if (ttype != TagConstants.C_COMMENT &&
1110                    ttype != TagConstants.EOL_COMMENT &&
1111                    ttype != TagConstants.NULL) {
1112                    m_nextchr = m_in.read();
1113                }
1114                return ttype;
1115            } catch (IOException e) {
1116                ErrorSet.fatal(m_in.getLocation(), e.toString());
1117                return TagConstants.NULL; // Dummy
1118            }
1119        }
1120    
1121        /** Scans a Java extension.  If input doesn't match any Java extension,
1122         returns <code>TagConstants.NULL</code>.  Assumes <code>startingLoc</code>
1123         already filled in, and assumes <code>textlen</code> is 0.
1124    
1125         <p>The routine may change the mark arbitrarily.
1126    
1127         <p> If a Java extension is matched, returns with <code>m_nextchr</code>
1128         holding the character just after the token scanned and <code>m_in</code>
1129         pointing to the character after that.
1130         */
1131        //@ requires m_in != null;
1132        //@ requires textlen == 0;
1133        //@ modifies ttype, auxVal, m_nextchr;
1134        //@ modifies m_in.marked;
1135        //@ ensures \result==ttype;
1136        protected int scanJavaExtensions(int nextchr) {
1137            ttype = TagConstants.NULL;
1138            return ttype;
1139        }
1140    
1141        //// Internal char buffer used by string scanner.
1142    
1143        // Having this separate from text allows us to accumulate both the
1144        // original input in text and the escape-converted input in stringLit
1145    
1146        //@ private invariant stringLit != null;
1147        //@ private invariant stringLit.length>=64;
1148        private char[] stringLit = new char[64];
1149    
1150        //@ private invariant stringLitLen>=0;
1151        private int stringLitLen = 0;
1152    
1153        private void stringLitAppend(int c) {
1154            try {
1155                stringLit[stringLitLen] = (char)c;    //@ nowarn IndexTooBig; // caught
1156            } catch (ArrayIndexOutOfBoundsException e) {
1157                char[] newstringLit = new char[stringLitLen + 128];
1158                System.arraycopy(stringLit, 0, newstringLit, 0, stringLitLen);
1159                stringLit = newstringLit;
1160                stringLit[stringLitLen] = (char)c;
1161            }
1162            stringLitLen++;
1163        }
1164    
1165    
1166        //// The keyword and punctuation tables, plus methods for changing them
1167    
1168        //@ private invariant punctuationTable != null;
1169        private PunctuationPrefixTree punctuationTable = new PunctuationPrefixTree();
1170    
1171        /**
1172         * Unenforceable invariant: all tokenTypes in this table do not
1173         * require a non-null auxVal.  (cf. Token.auxVal).
1174         */
1175        //  Old specs from original full JML spec files.  Must be
1176        //  rewritten for current java.util.Hashtable specs.
1177        /*  invariant (keywords != null) ==> (keywords.keyType == \type(Identifier) &&
1178                      keywords.elementType == \type(Integer)); */
1179        protected Hashtable keywords = null;
1180    
1181        protected boolean javakeywords = false, onlyjavakeywords = false;
1182        //@ invariant onlyjavakeywords == (javakeywords && keywords == null);
1183    
1184        static {
1185            // When class initializes, change the <CODE>tokenType</CODE> field
1186            // of <CODE>Identifier</CODE> instances associated with keywords.
1187            for(int code = TagConstants.FIRST_KEYWORD;
1188                code <= TagConstants.LAST_KEYWORD;
1189                code++) {
1190                Identifier idn = Identifier.intern(TagConstants.toString(code));
1191                _SpecialParserInterface.setTokenType(idn, code);
1192            }
1193        } 
1194    
1195        /** Add all of Java's keywords to the scanner.  The token codes used
1196         for these keywords are those defined by the <code>TagConstants</code>
1197         class.  Requires that none of these keywords have been added
1198         already. */
1199    
1200        public void addJavaKeywords() {
1201            javakeywords = true;
1202            if (keywords != null)
1203                // New values override existing ones...
1204                for(int i = TagConstants.FIRST_KEYWORD;
1205                    i <= TagConstants.LAST_KEYWORD;
1206                    i++)
1207                    keywords.remove(Identifier.intern(TagConstants.toString(i)));
1208            else onlyjavakeywords = true;
1209        }
1210    
1211    
1212        /**
1213         * Add a keyword to a <code>Lex</code> object with the given code.
1214         * Requires that <code>newkeyword</code> is a Java identifier and
1215         * that <code>code</code> is not <code>TagConstants.NULL</code> or
1216         * a tokenType that requires auxVal to be non-null;
1217         * (cf. Token.auxVal).
1218         *
1219         * Also requires that the keyword hasn't already been added.
1220         */
1221        //@ requires code != TagConstants.NULL;
1222        //@ requires code != TagConstants.BOOLEANLIT;
1223        //@ requires code != TagConstants.INTLIT;
1224        //@ requires code != TagConstants.LONGLIT;
1225        //@ requires code != TagConstants.DOUBLELIT;
1226        //@ requires code != TagConstants.STRINGLIT;
1227        //@ requires code != TagConstants.CHARLIT;
1228        //@ requires code != TagConstants.LEXICALPRAGMA;
1229        //@ requires code != TagConstants.MODIFIERPRAGMA;
1230        //@ requires code != TagConstants.STMTPRAGMA;
1231        //@ requires code != TagConstants.TYPEDECLELEMPRAGMA;
1232        //@ requires code != TagConstants.TYPEMODIFIERPRAGMA;
1233        public void addKeyword(/*@ non_null @*/ String newkeyword, int code) {
1234            Assert.precondition(code != TagConstants.NULL);
1235            if (keywords == null) {
1236                keywords = new Hashtable();
1237                //  Old specs from original full JML spec files.  Must be
1238                //  rewritten for current java.util.Hashtable specs.
1239                //  set keywords.keyType = \type(Identifier);
1240                //  set keywords.elementType = \type(Integer);
1241            }
1242            keywords.put(Identifier.intern(newkeyword), new Integer(code));
1243            onlyjavakeywords = false;
1244        }
1245    
1246    
1247    
1248        /** Add all of Java's punctuation strings to the scanner.  The codes
1249         used for these punctuation strings are the found in the
1250         <code>TagConstants</code> class.  Requires that none of these
1251         punctuation strings have been added before. */
1252    
1253        public void addJavaPunctuation() {
1254            Assert.notFalse(TagConstants.punctuationStrings.length ==
1255                            TagConstants.punctuationCodes.length);
1256            for(int i = 0; i < TagConstants.punctuationStrings.length; i++)
1257                addPunctuation(TagConstants.punctuationStrings[i],
1258                               TagConstants.punctuationCodes[i]);
1259        }
1260    
1261        /** Add a punctuation string to a scanner associated with a given
1262         code.  Requires that the characters in the punctuation string are
1263         all punctuation characters, that is, non-alphanumeric ASCII
1264         characters whose codes are between 33 ('!')  and 126 ('~')
1265         inclusive.  Also requires that the code is not
1266         <code>TagConstants.NULL</code> and that the punctuation string
1267         hasn't already been added. */
1268        //@ requires punctuation != null;
1269        //@ requires code != TagConstants.NULL;
1270        //@ requires code != TagConstants.IDENT;
1271        //@ requires code != TagConstants.BOOLEANLIT;
1272        //@ requires code != TagConstants.INTLIT;
1273        //@ requires code != TagConstants.LONGLIT;
1274        //@ requires code != TagConstants.FLOATLIT;
1275        //@ requires code != TagConstants.DOUBLELIT;
1276        //@ requires code != TagConstants.STRINGLIT;
1277        //@ requires code != TagConstants.CHARLIT;
1278        //@ requires code != TagConstants.LEXICALPRAGMA;
1279        //@ requires code != TagConstants.MODIFIERPRAGMA;
1280        //@ requires code != TagConstants.STMTPRAGMA;
1281        //@ requires code != TagConstants.TYPEDECLELEMPRAGMA;
1282        //@ requires code != TagConstants.TYPEMODIFIERPRAGMA;
1283        public void addPunctuation(String punctuation, int code) {
1284            Assert.precondition(code != TagConstants.NULL);
1285            PunctuationPrefixTree prefix = punctuationTable;
1286            for(int j = 0; j < punctuation.length(); j++) {
1287                int c = punctuation.charAt(j);
1288                Assert.precondition(        //@ nowarn Pre;
1289                                    ('!' <= c && c <= '/') || (':' <= c && c <= '@')
1290                                    || ('[' <= c && c <= '`') || ('{' <= c && c <= '~'));
1291                int index = c - '!';
1292                PunctuationPrefixTree child = prefix.children[index];
1293                if (child == null)
1294                    child = prefix.children[index] = new PunctuationPrefixTree();
1295                prefix = child;
1296            }
1297            prefix.code = code;
1298        }
1299    
1300    
1301        //// Diagnostic routines
1302    
1303        /** Checks invariants (assumes that <CODE>Token</CODE> fields
1304         haven't been mucked with by outside code).  <code>prefix</code> is
1305         used to prefix error messages with context provided by the
1306         caller. */
1307    
1308        public void zzz(String prefix) {
1309            super.zzz();
1310            lookaheadq.zzz("Near character "
1311                           + Location.toOffset(startingLoc)
1312                           + ": " + prefix);
1313        }
1314    }
1315    
1316    class PunctuationPrefixTree {
1317        public static final int CHILDLEN = 1 + '~' - '!';
1318    
1319        /*@ invariant code != TagConstants.IDENT &&
1320         code != TagConstants.BOOLEANLIT &&
1321         code != TagConstants.INTLIT &&
1322         code != TagConstants.LONGLIT &&
1323         code != TagConstants.FLOATLIT &&
1324         code != TagConstants.DOUBLELIT &&
1325         code != TagConstants.STRINGLIT &&
1326         code != TagConstants.CHARLIT &&
1327         code != TagConstants.LEXICALPRAGMA &&
1328         code != TagConstants.MODIFIERPRAGMA &&
1329         code != TagConstants.STMTPRAGMA &&
1330         code != TagConstants.TYPEDECLELEMPRAGMA &&
1331         code != TagConstants.TYPEMODIFIERPRAGMA; */
1332        public int code = TagConstants.NULL; // ! NULL ==> a punctuation string
1333    
1334        //@ invariant children != null;
1335        //@ invariant children.length == CHILDLEN;
1336        //@ invariant \typeof(children) == \type(PunctuationPrefixTree[]);
1337        public PunctuationPrefixTree[] children
1338                = new PunctuationPrefixTree[CHILDLEN];
1339    }