001 /* $Id $*/
002 /* Copyright 2000, 2001, Compaq Computer Corporation */
003
004 package javafe.parser;
005
006 import java.io.IOException;
007 import java.util.Hashtable;
008
009 import javafe.ast.Identifier;
010 import javafe.ast.PrettyPrint;
011 import javafe.ast.LexicalPragma;
012 import javafe.ast.LexicalPragmaVec;
013 import javafe.ast._SpecialParserInterface;
014
015 import javafe.util.Assert;
016 import javafe.util.ErrorSet;
017 import javafe.util.CorrelatedReader;
018 import javafe.util.Location;
019
020 /**
021
022 A <TT>Lex</TT> object generates a sequence of Java "input elements"
023 (that is, tokens) by converting the sequence of input characters and
024 line terminators generated by an underlying
025 <code>CorrelatedReader</code>.
026
027 <p> The conversion of input characters occurs according to the lexical
028 rules in Chapter Three of the <cite>Java Language
029 Specification</cite>. This specification describes three lexical
030 translation steps: the first two steps translate a raw Unicode input
031 stream into a "cooked" one in which Unicode escape characters from the
032 raw stream have been processed and in which line terminators have been
033 identified; the last step translates this cooked stream into a
034 sequence of Java "input elements" (comments, white space, identifiers,
035 tokens, literals, and punctuation). <code>Lex</code> objects perform
036 the last of these translations steps; the first two are performed by
037 an underlying <code>CorrelatedReader</code> that is given to the
038 <code>Lex</code> object as the source of input characters.
039
040 <p> Before a newly-created <code>Lex</code> object can be used, its
041 <code>restart</code> method must be called giving a
042 <code>CorrelatedReader</code> to scan. At any point, a
043 <code>Lex</code> can be restarted on a different underlying reader.
044
045 <p> The <TT>Lex</TT> class is thread safe, but instances of
046 <TT>Lex</TT> are not. That is, two different threads may safely
047 access two different instances of <TT>Lex</TT> concurrently, but they
048 may not access the same instance of <TT>Lex</TT> concurrently.
049
050
051 <h3> Simple scanning </h3>
052
053 <p> The <code>getNextToken</code> method of <code>Lex</code> objects
054 returns the translated token sequence, one token at a time. It
055 discards white space, and it processes comments as described below.
056 In addition, <code>getNextToken</code> fills in the <code>Token</code>
057 fields of <code>this</code> (<code>Lex</code> is a subclass of
058 <code>Token</code>); for example, <code>ttype</code> gets an integer
059 code defining the type of token returned and <code>startingLoc</code>
060 gives the location of the first character making up the token. If the
061 token is an identifier, <code>identifierVal</code> indicates which
062 one; if the token is a literal, <code>auxVal</code> gives its value.
063
064 <p> <code>Lex</code> objects report errors by calling both the
065 <code>fatal</code> and <code>error</code> methods of
066 <code>ErrorSet</code>. The fatal errors are unexpected characters,
067 unterminated comments and string and character literals, and IO errors
068 in the underlying input stream. Recoverable errors are overflows in
069 literals (including overflows in octal escape sequences), non-octal
070 digits in integer literals, the string <code>0x</code> (interpreted as
071 a malformed integer literal), missing digits in a floating-point
072 exponent, bad escape sequences in character and string literals,
073 character literals containing no or multiple characters, and the
074 character literal <code>'''</code> (interpreted as <code>'\''</code>).
075
076
077 <h3> Lookahead </h3>
078
079 <code>Lex</code> objects allow their clients to peek ahead into the
080 token stream by calling <code>lookahead</code>. This method returns
081 the token code for the future token, but it does not affect the
082 <code>Token</code> fields of <code>this</code>.
083
084 <p> If a call to <code>lookahead</code> needs to look past the set of
085 tokens already scanned, and those tokens have errors, then the errors
086 are reported immediately.
087
088
089 <h3> Extensibility: Keywords, punctuation </h3>
090
091 <p> A keyword is a Java identifier with a special token code.
092 Ordinarily, identifiers are associated with the code
093 <code>TagConstants.IDENT</code>. Keywords, while matching the lexical
094 grammar of identifiers, are associated with different codes. In fact,
095 each keyword is typically associated with its own code.
096
097 <p> The set of identifiers that a <code>Lex</code> object recognizes
098 as keywords is extensible. A keyword is added to a <code>Lex</code>
099 object by calling the <code>addKeyword</code> method. As a
100 convenience, a boolean given to the <code>Lex</code> constructor
101 indicates whether a newly-constructed <code>Lex</code> object should
102 automatically have all Java keywords added to it.
103
104 <p> A punctuation string is a string of punctuation characters
105 recognized by a <code>Lex</code> object to be a token. (Punctuation
106 characters are non-alphanumeric ASCII characters whose ASCII codes are
107 between 33 ('!') and 126 ('~') inclusive.) As with keywords, the set
108 of punctuation strings recognized by a <code>Lex</code> object is
109 extensible. A punctuation string is added to a <code>Lex</code>
110 object by calling the <code>addPunctuation</code> method. A boolean
111 given to the <code>Lex</code> constructor indicates whether a
112 newly-constructed <code>Lex</code> object should automatically have
113 all Java punctuation strings added to it.
114
115
116 <h3> Extensibility: comments and pragmas </h3>
117
118 <p> The handling of comments is special in two ways: the punctuation
119 strings that start comments is extensible, the text of comments can be
120 parsed for pragmas.
121
122 <h5> Comment recognition </h5>
123
124 <p> Ordinarily, keywords and punctuation strings are mapped to token
125 codes that are returned by calls to <code>getNextToken</code>.
126 However, two token codes are treated specially:
127 <code>TagConstants.C_COMMENT</code> and <code>TagConstants.EOL_COMMENT</code>.
128 These codes are used to indicate the start of C-like comments
129 (<code>/*...*</code><code>/</code>) and end-of-line comments
130 (<code>//...</code>), respectively. When a keyword or punctuation
131 string is mapped to these codes, it is handled like a comment
132 initiator rather than a regular token.
133
134 <p> For all newly-created <code>Lex</code> objects, <code>/*</code> is
135 mapped to <code>TagConstants.C_COMMENT</code> and <code>//</code> is mapped
136 to <code>TagConstants.EOL_COMMENT</code>. Other punctuation strings can be
137 made comment initiators by mapping them to comment-initiating codes.
138 This is more useful for <code>TagConstants.EOL_COMMENT</code> than for
139 <code>TagConstants.C_COMMENT</code>, since the string
140 <code>*</code><code>/</code> is hard-wired as the terminator of C-like
141 comments.
142
143 <h5> Pragma parsing </h5>
144
145 <code>Lex</code> objects are designed to support annotation of Java
146 programs through pragmas. A <a href = "../pragma-handling.html">
147 separate document </a> describes our overall aproach to pragmas. In
148 brief, our front-end supports two kinds of pragmas: control pragmas
149 that can appear anywhere in an input file and are collected in a list
150 apart from the parse tree, and syntax pragmas that can only appear in
151 certain grammatical contexts and become part of the parse tree.
152 Pragmas always appear in Java comments, at most one pragma per
153 comment. These comments must have one of the following forms:
154
155 <menu>
156 <li> <code>/*</code><i>tag</i> <i>white-space</i> <i>pragma-text</i><code>*</code><code>/</code>
157 <li> <code>//</code><i>tag</i> <i>white-space-minus-EOL</i> <i>pragma-text</i> <i>EOL</code>
158 </menu>
159
160 <p> When a <code>Lex</code> object is created, it can optionally be
161 associated with a <code>PragmaParser</code> object. If a
162 <code>Lex</code> object has no <code>PragmaParser</code>, it discards
163 all comments. Otherwise, the <code>Lex</code> object passes the first
164 character of the comment (or -1 if the comment is empty) to the
165 <code>checkTag</code> method of the <code>PragmaParser</code> object,
166 which returns <code>false</code> if the comment definitely does not
167 contain any pragmas. If the comment may contain pragmas, the
168 <code>Lex</code> object bundles the text between the delimiters of a
169 comment into a <code>CorrelatedReader</code> which it passes to the
170 <code>restart</code> method of its <code>PragmaParser</code>. (This
171 text excludes both the opening <code>/*</code> or <code>//</code> and
172 the closing <code>*</code><code>/</code> or line terminator.)
173
174 <p> The <code>Lex</code> object then calls <code>getNextPragma</code>
175 to read the pragmas out of the comment one at a time. The
176 <code>Lex</code> object does this in a lazy manner; that is, it reads
177 a pragma, returns it to the parser, and waits until the parser calls
178 for the next token before it attempts to read another pragma. The
179 <code>getNextPragma</code> method returns a boolean, returning
180 <code>false</code> if there are no more pragmas to be parsed. The
181 <code>getNextPragma</code> method takes a <code>Token</code> as an
182 argument, storing information about the pragma parsed into this
183 argument.
184
185 <p> When <code>PragmaParser.getNextPragma</code> returns a
186 <code>LexicalPragma</code>, the <code>Lex</code> object puts it in an
187 internal list rather than returning it to the parser. The list of
188 collected lexical pragmas can be retrieved by calling
189 <code>getLexicalPragmas</code>.
190
191 @see javafe.util.CorrelatedReader
192 @see javafe.parser.TagConstants
193 @see javafe.parser.Token
194 @see javafe.parser.PragmaParser
195
196 */
197
198 public class Lex extends Token
199 {
200 //// Underlying input stream
201
202 /**
203 * Current state of input stream underlying the scanner minus the
204 * first character. See <code>m_nextchr</code>. <p>
205 *
206 * This is null iff we are closed.
207 */
208
209 /*@ spec_public @*/ protected CorrelatedReader m_in = null;
210
211 /** Each call to <code>getNextToken</code> reads ahead one character
212 and leaves the result in <code>m_nextchr</code>. In other words,
213 between calls to <code>getNextToken</code>, the stream of
214 characters yet-to-scanned consists of the character in
215 <code>m_nextchr</code> followed by the characters remaining in
216 <code>m_in</code>. */
217
218 protected int m_nextchr;
219
220
221 //// Buffer used to hold the actual char's making up a token.
222
223 // The exact characters making up a token are copied into this array
224 // for all tokens. Although in some cases this isn't needed (e.g.,
225 // for string literals), we do it anyway in case someday we want to
226 // export that ability of getting a token's exact text rather than
227 // its interpretation, esp. in the case of literals.
228
229 /** The characters that constitute the current token. Only the
230 first <TT>textlen</TT> characters are part of the current token;
231 the actual length of <TT>text</TT> may be bigger. The lexer may
232 occasionally need to resize <TT>text</TT>, so the same array might
233 not be used throughout the lifetime of the lexer. */
234
235 //@ invariant text != null;
236 //@ invariant text.length > 4;
237 protected char[] text = new char[64]; // Invariant: text.length > 4
238 // 64 should be large enough that overflows won't happen for most inputs
239
240 /** The number of characters in the current token. The "current
241 token" is the one parsed by the previous call to
242 <TT>getNextToken</TT> (there is no "current token" between
243 creation of a lexer and the first call to getNextToken). */
244
245 //@ invariant 0 <= textlen && textlen <= text.length;
246 protected int textlen = 0;
247
248 /** Append 'c' to <CODE>text</CODE>, expanding if necessary. */
249 //@ modifies text, textlen;
250 //@ ensures textlen == \old(textlen)+1;
251 protected void append(int c) {
252 try {
253 text[textlen] = (char)c; //@ nowarn IndexTooBig; // caught
254 } catch (ArrayIndexOutOfBoundsException e) {
255 //@ assume textlen>=text.length;
256 char[] newtext = new char[textlen + 128];
257 System.arraycopy(text, 0, newtext, 0, textlen);
258 text = newtext;
259 text[textlen] = (char)c;
260 }
261 textlen++;
262 }
263
264
265 //// Instance variables for token queue and pragma parsing
266
267 protected final TokenQueue lookaheadq = new TokenQueue();
268
269 public /*@ non_null @*/ LexicalPragmaVec lexicalPragmas;
270
271 //@ invariant inPragma ==> pragmaParser != null;
272 protected PragmaParser pragmaParser;
273 protected boolean inPragma;
274
275
276 //// Constructors
277
278 /**
279 * Creates a lexical analyzer that will tokenize the characters
280 * read from an underlying <code>CorrelatedReader</code>. Before
281 * the newly-created scanner can be used, its <code>restart</code>
282 * method must be called on a <code>CorrelatedReader</code>. The
283 * <code>pragmaParser</code> object is used to parse pragmas out
284 * of comments; if it is <code>null</code>, all comments are
285 * discarded. The <code>isJava</code> flag controls the initial
286 * set of keywords and punctuation strings; if <code>true</code>,
287 * the new scanner will recognize Java's keywords and punctuation
288 * strings, if <code>false</code>, the new scanner will recognize
289 * <em>no</em> keywords or punctuation strings. If
290 * <code>isJava</code> is true, the token codes used for the
291 * Java's keywords and punctuation strings are those defined by
292 * the <code>TagConstants</code> class.
293 */
294
295 //@ ensures m_in==null;
296 public Lex(PragmaParser pragmaParser, boolean isJava) {
297 this.pragmaParser = pragmaParser;
298 lexicalPragmas = LexicalPragmaVec.make();
299
300 inPragma = false;
301 if (isJava) {
302 addJavaKeywords();
303 addJavaPunctuation();
304 } else {
305 addPunctuation("/*", TagConstants.C_COMMENT);
306 addPunctuation("//", TagConstants.EOL_COMMENT);
307 }
308 }
309
310
311 /** Start scaning a new <code>CorrelatedReader</code>. First closes
312 the old <code>CorrelatedReader</code> associated with
313 <code>this</code> (if there was one), and clears out the set of
314 collected lexical pragms. In addition to (re)-seting the
315 underlying input stream, this method scans the first token,
316 returning the token kind of the result and setting the
317 <code>Token</code> fields of <code>this</code>. If a
318 <code>CorrelatedReader</code> is already underlying
319 <code>this</code>, it is closed before the new reader is
320 installed. Note: The argument <code>in</code> is "captured" in
321 the internal, private state of the resulting scanner and should
322 not be used by other parts of the program. */
323 //@ public normal_behavior
324 //@ modifies m_in, ttype, auxVal, identifierVal, lexicalPragmas;
325 //@ ensures m_in != null;
326 //@ also
327 //@ protected normal_behavior
328 //@ modifies m_nextchr;
329 public int restart(/*@ non_null @*/ CorrelatedReader in) {
330 close();
331 try {
332 m_in = in;
333 m_nextchr = m_in.read();
334 } catch (IOException e) {
335 ErrorSet.fatal(m_in.getLocation(), e.toString());
336 return TagConstants.NULL; // Dummy
337 }
338 return getNextToken();
339 }
340
341
342 /** Closes the <code>CorrelatedReader</code> underlying
343 <code>this</code>, clears the set of collected lexical pragmas,
344 and in other ways frees up resources associated with
345 <code>this</code>. After being closed, a <code>Lex</code> object
346 can be restarted by calling <code>restart</code>. (An IO
347 exception raised by closing the underlying input stream will be
348 converted into a <code>javafe.util.FatalError</code> runtime
349 exception.) */
350 //@ public normal_behavior
351 //@ modifies m_in, lexicalPragmas;
352 //@ ensures m_in==null;
353 //@ also
354 //@ protected normal_behavior
355 //@ modifies inPragma;
356 public void close() {
357 if (m_in != null) {
358 m_in.close();
359 }
360 m_in = null;
361 lookaheadq.clear();
362 if (pragmaParser != null) {
363 pragmaParser.close();
364 lexicalPragmas = LexicalPragmaVec.make();
365 }
366 inPragma = false;
367 }
368
369
370
371 //// Public members for scanning
372
373 public void replaceLookaheadToken(int k, Token t) {
374 lookaheadq.setElementAt(k,t);
375 }
376
377 /** Scans next token from input stream. Returns the code of the
378 next token in the token stream and fills in the <code>Token</code>
379 fields of <CODE>this</CODE>. Note that the
380 <code>startingLoc</code> and <code>endingLoc</code> fields of
381 <code>this</code> are not accurate for the end-of-file token. */
382
383 //@ public normal_behavior
384 //@ requires m_in != null;
385 //@ modifies ttype, auxVal, identifierVal;
386 //@ also
387 //@ protected normal_behavior
388 //@ modifies inPragma, m_nextchr;
389 public int getNextToken() {
390 if (lookaheadq.notempty) {
391 lookaheadq.dequeue(this);
392 return ttype;
393 }
394
395 for(;;) {
396 if (inPragma)
397 if (! pragmaParser.getNextPragma(this)) {
398 inPragma = false;
399 } else if (ttype != TagConstants.LEXICALPRAGMA) {
400 return ttype;
401 } else {
402 lexicalPragmas.addElement((LexicalPragma)this.auxVal);
403 continue;
404 }
405
406 int t = scanToken();
407 if (t != TagConstants.C_COMMENT &&
408 t != TagConstants.EOL_COMMENT) {
409 return t;
410 }
411 else {
412 scanComment(t);
413 }
414 }
415 }
416
417 /** Returns token type of the <TT>k</TT>th future token, where k=0
418 is the current token. If <code>k</code> is past the end of the
419 token stream, <code>TagConstants.EOF</code> is returned. */
420
421 //@ requires k>=0;
422 //@ requires m_in != null;
423 public int lookahead(int k) {
424 if (k == 0) return ttype;
425 int lookahead_count = lookaheadq.size();
426
427 if (lookahead_count < k) { // Need to add more tokens in lookahead buffer
428 this.copyInto(savedState);
429 try {
430 for(int deficit = k - lookahead_count; 0 < deficit; deficit--) {
431 for(;;) {
432 if (inPragma)
433 if (! pragmaParser.getNextPragma(this))
434 inPragma = false;
435 else if (ttype != TagConstants.LEXICALPRAGMA)
436 break;
437 else {
438 lexicalPragmas.addElement((LexicalPragma)this.auxVal);
439 continue;
440 }
441
442 int t = scanToken();
443 if (t != TagConstants.C_COMMENT
444 && t != TagConstants.EOL_COMMENT)
445 break;
446 else scanComment(t);
447 }
448 lookaheadq.enqueue(this);
449 }
450 } finally {
451 savedState.copyInto(this);
452 savedState.clear();
453 }
454 }
455 return lookaheadq.elementAt(k-1).ttype;
456 }
457 //@ invariant savedState != null;
458 protected Token savedState = new Token();
459
460 public Token lookaheadToken(int k) {
461 if (k==0) return this;
462 lookahead(k);
463 return lookaheadq.elementAt(k-1);
464 }
465
466 /** Returns the set of lexical pragmas collected. It also clears
467 the set of lexical pragmas so that the next call will not include
468 them. (If this lexer has no <code>PragmaParser</code>, then an
469 empty vector is returned.) */
470
471 //@ ensures \result != null;
472 public LexicalPragmaVec getLexicalPragmas() {
473 return lexicalPragmas.copy();
474 }
475
476 /**
477 * Remove the first LexicalPragma from our set of lexical pragmas
478 * collected, returning it or null if our set is empty.
479 */
480 public LexicalPragma popLexicalPragma() {
481 if (lexicalPragmas.size()>0)
482 return lexicalPragmas.pop();
483
484 return null;
485 }
486
487
488 //// Internal scanning routines
489
490 /** Returns the code of the next token in the token stream, updating
491 the <code>Token</code> fields of <code>this</code> along the way.
492 Advances underlying stream to the character just past the last
493 character of the token returned, and changes the internal buffer
494 used by <code>getTokenText</code> to contain the text of this
495 token.
496
497 <p> In most cases, this method leaves <code>m_nextchr</code>
498 holding the character just after the token scanned and
499 <code>m_in</code> pointing to the character after that. However,
500 if <code>TagConstants.C_COMMENT</code> or
501 <code>TagConstants.EOL_COMMENT</code> is returned, it leaves
502 <code>m_in</code> pointing to the character just after the token
503 scanned and <code>m_nextchr</code> undefined. This aids in pragma
504 processing. */
505
506 //@ requires m_in != null;
507 private int scanToken() {
508 // The following "assert" is obvious from the program text, because
509 // the two arrays are non-null and their types do not coincide in any
510 // other way. ESC/Java can figure this out, too, so therefore the
511 // following "assert" produces no warning. This property is also
512 // cruicial to figuring out that the updates to "text[...]" in this
513 // body do not destroy the invariants declared in TagConstants about
514 // "punctuationCodes[...]". However, oddly enough, without being
515 // prompted to first prove this property, ESC/Java isn't able to
516 // prove the necessary thing about the updates to "text[...]".
517 //@ assert (Object)text != (Object)TagConstants.punctuationCodes;
518
519 try {
520 int nextchr = m_nextchr;
521 textlen = 0;
522
523 while (Character.isWhitespace((char)nextchr)) {
524 m_in.mark();
525 nextchr = m_in.read();
526 }
527 startingLoc = m_in.getLocation();
528
529 // Inline the identifier-handling code because it's the common case
530 if (Character.isJavaIdentifierStart((char)nextchr)) {
531 int h = 0;
532 do {
533 try {
534 text[textlen] = (char)nextchr;
535 textlen++;
536 } catch (ArrayIndexOutOfBoundsException e) { this.append(nextchr);}
537 h = _SpecialParserInterface.HC*h + nextchr;
538 nextchr = m_in.read();
539 } while (Character.isJavaIdentifierPart((char)nextchr));
540 m_nextchr = nextchr;
541 auxVal = null;
542 identifierVal = _SpecialParserInterface.intern(text, textlen, h);
543 if (onlyjavakeywords) {
544 ttype = _SpecialParserInterface.getTokenType(identifierVal);
545 } else if (keywords != null) {
546 Object val = keywords.get(identifierVal);
547 if (val != null) {
548 ttype = ((Integer)val).intValue();
549 // From the Unenforceable invariant on keyword:
550 /*@ assume ttype != TagConstants.BOOLEANLIT &&
551 ttype != TagConstants.INTLIT &&
552 ttype != TagConstants.LONGLIT &&
553 ttype != TagConstants.FLOATLIT &&
554 ttype != TagConstants.DOUBLELIT &&
555 ttype != TagConstants.STRINGLIT &&
556 ttype != TagConstants.CHARLIT &&
557 ttype != TagConstants.LEXICALPRAGMA &&
558 ttype != TagConstants.MODIFIERPRAGMA &&
559 ttype != TagConstants.STMTPRAGMA &&
560 ttype != TagConstants.TYPEDECLELEMPRAGMA &&
561 ttype != TagConstants.TYPEMODIFIERPRAGMA; */
562 } else if (javakeywords)
563 ttype = _SpecialParserInterface.getTokenType(identifierVal);
564 else ttype = TagConstants.IDENT;
565 } else ttype = TagConstants.IDENT;
566 endingLoc = m_in.getLocation(); // Chalin: trying to satisfy Token inv.
567 return ttype;
568 }
569 ttype = Token.CLEAR; // keep Token invariants happy
570 identifierVal = null;
571
572 if (Character.isDigit((char)nextchr))
573 return scanNumber(nextchr);
574 if (nextchr == '\'' || nextchr == '\"')
575 return scanCharOrString(nextchr);
576
577 scanJavaExtensions(nextchr);
578 if (ttype != TagConstants.NULL) return ttype;
579
580 scanPunctuation(nextchr);
581 if (ttype != TagConstants.NULL) return ttype;
582
583 if (nextchr == -1) {
584 m_nextchr = nextchr;
585 return ttype = TagConstants.EOF;
586 }
587
588 String s = PrettyPrint.toCanonicalString(TagConstants.CHARLIT,
589 new Integer(nextchr));
590 ErrorSet.fatal(m_in.getLocation(), "Unexpected character " + s);
591
592 //@ unreachable;
593 return ttype;
594
595 } catch (IOException e) {
596 ErrorSet.fatal(m_in.getLocation(), e.toString());
597 return TagConstants.NULL; // Dummy
598 }
599 }
600
601
602 /** Handle a comment. m_in points to the character just after the
603 "//" or "/*". The mark is set at the last character read. */
604
605 //@ requires m_in != null;
606 //@ requires !inPragma;
607 //@ modifies inPragma, m_nextchr;
608 private void scanComment(int commentKind) {
609 try {
610 m_in.mark();
611 int firstchr = m_in.read();
612 // System.out.println("scanComment: firstchr = '"+(char)firstchr+"'");
613
614 /* Decide if this comment contains a pragma. */
615
616 if (pragmaParser != null) {
617 if (pragmaParser.checkTag(firstchr)) {
618 Assert.notFalse(! inPragma);
619 inPragma = true;
620 }
621 }
622
623 // Scan the comment
624 boolean nonEmptyComment = false; // For empty multi-line comment
625
626 int locStartComment = m_in.getLocation();
627 int nextchr = firstchr;
628 if (commentKind == TagConstants.EOL_COMMENT) {
629 nonEmptyComment = true;
630 while (nextchr != -1 && nextchr != '\n')
631 nextchr = m_in.read();
632 } else {
633 for(;;) {
634 if (nextchr == -1) {
635 ErrorSet.fatal(
636 startingLoc,
637 "Unterminated or improperly nested comment or pragma");
638 }
639 int oldchr = nextchr;
640 nextchr = m_in.read();
641 if (oldchr == '*' && nextchr == '/') break;
642 nonEmptyComment = true;
643 }
644 }
645
646 // If the comment contains a pragma, set up the pragma parser
647 if (inPragma) {
648 if(nonEmptyComment) {
649 boolean eolComment = commentKind == TagConstants.EOL_COMMENT;
650 int discard = !eolComment ? 2 :
651 nextchr == '\n' ? 1 : 0;
652 CorrelatedReader nu = m_in.createReaderFromMark(discard);
653 pragmaParser.restart(nu, eolComment);
654 } else {
655 // Is an empty multi-line comment
656 inPragma = false;
657 }
658 }
659 if (!inPragma) {
660 m_in.clearMark();
661 }
662
663 // Clean up and return
664 m_nextchr = m_in.read();
665 } catch (IOException e) {
666 m_in.clearMark();
667 ErrorSet.fatal(m_in.getLocation(), "IO error");
668 }
669 } //@ nowarn Exception; // ignore Index... from createReaderFromMark
670
671
672 // Notes. The routines below all assume that the input stream to be
673 // scanned consist of their input parameter nextchr plus the characters
674 // in m_in; that is, the nextchr argument is playing the role that
675 // m_nextchr plays between calls to getNextToken. These routines also
676 // assume that textlen = 0 and startingLoc has already been filled in
677 // with the location of the character in nextchr.
678 // Unless otherwise noted, the routines below fill in all fields except
679 // startingLoc, that is, they fill in m_nextchr, text & textlen,
680 // plus the Token fields ttype, endingLoc, and auxVal.
681 // Errors are handled internally by these routines. Either the
682 // error is fatal, or the error is recoverable and recovery is done
683 // by the routines.
684
685 /** Scan a character or string constant. */
686
687 //@ requires m_in != null;
688 private int scanCharOrString(int nextchr) {
689 try {
690 int endquote = nextchr;
691 this.append(endquote);
692 boolean chr = (endquote == '\'');
693 nextchr = m_in.read();
694 stringLitLen = 0;
695 for(;;) {
696 if (nextchr != '\\' && nextchr != '\n' && nextchr != -1) {
697 // Normal case
698 this.append(nextchr);
699 if (nextchr == endquote) { nextchr = m_in.read(); break; }
700 this.stringLitAppend(nextchr);
701 nextchr = m_in.read();
702 continue;
703 }
704
705 if (nextchr == -1 || nextchr == '\n') {
706 if (chr) ErrorSet.fatal(startingLoc,
707 "Unterminated character literal");
708 else ErrorSet.fatal(startingLoc, "Unterminated string literal");
709 break;
710 }
711
712 // At this point, we know we're at the start of an escape sequence
713 this.append(nextchr);
714 nextchr = m_in.read();
715 if ('0' <= nextchr && nextchr <= '7') { // Octal sequence
716 int firstdigit = nextchr - '0';
717 int result = firstdigit;
718 this.append(nextchr);
719 nextchr = m_in.read();
720 if ('0' <= nextchr && nextchr <= '7') {
721 this.append(nextchr); result = 8*result + (nextchr - '0');
722 nextchr = m_in.read();
723 if (0 <= firstdigit && firstdigit <= 3
724 && '0' <= nextchr && nextchr <= '7') {
725 this.append(nextchr); result = 8*result + (nextchr - '0');
726 nextchr = m_in.read();
727 }
728 }
729 if (255 < result) {
730 ErrorSet.error(m_in.getLocation(),
731 "Octal escape sequence overflow");
732 this.stringLitAppend('\0');
733 } else this.stringLitAppend(result);
734 } else {
735 if (nextchr == 'b') this.stringLitAppend('\b');
736 else if (nextchr == 't') this.stringLitAppend('\t');
737 else if (nextchr == 'n') this.stringLitAppend('\n');
738 else if (nextchr == 'f') this.stringLitAppend('\f');
739 else if (nextchr == 'r') this.stringLitAppend('\r');
740 else if (nextchr == '\"') this.stringLitAppend('\"');
741 else if (nextchr == '\'') this.stringLitAppend('\'');
742 else if (nextchr == '\\') this.stringLitAppend('\\');
743 else {
744 ErrorSet.error(startingLoc, "Bad escape sequence");
745 continue;
746 }
747 this.append(nextchr);
748 nextchr = m_in.read();
749 }
750 }
751
752 if (chr) {
753 if (1 < stringLitLen)
754 ErrorSet.error(startingLoc,
755 "Character literal with multiple characters.");
756 else if (stringLitLen == 0)
757 if (nextchr == '\'') {
758 this.append(nextchr);
759 nextchr = m_in.read();
760 stringLit[0] = '\'';
761 ErrorSet.error(startingLoc, "Unquoted \' in character literal.");
762 } else {
763 stringLit[0] = '\0';
764 ErrorSet.error(startingLoc, "Empty character literal.");
765 }
766 ttype = TagConstants.CHARLIT;
767 auxVal = new Integer(stringLit[0]); // CF
768 } else {
769 ttype = TagConstants.STRINGLIT;
770 auxVal = String.valueOf(stringLit, 0, stringLitLen);
771 }
772 m_nextchr = nextchr;
773 endingLoc = m_in.getLocation();
774 return ttype;
775 } catch (IOException e) {
776 ErrorSet.fatal(m_in.getLocation(), e.toString());
777 return TagConstants.NULL; // Dummy
778 }
779 }
780
781
782 /** Scans a numeric literal. Requires <code>nextchr</code> is a
783 decimal digit. Reads a numeric literal into <code>text</code>.
784 Depending on the kind of literal found, will return one of
785 <code>TagConstants.INTLIT</code>,
786 <code>TagConstants.LONGLIT</code>,
787 <code>TagConstants.FLOATLIT</code> or
788 <code>TagConstants.DOUBLELIT</code>. If an error is detected, a
789 message is sent to <code>ErrorSet</code>, <code>m_in</code> is
790 advanced to what appears to be the end of the erroneous token, and
791 a legal literal is left in <code>text</code>. */
792
793 //@ requires m_in != null;
794 //@ modifies endingLoc, ttype, auxVal, m_nextchr, text, textlen;
795 //@ ensures \result==ttype;
796 /*@ ensures \result==TagConstants.INTLIT || \result==TagConstants.LONGLIT ||
797 \result==TagConstants.FLOATLIT || \result==TagConstants.DOUBLELIT ||
798 \result==TagConstants.MAX_INT_PLUS_ONE ||
799 \result==TagConstants.MAX_LONG_PLUS_ONE ; */
800 private int scanNumber(int nextchr) {
801 try {
802 // Get the first chunk of digits
803 int tentativeResult = 0;
804 while (Character.isDigit((char)nextchr)) {
805 this.append(nextchr);
806 tentativeResult = 10*tentativeResult + (nextchr - '0');
807 nextchr = m_in.read();
808 }
809 //@ assume textlen>0; // because know nextchar was initially a digit...
810
811 // Handle floating point literals in another subtroutine
812 if (nextchr == '.') {
813 m_in.mark();
814 int nextnextchr = m_in.read();
815 if (nextnextchr == '.') {
816 // Two . in a row - cannot be a double so
817 // back up
818 m_in.reset();
819 // fall through and let the text so far be handled as
820 // a integer literal
821 } else {
822 this.append(nextchr);
823 return finishFloatingPointLiteral(nextnextchr);
824 }
825 } else if (nextchr == 'e' || nextchr == 'E'
826 || nextchr == 'F' || nextchr == 'f'
827 || nextchr == 'd' || nextchr == 'D')
828 return finishFloatingPointLiteral(nextchr);
829
830 long result = 0;
831 if (text[0] != '0' || (textlen == 1 && nextchr != 'x' && nextchr != 'X'))
832 // Handle base-ten literal
833 if (textlen <= 9) result = tentativeResult;
834 else {
835 // Parse as negative value to avoid problems w/ -Long.MIN_VALUE
836 long mullimit = Long.MIN_VALUE / 10;
837 for(int i = 0; i < textlen; i++) {
838 int d = text[i] - '0';
839 long r2 = 10*result;
840 if (result < mullimit || r2 < Long.MIN_VALUE + d) {
841 ErrorSet.error(startingLoc, "Integer literal overflow");
842 result = 0;
843 break;
844 } else result = r2 - d;
845 }
846 if (nextchr == 'L' || nextchr == 'l') {
847 if (result == Long.MIN_VALUE) {
848 this.append(nextchr);
849 m_nextchr = m_in.read();
850 endingLoc = m_in.getLocation();
851 auxVal = null;
852 return ttype = TagConstants.MAX_LONG_PLUS_ONE;
853 }
854 } else if (result == Integer.MIN_VALUE) {
855 m_nextchr = nextchr;
856 endingLoc = m_in.getLocation();
857 auxVal = null;
858 return ttype = TagConstants.MAX_INT_PLUS_ONE;
859 }
860 result = -result;
861 }
862
863 else if (nextchr != 'x' && nextchr != 'X') {
864 // Handle octal literal
865 for(int i = 1; i < textlen; i++) {
866 int d = text[i] - '0';
867 if (0 <= d && d <= 7) {
868 if ((result >>> 61) != 0) {
869 ErrorSet.error(startingLoc, "Integer literal overflow");
870 result = 0;
871 break;
872 }
873 result = (result << 3) + d;
874 } else {
875 ErrorSet.error(startingLoc,
876 "Non-octal digit found in octal literal.");
877 result = 0;
878 break;
879 }
880 }
881
882 } else {
883 // Handle hex literal
884 this.append(nextchr);
885 boolean overflow = false;
886 for(nextchr = m_in.read(); ; nextchr = m_in.read()) {
887 int d = Character.digit((char)nextchr, 16);
888 if (d == -1) break;
889 this.append(nextchr);
890 if (! overflow)
891 if ((result >>> 60) != 0) overflow = true;
892 else result = (result << 4) + d;
893 }
894 if (textlen <= 2)
895 ErrorSet.error(startingLoc, "Too few digits in a hex literal.");
896 else if (overflow) {
897 ErrorSet.error(startingLoc, "Integer literal overflow");
898 result = 0;
899 }
900 }
901
902 if (nextchr == 'L' || nextchr == 'l') {
903 this.append(nextchr);
904 m_nextchr = m_in.read();
905 endingLoc = m_in.getLocation();
906 auxVal = new Long(result);
907 return ttype = TagConstants.LONGLIT;
908 }
909 if ((result & 0xffffffff00000000L) != 0) {
910 ErrorSet.error(startingLoc, "Integer literal overflow");
911 result = 0;
912 }
913 m_nextchr = nextchr;
914 endingLoc = m_in.getLocation();
915 auxVal = new Integer((int)result);
916 return ttype = TagConstants.INTLIT;
917 } catch (IOException e) {
918 ErrorSet.fatal(m_in.getLocation(), e.toString());
919 return TagConstants.NULL; // Dummy
920 }
921 }
922
923 /** Finishes scanning a floating-point literal.
924
925 <p> Requires: <code>text</code> contains a possibly empty sequence
926 of decimal digits followed by an optional <code>'.'</code>; also,
927 <code>text</code> cannot be empty. Further, let <i>s</i> be the
928 sequence of characters consisting of the characters in
929 <code>text</code> followed by <code>nextchr</code> followed by the
930 characters in <code>m_in</code>. This routine requires that a
931 prefix of <i>s</i> match the syntax of floating-point literals as
932 defined by the Java language specification.
933
934 <p> Ensures: Scans the floating-point literal in <i>s</i>.
935 Depending on the type of the literal, returns
936 <code>TagConstants.FLOATLIT</code> or
937 <code>TagConstants.DOUBLELIT</code> and sets sets
938 <code>auxVal</code> to either a <code>Float</code> or
939 <code>Double</code>. If an error is encountered, a message is
940 sent to <code>ErorrSet</code> and recovery is performed. */
941
942 //@ requires m_in != null;
943 //@ requires textlen>0;
944 //@ modifies ttype, auxVal, m_nextchr, endingLoc, text, textlen;
945 //@ ensures \result==ttype;
946 //@ ensures \result==TagConstants.FLOATLIT || \result==TagConstants.DOUBLELIT;
947 private int finishFloatingPointLiteral(int nextchr) {
948 try {
949 boolean error = false;
950 boolean zeroMantissa = true; // Used for underflow detection
951
952 // First, see if there are any non-zero digits in the non-fractional
953 // part of the mantissa
954 for(int i = 0, len = textlen - (text[textlen-1] == '.' ? 1 : 0);
955 i < len; i++)
956 if (text[i] != '0') { zeroMantissa = false; break; }
957
958 // If there's a fractional part, scan it in
959 while (Character.isDigit((char)nextchr)) {
960 if (nextchr != '0') zeroMantissa = false;
961 this.append(nextchr);
962 nextchr = m_in.read(); // throws IOException
963 }
964
965 // If there's an exponent, scan it in
966 if (nextchr == 'e' || nextchr == 'E') {
967 this.append(nextchr);
968 nextchr = m_in.read();
969 if (nextchr == '+' || nextchr == '-') {
970 this.append(nextchr);
971 nextchr = m_in.read();
972 }
973 while (Character.isDigit((char)nextchr)) {
974 this.append(nextchr);
975 nextchr = m_in.read();
976 }
977 if (! Character.isDigit(text[textlen-1])) {
978 ErrorSet.error(startingLoc, ("Digits required in exponent part"
979 + " of floating-point literal."));
980 error = true;
981 }
982 }
983
984 String s = (error ? "1.0" : String.valueOf(this.text, 0, textlen));
985
986 // Assert: s has one of the following forms:
987 // [0-9]+ (. [0-9]*)? (e [+-] [0-9]+)?
988 // . [0-9]+ (e [+-] [0-9]+)?
989 // Also, zeroMantissa is true iff all digits in the mantissa are zero
990 // (Todo: examine code more closely to see if this is really true.)
991
992 boolean f = (nextchr == 'F' || nextchr == 'f');
993 if (f || nextchr == 'D' || nextchr == 'd') {
994 this.append(nextchr);
995 m_nextchr = m_in.read();
996 } else m_nextchr = nextchr;
997 endingLoc = m_in.getLocation();
998
999 if (f) {
1000 Float result = Float.valueOf(s); // !throw NumberFormatException
1001 auxVal = result;
1002 if (result.isInfinite())
1003 ErrorSet.error(startingLoc, "Floating-point literal overflow");
1004 else if (! zeroMantissa && result.floatValue() == 0.0)
1005 ErrorSet.error(startingLoc, "Floating-point literal underflow");
1006 } else {
1007 Double result = Double.valueOf(s); // !throw NumberFormatException
1008 auxVal = result;
1009 if (result.isInfinite())
1010 ErrorSet.error(startingLoc, "Floating-point literal overflow");
1011 else if (! zeroMantissa && result.doubleValue() == 0.0D)
1012 ErrorSet.error(startingLoc, "Floating-point literal underflow");
1013 }
1014 return ttype = (f ? TagConstants.FLOATLIT : TagConstants.DOUBLELIT);
1015 } catch (IOException e) {
1016 ErrorSet.fatal(m_in.getLocation(), e.toString());
1017 return TagConstants.NULL; // Dummy
1018 }
1019 } //@ nowarn Exception; // NumberFormatException won't be thrown
1020
1021 /** Scans a punctuation string <em>or</em> a floating-point
1022 number. If input doesn't match either a floating-point number or
1023 any punctuation, returns <code>TagConstants.NULL</code>. Assumes
1024 <code>startingLoc</code> already filled in.
1025
1026 <p>The routine may change the mark arbitrarily.
1027
1028 <p> This method leaves <code>m_in</code> in a different state than
1029 the previous ones do. Ordinarily, <code>scanXXX</code> routines
1030 return with <code>m_nextchr</code> holding the character just
1031 after the token scanned and <code>m_in</code> pointing to the
1032 character after that. <code>scanPunctuation</code> does too, but
1033 only when the value returned is <em>not</em>
1034 <code>TagConstants.C_COMMENT</code> or
1035 <code>TagConstants.EOL_COMMENT</code>; in those two cases, it
1036 returns with <code>m_in</code> pointing to the character just
1037 after the token scanned and <code>m_nextchr</code> undefined.
1038 This aids in pragma processing. Also, if <code>TagConstants.NULL</code>
1039 is returned, then <code>m_nextchr</code> is undefined and
1040 <code>m_in</code> is where it was on entry.
1041 */
1042
1043 //@ requires m_in != null;
1044 //@ modifies ttype, auxVal, m_nextchr, textlen, endingLoc, text;
1045 //@ modifies m_in.marked;
1046 //@ ensures \result==ttype;
1047 private int scanPunctuation(int nextchr) {
1048 try {
1049 boolean possibleFloatingPointNumber = (nextchr == '.');
1050 text[0] = (char)nextchr;
1051 textlen = 1;
1052 PunctuationPrefixTree prefix = punctuationTable;
1053 PunctuationPrefixTree lastPunctuation = prefix;
1054 int lastPunctuationLength = 0;
1055 m_in.mark();
1056
1057 // The following loop, which has been unrolled once so the
1058 // first iteration can be handled specially, has the invariant:
1059 // prefix == null || depth(prefix) == textlen-1
1060 // nextchr == text[textlen-1]
1061 // (nextchr_0 + m_in_0) == (text[0,textlen) + m_in)
1062 // lastPunctuation != null && lastPunctuation is last valid punct seen
1063 // lastPunctuationLength == depth(lastPunctuation)
1064 // (nextchr_0+m_in_0)==(text[0,lastPunctuationLength]+m_in.resetedtext)
1065 // Stating part of this invariant graphically:
1066 // l t l = lastPunctuationLength; t = textlen
1067 // text: abcdefghij
1068 // nextchr_0+
1069 // m_in_0: abcdefghijklmnop
1070 // r nm r = resetted text, n = nextchr, m = m_in
1071
1072 int index = nextchr - '!';
1073 if (index < 0 || PunctuationPrefixTree.CHILDLEN <= index)
1074 prefix = null;
1075 else
1076 prefix = prefix.children[nextchr - '!'];
1077 if (prefix != null && prefix.code != TagConstants.NULL) {
1078 lastPunctuation = prefix;
1079 lastPunctuationLength = textlen;
1080 m_in.mark();
1081 }
1082 nextchr = m_in.read();
1083 if (possibleFloatingPointNumber && Character.isDigit((char)nextchr)) {
1084 // Deal with special case of floating-point numbers
1085 m_in.clearMark();
1086 return finishFloatingPointLiteral(nextchr);
1087 }
1088 this.append(nextchr);
1089 while (prefix != null) {
1090 index = nextchr - '!';
1091 if (index < 0 || PunctuationPrefixTree.CHILDLEN <= index)
1092 prefix = null;
1093 else
1094 prefix = prefix.children[nextchr - '!'];
1095 if (prefix != null && prefix.code != TagConstants.NULL) {
1096 lastPunctuation = prefix;
1097 lastPunctuationLength = textlen;
1098 m_in.mark();
1099 }
1100 nextchr = m_in.read();
1101 this.append(nextchr);
1102 }
1103 // End of unrolled loop
1104
1105 m_in.reset();
1106 textlen = lastPunctuationLength;
1107 endingLoc = m_in.getLocation();
1108 ttype = lastPunctuation.code;
1109 if (ttype != TagConstants.C_COMMENT &&
1110 ttype != TagConstants.EOL_COMMENT &&
1111 ttype != TagConstants.NULL) {
1112 m_nextchr = m_in.read();
1113 }
1114 return ttype;
1115 } catch (IOException e) {
1116 ErrorSet.fatal(m_in.getLocation(), e.toString());
1117 return TagConstants.NULL; // Dummy
1118 }
1119 }
1120
1121 /** Scans a Java extension. If input doesn't match any Java extension,
1122 returns <code>TagConstants.NULL</code>. Assumes <code>startingLoc</code>
1123 already filled in, and assumes <code>textlen</code> is 0.
1124
1125 <p>The routine may change the mark arbitrarily.
1126
1127 <p> If a Java extension is matched, returns with <code>m_nextchr</code>
1128 holding the character just after the token scanned and <code>m_in</code>
1129 pointing to the character after that.
1130 */
1131 //@ requires m_in != null;
1132 //@ requires textlen == 0;
1133 //@ modifies ttype, auxVal, m_nextchr;
1134 //@ modifies m_in.marked;
1135 //@ ensures \result==ttype;
1136 protected int scanJavaExtensions(int nextchr) {
1137 ttype = TagConstants.NULL;
1138 return ttype;
1139 }
1140
1141 //// Internal char buffer used by string scanner.
1142
1143 // Having this separate from text allows us to accumulate both the
1144 // original input in text and the escape-converted input in stringLit
1145
1146 //@ private invariant stringLit != null;
1147 //@ private invariant stringLit.length>=64;
1148 private char[] stringLit = new char[64];
1149
1150 //@ private invariant stringLitLen>=0;
1151 private int stringLitLen = 0;
1152
1153 private void stringLitAppend(int c) {
1154 try {
1155 stringLit[stringLitLen] = (char)c; //@ nowarn IndexTooBig; // caught
1156 } catch (ArrayIndexOutOfBoundsException e) {
1157 char[] newstringLit = new char[stringLitLen + 128];
1158 System.arraycopy(stringLit, 0, newstringLit, 0, stringLitLen);
1159 stringLit = newstringLit;
1160 stringLit[stringLitLen] = (char)c;
1161 }
1162 stringLitLen++;
1163 }
1164
1165
1166 //// The keyword and punctuation tables, plus methods for changing them
1167
1168 //@ private invariant punctuationTable != null;
1169 private PunctuationPrefixTree punctuationTable = new PunctuationPrefixTree();
1170
1171 /**
1172 * Unenforceable invariant: all tokenTypes in this table do not
1173 * require a non-null auxVal. (cf. Token.auxVal).
1174 */
1175 // Old specs from original full JML spec files. Must be
1176 // rewritten for current java.util.Hashtable specs.
1177 /* invariant (keywords != null) ==> (keywords.keyType == \type(Identifier) &&
1178 keywords.elementType == \type(Integer)); */
1179 protected Hashtable keywords = null;
1180
1181 protected boolean javakeywords = false, onlyjavakeywords = false;
1182 //@ invariant onlyjavakeywords == (javakeywords && keywords == null);
1183
1184 static {
1185 // When class initializes, change the <CODE>tokenType</CODE> field
1186 // of <CODE>Identifier</CODE> instances associated with keywords.
1187 for(int code = TagConstants.FIRST_KEYWORD;
1188 code <= TagConstants.LAST_KEYWORD;
1189 code++) {
1190 Identifier idn = Identifier.intern(TagConstants.toString(code));
1191 _SpecialParserInterface.setTokenType(idn, code);
1192 }
1193 }
1194
1195 /** Add all of Java's keywords to the scanner. The token codes used
1196 for these keywords are those defined by the <code>TagConstants</code>
1197 class. Requires that none of these keywords have been added
1198 already. */
1199
1200 public void addJavaKeywords() {
1201 javakeywords = true;
1202 if (keywords != null)
1203 // New values override existing ones...
1204 for(int i = TagConstants.FIRST_KEYWORD;
1205 i <= TagConstants.LAST_KEYWORD;
1206 i++)
1207 keywords.remove(Identifier.intern(TagConstants.toString(i)));
1208 else onlyjavakeywords = true;
1209 }
1210
1211
1212 /**
1213 * Add a keyword to a <code>Lex</code> object with the given code.
1214 * Requires that <code>newkeyword</code> is a Java identifier and
1215 * that <code>code</code> is not <code>TagConstants.NULL</code> or
1216 * a tokenType that requires auxVal to be non-null;
1217 * (cf. Token.auxVal).
1218 *
1219 * Also requires that the keyword hasn't already been added.
1220 */
1221 //@ requires code != TagConstants.NULL;
1222 //@ requires code != TagConstants.BOOLEANLIT;
1223 //@ requires code != TagConstants.INTLIT;
1224 //@ requires code != TagConstants.LONGLIT;
1225 //@ requires code != TagConstants.DOUBLELIT;
1226 //@ requires code != TagConstants.STRINGLIT;
1227 //@ requires code != TagConstants.CHARLIT;
1228 //@ requires code != TagConstants.LEXICALPRAGMA;
1229 //@ requires code != TagConstants.MODIFIERPRAGMA;
1230 //@ requires code != TagConstants.STMTPRAGMA;
1231 //@ requires code != TagConstants.TYPEDECLELEMPRAGMA;
1232 //@ requires code != TagConstants.TYPEMODIFIERPRAGMA;
1233 public void addKeyword(/*@ non_null @*/ String newkeyword, int code) {
1234 Assert.precondition(code != TagConstants.NULL);
1235 if (keywords == null) {
1236 keywords = new Hashtable();
1237 // Old specs from original full JML spec files. Must be
1238 // rewritten for current java.util.Hashtable specs.
1239 // set keywords.keyType = \type(Identifier);
1240 // set keywords.elementType = \type(Integer);
1241 }
1242 keywords.put(Identifier.intern(newkeyword), new Integer(code));
1243 onlyjavakeywords = false;
1244 }
1245
1246
1247
1248 /** Add all of Java's punctuation strings to the scanner. The codes
1249 used for these punctuation strings are the found in the
1250 <code>TagConstants</code> class. Requires that none of these
1251 punctuation strings have been added before. */
1252
1253 public void addJavaPunctuation() {
1254 Assert.notFalse(TagConstants.punctuationStrings.length ==
1255 TagConstants.punctuationCodes.length);
1256 for(int i = 0; i < TagConstants.punctuationStrings.length; i++)
1257 addPunctuation(TagConstants.punctuationStrings[i],
1258 TagConstants.punctuationCodes[i]);
1259 }
1260
1261 /** Add a punctuation string to a scanner associated with a given
1262 code. Requires that the characters in the punctuation string are
1263 all punctuation characters, that is, non-alphanumeric ASCII
1264 characters whose codes are between 33 ('!') and 126 ('~')
1265 inclusive. Also requires that the code is not
1266 <code>TagConstants.NULL</code> and that the punctuation string
1267 hasn't already been added. */
1268 //@ requires punctuation != null;
1269 //@ requires code != TagConstants.NULL;
1270 //@ requires code != TagConstants.IDENT;
1271 //@ requires code != TagConstants.BOOLEANLIT;
1272 //@ requires code != TagConstants.INTLIT;
1273 //@ requires code != TagConstants.LONGLIT;
1274 //@ requires code != TagConstants.FLOATLIT;
1275 //@ requires code != TagConstants.DOUBLELIT;
1276 //@ requires code != TagConstants.STRINGLIT;
1277 //@ requires code != TagConstants.CHARLIT;
1278 //@ requires code != TagConstants.LEXICALPRAGMA;
1279 //@ requires code != TagConstants.MODIFIERPRAGMA;
1280 //@ requires code != TagConstants.STMTPRAGMA;
1281 //@ requires code != TagConstants.TYPEDECLELEMPRAGMA;
1282 //@ requires code != TagConstants.TYPEMODIFIERPRAGMA;
1283 public void addPunctuation(String punctuation, int code) {
1284 Assert.precondition(code != TagConstants.NULL);
1285 PunctuationPrefixTree prefix = punctuationTable;
1286 for(int j = 0; j < punctuation.length(); j++) {
1287 int c = punctuation.charAt(j);
1288 Assert.precondition( //@ nowarn Pre;
1289 ('!' <= c && c <= '/') || (':' <= c && c <= '@')
1290 || ('[' <= c && c <= '`') || ('{' <= c && c <= '~'));
1291 int index = c - '!';
1292 PunctuationPrefixTree child = prefix.children[index];
1293 if (child == null)
1294 child = prefix.children[index] = new PunctuationPrefixTree();
1295 prefix = child;
1296 }
1297 prefix.code = code;
1298 }
1299
1300
1301 //// Diagnostic routines
1302
1303 /** Checks invariants (assumes that <CODE>Token</CODE> fields
1304 haven't been mucked with by outside code). <code>prefix</code> is
1305 used to prefix error messages with context provided by the
1306 caller. */
1307
1308 public void zzz(String prefix) {
1309 super.zzz();
1310 lookaheadq.zzz("Near character "
1311 + Location.toOffset(startingLoc)
1312 + ": " + prefix);
1313 }
1314 }
1315
1316 class PunctuationPrefixTree {
1317 public static final int CHILDLEN = 1 + '~' - '!';
1318
1319 /*@ invariant code != TagConstants.IDENT &&
1320 code != TagConstants.BOOLEANLIT &&
1321 code != TagConstants.INTLIT &&
1322 code != TagConstants.LONGLIT &&
1323 code != TagConstants.FLOATLIT &&
1324 code != TagConstants.DOUBLELIT &&
1325 code != TagConstants.STRINGLIT &&
1326 code != TagConstants.CHARLIT &&
1327 code != TagConstants.LEXICALPRAGMA &&
1328 code != TagConstants.MODIFIERPRAGMA &&
1329 code != TagConstants.STMTPRAGMA &&
1330 code != TagConstants.TYPEDECLELEMPRAGMA &&
1331 code != TagConstants.TYPEMODIFIERPRAGMA; */
1332 public int code = TagConstants.NULL; // ! NULL ==> a punctuation string
1333
1334 //@ invariant children != null;
1335 //@ invariant children.length == CHILDLEN;
1336 //@ invariant \typeof(children) == \type(PunctuationPrefixTree[]);
1337 public PunctuationPrefixTree[] children
1338 = new PunctuationPrefixTree[CHILDLEN];
1339 }