import java.io.*;
import java.util.*;

/**
  Converts text read from a Reader into a sequence of classified tokens,
  using the provided dictionary object to do the classification.

  Lexical rules, as implemented by consume():
    - a run of letters and apostrophes forms one word token;
    - a run of underscores forms one "blank" token;
    - whitespace separates tokens and is otherwise discarded;
    - every other character becomes a single-character token.

  NOTE(review): input is examined one char at a time, and
  Character.isLetter(char) does not handle supplementary characters, so
  text outside the Basic Multilingual Plane is presumably split into
  one-char tokens -- confirm whether such input needs to be supported.
*/
class Tokenizer {
	private final Dictionary dictionary;
	private final Reader     input;

	//Set once a read fails; from then on only Error tokens are produced
	private IOException errorObject;
	//Set once the reader reports end of stream, so it is never read again
	private boolean     endOfInput = false;

	//Tokens that have been lexed but not yet handed out
	private final Queue<Token> tokens = new ArrayDeque<Token>();

	private final char[]        readBuffer    = new char[1024];
	//Characters of the token currently being assembled; survives across
	//reads, since a token may straddle two buffer fills
	private final StringBuilder currentString = new StringBuilder();
	//True while currentString holds a run of underscores
	private boolean             inBlank       = false;

	/**
	  Creates a tokenizer that uses the given dictionary for
	  classification, and reads in from the provided Reader.
	*/
	public Tokenizer(Dictionary dictionary, Reader input) {
		this.dictionary = dictionary;
		this.input      = input;
	}

	/**
	  Removes the next available token and returns it. If no
	  more tokens are available, returns a token with the category
	  TokenCategory.EndOfFile and an empty value, and keeps doing so
	  for as long as the method is called. The category of every other
	  token is whatever the dictionary's classifyWord() reports for it
	  (per the original documentation, TokenCategory.Unknown when the
	  word cannot be classified). If there was an I/O error during the
	  input, a TokenCategory.Error token carrying the exception message
	  is returned by the call that hit the error and by every call after
	  it.
	*/
	public Token nextToken() {
		bufferIfNeeded();

		//Checked after buffering, so that the very call which triggers
		//the failure reports it rather than a bogus end of file
		if (errorObject != null) {
			return new Token(TokenCategory.Error, errorObject.getMessage());
		}
		if (tokens.isEmpty()) {
			return new Token(TokenCategory.EndOfFile, "");
		}
		return tokens.remove();
	}

	/**
	   Returns the category of the next available token, but does not
	   affect the list of upcoming tokens. If no more tokens are
	   available, returns TokenCategory.EndOfFile, and keeps doing so
	   for every subsequent call of the method. If there was an I/O
	   error during the input, TokenCategory.Error is returned by the
	   call that hit the error and by every call after it.
	*/
	public TokenCategory lookahead() {
		bufferIfNeeded();

		if (errorObject != null) {
			return TokenCategory.Error;
		}
		if (tokens.isEmpty()) {
			return TokenCategory.EndOfFile;
		}
		return tokens.peek().category;
	}

	//Reads from the input until at least one token is queued, the input
	//is exhausted, or a read fails. Does nothing if tokens are already
	//queued or the input has previously ended or failed.
	private void bufferIfNeeded() {
		while (tokens.isEmpty() && errorObject == null && !endOfInput) {
			int read;

			try {
				read = input.read(readBuffer, 0, readBuffer.length);
			} catch (IOException ie) {
				errorObject = ie;
				return;
			}

			if (read == -1) { //End of stream
				endOfInput = true;
				completeToken(); //Flush whatever was still being assembled
				return;
			}

			for (int pos = 0; pos < read; ++pos) {
				consume(readBuffer[pos]);
			}
		}
	}

	//Feeds one character through the lexer state (currentString, inBlank),
	//queueing any tokens that it completes.
	private void consume(char letter) {
		//Letters and apostrophes (potential possessives) make up words
		boolean wordChar = Character.isLetter(letter) || letter == '\'';

		//If we're lexing a blank, keep doing it as long as we see _
		if (inBlank) {
			if (letter == '_') {
				currentString.append(letter);
				return;
			}
			completeToken();
		}

		//Anything that cannot be part of a word ends the previous token
		if (!wordChar) {
			completeToken();
		}

		//Other than ending words, whitespace is ignored
		if (Character.isWhitespace(letter)) {
			return;
		}

		//An underscore opens a blank
		if (letter == '_') {
			inBlank = true;
		}

		//All other stuff goes into the current token...
		currentString.append(letter);

		//... but if it is neither a word character nor part of a blank it
		//must be punctuation, which forms a token by itself
		if (!wordChar && letter != '_') {
			completeToken();
		}
	}

	//If currentString is non-empty, makes a token out of it, queues it,
	//and resets currentString and inBlank. Otherwise does nothing.
	private void completeToken() {
		if (currentString.length() == 0) {
			return;
		}

		String value = currentString.toString();
		tokens.add(new Token(dictionary.classifyWord(value), value));
		currentString.setLength(0);
		inBlank = false;
	}
}
