// LLAnalyze -- Nathaniel Nystrom, February 2000
// For use in Cornell University Computer Science 412/413

// Class to represent a context-free grammar.

package Iota.util.grammar;

import java.util.*;

public class Grammar
{
    Nonterminal start;
    List rules;
    Set terms;
    Set nonterms;

    // Special end-of-file symbol used in follow sets.
    Terminal eof;

    // Next available symbol indices
    int nextNonterm;
    int nextTerm;

    // NULLABLE, FIRST, and FOLLOW sets for nonterminals, indexed by
    // symbol index.
    boolean[] nullable;
    Set[] first;
    Set[] follow;

    // Construct a new grammar.  The rules can be in EBNF form.
    // The constructor translates translates into BNF form.
    //
    // We assume all terminals are uniquely numbered and all nonterminals
    // are uniquely numbered (see Symbol.getIndex()).
    //
    // We assume no terminal is called "$".
    //
    public Grammar(Nonterminal start, List rules, Set terms, Set nonterms)
    {
	this.start = start;
	this.rules = new LinkedList(rules);
	this.terms = new HashSet(terms);
	this.nonterms = new HashSet(nonterms);

	this.nextNonterm = getNextSymbolIndex(nonterms);
	this.nextTerm = getNextSymbolIndex(terms);

	this.eof = new Terminal("$", nextTerm++);
	terms.add(eof);

	verify();

	canonicalize();

	computeNullable();
	computeFirst();
	computeFollow();
    }

    public Nonterminal getStart()
    {
	return start;
    }

    public Terminal getEOF()
    {
	return eof;
    }

    public List getRules()
    {
	return new ArrayList(rules);
    }

    public Set getTerminals()
    {
	return new HashSet(terms);
    }

    public Set getNonterminals()
    {
	return new HashSet(nonterms);
    }

    public boolean isNullable(Nonterminal s)
    {
	return nullable[s.getIndex()];
    }

    public Set getFirst(Nonterminal s)
    {
	if (first[s.getIndex()] == null) {
	    return new HashSet();
	}
	return first[s.getIndex()];
    }

    public Set getFollow(Nonterminal s)
    {
	if (follow[s.getIndex()] == null) {
	    return new HashSet();
	}
	return follow[s.getIndex()];
    }

    public int getNonterminalSize()
    {
	return nextNonterm;
    }

    public int getTerminalSize()
    {
	return nextTerm;
    }

    private int getNextSymbolIndex(Set symbols)
    {
	int next = 0;

	Iterator it = symbols.iterator();

	while (it.hasNext()) {
	    Symbol s = (Symbol) it.next();

	    if (s.getIndex() >= next) {
		next = s.getIndex() + 1;
	    }
	}

	return next;
    }

    // Compute nullable(X) for all nonterminals, X.
    private void computeNullable()
    {
	// Initialize flags to false.
	nullable = new boolean[nextNonterm];

	boolean changed = true;

	while (changed) {
	    changed = false;

	    Iterator it = rules.iterator();

	    while (it.hasNext()) {
		Rule rule = (Rule) it.next();

		Nonterminal lhs = rule.getLHS();
		List rhs = rule.getRHS();

		boolean allNullable = true;

		Iterator rit = rhs.iterator();

		while (rit.hasNext()) {
		    Expr e = (Expr) rit.next();

		    if (e instanceof Nonterminal) {
			Nonterminal nt = (Nonterminal) e;

			if (! nullable[nt.getIndex()]) {
			    allNullable = false;
			    break;
			}
		    }
		    else if (e instanceof Terminal) {
			allNullable = false;
			break;
		    }
		    else {
			throw new RuntimeException();
		    }
		}

		if (allNullable && ! nullable[lhs.getIndex()]) {
		    changed = true;
		    nullable[lhs.getIndex()] = true;
		}
	    }
	}
    }

    // Compute first(X) for all nonterminals, X.
    private void computeFirst()
    {
	// Initialize sets to empty.
	first = new Set[nextNonterm];

	for (int i = 0; i < first.length; i++) {
	    first[i] = new HashSet();
	}

	boolean changed = true;

	while (changed) {
	    changed = false;

	    Iterator it = rules.iterator();

	    while (it.hasNext()) {
		Rule rule = (Rule) it.next();

		Nonterminal lhs = rule.getLHS();
		List rhs = rule.getRHS();

		int oldSize = first[lhs.getIndex()].size();
		Set firstRHS = new HashSet();

		// Iterate over the RHS = Y1 Y2 ... Yk and add in FIRST(Yi)
		// until Yi is not nullable.
		Iterator rit = rhs.iterator();

		while (rit.hasNext()) {
		    Expr e = (Expr) rit.next();

		    if (e instanceof Nonterminal) {
			Nonterminal nt = (Nonterminal) e;

			firstRHS.addAll(first[nt.getIndex()]);

			if (! nullable[nt.getIndex()]) {
			    break;
			}
		    }
		    else if (e instanceof Terminal) {
			firstRHS.add(e);
			break;
		    }
		    else {
			throw new RuntimeException();
		    }
		}

		// Union FIRST(rhs) into FIRST(lhs) and see if the set grew.
		first[lhs.getIndex()].addAll(firstRHS);
		
		if (first[lhs.getIndex()].size() != oldSize) {
		    changed = true;
		}
	    }
	}
    }

    // Compute follow(X) for all nonterminals, X.
    private void computeFollow()
    {
	// Initialize sets to empty.
	follow = new Set[nextNonterm];

	for (int i = 0; i < first.length; i++) {
	    follow[i] = new HashSet();
	}

	follow[start.getIndex()].add(eof);

	boolean changed = true;

	while (changed) {
	    changed = false;

	    Iterator it = rules.iterator();

	    while (it.hasNext()) {
		Rule rule = (Rule) it.next();

		Nonterminal lhs = rule.getLHS();
		LinkedList rhs = new LinkedList(rule.getRHS());

		while (! rhs.isEmpty()) {
		    Expr e = (Expr) rhs.removeFirst();

		    if (e instanceof Nonterminal) {
			Nonterminal s = (Nonterminal) e;

			// The rule we're examining looks like:
			//     X -> Y1 Y2 ... Yi Yi+1 ... Yk
			// s is Yi and rhs contains Yi+1 ... Yk
			// FOLLOW(Yi) contains FIRST(Yi+1 ... Yk)
			// and also FOLLOW(Yi+1 ... Yi+j) if that is nullable.

			Set followSym = new HashSet();
			boolean allNullable = true;

			Iterator rit = rhs.iterator();

			while (rit.hasNext()) {
			    Expr e2 = (Expr) rit.next();

			    if (e2 instanceof Nonterminal) {
				Nonterminal nt = (Nonterminal) e2;

				followSym.addAll(first[nt.getIndex()]);

				if (! nullable[nt.getIndex()]) {
				    allNullable = false;
				    break;
				}
			    }
			    else if (e2 instanceof Terminal) {
				Terminal t = (Terminal) e2;
				followSym.add(t);
				allNullable = false;
				break;
			    }
			    else {
				throw new RuntimeException();
			    }
			}

			if (allNullable) {
			    followSym.addAll(follow[lhs.getIndex()]);
			}

			int oldSize = follow[s.getIndex()].size();

			follow[s.getIndex()].addAll(followSym);

			if (follow[s.getIndex()].size() != oldSize) {
			    changed = true;
			}
		    }
		    else if (e instanceof Terminal) {
		    }
		    else {
			throw new RuntimeException();
		    }
		}
	    }
	}
    }

    // Return the set of strings used in nonterminals names.
    private Set getNonterminalNames()
    {
	Set names = new HashSet();

	Iterator it = nonterms.iterator();

	while (it.hasNext()) {
	    Nonterminal s = (Nonterminal) it.next();
	    names.add(s.getString());
	}

	return names;
    }

    // Generate a uniquely-named new nonterminal 
    private Nonterminal newNonterminal(Nonterminal base, Set names)
    {
	for (int i = 1; ; i++) {
	    String s = base.getString() + i;
	    if (! names.contains(s)) {
		names.add(s);
		Nonterminal n = new Nonterminal(s, nextNonterm++);
		nonterms.add(n);
		return n;
	    }
	}
    } 

    // Replace rules with Star, Plus, and Question expressions with rules
    // without these features.
    private void canonicalize()
    {
	// Get all the nonterminal names so we can generate unique new names.
	Set names = getNonterminalNames();

	// Go through all the rules, adding new rules to the top of the
	// stack for later processing (in case of nested *, +, ?).
	// When done processing a rule, add it to the canonicalRules list.
	LinkedList canonicalRules = new LinkedList();
	LinkedList stack = new LinkedList(rules);

	while (! stack.isEmpty()) {
	    Rule rule = (Rule) stack.removeFirst();
	    List rhs = rule.getRHS();

	    List newRHS = new LinkedList();
	    boolean rhsChanged = false;

	    Iterator it = rhs.iterator();

	    while (it.hasNext()) {
		Expr e = (Expr) it.next();

		if (e instanceof Symbol) {
		    // Just copy symbols.
		    newRHS.add(e);
		}
		else if (e instanceof Question) {
		    // e -> x [ a b c ] y
		    // becomes:
		    // e -> x e' y
		    // e' -> a b c | empty

		    // Create e' and form the new rule: e -> x e' y.
		    Nonterminal s = newNonterminal(rule.getLHS(), names);
		    newRHS.add(s);
		    rhsChanged = true;

		    // New rule: e' -> empty
		    List c2 = new LinkedList();
		    stack.addFirst(new Rule(s, c2));

		    // New rule: e' -> a b c
		    List c1 = new LinkedList(((Question) e).getContents());
		    stack.addFirst(new Rule(s, c1));
		}
		else if (e instanceof Star) {
		    // e -> x ( a b c ) * y
		    // becomes:
		    // e -> x e' y
		    // e' -> a b c e' | empty

		    // Create e' and form the new rule: e -> x e' y.
		    Nonterminal s = newNonterminal(rule.getLHS(), names);
		    newRHS.add(s);
		    rhsChanged = true;

		    // New rule: e' -> empty
		    List c2 = new LinkedList();
		    stack.addFirst(new Rule(s, c2));

		    // New rule: e' -> a b c e'
		    List c1 = new LinkedList(((Star) e).getContents());
		    c1.add(s);
		    stack.addFirst(new Rule(s, c1));
		}
		else if (e instanceof Plus) {
		    // e -> x ( a b c ) + y
		    // becomes:
		    // e -> x e' y
		    // e' -> a b c e''
		    // e'' -> a b c e'' | empty

		    // Create e' and form the new rule: e -> x e' y.
		    Nonterminal s1 = newNonterminal(rule.getLHS(), names);
		    newRHS.add(s1);
		    rhsChanged = true;

		    // Create e''.
		    Nonterminal s2 = newNonterminal(rule.getLHS(), names);

		    // New rule: e'' -> empty
		    List c3 = new LinkedList();
		    stack.addFirst(new Rule(s2, c3));

		    // New rule: e'' -> a b c e''
		    List c2 = new LinkedList(((Plus) e).getContents());
		    c2.add(s2);
		    stack.addFirst(new Rule(s2, c2));

		    // New rule: e' -> a b c e''
		    List c1 = new LinkedList(((Plus) e).getContents());
		    c1.add(s2);
		    stack.addFirst(new Rule(s1, c1));
		}
	    }

	    if (rhsChanged) {
		// Push the new rule from the changed RHS.
		stack.addFirst(new Rule(rule.getLHS(), newRHS));
	    }
	    else {
		canonicalRules.add(rule);
	    }
	}

	rules = canonicalRules;
    }

    // Verify some properties about the grammar:
    //        1. Start symbol defined.
    //        2. No unused nonterminals.
    //        3. All nonterminals reachable from the start symbol.
    private void verify()
    {
	if (start == null) {
	    error("No start symbol specified.");
	}

	// Map from nonterminals to sets of rules with that nonterm on the LHS. 
	Map rulesMap = new HashMap();

	Iterator it = rules.iterator();

	while (it.hasNext()) {
	    Rule rule = (Rule) it.next();

	    Set ruleSet = (Set) rulesMap.get(rule.getLHS());

	    if (ruleSet == null) {
		ruleSet = new HashSet();
		rulesMap.put(rule.getLHS(), ruleSet);
	    }

	    ruleSet.add(rule);
	}
	
	// Check that all rules are reachable.
	Set reachable = new HashSet();

	LinkedList worklist = new LinkedList();
	worklist.add(start);

	while (! worklist.isEmpty()) {
	    Nonterminal s = (Nonterminal) worklist.removeFirst();

	    reachable.add(s);

	    Set ruleSet = (Set) rulesMap.get(s);

	    if (ruleSet == null || ruleSet.isEmpty()) {
		error("No rules for nonterminal " + s);
	    }

	    Iterator sit = ruleSet.iterator();

	    while (sit.hasNext()) {
		Rule rule = (Rule) sit.next();

		Set rhsNonterms = collectNonterms(rule.getRHS());
		
		rhsNonterms.removeAll(reachable);
		worklist.addAll(rhsNonterms);
	    }
	}

	// Print unreachable nonterminals.
	Set unreachable = new HashSet(nonterms);
	unreachable.removeAll(reachable);

	if (! unreachable.isEmpty()) {
	    it = unreachable.iterator();

	    while (it.hasNext()) {
		Nonterminal s = (Nonterminal) it.next();
		System.out.println("unreachable nonterminal: " + s);
	    }

	    System.exit(1);
	}
    }

    // Collect all the nonterminals in a list of expressions.
    Set collectNonterms(List exprs)
    {
	Set nonterms = new HashSet();

	Iterator it = exprs.iterator();

	while (it.hasNext()) {
	    Expr e = (Expr) it.next();

	    if (e instanceof Nonterminal) {
		nonterms.add(e);
	    }
	    else if (e instanceof Star) {
		nonterms.addAll(collectNonterms(((Star) e).getContents()));
	    }
	    else if (e instanceof Plus) {
		nonterms.addAll(collectNonterms(((Plus) e).getContents()));
	    }
	    else if (e instanceof Question) {
		nonterms.addAll(collectNonterms(((Question) e).getContents()));
	    }
	}

	return nonterms;
    }

    private void error(String message)
    {
	System.out.println(message);
	System.exit(1);
    }
}
