package org.himinbi.parser;

import java.util.Hashtable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.List;
import java.util.Collection;
import java.util.Collections;
import java.util.Vector;
import java.util.Stack;
import java.util.Map;

import java.io.IOException;

import org.apache.log4j.Logger;
import org.apache.log4j.Level;

import org.himinbi.tokenizer.Token;
import org.himinbi.tokenizer.TokenStream;

public class LLParser extends ParserBase
{
    /**
     * Actions represents the parse table.
     */
    Map actions;

    /**
     * Element tro begin the parse with.
     */
    Element startSymbol;

    Grammar grammar;

    /**
     * Log4j logging category
     */
    static Logger log = Logger.getLogger(LLParser.class);

    public LLParser()
    {
    }

    public void setGrammar(Grammar grammar)
    {
        actions = generateParseTable(grammar);
        this.grammar = grammar;
        startSymbol = grammar.getStartSymbol();
    }

    public Map getParseTable()
    {
        return actions;
    }

    /**
     * Generates an LL parse table with The keys are non-terminals and
     * each returns another map that is keyed with terminals and returns
     * a Rule.
     */
    public static Map generateParseTable(Grammar grammar)
    {
        /**
         * The heart of a LL tabular parser is the parse table which
         * tells which grammar rule to choose next based on the
         * current grammar rule and the next terminal in the stream.
         *
         * The parse table is generated by first creating two
         * collections of terminals:
         *  first(A) is the set of all terminals that can appear first
         *   in an expansion of the non-terminal A
         *  follow(A) is the set of all terminals that can appear
         *   following the non-terminal A. This does not include
         *   the empty terminal
         */
        
        /**
         * To properly find the first and follow sets it is necessary to
         * which terminals may expand to the empty terminal. This includes
         * not only nonterminals that expand to the empty nonterminal, 
         * but also nonterminals that expand to a list of nonterminals
         * that expand to the empty terminal.
         *
         * To find which nonterminals may be empty iterate over all
         * the rules. If a nonterminal expands to the empty terminal
         * add it to the list or if consists entiery on nonterminals
         * that expand to the empty terminal add it. Repeat the iterations
         * until an entire iteration is done without making a change.
         */
        Map firstSets = new HashMap();
        Map followSets = new HashMap();

        /**
         * It is known that all the elements of the first and follow
         * sets are terminals, so they are stored as strings for
         * easier access without ant loss of information.
         *
         * The first for any terminal is itself.
         */
        Iterator allTerminals = grammar.getTerminals().iterator();
        while(allTerminals.hasNext())
        {
            Element next = (Element)allTerminals.next();
            firstSets.put(next,
                          Collections.singleton(next.getValue()));
        }

        /**
         * Every nonterminal has a first and follow set though it
         * may be empty for some elements (frequently follows is
         * empty for the start symbol.)
         */
        Iterator allNonterminals = grammar.getNonterminals().iterator();
        while(allNonterminals.hasNext())
        {
            Element next = (Element)allNonterminals.next();
            firstSets.put(next, new HashSet());
            followSets.put(next, new HashSet());
        }

        Collection emptyElements = getEmptyElements(grammar);

        if(log.isDebugEnabled())
        {
            log.debug("Empties:");
            Iterator empties = emptyElements.iterator();
            while(empties.hasNext())
            {
                log.debug(" " + empties.next());
            }
        }

        /**
         * To generate the firsts again iterate over all the rules.
         * Go through the expansion until then end is reached or a
         * nonempty element is found. Add all of the firsts for
         * each elements passed should be added.
         */
        boolean changesMade;
        do
        {
            changesMade = false;
            Iterator rules = grammar.getRules().iterator();
            while(rules.hasNext())
            {
                Rule rule = (Rule)rules.next();
                Set firsts = (Set)firstSets.get(rule.getProduct());
                Iterator elements = rule.getExpansion().iterator();
                Element nextElement;
                do
                {
                    nextElement = (Element)elements.next();
                    if(nextElement != Element.EMPTY_TERMINAL)
                    {
                        // The empty terminal does not appear in any first set
                        changesMade = (changesMade ||
                                       firsts.addAll((Set)firstSets.get(nextElement)));
                    }
                } while(emptyElements.contains(nextElement) && elements.hasNext());
            }
        } while(changesMade);

        if(log.isDebugEnabled())
        {
            printSets("First Sets:", firstSets, Level.DEBUG);
        }

        /**
         * The follows are found similarly to the firsts.
         * Again all of the rules are iterated over.
         * If an element, A, is a non-terminal then:
         *  If the next element, B, is set then add the firsts
         *   for B to the follows for A.
         *  If there is no next element then add the follows
         *   for the non-terminal that this rule is an expansion
         *   of to the follows for A.
         *  If B can be empty then repeat the process for the
         *   element following B.
         * Continue doing this until all the rules are interated
         *  over without making any changes.
         */
        do
        {
            changesMade = false;
            Iterator rules = grammar.getRules().iterator();
            while(rules.hasNext())
            {
                Rule rule = (Rule)rules.next();
                List elements = rule.getExpansion();
                int currentIndex = elements.size() - 1;
                Set follows = new HashSet();
                
                while(currentIndex >= 0)
                {
                    /**
                     * The easiest way to handle this is to walk backward
                     * through the expansion. Start at the end of the
                     * list with the follows for the product of the rule.
                     * Step backward til the first element is reached.
                     * Each time a step backward is taken if the element
                     * that is being stepped over can be empty then leave
                     * the follows that are already present. Either way
                     * add the firsts for the element being stepped over.
                     */
                    Element currentElement =
                        (Element)elements.get(currentIndex);
                    // Terminals don't need to track follows
                    if(currentElement.getType() == Element.NONTERMINAL_TYPE)
                    {
                        if(currentIndex == elements.size() - 1)
                        {
                            // If we are at the end of a rule then the follows
                            //  for this element are the things that can follow
                            //  the nonterminal that this is an expansion of
                            follows.addAll((Set)followSets.get(rule.getProduct()));
                        }
                        else
                        {
                            // Otherwise add the first things that can be seen
                            //  in the expansion of what follows this element
                            follows.addAll((Set)firstSets.
                                           get(elements.get(currentIndex + 1)));
                        }

                        changesMade = (changesMade ||
                                       ((Set)followSets.get
                                        (currentElement)).addAll(follows));

                        // Whenever an element may be empty then all of the
                        //  current follows will also hold for the next element
                        if(!emptyElements.contains(currentElement))
                        {
                            follows.clear();
                        }
                    }
                    currentIndex--;
                }
            }
        } while(changesMade);

        if(log.isDebugEnabled())
        {
            printSets("Follow Sets:", followSets, Level.DEBUG);
        }
        
        /**
         * Using the first and follow sets it is possible to generate the
         *  parse table for this grammar.
         *
         * Recall:
         *  first(A) is the set of all terminals that can appear first in an
         *   expansion of the non-terminal A including the empty terminal
         *  follow(A) is the set of all terminals that can appear following an
         *   expansion of the non-terminal A
         *
         * Using these we can define:
         *  action(A, a) which is the rule to use to expand the non-terminal A
         *   given the next terminal to be processed, a
         *
         * This is done by iterating over all the rules. For each rule, R,
         *  which is an expansion of A:
         *  If R starts with a terminal, b, then action(A, b) = R
         *  If R starts with a non-terminal, B, then action(A, b) = R for each
         *   terminal, b in first(B). Additionally, if first(B) contains
         *   the empty terminal then then action(A, c) = R for each terminal,
         *   c in follow(B)
         */
        Iterator rules = grammar.getRules().iterator();
        Hashtable actions = new Hashtable();
        while(rules.hasNext())
        {
            Rule rule = (Rule)rules.next();
            Element product = (Element)rule.getProduct();
            Hashtable action = (Hashtable)actions.get(product);
            if(action == null)
            {
                action = new Hashtable();
                actions.put(product, action);
            }

            Element first = (Element)rule.getExpansion().get(0);
            Vector terminalSet = new Vector();
            if(first != Element.EMPTY_TERMINAL)
            {
                Set firsts = (Set)firstSets.get(first);
                terminalSet.addAll(firsts);
            }

            if(emptyElements.contains(rule))
            {
                Set follows = (Set)followSets.get(product);
                if(follows != null)
                {
                    terminalSet.addAll(follows);
                } else {
                    throw new ParseError("No follow for " + product);
                }
            }
            
            Iterator terminals = terminalSet.iterator();
            while(terminals.hasNext())
            {
                String terminal = (String)terminals.next();
                if(action.containsKey(terminal))
                {
                    throw new IllegalArgumentException
                        ("Error: Multiple start terminals found for: " +
                         product + " on " + terminal + " " +
                         "(" + action.get(terminal) + "/" + rule);
                }
                action.put(terminal, rule);
            }
        }            
        return actions;
    }

    public static Collection getEmptyElements(Grammar grammar)
    {
        Collection emptyElements = new HashSet();

        // All other empty elements arise from this.
        emptyElements.add(Element.EMPTY_TERMINAL);

        boolean changesMade;
        do
        {
            changesMade = false;
            Iterator rules = grammar.getRules().iterator();
            while(rules.hasNext())
            {
                Rule rule = (Rule)rules.next();
                Iterator expansion = rule.getExpansion().iterator();

                boolean empty = true;
                Element nextElement = null;
                while(empty && expansion.hasNext())
                {
                    nextElement = (Element)expansion.next();
                    empty = (empty &&
                             emptyElements.contains(nextElement));
                }
                
                if(empty && nextElement != null)
                {
                    changesMade = (changesMade ||
                                   emptyElements.add(rule) ||
                                   emptyElements.add(rule.getProduct()));
                }
            }
        } while(changesMade);
        return emptyElements;
    }

    /**
     * Checks if any grammar rules with the same product have the same initial elements
     * in their completion. If there are then it attempts to modify the grammar using
     * left factorization so as to make the grammar suitable for LL(1) processing.
     * 
     * Left factorization looks like this. For example for this grammar rule:
     *
     * A = ('b', B) | ('b', C);
     *
     * A is ambiguous for a LL(1) parse because it is not possible to decide what to
     * expand A to based on the next terminal, 'b'. This rule can be replaced by
     * another rule that will produce an equivalent language:
     *
     * A = 'b', (B | C);
     *
     */ 
    public static void leftFactor(Grammar grammar)
    {
    }

    protected static void printSets(String title, Map sets, Level level)
    {
        log.log(level, title);
        Iterator keys = sets.keySet().iterator();
        while(keys.hasNext())
        {
            StringBuffer set = new StringBuffer();
            Element key = (Element)keys.next();
            set.append(key + " -> ");
            Iterator terminals = ((Set)sets.get(key)).iterator();

            while(terminals.hasNext())
            {
                String element = (String)terminals.next();
                set.append("'" + element + "'");
                if(terminals.hasNext())
                {
                    set.append(", ");
                }
            }
            log.log(level, "  " + set.toString());
        }
    }


    public void parse(TokenStream tokens)
        throws ParseException, IOException
    {
        fireParseStarted();

        if(!tokens.hasNext())
        {
            log.warn("Parsed empty token stream");
        }
        else
        {
            Stack expansions = new Stack();
            Element currentProduct = grammar.getStartSymbol();

            if(currentProduct == null)
            {
                throw new NullPointerException("No start symbol");
            }
            
            Iterator expansion = null;
            Token nextToken = tokens.nextToken();

            boolean error = false;
            do
            {
                // Descent needed is marked by a null expansion
                if(expansion == null)
                {
                    Rule nextRule =
                        (Rule)((Map)actions.get(currentProduct)).
                        get(nextToken.getType());
                    if(nextRule == null)
                    {
                        throw new IOException("No rule for " +
                                              nextToken.getType() +
                                              " from " +
                                              currentProduct.getValue());
                    }
                    expansion = nextRule.getExpansion().iterator();
                    if(!currentProduct.isAnonymous())
                    {
                        fireRuleStarted(currentProduct.getValue());
                    }
                }

                Element nextElement = null;

                // Skip over any empty terminals other then a final one
                while(expansion.hasNext() &&
                      (nextElement = (Element)expansion.next())
                      == Element.EMPTY_TERMINAL);

                // If the next element is a terminal try to eat
                //  a token off the stream
                if(nextElement.getType() == Element.TERMINAL_TYPE)
                {
                    if(nextToken.getType()
                       .equals(nextElement.getValue()))
                    {
                        fireTerminal(nextToken.getType(),
                                     nextToken.getValue());
                        
                        nextToken =
                            tokens.hasNext() ? tokens.nextToken() : null;
                    }
                    // Some rules will end with an empty terminal
                    //  (often the whole expansion is an empty terminal)
                    // The parser needs to be allowed to bubble up a
                    //  level to match the next token
                    else if(nextElement != Element.EMPTY_TERMINAL)
                    {
                        ParseError parseError =
                            new ParseError("Found terminal: " + 
                                           nextToken.getType() + 
                                           " expected " +
                                           nextElement.getValue());
                        fireParseError(parseError);
                        error = true;
                    }
                }
                // If the next element is a nonterminal then expand it
                else if(nextElement.getType() == Element.NONTERMINAL_TYPE)
                {
                    expansions.push(new Object[] { expansion, currentProduct });
                    currentProduct = nextElement;
                    expansion = null;
                }
                else
                {
                    ParseError parseError =
                        new ParseError("Unknown element type: " +
                                       nextElement);
                    fireParseError(parseError);
                    error = true;
                }

                // Bubble back up after any finished expansions
                while(expansion != null && !expansion.hasNext())
                {
                    if(!currentProduct.isAnonymous())
                    {
                        fireRuleFinished(currentProduct.getValue());
                    }
                    Object[] pop = (Object[])expansions.pop();
                    expansion = (Iterator)pop[0];
                    currentProduct = (Element)pop[1];
                }
                // Next token goes to null when there are no more
            } while(nextToken != null && !error);

            // Check to see if all the expansions were used
            // If they weren't, see if they can be finished
            //  using empty elements to get to an end
            if(!error && expansions.size() > 0)
            {
                Collection emptyElements = LLParser.getEmptyElements(grammar);
                boolean done = false;
                do
                {
                    if(expansion == null && expansions.size() > 0)
                    {
                        Object[] pop = (Object[])expansions.pop();
                        expansion = (Iterator)pop[0];
                        currentProduct = (Element)pop[1];
                    }
                    
                    Element testElement = null;
                    while(expansion.hasNext() && testElement == null)
                    {
                        testElement = (Element)expansion.next();
                        if(emptyElements.contains(testElement))
                        {
                            testElement = null;
                        }
                    }

                    // Each empty element is nulled, so if the element
                    //  is set here then it was nonempty and is an error
                    if(testElement != null)
                    {
                        ParseError parseError = new ParseError
                            ("Incomplete parse: unable to fulfill element: " +
                             testElement);
                        fireParseError(parseError);
                        error = true;
                    }
                    // Otherwise the expansion was completed
                    else
                    {
                        if(!currentProduct.isAnonymous())
                        {
                            fireRuleFinished(currentProduct.getValue());
                        }
                        expansion = null;
                    }
                } while(expansions.size() > 0 && !error);
            }
        }
        fireParseFinished();
    }
}