DynGenPar
Dynamic Generalized Parser
dyngenpar.cpp
Go to the documentation of this file.
00001 /* DynGenPar: Dynamic Generalized Parser
00002    Copyright (C) 2010-2012 Kevin Kofler <kevin.kofler@chello.at>
00003 
00004    Support by the Austrian Science Fund FWF under contract numbers
00005    P20631 and P23554 is gratefully acknowledged.
00006 
00007    This program is free software: you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation, either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
00019 
00428 #include "dyngenpar.h"
00429 
00431 uint qHash(const QList<DynGenPar::Cat> &list) {
00432   uint result = 0, i = 0;
00433   foreach (DynGenPar::CatArg cat, list)
00434     result += (++i) * qHash(cat);
00435   return result;
00436 }
00437 
00438 namespace DynGenPar {
00439 
00441 uint qHash(const NextTokenConstraints &nextTokenConstraints)
00442 {
00443   return (qHash(nextTokenConstraints.expect) << 4)
00444          + qHash(nextTokenConstraints.taboo);
00445 }
00446 
00450 
00453 static void parseTreeToPmcfgSyntaxTreeUnify(Node &dest, const Node &src)
00454 {
00455   if (src.cat != dest.cat || src.data != dest.data)
00456     qFatal("invalid parse tree: PMCFG unification failed (mismatched nodes)");
00457 
00458   // check for metavariables and do the trivial unification
00459   if (src.children.isEmpty()) return;
00460   if (dest.children.isEmpty()) {
00461     dest.children = src.children;
00462     return;
00463   }
00464 
00465   int s = dest.children.size();
00466   if (src.children.size() != s)
00467     qFatal("invalid parse tree: PMCFG unification failed (mismatched "
00468            "alternative counts)");
00469   for (int i=0; i<s; i++) {
00470     const Alternative &srcAlternative = src.children.at(i);
00471     Alternative &destAlternative = dest.children[i];
00472     int l = destAlternative.size();
00473     if (srcAlternative.size() != l
00474         || srcAlternative.label() != destAlternative.label())
00475       qFatal("invalid parse tree: PMCFG unification failed (mismatched "
00476              "alternatives)");
00477     for (int j=0; j<l; j++)
00478       parseTreeToPmcfgSyntaxTreeUnify(destAlternative[j], srcAlternative.at(j));
00479   }
00480 }
00481 
00483 static void parseTreeToPmcfgSyntaxTreeRecurse(const Node &parseTree,
00484                                               Node &syntaxTree)
00485 {
00486   /* first copy the data (most useful if a token was passed as an argument to a
00487      PMCFG function, an extension to standard PMCFGs allowed by this
00488      implementation) */
00489   syntaxTree.data = parseTree.data;
00490   bool isFirst = true;
00491   foreach(const Alternative &alternative, parseTree.children) {
00492     // create a new alternative if this is not the first one
00493     if (isFirst)
00494       isFirst = false;
00495     else
00496       syntaxTree.children.append(Alternative());
00497     Alternative &newAlternative = syntaxTree.children.last();
00498 
00499     QVariant label = alternative.label();
00500     if (label.canConvert<PmcfgComponentInfo>()) {
00501       PmcfgComponentInfo componentInfo = label.value<PmcfgComponentInfo>();
00502       newAlternative.setLabel(componentInfo.pmcfgRule.label());
00503       int numArgs = componentInfo.pmcfgRule.size();
00504       for (int i=0; i<numArgs; i++) {
00505         CatArg arg = componentInfo.pmcfgRule.at(i);
00506         Node newChild(arg);
00507         const QVector<int> &argPositions = componentInfo.argPositions.at(i);
00508         if (argPositions.isEmpty())
00509           newChild.children.clear(); // no alternative == metavariable
00510         else {
00511           parseTreeToPmcfgSyntaxTreeRecurse(
00512             alternative.at(argPositions.first()), newChild);
00513           int s = argPositions.size();
00514           for (int i=1; i<s; i++) {
00515             Node tempChild(arg);
00516             parseTreeToPmcfgSyntaxTreeRecurse(alternative.at(
00517               argPositions.at(i)), tempChild);
00518             parseTreeToPmcfgSyntaxTreeUnify(newChild, tempChild);
00519           }
00520         }
00521         newAlternative.append(newChild);
00522       }
00523     } else newAlternative = alternative; // non-PMCFG category, copy parse tree
00524   }
00525 }
00526 
00528 Node parseTreeToPmcfgSyntaxTree(const Node &parseTree)
00529 {
00530   Node syntaxTree(parseTree.cat);
00531   parseTreeToPmcfgSyntaxTreeRecurse(parseTree, syntaxTree);
00532   return syntaxTree;
00533 }
00534 
00536 
00539 bool Parser::isLiteral(const QList<Cat> &list) const
00540 {
00541   foreach (CatArg cat, list)
00542     if (!isToken(cat)) return false;
00543   return true;
00544 }
00545 
00548 
00552 Cat Parser::effectiveCat(CatArg cat) const
00553 {
00554   if (pseudoCats.contains(cat)) // convert pseudo-category
00555     return pseudoCats.value(cat).first;
00556   return cat; // not a pseudo-category, just return the original category
00557 }
00558 
00561 
00564 void Parser::initCaches(void)
00565 {
00566   // clear caches
00567   initialGraph.clear();
00568   neighborhoods.clear();
00569   nullable.clear();
00570   epsilonMatches.clear();
00571 
00572   // search for nullable categories
00573   /* First consider only rules which directly expand to epsilon, i.e. with empty
00574      right hand side. */
00575   QHashIterator<Cat, QList<Rule> > it(rules);
00576   while (it.hasNext()) {
00577     it.next();
00578     Cat cat = it.key();
00579     foreach (const Rule &rule, it.value()) {
00580       if (rule.isEmpty()) {
00581         nullable.insert(cat);
00582         break;
00583       }
00584     }
00585   }
00586   /* Now consider rules which expand to nullable categories, inductively until
00587      no new nullables are added. This works because we do not have to consider
00588      directly or indirectly recursive rules, because those can only expand to
00589      epsilon if the category can already expand to epsilon without that rule. */
00590   unsigned newNullables;
00591   do {
00592     newNullables = 0u;
00593     it = rules;
00594     while (it.hasNext()) {
00595       it.next();
00596       Cat cat = it.key();
00597       if (nullable.contains(cat)) continue; // already nullable
00598       foreach (const Rule &rule, it.value()) {
00599         foreach (CatArg cati, rule) {
00600           if (!nullable.contains(effectiveCat(cati))) goto next_rule;
00601         }
00602         nullable.insert(cat);
00603         newNullables++;
00604         break;
00605 next_rule:
00606         ;
00607       }
00608     }
00609   } while (newNullables);
00610 
00611   // now compute the initial graph
00612   it = rules;
00613   while (it.hasNext()) {
00614     it.next();
00615     Cat cat = it.key();
00616     // only keep the rule number if we need it, in order to allow unification
00617     bool keepRuleNumbers = componentCats.contains(cat);
00618     QList<Rule> ruleList = it.value();
00619     int n = ruleList.size();
00620     for (int i=0; i<n; i++) {
00621       /* For each nonempty rule (empty rules do not show up in the initial
00622          graph), we insert at least one edge from the first item in the rule
00623          to the left hand side, with 0 epsilons skipped. Then, as long as the
00624          first i categories are nullable and the (i+1)th category exists, we
00625          insert an edge with i epsilons skipped from the (i+1)th category to the
00626          left hand side. */
00627       const Rule &rule = ruleList.at(i);
00628       QListIterator<Cat> it(rule);
00629       int epsilonsSkipped=0;
00630       while (it.hasNext()) {
00631         Cat cati = effectiveCat(it.next());
00632         initialGraph.insert(cati, FullRule(cat, rule, epsilonsSkipped,
00633                                            keepRuleNumbers ? i : 0));
00634         if (!nullable.contains(cati)) break;
00635         epsilonsSkipped++;
00636       }
00637     }
00638   }
00639 }
00640 
00643 
00648 void Parser::processRule(CatArg cat, const Rule &rule, int skip, int ruleno,
00649                          QQueue<Cat> &nullableQueue, bool &clearEpsilonMatches)
00650 {
00651   /* Continue processing the rule where we stopped when building the original
00652      initial graph. (If the rule is new, start at position 0.) As long as the
00653      first i categories are nullable and the (i+1)th category exists, we insert
00654      an edge with i epsilons skipped from the (i+1)th category to the left hand
00655      side.*/
00656   QListIterator<Cat> i(rule);
00657   int epsilonsSkipped=skip;
00658   while (i.hasNext()) {
00659     Cat cati = effectiveCat(i.next());
00660     initialGraph.insert(cati, FullRule(cat, rule, epsilonsSkipped, ruleno));
00661     if (!nullable.contains(cati)) break;
00662     epsilonsSkipped++;
00663   }
00664   if (epsilonsSkipped == rule.size()) { // !i.hasNext() is not sufficient!
00665     /* We reached the end of the rule, so the left hand side is now nullable
00666        and we have a new way to get epsilon matches. */
00667     clearEpsilonMatches = true;
00668     if (!nullable.contains(cat) && !nullableQueue.contains(cat))
00669       nullableQueue.enqueue(cat);
00670   }
00671 }
00672 
00675 void Parser::addRule(CatArg cat, const Rule &rule)
00676 {
00677   // add the rule to the grammar
00678   QList<Rule> &ruleList = rules[cat];
00679   // only keep the rule number if we need it, in order to allow unification
00680   int ruleno = componentCats.contains(cat) ? ruleList.size() : 0;
00681   ruleList << rule;
00682 
00683   // update initialGraph and nullable
00684   QQueue<Cat> nullableQueue; // new nullable categories
00685   bool clearEpsilonMatches = false;
00686   /* first process the rule we just added:
00687      Add the new edges, if any, to the initial graph, and mark the category as
00688      nullable if the rule can match the empty string. */
00689   processRule(cat, rule, 0, ruleno, nullableQueue, clearEpsilonMatches);
00690   // now handle categories which have become nullable
00691   while (!nullableQueue.isEmpty()) {
00692     Cat newNullable = nullableQueue.dequeue();
00693     // mark the category as nullable
00694     nullable.insert(newNullable);
00695     /* now update the initial graph:
00696        We need to go through all the edges going out of this category in the
00697        initial graph and process the rule starting from where we stopped when
00698        building the initial graph (due to the non-nullable category). If we
00699        reach the end of the rule, the left hand side has become nullable as
00700        well, and is added to the queue if it wasn't already nullable. */
00701     foreach (const FullRule &rule, initialGraph.values(newNullable))
00702       processRule(rule.cat, rule.rule, rule.epsilonsSkipped+1, rule.ruleno,
00703                   nullableQueue, clearEpsilonMatches);
00704   }
00705   if (clearEpsilonMatches) epsilonMatches.clear();
00706 
00707   /* FIXME: Can we update the cache of neighborhoods instead of clearing it?
00708             And if we can, is it worth it? For now, just clear the cache. */
00709   neighborhoods.clear();
00710 }
00711 
00713 
00714 bool Parser::computePmcfgDimension(CatArg cat, const Rule &rule,
00715                                    const Pmcfg &pmcfg)
00716 {
00717   // look the function up
00718   Function function = pmcfg.lookupFunction(rule.label());
00719   /* check if the dimension is consistent with existing rules for the category,
00720      if any; generate a list of components if we don't have any yet */
00721   int dim = function.size();
00722   if (!dim) {
00723     qWarning("invalid PMCFG function");
00724     return false; // invalid function
00725   }
00726 
00727   if (dim == 1) {
00728     if (catComponents.contains(cat)) {
00729       qWarning("dimension mismatch: 1D function for multidimensional category");
00730       return false; // dimension mismatch
00731     }
00732   } else {
00733     if (rules.contains(cat)) {
00734       qWarning("dimension mismatch: multidimensional function for 1D category");
00735       return false; // dimension mismatch
00736     }
00737     if (catComponents.contains(cat)) {
00738       if (catComponents.value(cat).size() != dim) {
00739         qWarning("dimension mismatch: %dD function for %dD category", dim,
00740                  catComponents.value(cat).size());
00741         return false; // dimension mismatch
00742       }
00743     } else { // not known yet, we need to create a mapping
00744       QList<Cat> components;
00745       for (int i=0; i<dim; i++) {
00746         // generate an internal category for the component
00747 #ifdef DYNGENPAR_INTEGER_CATEGORIES
00748         Cat component = generateCat();
00749 #else
00750         Cat component = QString("%1[%2]").arg(cat).arg(i);
00751 #endif
00752         components.append(component);
00753         componentCats.insert(component, qMakePair(cat, i));
00754       }
00755       catComponents.insert(cat, components);
00756     }
00757   }
00758   return true;
00759 }
00760 
00762 
00763 bool Parser::convertPmcfgRule(CatArg cat, const Rule &rule, const Pmcfg &pmcfg,
00764                               bool updateCaches)
00765 {
00766   // look the function up
00767   Function function = pmcfg.lookupFunction(rule.label());
00768   /* get the list of components to use (assume that the dimensions have been
00769      already validated by computePmcfgDimension) */
00770   int dim = function.size();
00771   if (!dim) {
00772     qWarning("invalid PMCFG function");
00773     return false; // invalid function
00774   }
00775 
00776   QList<Cat> components;
00777   if (dim == 1)
00778     components.append(cat);
00779   else
00780     components = catComponents.value(cat);
00781 
00782   // determine the number of used arguments of the function
00783   int numArgs = 0;
00784   foreach(const Sequence &sequence, function) {
00785     foreach(const Term &term, sequence) {
00786       if (term.isComponent() && term.arg >= numArgs)
00787         numArgs = term.arg + 1;
00788     }
00789   }
00790 
00791   // check that enough arguments were passed to the function
00792   if (rule.size() < numArgs) {
00793     qWarning("not enough arguments for PMCFG function");
00794     return false;
00795   }
00796 
00797   // find the components of the arguments passed
00798   QVector<QList<Cat> > argComponents(numArgs);
00799   for (int i=0; i<numArgs; i++) {
00800     CatArg arg = rule.at(i);
00801     if (catComponents.contains(arg))
00802       argComponents[i] = catComponents.value(arg);
00803     else if (pmcfg.rules.contains(arg) || pmcfg.cfRules.contains(arg)
00804              || pmcfg.tokens.contains(arg))
00805       argComponents[i].append(arg);
00806     else
00807       /* unused category, ignore the unreachable rule
00808          We do not know the correct dimension for the unused category. */
00809       return true;
00810   }
00811 
00812   // verify the dimensions of the arguments passed
00813   foreach(const Sequence &sequence, function) {
00814     foreach(const Term &term, sequence) {
00815       if (term.isComponent()
00816           && term.component >= argComponents.at(term.arg).size()) {
00817         qWarning("dimension mismatch: attempt to use component %d of a %dD "
00818                  "category", term.component, argComponents.at(term.arg).size());
00819         return false; // dimension mismatch
00820       }
00821     }
00822   }
00823 
00824   // determine the number of times each argument is used
00825   QVector<int> usageCounts(numArgs);
00826   foreach(const Sequence &sequence, function) {
00827     foreach(const Term &term, sequence) {
00828       if (term.isComponent())
00829         usageCounts[term.arg]++;
00830     }
00831   }
00832 
00833   // generate pseudo-categories
00834   QVector<QHash<int, Cat> > newPseudoCats(numArgs);
00835   foreach(const Sequence &sequence, function) {
00836     foreach(const Term &term, sequence) {
00837       if (term.isComponent() && usageCounts.at(term.arg) > 1) {
00838         if (!newPseudoCats[term.arg].contains(term.component)) {
00839 #ifdef DYNGENPAR_INTEGER_CATEGORIES
00840           Cat pseudoCat = generateCat();
00841 #else
00842           Cat pseudoCat = QString("Pseudo%1").arg(generateCat());
00843 #endif
00844           newPseudoCats[term.arg].insert(term.component, pseudoCat);
00845         }
00846       }
00847     }
00848   }
00849 
00850   // record pseudo-categories
00851   for (int i=0; i<numArgs; i++) {
00852     const QHash<int, Cat> &pseudoCatHash = newPseudoCats.at(i);
00853     if (!pseudoCatHash.isEmpty()) {
00854       QList<Cat> pseudoCatList = pseudoCatHash.values();
00855       QHashIterator<int, Cat> it(pseudoCatHash);
00856       while (it.hasNext()) {
00857         it.next();
00858         Cat pseudoCat = it.value();
00859         pseudoCats.insert(pseudoCat, qMakePair(argComponents.at(i).at(it.key()),
00860                                                pseudoCatList));
00861       }
00862     }
00863   }
00864 
00865   // now finish evaluating the function and record the converted rule
00866   for (int i=0; i<dim; i++) {
00867     const Sequence &sequence = function.at(i);
00868     PmcfgComponentInfo componentInfo(rule);
00869     int s = sequence.size();
00870     for (int j=0; j<s; j++) {
00871       const Term &term = sequence.at(j);
00872       if (term.isComponent())
00873         componentInfo.argPositions[term.arg].append(j);
00874     }
00875     Rule internalRule(QVariant::fromValue(componentInfo));
00876     // copy next token constraints from the PMCFG sequence
00877     internalRule.nextTokenConstraints = sequence.nextTokenConstraints;
00878     /* Also try to use next token constraints from the PMCFG rule. That's not a
00879        good place to write them because the constraints will affect all
00880        components of a multi-dimensional rule, which is probably not what you
00881        want. But at least for 1D rules, it makes sense, and it costs us almost
00882        nothing to support this. (Appending an empty list is trivial.) */
00883     internalRule.nextTokenConstraints.expect.append(
00884       rule.nextTokenConstraints.expect);
00885     internalRule.nextTokenConstraints.taboo.append(
00886       rule.nextTokenConstraints.taboo);
00887     foreach (const Term &term, sequence)
00888       internalRule.append(term.isToken() ? term.token
00889                           : newPseudoCats.at(term.arg).isEmpty()
00890                             ? argComponents.at(term.arg).at(term.component)
00891                             : newPseudoCats.at(term.arg).value(term.component));
00892 
00893     CatArg catComponent = components.at(i);
00894     if (updateCaches)
00895       addRule(catComponent, internalRule);
00896     else
00897       rules[catComponent].append(internalRule);
00898   }
00899 
00900   return true;
00901 }
00902 
00904 
00913 bool Parser::loadPmcfg(const Pmcfg &pmcfg)
00914 {
00915   // copy tokens, startCat and cfRules
00916   tokens = pmcfg.tokens;
00917   startCat = pmcfg.startCat;
00918   rules = pmcfg.cfRules;
00919 
00920   // convert functions and PMCFG rules to rules and pseudo-categories
00921   pseudoCats.clear();
00922   componentCats.clear();
00923   catComponents.clear();
00924   {
00925     QHashIterator<Cat, QList<Rule> > it(pmcfg.rules);
00926     while (it.hasNext()) {
00927       Cat cat = it.next().key();
00928       foreach (const Rule &rule, it.value())
00929         if (!computePmcfgDimension(cat, rule, pmcfg)) return false;
00930     }
00931   }
00932   {
00933     QHashIterator<Cat, QList<Rule> > it(pmcfg.rules);
00934     while (it.hasNext()) {
00935       Cat cat = it.next().key();
00936       foreach (const Rule &rule, it.value())
00937         if (!convertPmcfgRule(cat, rule, pmcfg, false)) return false;
00938     }
00939   }
00940 
00941   // initialize the initial graph and the caches
00942   initCaches();
00943   return true;
00944 }
00945 
00949 
00955 bool Parser::addPmcfgRule(Pmcfg &pmcfg, CatArg cat, const Rule &rule)
00956 {
00957   if (!computePmcfgDimension(cat, rule, pmcfg)
00958       || !convertPmcfgRule(cat, rule, pmcfg, true)) return false;
00959   pmcfg.rules[cat].append(rule);
00960   return true;
00961 }
00962 
00964 
00969 bool Parser::reachable(CatArg cat, CatArg target, QSet<Cat> mark)
00970 {
00971   if (cat == target) return true;
00972   mark.insert(cat);
00973   foreach (const FullRule &rule, initialGraph.values(cat)) {
00974     if (mark.contains(rule.cat)) continue;
00975     if (reachable(rule.cat, target, mark)) return true;
00976   }
00977   return false;
00978 }
00979 
00981 
00985 QList<FullRule> Parser::neighborhood(CatArg cat, CatArg target)
00986 {
00987   // use cached value if we have it
00988   QPair<Cat, Cat> key(cat, target);
00989   if (neighborhoods.contains(key)) return neighborhoods.value(key);
00990 
00991   QList<FullRule> result;
00992   foreach (const FullRule &rule, initialGraph.values(cat)) {
00993     if (reachable(rule.cat, target, QSet<Cat>())) result.append(rule);
00994   }
00995   // cache the result for future reuse
00996   neighborhoods.insert(key, result);
00997   return result;
00998 }
00999 
01001 void Parser::finalizeMatches(QList<Match> &matches, CatArg cat,
01002                              const PseudoCatScope &scope)
01003 {
01004   int s = matches.size();
01005   for (int i=0; i<s; i++) {
01006     Match &m = matches[i];
01007     PseudoCatScope childScope = m.scope;
01008     m.scope = scope;
01009     m.scope.pConstraints().insert(cat,
01010       qMakePair(qMakePair(m.tree, m.nextTokenConstraints), 0));
01011     foreach (CatArg cati, pseudoCats.value(cat).second)
01012       m.scope.mcfgConstraints().insert(cati, qMakePair(m.ruleno, childScope));
01013     m.ruleno = 0; // We don't need ruleno anymore.
01014   }
01015 }
01016 
01018 void Parser::copyScope(QList<Match> &matches, const PseudoCatScope &scope)
01019 {
01020   if (scope.isNull()) return; // don't bother copying a null scope
01021   int s = matches.size();
01022   for (int i=0; i<s; i++)
01023     matches[i].scope = scope;
01024 }
01025 
01027 QList<Match> Parser::matchCFToEpsilon(CatArg cat, QSet<Cat> mark)
01028 {
01029   QList<Match> result;
01030   bool haveNextTokenConstraints = false;
01031 
01032   mark.insert(cat);
01033   bool isFirst = true;
01034   foreach (const Rule &rule, rules.value(cat)) {
01035     // only consider nullable rules with only unmarked categories
01036     foreach (CatArg cati, rule) {
01037       if (!nullable.contains(cati) || mark.contains(cati)) goto next_rule;
01038     }
01039     {
01040       QList<Match> currentMatches;
01041       Node node(cat);
01042       node.children.first().setLabel(rule.label());
01043       currentMatches.append(Match(0, node, 0, PseudoCatScope()));
01044       if (!rule.nextTokenConstraints.expect.isEmpty()) {
01045         haveNextTokenConstraints = true;
01046         currentMatches.first().nextTokenConstraints.expect =
01047           rule.nextTokenConstraints.expect;
01048       }
01049       if (!rule.nextTokenConstraints.taboo.isEmpty()) {
01050         haveNextTokenConstraints = true;
01051         currentMatches.first().nextTokenConstraints.taboo =
01052           rule.nextTokenConstraints.taboo;
01053       }
01054       foreach (CatArg cati, rule) {
01055         int s = currentMatches.size();
01056         for (int i=0; i<s; ) {
01057           Match &m = currentMatches[i];
01058           const QList<Match> componentMatches 
01059             = matchToEpsilonRecurse(cati, mark, m.scope);
01060           if (componentMatches.isEmpty()) {
01061             if (i) currentMatches.swap(0, i);
01062             currentMatches.removeFirst();
01063             s--;
01064           } else {
01065             int cs = componentMatches.size();
01066             for (int j=1; j<cs; j++) {
01067               const Match &cm = componentMatches.at(j);
01068               Node newTree = m.tree;
01069               newTree.children.first().append(cm.tree);
01070               currentMatches.append(Match(m.len + cm.len, newTree, 0, cm.scope,
01071                                           m.nextTokenConstraints));
01072               if (!cm.nextTokenConstraints.expect.isEmpty()) {
01073                 haveNextTokenConstraints = true;
01074                 currentMatches.last().nextTokenConstraints.expect.append(
01075                   cm.nextTokenConstraints.expect);
01076               }
01077               if (!cm.nextTokenConstraints.taboo.isEmpty()) {
01078                 haveNextTokenConstraints = true;
01079                 currentMatches.last().nextTokenConstraints.taboo.append(
01080                   cm.nextTokenConstraints.taboo);
01081               }
01082             }
01083             const Match &cm = componentMatches.first();
01084             m.len += cm.len;
01085             m.tree.children.first().append(cm.tree);
01086             m.scope = cm.scope;
01087             if (!cm.nextTokenConstraints.expect.isEmpty()) {
01088               haveNextTokenConstraints = true;
01089               m.nextTokenConstraints.expect.append(
01090                 cm.nextTokenConstraints.expect);
01091             }
01092             if (!cm.nextTokenConstraints.taboo.isEmpty()) {
01093               haveNextTokenConstraints = true;
01094               m.nextTokenConstraints.taboo.append(
01095                 cm.nextTokenConstraints.taboo);
01096             }
01097             i++;
01098           }
01099         }
01100       }
01101       if (currentMatches.isEmpty()) goto next_rule;
01102       if (haveNextTokenConstraints) {
01103         /* We cannot just unify all the matches to one in this case. We will run
01104            the full unification process at the end. But clear the scopes. */
01105         int s = currentMatches.size();
01106         for (int i=0; i<s; i++) currentMatches[i].scope = PseudoCatScope();
01107         result.append(currentMatches);
01108         goto next_rule;
01109       }
01110       /* unify the matches:
01111          * if this is the first matching rule, make the first match the current
01112            result
01113          * otherwise, append the new alternative(s) from the first match to the
01114            current result
01115          * if there is more than one match, also append the alternative(s) from
01116            the additional matches */
01117       int i=0, s=currentMatches.size();
01118       if (isFirst) {
01119         isFirst = false;
01120         result.append(currentMatches.first());
01121         result.first().scope = PseudoCatScope(); // clear scope
01122         i++;
01123       }
01124       Match &firstResult = result.first();
01125       for (; i<s; i++)
01126         firstResult.tree.children.append(currentMatches.at(i).tree.children);
01127     }
01128 next_rule:
01129     ;
01130   }
01131   /* unify the result now (as much as possible) if we have next token
01132      constraints, otherwise it is already unified to a single match */
01133   if (haveNextTokenConstraints) unify(result);
01134   return result;
01135 }
01136 
01139 QList<Match> Parser::matchEffectiveCatToEpsilon(CatArg cat, QSet<Cat> mark)
01140 {
01141   QList<Match> result;
01142 
01143   /* if the effective category is 1-dimensional, don't bother about rule numbers
01144      and child scopes - they are not needed for P constraints (the only ones we
01145      can have on pseudo-categories for 1-dimensional effective categories) */
01146   if (!componentCats.contains(cat))
01147     return matchCFToEpsilon(cat, mark);
01148 
01149   mark.insert(cat);
01150   QList<Rule> ruleList = rules.value(cat);
01151   int s = ruleList.size();
01152   for (int i=0; i<s; i++) {
01153     const Rule &rule = ruleList.at(i);
01154     // only consider nullable rules with only unmarked categories
01155     foreach (CatArg cati, rule) {
01156       if (!nullable.contains(cati) || mark.contains(cati)) goto next_rule;
01157     }
01158     {
01159       QList<Match> currentMatches;
01160       Node node(cat);
01161       node.children.first().setLabel(rule.label());
01162       currentMatches.append(Match(0, node, i, PseudoCatScope(),
01163                                   rule.nextTokenConstraints));
01164       foreach (CatArg cati, rule) {
01165         int s = currentMatches.size();
01166         for (int i=0; i<s; ) {
01167           Match &m = currentMatches[i];
01168           const QList<Match> componentMatches
01169             = matchToEpsilonRecurse(cati, mark, m.scope);
01170           if (componentMatches.isEmpty()) {
01171             if (i) currentMatches.swap(0, i);
01172             currentMatches.removeFirst();
01173             s--;
01174           } else {
01175             int cs = componentMatches.size();
01176             for (int j=1; j<cs; j++) {
01177               const Match &cm = componentMatches.at(j);
01178               Node newTree = m.tree;
01179               newTree.children.first().append(cm.tree);
01180               currentMatches.append(Match(m.len + cm.len, newTree, m.ruleno,
01181                                           cm.scope, m.nextTokenConstraints));
01182               currentMatches.last().nextTokenConstraints.expect.append(
01183                 cm.nextTokenConstraints.expect);
01184               currentMatches.last().nextTokenConstraints.taboo.append(
01185                 cm.nextTokenConstraints.taboo);
01186             }
01187             const Match &cm = componentMatches.first();
01188             m.len += cm.len;
01189             m.tree.children.first().append(cm.tree);
01190             m.scope = cm.scope;
01191             m.nextTokenConstraints.expect.append(
01192               cm.nextTokenConstraints.expect);
01193             m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo);
01194             i++;
01195           }
01196         }
01197       }
01198       // in this case, we cannot unify, so just append the matches to the result
01199       result.append(currentMatches);
01200     }
01201 next_rule:
01202     ;
01203   }
01204   return result;
01205 }
01206 
01208 QList<Match> Parser::matchToEpsilonRecurse(CatArg cat, QSet<Cat> mark,
01209                                            const PseudoCatScope &scope)
01210 {
01211   QList<Match> result;
01212 
01213   if (scope.hasPConstraint(cat)) {
01214     // handle P constraint
01215     QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
01216       = scope.pConstraint(cat);
01217     if (!pConstraint.second) // only matches if length == 0
01218       result.append(Match(0, pConstraint.first.first, 0, scope,
01219                           pConstraint.first.second));
01220     return result;
01221   }
01222 
01223   // if this is a pseudo-category, it is not known yet, so get the effective
01224   // category (if this is not a pseudo-category, effCat is cat itself)
01225   Cat effCat = effectiveCat(cat);
01226 
01227   if (effCat != cat) { // cat is a pseudo-category
01228     if (mark.contains(effCat)) return result; // prevent infinite recursion
01229     mark.insert(cat);
01230 
01231     if (scope.hasMcfgConstraint(cat)) {
01232       // handle MCFG constraint
01233       QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
01234       const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);
01235       /* check if we can (possibly - there could be other PMCFG constraints
01236          preventing it) produce epsilon with this rule, and make sure the
01237          categories in the rule are not marked to prevent infinite recursion */
01238       foreach (CatArg cati, rule) {
01239         if (!nullable.contains(cati) || mark.contains(cati)) return result;
01240       }
01241       Node node(effCat);
01242       node.children.first().setLabel(rule.label());
01243       result.append(Match(0, node, mcfgConstraint.first, scope,
01244                           rule.nextTokenConstraints));
01245       foreach (CatArg cati, rule) {
01246         int s = result.size();
01247         for (int i=0; i<s; ) {
01248           Match &m = result[i];
01249           const QList<Match> componentMatches
01250             = matchToEpsilonRecurse(cati, mark, m.scope);
01251           if (componentMatches.isEmpty()) {
01252             if (i) result.swap(0, i);
01253             result.removeFirst();
01254             s--;
01255           } else {
01256             int cs = componentMatches.size();
01257             for (int j=1; j<cs; j++) {
01258               const Match &cm = componentMatches.at(j);
01259               Node newTree = m.tree;
01260               newTree.children.first().append(cm.tree);
01261               result.append(Match(m.len + cm.len, newTree, m.ruleno,
01262                                   cm.scope, m.nextTokenConstraints));
01263               result.last().nextTokenConstraints.expect.append(
01264                 cm.nextTokenConstraints.expect);
01265               result.last().nextTokenConstraints.taboo.append(
01266                 cm.nextTokenConstraints.taboo);
01267             }
01268             const Match &cm = componentMatches.first();
01269             m.len += cm.len;
01270             m.tree.children.first().append(cm.tree);
01271             m.scope = cm.scope;
01272             m.nextTokenConstraints.expect.append(
01273               cm.nextTokenConstraints.expect);
01274             m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo);
01275             i++;
01276           }
01277         }
01278       }
01279     } else // no PMCFG constraints on this pseudo-category
01280       result = matchEffectiveCatToEpsilon(effCat, mark);
01281 
01282     finalizeMatches(result, cat, scope);
01283   } else { // cat is a true category
01284     result = matchCFToEpsilon(cat, mark);
01285     copyScope(result, scope);
01286   }
01287 
01288   return result;
01289 }
01290 
01292 
01297 QList<Match> Parser::matchToEpsilon(CatArg cat, const PseudoCatScope &scope)
01298 {
01299   QList<Match> result;
01300 
01301   if (scope.hasPConstraint(cat)) {
01302     // handle P constraint
01303     QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
01304       = scope.pConstraint(cat);
01305     if (!pConstraint.second) // only matches if length == 0
01306       result.append(Match(0, pConstraint.first.first, 0, scope,
01307                           pConstraint.first.second));
01308     return result;
01309   }
01310 
01311   // if this is a pseudo-category, it is not known yet, so get the effective
01312   // category (if this is not a pseudo-category, effCat is cat itself)
01313   Cat effCat = effectiveCat(cat);
01314 
01315   if (effCat != cat) { // cat is a pseudo-category
01316     if (scope.hasMcfgConstraint(cat)) {
01317       // handle MCFG constraint
01318       QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
01319       const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);
01320       /* check if we can (possibly - there could be other PMCFG constraints
01321          preventing it) produce epsilon with this rule */
01322       foreach (CatArg cati, rule) {
01323         if (!nullable.contains(cati)) return result;
01324       }
01325       Node node(effCat);
01326       node.children.first().setLabel(rule.label());
01327       result.append(Match(0, node, mcfgConstraint.first, scope,
01328                           rule.nextTokenConstraints));
01329       foreach (CatArg cati, rule) {
01330         int s = result.size();
01331         for (int i=0; i<s; ) {
01332           Match &m = result[i];
01333           const QList<Match> componentMatches = matchToEpsilon(cati, m.scope);
01334           if (componentMatches.isEmpty()) {
01335             if (i) result.swap(0, i);
01336             result.removeFirst();
01337             s--;
01338           } else {
01339             int cs = componentMatches.size();
01340             for (int j=1; j<cs; j++) {
01341               const Match &cm = componentMatches.at(j);
01342               Node newTree = m.tree;
01343               newTree.children.first().append(cm.tree);
01344               result.append(Match(m.len + cm.len, newTree, m.ruleno,
01345                                   cm.scope, m.nextTokenConstraints));
01346               result.last().nextTokenConstraints.expect.append(
01347                 cm.nextTokenConstraints.expect);
01348               result.last().nextTokenConstraints.taboo.append(
01349                 cm.nextTokenConstraints.taboo);
01350             }
01351             const Match &cm = componentMatches.first();
01352             m.len += cm.len;
01353             m.tree.children.first().append(cm.tree);
01354             m.scope = cm.scope;
01355             m.nextTokenConstraints.expect.append(
01356               cm.nextTokenConstraints.expect);
01357             m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo);
01358             i++;
01359           }
01360         }
01361       }
01362     } else { // no PMCFG constraints on this pseudo-category
01363       // We cannot cache the result here because the scopes can change.
01364       /* FIXME: Well, we could cache the result with a dummy scope and then
01365                 compute the union. Is that worth it? */
01366       result = matchEffectiveCatToEpsilon(effCat, QSet<Cat>());
01367     }
01368 
01369     finalizeMatches(result, cat, scope);
01370   } else { // cat is a true category
01371     // memoize the matches for efficiency
01372     // FIXME: this will probably need to get smarter (or disabled) for
01373     //        context-sensitive constraints
01374     if (epsilonMatches.contains(cat))
01375       result = epsilonMatches.value(cat);
01376     else {
01377       result = matchCFToEpsilon(cat, QSet<Cat>());
01378       epsilonMatches.insert(cat, result);
01379     }
01380     copyScope(result, scope);
01381   }
01382 
01383   return result;
01384 }
01385 
01387 bool Parser::matchesTokenRecurse(CatArg cat, CatArg token, QSet<Cat> mark) const
01388 {
01389   // Handle the trivial case first.
01390   if (isToken(cat)) return cat == token;
01391 
01392   // Now cat is a nonterminal.
01393   mark.insert(cat);
01394   foreach (const Rule &rule, rules.value(cat)) {
01395     // an epsilon rule cannot match a token
01396     if (rule.isEmpty()) continue;
01397     // for each element of the rule, try matching that element to the token and
01398     // the rest to epsilon
01399     int l = rule.size();
01400     for (int i=0; i<l; i++) {
01401       CatArg cati = rule.at(i);
01402       /* Left recursion cannot match a token if it doesn't match without the
01403          recursion because tokens are atomic. */
01404       if (mark.contains(cati)) continue; // ignore left recursion
01405       for (int j=0; j<l; j++) {
01406         if (i == j) continue;
01407         if (!nullable.contains(rule.at(j))) goto skip_this_i; // match epsilon
01408       }
01409       if (matchesTokenRecurse(cati, token, mark)) return true; // match token
01410 skip_this_i: ;
01411     }
01412   }
01413 
01414   // None of the rules matched.
01415   return false;
01416 }
01417 
01419 
01428 bool Parser::matchesToken(CatArg cat, CatArg token) const
01429 {
01430   return matchesTokenRecurse(cat, token, QSet<Cat>());
01431 }
01432 
01434 
01441 void Parser::collectLeaves(const Node &tree, QList<Node> &leaves)
01442 {
01443   if (isToken(tree.cat))
01444     leaves.append(tree);
01445   else {
01446     // consider only the first alternative - they must all match the same tokens
01447     foreach (const Node &node, tree.children.first())
01448       collectLeaves(node, leaves);
01449   }
01450 }
01451 
01453 bool Parser::validateNextTokenConstraints(CatArg token,
01454   const NextTokenConstraints &nextTokenConstraints) const
01455 {
01456   foreach(CatArg expect, nextTokenConstraints.expect)
01457     if (!matchesToken(expect, token)) return false;
01458   foreach(CatArg taboo, nextTokenConstraints.taboo)
01459     if (matchesToken(taboo, token)) return false;
01460   return true;
01461 }
01462 
01464 
01470 QList<Match> Parser::match(CatArg cat, int pos, const PseudoCatScope &scope,
01471                            const StackItem &stack,
01472                            const NextTokenConstraints &nextTokenConstraints)
01473 {
01474   QList<Match> result;
01475 
01476   if (scope.hasPConstraint(cat)) {
01477     // handle P constraint
01478     QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
01479       = scope.pConstraint(cat);
01480     if (pConstraint.second) { // length != 0
01481       QList<Node> leaves;
01482       collectLeaves(pConstraint.first.first, leaves);
01483       /* check the next token constraints right now, it's no use keeping the
01484          type 6 item just to have the shift throw it away, and this way we don't
01485          have to keep 2 sets of next token constraints in the type 6 item (one
01486          for the first token and one for the next token after the last) */
01487       if (validateNextTokenConstraints(leaves.first().cat,
01488                                        nextTokenConstraints)) {
01489         StackItem type6(stack, leaves, 0, pConstraint.first.first, scope,
01490                         pConstraint.first.second);
01491         nextStacks.append(type6);
01492       }
01493     } else { // length == 0
01494       NextTokenConstraints newNextTokenConstraints = nextTokenConstraints;
01495       newNextTokenConstraints.expect.append(pConstraint.first.second.expect);
01496       newNextTokenConstraints.taboo.append(pConstraint.first.second.taboo);
01497       result.append(Match(0, pConstraint.first.first, 0, scope,
01498                           newNextTokenConstraints));
01499     }
01500     return result;
01501   }
01502 
01503   // if this is a pseudo-category, get the effective category (if this is not a
01504   // pseudo-category, effCat is cat itself)
01505   Cat effCat = effectiveCat(cat);
01506 
01507   if (scope.hasMcfgConstraint(cat)) {
01508     // handle MCFG constraint
01509     QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
01510     const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);
01511 
01512     // perform a top-down expansion of rule
01513     {
01514       StackItem type5(stack, cat, scope);
01515       Node node(effCat);
01516       node.children.first().setLabel(rule.label());
01517       result = matchRemaining(rule, 0, pos, 0, node, mcfgConstraint.second,
01518                               mcfgConstraint.first, type5,
01519                               nextTokenConstraints);
01520     }
01521 
01522     finalizeMatches(result, cat, scope);
01523   } else { // no PMCFG constraints on this category or pseudo-category
01524     // try epsilon matches first
01525     if (nullable.contains(effCat)) {
01526       result = matchToEpsilon(cat, scope);
01527       if (!nextTokenConstraints.expect.isEmpty()
01528           || !nextTokenConstraints.taboo.isEmpty()) {
01529         int s = result.size();
01530         for (int i=0; i<s; i++) {
01531           NextTokenConstraints &newNextTokenConstraints
01532             = result[i].nextTokenConstraints;
01533           newNextTokenConstraints.expect.append(nextTokenConstraints.expect);
01534           newNextTokenConstraints.taboo.append(nextTokenConstraints.taboo);
01535         }
01536       }
01537     }
01538 
01539     // now we want a nonempty match
01540     // prepare the parse stack for the next shift
01541     /* We do not store the next token constraints in the type 0 item because
01542        the type 3 parent already carries them, and they can be different for the
01543        different type 3 parents in the DAG-structured stack. */
01544     if (stack.type() < 0) { // toplevel match
01545       StackItem type0(QList<StackItem>(), cat, effCat, pos, scope);
01546       nextStacks.append(type0);
01547     } else { // subordinate match, i.e. nonempty stack
01548       // memoize type 0 item indices to build a graph-structured stack
01549       if (scope.isNull() && type0Indices.contains(cat))
01550         nextStacks[type0Indices.value(cat)].addParent(stack);
01551       else {
01552         QList<StackItem> parents;
01553         parents << stack;
01554         StackItem type0(parents, cat, effCat, pos, scope);
01555         if (scope.isNull()) type0Indices.insert(cat, nextStacks.size());
01556         nextStacks.append(type0);
01557       }
01558     }
01559   }
01560 
01561   // return the result (only epsilon matches, the rest is deferred)
01562   return result;
01563 }
01564 
01566 
01570 QList<Match> Parser::matchRemaining(const Rule &rule, int len, int curr, int i,
01571                                     const Node &tree,
01572                                     const PseudoCatScope &scope, int ruleno,
01573                                     const StackItem &stack,
01574                                     const NextTokenConstraints
01575                                       &nextTokenConstraints)
01576 {
01577   QList<Match> result;
01578   if (i < rule.size()) {
01579     QList<Match> matches;
01580     {
01581       StackItem type3(stack, rule, len, curr, i, tree, scope, ruleno,
01582                       nextTokenConstraints);
01583       matches = match(rule.at(i), curr, scope, type3, nextTokenConstraints);
01584     }
01585     foreach (const Match &m, matches) {
01586       Node newTree = tree;
01587       newTree.children.first().append(m.tree);
01588       result.append(matchRemaining(rule, len+m.len, curr+m.len, i+1, newTree,
01589                                    m.scope, ruleno, stack,
01590                                    m.nextTokenConstraints));
01591     }
01592   } else {
01593     if (rule.action) {
01594       ActionInfo info(tree);
01595       rule.action->execute(info);
01596     }
01597     NextTokenConstraints newNextTokenConstraints = nextTokenConstraints;
01598     newNextTokenConstraints.expect.append(rule.nextTokenConstraints.expect);
01599     newNextTokenConstraints.taboo.append(rule.nextTokenConstraints.taboo);
01600     result.append(Match(len, tree, ruleno, scope, newNextTokenConstraints));
01601   }
01602   return result;
01603 }
01604 
01606 
01609 Cat Parser::findFirstToken(const Node &tree)
01610 {
01611   CatArg cat = tree.cat;
01612   if (isToken(cat))
01613     return cat;
01614   else {
01615     // consider only the first alternative - they must all match the same tokens
01616     foreach (const Node &node, tree.children.first()) {
01617       Cat result = findFirstToken(node);
01618       if (!IS_EPSILON(result)) return result;
01619     }
01620     return Cat(); // This tree matches epsilon.
01621   }
01622 }
01623 
01626 void Parser::unify(QList<Match> &matches)
01627 {
01628   int l = matches.size();
01629   if (l > 1) {
01630     /* For plain CFGs, we'd only need to match length and category here. We add
01631        the rule number and the scope for PMCFG matches and the next token
01632        constraints for scannerless parsing support. */
01633     QHash<QPair<QPair<QPair<int, Cat>, QPair<int, PseudoCatScope> >,
01634                 NextTokenConstraints>, int>
01635       indexOfLenCat;
01636     {
01637       const Match &m = matches.first();
01638       indexOfLenCat.insert(qMakePair(qMakePair(qMakePair(m.len, m.tree.cat),
01639                                                qMakePair(m.ruleno, m.scope)),
01640                                      m.nextTokenConstraints), 0);
01641     }
01642     for (int i=1; i<l; ) {
01643       const Match &m = matches.at(i);
01644       QPair<QPair<QPair<int, Cat>, QPair<int, PseudoCatScope> >,
01645             NextTokenConstraints> key(qMakePair(qMakePair(m.len, m.tree.cat),
01646                                                 qMakePair(m.ruleno, m.scope)),
01647                                       m.nextTokenConstraints);
01648       if (indexOfLenCat.contains(key)) {
01649         matches[indexOfLenCat.value(key)].tree.children.append(m.tree.children);
01650         matches.removeAt(i);
01651         l--;
01652       } else indexOfLenCat.insert(key, i++);
01653     }
01654   }
01655 }
01656 
01659 
01667 QList<Match> Parser::reduce(CatArg cat, CatArg target, int pos, int len,
01668                             const Node &tree, const StackItem &stack,
01669                             const PseudoCatScope &scope, int ruleno,
01670                             const NextTokenConstraints &nextTokenConstraints,
01671                             QSet<Cat> mark)
01672 {
01673   QList<Match> result;
01674   if (cat == target) result.append(Match(len, tree, ruleno, scope,
01675                                          nextTokenConstraints));
01676   QList<Match> matches;
01677   {
01678     StackItem type4(stack, target, pos, len);
01679     // for each rule to get towards the target category
01680     foreach (const FullRule &rule, neighborhood(cat, target)) {
01681       QList<Match> currentMatches;
01682       Node node(rule.cat);
01683       node.children.first().setLabel(rule.rule.label());
01684       int epsilonsSkipped = rule.epsilonsSkipped;
01685       // if we are reducing a pseudo-category, set the correct scope
01686       PseudoCatScope newScope;
01687       CatArg pseudoCat = rule.rule.at(epsilonsSkipped);
01688       if (pseudoCat != cat) {
01689         newScope.pConstraints().insert(pseudoCat,
01690           qMakePair(qMakePair(tree, nextTokenConstraints), len));
01691         foreach (CatArg cati, pseudoCats.value(pseudoCat).second)
01692           newScope.mcfgConstraints().insert(cati, qMakePair(ruleno, scope));
01693       }
01694       currentMatches.append(Match(0, node, rule.ruleno, newScope,
01695                                   nextTokenConstraints));
01696       /* Find the first token in the tree so we can validate the next token
01697          constraints of the epsilon matches. If we don't have epsilons skipped,
01698          don't bother doing this work.
01699          Also note that tree cannot be an epsilon tree because we never reduce
01700          epsilon. */
01701       Cat firstToken = Cat();
01702       if (epsilonsSkipped)
01703         firstToken = findFirstToken(tree);
01704       // match first items to epsilon
01705       for (int i=0; i<epsilonsSkipped; i++) {
01706         int s = currentMatches.size();
01707         for (int i=0; i<s; ) {
01708           Match &m = currentMatches[i];
01709           QList<Match> componentMatches = matchToEpsilon(rule.rule.at(i),
01710                                                          m.scope);
01711           /* Here, we need to validate the next token constraints in our epsilon
01712              matches against the first token of the tree we're reducing. */
01713           /* FIXME: Would it be more efficient to do this in neighborhood? It'd
01714                     help prediction in some (rare?) cases, too. But it'd make
01715                     neighborhoods dependent on the first token, which hurts
01716                     caching badly. */
01717           {
01718             int cs = componentMatches.size();
01719             for (int j=0; j<cs; ) {
01720               if (validateNextTokenConstraints(firstToken,
01721                     componentMatches.at(j).nextTokenConstraints)) j++; else {
01722                 if (j) componentMatches.swap(0, j);
01723                 componentMatches.removeFirst();
01724                 cs--;
01725               }
01726             }
01727           }
01728           if (componentMatches.isEmpty()) {
01729             if (i) currentMatches.swap(0, i);
01730             currentMatches.removeFirst();
01731             s--;
01732           } else {
01733             int cs = componentMatches.size();
01734             for (int j=1; j<cs; j++) {
01735               const Match &cm = componentMatches.at(j);
01736               Node newTree = m.tree;
01737               newTree.children.first().append(cm.tree);
01738               currentMatches.append(Match(m.len + cm.len, newTree, m.ruleno,
01739                                           cm.scope, m.nextTokenConstraints));
01740             }
01741             const Match &cm
01742               = static_cast<const QList<Match> &>(componentMatches).first();
01743             m.len += cm.len;
01744             m.tree.children.first().append(cm.tree);
01745             m.scope = cm.scope;
01746             i++;
01747           }
01748         }
01749       }
01750       foreach (const Match &m, currentMatches) {
01751         Node newTree = m.tree;
01752         newTree.children.first().append(tree);
01753         // match remaining rule items
01754         matches.append(matchRemaining(rule.rule, 0, pos, epsilonsSkipped+1,
01755                                       newTree, m.scope, m.ruleno, type4,
01756                                       m.nextTokenConstraints));
01757       }
01758     }
01759   }
01760   // unify matches to a shared representation to avoid needless bifurcation
01761   unify(matches);
01762   {
01763     StackItem type2(stack, 0);
01764     foreach (const Match &m, matches) {
01765       Cat newCat = m.tree.cat;
01766       if (m.len)
01767         qFatal("length of a reduction increased in a non-shift codepath");
01768       // reduction with unchanged length
01769       // avoid infinite loops (if we have epsilon productions or redundant
01770       // X->...->X rules)
01771       mark.insert(cat);
01772       if (mark.contains(newCat)) continue;
01773       // now reduce the rule
01774       result.append(reduce(newCat, target, pos, len, m.tree, type2, m.scope,
01775                            m.ruleno, m.nextTokenConstraints, mark));
01776     }
01777   }
01778   // do another unification pass on the result
01779   unify(result);
01780   return result;
01781 }
01782 
01784 
01787 QList<Match> Parser::processStackItem(const StackItem &item,
01788                                       const QList<Match> &matches)
01789 {
01790   switch (item.type()) {
01791     case 0:
01792       qFatal("type 0 items not supported by processStackItem");
01793     case 1:
01794       {
01795         const StackItemType1 *data
01796           = static_cast<const StackItemType1 *>(item.data());
01797         const QList<StackItem> &parents = data->parents();
01798         Cat cat = data->cat();
01799         Cat effCat = data->effCat();
01800         PseudoCatScope scope = data->scope();
01801 
01802         QList<Match> processedMatches = matches;
01803         if (effCat != cat) // cat is a pseudo-category
01804           finalizeMatches(processedMatches, cat, scope);
01805         else
01806           copyScope(processedMatches, scope);
01807 
01808         if (parents.isEmpty()) // toplevel item
01809           return processedMatches;
01810         else {
01811           QList<Match> result;
01812           foreach (const StackItem &parent, parents)
01813             result.append(processStackItem(parent, processedMatches));
01814           return result;
01815         }
01816       }
01817       break;
01818     case 2:
01819       {
01820         const StackItemType2 *data
01821           = static_cast<const StackItemType2 *>(item.data());
01822         const StackItem &parent = data->parent();
01823 
01824         // unify matches, deferring processing if necessary
01825         if (data->derefUsage()) {
01826           collectedMatches[data].append(matches);
01827           return QList<Match>();
01828         }
01829         QList<Match> unifiedMatches = collectedMatches.value(data);
01830         unifiedMatches.append(matches);
01831         unify(unifiedMatches);
01832 
01833         return processStackItem(parent, unifiedMatches);
01834       }
01835       break;
01836     case 3:
01837       {
01838         const StackItemType3 *data
01839           = static_cast<const StackItemType3 *>(item.data());
01840         const StackItem &parent = data->parent();
01841         Rule rule = data->rule();
01842         int len = data->len();
01843         int curr = data->curr();
01844         int i = data->i();
01845         Node tree = data->tree();
01846         PseudoCatScope scope = data->scope();
01847         int ruleno = data->ruleno();
01848 
01849         QList<Match> result;
01850         foreach (const Match &m, matches) {
01851           Node newTree = tree;
01852           newTree.children.first().append(m.tree);
01853           result.append(matchRemaining(rule, len+m.len, curr+m.len, i+1,
01854                                        newTree, m.scope, ruleno, parent,
01855                                        m.nextTokenConstraints));
01856         }
01857 
01858         return processStackItem(parent, result);
01859       }
01860       break;
01861     case 4:
01862       {
01863         const StackItemType4 *data
01864           = static_cast<const StackItemType4 *>(item.data());
01865         const StackItem &parent = data->parent();
01866         Cat target = data->target();
01867         int pos = data->pos();
01868         int len = data->len();
01869 
01870         // unify matches, deferring processing if necessary
01871         if (data->derefUsage()) {
01872           collectedMatches[data].append(matches);
01873           return QList<Match>();
01874         }
01875         QList<Match> unifiedMatches = collectedMatches.value(data);
01876         unifiedMatches.append(matches);
01877         unify(unifiedMatches);
01878 
01879         QList<Match> result;
01880         {
01881           StackItem type2(parent, 0);
01882           foreach (const Match &m, unifiedMatches) {
01883             Cat newCat = m.tree.cat;
01884             if (!m.len)
01885               qFatal("reduction with unchanged length was deferred");
01886             // the length increased, reduce the rule, resetting mark
01887             result.append(reduce(newCat, target, pos+m.len, len+m.len, m.tree,
01888                                  type2, m.scope, m.ruleno,
01889                                  m.nextTokenConstraints));
01890           }
01891         }
01892         // do another unification pass on the result
01893         unify(result);
01894 
01895         return processStackItem(parent, result);
01896       }
01897     case 5:
01898       {
01899         const StackItemType5 *data
01900           = static_cast<const StackItemType5 *>(item.data());
01901         const StackItem &parent = data->parent();
01902         Cat cat = data->cat();
01903         PseudoCatScope scope = data->scope();
01904 
01905         QList<Match> result = matches;
01906         finalizeMatches(result, cat, scope);
01907 
01908         return processStackItem(parent, result);
01909       }
01910       break;
01911     case 6:
01912       qFatal("type 6 items not supported by processStackItem");
01913     default:
01914       qFatal("invalid stack item type");
01915   }
01916 }
01917 
01919 
01926 QList<Match> Parser::processStack(const StackItem &stack, CatArg token)
01927 {
01928   switch (stack.type()) {
01929     case 0:
01930       {
01931         const StackItemType0 *data
01932           = static_cast<const StackItemType0 *>(stack.data());
01933         const QList<StackItem> &parents = data->parents();
01934         Cat cat = data->cat();
01935         Cat effCat = data->effCat();
01936         int pos = data->pos();
01937         PseudoCatScope scope = data->scope();
01938         QList<Match> matches;
01939 
01940         // type 0 item: finish processing of match after a shift
01941         if (isToken(cat)) {
01942           if (token == cat) matches.append(Match(1, inputSource->parseTree(), 0,
01943                                                  scope));
01944         } else {
01945           // reduce the matched token (and possibly additional tokens, with
01946           // deferred processing) to the target category
01947           StackItem type1(parents, cat, effCat, scope);
01948           // The token carries no next token constraints.
01949           matches = reduce(token, effCat, pos, 1, inputSource->parseTree(),
01950                            type1, PseudoCatScope(), 0, NextTokenConstraints());
01951 
01952           if (effCat != cat) // cat is a pseudo-category
01953             finalizeMatches(matches, cat, scope);
01954           else
01955             copyScope(matches, scope);
01956         }
01957 
01958         if (parents.isEmpty()) // toplevel item
01959           return matches;
01960         else {
01961           QList<Match> result;
01962           foreach (const StackItem &parent, parents)
01963             result.append(processStackItem(parent, matches));
01964           return result;
01965         }
01966       }
01967     case 6:
01968       {
01969         const StackItemType6 *data
01970           = static_cast<const StackItemType6 *>(stack.data());
01971         const StackItem &parent = data->parent();
01972         QList<Node> leaves = data->leaves();
01973         int i = data->i();
01974         Node tree = data->tree();
01975         PseudoCatScope scope = data->scope();
01976         NextTokenConstraints nextTokenConstraints
01977           = data->nextTokenConstraints();
01978         QList<Match> matches;
01979 
01980         // type 6 item: finish processing of a P constraint
01981         // match the i-th leaf against the shifted token
01982         if (inputSource->matchParseTree(leaves.at(i++))) {
01983           int l = leaves.size();
01984           if (i == l) // we're done matching the constraint
01985             matches.append(Match(l, tree, 0, scope, nextTokenConstraints));
01986           else { // not done yet, create a new type 6 item
01987             StackItem type6(parent, leaves, i, tree, scope,
01988                             nextTokenConstraints);
01989             nextStacks.append(type6);
01990           }
01991         }
01992 
01993         return processStackItem(parent, matches);
01994       }
01995     default:
01996       qFatal("invalid stack (toplevel item not of type 0 or 6)");
01997   }
01998 }
01999 
02002 void Parser::computeUsageCounts(const StackItem &item)
02003 {
02004   switch (item.type()) {
02005     case 0:
02006       {
02007         const StackItemType0 *data
02008           = static_cast<const StackItemType0 *>(item.data());
02009         const QList<StackItem> &parents = data->parents();
02010 
02011         foreach (const StackItem &parent, parents)
02012           computeUsageCounts(parent);
02013       }
02014       break;
02015     case 1:
02016       {
02017         const StackItemType1 *data
02018           = static_cast<const StackItemType1 *>(item.data());
02019         const QList<StackItem> &parents = data->parents();
02020 
02021         foreach (const StackItem &parent, parents)
02022           computeUsageCounts(parent);
02023       }
02024       break;
02025     case 2:
02026       {
02027         const StackItemType2 *data
02028           = static_cast<const StackItemType2 *>(item.data());
02029         const StackItem &parent = data->parent();
02030 
02031         if (!data->refUsage()) computeUsageCounts(parent);
02032       }
02033       break;
02034     case 3:
02035       {
02036         const StackItemType3 *data
02037           = static_cast<const StackItemType3 *>(item.data());
02038         const StackItem &parent = data->parent();
02039 
02040         computeUsageCounts(parent);
02041       }
02042       break;
02043     case 4:
02044       {
02045         const StackItemType4 *data
02046           = static_cast<const StackItemType4 *>(item.data());
02047         const StackItem &parent = data->parent();
02048 
02049         if (!data->refUsage()) computeUsageCounts(parent);
02050       }
02051       break;
02052     case 5:
02053       {
02054         const StackItemType5 *data
02055           = static_cast<const StackItemType5 *>(item.data());
02056         const StackItem &parent = data->parent();
02057 
02058         computeUsageCounts(parent);
02059       }
02060       break;
02061     case 6:
02062       {
02063         const StackItemType6 *data
02064           = static_cast<const StackItemType6 *>(item.data());
02065         const StackItem &parent = data->parent();
02066 
02067         computeUsageCounts(parent);
02068       }
02069       break;
02070     default:
02071       qFatal("invalid stack item type");
02072   }
02073 }
02074 
02076 
02081 bool Parser::shift(int pos)
02082 {
02083   if (!inputSource->rewindTo(pos, currentLexerState)) {
02084     qWarning("invalid input position");
02085     return false;
02086   }
02087   Cat token = inputSource->nextToken();
02088   if (IS_EPSILON(token))
02089     return false;
02090 
02091   // delete all the stacks which don't satisfy the next token constraints
02092   {
02093     int l = nextStacks.size();
02094     for (int i=0; i<l; ) {
02095       StackItem &stack = nextStacks[i];
02096       // type 6 items are already filtered for next token constraints
02097       if (stack.type()) i++; else {
02098         const StackItemType0 *data
02099           = static_cast<const StackItemType0 *>(stack.data());
02100         QList<StackItem> parents = data->parents();
02101         // no parents = toplevel item, skip processing to avoid killing this
02102         if (parents.empty()) i++; else {
02103           int n = parents.size();
02104           for (int j=0; j<n; ) {
02105             const StackItem &parent = parents.at(j);
02106             const StackItemType3 *parentData
02107               = static_cast<const StackItemType3 *>(parent.data());
02108             if (validateNextTokenConstraints(token,
02109                   parentData->nextTokenConstraints())) j++; else {
02110               if (j) parents.swap(0, j);
02111               parents.removeFirst();
02112               n--;
02113             }
02114           }
02115           // no parents left, kill the type 0 item
02116           if (parents.isEmpty()) {
02117             if (i) nextStacks.swap(0, i);
02118             nextStacks.removeFirst();
02119             l--;
02120           } else {
02121             stack.setParents(parents);
02122             i++;
02123           }
02124         }
02125       }
02126     }
02127   }
02128 
02129   // if no more stacks, set errPos and errToken and return false
02130   // (this happens if we had a valid complete parse, but no continuations, and
02131   // now there's extra input)
02132   if (nextStacks.isEmpty()) {
02133     errPos = pos;
02134     errToken = token;
02135     return false;
02136   }
02137 
02138   // process stack
02139   currentMatches.clear();
02140   QList<StackItem> currentStacks = nextStacks;
02141   nextStacks.clear();
02142   foreach (const StackItem &stack, currentStacks)
02143     computeUsageCounts(stack);
02144   foreach (const StackItem &stack, currentStacks)
02145     currentMatches.append(processStack(stack, token));
02146   type0Indices.clear();
02147   collectedMatches.clear();
02148 
02149   // if no more stacks and no current matches, set errPos and errToken, restore
02150   // the previous nextStacks to allow running prediction on them and return
02151   // false
02152   if (nextStacks.isEmpty() && currentMatches.isEmpty()) {
02153     errPos = pos;
02154     errToken = token;
02155     nextStacks = currentStacks;
02156     return false;
02157   }
02158 
02159   currentLexerState = inputSource->saveState();
02160   return true;
02161 }
02162 
02164 
02239 QList<Match> Parser::parse(int *errorPos, Cat *errorToken, int *incrementalPos,
02240                            QList<StackItem> *incrementalStacks,
02241                            QList<Match> *incrementalMatches,
02242                            LexerState *lexerState)
02243 {
02244   int pos = 0;
02245 
02246   if (!incrementalPos || *incrementalPos < 0) // start a new parse
02247     currentMatches = match(startCat, 0, PseudoCatScope(), StackItem(),
02248                            NextTokenConstraints());
02249   else { // continue an incremental parse
02250     pos = *incrementalPos;
02251     if (incrementalMatches) currentMatches = *incrementalMatches;
02252     if (incrementalStacks) nextStacks = *incrementalStacks;
02253   }
02254 
02255   if (lexerState)
02256     currentLexerState = *lexerState;
02257   else
02258     currentLexerState.clear();
02259 
02260   // shift tokens while possible
02261   errPos = -1;
02262   while (shift(pos)) pos++;
02263 
02264   // error handling
02265   if (errPos >= 0) { // parse error
02266     if (errorPos) *errorPos = errPos;
02267     if (errorToken) *errorToken = errToken;
02268     if (incrementalPos) *incrementalPos = errPos;
02269     if (incrementalStacks) *incrementalStacks = nextStacks;
02270     if (incrementalMatches) incrementalMatches->clear();
02271     if (lexerState) *lexerState = currentLexerState;
02272     errPos = -1;
02273     errToken = Cat();
02274     nextStacks.clear();
02275     currentMatches.clear();
02276     currentLexerState.clear();
02277     return QList<Match>();
02278   }
02279   if (errorPos) *errorPos = -1;
02280   if (errorToken) *errorToken = Cat();
02281 
02282   // filter all the matches which have expect-type next token constraints
02283   {
02284     int s = currentMatches.size();
02285     for (int i=0; i<s; ) {
02286       Match &m = currentMatches[i];
02287       if (m.nextTokenConstraints.expect.isEmpty()) i++; else {
02288         if (i) currentMatches.swap(0, i);
02289         currentMatches.removeFirst();
02290         s--;
02291       }
02292     }
02293   }
02294 
02295   // incremental parsing
02296   if (incrementalPos) *incrementalPos = pos;
02297   if (incrementalMatches) *incrementalMatches = currentMatches;
02298   if (incrementalStacks) *incrementalStacks = nextStacks;
02299   if (lexerState) *lexerState = currentLexerState;
02300   nextStacks.clear();
02301   currentLexerState.clear();
02302 
02303   // accept current matches
02304   QList<Match> result = currentMatches;
02305   currentMatches.clear();
02306   return result;
02307 }
02308 
02311 
02317 Predictions Parser::computePredictions(const QList<StackItem> &stacks) const
02318 {
02319   Predictions predict;
02320   foreach (const StackItem &stack, stacks) {
02321     if (stack.type()) { // type 6 item
02322       const StackItemType6 *data
02323         = static_cast<const StackItemType6 *>(stack.data());
02324       predict.insert(data->leaves().at(data->i()).cat);
02325     } else // type 0 item
02326       predict.insert(static_cast<const StackItemType0 *>(stack.data())
02327                        ->effCat());
02328   }
02329   return predict;
02330 }
02331 
02333 void Parser::expandNonterminalPredictionRecurse(CatArg cat,
02334                                                 QHash<Cat, QSet<Cat> > &result,
02335                                                 QSet<Cat> &mark) const
02336 {
02337   mark.insert(cat);
02338   foreach (const Rule &rule, rules.value(cat)) {
02339     QListIterator<Cat> i(rule);
02340     /* For each nonempty rule (empty rules are not interesting for prediction),
02341        we handle at least the first item in the rule as a prediction: if it's a
02342        token, it is our prediction and the corresponding nonterminal is this
02343        category; if it's a nonterminal, we need to process the prediction
02344        recursively, unless it is already marked, indicating direct or indirect
02345        left recursion (and allowing us to stop) or the repetition of an already
02346        tried possibility (also allowing us to stop, since we do not care about
02347        parse trees here, only the last category). Then, as long as the first i
02348        categories are nullable and the (i+1)th category exists, we repeat the
02349        process for the (i+1)th category. */
02350     while (i.hasNext()) {
02351       Cat cati = effectiveCat(i.next());
02352       if (isToken(cati)) {
02353         result[cat].insert(cati); // record nonterminal and token
02354         break; // tokens are not nullable
02355       }
02356       // now cati is a nonterminal
02357       if (!mark.contains(cati))
02358         expandNonterminalPredictionRecurse(cati, result, mark);
02359       if (!nullable.contains(cati)) break;
02360     }
02361   }
02362 }
02363 
02367 
02377 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPrediction(CatArg cat) const
02378 {
02379   QHash<Cat, QSet<Cat> > result;
02380   QSet<Cat> mark;
02381   if (isToken(cat))
02382     qWarning("trying to expand terminal prediction");
02383   else
02384     expandNonterminalPredictionRecurse(cat, result, mark);
02385   return result;
02386 }
02387 
02389 void Parser::expandNonterminalPredictionRecurseC(CatArg cat,
02390                                                  QHash<Cat, QSet<Cat> > &result,
02391                                                  QSet<Cat> mark, int ruleno,
02392                                                  const PseudoCatScope &scope,
02393                                                  const NextTokenConstraints
02394                                                    &nextTokenConstraints)
02395 {
02396   mark.insert(cat);
02397   QList<Rule> ruleList = rules.value(cat);
02398   int firstRule = ruleno >= 0 ? ruleno : 0;
02399   int lastRule = ruleno >= 0 ? ruleno : ruleList.size() - 1;
02400   for (int currRule = firstRule; currRule <= lastRule; currRule++) {
02401     const Rule &rule = ruleList.at(currRule);
02402     QListIterator<Cat> i(rule);
02403     /* For each nonempty rule (empty rules are not interesting for prediction),
02404        we handle at least the first item in the rule as a prediction: if it's a
02405        token, it is our prediction and the corresponding nonterminal is this
02406        category; if it's a nonterminal, we need to process the prediction
02407        recursively, unless it is already marked, indicating direct or indirect
02408        left recursion (and allowing us to stop). Then, as long as the first i
02409        categories are nullable and the (i+1)th category exists, we repeat the
02410        process for the (i+1)th category.
02411        We optimize the common case of no epsilons to skip because we do not have
02412        to consider context-sensitive constraints in that case. */
02413     Cat cat1 = Cat();
02414     if (i.hasNext()) {
02415       cat1 = i.next();
02416       Cat effCat1 = effectiveCat(cat1);
02417       if (isToken(effCat1)) {
02418         if (validateNextTokenConstraints(effCat1, nextTokenConstraints))
02419           result[cat].insert(effCat1); // record nonterminal and token
02420         continue; // tokens are not nullable
02421       }
02422       // now effCat1 is a nonterminal
02423       /* If we have a P-constraint here, that constraint forces the category to
02424          be epsilon (because it must be identical to something already matched,
02425          which is epsilon). So don't predict inside it. */
02426       if (!mark.contains(effCat1)
02427           && !scope.hasPConstraint(cat1)) {
02428         int mcfgRuleno = -1;
02429         PseudoCatScope mcfgScope;
02430         if (scope.hasMcfgConstraint(cat1)) {
02431           QPair<int, PseudoCatScope> mcfgConstraint
02432             = scope.mcfgConstraint(cat1);
02433           mcfgRuleno = mcfgConstraint.first;
02434           mcfgScope = mcfgConstraint.second;
02435         }
02436         expandNonterminalPredictionRecurseC(effCat1, result, mark, mcfgRuleno,
02437                                             mcfgScope, nextTokenConstraints);
02438       }
02439       if (!nullable.contains(effCat1)) continue;
02440     }
02441     {
02442       QList<Match> currentMatches;
02443       // match cat(i-1) to epsilon first
02444       /* FIXME: We discard the parse trees here. Would it be more efficient
02445                 to use a variant of matchToEpsilon which doesn't build them?
02446                 OTOH, this way, we can reuse the matchToEpsilon cache. */
02447       const QList<Match> componentMatches = matchToEpsilon(cat1, scope);
02448       if (componentMatches.isEmpty())
02449         continue; // We didn't match epsilon, so we're done with this rule.
02450       /* If we have one version without context-sensitive constraints, the
02451          others are redundant. */
02452       bool haveUnconstrained = false;
02453       foreach (const Match &cm, componentMatches) {
02454         if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
02455             && cm.nextTokenConstraints.taboo.isEmpty()) {
02456           haveUnconstrained = true;
02457           break;
02458         }
02459       }
02460       if (haveUnconstrained)
02461         currentMatches.append(Match(0, Node(), 0, scope, nextTokenConstraints));
02462       else {
02463         foreach (const Match &cm, componentMatches) {
02464           currentMatches.append(Match(0, Node(), 0, cm.scope,
02465                                       nextTokenConstraints));
02466           if (!cm.nextTokenConstraints.expect.isEmpty())
02467             currentMatches.last().nextTokenConstraints.expect.append(
02468               cm.nextTokenConstraints.expect);
02469           if (!cm.nextTokenConstraints.taboo.isEmpty())
02470             currentMatches.last().nextTokenConstraints.taboo.append(
02471               cm.nextTokenConstraints.taboo);
02472         }
02473       }
02474       while (i.hasNext()) {
02475         // now update cati for the new i
02476         Cat cati = i.next();
02477         Cat effCati = effectiveCat(cati);
02478         if (isToken(effCati)) {
02479           bool nextTokenConstraintsPass = false;
02480           foreach (const Match &m, currentMatches)
02481             if (validateNextTokenConstraints(effCati, m.nextTokenConstraints)) {
02482               nextTokenConstraintsPass = true;
02483               break;
02484             }
02485           if (nextTokenConstraintsPass)
02486             result[cat].insert(effCati); // record nonterminal and token
02487           break; // tokens are not nullable
02488         }
02489         // now effCati is a nonterminal
02490         if (!mark.contains(effCati)) {
02491           foreach (const Match &m, currentMatches)
02492             /* If we have a P-constraint here, that constraint forces the
02493                category to be epsilon (because it must be identical to something
02494                already matched, which is epsilon). So don't predict inside it.
02495                */
02496             if (!m.scope.hasPConstraint(cati)) {
02497               int mcfgRuleno = -1;
02498               PseudoCatScope mcfgScope;
02499               if (m.scope.hasMcfgConstraint(cati)) {
02500                 QPair<int, PseudoCatScope> mcfgConstraint
02501                   = m.scope.mcfgConstraint(cati);
02502                 mcfgRuleno = mcfgConstraint.first;
02503                 mcfgScope = mcfgConstraint.second;
02504               }
02505               expandNonterminalPredictionRecurseC(effCati, result, mark,
02506                                                   mcfgRuleno, mcfgScope,
02507                                                   m.nextTokenConstraints);
02508             }
02509         }
02510         if (!nullable.contains(effCati)) break;
02511         int s = currentMatches.size();
02512         for (int j=0; j<s; ) {
02513           Match &m = currentMatches[j];
02514           /* FIXME: We discard the parse trees here. Would it be more efficient
02515                     to use a variant of matchToEpsilon which doesn't build them?
02516                     OTOH, this way, we can reuse the matchToEpsilon cache. */
02517           const QList<Match> componentMatches = matchToEpsilon(effCati,
02518                                                                m.scope);
02519           if (componentMatches.isEmpty()) {
02520             if (j) currentMatches.swap(0, j);
02521             currentMatches.removeFirst();
02522             s--;
02523           } else {
02524             int cs = componentMatches.size();
02525             /* If we have one version without context-sensitive constraints, the
02526                others are redundant. */
02527             bool haveUnconstrained = false;
02528             for (int k=0; k<cs; k++) {
02529               const Match &cm = componentMatches.at(k);
02530               if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
02531                   && cm.nextTokenConstraints.taboo.isEmpty()) {
02532                 haveUnconstrained = true;
02533                 break;
02534               }
02535             }
02536             if (!haveUnconstrained) {
02537               for (int k=1; k<cs; k++) {
02538                 const Match &cm = componentMatches.at(k);
02539                 currentMatches.append(Match(0, Node(), 0, cm.scope,
02540                                             m.nextTokenConstraints));
02541                 if (!cm.nextTokenConstraints.expect.isEmpty())
02542                   currentMatches.last().nextTokenConstraints.expect.append(
02543                     cm.nextTokenConstraints.expect);
02544                 if (!cm.nextTokenConstraints.taboo.isEmpty())
02545                   currentMatches.last().nextTokenConstraints.taboo.append(
02546                     cm.nextTokenConstraints.taboo);
02547               }
02548               const Match &cm = componentMatches.first();
02549               m.scope = cm.scope;
02550               if (!cm.nextTokenConstraints.expect.isEmpty())
02551                 m.nextTokenConstraints.expect.append(
02552                   cm.nextTokenConstraints.expect);
02553               if (!cm.nextTokenConstraints.taboo.isEmpty())
02554                 m.nextTokenConstraints.taboo.append(
02555                   cm.nextTokenConstraints.taboo);
02556             }
02557             j++;
02558           }
02559         }
02560         if (currentMatches.isEmpty())
02561           break; // We didn't match epsilon, so we're done with this rule.
02562       }
02563     }
02564   }
02565 }
02566 
02570 
02576 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat)
02577 {
02578   QHash<Cat, QSet<Cat> > result;
02579   if (isToken(cat))
02580     qWarning("trying to expand terminal prediction");
02581   else
02582     expandNonterminalPredictionRecurseC(cat, result, QSet<Cat>(), -1,
02583                                         PseudoCatScope(),
02584                                         NextTokenConstraints());
02585   return result;
02586 }
02587 
02590 
02608 MultiPredictions Parser::computeMultiPredictions(const QList<StackItem> &stacks)
02609   const
02610 {
02611   MultiPredictions predictMulti;
02612   foreach (const StackItem &stack, stacks) {
02613     if (stack.type()) { // type 6 item
02614       /* We will define the literal as the full string of the P constraint here,
02615          which is very easy to obtain. This might not strictly be a literal in
02616          the sense of the definition, since it could be obtained from a whole
02617          tree of nonterminals, but it definitely always contains the literal (in
02618          the strict sense) containing the predicted category. */
02619       const StackItemType6 *data
02620         = static_cast<const StackItemType6 *>(stack.data());
02621       QList<Node> leaves = data->leaves();
02622       int i = data->i();
02623       QList<Cat> prediction, literal;
02624       int l = leaves.size();
02625       for (int j=i; j<l; j++)
02626         prediction.append(leaves.at(j).cat);
02627       literal = prediction;
02628       for (int j=i-1; j>=0; j--)
02629         literal.prepend(leaves.at(j).cat);
02630       MultiPrediction multiPrediction(literal, data->tree().cat);
02631       if (!predictMulti.contains(prediction, multiPrediction))
02632         predictMulti.insert(prediction, multiPrediction);
02633     } else { // type 0 item
02634       const StackItemType0 *data
02635         = static_cast<const StackItemType0 *>(stack.data());
02636       Cat cat = data->effCat();
02637       if (isToken(cat)) {
02638         const QList<StackItem> &parents = data->parents();
02639         if (parents.isEmpty()) { // huh, start category is a terminal?
02640           QList<Cat> list;
02641           list << cat;
02642           MultiPrediction multiPrediction(list, cat);
02643           if (!predictMulti.contains(list, multiPrediction))
02644             predictMulti.insert(list, multiPrediction);
02645         } else {
02646           foreach (const StackItem &parent, parents) {
02647             const StackItemType3 *parentData
02648               = static_cast<const StackItemType3 *>(parent.data());
02649             Rule rule = parentData->rule();
02650             int i = parentData->i();
02651             int len = 1;
02652             while (i+len < rule.size() && isToken(rule.at(i+len))) len++;
02653             QList<Cat> literal = rule.mid(i, len);
02654             while (i > 0 && isToken(rule.at(i-1))) i--, len++;
02655             MultiPrediction multiPrediction(rule.mid(i, len),
02656                                             parentData->tree().cat);
02657             if (!predictMulti.contains(literal, multiPrediction))
02658               predictMulti.insert(literal, multiPrediction);
02659           }
02660         }
02661       } else {
02662         QList<Cat> list;
02663         list << cat;
02664         MultiPrediction multiPrediction(list, cat);
02665         if (!predictMulti.contains(list, multiPrediction))
02666           predictMulti.insert(list, multiPrediction);
02667       }
02668     }
02669   }
02670   return predictMulti;
02671 }
02672 
02674 void Parser::expandNonterminalPredictionMultiRecurse(CatArg cat,
02675  QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> &mark) const
02676 {
02677   mark.insert(cat);
02678   foreach (const Rule &rule, rules.value(cat)) {
02679     int l = rule.size();
02680     /* For each nonempty rule (empty rules are not interesting for prediction),
02681        we handle at least the first item in the rule as a prediction: if it's a
02682        token, the literal starting with that token is our prediction and the
02683        corresponding nonterminal is this category; if it's a nonterminal, we
02684        need to process the prediction recursively, unless it is already marked,
02685        indicating direct or indirect left recursion (and allowing us to stop) or
02686        the repetition of an already tried possibility (also allowing us to stop,
02687        since we do not care about parse trees here, only the last category).
02688        Then, as long as the first i categories are nullable and the (i+1)th
02689        category exists, we repeat the process for the (i+1)th category. */
02690     for (int i = 0; i < l; i++) {
02691       Cat cati = effectiveCat(rule.at(i));
02692       if (isToken(cati)) {
02693         int len = 1;
02694         while (i+len < l && isToken(rule.at(i+len))) len++;
02695         result[cat].insert(rule.mid(i, len)); // record nonterminal and literal
02696         break; // tokens are not nullable
02697       }
02698       // now cati is a nonterminal
02699       if (!mark.contains(cati))
02700         expandNonterminalPredictionMultiRecurse(cati, result, mark);
02701       if (!nullable.contains(cati)) break;
02702     }
02703   }
02704 }
02705 
02709 
02719 QHash<Cat, QSet<QList<Cat> > >
02720   Parser::expandNonterminalPredictionMulti(CatArg cat) const
02721 {
02722   QHash<Cat, QSet<QList<Cat> > > result;
02723   QSet<Cat> mark;
02724   if (isToken(cat))
02725     qWarning("trying to expand terminal prediction");
02726   else
02727     expandNonterminalPredictionMultiRecurse(cat, result, mark);
02728   return result;
02729 }
02730 
02732 void Parser::expandNonterminalPredictionMultiRecurseC(CatArg cat,
02733   QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> mark, int ruleno,
02734   const PseudoCatScope &scope, const NextTokenConstraints &nextTokenConstraints)
02735 {
02736   mark.insert(cat);
02737   QList<Rule> ruleList = rules.value(cat);
02738   int firstRule = ruleno >= 0 ? ruleno : 0;
02739   int lastRule = ruleno >= 0 ? ruleno : ruleList.size() - 1;
02740   for (int currRule = firstRule; currRule <= lastRule; currRule++) {
02741     const Rule &rule = ruleList.at(currRule);
02742     int l = rule.size();
02743     /* For each nonempty rule (empty rules are not interesting for prediction),
02744        we handle at least the first item in the rule as a prediction: if it's a
02745        token, the literal starting with that token is our prediction and the
02746        corresponding nonterminal is this category; if it's a nonterminal, we
02747        need to process the prediction recursively, unless it is already marked,
02748        indicating direct or indirect left recursion (and allowing us to stop).
02749        Then, as long as the first i categories are nullable and the (i+1)th
02750        category exists, we repeat the process for the (i+1)th category.
02751        We optimize the common case of no epsilons to skip because we do not have
02752        to consider context-sensitive constraints in that case. */
02753     Cat cat1 = Cat();
02754     if (l) {
02755       cat1 = rule.first();
02756       Cat effCat1 = effectiveCat(cat1);
02757       if (isToken(effCat1)) {
02758         if (validateNextTokenConstraints(effCat1, nextTokenConstraints)) {
02759           int len = 1;
02760           while (len < l && isToken(rule.at(len))) len++;
02761           // record nonterminal and literal
02762           result[cat].insert(rule.mid(0, len));
02763         }
02764         continue; // tokens are not nullable
02765       }
02766       // now effCat1 is a nonterminal
02767       /* If we have a P-constraint here, that constraint forces the category to
02768          be epsilon (because it must be identical to something already matched,
02769          which is epsilon). So don't predict inside it. */
02770       if (!mark.contains(effCat1)
02771           && !scope.hasPConstraint(cat1)) {
02772         int mcfgRuleno = -1;
02773         PseudoCatScope mcfgScope;
02774         if (scope.hasMcfgConstraint(cat1)) {
02775           QPair<int, PseudoCatScope> mcfgConstraint
02776             = scope.mcfgConstraint(cat1);
02777           mcfgRuleno = mcfgConstraint.first;
02778           mcfgScope = mcfgConstraint.second;
02779         }
02780         expandNonterminalPredictionMultiRecurseC(effCat1, result, mark,
02781                                                  mcfgRuleno, mcfgScope,
02782                                                  nextTokenConstraints);
02783       }
02784       if (!nullable.contains(effCat1)) continue;
02785     }
02786     {
02787       QList<Match> currentMatches;
02788       // match cat(i-1) to epsilon first
02789       /* FIXME: We discard the parse trees here. Would it be more efficient
02790                 to use a variant of matchToEpsilon which doesn't build them?
02791                 OTOH, this way, we can reuse the matchToEpsilon cache. */
02792       const QList<Match> componentMatches = matchToEpsilon(cat1, scope);
02793       if (componentMatches.isEmpty())
02794         continue; // We didn't match epsilon, so we're done with this rule.
02795       /* If we have one version without context-sensitive constraints, the
02796          others are redundant. */
02797       bool haveUnconstrained = false;
02798       foreach (const Match &cm, componentMatches) {
02799         if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
02800             && cm.nextTokenConstraints.taboo.isEmpty()) {
02801           haveUnconstrained = true;
02802           break;
02803         }
02804       }
02805       if (haveUnconstrained)
02806         currentMatches.append(Match(0, Node(), 0, scope, nextTokenConstraints));
02807       else {
02808         foreach (const Match &cm, componentMatches) {
02809           currentMatches.append(Match(0, Node(), 0, cm.scope,
02810                                       nextTokenConstraints));
02811           if (!cm.nextTokenConstraints.expect.isEmpty())
02812             currentMatches.last().nextTokenConstraints.expect.append(
02813               cm.nextTokenConstraints.expect);
02814           if (!cm.nextTokenConstraints.taboo.isEmpty())
02815             currentMatches.last().nextTokenConstraints.taboo.append(
02816               cm.nextTokenConstraints.taboo);
02817         }
02818       }
02819       for (int i = 1; i < l; i++) {
02820         // now update cati for the new i
02821         Cat cati = rule.at(i);
02822         Cat effCati = effectiveCat(cati);
02823         if (isToken(effCati)) {
02824           bool nextTokenConstraintsPass = false;
02825           foreach (const Match &m, currentMatches)
02826             if (validateNextTokenConstraints(effCati, m.nextTokenConstraints)) {
02827               nextTokenConstraintsPass = true;
02828               break;
02829             }
02830           if (nextTokenConstraintsPass) {
02831             int len = 1;
02832             while (i+len < l && isToken(rule.at(i+len))) len++;
02833             // record nonterminal and literal
02834             result[cat].insert(rule.mid(i, len));
02835           }
02836           break; // tokens are not nullable
02837         }
02838         // now effCati is a nonterminal
02839         if (!mark.contains(effCati)) {
02840           foreach (const Match &m, currentMatches)
02841             /* If we have a P-constraint here, that constraint forces the
02842                category to be epsilon (because it must be identical to something
02843                already matched, which is epsilon). So don't predict inside it.
02844                */
02845             if (!m.scope.hasPConstraint(cati)) {
02846               int mcfgRuleno = -1;
02847               PseudoCatScope mcfgScope;
02848               if (m.scope.hasMcfgConstraint(cati)) {
02849                 QPair<int, PseudoCatScope> mcfgConstraint
02850                   = m.scope.mcfgConstraint(cati);
02851                 mcfgRuleno = mcfgConstraint.first;
02852                 mcfgScope = mcfgConstraint.second;
02853               }
02854               expandNonterminalPredictionMultiRecurseC(effCati, result, mark,
02855                                                        mcfgRuleno, mcfgScope,
02856                                                        m.nextTokenConstraints);
02857             }
02858         }
02859         if (!nullable.contains(effCati)) break;
02860         int s = currentMatches.size();
02861         for (int j=0; j<s; ) {
02862           Match &m = currentMatches[j];
02863           /* FIXME: We discard the parse trees here. Would it be more efficient
02864                     to use a variant of matchToEpsilon which doesn't build them?
02865                     OTOH, this way, we can reuse the matchToEpsilon cache. */
02866           const QList<Match> componentMatches = matchToEpsilon(effCati,
02867                                                                m.scope);
02868           if (componentMatches.isEmpty()) {
02869             if (j) currentMatches.swap(0, j);
02870             currentMatches.removeFirst();
02871             s--;
02872           } else {
02873             int cs = componentMatches.size();
02874             /* If we have one version without context-sensitive constraints, the
02875                others are redundant. */
02876             bool haveUnconstrained = false;
02877             for (int k=0; k<cs; k++) {
02878               const Match &cm = componentMatches.at(k);
02879               if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
02880                   && cm.nextTokenConstraints.taboo.isEmpty()) {
02881                 haveUnconstrained = true;
02882                 break;
02883               }
02884             }
02885             if (!haveUnconstrained) {
02886               for (int k=1; k<cs; k++) {
02887                 const Match &cm = componentMatches.at(k);
02888                 currentMatches.append(Match(0, Node(), 0, cm.scope,
02889                                             m.nextTokenConstraints));
02890                 if (!cm.nextTokenConstraints.expect.isEmpty())
02891                   currentMatches.last().nextTokenConstraints.expect.append(
02892                     cm.nextTokenConstraints.expect);
02893                 if (!cm.nextTokenConstraints.taboo.isEmpty())
02894                   currentMatches.last().nextTokenConstraints.taboo.append(
02895                     cm.nextTokenConstraints.taboo);
02896               }
02897               const Match &cm = componentMatches.first();
02898               m.scope = cm.scope;
02899               if (!cm.nextTokenConstraints.expect.isEmpty())
02900                 m.nextTokenConstraints.expect.append(
02901                   cm.nextTokenConstraints.expect);
02902               if (!cm.nextTokenConstraints.taboo.isEmpty())
02903                 m.nextTokenConstraints.taboo.append(
02904                   cm.nextTokenConstraints.taboo);
02905             }
02906             j++;
02907           }
02908         }
02909         if (currentMatches.isEmpty())
02910           break; // We didn't match epsilon, so we're done with this rule.
02911       }
02912     }
02913   }
02914 }
02915 
02919 
02925 QHash<Cat, QSet<QList<Cat> > >
02926   Parser::expandNonterminalPredictionMultiC(CatArg cat)
02927 {
02928   QHash<Cat, QSet<QList<Cat> > > result;
02929   if (isToken(cat))
02930     qWarning("trying to expand terminal prediction");
02931   else
02932     expandNonterminalPredictionMultiRecurseC(cat, result, QSet<Cat>(), -1,
02933                                              PseudoCatScope(),
02934                                              NextTokenConstraints());
02935   return result;
02936 }
02937 
02940 
02949 ConstrainedPredictions Parser::computeConstrainedPredictions(
02950   const QList<StackItem> &stacks) const
02951 {
02952   ConstrainedPredictions predict;
02953   foreach (const StackItem &stack, stacks) {
02954     if (stack.type()) { // type 6 item
02955       const StackItemType6 *data
02956         = static_cast<const StackItemType6 *>(stack.data());
02957       CatArg cat = data->leaves().at(data->i()).cat;
02958       NextTokenConstraints nextTokenConstraints;
02959       if (!predict.contains(cat, nextTokenConstraints))
02960         predict.insert(cat, nextTokenConstraints);
02961     } else { // type 0 item
02962       const StackItemType0 *data
02963         = static_cast<const StackItemType0 *>(stack.data());
02964       Cat cat = data->effCat();
02965       const QList<StackItem> &parents = data->parents();
02966       if (parents.isEmpty()) { // start category, no constraints
02967         NextTokenConstraints nextTokenConstraints;
02968         if (!predict.contains(cat, nextTokenConstraints))
02969           predict.insert(cat, nextTokenConstraints);
02970       } else {
02971         if (isToken(cat)) {
02972           // validate the next token constraints immediately for tokens
02973           bool nextTokenConstraintsPass = false;
02974           foreach (const StackItem &parent, parents) {
02975             const StackItemType3 *parentData
02976               = static_cast<const StackItemType3 *>(parent.data());
02977             NextTokenConstraints nextTokenConstraints
02978               = parentData->nextTokenConstraints();
02979             if (validateNextTokenConstraints(cat, nextTokenConstraints)) {
02980               nextTokenConstraintsPass = true;
02981               break;
02982             }
02983           }
02984           if (nextTokenConstraintsPass) {
02985             NextTokenConstraints nextTokenConstraints;
02986             if (!predict.contains(cat, nextTokenConstraints))
02987               predict.insert(cat, nextTokenConstraints);
02988           }
02989         } else {
02990           foreach (const StackItem &parent, parents) {
02991             const StackItemType3 *parentData
02992               = static_cast<const StackItemType3 *>(parent.data());
02993             NextTokenConstraints nextTokenConstraints
02994               = parentData->nextTokenConstraints();
02995             if (!predict.contains(cat, nextTokenConstraints))
02996               predict.insert(cat, nextTokenConstraints);
02997           }
02998         }
02999       }
03000     }
03001   }
03002   return predict;
03003 }
03004 
03008 
03017 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat,
03018   const NextTokenConstraints &nextTokenConstraints)
03019 {
03020   QHash<Cat, QSet<Cat> > result;
03021   if (isToken(cat))
03022     qWarning("trying to expand terminal prediction");
03023   else
03024     expandNonterminalPredictionRecurseC(cat, result, QSet<Cat>(), -1,
03025                                         PseudoCatScope(),
03026                                         nextTokenConstraints);
03027   return result;
03028 }
03029 
03033 
03044 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat,
03045   const QList<NextTokenConstraints> &nextTokenConstraintsList)
03046 {
03047   switch (nextTokenConstraintsList.size()) {
03048     case 0: // An empty disjunctive list is false, probably not what you want!
03049       qWarning("list of next token constraints is empty");
03050       return QHash<Cat, QSet<Cat> >();
03051     case 1: // common case, handle the next token constraints directly
03052       return expandNonterminalPredictionC(cat,
03053                                           nextTokenConstraintsList.first());
03054     default: // general case, predict first, filter later
03055       {
03056         QHash<Cat, QSet<Cat> > result = expandNonterminalPredictionC(cat);
03057         QHash<Cat, QSet<Cat> > filteredResult;
03058         QHashIterator<Cat, QSet<Cat> > it(result);
03059         while (it.hasNext()) {
03060           it.next();
03061           foreach (CatArg cati, it.value()) {
03062             bool nextTokenConstraintsPass = false;
03063             foreach (const NextTokenConstraints &nextTokenConstraints,
03064                      nextTokenConstraintsList) {
03065               if (validateNextTokenConstraints(cati, nextTokenConstraints)) {
03066                 nextTokenConstraintsPass = true;
03067                 break;
03068               }
03069             }
03070             if (nextTokenConstraintsPass)
03071               filteredResult[it.key()].insert(cati);
03072           }
03073         }
03074         return filteredResult;
03075       }
03076   }
03077 }
03078 
03081 
03101 ConstrainedMultiPredictions Parser::computeConstrainedMultiPredictions(
03102   const QList<StackItem> &stacks) const
03103 {
03104   ConstrainedMultiPredictions predictMulti;
03105   foreach (const StackItem &stack, stacks) {
03106     if (stack.type()) { // type 6 item
03107       /* We will define the literal as the full string of the P constraint here,
03108          which is very easy to obtain. This might not strictly be a literal in
03109          the sense of the definition, since it could be obtained from a whole
03110          tree of nonterminals, but it definitely always contains the literal (in
03111          the strict sense) containing the predicted category. */
03112       const StackItemType6 *data
03113         = static_cast<const StackItemType6 *>(stack.data());
03114       QList<Node> leaves = data->leaves();
03115       int i = data->i();
03116       QList<Cat> prediction, literal;
03117       int l = leaves.size();
03118       for (int j=i; j<l; j++)
03119         prediction.append(leaves.at(j).cat);
03120       literal = prediction;
03121       for (int j=i-1; j>=0; j--)
03122         literal.prepend(leaves.at(j).cat);
03123       ConstrainedMultiPrediction multiPrediction(literal, data->tree().cat);
03124       if (!predictMulti.contains(prediction, multiPrediction))
03125         predictMulti.insert(prediction, multiPrediction);
03126     } else { // type 0 item
03127       const StackItemType0 *data
03128         = static_cast<const StackItemType0 *>(stack.data());
03129       Cat cat = data->effCat();
03130       const QList<StackItem> &parents = data->parents();
03131       if (isToken(cat)) {
03132         if (parents.isEmpty()) { // huh, start category is a terminal?
03133           QList<Cat> list;
03134           list << cat;
03135           ConstrainedMultiPrediction multiPrediction(list, cat);
03136           if (!predictMulti.contains(list, multiPrediction))
03137             predictMulti.insert(list, multiPrediction);
03138         } else {
03139           foreach (const StackItem &parent, parents) {
03140             const StackItemType3 *parentData
03141               = static_cast<const StackItemType3 *>(parent.data());
03142             // validate the next token constraints immediately for tokens
03143             NextTokenConstraints nextTokenConstraints
03144               = parentData->nextTokenConstraints();
03145             if (!validateNextTokenConstraints(cat, nextTokenConstraints))
03146               continue;
03147             Rule rule = parentData->rule();
03148             int i = parentData->i();
03149             int len = 1;
03150             while (i+len < rule.size() && isToken(rule.at(i+len))) len++;
03151             QList<Cat> literal = rule.mid(i, len);
03152             while (i > 0 && isToken(rule.at(i-1))) i--, len++;
03153             ConstrainedMultiPrediction multiPrediction(rule.mid(i, len),
03154                                                        parentData->tree().cat);
03155             if (!predictMulti.contains(literal, multiPrediction))
03156               predictMulti.insert(literal, multiPrediction);
03157           }
03158         }
03159       } else {
03160         if (parents.isEmpty()) { // start category, no constraints
03161           QList<Cat> list;
03162           list << cat;
03163           ConstrainedMultiPrediction multiPrediction(list, cat);
03164           if (!predictMulti.contains(list, multiPrediction))
03165             predictMulti.insert(list, multiPrediction);
03166         } else {
03167           foreach (const StackItem &parent, parents) {
03168             const StackItemType3 *parentData
03169               = static_cast<const StackItemType3 *>(parent.data());
03170             NextTokenConstraints nextTokenConstraints
03171               = parentData->nextTokenConstraints();
03172             QList<Cat> list;
03173             list << cat;
03174             ConstrainedMultiPrediction multiPrediction(list, cat,
03175                                                        nextTokenConstraints);
03176             if (!predictMulti.contains(list, multiPrediction))
03177               predictMulti.insert(list, multiPrediction);
03178           }
03179         }
03180       }
03181     }
03182   }
03183   return predictMulti;
03184 }
03185 
03189 
03198 QHash<Cat, QSet<QList<Cat> > >
03199   Parser::expandNonterminalPredictionMultiC(CatArg cat,
03200   const NextTokenConstraints &nextTokenConstraints)
03201 {
03202   QHash<Cat, QSet<QList<Cat> > > result;
03203   if (isToken(cat))
03204     qWarning("trying to expand terminal prediction");
03205   else
03206     expandNonterminalPredictionMultiRecurseC(cat, result, QSet<Cat>(), -1,
03207                                              PseudoCatScope(),
03208                                              nextTokenConstraints);
03209   return result;
03210 }
03211 
03215 
03226 QHash<Cat, QSet<QList<Cat> > >
03227   Parser::expandNonterminalPredictionMultiC(CatArg cat,
03228   const QList<NextTokenConstraints> &nextTokenConstraintsList)
03229 {
03230   switch (nextTokenConstraintsList.size()) {
03231     case 0: // An empty disjunctive list is false, probably not what you want!
03232       qWarning("list of next token constraints is empty");
03233       return QHash<Cat, QSet<QList<Cat> > >();
03234     case 1: // common case, handle the next token constraints directly
03235       return expandNonterminalPredictionMultiC(cat,
03236                nextTokenConstraintsList.first());
03237     default: // general case, predict first, filter later
03238       {
03239         QHash<Cat, QSet<QList<Cat> > > result
03240           = expandNonterminalPredictionMultiC(cat);
03241         QHash<Cat, QSet<QList<Cat> > > filteredResult;
03242         QHashIterator<Cat, QSet<QList<Cat> > > it(result);
03243         while (it.hasNext()) {
03244           it.next();
03245           foreach (const QList<Cat> &literal, it.value()) {
03246             CatArg cati = literal.first();
03247             bool nextTokenConstraintsPass = false;
03248             foreach (const NextTokenConstraints &nextTokenConstraints,
03249                      nextTokenConstraintsList) {
03250               if (validateNextTokenConstraints(cati, nextTokenConstraints)) {
03251                 nextTokenConstraintsPass = true;
03252                 break;
03253               }
03254             }
03255             if (nextTokenConstraintsPass)
03256               filteredResult[it.key()].insert(literal);
03257           }
03258         }
03259         return filteredResult;
03260       }
03261   }
03262 }
03263 
03264 } // end namespace
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines