|
DynGenPar
Dynamic Generalized Parser
|
00001 /* DynGenPar: Dynamic Generalized Parser 00002 Copyright (C) 2010-2012 Kevin Kofler <kevin.kofler@chello.at> 00003 00004 Support by the Austrian Science Fund FWF under contract numbers 00005 P20631 and P23554 is gratefully acknowledged. 00006 00007 This program is free software: you can redistribute it and/or modify 00008 it under the terms of the GNU General Public License as published by 00009 the Free Software Foundation, either version 2 of the License, or 00010 (at your option) any later version. 00011 00012 This program is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU General Public License for more details. 00016 00017 You should have received a copy of the GNU General Public License 00018 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 00019 00428 #include "dyngenpar.h" 00429 00431 uint qHash(const QList<DynGenPar::Cat> &list) { 00432 uint result = 0, i = 0; 00433 foreach (DynGenPar::CatArg cat, list) 00434 result += (++i) * qHash(cat); 00435 return result; 00436 } 00437 00438 namespace DynGenPar { 00439 00441 uint qHash(const NextTokenConstraints &nextTokenConstraints) 00442 { 00443 return (qHash(nextTokenConstraints.expect) << 4) 00444 + qHash(nextTokenConstraints.taboo); 00445 } 00446 00450 00453 static void parseTreeToPmcfgSyntaxTreeUnify(Node &dest, const Node &src) 00454 { 00455 if (src.cat != dest.cat || src.data != dest.data) 00456 qFatal("invalid parse tree: PMCFG unification failed (mismatched nodes)"); 00457 00458 // check for metavariables and do the trivial unification 00459 if (src.children.isEmpty()) return; 00460 if (dest.children.isEmpty()) { 00461 dest.children = src.children; 00462 return; 00463 } 00464 00465 int s = dest.children.size(); 00466 if (src.children.size() != s) 00467 qFatal("invalid parse tree: PMCFG unification failed (mismatched " 00468 "alternative counts)"); 00469 for (int i=0; i<s; i++) { 00470 const Alternative &srcAlternative = src.children.at(i); 00471 Alternative &destAlternative = dest.children[i]; 00472 int l = destAlternative.size(); 00473 if (srcAlternative.size() != l 00474 || srcAlternative.label() != destAlternative.label()) 00475 qFatal("invalid parse tree: PMCFG unification failed (mismatched " 00476 "alternatives)"); 00477 for (int j=0; j<l; j++) 00478 parseTreeToPmcfgSyntaxTreeUnify(destAlternative[j], srcAlternative.at(j)); 00479 } 00480 } 00481 00483 static void parseTreeToPmcfgSyntaxTreeRecurse(const Node &parseTree, 00484 Node &syntaxTree) 00485 { 00486 /* first copy the data (most useful if a token was passed as an argument to a 00487 PMCFG function, an extension to standard PMCFGs allowed by this 00488 implementation) */ 00489 syntaxTree.data = parseTree.data; 00490 bool isFirst = true; 00491 foreach(const Alternative &alternative, parseTree.children) { 00492 // create a new alternative if this is not the first one 00493 if (isFirst) 00494 isFirst = false; 00495 else 00496 syntaxTree.children.append(Alternative()); 00497 Alternative &newAlternative = syntaxTree.children.last(); 00498 00499 QVariant label = alternative.label(); 00500 if (label.canConvert<PmcfgComponentInfo>()) { 00501 PmcfgComponentInfo componentInfo = label.value<PmcfgComponentInfo>(); 00502 newAlternative.setLabel(componentInfo.pmcfgRule.label()); 00503 int numArgs = componentInfo.pmcfgRule.size(); 00504 for (int i=0; i<numArgs; i++) { 00505 CatArg arg = componentInfo.pmcfgRule.at(i); 00506 Node newChild(arg); 00507 const QVector<int> &argPositions = componentInfo.argPositions.at(i); 00508 if (argPositions.isEmpty()) 00509 newChild.children.clear(); // no alternative == metavariable 00510 else { 00511 parseTreeToPmcfgSyntaxTreeRecurse( 00512 alternative.at(argPositions.first()), newChild); 00513 int s = argPositions.size(); 00514 for (int i=1; i<s; i++) { 00515 Node tempChild(arg); 00516 parseTreeToPmcfgSyntaxTreeRecurse(alternative.at( 00517 argPositions.at(i)), tempChild); 00518 parseTreeToPmcfgSyntaxTreeUnify(newChild, tempChild); 00519 } 00520 } 00521 newAlternative.append(newChild); 00522 } 00523 } else newAlternative = alternative; // non-PMCFG category, copy parse tree 00524 } 00525 } 00526 00528 Node parseTreeToPmcfgSyntaxTree(const Node &parseTree) 00529 { 00530 Node syntaxTree(parseTree.cat); 00531 parseTreeToPmcfgSyntaxTreeRecurse(parseTree, syntaxTree); 00532 return syntaxTree; 00533 } 00534 00536 00539 bool Parser::isLiteral(const QList<Cat> &list) const 00540 { 00541 foreach (CatArg cat, list) 00542 if (!isToken(cat)) return false; 00543 return true; 00544 } 00545 00548 00552 Cat Parser::effectiveCat(CatArg cat) const 00553 { 00554 if (pseudoCats.contains(cat)) // convert pseudo-category 00555 return pseudoCats.value(cat).first; 00556 return cat; // not a pseudo-category, just return the original category 00557 } 00558 00561 00564 void Parser::initCaches(void) 00565 { 00566 // clear caches 00567 initialGraph.clear(); 00568 neighborhoods.clear(); 00569 nullable.clear(); 00570 epsilonMatches.clear(); 00571 00572 // search for nullable categories 00573 /* First consider only rules which directly expand to epsilon, i.e. with empty 00574 right hand side. */ 00575 QHashIterator<Cat, QList<Rule> > it(rules); 00576 while (it.hasNext()) { 00577 it.next(); 00578 Cat cat = it.key(); 00579 foreach (const Rule &rule, it.value()) { 00580 if (rule.isEmpty()) { 00581 nullable.insert(cat); 00582 break; 00583 } 00584 } 00585 } 00586 /* Now consider rules which expand to nullable categories, inductively until 00587 no new nullables are added. This works because we do not have to consider 00588 directly or indirectly recursive rules, because those can only expand to 00589 epsilon if the category can already expand to epsilon without that rule. */ 00590 unsigned newNullables; 00591 do { 00592 newNullables = 0u; 00593 it = rules; 00594 while (it.hasNext()) { 00595 it.next(); 00596 Cat cat = it.key(); 00597 if (nullable.contains(cat)) continue; // already nullable 00598 foreach (const Rule &rule, it.value()) { 00599 foreach (CatArg cati, rule) { 00600 if (!nullable.contains(effectiveCat(cati))) goto next_rule; 00601 } 00602 nullable.insert(cat); 00603 newNullables++; 00604 break; 00605 next_rule: 00606 ; 00607 } 00608 } 00609 } while (newNullables); 00610 00611 // now compute the initial graph 00612 it = rules; 00613 while (it.hasNext()) { 00614 it.next(); 00615 Cat cat = it.key(); 00616 // only keep the rule number if we need it, in order to allow unification 00617 bool keepRuleNumbers = componentCats.contains(cat); 00618 QList<Rule> ruleList = it.value(); 00619 int n = ruleList.size(); 00620 for (int i=0; i<n; i++) { 00621 /* For each nonempty rule (empty rules do not show up in the initial 00622 graph), we insert at least one edge from the first item in the rule 00623 to the left hand side, with 0 epsilons skipped. Then, as long as the 00624 first i categories are nullable and the (i+1)th category exists, we 00625 insert an edge with i epsilons skipped from the (i+1)th category to the 00626 left hand side. */ 00627 const Rule &rule = ruleList.at(i); 00628 QListIterator<Cat> it(rule); 00629 int epsilonsSkipped=0; 00630 while (it.hasNext()) { 00631 Cat cati = effectiveCat(it.next()); 00632 initialGraph.insert(cati, FullRule(cat, rule, epsilonsSkipped, 00633 keepRuleNumbers ? i : 0)); 00634 if (!nullable.contains(cati)) break; 00635 epsilonsSkipped++; 00636 } 00637 } 00638 } 00639 } 00640 00643 00648 void Parser::processRule(CatArg cat, const Rule &rule, int skip, int ruleno, 00649 QQueue<Cat> &nullableQueue, bool &clearEpsilonMatches) 00650 { 00651 /* Continue processing the rule where we stopped when building the original 00652 initial graph. (If the rule is new, start at position 0.) As long as the 00653 first i categories are nullable and the (i+1)th category exists, we insert 00654 an edge with i epsilons skipped from the (i+1)th category to the left hand 00655 side.*/ 00656 QListIterator<Cat> i(rule); 00657 int epsilonsSkipped=skip; 00658 while (i.hasNext()) { 00659 Cat cati = effectiveCat(i.next()); 00660 initialGraph.insert(cati, FullRule(cat, rule, epsilonsSkipped, ruleno)); 00661 if (!nullable.contains(cati)) break; 00662 epsilonsSkipped++; 00663 } 00664 if (epsilonsSkipped == rule.size()) { // !i.hasNext() is not sufficient! 00665 /* We reached the end of the rule, so the left hand side is now nullable 00666 and we have a new way to get epsilon matches. */ 00667 clearEpsilonMatches = true; 00668 if (!nullable.contains(cat) && !nullableQueue.contains(cat)) 00669 nullableQueue.enqueue(cat); 00670 } 00671 } 00672 00675 void Parser::addRule(CatArg cat, const Rule &rule) 00676 { 00677 // add the rule to the grammar 00678 QList<Rule> &ruleList = rules[cat]; 00679 // only keep the rule number if we need it, in order to allow unification 00680 int ruleno = componentCats.contains(cat) ? ruleList.size() : 0; 00681 ruleList << rule; 00682 00683 // update initialGraph and nullable 00684 QQueue<Cat> nullableQueue; // new nullable categories 00685 bool clearEpsilonMatches = false; 00686 /* first process the rule we just added: 00687 Add the new edges, if any, to the initial graph, and mark the category as 00688 nullable if the rule can match the empty string. */ 00689 processRule(cat, rule, 0, ruleno, nullableQueue, clearEpsilonMatches); 00690 // now handle categories which have become nullable 00691 while (!nullableQueue.isEmpty()) { 00692 Cat newNullable = nullableQueue.dequeue(); 00693 // mark the category as nullable 00694 nullable.insert(newNullable); 00695 /* now update the initial graph: 00696 We need to go through all the edges going out of this category in the 00697 initial graph and process the rule starting from where we stopped when 00698 building the initial graph (due to the non-nullable category). If we 00699 reach the end of the rule, the left hand side has become nullable as 00700 well, and is added to the queue if it wasn't already nullable. */ 00701 foreach (const FullRule &rule, initialGraph.values(newNullable)) 00702 processRule(rule.cat, rule.rule, rule.epsilonsSkipped+1, rule.ruleno, 00703 nullableQueue, clearEpsilonMatches); 00704 } 00705 if (clearEpsilonMatches) epsilonMatches.clear(); 00706 00707 /* FIXME: Can we update the cache of neighborhoods instead of clearing it? 00708 And if we can, is it worth it? For now, just clear the cache. */ 00709 neighborhoods.clear(); 00710 } 00711 00713 00714 bool Parser::computePmcfgDimension(CatArg cat, const Rule &rule, 00715 const Pmcfg &pmcfg) 00716 { 00717 // look the function up 00718 Function function = pmcfg.lookupFunction(rule.label()); 00719 /* check if the dimension is consistent with existing rules for the category, 00720 if any; generate a list of components if we don't have any yet */ 00721 int dim = function.size(); 00722 if (!dim) { 00723 qWarning("invalid PMCFG function"); 00724 return false; // invalid function 00725 } 00726 00727 if (dim == 1) { 00728 if (catComponents.contains(cat)) { 00729 qWarning("dimension mismatch: 1D function for multidimensional category"); 00730 return false; // dimension mismatch 00731 } 00732 } else { 00733 if (rules.contains(cat)) { 00734 qWarning("dimension mismatch: multidimensional function for 1D category"); 00735 return false; // dimension mismatch 00736 } 00737 if (catComponents.contains(cat)) { 00738 if (catComponents.value(cat).size() != dim) { 00739 qWarning("dimension mismatch: %dD function for %dD category", dim, 00740 catComponents.value(cat).size()); 00741 return false; // dimension mismatch 00742 } 00743 } else { // not known yet, we need to create a mapping 00744 QList<Cat> components; 00745 for (int i=0; i<dim; i++) { 00746 // generate an internal category for the component 00747 #ifdef DYNGENPAR_INTEGER_CATEGORIES 00748 Cat component = generateCat(); 00749 #else 00750 Cat component = QString("%1[%2]").arg(cat).arg(i); 00751 #endif 00752 components.append(component); 00753 componentCats.insert(component, qMakePair(cat, i)); 00754 } 00755 catComponents.insert(cat, components); 00756 } 00757 } 00758 return true; 00759 } 00760 00762 00763 bool Parser::convertPmcfgRule(CatArg cat, const Rule &rule, const Pmcfg &pmcfg, 00764 bool updateCaches) 00765 { 00766 // look the function up 00767 Function function = pmcfg.lookupFunction(rule.label()); 00768 /* get the list of components to use (assume that the dimensions have been 00769 already validated by computePmcfgDimension) */ 00770 int dim = function.size(); 00771 if (!dim) { 00772 qWarning("invalid PMCFG function"); 00773 return false; // invalid function 00774 } 00775 00776 QList<Cat> components; 00777 if (dim == 1) 00778 components.append(cat); 00779 else 00780 components = catComponents.value(cat); 00781 00782 // determine the number of used arguments of the function 00783 int numArgs = 0; 00784 foreach(const Sequence &sequence, function) { 00785 foreach(const Term &term, sequence) { 00786 if (term.isComponent() && term.arg >= numArgs) 00787 numArgs = term.arg + 1; 00788 } 00789 } 00790 00791 // check that enough arguments were passed to the function 00792 if (rule.size() < numArgs) { 00793 qWarning("not enough arguments for PMCFG function"); 00794 return false; 00795 } 00796 00797 // find the components of the arguments passed 00798 QVector<QList<Cat> > argComponents(numArgs); 00799 for (int i=0; i<numArgs; i++) { 00800 CatArg arg = rule.at(i); 00801 if (catComponents.contains(arg)) 00802 argComponents[i] = catComponents.value(arg); 00803 else if (pmcfg.rules.contains(arg) || pmcfg.cfRules.contains(arg) 00804 || pmcfg.tokens.contains(arg)) 00805 argComponents[i].append(arg); 00806 else 00807 /* unused category, ignore the unreachable rule 00808 We do not know the correct dimension for the unused category. */ 00809 return true; 00810 } 00811 00812 // verify the dimensions of the arguments passed 00813 foreach(const Sequence &sequence, function) { 00814 foreach(const Term &term, sequence) { 00815 if (term.isComponent() 00816 && term.component >= argComponents.at(term.arg).size()) { 00817 qWarning("dimension mismatch: attempt to use component %d of a %dD " 00818 "category", term.component, argComponents.at(term.arg).size()); 00819 return false; // dimension mismatch 00820 } 00821 } 00822 } 00823 00824 // determine the number of times each argument is used 00825 QVector<int> usageCounts(numArgs); 00826 foreach(const Sequence &sequence, function) { 00827 foreach(const Term &term, sequence) { 00828 if (term.isComponent()) 00829 usageCounts[term.arg]++; 00830 } 00831 } 00832 00833 // generate pseudo-categories 00834 QVector<QHash<int, Cat> > newPseudoCats(numArgs); 00835 foreach(const Sequence &sequence, function) { 00836 foreach(const Term &term, sequence) { 00837 if (term.isComponent() && usageCounts.at(term.arg) > 1) { 00838 if (!newPseudoCats[term.arg].contains(term.component)) { 00839 #ifdef DYNGENPAR_INTEGER_CATEGORIES 00840 Cat pseudoCat = generateCat(); 00841 #else 00842 Cat pseudoCat = QString("Pseudo%1").arg(generateCat()); 00843 #endif 00844 newPseudoCats[term.arg].insert(term.component, pseudoCat); 00845 } 00846 } 00847 } 00848 } 00849 00850 // record pseudo-categories 00851 for (int i=0; i<numArgs; i++) { 00852 const QHash<int, Cat> &pseudoCatHash = newPseudoCats.at(i); 00853 if (!pseudoCatHash.isEmpty()) { 00854 QList<Cat> pseudoCatList = pseudoCatHash.values(); 00855 QHashIterator<int, Cat> it(pseudoCatHash); 00856 while (it.hasNext()) { 00857 it.next(); 00858 Cat pseudoCat = it.value(); 00859 pseudoCats.insert(pseudoCat, qMakePair(argComponents.at(i).at(it.key()), 00860 pseudoCatList)); 00861 } 00862 } 00863 } 00864 00865 // now finish evaluating the function and record the converted rule 00866 for (int i=0; i<dim; i++) { 00867 const Sequence &sequence = function.at(i); 00868 PmcfgComponentInfo componentInfo(rule); 00869 int s = sequence.size(); 00870 for (int j=0; j<s; j++) { 00871 const Term &term = sequence.at(j); 00872 if (term.isComponent()) 00873 componentInfo.argPositions[term.arg].append(j); 00874 } 00875 Rule internalRule(QVariant::fromValue(componentInfo)); 00876 // copy next token constraints from the PMCFG sequence 00877 internalRule.nextTokenConstraints = sequence.nextTokenConstraints; 00878 /* Also try to use next token constraints from the PMCFG rule. That's not a 00879 good place to write them because the constraints will affect all 00880 components of a multi-dimensional rule, which is probably not what you 00881 want. But at least for 1D rules, it makes sense, and it costs us almost 00882 nothing to support this. (Appending an empty list is trivial.) */ 00883 internalRule.nextTokenConstraints.expect.append( 00884 rule.nextTokenConstraints.expect); 00885 internalRule.nextTokenConstraints.taboo.append( 00886 rule.nextTokenConstraints.taboo); 00887 foreach (const Term &term, sequence) 00888 internalRule.append(term.isToken() ? term.token 00889 : newPseudoCats.at(term.arg).isEmpty() 00890 ? argComponents.at(term.arg).at(term.component) 00891 : newPseudoCats.at(term.arg).value(term.component)); 00892 00893 CatArg catComponent = components.at(i); 00894 if (updateCaches) 00895 addRule(catComponent, internalRule); 00896 else 00897 rules[catComponent].append(internalRule); 00898 } 00899 00900 return true; 00901 } 00902 00904 00913 bool Parser::loadPmcfg(const Pmcfg &pmcfg) 00914 { 00915 // copy tokens, startCat and cfRules 00916 tokens = pmcfg.tokens; 00917 startCat = pmcfg.startCat; 00918 rules = pmcfg.cfRules; 00919 00920 // convert functions and PMCFG rules to rules and pseudo-categories 00921 pseudoCats.clear(); 00922 componentCats.clear(); 00923 catComponents.clear(); 00924 { 00925 QHashIterator<Cat, QList<Rule> > it(pmcfg.rules); 00926 while (it.hasNext()) { 00927 Cat cat = it.next().key(); 00928 foreach (const Rule &rule, it.value()) 00929 if (!computePmcfgDimension(cat, rule, pmcfg)) return false; 00930 } 00931 } 00932 { 00933 QHashIterator<Cat, QList<Rule> > it(pmcfg.rules); 00934 while (it.hasNext()) { 00935 Cat cat = it.next().key(); 00936 foreach (const Rule &rule, it.value()) 00937 if (!convertPmcfgRule(cat, rule, pmcfg, false)) return false; 00938 } 00939 } 00940 00941 // initialize the initial graph and the caches 00942 initCaches(); 00943 return true; 00944 } 00945 00949 00955 bool Parser::addPmcfgRule(Pmcfg &pmcfg, CatArg cat, const Rule &rule) 00956 { 00957 if (!computePmcfgDimension(cat, rule, pmcfg) 00958 || !convertPmcfgRule(cat, rule, pmcfg, true)) return false; 00959 pmcfg.rules[cat].append(rule); 00960 return true; 00961 } 00962 00964 00969 bool Parser::reachable(CatArg cat, CatArg target, QSet<Cat> mark) 00970 { 00971 if (cat == target) return true; 00972 mark.insert(cat); 00973 foreach (const FullRule &rule, initialGraph.values(cat)) { 00974 if (mark.contains(rule.cat)) continue; 00975 if (reachable(rule.cat, target, mark)) return true; 00976 } 00977 return false; 00978 } 00979 00981 00985 QList<FullRule> Parser::neighborhood(CatArg cat, CatArg target) 00986 { 00987 // use cached value if we have it 00988 QPair<Cat, Cat> key(cat, target); 00989 if (neighborhoods.contains(key)) return neighborhoods.value(key); 00990 00991 QList<FullRule> result; 00992 foreach (const FullRule &rule, initialGraph.values(cat)) { 00993 if (reachable(rule.cat, target, QSet<Cat>())) result.append(rule); 00994 } 00995 // cache the result for future reuse 00996 neighborhoods.insert(key, result); 00997 return result; 00998 } 00999 01001 void Parser::finalizeMatches(QList<Match> &matches, CatArg cat, 01002 const PseudoCatScope &scope) 01003 { 01004 int s = matches.size(); 01005 for (int i=0; i<s; i++) { 01006 Match &m = matches[i]; 01007 PseudoCatScope childScope = m.scope; 01008 m.scope = scope; 01009 m.scope.pConstraints().insert(cat, 01010 qMakePair(qMakePair(m.tree, m.nextTokenConstraints), 0)); 01011 foreach (CatArg cati, pseudoCats.value(cat).second) 01012 m.scope.mcfgConstraints().insert(cati, qMakePair(m.ruleno, childScope)); 01013 m.ruleno = 0; // We don't need ruleno anymore. 01014 } 01015 } 01016 01018 void Parser::copyScope(QList<Match> &matches, const PseudoCatScope &scope) 01019 { 01020 if (scope.isNull()) return; // don't bother copying a null scope 01021 int s = matches.size(); 01022 for (int i=0; i<s; i++) 01023 matches[i].scope = scope; 01024 } 01025 01027 QList<Match> Parser::matchCFToEpsilon(CatArg cat, QSet<Cat> mark) 01028 { 01029 QList<Match> result; 01030 bool haveNextTokenConstraints = false; 01031 01032 mark.insert(cat); 01033 bool isFirst = true; 01034 foreach (const Rule &rule, rules.value(cat)) { 01035 // only consider nullable rules with only unmarked categories 01036 foreach (CatArg cati, rule) { 01037 if (!nullable.contains(cati) || mark.contains(cati)) goto next_rule; 01038 } 01039 { 01040 QList<Match> currentMatches; 01041 Node node(cat); 01042 node.children.first().setLabel(rule.label()); 01043 currentMatches.append(Match(0, node, 0, PseudoCatScope())); 01044 if (!rule.nextTokenConstraints.expect.isEmpty()) { 01045 haveNextTokenConstraints = true; 01046 currentMatches.first().nextTokenConstraints.expect = 01047 rule.nextTokenConstraints.expect; 01048 } 01049 if (!rule.nextTokenConstraints.taboo.isEmpty()) { 01050 haveNextTokenConstraints = true; 01051 currentMatches.first().nextTokenConstraints.taboo = 01052 rule.nextTokenConstraints.taboo; 01053 } 01054 foreach (CatArg cati, rule) { 01055 int s = currentMatches.size(); 01056 for (int i=0; i<s; ) { 01057 Match &m = currentMatches[i]; 01058 const QList<Match> componentMatches 01059 = matchToEpsilonRecurse(cati, mark, m.scope); 01060 if (componentMatches.isEmpty()) { 01061 if (i) currentMatches.swap(0, i); 01062 currentMatches.removeFirst(); 01063 s--; 01064 } else { 01065 int cs = componentMatches.size(); 01066 for (int j=1; j<cs; j++) { 01067 const Match &cm = componentMatches.at(j); 01068 Node newTree = m.tree; 01069 newTree.children.first().append(cm.tree); 01070 currentMatches.append(Match(m.len + cm.len, newTree, 0, cm.scope, 01071 m.nextTokenConstraints)); 01072 if (!cm.nextTokenConstraints.expect.isEmpty()) { 01073 haveNextTokenConstraints = true; 01074 currentMatches.last().nextTokenConstraints.expect.append( 01075 cm.nextTokenConstraints.expect); 01076 } 01077 if (!cm.nextTokenConstraints.taboo.isEmpty()) { 01078 haveNextTokenConstraints = true; 01079 currentMatches.last().nextTokenConstraints.taboo.append( 01080 cm.nextTokenConstraints.taboo); 01081 } 01082 } 01083 const Match &cm = componentMatches.first(); 01084 m.len += cm.len; 01085 m.tree.children.first().append(cm.tree); 01086 m.scope = cm.scope; 01087 if (!cm.nextTokenConstraints.expect.isEmpty()) { 01088 haveNextTokenConstraints = true; 01089 m.nextTokenConstraints.expect.append( 01090 cm.nextTokenConstraints.expect); 01091 } 01092 if (!cm.nextTokenConstraints.taboo.isEmpty()) { 01093 haveNextTokenConstraints = true; 01094 m.nextTokenConstraints.taboo.append( 01095 cm.nextTokenConstraints.taboo); 01096 } 01097 i++; 01098 } 01099 } 01100 } 01101 if (currentMatches.isEmpty()) goto next_rule; 01102 if (haveNextTokenConstraints) { 01103 /* We cannot just unify all the matches to one in this case. We will run 01104 the full unification process at the end. But clear the scopes. */ 01105 int s = currentMatches.size(); 01106 for (int i=0; i<s; i++) currentMatches[i].scope = PseudoCatScope(); 01107 result.append(currentMatches); 01108 goto next_rule; 01109 } 01110 /* unify the matches: 01111 * if this is the first matching rule, make the first match the current 01112 result 01113 * otherwise, append the new alternative(s) from the first match to the 01114 current result 01115 * if there is more than one match, also append the alternative(s) from 01116 the additional matches */ 01117 int i=0, s=currentMatches.size(); 01118 if (isFirst) { 01119 isFirst = false; 01120 result.append(currentMatches.first()); 01121 result.first().scope = PseudoCatScope(); // clear scope 01122 i++; 01123 } 01124 Match &firstResult = result.first(); 01125 for (; i<s; i++) 01126 firstResult.tree.children.append(currentMatches.at(i).tree.children); 01127 } 01128 next_rule: 01129 ; 01130 } 01131 /* unify the result now (as much as possible) if we have next token 01132 constraints, otherwise it is already unified to a single match */ 01133 if (haveNextTokenConstraints) unify(result); 01134 return result; 01135 } 01136 01139 QList<Match> Parser::matchEffectiveCatToEpsilon(CatArg cat, QSet<Cat> mark) 01140 { 01141 QList<Match> result; 01142 01143 /* if the effective category is 1-dimensional, don't bother about rule numbers 01144 and child scopes - they are not needed for P constraints (the only ones we 01145 can have on pseudo-categories for 1-dimensional effective categories) */ 01146 if (!componentCats.contains(cat)) 01147 return matchCFToEpsilon(cat, mark); 01148 01149 mark.insert(cat); 01150 QList<Rule> ruleList = rules.value(cat); 01151 int s = ruleList.size(); 01152 for (int i=0; i<s; i++) { 01153 const Rule &rule = ruleList.at(i); 01154 // only consider nullable rules with only unmarked categories 01155 foreach (CatArg cati, rule) { 01156 if (!nullable.contains(cati) || mark.contains(cati)) goto next_rule; 01157 } 01158 { 01159 QList<Match> currentMatches; 01160 Node node(cat); 01161 node.children.first().setLabel(rule.label()); 01162 currentMatches.append(Match(0, node, i, PseudoCatScope(), 01163 rule.nextTokenConstraints)); 01164 foreach (CatArg cati, rule) { 01165 int s = currentMatches.size(); 01166 for (int i=0; i<s; ) { 01167 Match &m = currentMatches[i]; 01168 const QList<Match> componentMatches 01169 = matchToEpsilonRecurse(cati, mark, m.scope); 01170 if (componentMatches.isEmpty()) { 01171 if (i) currentMatches.swap(0, i); 01172 currentMatches.removeFirst(); 01173 s--; 01174 } else { 01175 int cs = componentMatches.size(); 01176 for (int j=1; j<cs; j++) { 01177 const Match &cm = componentMatches.at(j); 01178 Node newTree = m.tree; 01179 newTree.children.first().append(cm.tree); 01180 currentMatches.append(Match(m.len + cm.len, newTree, m.ruleno, 01181 cm.scope, m.nextTokenConstraints)); 01182 currentMatches.last().nextTokenConstraints.expect.append( 01183 cm.nextTokenConstraints.expect); 01184 currentMatches.last().nextTokenConstraints.taboo.append( 01185 cm.nextTokenConstraints.taboo); 01186 } 01187 const Match &cm = componentMatches.first(); 01188 m.len += cm.len; 01189 m.tree.children.first().append(cm.tree); 01190 m.scope = cm.scope; 01191 m.nextTokenConstraints.expect.append( 01192 cm.nextTokenConstraints.expect); 01193 m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo); 01194 i++; 01195 } 01196 } 01197 } 01198 // in this case, we cannot unify, so just append the matches to the result 01199 result.append(currentMatches); 01200 } 01201 next_rule: 01202 ; 01203 } 01204 return result; 01205 } 01206 01208 QList<Match> Parser::matchToEpsilonRecurse(CatArg cat, QSet<Cat> mark, 01209 const PseudoCatScope &scope) 01210 { 01211 QList<Match> result; 01212 01213 if (scope.hasPConstraint(cat)) { 01214 // handle P constraint 01215 QPair<QPair<Node, NextTokenConstraints>, int> pConstraint 01216 = scope.pConstraint(cat); 01217 if (!pConstraint.second) // only matches if length == 0 01218 result.append(Match(0, pConstraint.first.first, 0, scope, 01219 pConstraint.first.second)); 01220 return result; 01221 } 01222 01223 // if this is a pseudo-category, it is not known yet, so get the effective 01224 // category (if this is not a pseudo-category, effCat is cat itself) 01225 Cat effCat = effectiveCat(cat); 01226 01227 if (effCat != cat) { // cat is a pseudo-category 01228 if (mark.contains(effCat)) return result; // prevent infinite recursion 01229 mark.insert(cat); 01230 01231 if (scope.hasMcfgConstraint(cat)) { 01232 // handle MCFG constraint 01233 QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat); 01234 const Rule &rule = rules.value(effCat).at(mcfgConstraint.first); 01235 /* check if we can (possibly - there could be other PMCFG constraints 01236 preventing it) produce epsilon with this rule, and make sure the 01237 categories in the rule are not marked to prevent infinite recursion */ 01238 foreach (CatArg cati, rule) { 01239 if (!nullable.contains(cati) || mark.contains(cati)) return result; 01240 } 01241 Node node(effCat); 01242 node.children.first().setLabel(rule.label()); 01243 result.append(Match(0, node, mcfgConstraint.first, scope, 01244 rule.nextTokenConstraints)); 01245 foreach (CatArg cati, rule) { 01246 int s = result.size(); 01247 for (int i=0; i<s; ) { 01248 Match &m = result[i]; 01249 const QList<Match> componentMatches 01250 = matchToEpsilonRecurse(cati, mark, m.scope); 01251 if (componentMatches.isEmpty()) { 01252 if (i) result.swap(0, i); 01253 result.removeFirst(); 01254 s--; 01255 } else { 01256 int cs = componentMatches.size(); 01257 for (int j=1; j<cs; j++) { 01258 const Match &cm = componentMatches.at(j); 01259 Node newTree = m.tree; 01260 newTree.children.first().append(cm.tree); 01261 result.append(Match(m.len + cm.len, newTree, m.ruleno, 01262 cm.scope, m.nextTokenConstraints)); 01263 result.last().nextTokenConstraints.expect.append( 01264 cm.nextTokenConstraints.expect); 01265 result.last().nextTokenConstraints.taboo.append( 01266 cm.nextTokenConstraints.taboo); 01267 } 01268 const Match &cm = componentMatches.first(); 01269 m.len += cm.len; 01270 m.tree.children.first().append(cm.tree); 01271 m.scope = cm.scope; 01272 m.nextTokenConstraints.expect.append( 01273 cm.nextTokenConstraints.expect); 01274 m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo); 01275 i++; 01276 } 01277 } 01278 } 01279 } else // no PMCFG constraints on this pseudo-category 01280 result = matchEffectiveCatToEpsilon(effCat, mark); 01281 01282 finalizeMatches(result, cat, scope); 01283 } else { // cat is a true category 01284 result = matchCFToEpsilon(cat, mark); 01285 copyScope(result, scope); 01286 } 01287 01288 return result; 01289 } 01290 01292 01297 QList<Match> Parser::matchToEpsilon(CatArg cat, const PseudoCatScope &scope) 01298 { 01299 QList<Match> result; 01300 01301 if (scope.hasPConstraint(cat)) { 01302 // handle P constraint 01303 QPair<QPair<Node, NextTokenConstraints>, int> pConstraint 01304 = scope.pConstraint(cat); 01305 if (!pConstraint.second) // only matches if length == 0 01306 result.append(Match(0, pConstraint.first.first, 0, scope, 01307 pConstraint.first.second)); 01308 return result; 01309 } 01310 01311 // if this is a pseudo-category, it is not known yet, so get the effective 01312 // category (if this is not a pseudo-category, effCat is cat itself) 01313 Cat effCat = effectiveCat(cat); 01314 01315 if (effCat != cat) { // cat is a pseudo-category 01316 if (scope.hasMcfgConstraint(cat)) { 01317 // handle MCFG constraint 01318 QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat); 01319 const Rule &rule = rules.value(effCat).at(mcfgConstraint.first); 01320 /* check if we can (possibly - there could be other PMCFG constraints 01321 preventing it) produce epsilon with this rule */ 01322 foreach (CatArg cati, rule) { 01323 if (!nullable.contains(cati)) return result; 01324 } 01325 Node node(effCat); 01326 node.children.first().setLabel(rule.label()); 01327 result.append(Match(0, node, mcfgConstraint.first, scope, 01328 rule.nextTokenConstraints)); 01329 foreach (CatArg cati, rule) { 01330 int s = result.size(); 01331 for (int i=0; i<s; ) { 01332 Match &m = result[i]; 01333 const QList<Match> componentMatches = matchToEpsilon(cati, m.scope); 01334 if (componentMatches.isEmpty()) { 01335 if (i) result.swap(0, i); 01336 result.removeFirst(); 01337 s--; 01338 } else { 01339 int cs = componentMatches.size(); 01340 for (int j=1; j<cs; j++) { 01341 const Match &cm = componentMatches.at(j); 01342 Node newTree = m.tree; 01343 newTree.children.first().append(cm.tree); 01344 result.append(Match(m.len + cm.len, newTree, m.ruleno, 01345 cm.scope, m.nextTokenConstraints)); 01346 result.last().nextTokenConstraints.expect.append( 01347 cm.nextTokenConstraints.expect); 01348 result.last().nextTokenConstraints.taboo.append( 01349 cm.nextTokenConstraints.taboo); 01350 } 01351 const Match &cm = componentMatches.first(); 01352 m.len += cm.len; 01353 m.tree.children.first().append(cm.tree); 01354 m.scope = cm.scope; 01355 m.nextTokenConstraints.expect.append( 01356 cm.nextTokenConstraints.expect); 01357 m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo); 01358 i++; 01359 } 01360 } 01361 } 01362 } else { // no PMCFG constraints on this pseudo-category 01363 // We cannot cache the result here because the scopes can change. 01364 /* FIXME: Well, we could cache the result with a dummy scope and then 01365 compute the union. Is that worth it? */ 01366 result = matchEffectiveCatToEpsilon(effCat, QSet<Cat>()); 01367 } 01368 01369 finalizeMatches(result, cat, scope); 01370 } else { // cat is a true category 01371 // memoize the matches for efficiency 01372 // FIXME: this will probably need to get smarter (or disabled) for 01373 // context-sensitive constraints 01374 if (epsilonMatches.contains(cat)) 01375 result = epsilonMatches.value(cat); 01376 else { 01377 result = matchCFToEpsilon(cat, QSet<Cat>()); 01378 epsilonMatches.insert(cat, result); 01379 } 01380 copyScope(result, scope); 01381 } 01382 01383 return result; 01384 } 01385 01387 bool Parser::matchesTokenRecurse(CatArg cat, CatArg token, QSet<Cat> mark) const 01388 { 01389 // Handle the trivial case first. 01390 if (isToken(cat)) return cat == token; 01391 01392 // Now cat is a nonterminal. 01393 mark.insert(cat); 01394 foreach (const Rule &rule, rules.value(cat)) { 01395 // an epsilon rule cannot match a token 01396 if (rule.isEmpty()) continue; 01397 // for each element of the rule, try matching that element to the token and 01398 // the rest to epsilon 01399 int l = rule.size(); 01400 for (int i=0; i<l; i++) { 01401 CatArg cati = rule.at(i); 01402 /* Left recursion cannot match a token if it doesn't match without the 01403 recursion because tokens are atomic. */ 01404 if (mark.contains(cati)) continue; // ignore left recursion 01405 for (int j=0; j<l; j++) { 01406 if (i == j) continue; 01407 if (!nullable.contains(rule.at(j))) goto skip_this_i; // match epsilon 01408 } 01409 if (matchesTokenRecurse(cati, token, mark)) return true; // match token 01410 skip_this_i: ; 01411 } 01412 } 01413 01414 // None of the rules matched. 01415 return false; 01416 } 01417 01419 01428 bool Parser::matchesToken(CatArg cat, CatArg token) const 01429 { 01430 return matchesTokenRecurse(cat, token, QSet<Cat>()); 01431 } 01432 01434 01441 void Parser::collectLeaves(const Node &tree, QList<Node> &leaves) 01442 { 01443 if (isToken(tree.cat)) 01444 leaves.append(tree); 01445 else { 01446 // consider only the first alternative - they must all match the same tokens 01447 foreach (const Node &node, tree.children.first()) 01448 collectLeaves(node, leaves); 01449 } 01450 } 01451 01453 bool Parser::validateNextTokenConstraints(CatArg token, 01454 const NextTokenConstraints &nextTokenConstraints) const 01455 { 01456 foreach(CatArg expect, nextTokenConstraints.expect) 01457 if (!matchesToken(expect, token)) return false; 01458 foreach(CatArg taboo, nextTokenConstraints.taboo) 01459 if (matchesToken(taboo, token)) return false; 01460 return true; 01461 } 01462 01464 01470 QList<Match> Parser::match(CatArg cat, int pos, const PseudoCatScope &scope, 01471 const StackItem &stack, 01472 const NextTokenConstraints &nextTokenConstraints) 01473 { 01474 QList<Match> result; 01475 01476 if (scope.hasPConstraint(cat)) { 01477 // handle P constraint 01478 QPair<QPair<Node, NextTokenConstraints>, int> pConstraint 01479 = scope.pConstraint(cat); 01480 if (pConstraint.second) { // length != 0 01481 QList<Node> leaves; 01482 collectLeaves(pConstraint.first.first, leaves); 01483 /* check the next token constraints right now, it's no use keeping the 01484 type 6 item just to have the shift throw it away, and this way we don't 01485 have to keep 2 sets of next token constraints in the type 6 item (one 01486 for the first token and one for the next token after the last) */ 01487 if (validateNextTokenConstraints(leaves.first().cat, 01488 nextTokenConstraints)) { 01489 StackItem type6(stack, leaves, 0, pConstraint.first.first, scope, 01490 pConstraint.first.second); 01491 nextStacks.append(type6); 01492 } 01493 } else { // length == 0 01494 NextTokenConstraints newNextTokenConstraints = nextTokenConstraints; 01495 newNextTokenConstraints.expect.append(pConstraint.first.second.expect); 01496 newNextTokenConstraints.taboo.append(pConstraint.first.second.taboo); 01497 result.append(Match(0, pConstraint.first.first, 0, scope, 01498 newNextTokenConstraints)); 01499 } 01500 return result; 01501 } 01502 01503 // if this is a pseudo-category, get the effective category (if this is not a 01504 // pseudo-category, effCat is cat itself) 01505 Cat effCat = effectiveCat(cat); 01506 01507 if (scope.hasMcfgConstraint(cat)) { 01508 // handle MCFG constraint 01509 QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat); 01510 const Rule &rule = rules.value(effCat).at(mcfgConstraint.first); 01511 01512 // perform a top-down expansion of rule 01513 { 01514 StackItem type5(stack, cat, scope); 01515 Node node(effCat); 01516 node.children.first().setLabel(rule.label()); 01517 result = matchRemaining(rule, 0, pos, 0, node, mcfgConstraint.second, 01518 mcfgConstraint.first, type5, 01519 nextTokenConstraints); 01520 } 01521 01522 finalizeMatches(result, cat, scope); 01523 } else { // no PMCFG constraints on this category or pseudo-category 01524 // try epsilon matches first 01525 if (nullable.contains(effCat)) { 01526 result = matchToEpsilon(cat, scope); 01527 if (!nextTokenConstraints.expect.isEmpty() 01528 || !nextTokenConstraints.taboo.isEmpty()) { 01529 int s = result.size(); 01530 for (int i=0; i<s; i++) { 01531 NextTokenConstraints &newNextTokenConstraints 01532 = result[i].nextTokenConstraints; 01533 newNextTokenConstraints.expect.append(nextTokenConstraints.expect); 01534 newNextTokenConstraints.taboo.append(nextTokenConstraints.taboo); 01535 } 01536 } 01537 } 01538 01539 // now we want a nonempty match 01540 // prepare the parse stack for the next shift 01541 /* We do not store the next token constraints in the type 0 item because 01542 the type 3 parent already carries them, and they can be different for the 01543 different type 3 parents in the DAG-structured stack. */ 01544 if (stack.type() < 0) { // toplevel match 01545 StackItem type0(QList<StackItem>(), cat, effCat, pos, scope); 01546 nextStacks.append(type0); 01547 } else { // subordinate match, i.e. nonempty stack 01548 // memoize type 0 item indices to build a graph-structured stack 01549 if (scope.isNull() && type0Indices.contains(cat)) 01550 nextStacks[type0Indices.value(cat)].addParent(stack); 01551 else { 01552 QList<StackItem> parents; 01553 parents << stack; 01554 StackItem type0(parents, cat, effCat, pos, scope); 01555 if (scope.isNull()) type0Indices.insert(cat, nextStacks.size()); 01556 nextStacks.append(type0); 01557 } 01558 } 01559 } 01560 01561 // return the result (only epsilon matches, the rest is deferred) 01562 return result; 01563 } 01564 01566 01570 QList<Match> Parser::matchRemaining(const Rule &rule, int len, int curr, int i, 01571 const Node &tree, 01572 const PseudoCatScope &scope, int ruleno, 01573 const StackItem &stack, 01574 const NextTokenConstraints 01575 &nextTokenConstraints) 01576 { 01577 QList<Match> result; 01578 if (i < rule.size()) { 01579 QList<Match> matches; 01580 { 01581 StackItem type3(stack, rule, len, curr, i, tree, scope, ruleno, 01582 nextTokenConstraints); 01583 matches = match(rule.at(i), curr, scope, type3, nextTokenConstraints); 01584 } 01585 foreach (const Match &m, matches) { 01586 Node newTree = tree; 01587 newTree.children.first().append(m.tree); 01588 result.append(matchRemaining(rule, len+m.len, curr+m.len, i+1, newTree, 01589 m.scope, ruleno, stack, 01590 m.nextTokenConstraints)); 01591 } 01592 } else { 01593 if (rule.action) { 01594 ActionInfo info(tree); 01595 rule.action->execute(info); 01596 } 01597 NextTokenConstraints newNextTokenConstraints = nextTokenConstraints; 01598 newNextTokenConstraints.expect.append(rule.nextTokenConstraints.expect); 01599 newNextTokenConstraints.taboo.append(rule.nextTokenConstraints.taboo); 01600 result.append(Match(len, tree, ruleno, scope, newNextTokenConstraints)); 01601 } 01602 return result; 01603 } 01604 01606 01609 Cat Parser::findFirstToken(const Node &tree) 01610 { 01611 CatArg cat = tree.cat; 01612 if (isToken(cat)) 01613 return cat; 01614 else { 01615 // consider only the first alternative - they must all match the same tokens 01616 foreach (const Node &node, tree.children.first()) { 01617 Cat result = findFirstToken(node); 01618 if (!IS_EPSILON(result)) return result; 01619 } 01620 return Cat(); // This tree matches epsilon. 01621 } 01622 } 01623 01626 void Parser::unify(QList<Match> &matches) 01627 { 01628 int l = matches.size(); 01629 if (l > 1) { 01630 /* For plain CFGs, we'd only need to match length and category here. We add 01631 the rule number and the scope for PMCFG matches and the next token 01632 constraints for scannerless parsing support. */ 01633 QHash<QPair<QPair<QPair<int, Cat>, QPair<int, PseudoCatScope> >, 01634 NextTokenConstraints>, int> 01635 indexOfLenCat; 01636 { 01637 const Match &m = matches.first(); 01638 indexOfLenCat.insert(qMakePair(qMakePair(qMakePair(m.len, m.tree.cat), 01639 qMakePair(m.ruleno, m.scope)), 01640 m.nextTokenConstraints), 0); 01641 } 01642 for (int i=1; i<l; ) { 01643 const Match &m = matches.at(i); 01644 QPair<QPair<QPair<int, Cat>, QPair<int, PseudoCatScope> >, 01645 NextTokenConstraints> key(qMakePair(qMakePair(m.len, m.tree.cat), 01646 qMakePair(m.ruleno, m.scope)), 01647 m.nextTokenConstraints); 01648 if (indexOfLenCat.contains(key)) { 01649 matches[indexOfLenCat.value(key)].tree.children.append(m.tree.children); 01650 matches.removeAt(i); 01651 l--; 01652 } else indexOfLenCat.insert(key, i++); 01653 } 01654 } 01655 } 01656 01659 01667 QList<Match> Parser::reduce(CatArg cat, CatArg target, int pos, int len, 01668 const Node &tree, const StackItem &stack, 01669 const PseudoCatScope &scope, int ruleno, 01670 const NextTokenConstraints &nextTokenConstraints, 01671 QSet<Cat> mark) 01672 { 01673 QList<Match> result; 01674 if (cat == target) result.append(Match(len, tree, ruleno, scope, 01675 nextTokenConstraints)); 01676 QList<Match> matches; 01677 { 01678 StackItem type4(stack, target, pos, len); 01679 // for each rule to get towards the target category 01680 foreach (const FullRule &rule, neighborhood(cat, target)) { 01681 QList<Match> currentMatches; 01682 Node node(rule.cat); 01683 node.children.first().setLabel(rule.rule.label()); 01684 int epsilonsSkipped = rule.epsilonsSkipped; 01685 // if we are reducing a pseudo-category, set the correct scope 01686 PseudoCatScope newScope; 01687 CatArg pseudoCat = rule.rule.at(epsilonsSkipped); 01688 if (pseudoCat != cat) { 01689 newScope.pConstraints().insert(pseudoCat, 01690 qMakePair(qMakePair(tree, nextTokenConstraints), len)); 01691 foreach (CatArg cati, pseudoCats.value(pseudoCat).second) 01692 newScope.mcfgConstraints().insert(cati, qMakePair(ruleno, scope)); 01693 } 01694 currentMatches.append(Match(0, node, rule.ruleno, newScope, 01695 nextTokenConstraints)); 01696 /* Find the first token in the tree so we can validate the next token 01697 constraints of the epsilon matches. If we don't have epsilons skipped, 01698 don't bother doing this work. 01699 Also note that tree cannot be an epsilon tree because we never reduce 01700 epsilon. */ 01701 Cat firstToken = Cat(); 01702 if (epsilonsSkipped) 01703 firstToken = findFirstToken(tree); 01704 // match first items to epsilon 01705 for (int i=0; i<epsilonsSkipped; i++) { 01706 int s = currentMatches.size(); 01707 for (int i=0; i<s; ) { 01708 Match &m = currentMatches[i]; 01709 QList<Match> componentMatches = matchToEpsilon(rule.rule.at(i), 01710 m.scope); 01711 /* Here, we need to validate the next token constraints in our epsilon 01712 matches against the first token of the tree we're reducing. */ 01713 /* FIXME: Would it be more efficient to do this in neighborhood? It'd 01714 help prediction in some (rare?) cases, too. But it'd make 01715 neighborhoods dependent on the first token, which hurts 01716 caching badly. */ 01717 { 01718 int cs = componentMatches.size(); 01719 for (int j=0; j<cs; ) { 01720 if (validateNextTokenConstraints(firstToken, 01721 componentMatches.at(j).nextTokenConstraints)) j++; else { 01722 if (j) componentMatches.swap(0, j); 01723 componentMatches.removeFirst(); 01724 cs--; 01725 } 01726 } 01727 } 01728 if (componentMatches.isEmpty()) { 01729 if (i) currentMatches.swap(0, i); 01730 currentMatches.removeFirst(); 01731 s--; 01732 } else { 01733 int cs = componentMatches.size(); 01734 for (int j=1; j<cs; j++) { 01735 const Match &cm = componentMatches.at(j); 01736 Node newTree = m.tree; 01737 newTree.children.first().append(cm.tree); 01738 currentMatches.append(Match(m.len + cm.len, newTree, m.ruleno, 01739 cm.scope, m.nextTokenConstraints)); 01740 } 01741 const Match &cm 01742 = static_cast<const QList<Match> &>(componentMatches).first(); 01743 m.len += cm.len; 01744 m.tree.children.first().append(cm.tree); 01745 m.scope = cm.scope; 01746 i++; 01747 } 01748 } 01749 } 01750 foreach (const Match &m, currentMatches) { 01751 Node newTree = m.tree; 01752 newTree.children.first().append(tree); 01753 // match remaining rule items 01754 matches.append(matchRemaining(rule.rule, 0, pos, epsilonsSkipped+1, 01755 newTree, m.scope, m.ruleno, type4, 01756 m.nextTokenConstraints)); 01757 } 01758 } 01759 } 01760 // unify matches to a shared representation to avoid needless bifurcation 01761 unify(matches); 01762 { 01763 StackItem type2(stack, 0); 01764 foreach (const Match &m, matches) { 01765 Cat newCat = m.tree.cat; 01766 if (m.len) 01767 qFatal("length of a reduction increased in a non-shift codepath"); 01768 // reduction with unchanged length 01769 // avoid infinite loops (if we have epsilon productions or redundant 01770 // X->...->X rules) 01771 mark.insert(cat); 01772 if (mark.contains(newCat)) continue; 01773 // now reduce the rule 01774 result.append(reduce(newCat, target, pos, len, m.tree, type2, m.scope, 01775 m.ruleno, m.nextTokenConstraints, mark)); 01776 } 01777 } 01778 // do another unification pass on the result 01779 unify(result); 01780 return result; 01781 } 01782 01784 01787 QList<Match> Parser::processStackItem(const StackItem &item, 01788 const QList<Match> &matches) 01789 { 01790 switch (item.type()) { 01791 case 0: 01792 qFatal("type 0 items not supported by processStackItem"); 01793 case 1: 01794 { 01795 const StackItemType1 *data 01796 = static_cast<const StackItemType1 *>(item.data()); 01797 const QList<StackItem> &parents = data->parents(); 01798 Cat cat = data->cat(); 01799 Cat effCat = data->effCat(); 01800 PseudoCatScope scope = data->scope(); 01801 01802 QList<Match> processedMatches = matches; 01803 if (effCat != cat) // cat is a pseudo-category 01804 finalizeMatches(processedMatches, cat, scope); 01805 else 01806 copyScope(processedMatches, scope); 01807 01808 if (parents.isEmpty()) // toplevel item 01809 return processedMatches; 01810 else { 01811 QList<Match> result; 01812 foreach (const StackItem &parent, parents) 01813 result.append(processStackItem(parent, processedMatches)); 01814 return result; 01815 } 01816 } 01817 break; 01818 case 2: 01819 { 01820 const StackItemType2 *data 01821 = static_cast<const StackItemType2 *>(item.data()); 01822 const StackItem &parent = data->parent(); 01823 01824 // unify matches, deferring processing if necessary 01825 if (data->derefUsage()) { 01826 collectedMatches[data].append(matches); 01827 return QList<Match>(); 01828 } 01829 QList<Match> unifiedMatches = collectedMatches.value(data); 01830 unifiedMatches.append(matches); 01831 unify(unifiedMatches); 01832 01833 return processStackItem(parent, unifiedMatches); 01834 } 01835 break; 01836 case 3: 01837 { 01838 const StackItemType3 *data 01839 = static_cast<const StackItemType3 *>(item.data()); 01840 const StackItem &parent = data->parent(); 01841 Rule rule = data->rule(); 01842 int len = data->len(); 01843 int curr = data->curr(); 01844 int i = data->i(); 01845 Node tree = data->tree(); 01846 PseudoCatScope scope = data->scope(); 01847 int ruleno = data->ruleno(); 01848 01849 QList<Match> result; 01850 foreach (const Match &m, matches) { 01851 Node newTree = tree; 01852 newTree.children.first().append(m.tree); 01853 result.append(matchRemaining(rule, len+m.len, curr+m.len, i+1, 01854 newTree, m.scope, ruleno, parent, 01855 m.nextTokenConstraints)); 01856 } 01857 01858 return processStackItem(parent, result); 01859 } 01860 break; 01861 case 4: 01862 { 01863 const StackItemType4 *data 01864 = static_cast<const StackItemType4 *>(item.data()); 01865 const StackItem &parent = data->parent(); 01866 Cat target = data->target(); 01867 int pos = data->pos(); 01868 int len = data->len(); 01869 01870 // unify matches, deferring processing if necessary 01871 if (data->derefUsage()) { 01872 collectedMatches[data].append(matches); 01873 return QList<Match>(); 01874 } 01875 QList<Match> unifiedMatches = collectedMatches.value(data); 01876 unifiedMatches.append(matches); 01877 unify(unifiedMatches); 01878 01879 QList<Match> result; 01880 { 01881 StackItem type2(parent, 0); 01882 foreach (const Match &m, unifiedMatches) { 01883 Cat newCat = m.tree.cat; 01884 if (!m.len) 01885 qFatal("reduction with unchanged length was deferred"); 01886 // the length increased, reduce the rule, resetting mark 01887 result.append(reduce(newCat, target, pos+m.len, len+m.len, m.tree, 01888 type2, m.scope, m.ruleno, 01889 m.nextTokenConstraints)); 01890 } 01891 } 01892 // do another unification pass on the result 01893 unify(result); 01894 01895 return processStackItem(parent, result); 01896 } 01897 case 5: 01898 { 01899 const StackItemType5 *data 01900 = static_cast<const StackItemType5 *>(item.data()); 01901 const StackItem &parent = data->parent(); 01902 Cat cat = data->cat(); 01903 PseudoCatScope scope = data->scope(); 01904 01905 QList<Match> result = matches; 01906 finalizeMatches(result, cat, scope); 01907 01908 return processStackItem(parent, result); 01909 } 01910 break; 01911 case 6: 01912 qFatal("type 6 items not supported by processStackItem"); 01913 default: 01914 qFatal("invalid stack item type"); 01915 } 01916 } 01917 01919 01926 QList<Match> Parser::processStack(const StackItem &stack, CatArg token) 01927 { 01928 switch (stack.type()) { 01929 case 0: 01930 { 01931 const StackItemType0 *data 01932 = static_cast<const StackItemType0 *>(stack.data()); 01933 const QList<StackItem> &parents = data->parents(); 01934 Cat cat = data->cat(); 01935 Cat effCat = data->effCat(); 01936 int pos = data->pos(); 01937 PseudoCatScope scope = data->scope(); 01938 QList<Match> matches; 01939 01940 // type 0 item: finish processing of match after a shift 01941 if (isToken(cat)) { 01942 if (token == cat) matches.append(Match(1, inputSource->parseTree(), 0, 01943 scope)); 01944 } else { 01945 // reduce the matched token (and possibly additional tokens, with 01946 // deferred processing) to the target category 01947 StackItem type1(parents, cat, effCat, scope); 01948 // The token carries no next token constraints. 01949 matches = reduce(token, effCat, pos, 1, inputSource->parseTree(), 01950 type1, PseudoCatScope(), 0, NextTokenConstraints()); 01951 01952 if (effCat != cat) // cat is a pseudo-category 01953 finalizeMatches(matches, cat, scope); 01954 else 01955 copyScope(matches, scope); 01956 } 01957 01958 if (parents.isEmpty()) // toplevel item 01959 return matches; 01960 else { 01961 QList<Match> result; 01962 foreach (const StackItem &parent, parents) 01963 result.append(processStackItem(parent, matches)); 01964 return result; 01965 } 01966 } 01967 case 6: 01968 { 01969 const StackItemType6 *data 01970 = static_cast<const StackItemType6 *>(stack.data()); 01971 const StackItem &parent = data->parent(); 01972 QList<Node> leaves = data->leaves(); 01973 int i = data->i(); 01974 Node tree = data->tree(); 01975 PseudoCatScope scope = data->scope(); 01976 NextTokenConstraints nextTokenConstraints 01977 = data->nextTokenConstraints(); 01978 QList<Match> matches; 01979 01980 // type 6 item: finish processing of a P constraint 01981 // match the i-th leaf against the shifted token 01982 if (inputSource->matchParseTree(leaves.at(i++))) { 01983 int l = leaves.size(); 01984 if (i == l) // we're done matching the constraint 01985 matches.append(Match(l, tree, 0, scope, nextTokenConstraints)); 01986 else { // not done yet, create a new type 6 item 01987 StackItem type6(parent, leaves, i, tree, scope, 01988 nextTokenConstraints); 01989 nextStacks.append(type6); 01990 } 01991 } 01992 01993 return processStackItem(parent, matches); 01994 } 01995 default: 01996 qFatal("invalid stack (toplevel item not of type 0 or 6)"); 01997 } 01998 } 01999 02002 void Parser::computeUsageCounts(const StackItem &item) 02003 { 02004 switch (item.type()) { 02005 case 0: 02006 { 02007 const StackItemType0 *data 02008 = static_cast<const StackItemType0 *>(item.data()); 02009 const QList<StackItem> &parents = data->parents(); 02010 02011 foreach (const StackItem &parent, parents) 02012 computeUsageCounts(parent); 02013 } 02014 break; 02015 case 1: 02016 { 02017 const StackItemType1 *data 02018 = static_cast<const StackItemType1 *>(item.data()); 02019 const QList<StackItem> &parents = data->parents(); 02020 02021 foreach (const StackItem &parent, parents) 02022 computeUsageCounts(parent); 02023 } 02024 break; 02025 case 2: 02026 { 02027 const StackItemType2 *data 02028 = static_cast<const StackItemType2 *>(item.data()); 02029 const StackItem &parent = data->parent(); 02030 02031 if (!data->refUsage()) computeUsageCounts(parent); 02032 } 02033 break; 02034 case 3: 02035 { 02036 const StackItemType3 *data 02037 = static_cast<const StackItemType3 *>(item.data()); 02038 const StackItem &parent = data->parent(); 02039 02040 computeUsageCounts(parent); 02041 } 02042 break; 02043 case 4: 02044 { 02045 const StackItemType4 *data 02046 = static_cast<const StackItemType4 *>(item.data()); 02047 const StackItem &parent = data->parent(); 02048 02049 if (!data->refUsage()) computeUsageCounts(parent); 02050 } 02051 break; 02052 case 5: 02053 { 02054 const StackItemType5 *data 02055 = static_cast<const StackItemType5 *>(item.data()); 02056 const StackItem &parent = data->parent(); 02057 02058 computeUsageCounts(parent); 02059 } 02060 break; 02061 case 6: 02062 { 02063 const StackItemType6 *data 02064 = static_cast<const StackItemType6 *>(item.data()); 02065 const StackItem &parent = data->parent(); 02066 02067 computeUsageCounts(parent); 02068 } 02069 break; 02070 default: 02071 qFatal("invalid stack item type"); 02072 } 02073 } 02074 02076 02081 bool Parser::shift(int pos) 02082 { 02083 if (!inputSource->rewindTo(pos, currentLexerState)) { 02084 qWarning("invalid input position"); 02085 return false; 02086 } 02087 Cat token = inputSource->nextToken(); 02088 if (IS_EPSILON(token)) 02089 return false; 02090 02091 // delete all the stacks which don't satisfy the next token constraints 02092 { 02093 int l = nextStacks.size(); 02094 for (int i=0; i<l; ) { 02095 StackItem &stack = nextStacks[i]; 02096 // type 6 items are already filtered for next token constraints 02097 if (stack.type()) i++; else { 02098 const StackItemType0 *data 02099 = static_cast<const StackItemType0 *>(stack.data()); 02100 QList<StackItem> parents = data->parents(); 02101 // no parents = toplevel item, skip processing to avoid killing this 02102 if (parents.empty()) i++; else { 02103 int n = parents.size(); 02104 for (int j=0; j<n; ) { 02105 const StackItem &parent = parents.at(j); 02106 const StackItemType3 *parentData 02107 = static_cast<const StackItemType3 *>(parent.data()); 02108 if (validateNextTokenConstraints(token, 02109 parentData->nextTokenConstraints())) j++; else { 02110 if (j) parents.swap(0, j); 02111 parents.removeFirst(); 02112 n--; 02113 } 02114 } 02115 // no parents left, kill the type 0 item 02116 if (parents.isEmpty()) { 02117 if (i) nextStacks.swap(0, i); 02118 nextStacks.removeFirst(); 02119 l--; 02120 } else { 02121 stack.setParents(parents); 02122 i++; 02123 } 02124 } 02125 } 02126 } 02127 } 02128 02129 // if no more stacks, set errPos and errToken and return false 02130 // (this happens if we had a valid complete parse, but no continuations, and 02131 // now there's extra input) 02132 if (nextStacks.isEmpty()) { 02133 errPos = pos; 02134 errToken = token; 02135 return false; 02136 } 02137 02138 // process stack 02139 currentMatches.clear(); 02140 QList<StackItem> currentStacks = nextStacks; 02141 nextStacks.clear(); 02142 foreach (const StackItem &stack, currentStacks) 02143 computeUsageCounts(stack); 02144 foreach (const StackItem &stack, currentStacks) 02145 currentMatches.append(processStack(stack, token)); 02146 type0Indices.clear(); 02147 collectedMatches.clear(); 02148 02149 // if no more stacks and no current matches, set errPos and errToken, restore 02150 // the previous nextStacks to allow running prediction on them and return 02151 // false 02152 if (nextStacks.isEmpty() && currentMatches.isEmpty()) { 02153 errPos = pos; 02154 errToken = token; 02155 nextStacks = currentStacks; 02156 return false; 02157 } 02158 02159 currentLexerState = inputSource->saveState(); 02160 return true; 02161 } 02162 02164 02239 QList<Match> Parser::parse(int *errorPos, Cat *errorToken, int *incrementalPos, 02240 QList<StackItem> *incrementalStacks, 02241 QList<Match> *incrementalMatches, 02242 LexerState *lexerState) 02243 { 02244 int pos = 0; 02245 02246 if (!incrementalPos || *incrementalPos < 0) // start a new parse 02247 currentMatches = match(startCat, 0, PseudoCatScope(), StackItem(), 02248 NextTokenConstraints()); 02249 else { // continue an incremental parse 02250 pos = *incrementalPos; 02251 if (incrementalMatches) currentMatches = *incrementalMatches; 02252 if (incrementalStacks) nextStacks = *incrementalStacks; 02253 } 02254 02255 if (lexerState) 02256 currentLexerState = *lexerState; 02257 else 02258 currentLexerState.clear(); 02259 02260 // shift tokens while possible 02261 errPos = -1; 02262 while (shift(pos)) pos++; 02263 02264 // error handling 02265 if (errPos >= 0) { // parse error 02266 if (errorPos) *errorPos = errPos; 02267 if (errorToken) *errorToken = errToken; 02268 if (incrementalPos) *incrementalPos = errPos; 02269 if (incrementalStacks) *incrementalStacks = nextStacks; 02270 if (incrementalMatches) incrementalMatches->clear(); 02271 if (lexerState) *lexerState = currentLexerState; 02272 errPos = -1; 02273 errToken = Cat(); 02274 nextStacks.clear(); 02275 currentMatches.clear(); 02276 currentLexerState.clear(); 02277 return QList<Match>(); 02278 } 02279 if (errorPos) *errorPos = -1; 02280 if (errorToken) *errorToken = Cat(); 02281 02282 // filter all the matches which have expect-type next token constraints 02283 { 02284 int s = currentMatches.size(); 02285 for (int i=0; i<s; ) { 02286 Match &m = currentMatches[i]; 02287 if (m.nextTokenConstraints.expect.isEmpty()) i++; else { 02288 if (i) currentMatches.swap(0, i); 02289 currentMatches.removeFirst(); 02290 s--; 02291 } 02292 } 02293 } 02294 02295 // incremental parsing 02296 if (incrementalPos) *incrementalPos = pos; 02297 if (incrementalMatches) *incrementalMatches = currentMatches; 02298 if (incrementalStacks) *incrementalStacks = nextStacks; 02299 if (lexerState) *lexerState = currentLexerState; 02300 nextStacks.clear(); 02301 currentLexerState.clear(); 02302 02303 // accept current matches 02304 QList<Match> result = currentMatches; 02305 currentMatches.clear(); 02306 return result; 02307 } 02308 02311 02317 Predictions Parser::computePredictions(const QList<StackItem> &stacks) const 02318 { 02319 Predictions predict; 02320 foreach (const StackItem &stack, stacks) { 02321 if (stack.type()) { // type 6 item 02322 const StackItemType6 *data 02323 = static_cast<const StackItemType6 *>(stack.data()); 02324 predict.insert(data->leaves().at(data->i()).cat); 02325 } else // type 0 item 02326 predict.insert(static_cast<const StackItemType0 *>(stack.data()) 02327 ->effCat()); 02328 } 02329 return predict; 02330 } 02331 02333 void Parser::expandNonterminalPredictionRecurse(CatArg cat, 02334 QHash<Cat, QSet<Cat> > &result, 02335 QSet<Cat> &mark) const 02336 { 02337 mark.insert(cat); 02338 foreach (const Rule &rule, rules.value(cat)) { 02339 QListIterator<Cat> i(rule); 02340 /* For each nonempty rule (empty rules are not interesting for prediction), 02341 we handle at least the first item in the rule as a prediction: if it's a 02342 token, it is our prediction and the corresponding nonterminal is this 02343 category; if it's a nonterminal, we need to process the prediction 02344 recursively, unless it is already marked, indicating direct or indirect 02345 left recursion (and allowing us to stop) or the repetition of an already 02346 tried possibility (also allowing us to stop, since we do not care about 02347 parse trees here, only the last category). Then, as long as the first i 02348 categories are nullable and the (i+1)th category exists, we repeat the 02349 process for the (i+1)th category. */ 02350 while (i.hasNext()) { 02351 Cat cati = effectiveCat(i.next()); 02352 if (isToken(cati)) { 02353 result[cat].insert(cati); // record nonterminal and token 02354 break; // tokens are not nullable 02355 } 02356 // now cati is a nonterminal 02357 if (!mark.contains(cati)) 02358 expandNonterminalPredictionRecurse(cati, result, mark); 02359 if (!nullable.contains(cati)) break; 02360 } 02361 } 02362 } 02363 02367 02377 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPrediction(CatArg cat) const 02378 { 02379 QHash<Cat, QSet<Cat> > result; 02380 QSet<Cat> mark; 02381 if (isToken(cat)) 02382 qWarning("trying to expand terminal prediction"); 02383 else 02384 expandNonterminalPredictionRecurse(cat, result, mark); 02385 return result; 02386 } 02387 02389 void Parser::expandNonterminalPredictionRecurseC(CatArg cat, 02390 QHash<Cat, QSet<Cat> > &result, 02391 QSet<Cat> mark, int ruleno, 02392 const PseudoCatScope &scope, 02393 const NextTokenConstraints 02394 &nextTokenConstraints) 02395 { 02396 mark.insert(cat); 02397 QList<Rule> ruleList = rules.value(cat); 02398 int firstRule = ruleno >= 0 ? ruleno : 0; 02399 int lastRule = ruleno >= 0 ? ruleno : ruleList.size() - 1; 02400 for (int currRule = firstRule; currRule <= lastRule; currRule++) { 02401 const Rule &rule = ruleList.at(currRule); 02402 QListIterator<Cat> i(rule); 02403 /* For each nonempty rule (empty rules are not interesting for prediction), 02404 we handle at least the first item in the rule as a prediction: if it's a 02405 token, it is our prediction and the corresponding nonterminal is this 02406 category; if it's a nonterminal, we need to process the prediction 02407 recursively, unless it is already marked, indicating direct or indirect 02408 left recursion (and allowing us to stop). Then, as long as the first i 02409 categories are nullable and the (i+1)th category exists, we repeat the 02410 process for the (i+1)th category. 02411 We optimize the common case of no epsilons to skip because we do not have 02412 to consider context-sensitive constraints in that case. */ 02413 Cat cat1 = Cat(); 02414 if (i.hasNext()) { 02415 cat1 = i.next(); 02416 Cat effCat1 = effectiveCat(cat1); 02417 if (isToken(effCat1)) { 02418 if (validateNextTokenConstraints(effCat1, nextTokenConstraints)) 02419 result[cat].insert(effCat1); // record nonterminal and token 02420 continue; // tokens are not nullable 02421 } 02422 // now effCat1 is a nonterminal 02423 /* If we have a P-constraint here, that constraint forces the category to 02424 be epsilon (because it must be identical to something already matched, 02425 which is epsilon). So don't predict inside it. */ 02426 if (!mark.contains(effCat1) 02427 && !scope.hasPConstraint(cat1)) { 02428 int mcfgRuleno = -1; 02429 PseudoCatScope mcfgScope; 02430 if (scope.hasMcfgConstraint(cat1)) { 02431 QPair<int, PseudoCatScope> mcfgConstraint 02432 = scope.mcfgConstraint(cat1); 02433 mcfgRuleno = mcfgConstraint.first; 02434 mcfgScope = mcfgConstraint.second; 02435 } 02436 expandNonterminalPredictionRecurseC(effCat1, result, mark, mcfgRuleno, 02437 mcfgScope, nextTokenConstraints); 02438 } 02439 if (!nullable.contains(effCat1)) continue; 02440 } 02441 { 02442 QList<Match> currentMatches; 02443 // match cat(i-1) to epsilon first 02444 /* FIXME: We discard the parse trees here. Would it be more efficient 02445 to use a variant of matchToEpsilon which doesn't build them? 02446 OTOH, this way, we can reuse the matchToEpsilon cache. */ 02447 const QList<Match> componentMatches = matchToEpsilon(cat1, scope); 02448 if (componentMatches.isEmpty()) 02449 continue; // We didn't match epsilon, so we're done with this rule. 02450 /* If we have one version without context-sensitive constraints, the 02451 others are redundant. */ 02452 bool haveUnconstrained = false; 02453 foreach (const Match &cm, componentMatches) { 02454 if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty() 02455 && cm.nextTokenConstraints.taboo.isEmpty()) { 02456 haveUnconstrained = true; 02457 break; 02458 } 02459 } 02460 if (haveUnconstrained) 02461 currentMatches.append(Match(0, Node(), 0, scope, nextTokenConstraints)); 02462 else { 02463 foreach (const Match &cm, componentMatches) { 02464 currentMatches.append(Match(0, Node(), 0, cm.scope, 02465 nextTokenConstraints)); 02466 if (!cm.nextTokenConstraints.expect.isEmpty()) 02467 currentMatches.last().nextTokenConstraints.expect.append( 02468 cm.nextTokenConstraints.expect); 02469 if (!cm.nextTokenConstraints.taboo.isEmpty()) 02470 currentMatches.last().nextTokenConstraints.taboo.append( 02471 cm.nextTokenConstraints.taboo); 02472 } 02473 } 02474 while (i.hasNext()) { 02475 // now update cati for the new i 02476 Cat cati = i.next(); 02477 Cat effCati = effectiveCat(cati); 02478 if (isToken(effCati)) { 02479 bool nextTokenConstraintsPass = false; 02480 foreach (const Match &m, currentMatches) 02481 if (validateNextTokenConstraints(effCati, m.nextTokenConstraints)) { 02482 nextTokenConstraintsPass = true; 02483 break; 02484 } 02485 if (nextTokenConstraintsPass) 02486 result[cat].insert(effCati); // record nonterminal and token 02487 break; // tokens are not nullable 02488 } 02489 // now effCati is a nonterminal 02490 if (!mark.contains(effCati)) { 02491 foreach (const Match &m, currentMatches) 02492 /* If we have a P-constraint here, that constraint forces the 02493 category to be epsilon (because it must be identical to something 02494 already matched, which is epsilon). So don't predict inside it. 02495 */ 02496 if (!m.scope.hasPConstraint(cati)) { 02497 int mcfgRuleno = -1; 02498 PseudoCatScope mcfgScope; 02499 if (m.scope.hasMcfgConstraint(cati)) { 02500 QPair<int, PseudoCatScope> mcfgConstraint 02501 = m.scope.mcfgConstraint(cati); 02502 mcfgRuleno = mcfgConstraint.first; 02503 mcfgScope = mcfgConstraint.second; 02504 } 02505 expandNonterminalPredictionRecurseC(effCati, result, mark, 02506 mcfgRuleno, mcfgScope, 02507 m.nextTokenConstraints); 02508 } 02509 } 02510 if (!nullable.contains(effCati)) break; 02511 int s = currentMatches.size(); 02512 for (int j=0; j<s; ) { 02513 Match &m = currentMatches[j]; 02514 /* FIXME: We discard the parse trees here. Would it be more efficient 02515 to use a variant of matchToEpsilon which doesn't build them? 02516 OTOH, this way, we can reuse the matchToEpsilon cache. */ 02517 const QList<Match> componentMatches = matchToEpsilon(effCati, 02518 m.scope); 02519 if (componentMatches.isEmpty()) { 02520 if (j) currentMatches.swap(0, j); 02521 currentMatches.removeFirst(); 02522 s--; 02523 } else { 02524 int cs = componentMatches.size(); 02525 /* If we have one version without context-sensitive constraints, the 02526 others are redundant. */ 02527 bool haveUnconstrained = false; 02528 for (int k=0; k<cs; k++) { 02529 const Match &cm = componentMatches.at(k); 02530 if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty() 02531 && cm.nextTokenConstraints.taboo.isEmpty()) { 02532 haveUnconstrained = true; 02533 break; 02534 } 02535 } 02536 if (!haveUnconstrained) { 02537 for (int k=1; k<cs; k++) { 02538 const Match &cm = componentMatches.at(k); 02539 currentMatches.append(Match(0, Node(), 0, cm.scope, 02540 m.nextTokenConstraints)); 02541 if (!cm.nextTokenConstraints.expect.isEmpty()) 02542 currentMatches.last().nextTokenConstraints.expect.append( 02543 cm.nextTokenConstraints.expect); 02544 if (!cm.nextTokenConstraints.taboo.isEmpty()) 02545 currentMatches.last().nextTokenConstraints.taboo.append( 02546 cm.nextTokenConstraints.taboo); 02547 } 02548 const Match &cm = componentMatches.first(); 02549 m.scope = cm.scope; 02550 if (!cm.nextTokenConstraints.expect.isEmpty()) 02551 m.nextTokenConstraints.expect.append( 02552 cm.nextTokenConstraints.expect); 02553 if (!cm.nextTokenConstraints.taboo.isEmpty()) 02554 m.nextTokenConstraints.taboo.append( 02555 cm.nextTokenConstraints.taboo); 02556 } 02557 j++; 02558 } 02559 } 02560 if (currentMatches.isEmpty()) 02561 break; // We didn't match epsilon, so we're done with this rule. 02562 } 02563 } 02564 } 02565 } 02566 02570 02576 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat) 02577 { 02578 QHash<Cat, QSet<Cat> > result; 02579 if (isToken(cat)) 02580 qWarning("trying to expand terminal prediction"); 02581 else 02582 expandNonterminalPredictionRecurseC(cat, result, QSet<Cat>(), -1, 02583 PseudoCatScope(), 02584 NextTokenConstraints()); 02585 return result; 02586 } 02587 02590 02608 MultiPredictions Parser::computeMultiPredictions(const QList<StackItem> &stacks) 02609 const 02610 { 02611 MultiPredictions predictMulti; 02612 foreach (const StackItem &stack, stacks) { 02613 if (stack.type()) { // type 6 item 02614 /* We will define the literal as the full string of the P constraint here, 02615 which is very easy to obtain. This might not strictly be a literal in 02616 the sense of the definition, since it could be obtained from a whole 02617 tree of nonterminals, but it definitely always contains the literal (in 02618 the strict sense) containing the predicted category. */ 02619 const StackItemType6 *data 02620 = static_cast<const StackItemType6 *>(stack.data()); 02621 QList<Node> leaves = data->leaves(); 02622 int i = data->i(); 02623 QList<Cat> prediction, literal; 02624 int l = leaves.size(); 02625 for (int j=i; j<l; j++) 02626 prediction.append(leaves.at(j).cat); 02627 literal = prediction; 02628 for (int j=i-1; j>=0; j--) 02629 literal.prepend(leaves.at(j).cat); 02630 MultiPrediction multiPrediction(literal, data->tree().cat); 02631 if (!predictMulti.contains(prediction, multiPrediction)) 02632 predictMulti.insert(prediction, multiPrediction); 02633 } else { // type 0 item 02634 const StackItemType0 *data 02635 = static_cast<const StackItemType0 *>(stack.data()); 02636 Cat cat = data->effCat(); 02637 if (isToken(cat)) { 02638 const QList<StackItem> &parents = data->parents(); 02639 if (parents.isEmpty()) { // huh, start category is a terminal? 02640 QList<Cat> list; 02641 list << cat; 02642 MultiPrediction multiPrediction(list, cat); 02643 if (!predictMulti.contains(list, multiPrediction)) 02644 predictMulti.insert(list, multiPrediction); 02645 } else { 02646 foreach (const StackItem &parent, parents) { 02647 const StackItemType3 *parentData 02648 = static_cast<const StackItemType3 *>(parent.data()); 02649 Rule rule = parentData->rule(); 02650 int i = parentData->i(); 02651 int len = 1; 02652 while (i+len < rule.size() && isToken(rule.at(i+len))) len++; 02653 QList<Cat> literal = rule.mid(i, len); 02654 while (i > 0 && isToken(rule.at(i-1))) i--, len++; 02655 MultiPrediction multiPrediction(rule.mid(i, len), 02656 parentData->tree().cat); 02657 if (!predictMulti.contains(literal, multiPrediction)) 02658 predictMulti.insert(literal, multiPrediction); 02659 } 02660 } 02661 } else { 02662 QList<Cat> list; 02663 list << cat; 02664 MultiPrediction multiPrediction(list, cat); 02665 if (!predictMulti.contains(list, multiPrediction)) 02666 predictMulti.insert(list, multiPrediction); 02667 } 02668 } 02669 } 02670 return predictMulti; 02671 } 02672 02674 void Parser::expandNonterminalPredictionMultiRecurse(CatArg cat, 02675 QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> &mark) const 02676 { 02677 mark.insert(cat); 02678 foreach (const Rule &rule, rules.value(cat)) { 02679 int l = rule.size(); 02680 /* For each nonempty rule (empty rules are not interesting for prediction), 02681 we handle at least the first item in the rule as a prediction: if it's a 02682 token, the literal starting with that token is our prediction and the 02683 corresponding nonterminal is this category; if it's a nonterminal, we 02684 need to process the prediction recursively, unless it is already marked, 02685 indicating direct or indirect left recursion (and allowing us to stop) or 02686 the repetition of an already tried possibility (also allowing us to stop, 02687 since we do not care about parse trees here, only the last category). 02688 Then, as long as the first i categories are nullable and the (i+1)th 02689 category exists, we repeat the process for the (i+1)th category. */ 02690 for (int i = 0; i < l; i++) { 02691 Cat cati = effectiveCat(rule.at(i)); 02692 if (isToken(cati)) { 02693 int len = 1; 02694 while (i+len < l && isToken(rule.at(i+len))) len++; 02695 result[cat].insert(rule.mid(i, len)); // record nonterminal and literal 02696 break; // tokens are not nullable 02697 } 02698 // now cati is a nonterminal 02699 if (!mark.contains(cati)) 02700 expandNonterminalPredictionMultiRecurse(cati, result, mark); 02701 if (!nullable.contains(cati)) break; 02702 } 02703 } 02704 } 02705 02709 02719 QHash<Cat, QSet<QList<Cat> > > 02720 Parser::expandNonterminalPredictionMulti(CatArg cat) const 02721 { 02722 QHash<Cat, QSet<QList<Cat> > > result; 02723 QSet<Cat> mark; 02724 if (isToken(cat)) 02725 qWarning("trying to expand terminal prediction"); 02726 else 02727 expandNonterminalPredictionMultiRecurse(cat, result, mark); 02728 return result; 02729 } 02730 02732 void Parser::expandNonterminalPredictionMultiRecurseC(CatArg cat, 02733 QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> mark, int ruleno, 02734 const PseudoCatScope &scope, const NextTokenConstraints &nextTokenConstraints) 02735 { 02736 mark.insert(cat); 02737 QList<Rule> ruleList = rules.value(cat); 02738 int firstRule = ruleno >= 0 ? ruleno : 0; 02739 int lastRule = ruleno >= 0 ? ruleno : ruleList.size() - 1; 02740 for (int currRule = firstRule; currRule <= lastRule; currRule++) { 02741 const Rule &rule = ruleList.at(currRule); 02742 int l = rule.size(); 02743 /* For each nonempty rule (empty rules are not interesting for prediction), 02744 we handle at least the first item in the rule as a prediction: if it's a 02745 token, the literal starting with that token is our prediction and the 02746 corresponding nonterminal is this category; if it's a nonterminal, we 02747 need to process the prediction recursively, unless it is already marked, 02748 indicating direct or indirect left recursion (and allowing us to stop). 02749 Then, as long as the first i categories are nullable and the (i+1)th 02750 category exists, we repeat the process for the (i+1)th category. 02751 We optimize the common case of no epsilons to skip because we do not have 02752 to consider context-sensitive constraints in that case. */ 02753 Cat cat1 = Cat(); 02754 if (l) { 02755 cat1 = rule.first(); 02756 Cat effCat1 = effectiveCat(cat1); 02757 if (isToken(effCat1)) { 02758 if (validateNextTokenConstraints(effCat1, nextTokenConstraints)) { 02759 int len = 1; 02760 while (len < l && isToken(rule.at(len))) len++; 02761 // record nonterminal and literal 02762 result[cat].insert(rule.mid(0, len)); 02763 } 02764 continue; // tokens are not nullable 02765 } 02766 // now effCat1 is a nonterminal 02767 /* If we have a P-constraint here, that constraint forces the category to 02768 be epsilon (because it must be identical to something already matched, 02769 which is epsilon). So don't predict inside it. */ 02770 if (!mark.contains(effCat1) 02771 && !scope.hasPConstraint(cat1)) { 02772 int mcfgRuleno = -1; 02773 PseudoCatScope mcfgScope; 02774 if (scope.hasMcfgConstraint(cat1)) { 02775 QPair<int, PseudoCatScope> mcfgConstraint 02776 = scope.mcfgConstraint(cat1); 02777 mcfgRuleno = mcfgConstraint.first; 02778 mcfgScope = mcfgConstraint.second; 02779 } 02780 expandNonterminalPredictionMultiRecurseC(effCat1, result, mark, 02781 mcfgRuleno, mcfgScope, 02782 nextTokenConstraints); 02783 } 02784 if (!nullable.contains(effCat1)) continue; 02785 } 02786 { 02787 QList<Match> currentMatches; 02788 // match cat(i-1) to epsilon first 02789 /* FIXME: We discard the parse trees here. Would it be more efficient 02790 to use a variant of matchToEpsilon which doesn't build them? 02791 OTOH, this way, we can reuse the matchToEpsilon cache. */ 02792 const QList<Match> componentMatches = matchToEpsilon(cat1, scope); 02793 if (componentMatches.isEmpty()) 02794 continue; // We didn't match epsilon, so we're done with this rule. 02795 /* If we have one version without context-sensitive constraints, the 02796 others are redundant. */ 02797 bool haveUnconstrained = false; 02798 foreach (const Match &cm, componentMatches) { 02799 if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty() 02800 && cm.nextTokenConstraints.taboo.isEmpty()) { 02801 haveUnconstrained = true; 02802 break; 02803 } 02804 } 02805 if (haveUnconstrained) 02806 currentMatches.append(Match(0, Node(), 0, scope, nextTokenConstraints)); 02807 else { 02808 foreach (const Match &cm, componentMatches) { 02809 currentMatches.append(Match(0, Node(), 0, cm.scope, 02810 nextTokenConstraints)); 02811 if (!cm.nextTokenConstraints.expect.isEmpty()) 02812 currentMatches.last().nextTokenConstraints.expect.append( 02813 cm.nextTokenConstraints.expect); 02814 if (!cm.nextTokenConstraints.taboo.isEmpty()) 02815 currentMatches.last().nextTokenConstraints.taboo.append( 02816 cm.nextTokenConstraints.taboo); 02817 } 02818 } 02819 for (int i = 1; i < l; i++) { 02820 // now update cati for the new i 02821 Cat cati = rule.at(i); 02822 Cat effCati = effectiveCat(cati); 02823 if (isToken(effCati)) { 02824 bool nextTokenConstraintsPass = false; 02825 foreach (const Match &m, currentMatches) 02826 if (validateNextTokenConstraints(effCati, m.nextTokenConstraints)) { 02827 nextTokenConstraintsPass = true; 02828 break; 02829 } 02830 if (nextTokenConstraintsPass) { 02831 int len = 1; 02832 while (i+len < l && isToken(rule.at(i+len))) len++; 02833 // record nonterminal and literal 02834 result[cat].insert(rule.mid(i, len)); 02835 } 02836 break; // tokens are not nullable 02837 } 02838 // now effCati is a nonterminal 02839 if (!mark.contains(effCati)) { 02840 foreach (const Match &m, currentMatches) 02841 /* If we have a P-constraint here, that constraint forces the 02842 category to be epsilon (because it must be identical to something 02843 already matched, which is epsilon). So don't predict inside it. 02844 */ 02845 if (!m.scope.hasPConstraint(cati)) { 02846 int mcfgRuleno = -1; 02847 PseudoCatScope mcfgScope; 02848 if (m.scope.hasMcfgConstraint(cati)) { 02849 QPair<int, PseudoCatScope> mcfgConstraint 02850 = m.scope.mcfgConstraint(cati); 02851 mcfgRuleno = mcfgConstraint.first; 02852 mcfgScope = mcfgConstraint.second; 02853 } 02854 expandNonterminalPredictionMultiRecurseC(effCati, result, mark, 02855 mcfgRuleno, mcfgScope, 02856 m.nextTokenConstraints); 02857 } 02858 } 02859 if (!nullable.contains(effCati)) break; 02860 int s = currentMatches.size(); 02861 for (int j=0; j<s; ) { 02862 Match &m = currentMatches[j]; 02863 /* FIXME: We discard the parse trees here. Would it be more efficient 02864 to use a variant of matchToEpsilon which doesn't build them? 02865 OTOH, this way, we can reuse the matchToEpsilon cache. */ 02866 const QList<Match> componentMatches = matchToEpsilon(effCati, 02867 m.scope); 02868 if (componentMatches.isEmpty()) { 02869 if (j) currentMatches.swap(0, j); 02870 currentMatches.removeFirst(); 02871 s--; 02872 } else { 02873 int cs = componentMatches.size(); 02874 /* If we have one version without context-sensitive constraints, the 02875 others are redundant. */ 02876 bool haveUnconstrained = false; 02877 for (int k=0; k<cs; k++) { 02878 const Match &cm = componentMatches.at(k); 02879 if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty() 02880 && cm.nextTokenConstraints.taboo.isEmpty()) { 02881 haveUnconstrained = true; 02882 break; 02883 } 02884 } 02885 if (!haveUnconstrained) { 02886 for (int k=1; k<cs; k++) { 02887 const Match &cm = componentMatches.at(k); 02888 currentMatches.append(Match(0, Node(), 0, cm.scope, 02889 m.nextTokenConstraints)); 02890 if (!cm.nextTokenConstraints.expect.isEmpty()) 02891 currentMatches.last().nextTokenConstraints.expect.append( 02892 cm.nextTokenConstraints.expect); 02893 if (!cm.nextTokenConstraints.taboo.isEmpty()) 02894 currentMatches.last().nextTokenConstraints.taboo.append( 02895 cm.nextTokenConstraints.taboo); 02896 } 02897 const Match &cm = componentMatches.first(); 02898 m.scope = cm.scope; 02899 if (!cm.nextTokenConstraints.expect.isEmpty()) 02900 m.nextTokenConstraints.expect.append( 02901 cm.nextTokenConstraints.expect); 02902 if (!cm.nextTokenConstraints.taboo.isEmpty()) 02903 m.nextTokenConstraints.taboo.append( 02904 cm.nextTokenConstraints.taboo); 02905 } 02906 j++; 02907 } 02908 } 02909 if (currentMatches.isEmpty()) 02910 break; // We didn't match epsilon, so we're done with this rule. 02911 } 02912 } 02913 } 02914 } 02915 02919 02925 QHash<Cat, QSet<QList<Cat> > > 02926 Parser::expandNonterminalPredictionMultiC(CatArg cat) 02927 { 02928 QHash<Cat, QSet<QList<Cat> > > result; 02929 if (isToken(cat)) 02930 qWarning("trying to expand terminal prediction"); 02931 else 02932 expandNonterminalPredictionMultiRecurseC(cat, result, QSet<Cat>(), -1, 02933 PseudoCatScope(), 02934 NextTokenConstraints()); 02935 return result; 02936 } 02937 02940 02949 ConstrainedPredictions Parser::computeConstrainedPredictions( 02950 const QList<StackItem> &stacks) const 02951 { 02952 ConstrainedPredictions predict; 02953 foreach (const StackItem &stack, stacks) { 02954 if (stack.type()) { // type 6 item 02955 const StackItemType6 *data 02956 = static_cast<const StackItemType6 *>(stack.data()); 02957 CatArg cat = data->leaves().at(data->i()).cat; 02958 NextTokenConstraints nextTokenConstraints; 02959 if (!predict.contains(cat, nextTokenConstraints)) 02960 predict.insert(cat, nextTokenConstraints); 02961 } else { // type 0 item 02962 const StackItemType0 *data 02963 = static_cast<const StackItemType0 *>(stack.data()); 02964 Cat cat = data->effCat(); 02965 const QList<StackItem> &parents = data->parents(); 02966 if (parents.isEmpty()) { // start category, no constraints 02967 NextTokenConstraints nextTokenConstraints; 02968 if (!predict.contains(cat, nextTokenConstraints)) 02969 predict.insert(cat, nextTokenConstraints); 02970 } else { 02971 if (isToken(cat)) { 02972 // validate the next token constraints immediately for tokens 02973 bool nextTokenConstraintsPass = false; 02974 foreach (const StackItem &parent, parents) { 02975 const StackItemType3 *parentData 02976 = static_cast<const StackItemType3 *>(parent.data()); 02977 NextTokenConstraints nextTokenConstraints 02978 = parentData->nextTokenConstraints(); 02979 if (validateNextTokenConstraints(cat, nextTokenConstraints)) { 02980 nextTokenConstraintsPass = true; 02981 break; 02982 } 02983 } 02984 if (nextTokenConstraintsPass) { 02985 NextTokenConstraints nextTokenConstraints; 02986 if (!predict.contains(cat, nextTokenConstraints)) 02987 predict.insert(cat, nextTokenConstraints); 02988 } 02989 } else { 02990 foreach (const StackItem &parent, parents) { 02991 const StackItemType3 *parentData 02992 = static_cast<const StackItemType3 *>(parent.data()); 02993 NextTokenConstraints nextTokenConstraints 02994 = parentData->nextTokenConstraints(); 02995 if (!predict.contains(cat, nextTokenConstraints)) 02996 predict.insert(cat, nextTokenConstraints); 02997 } 02998 } 02999 } 03000 } 03001 } 03002 return predict; 03003 } 03004 03008 03017 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat, 03018 const NextTokenConstraints &nextTokenConstraints) 03019 { 03020 QHash<Cat, QSet<Cat> > result; 03021 if (isToken(cat)) 03022 qWarning("trying to expand terminal prediction"); 03023 else 03024 expandNonterminalPredictionRecurseC(cat, result, QSet<Cat>(), -1, 03025 PseudoCatScope(), 03026 nextTokenConstraints); 03027 return result; 03028 } 03029 03033 03044 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat, 03045 const QList<NextTokenConstraints> &nextTokenConstraintsList) 03046 { 03047 switch (nextTokenConstraintsList.size()) { 03048 case 0: // An empty disjunctive list is false, probably not what you want! 03049 qWarning("list of next token constraints is empty"); 03050 return QHash<Cat, QSet<Cat> >(); 03051 case 1: // common case, handle the next token constraints directly 03052 return expandNonterminalPredictionC(cat, 03053 nextTokenConstraintsList.first()); 03054 default: // general case, predict first, filter later 03055 { 03056 QHash<Cat, QSet<Cat> > result = expandNonterminalPredictionC(cat); 03057 QHash<Cat, QSet<Cat> > filteredResult; 03058 QHashIterator<Cat, QSet<Cat> > it(result); 03059 while (it.hasNext()) { 03060 it.next(); 03061 foreach (CatArg cati, it.value()) { 03062 bool nextTokenConstraintsPass = false; 03063 foreach (const NextTokenConstraints &nextTokenConstraints, 03064 nextTokenConstraintsList) { 03065 if (validateNextTokenConstraints(cati, nextTokenConstraints)) { 03066 nextTokenConstraintsPass = true; 03067 break; 03068 } 03069 } 03070 if (nextTokenConstraintsPass) 03071 filteredResult[it.key()].insert(cati); 03072 } 03073 } 03074 return filteredResult; 03075 } 03076 } 03077 } 03078 03081 03101 ConstrainedMultiPredictions Parser::computeConstrainedMultiPredictions( 03102 const QList<StackItem> &stacks) const 03103 { 03104 ConstrainedMultiPredictions predictMulti; 03105 foreach (const StackItem &stack, stacks) { 03106 if (stack.type()) { // type 6 item 03107 /* We will define the literal as the full string of the P constraint here, 03108 which is very easy to obtain. This might not strictly be a literal in 03109 the sense of the definition, since it could be obtained from a whole 03110 tree of nonterminals, but it definitely always contains the literal (in 03111 the strict sense) containing the predicted category. */ 03112 const StackItemType6 *data 03113 = static_cast<const StackItemType6 *>(stack.data()); 03114 QList<Node> leaves = data->leaves(); 03115 int i = data->i(); 03116 QList<Cat> prediction, literal; 03117 int l = leaves.size(); 03118 for (int j=i; j<l; j++) 03119 prediction.append(leaves.at(j).cat); 03120 literal = prediction; 03121 for (int j=i-1; j>=0; j--) 03122 literal.prepend(leaves.at(j).cat); 03123 ConstrainedMultiPrediction multiPrediction(literal, data->tree().cat); 03124 if (!predictMulti.contains(prediction, multiPrediction)) 03125 predictMulti.insert(prediction, multiPrediction); 03126 } else { // type 0 item 03127 const StackItemType0 *data 03128 = static_cast<const StackItemType0 *>(stack.data()); 03129 Cat cat = data->effCat(); 03130 const QList<StackItem> &parents = data->parents(); 03131 if (isToken(cat)) { 03132 if (parents.isEmpty()) { // huh, start category is a terminal? 03133 QList<Cat> list; 03134 list << cat; 03135 ConstrainedMultiPrediction multiPrediction(list, cat); 03136 if (!predictMulti.contains(list, multiPrediction)) 03137 predictMulti.insert(list, multiPrediction); 03138 } else { 03139 foreach (const StackItem &parent, parents) { 03140 const StackItemType3 *parentData 03141 = static_cast<const StackItemType3 *>(parent.data()); 03142 // validate the next token constraints immediately for tokens 03143 NextTokenConstraints nextTokenConstraints 03144 = parentData->nextTokenConstraints(); 03145 if (!validateNextTokenConstraints(cat, nextTokenConstraints)) 03146 continue; 03147 Rule rule = parentData->rule(); 03148 int i = parentData->i(); 03149 int len = 1; 03150 while (i+len < rule.size() && isToken(rule.at(i+len))) len++; 03151 QList<Cat> literal = rule.mid(i, len); 03152 while (i > 0 && isToken(rule.at(i-1))) i--, len++; 03153 ConstrainedMultiPrediction multiPrediction(rule.mid(i, len), 03154 parentData->tree().cat); 03155 if (!predictMulti.contains(literal, multiPrediction)) 03156 predictMulti.insert(literal, multiPrediction); 03157 } 03158 } 03159 } else { 03160 if (parents.isEmpty()) { // start category, no constraints 03161 QList<Cat> list; 03162 list << cat; 03163 ConstrainedMultiPrediction multiPrediction(list, cat); 03164 if (!predictMulti.contains(list, multiPrediction)) 03165 predictMulti.insert(list, multiPrediction); 03166 } else { 03167 foreach (const StackItem &parent, parents) { 03168 const StackItemType3 *parentData 03169 = static_cast<const StackItemType3 *>(parent.data()); 03170 NextTokenConstraints nextTokenConstraints 03171 = parentData->nextTokenConstraints(); 03172 QList<Cat> list; 03173 list << cat; 03174 ConstrainedMultiPrediction multiPrediction(list, cat, 03175 nextTokenConstraints); 03176 if (!predictMulti.contains(list, multiPrediction)) 03177 predictMulti.insert(list, multiPrediction); 03178 } 03179 } 03180 } 03181 } 03182 } 03183 return predictMulti; 03184 } 03185 03189 03198 QHash<Cat, QSet<QList<Cat> > > 03199 Parser::expandNonterminalPredictionMultiC(CatArg cat, 03200 const NextTokenConstraints &nextTokenConstraints) 03201 { 03202 QHash<Cat, QSet<QList<Cat> > > result; 03203 if (isToken(cat)) 03204 qWarning("trying to expand terminal prediction"); 03205 else 03206 expandNonterminalPredictionMultiRecurseC(cat, result, QSet<Cat>(), -1, 03207 PseudoCatScope(), 03208 nextTokenConstraints); 03209 return result; 03210 } 03211 03215 03226 QHash<Cat, QSet<QList<Cat> > > 03227 Parser::expandNonterminalPredictionMultiC(CatArg cat, 03228 const QList<NextTokenConstraints> &nextTokenConstraintsList) 03229 { 03230 switch (nextTokenConstraintsList.size()) { 03231 case 0: // An empty disjunctive list is false, probably not what you want! 03232 qWarning("list of next token constraints is empty"); 03233 return QHash<Cat, QSet<QList<Cat> > >(); 03234 case 1: // common case, handle the next token constraints directly 03235 return expandNonterminalPredictionMultiC(cat, 03236 nextTokenConstraintsList.first()); 03237 default: // general case, predict first, filter later 03238 { 03239 QHash<Cat, QSet<QList<Cat> > > result 03240 = expandNonterminalPredictionMultiC(cat); 03241 QHash<Cat, QSet<QList<Cat> > > filteredResult; 03242 QHashIterator<Cat, QSet<QList<Cat> > > it(result); 03243 while (it.hasNext()) { 03244 it.next(); 03245 foreach (const QList<Cat> &literal, it.value()) { 03246 CatArg cati = literal.first(); 03247 bool nextTokenConstraintsPass = false; 03248 foreach (const NextTokenConstraints &nextTokenConstraints, 03249 nextTokenConstraintsList) { 03250 if (validateNextTokenConstraints(cati, nextTokenConstraints)) { 03251 nextTokenConstraintsPass = true; 03252 break; 03253 } 03254 } 03255 if (nextTokenConstraintsPass) 03256 filteredResult[it.key()].insert(literal); 03257 } 03258 } 03259 return filteredResult; 03260 } 03261 } 03262 } 03263 03264 } // end namespace
1.7.4