DynGenPar
Dynamic Generalized Parser
dyngenpar.cpp
1 /* DynGenPar: Dynamic Generalized Parser
2  Copyright (C) 2010-2012 Kevin Kofler <kevin.kofler@chello.at>
3  Copyright (C) 2014-2015 DAGOPT Optimization Technologies GmbH
4  written by Kevin Kofler <kofler@dagopt.com>
5 
6  Support by the Austrian Science Fund FWF under contract numbers
7  P20631 and P23554 is gratefully acknowledged.
8 
9  This program is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 2 of the License, or
12  (at your option) any later version.
13 
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 
434 #include "dyngenpar.h"
435 
437 uint qHash(const QList<DynGenPar::Cat> &list) {
438  uint result = 0, i = 0;
439  foreach (DynGenPar::CatArg cat, list)
440  result += (++i) * qHash(cat);
441  return result;
442 }
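/* Illustration (sketch only, not part of the DynGenPar API): the (++i)
   factor above weights each element by its position, so lists containing
   the same categories in a different order generally hash to different
   values. The same pattern for a plain QList<QString>, assuming only
   QtCore, would look like this:

     uint orderSensitiveHash(const QList<QString> &list) {
       uint result = 0, i = 0;
       foreach (const QString &s, list)
         result += (++i) * qHash(s); // weight each element by its position
       return result;
     }
*/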
443 
444 namespace DynGenPar {
446 bool Rule::serializeLabels = true;
447 
449 bool Rule::serializeActions = true;
450 
452 uint qHash(const NextTokenConstraints &nextTokenConstraints)
453 {
454  return (qHash(nextTokenConstraints.expect) << 4)
455  + qHash(nextTokenConstraints.taboo);
456 }
457 
461 
464 static void parseTreeToPmcfgSyntaxTreeUnify(Node &dest, const Node &src)
465 {
466  if (src.cat != dest.cat || src.data != dest.data)
467  qFatal("invalid parse tree: PMCFG unification failed (mismatched nodes)");
468 
469  // check for metavariables and do the trivial unification
470  if (src.children.isEmpty()) return;
471  if (dest.children.isEmpty()) {
472  dest.children = src.children;
473  return;
474  }
475 
476  int s = dest.children.size();
477  if (src.children.size() != s)
478  qFatal("invalid parse tree: PMCFG unification failed (mismatched "
479  "alternative counts)");
480  for (int i=0; i<s; i++) {
481  const Alternative &srcAlternative = src.children.at(i);
482  Alternative &destAlternative = dest.children[i];
483  int l = destAlternative.size();
484  if (srcAlternative.size() != l
485  || srcAlternative.label() != destAlternative.label())
486  qFatal("invalid parse tree: PMCFG unification failed (mismatched "
487  "alternatives)");
488  for (int j=0; j<l; j++)
489  parseTreeToPmcfgSyntaxTreeUnify(destAlternative[j], srcAlternative.at(j));
490  }
491 }
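/* Illustration (reduced sketch with a hypothetical MiniNode type, not the
   types used above): the key rule of this unification is that a node
   without alternatives acts as a metavariable and simply adopts the other
   node's alternatives; otherwise both nodes must agree structurally and
   their children are unified recursively. A minimal model of that logic:

     struct MiniNode {
       QString cat;
       QList<QList<MiniNode> > alternatives; // empty list = metavariable
     };

     static bool miniUnify(MiniNode &dest, const MiniNode &src) {
       if (dest.cat != src.cat) return false;        // mismatched nodes
       if (src.alternatives.isEmpty()) return true;  // src is a metavariable
       if (dest.alternatives.isEmpty()) {            // dest is a metavariable
         dest.alternatives = src.alternatives;
         return true;
       }
       if (dest.alternatives.size() != src.alternatives.size()) return false;
       for (int i = 0; i < dest.alternatives.size(); i++) {
         if (dest.alternatives[i].size() != src.alternatives.at(i).size())
           return false;
         for (int j = 0; j < dest.alternatives[i].size(); j++)
           if (!miniUnify(dest.alternatives[i][j], src.alternatives.at(i).at(j)))
             return false;
       }
       return true;
     }
*/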
492 
494 static void parseTreeToPmcfgSyntaxTreeRecurse(const Node &parseTree,
495  Node &syntaxTree)
496 {
497  /* first copy the data (most useful if a token was passed as an argument to a
498  PMCFG function, an extension to standard PMCFGs allowed by this
499  implementation) */
500  syntaxTree.data = parseTree.data;
501  bool isFirst = true;
502  foreach(const Alternative &alternative, parseTree.children) {
503  // create a new alternative if this is not the first one
504  if (isFirst)
505  isFirst = false;
506  else
507  syntaxTree.children.append(Alternative());
508  Alternative &newAlternative = syntaxTree.children.last();
509 
510  QVariant label = alternative.label();
511  if (label.canConvert<PmcfgComponentInfo>()) {
512  PmcfgComponentInfo componentInfo = label.value<PmcfgComponentInfo>();
513  newAlternative.setLabel(componentInfo.pmcfgRule.label());
514  int numArgs = componentInfo.pmcfgRule.size();
515  for (int i=0; i<numArgs; i++) {
516  CatArg arg = componentInfo.pmcfgRule.at(i);
517  Node newChild(arg);
518  const QVector<int> &argPositions = componentInfo.argPositions.at(i);
519  if (argPositions.isEmpty())
520  newChild.children.clear(); // no alternative == metavariable
521  else {
522  parseTreeToPmcfgSyntaxTreeRecurse(
523  alternative.at(argPositions.first()), newChild);
524  int s = argPositions.size();
525  for (int i=1; i<s; i++) {
526  Node tempChild(arg);
527  parseTreeToPmcfgSyntaxTreeRecurse(alternative.at(
528  argPositions.at(i)), tempChild);
529  parseTreeToPmcfgSyntaxTreeUnify(newChild, tempChild);
530  }
531  }
532  newAlternative.append(newChild);
533  }
534  } else newAlternative = alternative; // non-PMCFG category, copy parse tree
535  }
536 }
537 
539 Node parseTreeToPmcfgSyntaxTree(const Node &parseTree)
540 {
541  Node syntaxTree(parseTree.cat);
542  parseTreeToPmcfgSyntaxTreeRecurse(parseTree, syntaxTree);
543  return syntaxTree;
544 }
545 
547 QHash<QString, ActionDeserializer *> Action::deserializers;
548 
550 
553 bool Parser::isLiteral(const QList<Cat> &list) const
554 {
555  foreach (CatArg cat, list)
556  if (!isToken(cat)) return false;
557  return true;
558 }
559 
562 
566 Cat Parser::effectiveCat(CatArg cat) const
567 {
568  if (pseudoCats.contains(cat)) // convert pseudo-category
569  return pseudoCats.value(cat).first;
570  return cat; // not a pseudo-category, just return the original category
571 }
572 
575 
578 void Parser::initCaches()
579 {
580  // clear caches
581  initialGraph.clear();
582  neighborhoods.clear();
583  nullable.clear();
584  epsilonMatches.clear();
585 
586  // search for nullable categories
587  /* First consider only rules which directly expand to epsilon, i.e. with empty
588  right hand side. */
589  QHashIterator<Cat, QList<Rule> > it(rules);
590  while (it.hasNext()) {
591  it.next();
592  Cat cat = it.key();
593  foreach (const Rule &rule, it.value()) {
594  if (rule.isEmpty()) {
595  nullable.insert(cat);
596  break;
597  }
598  }
599  }
600  /* Now consider rules which expand to nullable categories, inductively until
601  no new nullables are added. This works because we do not have to consider
602  directly or indirectly recursive rules, because those can only expand to
603  epsilon if the category can already expand to epsilon without that rule. */
604  unsigned newNullables;
605  do {
606  newNullables = 0u;
607  it = rules;
608  while (it.hasNext()) {
609  it.next();
610  Cat cat = it.key();
611  if (nullable.contains(cat)) continue; // already nullable
612  foreach (const Rule &rule, it.value()) {
613  foreach (CatArg cati, rule) {
614  if (!nullable.contains(effectiveCat(cati))) goto next_rule;
615  }
616  nullable.insert(cat);
617  newNullables++;
618  break;
619 next_rule:
620  ;
621  }
622  }
623  } while (newNullables);
624 
625  // now compute the initial graph
626  it = rules;
627  while (it.hasNext()) {
628  it.next();
629  Cat cat = it.key();
630  // only keep the rule number if we need it, in order to allow unification
631  bool keepRuleNumbers = componentCats.contains(cat);
632  QList<Rule> ruleList = it.value();
633  int n = ruleList.size();
634  for (int i=0; i<n; i++) {
635  /* For each nonempty rule (empty rules do not show up in the initial
636  graph), we insert at least one edge from the first item in the rule
637  to the left hand side, with 0 epsilons skipped. Then, as long as the
638  first i categories are nullable and the (i+1)th category exists, we
639  insert an edge with i epsilons skipped from the (i+1)th category to the
640  left hand side. */
641  const Rule &rule = ruleList.at(i);
642  QListIterator<Cat> it(rule);
643  int epsilonsSkipped=0;
644  while (it.hasNext()) {
645  Cat cati = effectiveCat(it.next());
646  initialGraph.insert(cati, FullRule(cat, rule, epsilonsSkipped,
647  keepRuleNumbers ? i : 0));
648  if (!nullable.contains(cati)) break;
649  epsilonsSkipped++;
650  }
651  }
652  }
653 }
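/* Illustration (standalone sketch, not DynGenPar types): the nullable
   computation above is the usual fixed-point iteration for context-free
   grammars: seed the set from rules with an empty right hand side, then
   keep adding left hand sides whose rule items are all already nullable
   until nothing changes. The same algorithm over a plain
   QHash<QString, QList<QStringList> > grammar (names chosen for this
   sketch only):

     static QSet<QString> computeNullable(
         const QHash<QString, QList<QStringList> > &grammar) {
       QSet<QString> nullable;
       bool changed;
       do {
         changed = false;
         QHashIterator<QString, QList<QStringList> > it(grammar);
         while (it.hasNext()) {
           it.next();
           if (nullable.contains(it.key())) continue; // already known
           foreach (const QStringList &rhs, it.value()) {
             bool allNullable = true;
             foreach (const QString &cat, rhs)
               if (!nullable.contains(cat)) { allNullable = false; break; }
             if (allNullable) { // also covers the empty right hand side
               nullable.insert(it.key());
               changed = true;
               break;
             }
           }
         }
       } while (changed);
       return nullable;
     }
*/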
654 
657 
662 void Parser::processRule(CatArg cat, const Rule &rule, int skip, int ruleno,
663  QQueue<Cat> &nullableQueue, bool &clearEpsilonMatches)
664 {
665  /* Continue processing the rule where we stopped when building the original
666  initial graph. (If the rule is new, start at position 0.) As long as the
667  first i categories are nullable and the (i+1)th category exists, we insert
668  an edge with i epsilons skipped from the (i+1)th category to the left hand
669  side.*/
670  QListIterator<Cat> i(rule);
671  int epsilonsSkipped=skip;
672  while (i.hasNext()) {
673  Cat cati = effectiveCat(i.next());
674  initialGraph.insert(cati, FullRule(cat, rule, epsilonsSkipped, ruleno));
675  if (!nullable.contains(cati)) break;
676  epsilonsSkipped++;
677  }
678  if (epsilonsSkipped == rule.size()) { // !i.hasNext() is not sufficient!
679  /* We reached the end of the rule, so the left hand side is now nullable
680  and we have a new way to get epsilon matches. */
681  clearEpsilonMatches = true;
682  if (!nullable.contains(cat) && !nullableQueue.contains(cat))
683  nullableQueue.enqueue(cat);
684  }
685 }
686 
689 void Parser::addRule(CatArg cat, const Rule &rule)
690 {
691  // add the rule to the grammar
692  QList<Rule> &ruleList = rules[cat];
693  // only keep the rule number if we need it, in order to allow unification
694  int ruleno = componentCats.contains(cat) ? ruleList.size() : 0;
695  ruleList << rule;
696 
697  // update initialGraph and nullable
698  QQueue<Cat> nullableQueue; // new nullable categories
699  bool clearEpsilonMatches = false;
700  /* first process the rule we just added:
701  Add the new edges, if any, to the initial graph, and mark the category as
702  nullable if the rule can match the empty string. */
703  processRule(cat, rule, 0, ruleno, nullableQueue, clearEpsilonMatches);
704  // now handle categories which have become nullable
705  while (!nullableQueue.isEmpty()) {
706  Cat newNullable = nullableQueue.dequeue();
707  // mark the category as nullable
708  nullable.insert(newNullable);
709  /* now update the initial graph:
710  We need to go through all the edges going out of this category in the
711  initial graph and process the rule starting from where we stopped when
712  building the initial graph (due to the non-nullable category). If we
713  reach the end of the rule, the left hand side has become nullable as
714  well, and is added to the queue if it wasn't already nullable. */
715  foreach (const FullRule &rule, initialGraph.values(newNullable))
716  processRule(rule.cat, rule.rule, rule.epsilonsSkipped+1, rule.ruleno,
717  nullableQueue, clearEpsilonMatches);
718  }
719  if (clearEpsilonMatches) {
720  epsilonMatches.clear();
721  neighborhoods.clear();
722  } else if (!rule.isEmpty()) {
723  // remove only those neighborhoods that are no longer valid
724  typedef QPair<Cat, Cat> CatPair;
725  QHashIterator<CatPair, QList<FullRule> > it(neighborhoods);
726  QList<CatPair> invalidate;
727  CatArg ruleFirst = rule.first();
728  bool firstIsNullable = nullable.contains(ruleFirst);
729  while (it.hasNext()) {
730  it.next();
731  const CatPair &key = it.key();
732  if (reachable(cat, key.second, QSet<Cat>())
733  && (firstIsNullable || reachable(key.first, ruleFirst, QSet<Cat>()))) {
734  invalidate << key;
735  }
736  }
737  foreach (const CatPair &key, invalidate) neighborhoods.remove(key);
738  }
739 }
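/* Illustration (usage sketch only, under stated assumptions): addRule() is
   what makes the grammar dynamic - a rule can be added between parses, and
   the initial graph, the nullable set and the cached neighborhoods are
   updated incrementally instead of being rebuilt. Assuming the
   string-category build (DYNGENPAR_INTEGER_CATEGORIES not defined), that a
   Rule is built by appending categories as done for internalRule elsewhere
   in this file, and that "parser" is an already configured Parser whose
   grammar knows the categories and the "+" token, adding
   Expr -> Expr "+" Term at runtime could look like:

     Rule rule(QVariant("plus"));   // rule label, as for internalRule above
     rule.append(Cat("Expr"));
     rule.append(Cat("+"));
     rule.append(Cat("Term"));
     parser.addRule(Cat("Expr"), rule);
*/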
740 
742 
743 bool Parser::computePmcfgDimension(CatArg cat, const Rule &rule,
744  const Pmcfg &pmcfg)
745 {
746  // look the function up
747  Function function = pmcfg.lookupFunction(rule.label());
748  /* check if the dimension is consistent with existing rules for the category,
749  if any; generate a list of components if we don't have any yet */
750  int dim = function.size();
751  if (dim == 1) {
752  if (catComponents.contains(cat)) {
753  qWarning("dimension mismatch: 1D function for multidimensional category");
754  return false; // dimension mismatch
755  }
756  } else {
757  if (rules.contains(cat)) {
758  qWarning("dimension mismatch: multidimensional function for 1D category");
759  return false; // dimension mismatch
760  }
761  if (catComponents.contains(cat)) {
762  if (catComponents.value(cat).size() != dim) {
763  qWarning("dimension mismatch: %dD function for %dD category", dim,
764  catComponents.value(cat).size());
765  return false; // dimension mismatch
766  }
767  } else { // not known yet, we need to create a mapping
768  QList<Cat> components;
769  for (int i=0; i<dim; i++) {
770  // generate an internal category for the component
771 #ifdef DYNGENPAR_INTEGER_CATEGORIES
772  Cat component = generateCat();
773 #else
774  Cat component = QString("%1[%2]").arg(cat).arg(i);
775 #endif
776  components.append(component);
777  componentCats.insert(component, qMakePair(cat, i));
778  }
779  catComponents.insert(cat, components);
780  }
781  }
782  return true;
783 }
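/* Illustration (sketch of the mapping built above): a PMCFG category of
   dimension d is represented by d internal context-free categories, and
   componentCats maps each of them back to (original category, component
   index). In the string-category build, a hypothetical 2-dimensional
   category "Disc" would get the components "Disc[0]" and "Disc[1]":

     QList<QString> components;
     QHash<QString, QPair<QString, int> > backMap; // stands in for componentCats
     for (int i = 0; i < 2; i++) {
       QString component = QString("%1[%2]").arg("Disc").arg(i);
       components.append(component);                       // "Disc[0]", "Disc[1]"
       backMap.insert(component, qMakePair(QString("Disc"), i));
     }
*/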
784 
786 
787 bool Parser::convertPmcfgRule(CatArg cat, const Rule &rule, const Pmcfg &pmcfg,
788  bool updateCaches)
789 {
790  // look the function up
791  Function function = pmcfg.lookupFunction(rule.label());
792  /* get the list of components to use (assume that the dimensions have been
793  already validated by computePmcfgDimension) */
794  int dim = function.size();
795  QList<Cat> components;
796  if (dim == 1)
797  components.append(cat);
798  else
799  components = catComponents.value(cat);
800 
801  // determine the number of used arguments of the function
802  int numArgs = 0;
803  foreach(const Sequence &sequence, function) {
804  foreach(const Term &term, sequence) {
805  if (term.isComponent() && term.arg >= numArgs)
806  numArgs = term.arg + 1;
807  }
808  }
809 
810  // check that enough arguments were passed to the function
811  if (rule.size() < numArgs) {
812  qWarning("not enough arguments for PMCFG function");
813  return false;
814  }
815 
816  // find the components of the arguments passed
817  QVector<QList<Cat> > argComponents(numArgs);
818  for (int i=0; i<numArgs; i++) {
819  CatArg arg = rule.at(i);
820  if (catComponents.contains(arg))
821  argComponents[i] = catComponents.value(arg);
822  else if (pmcfg.rules.contains(arg) || pmcfg.cfRules.contains(arg)
823  || pmcfg.tokens.contains(arg))
824  argComponents[i].append(arg);
825  else
826  /* unused category, ignore the unreachable rule
827  We do not know the correct dimension for the unused category. */
828  return true;
829  }
830 
831  // verify the dimensions of the arguments passed
832  foreach(const Sequence &sequence, function) {
833  foreach(const Term &term, sequence) {
834  if (term.isComponent()
835  && term.component >= argComponents.at(term.arg).size()) {
836  qWarning("dimension mismatch: attempt to use component %d of a %dD "
837  "category", term.component, argComponents.at(term.arg).size());
838  return false; // dimension mismatch
839  }
840  }
841  }
842 
843  // determine the number of times each argument is used
844  QVector<int> usageCounts(numArgs);
845  foreach(const Sequence &sequence, function) {
846  foreach(const Term &term, sequence) {
847  if (term.isComponent())
848  usageCounts[term.arg]++;
849  }
850  }
851 
852  // generate pseudo-categories
853  QVector<QHash<int, Cat> > newPseudoCats(numArgs);
854  foreach(const Sequence &sequence, function) {
855  foreach(const Term &term, sequence) {
856  if (term.isComponent() && usageCounts.at(term.arg) > 1) {
857  if (!newPseudoCats[term.arg].contains(term.component)) {
858 #ifdef DYNGENPAR_INTEGER_CATEGORIES
859  Cat pseudoCat = generateCat();
860 #else
861  Cat pseudoCat = QString("Pseudo%1").arg(generateCat());
862 #endif
863  newPseudoCats[term.arg].insert(term.component, pseudoCat);
864  }
865  }
866  }
867  }
868 
869  // record pseudo-categories
870  for (int i=0; i<numArgs; i++) {
871  const QHash<int, Cat> &pseudoCatHash = newPseudoCats.at(i);
872  if (!pseudoCatHash.isEmpty()) {
873  QList<Cat> pseudoCatList = pseudoCatHash.values();
874  QHashIterator<int, Cat> it(pseudoCatHash);
875  while (it.hasNext()) {
876  it.next();
877  Cat pseudoCat = it.value();
878  pseudoCats.insert(pseudoCat, qMakePair(argComponents.at(i).at(it.key()),
879  pseudoCatList));
880  }
881  }
882  }
883 
884  // now finish evaluating the function and record the converted rule
885  for (int i=0; i<dim; i++) {
886  const Sequence &sequence = function.at(i);
887  PmcfgComponentInfo componentInfo(rule);
888  int s = sequence.size();
889  for (int j=0; j<s; j++) {
890  const Term &term = sequence.at(j);
891  if (term.isComponent())
892  componentInfo.argPositions[term.arg].append(j);
893  }
894  Rule internalRule(QVariant::fromValue(componentInfo));
895  // copy next token constraints from the PMCFG sequence
896  internalRule.nextTokenConstraints = sequence.nextTokenConstraints;
897  /* Also try to use next token constraints from the PMCFG rule. That's not a
898  good place to write them because the constraints will affect all
899  components of a multi-dimensional rule, which is probably not what you
900  want. But at least for 1D rules, it makes sense, and it costs us almost
901  nothing to support this. (Appending an empty list is trivial.) */
902  internalRule.nextTokenConstraints.expect.append(
903  rule.nextTokenConstraints.expect);
904  internalRule.nextTokenConstraints.taboo.append(
905  rule.nextTokenConstraints.taboo);
906  foreach (const Term &term, sequence)
907  internalRule.append(term.isToken() ? term.token
908  : newPseudoCats.at(term.arg).isEmpty()
909  ? argComponents.at(term.arg).at(term.component)
910  : newPseudoCats.at(term.arg).value(term.component));
911 
912  CatArg catComponent = components.at(i);
913  if (updateCaches)
914  addRule(catComponent, internalRule);
915  else
916  rules[catComponent].append(internalRule);
917  }
918 
919  return true;
920 }
921 
923 
932 bool Parser::loadPmcfg(const Pmcfg &pmcfg)
933 {
934  // copy tokens, startCat and cfRules
935  tokens = pmcfg.tokens;
936  startCat = pmcfg.startCat;
937  rules = pmcfg.cfRules;
938 
939  // convert functions and PMCFG rules to rules and pseudo-categories
940  pseudoCats.clear();
941  componentCats.clear();
942  catComponents.clear();
943  {
944  QHashIterator<Cat, QList<Rule> > it(pmcfg.rules);
945  while (it.hasNext()) {
946  Cat cat = it.next().key();
947  foreach (const Rule &rule, it.value())
948  if (!computePmcfgDimension(cat, rule, pmcfg)) return false;
949  }
950  }
951  {
952  QHashIterator<Cat, QList<Rule> > it(pmcfg.rules);
953  while (it.hasNext()) {
954  Cat cat = it.next().key();
955  foreach (const Rule &rule, it.value())
956  if (!convertPmcfgRule(cat, rule, pmcfg, false)) return false;
957  }
958  }
959 
960  // initialize the initial graph and the caches
961  initCaches();
962  return true;
963 }
964 
968 
974 bool Parser::addPmcfgRule(Pmcfg &pmcfg, CatArg cat, const Rule &rule)
975 {
976  if (!computePmcfgDimension(cat, rule, pmcfg)
977  || !convertPmcfgRule(cat, rule, pmcfg, true)) return false;
978  pmcfg.rules[cat].append(rule);
979  return true;
980 }
981 
983 
988 bool Parser::reachable(CatArg cat, CatArg target, QSet<Cat> mark)
989 {
990  if (cat == target) return true;
991  mark.insert(cat);
992  foreach (const FullRule &rule, initialGraph.values(cat)) {
993  if (mark.contains(rule.cat)) continue;
994  if (reachable(rule.cat, target, mark)) return true;
995  }
996  return false;
997 }
998 
1000 
1004 QList<FullRule> Parser::neighborhood(CatArg cat, CatArg target)
1005 {
1006  // use cached value if we have it
1007  QPair<Cat, Cat> key(cat, target);
1008  if (neighborhoods.contains(key)) return neighborhoods.value(key);
1009 
1010  QList<FullRule> result;
1011  foreach (const FullRule &rule, initialGraph.values(cat)) {
1012  if (reachable(rule.cat, target, QSet<Cat>())) result.append(rule);
1013  }
1014  // cache the result for future reuse
1015  neighborhoods.insert(key, result);
1016  return result;
1017 }
1018 
1020 void Parser::finalizeMatches(QList<Match> &matches, CatArg cat,
1021  const PseudoCatScope &scope)
1022 {
1023  int s = matches.size();
1024  for (int i=0; i<s; i++) {
1025  Match &m = matches[i];
1026  PseudoCatScope childScope = m.scope;
1027  m.scope = scope;
1028  m.scope.pConstraints().insert(cat,
1029  qMakePair(qMakePair(m.tree, m.nextTokenConstraints), 0));
1030  foreach (CatArg cati, pseudoCats.value(cat).second)
1031  m.scope.mcfgConstraints().insert(cati, qMakePair(m.ruleno, childScope));
1032  m.ruleno = 0; // We don't need ruleno anymore.
1033  }
1034 }
1035 
1037 void Parser::copyScope(QList<Match> &matches, const PseudoCatScope &scope)
1038 {
1039  if (scope.isNull()) return; // don't bother copying a null scope
1040  int s = matches.size();
1041  for (int i=0; i<s; i++)
1042  matches[i].scope = scope;
1043 }
1044 
1046 QList<Match> Parser::matchCFToEpsilon(CatArg cat, QSet<Cat> mark)
1047 {
1048  QList<Match> result;
1049  bool haveNextTokenConstraints = false;
1050 
1051  mark.insert(cat);
1052  bool isFirst = true;
1053  foreach (const Rule &rule, rules.value(cat)) {
1054  // only consider nullable rules with only unmarked categories
1055  foreach (CatArg cati, rule) {
1056  if (!nullable.contains(cati) || mark.contains(cati)) goto next_rule;
1057  }
1058  {
1059  QList<Match> currentMatches;
1060  Node node(cat);
1061  node.children.first().setLabel(rule.label());
1062  currentMatches.append(Match(0, node, 0, PseudoCatScope()));
1063  if (!rule.nextTokenConstraints.expect.isEmpty()) {
1064  haveNextTokenConstraints = true;
1065  currentMatches.first().nextTokenConstraints.expect =
1066  rule.nextTokenConstraints.expect;
1067  }
1068  if (!rule.nextTokenConstraints.taboo.isEmpty()) {
1069  haveNextTokenConstraints = true;
1070  currentMatches.first().nextTokenConstraints.taboo =
1071  rule.nextTokenConstraints.taboo;
1072  }
1073  foreach (CatArg cati, rule) {
1074  int s = currentMatches.size();
1075  for (int i=0; i<s; ) {
1076  Match &m = currentMatches[i];
1077  const QList<Match> componentMatches
1078  = matchToEpsilonRecurse(cati, mark, m.scope);
1079  if (componentMatches.isEmpty()) {
1080  if (i) currentMatches.swap(0, i);
1081  currentMatches.removeFirst();
1082  s--;
1083  } else {
1084  int cs = componentMatches.size();
1085  for (int j=1; j<cs; j++) {
1086  const Match &cm = componentMatches.at(j);
1087  Node newTree = m.tree;
1088  newTree.children.first().append(cm.tree);
1089  currentMatches.append(Match(m.len + cm.len, newTree, 0, cm.scope,
1090  m.nextTokenConstraints));
1091  if (!cm.nextTokenConstraints.expect.isEmpty()) {
1092  haveNextTokenConstraints = true;
1093  currentMatches.last().nextTokenConstraints.expect.append(
1094  cm.nextTokenConstraints.expect);
1095  }
1096  if (!cm.nextTokenConstraints.taboo.isEmpty()) {
1097  haveNextTokenConstraints = true;
1098  currentMatches.last().nextTokenConstraints.taboo.append(
1099  cm.nextTokenConstraints.taboo);
1100  }
1101  }
1102  const Match &cm = componentMatches.first();
1103  m.len += cm.len;
1104  m.tree.children.first().append(cm.tree);
1105  m.scope = cm.scope;
1106  if (!cm.nextTokenConstraints.expect.isEmpty()) {
1107  haveNextTokenConstraints = true;
1108  m.nextTokenConstraints.expect.append(
1109  cm.nextTokenConstraints.expect);
1110  }
1111  if (!cm.nextTokenConstraints.taboo.isEmpty()) {
1112  haveNextTokenConstraints = true;
1113  m.nextTokenConstraints.taboo.append(
1114  cm.nextTokenConstraints.taboo);
1115  }
1116  i++;
1117  }
1118  }
1119  }
1120  if (currentMatches.isEmpty()) goto next_rule;
1121  if (haveNextTokenConstraints) {
1122  /* We cannot just unify all the matches to one in this case. We will run
1123  the full unification process at the end. But clear the scopes. */
1124  int s = currentMatches.size();
1125  for (int i=0; i<s; i++) currentMatches[i].scope = PseudoCatScope();
1126  result.append(currentMatches);
1127  goto next_rule;
1128  }
1129  /* unify the matches:
1130  * if this is the first matching rule, make the first match the current
1131  result
1132  * otherwise, append the new alternative(s) from the first match to the
1133  current result
1134  * if there is more than one match, also append the alternative(s) from
1135  the additional matches */
1136  int i=0, s=currentMatches.size();
1137  if (isFirst) {
1138  isFirst = false;
1139  result.append(currentMatches.first());
1140  result.first().scope = PseudoCatScope(); // clear scope
1141  i++;
1142  }
1143  Match &firstResult = result.first();
1144  for (; i<s; i++)
1145  firstResult.tree.children.append(currentMatches.at(i).tree.children);
1146  }
1147 next_rule:
1148  ;
1149  }
1150  /* unify the result now (as much as possible) if we have next token
1151  constraints, otherwise it is already unified to a single match */
1152  if (haveNextTokenConstraints) unify(result);
1153  return result;
1154 }
1155 
1158 QList<Match> Parser::matchEffectiveCatToEpsilon(CatArg cat, QSet<Cat> mark)
1159 {
1160  QList<Match> result;
1161 
1162  /* if the effective category is 1-dimensional, don't bother about rule numbers
1163  and child scopes - they are not needed for P constraints (the only ones we
1164  can have on pseudo-categories for 1-dimensional effective categories) */
1165  if (!componentCats.contains(cat))
1166  return matchCFToEpsilon(cat, mark);
1167 
1168  mark.insert(cat);
1169  QList<Rule> ruleList = rules.value(cat);
1170  int s = ruleList.size();
1171  for (int i=0; i<s; i++) {
1172  const Rule &rule = ruleList.at(i);
1173  // only consider nullable rules with only unmarked categories
1174  foreach (CatArg cati, rule) {
1175  if (!nullable.contains(cati) || mark.contains(cati)) goto next_rule;
1176  }
1177  {
1178  QList<Match> currentMatches;
1179  Node node(cat);
1180  node.children.first().setLabel(rule.label());
1181  currentMatches.append(Match(0, node, i, PseudoCatScope(),
1182  rule.nextTokenConstraints));
1183  foreach (CatArg cati, rule) {
1184  int s = currentMatches.size();
1185  for (int i=0; i<s; ) {
1186  Match &m = currentMatches[i];
1187  const QList<Match> componentMatches
1188  = matchToEpsilonRecurse(cati, mark, m.scope);
1189  if (componentMatches.isEmpty()) {
1190  if (i) currentMatches.swap(0, i);
1191  currentMatches.removeFirst();
1192  s--;
1193  } else {
1194  int cs = componentMatches.size();
1195  for (int j=1; j<cs; j++) {
1196  const Match &cm = componentMatches.at(j);
1197  Node newTree = m.tree;
1198  newTree.children.first().append(cm.tree);
1199  currentMatches.append(Match(m.len + cm.len, newTree, m.ruleno,
1200  cm.scope, m.nextTokenConstraints));
1201  currentMatches.last().nextTokenConstraints.expect.append(
1202  cm.nextTokenConstraints.expect);
1203  currentMatches.last().nextTokenConstraints.taboo.append(
1204  cm.nextTokenConstraints.taboo);
1205  }
1206  const Match &cm = componentMatches.first();
1207  m.len += cm.len;
1208  m.tree.children.first().append(cm.tree);
1209  m.scope = cm.scope;
1210  m.nextTokenConstraints.expect.append(
1211  cm.nextTokenConstraints.expect);
1212  m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo);
1213  i++;
1214  }
1215  }
1216  }
1217  // in this case, we cannot unify, so just append the matches to the result
1218  result.append(currentMatches);
1219  }
1220 next_rule:
1221  ;
1222  }
1223  return result;
1224 }
1225 
1227 QList<Match> Parser::matchToEpsilonRecurse(CatArg cat, QSet<Cat> mark,
1228  const PseudoCatScope &scope)
1229 {
1230  QList<Match> result;
1231 
1232  if (scope.hasPConstraint(cat)) {
1233  // handle P constraint
1234  QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
1235  = scope.pConstraint(cat);
1236  if (!pConstraint.second) // only matches if length == 0
1237  result.append(Match(0, pConstraint.first.first, 0, scope,
1238  pConstraint.first.second));
1239  return result;
1240  }
1241 
1242  // if this is a pseudo-category, it is not known yet, so get the effective
1243  // category (if this is not a pseudo-category, effCat is cat itself)
1244  Cat effCat = effectiveCat(cat);
1245 
1246  if (effCat != cat) { // cat is a pseudo-category
1247  if (mark.contains(effCat)) return result; // prevent infinite recursion
1248  mark.insert(cat);
1249 
1250  if (scope.hasMcfgConstraint(cat)) {
1251  // handle MCFG constraint
1252  QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
1253  const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);
1254  /* check if we can (possibly - there could be other PMCFG constraints
1255  preventing it) produce epsilon with this rule, and make sure the
1256  categories in the rule are not marked to prevent infinite recursion */
1257  foreach (CatArg cati, rule) {
1258  if (!nullable.contains(cati) || mark.contains(cati)) return result;
1259  }
1260  Node node(effCat);
1261  node.children.first().setLabel(rule.label());
1262  result.append(Match(0, node, mcfgConstraint.first, scope,
1263  rule.nextTokenConstraints));
1264  foreach (CatArg cati, rule) {
1265  int s = result.size();
1266  for (int i=0; i<s; ) {
1267  Match &m = result[i];
1268  const QList<Match> componentMatches
1269  = matchToEpsilonRecurse(cati, mark, m.scope);
1270  if (componentMatches.isEmpty()) {
1271  if (i) result.swap(0, i);
1272  result.removeFirst();
1273  s--;
1274  } else {
1275  int cs = componentMatches.size();
1276  for (int j=1; j<cs; j++) {
1277  const Match &cm = componentMatches.at(j);
1278  Node newTree = m.tree;
1279  newTree.children.first().append(cm.tree);
1280  result.append(Match(m.len + cm.len, newTree, m.ruleno,
1281  cm.scope, m.nextTokenConstraints));
1282  result.last().nextTokenConstraints.expect.append(
1283  cm.nextTokenConstraints.expect);
1284  result.last().nextTokenConstraints.taboo.append(
1285  cm.nextTokenConstraints.taboo);
1286  }
1287  const Match &cm = componentMatches.first();
1288  m.len += cm.len;
1289  m.tree.children.first().append(cm.tree);
1290  m.scope = cm.scope;
1291  m.nextTokenConstraints.expect.append(
1292  cm.nextTokenConstraints.expect);
1293  m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo);
1294  i++;
1295  }
1296  }
1297  }
1298  } else // no PMCFG constraints on this pseudo-category
1299  result = matchEffectiveCatToEpsilon(effCat, mark);
1300 
1301  finalizeMatches(result, cat, scope);
1302  } else { // cat is a true category
1303  result = matchCFToEpsilon(cat, mark);
1304  copyScope(result, scope);
1305  }
1306 
1307  return result;
1308 }
1309 
1311 
1316 QList<Match> Parser::matchToEpsilon(CatArg cat, const PseudoCatScope &scope)
1317 {
1318  QList<Match> result;
1319 
1320  if (scope.hasPConstraint(cat)) {
1321  // handle P constraint
1322  QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
1323  = scope.pConstraint(cat);
1324  if (!pConstraint.second) // only matches if length == 0
1325  result.append(Match(0, pConstraint.first.first, 0, scope,
1326  pConstraint.first.second));
1327  return result;
1328  }
1329 
1330  // if this is a pseudo-category, it is not known yet, so get the effective
1331  // category (if this is not a pseudo-category, effCat is cat itself)
1332  Cat effCat = effectiveCat(cat);
1333 
1334  if (effCat != cat) { // cat is a pseudo-category
1335  if (scope.hasMcfgConstraint(cat)) {
1336  // handle MCFG constraint
1337  QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
1338  const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);
1339  /* check if we can (possibly - there could be other PMCFG constraints
1340  preventing it) produce epsilon with this rule */
1341  foreach (CatArg cati, rule) {
1342  if (!nullable.contains(cati)) return result;
1343  }
1344  Node node(effCat);
1345  node.children.first().setLabel(rule.label());
1346  result.append(Match(0, node, mcfgConstraint.first, scope,
1347  rule.nextTokenConstraints));
1348  foreach (CatArg cati, rule) {
1349  int s = result.size();
1350  for (int i=0; i<s; ) {
1351  Match &m = result[i];
1352  const QList<Match> componentMatches = matchToEpsilon(cati, m.scope);
1353  if (componentMatches.isEmpty()) {
1354  if (i) result.swap(0, i);
1355  result.removeFirst();
1356  s--;
1357  } else {
1358  int cs = componentMatches.size();
1359  for (int j=1; j<cs; j++) {
1360  const Match &cm = componentMatches.at(j);
1361  Node newTree = m.tree;
1362  newTree.children.first().append(cm.tree);
1363  result.append(Match(m.len + cm.len, newTree, m.ruleno,
1364  cm.scope, m.nextTokenConstraints));
1365  result.last().nextTokenConstraints.expect.append(
1366  cm.nextTokenConstraints.expect);
1367  result.last().nextTokenConstraints.taboo.append(
1368  cm.nextTokenConstraints.taboo);
1369  }
1370  const Match &cm = componentMatches.first();
1371  m.len += cm.len;
1372  m.tree.children.first().append(cm.tree);
1373  m.scope = cm.scope;
1374  m.nextTokenConstraints.expect.append(
1375  cm.nextTokenConstraints.expect);
1376  m.nextTokenConstraints.taboo.append(cm.nextTokenConstraints.taboo);
1377  i++;
1378  }
1379  }
1380  }
1381  } else { // no PMCFG constraints on this pseudo-category
1382  // We cannot cache the result here because the scopes can change.
1383  /* FIXME: Well, we could cache the result with a dummy scope and then
1384  compute the union. Is that worth it? */
1385  result = matchEffectiveCatToEpsilon(effCat, QSet<Cat>());
1386  }
1387 
1388  finalizeMatches(result, cat, scope);
1389  } else { // cat is a true category
1390  // memoize the matches for efficiency
1391  // FIXME: this will probably need to get smarter (or disabled) for
1392  // context-sensitive constraints
1393  if (epsilonMatches.contains(cat))
1394  result = epsilonMatches.value(cat);
1395  else {
1396  result = matchCFToEpsilon(cat, QSet<Cat>());
1397  epsilonMatches.insert(cat, result);
1398  }
1399  copyScope(result, scope);
1400  }
1401 
1402  return result;
1403 }
1404 
1406 bool Parser::matchesTokenRecurse(CatArg cat, CatArg token, QSet<Cat> mark) const
1407 {
1408  // Handle the trivial case first.
1409  if (isToken(cat)) return cat == token;
1410 
1411  // Now cat is a nonterminal.
1412  mark.insert(cat);
1413  foreach (const Rule &rule, rules.value(cat)) {
1414  // an epsilon rule cannot match a token
1415  if (rule.isEmpty()) continue;
1416  // for each element of the rule, try matching that element to the token and
1417  // the rest to epsilon
1418  int l = rule.size();
1419  for (int i=0; i<l; i++) {
1420  CatArg cati = rule.at(i);
1421  /* Left recursion cannot match a token if it doesn't match without the
1422  recursion because tokens are atomic. */
1423  if (mark.contains(cati)) continue; // ignore left recursion
1424  for (int j=0; j<l; j++) {
1425  if (i == j) continue;
1426  if (!nullable.contains(rule.at(j))) goto skip_this_i; // match epsilon
1427  }
1428  if (matchesTokenRecurse(cati, token, mark)) return true; // match token
1429 skip_this_i: ;
1430  }
1431  }
1432 
1433  // None of the rules matched.
1434  return false;
1435 }
1436 
1438 
1447 bool Parser::matchesToken(CatArg cat, CatArg token) const
1448 {
1449  return matchesTokenRecurse(cat, token, QSet<Cat>());
1450 }
1451 
1453 
1460 void Parser::collectLeaves(const Node &tree, QList<Node> &leaves)
1461 {
1462  if (isToken(tree.cat))
1463  leaves.append(tree);
1464  else {
1465  // consider only the first alternative - they must all match the same tokens
1466  foreach (const Node &node, tree.children.first())
1467  collectLeaves(node, leaves);
1468  }
1469 }
1470 
1472 bool Parser::validateNextTokenConstraints(CatArg token,
1473  const NextTokenConstraints &nextTokenConstraints) const
1474 {
1475  foreach(CatArg expect, nextTokenConstraints.expect)
1476  if (!matchesToken(expect, token)) return false;
1477  foreach(CatArg taboo, nextTokenConstraints.taboo)
1478  if (matchesToken(taboo, token)) return false;
1479  return true;
1480 }
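/* Illustration (simplified sketch): a token is admissible only if it
   matches every "expect" constraint and none of the "taboo" constraints.
   With literal tokens as constraints (so that "matches" degenerates to
   plain equality), the check reduces to:

     static bool admitsToken(const QString &token,
                             const QList<QString> &expect,
                             const QList<QString> &taboo) {
       foreach (const QString &e, expect)
         if (token != e) return false; // a required token is not matched
       foreach (const QString &t, taboo)
         if (token == t) return false; // a forbidden token is matched
       return true;
     }
*/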
1481 
1483 
1489 QList<Match> Parser::match(CatArg cat, int pos, const PseudoCatScope &scope,
1490  const StackItem &stack,
1491  const NextTokenConstraints &nextTokenConstraints)
1492 {
1493  QList<Match> result;
1494 
1495  if (scope.hasPConstraint(cat)) {
1496  // handle P constraint
1497  QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
1498  = scope.pConstraint(cat);
1499  if (pConstraint.second) { // length != 0
1500  QList<Node> leaves;
1501  collectLeaves(pConstraint.first.first, leaves);
1502  /* check the next token constraints right now, it's no use keeping the
1503  type 6 item just to have the shift throw it away, and this way we don't
1504  have to keep 2 sets of next token constraints in the type 6 item (one
1505  for the first token and one for the next token after the last) */
1506  if (validateNextTokenConstraints(leaves.first().cat,
1507  nextTokenConstraints)) {
1508  StackItem type6(stack, leaves, 0, pConstraint.first.first, scope,
1509  pConstraint.first.second);
1510  nextStacks.append(type6);
1511  }
1512  } else { // length == 0
1513  NextTokenConstraints newNextTokenConstraints = nextTokenConstraints;
1514  newNextTokenConstraints.expect.append(pConstraint.first.second.expect);
1515  newNextTokenConstraints.taboo.append(pConstraint.first.second.taboo);
1516  result.append(Match(0, pConstraint.first.first, 0, scope,
1517  newNextTokenConstraints));
1518  }
1519  return result;
1520  }
1521 
1522  // if this is a pseudo-category, get the effective category (if this is not a
1523  // pseudo-category, effCat is cat itself)
1524  Cat effCat = effectiveCat(cat);
1525 
1526  if (scope.hasMcfgConstraint(cat)) {
1527  // handle MCFG constraint
1528  QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
1529  const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);
1530 
1531  // perform a top-down expansion of rule
1532  {
1533  StackItem type5(stack, cat, scope);
1534  Node node(effCat);
1535  node.children.first().setLabel(rule.label());
1536  result = matchRemaining(rule, 0, pos, 0, node, mcfgConstraint.second,
1537  mcfgConstraint.first, type5,
1538  nextTokenConstraints);
1539  }
1540 
1541  finalizeMatches(result, cat, scope);
1542  } else { // no PMCFG constraints on this category or pseudo-category
1543  // try epsilon matches first
1544  if (nullable.contains(effCat)) {
1545  result = matchToEpsilon(cat, scope);
1546  if (!nextTokenConstraints.expect.isEmpty()
1547  || !nextTokenConstraints.taboo.isEmpty()) {
1548  int s = result.size();
1549  for (int i=0; i<s; i++) {
1550  NextTokenConstraints &newNextTokenConstraints
1551  = result[i].nextTokenConstraints;
1552  newNextTokenConstraints.expect.append(nextTokenConstraints.expect);
1553  newNextTokenConstraints.taboo.append(nextTokenConstraints.taboo);
1554  }
1555  }
1556  }
1557 
1558  // now we want a nonempty match
1559  // prepare the parse stack for the next shift
1560  /* We do not store the next token constraints in the type 0 item because
1561  the type 3 parent already carries them, and they can be different for the
1562  different type 3 parents in the DAG-structured stack. */
1563  if (stack.type() < 0) { // toplevel match
1564  StackItem type0(QList<StackItem>(), cat, effCat, pos, scope);
1565  nextStacks.append(type0);
1566  } else { // subordinate match, i.e. nonempty stack
1567  // memoize type 0 item indices to build a graph-structured stack
1568  if (scope.isNull() && type0Indices.contains(cat))
1569  nextStacks[type0Indices.value(cat)].addParent(stack);
1570  else {
1571  QList<StackItem> parents;
1572  parents << stack;
1573  StackItem type0(parents, cat, effCat, pos, scope);
1574  if (scope.isNull()) type0Indices.insert(cat, nextStacks.size());
1575  nextStacks.append(type0);
1576  }
1577  }
1578  }
1579 
1580  // return the result (only epsilon matches, the rest is deferred)
1581  return result;
1582 }
1583 
1585 
1589 QList<Match> Parser::matchRemaining(const Rule &rule, int len, int curr, int i,
1590  const Node &tree,
1591  const PseudoCatScope &scope, int ruleno,
1592  const StackItem &stack,
1593  const NextTokenConstraints
1594  &nextTokenConstraints)
1595 {
1596  QList<Match> result;
1597  if (i < rule.size()) {
1598  QList<Match> matches;
1599  {
1600  StackItem type3(stack, rule, len, curr, i, tree, ruleno,
1601  nextTokenConstraints);
1602  matches = match(rule.at(i), curr, scope, type3, nextTokenConstraints);
1603  }
1604  foreach (const Match &m, matches) {
1605  Node newTree = tree;
1606  newTree.children.first().append(m.tree);
1607  result.append(matchRemaining(rule, len+m.len, curr+m.len, i+1, newTree,
1608  m.scope, ruleno, stack,
1609  m.nextTokenConstraints));
1610  }
1611  } else {
1612  if (rule.action) {
1613  ActionInfo info(tree);
1614  rule.action->execute(info);
1615  }
1616  NextTokenConstraints newNextTokenConstraints = nextTokenConstraints;
1617  newNextTokenConstraints.expect.append(rule.nextTokenConstraints.expect);
1618  newNextTokenConstraints.taboo.append(rule.nextTokenConstraints.taboo);
1619  result.append(Match(len, tree, ruleno, scope, newNextTokenConstraints));
1620  }
1621  return result;
1622 }
1623 
1625 
1628 Cat Parser::findFirstToken(const Node &tree)
1629 {
1630  CatArg cat = tree.cat;
1631  if (isToken(cat))
1632  return cat;
1633  else {
1634  // consider only the first alternative - they must all match the same tokens
1635  foreach (const Node &node, tree.children.first()) {
1636  Cat result = findFirstToken(node);
1637  if (!IS_EPSILON(result)) return result;
1638  }
1639  return Cat(); // This tree matches epsilon.
1640  }
1641 }
1642 
1645 void Parser::unify(QList<Match> &matches)
1646 {
1647  int l = matches.size();
1648  if (l > 1) {
1649  /* For plain CFGs, we'd only need to match length and category here. We add
1650  the rule number and the scope for PMCFG matches and the next token
1651  constraints for scannerless parsing support. */
1652  QHash<QPair<QPair<QPair<int, Cat>, QPair<int, PseudoCatScope> >,
1653  NextTokenConstraints>, int>
1654  indexOfLenCat;
1655  {
1656  const Match &m = matches.first();
1657  indexOfLenCat.insert(qMakePair(qMakePair(qMakePair(m.len, m.tree.cat),
1658  qMakePair(m.ruleno, m.scope)),
1659  m.nextTokenConstraints), 0);
1660  }
1661  for (int i=1; i<l; ) {
1662  const Match &m = matches.at(i);
1663  QPair<QPair<QPair<int, Cat>, QPair<int, PseudoCatScope> >,
1664  NextTokenConstraints> key(qMakePair(qMakePair(m.len, m.tree.cat),
1665  qMakePair(m.ruleno, m.scope)),
1666  m.nextTokenConstraints);
1667  if (indexOfLenCat.contains(key)) {
1668  matches[indexOfLenCat.value(key)].tree.children.append(m.tree.children);
1669  matches.removeAt(i);
1670  l--;
1671  } else indexOfLenCat.insert(key, i++);
1672  }
1673  }
1674 }
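/* Illustration (reduced sketch with a hypothetical MiniMatch type): unify()
   packs ambiguity into the parse tree instead of multiplying matches -
   matches that agree on everything the parser will still look at (length,
   category, rule number, scope, next token constraints) are merged into a
   single match whose tree carries all the alternatives. The same
   merge-by-key pattern with only (length, category) as the key:

     struct MiniMatch {
       int len;
       QString cat;
       QList<QStringList> alternatives; // stands in for tree.children
     };

     static void miniUnify(QList<MiniMatch> &matches) {
       QHash<QPair<int, QString>, int> indexOfKey;
       for (int i = 0; i < matches.size(); ) {
         QPair<int, QString> key(matches.at(i).len, matches.at(i).cat);
         if (indexOfKey.contains(key)) {
           matches[indexOfKey.value(key)].alternatives
             += matches.at(i).alternatives; // merge the alternatives
           matches.removeAt(i);
         } else {
           indexOfKey.insert(key, i++);
         }
       }
     }
*/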
1675 
1678 
1686 QList<Match> Parser::reduce(CatArg cat, CatArg target, int pos, int len,
1687  const Node &tree, const StackItem &stack,
1688  const PseudoCatScope &scope, int ruleno,
1689  const NextTokenConstraints &nextTokenConstraints,
1690  QSet<Cat> mark)
1691 {
1692  QList<Match> result;
1693  if (cat == target) result.append(Match(len, tree, ruleno, scope,
1694  nextTokenConstraints));
1695  QList<Match> matches;
1696  {
1697  StackItem type4(stack, target, pos, len);
1698  // for each rule to get towards the target category
1699  foreach (const FullRule &rule, neighborhood(cat, target)) {
1700  QList<Match> currentMatches;
1701  Node node(rule.cat);
1702  node.children.first().setLabel(rule.rule.label());
1703  int epsilonsSkipped = rule.epsilonsSkipped;
1704  // if we are reducing a pseudo-category, set the correct scope
1705  PseudoCatScope newScope;
1706  CatArg pseudoCat = rule.rule.at(epsilonsSkipped);
1707  if (pseudoCat != cat) {
1708  newScope.pConstraints().insert(pseudoCat,
1709  qMakePair(qMakePair(tree, nextTokenConstraints), len));
1710  foreach (CatArg cati, pseudoCats.value(pseudoCat).second)
1711  newScope.mcfgConstraints().insert(cati, qMakePair(ruleno, scope));
1712  }
1713  currentMatches.append(Match(0, node, rule.ruleno, newScope,
1714  nextTokenConstraints));
1715  /* Find the first token in the tree so we can validate the next token
1716  constraints of the epsilon matches. If we don't have epsilons skipped,
1717  don't bother doing this work.
1718  Also note that tree cannot be an epsilon tree because we never reduce
1719  epsilon. */
1720  Cat firstToken = Cat();
1721  if (epsilonsSkipped)
1722  firstToken = findFirstToken(tree);
1723  // match first items to epsilon
1724  for (int k=0; k<epsilonsSkipped; k++) {
1725  int s = currentMatches.size();
1726  for (int i=0; i<s; ) {
1727  Match &m = currentMatches[i];
1728  QList<Match> componentMatches = matchToEpsilon(rule.rule.at(k),
1729  m.scope);
1730  /* Here, we need to validate the next token constraints in our epsilon
1731  matches against the first token of the tree we're reducing. */
1732  /* FIXME: Would it be more efficient to do this in neighborhood? It'd
1733  help prediction in some (rare?) cases, too. But it'd make
1734  neighborhoods dependent on the first token, which hurts
1735  caching badly. */
1736  {
1737  int cs = componentMatches.size();
1738  for (int j=0; j<cs; ) {
1739  if (validateNextTokenConstraints(firstToken,
1740  componentMatches.at(j).nextTokenConstraints)) j++; else {
1741  if (j) componentMatches.swap(0, j);
1742  componentMatches.removeFirst();
1743  cs--;
1744  }
1745  }
1746  }
1747  if (componentMatches.isEmpty()) {
1748  if (i) currentMatches.swap(0, i);
1749  currentMatches.removeFirst();
1750  s--;
1751  } else {
1752  int cs = componentMatches.size();
1753  for (int j=1; j<cs; j++) {
1754  const Match &cm = componentMatches.at(j);
1755  Node newTree = m.tree;
1756  newTree.children.first().append(cm.tree);
1757  currentMatches.append(Match(m.len + cm.len, newTree, m.ruleno,
1758  cm.scope, m.nextTokenConstraints));
1759  }
1760  const Match &cm
1761  = static_cast<const QList<Match> &>(componentMatches).first();
1762  m.len += cm.len;
1763  m.tree.children.first().append(cm.tree);
1764  m.scope = cm.scope;
1765  i++;
1766  }
1767  }
1768  }
1769  foreach (const Match &m, currentMatches) {
1770  Node newTree = m.tree;
1771  newTree.children.first().append(tree);
1772  // match remaining rule items
1773  matches.append(matchRemaining(rule.rule, 0, pos, epsilonsSkipped+1,
1774  newTree, m.scope, m.ruleno, type4,
1775  m.nextTokenConstraints));
1776  }
1777  }
1778  }
1779  // unify matches to a shared representation to avoid needless bifurcation
1780  unify(matches);
1781  {
1782  StackItem type2(stack, 0);
1783  foreach (const Match &m, matches) {
1784  Cat newCat = m.tree.cat;
1785  if (m.len)
1786  qFatal("length of a reduction increased in a non-shift codepath");
1787  // reduction with unchanged length
1788  // avoid infinite loops (if we have epsilon productions or redundant
1789  // X->...->X rules)
1790  mark.insert(cat);
1791  if (mark.contains(newCat)) continue;
1792  // now reduce the rule
1793  result.append(reduce(newCat, target, pos, len, m.tree, type2, m.scope,
1794  m.ruleno, m.nextTokenConstraints, mark));
1795  }
1796  }
1797  // do another unification pass on the result
1798  unify(result);
1799  return result;
1800 }
1801 
1803 
1806 QList<Match> Parser::processStackItem(const StackItem &item,
1807  const QList<Match> &matches)
1808 {
1809  StackItem nextItem = item;
1810  QList<Match> nextMatches = matches;
1811  tailcall: {
1812  const StackItem &item = nextItem;
1813  const QList<Match> &matches = nextMatches;
1814  // Do not bother proceeding any further with an empty list of matches, as
1815  // the result will necessarily be empty as well.
1816  if (matches.isEmpty()) return matches;
1817  switch (item.type()) {
1818  case 0:
1819  qFatal("type 0 items not supported by processStackItem");
1820  case 1:
1821  {
1822  const StackItemType1 *data
1823  = static_cast<const StackItemType1 *>(item.data());
1824  const QList<StackItem> &parents = data->parents();
1825  Cat cat = data->cat();
1826  Cat effCat = data->effCat();
1827  PseudoCatScope scope = data->scope();
1828 
1829  if (effCat != cat) // cat is a pseudo-category
1830  finalizeMatches(nextMatches, cat, scope);
1831  else
1832  copyScope(nextMatches, scope);
1833 
1834  if (parents.isEmpty()) // toplevel item
1835  return nextMatches;
1836  else if (parents.count() == 1) {
1837  nextItem = parents.first();
1838  goto tailcall;
1839  } else {
1840  branched = true;
1841  QList<Match> result;
1842  foreach (const StackItem &parent, parents)
1843  result.append(processStackItem(parent, nextMatches));
1844  return result;
1845  }
1846  }
1847  break;
1848  case 2:
1849  {
1850  const StackItemType2 *data
1851  = static_cast<const StackItemType2 *>(item.data());
1852  const StackItem &parent = data->parent();
1853 
1854  // skip unification if we know there is nothing further coming to this
1855  // unification point
1856  bool present;
1857  if (branched)
1858  present = collectedMatches.contains(data);
1859  else {
1860  if (priorityQueue.isEmpty()) {
1861  type_2_skip:
1862  nextItem = parent;
1863  goto tailcall;
1864  }
1865  int headDepth = priorityQueue.head().depth();
1866  int depth = data->depth();
1867  if (headDepth < depth) goto type_2_skip;
1868  if (headDepth == depth) {
1869  present = collectedMatches.contains(data);
1870  if (!present) goto type_2_skip;
1871  } else present = collectedMatches.contains(data);
1872  }
1873 
1874  // unify matches, deferring processing
1875  if (present) {
1876  QPair<bool, QList<Match> > &entry = collectedMatches[data];
1877  entry.first = true;
1878  entry.second.append(matches);
1879  } else {
1880  collectedMatches[data] = qMakePair(false, matches);
1881  priorityQueue.enqueue(item);
1882  }
1883  return QList<Match>();
1884  }
1885  break;
1886  case 3:
1887  {
1888  const StackItemType3 *data
1889  = static_cast<const StackItemType3 *>(item.data());
1890  const StackItem &parent = data->parent();
1891  Rule rule = data->rule();
1892  int len = data->len();
1893  int curr = data->curr();
1894  int i = data->i();
1895  Node tree = data->tree();
1896  int ruleno = data->ruleno();
1897 
1898  QList<Match> result;
1899  foreach (const Match &m, matches) {
1900  Node newTree = tree;
1901  newTree.children.first().append(m.tree);
1902  result.append(matchRemaining(rule, len+m.len, curr+m.len, i+1,
1903  newTree, m.scope, ruleno, parent,
1904  m.nextTokenConstraints));
1905  }
1906 
1907  nextMatches = result;
1908  nextItem = parent;
1909  goto tailcall;
1910  }
1911  break;
1912  case 4:
1913  {
1914  const StackItemType4 *data
1915  = static_cast<const StackItemType4 *>(item.data());
1916  const StackItem &parent = data->parent();
1917  Cat target = data->target();
1918  int pos = data->pos();
1919  int len = data->len();
1920 
1921  // skip unification if we know there is nothing further coming to this
1922  // unification point
1923  bool present;
1924  if (branched)
1925  present = collectedMatches.contains(data);
1926  else {
1927  if (priorityQueue.isEmpty()) {
1928  type_4_skip:
1929  QList<Match> result;
1930  {
1931  StackItem type2(parent, 0);
1932  foreach (const Match &m, matches) {
1933  Cat newCat = m.tree.cat;
1934  if (!m.len)
1935  qFatal("reduction with unchanged length was deferred");
1936  // the length increased, reduce the rule, resetting mark
1937  result.append(reduce(newCat, target, pos+m.len, len+m.len,
1938  m.tree, type2, m.scope, m.ruleno,
1939  m.nextTokenConstraints));
1940  }
1941  }
1942  // do another unification pass on the result
1943  unify(result);
1944 
1945  nextMatches = result;
1946  nextItem = parent;
1947  goto tailcall;
1948  }
1949  int headDepth = priorityQueue.head().depth();
1950  int depth = data->depth();
1951  if (headDepth < depth) goto type_4_skip;
1952  if (headDepth == depth) {
1953  present = collectedMatches.contains(data);
1954  if (!present) goto type_4_skip;
1955  } else present = collectedMatches.contains(data);
1956  }
1957 
1958  // unify matches, deferring processing
1959  if (present) {
1960  QPair<bool, QList<Match> > &entry = collectedMatches[data];
1961  entry.first = true;
1962  entry.second.append(matches);
1963  } else {
1964  collectedMatches[data] = qMakePair(false, matches);
1965  priorityQueue.enqueue(item);
1966  }
1967  return QList<Match>();
1968  }
1969  case 5:
1970  {
1971  const StackItemType5 *data
1972  = static_cast<const StackItemType5 *>(item.data());
1973  const StackItem &parent = data->parent();
1974  Cat cat = data->cat();
1975  PseudoCatScope scope = data->scope();
1976 
1977  finalizeMatches(nextMatches, cat, scope);
1978 
1979  nextItem = parent;
1980  goto tailcall;
1981  }
1982  break;
1983  case 6:
1984  qFatal("type 6 items not supported by processStackItem");
1985  default:
1986  qFatal("invalid stack item type");
1987  }
1988  }
1989 }
1990 
1992 
1999 QList<Match> Parser::processStack(const StackItem &stack, CatArg token)
2000 {
2001  branched = false; // reset branched flag
2002  switch (stack.type()) {
2003  // type 0 and 6 are toplevel items
2004  case 0:
2005  {
2006  const StackItemType0 *data
2007  = static_cast<const StackItemType0 *>(stack.data());
2008  const QList<StackItem> &parents = data->parents();
2009  Cat cat = data->cat();
2010  Cat effCat = data->effCat();
2011  int pos = data->pos();
2012  PseudoCatScope scope = data->scope();
2013  QList<Match> matches;
2014 
2015  // type 0 item: finish processing of match after a shift
2016  if (isToken(cat)) {
2017  if (token == cat) matches.append(Match(1, inputSource->parseTree(), 0,
2018  scope));
2019  } else {
2020  // reduce the matched token (and possibly additional tokens, with
2021  // deferred processing) to the target category
2022  StackItem type1(parents, cat, effCat, scope);
2023  // The token carries no next token constraints.
2024  matches = reduce(token, effCat, pos, 1, inputSource->parseTree(),
2025  type1, PseudoCatScope(), 0, NextTokenConstraints());
2026 
2027  if (effCat != cat) // cat is a pseudo-category
2028  finalizeMatches(matches, cat, scope);
2029  else
2030  copyScope(matches, scope);
2031  }
2032 
2033  if (parents.isEmpty()) // toplevel item
2034  return matches;
2035  else {
2036  QList<Match> result;
2037  if (parents.count() > 1) branched = true;
2038  foreach (const StackItem &parent, parents)
2039  result.append(processStackItem(parent, matches));
2040  return result;
2041  }
2042  }
2043  case 6:
2044  {
2045  const StackItemType6 *data
2046  = static_cast<const StackItemType6 *>(stack.data());
2047  const StackItem &parent = data->parent();
2048  QList<Node> leaves = data->leaves();
2049  int i = data->i();
2050  Node tree = data->tree();
2051  PseudoCatScope scope = data->scope();
2052  NextTokenConstraints nextTokenConstraints
2053  = data->nextTokenConstraints();
2054  QList<Match> matches;
2055 
2056  // type 6 item: finish processing of a P constraint
2057  // match the i-th leaf against the shifted token
2058  if (inputSource->matchParseTree(leaves.at(i++))) {
2059  int l = leaves.size();
2060  if (i == l) // we're done matching the constraint
2061  matches.append(Match(l, tree, 0, scope, nextTokenConstraints));
2062  else { // not done yet, create a new type 6 item
2063  StackItem type6(parent, leaves, i, tree, scope,
2064  nextTokenConstraints);
2065  nextStacks.append(type6);
2066  }
2067  }
2068 
2069  return processStackItem(parent, matches);
2070  }
2071  // case 2 and 4 are unification points, so look for matches collected so far
2072  // and process them
2073  case 2:
2074  case 4:
2075  {
2076  // We use take (= value + remove) instead of value here to ensure that
2077  // processStackItem will not send us back here.
2078  QPair<bool, QList<Match> > entry = collectedMatches.take(stack.data());
2079  if (entry.first) // need unification
2080  unify(entry.second);
2081  return processStackItem(stack, entry.second);
2082  }
2083  default:
2084  qFatal("invalid stack (expected toplevel item or unification point)");
2085  }
2086 }
2087 
2089 
2094 bool Parser::shift(int pos)
2095 {
2096  if (!inputSource->rewindTo(pos, currentLexerState)) {
2097  qWarning("invalid input position");
2098  return false;
2099  }
2100  Cat token = inputSource->nextToken();
2101  if (IS_EPSILON(token))
2102  return false;
2103 
2104  // delete all the stacks which don't satisfy the next token constraints
2105  {
2106  int l = nextStacks.size();
2107  for (int i=0; i<l; ) {
2108  StackItem &stack = nextStacks[i];
2109  // type 6 items are already filtered for next token constraints
2110  if (stack.type()) i++; else {
2111  const StackItemType0 *data
2112  = static_cast<const StackItemType0 *>(stack.data());
2113  QList<StackItem> parents = data->parents();
2114  // no parents = toplevel item, skip processing to avoid killing this
2115  if (parents.empty()) i++; else {
2116  int n = parents.size();
2117  for (int j=0; j<n; ) {
2118  const StackItem &parent = parents.at(j);
2119  const StackItemType3 *parentData
2120  = static_cast<const StackItemType3 *>(parent.data());
2121  if (validateNextTokenConstraints(token,
2122  parentData->nextTokenConstraints())) j++; else {
2123  if (j) parents.swap(0, j);
2124  parents.removeFirst();
2125  n--;
2126  }
2127  }
2128  // no parents left, kill the type 0 item
2129  if (parents.isEmpty()) {
2130  if (i) nextStacks.swap(0, i);
2131  nextStacks.removeFirst();
2132  l--;
2133  } else {
2134  stack.setParents(parents);
2135  i++;
2136  }
2137  }
2138  }
2139  }
2140  }
2141 
2142  // if no more stacks, set errPos and errToken and return false
2143  // (this happens if we had a valid complete parse, but no continuations, and
2144  // now there's extra input)
2145  if (nextStacks.isEmpty()) {
2146  errPos = pos;
2147  errToken = token;
2148  return false;
2149  }
2150 
2151  // process stack
2152  currentMatches.clear();
2153  QList<StackItem> currentStacks = nextStacks;
2154  nextStacks.clear();
2155  priorityQueue = Private::PriorityQueue<StackItem>(currentStacks);
2156  while (!priorityQueue.isEmpty())
2157  currentMatches.append(processStack(priorityQueue.dequeue(), token));
2158  type0Indices.clear();
2159 
2160  // if no more stacks and no current matches, set errPos and errToken, restore
2161  // the previous nextStacks to allow running prediction on them and return
2162  // false
2163  if (nextStacks.isEmpty() && currentMatches.isEmpty()) {
2164  errPos = pos;
2165  errToken = token;
2166  nextStacks = currentStacks;
2167  return false;
2168  }
2169 
2170  currentLexerState = inputSource->saveState();
2171  return true;
2172 }
2173 
2175 
2250 QList<Match> Parser::parse(int *errorPos, Cat *errorToken, int *incrementalPos,
2251  QList<StackItem> *incrementalStacks,
2252  QList<Match> *incrementalMatches,
2253  LexerState *lexerState)
2254 {
2255  int pos = 0;
2256 
2257  if (!incrementalPos || *incrementalPos < 0) // start a new parse
2258  currentMatches = match(startCat, 0, PseudoCatScope(), StackItem(),
2259  NextTokenConstraints());
2260  else { // continue an incremental parse
2261  pos = *incrementalPos;
2262  if (incrementalMatches) currentMatches = *incrementalMatches;
2263  if (incrementalStacks) nextStacks = *incrementalStacks;
2264  }
2265 
2266  if (lexerState)
2267  currentLexerState = *lexerState;
2268  else
2269  currentLexerState.clear();
2270 
2271  // shift tokens while possible
2272  errPos = -1;
2273  while (shift(pos)) pos++;
2274 
2275  // error handling
2276  if (errPos >= 0) { // parse error
2277  if (errorPos) *errorPos = errPos;
2278  if (errorToken) *errorToken = errToken;
2279  if (incrementalPos) *incrementalPos = errPos;
2280  if (incrementalStacks) *incrementalStacks = nextStacks;
2281  if (incrementalMatches) incrementalMatches->clear();
2282  if (lexerState) *lexerState = currentLexerState;
2283  errPos = -1;
2284  errToken = Cat();
2285  nextStacks.clear();
2286  currentMatches.clear();
2287  currentLexerState.clear();
2288  return QList<Match>();
2289  }
2290  if (errorPos) *errorPos = -1;
2291  if (errorToken) *errorToken = Cat();
2292 
2293  // filter all the matches which have expect-type next token constraints
2294  {
2295  int s = currentMatches.size();
2296  for (int i=0; i<s; ) {
2297  Match &m = currentMatches[i];
2298  if (m.nextTokenConstraints.expect.isEmpty()) i++; else {
2299  if (i) currentMatches.swap(0, i);
2300  currentMatches.removeFirst();
2301  s--;
2302  }
2303  }
2304  }
2305 
2306  // incremental parsing
2307  if (incrementalPos) *incrementalPos = pos;
2308  if (incrementalMatches) *incrementalMatches = currentMatches;
2309  if (incrementalStacks) *incrementalStacks = nextStacks;
2310  if (lexerState) *lexerState = currentLexerState;
2311  nextStacks.clear();
2312  currentLexerState.clear();
2313 
2314  // accept current matches
2315  QList<Match> result = currentMatches;
2316  currentMatches.clear();
2317  return result;
2318 }
2319 
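A minimal usage sketch of the error-reporting out-parameters above; the helper name parseAndReport is invented here, and it assumes the parser's grammar and input source have already been configured elsewhere:

#include <QtDebug>
#include "dyngenpar.h"

// Run a complete (non-incremental) parse and report any error position and
// token. parseAndReport is a hypothetical helper, not part of DynGenPar.
QList<DynGenPar::Match> parseAndReport(DynGenPar::Parser &parser)
{
  int errorPos = -1;
  DynGenPar::Cat errorToken = DynGenPar::Cat();
  // On failure, parse() returns an empty list and fills the out-parameters.
  QList<DynGenPar::Match> matches = parser.parse(&errorPos, &errorToken);
  if (matches.isEmpty() && errorPos >= 0)
    qWarning() << "parse error at position" << errorPos
               << "on token" << errorToken;
  return matches;
}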
2322 
2328 Predictions Parser::computePredictions(const QList<StackItem> &stacks) const
2329 {
2330  Predictions predict;
2331  foreach (const StackItem &stack, stacks) {
2332  if (stack.type()) { // type 6 item
2333  const StackItemType6 *data
2334  = static_cast<const StackItemType6 *>(stack.data());
2335  predict.insert(data->leaves().at(data->i()).cat);
2336  } else // type 0 item
2337  predict.insert(static_cast<const StackItemType0 *>(stack.data())
2338  ->effCat());
2339  }
2340  return predict;
2341 }
2342 
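A sketch of how the incremental out-parameters of parse() feed into computePredictions(); the helper name predictNext is invented and the prediction member is assumed to be publicly callable:

#include "dyngenpar.h"

// Hypothetical helper: run an incremental parse and ask which categories
// could legally appear next. Assumes the incremental API is public.
DynGenPar::Predictions predictNext(DynGenPar::Parser &parser)
{
  int pos = -1;                        // -1 requests a fresh incremental parse
  QList<DynGenPar::StackItem> stacks;  // receives the surviving parser stacks
  QList<DynGenPar::Match> matches;     // receives the matches found so far
  parser.parse(0, 0, &pos, &stacks, &matches);
  // The stacks describe what the parser is still waiting for;
  // computePredictions() turns them into a set of predicted categories.
  return parser.computePredictions(stacks);
}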
2344 void Parser::expandNonterminalPredictionRecurse(CatArg cat,
2345  QHash<Cat, QSet<Cat> > &result,
2346  QSet<Cat> &mark) const
2347 {
2348  mark.insert(cat);
2349  foreach (const Rule &rule, rules.value(cat)) {
2350  QListIterator<Cat> i(rule);
2351  /* For each nonempty rule (empty rules are not interesting for prediction),
2352  we handle at least the first item in the rule as a prediction: if it's a
2353  token, it is our prediction and the corresponding nonterminal is this
2354  category; if it's a nonterminal, we need to process the prediction
2355  recursively, unless it is already marked, indicating direct or indirect
2356  left recursion (and allowing us to stop) or the repetition of an already
2357  tried possibility (also allowing us to stop, since we do not care about
2358  parse trees here, only the last category). Then, as long as the first i
2359  categories are nullable and the (i+1)th category exists, we repeat the
2360  process for the (i+1)th category. */
2361  while (i.hasNext()) {
2362  Cat cati = effectiveCat(i.next());
2363  if (isToken(cati)) {
2364  result[cat].insert(cati); // record nonterminal and token
2365  break; // tokens are not nullable
2366  }
2367  // now cati is a nonterminal
2368  if (!mark.contains(cati))
2369  expandNonterminalPredictionRecurse(cati, result, mark);
2370  if (!nullable.contains(cati)) break;
2371  }
2372  }
2373 }
2374 
2378 
2388 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPrediction(CatArg cat) const
2389 {
2390  QHash<Cat, QSet<Cat> > result;
2391  QSet<Cat> mark;
2392  if (isToken(cat))
2393  qWarning("trying to expand terminal prediction");
2394  else
2395  expandNonterminalPredictionRecurse(cat, result, mark);
2396  return result;
2397 }
2398 
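A sketch combining the two steps: the prediction set from an incremental parse can be flattened into concrete start tokens with expandNonterminalPrediction(). The helper name initialTokens is invented; note that categories which are already tokens expand to nothing here and trigger the qWarning above:

#include "dyngenpar.h"

// Hypothetical helper: collect the tokens that could start any of the
// predicted nonterminals.
QSet<DynGenPar::Cat> initialTokens(const DynGenPar::Parser &parser,
                                   const DynGenPar::Predictions &predictions)
{
  QSet<DynGenPar::Cat> tokens;
  foreach (const DynGenPar::Cat &cat, predictions) {
    // maps each reachable nonterminal to the tokens it can start with
    const QHash<DynGenPar::Cat, QSet<DynGenPar::Cat> > expanded
      = parser.expandNonterminalPrediction(cat);
    foreach (const QSet<DynGenPar::Cat> &tokenSet, expanded)
      tokens |= tokenSet;
  }
  return tokens;
}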
2400 void Parser::expandNonterminalPredictionRecurseC(CatArg cat,
2401  QHash<Cat, QSet<Cat> > &result,
2402  QSet<Cat> mark, int ruleno,
2403  const PseudoCatScope &scope,
2404  const NextTokenConstraints
2405  &nextTokenConstraints)
2406 {
2407  mark.insert(cat);
2408  QList<Rule> ruleList = rules.value(cat);
2409  int firstRule = ruleno >= 0 ? ruleno : 0;
2410  int lastRule = ruleno >= 0 ? ruleno : ruleList.size() - 1;
2411  for (int currRule = firstRule; currRule <= lastRule; currRule++) {
2412  const Rule &rule = ruleList.at(currRule);
2413  QListIterator<Cat> i(rule);
2414  /* For each nonempty rule (empty rules are not interesting for prediction),
2415  we handle at least the first item in the rule as a prediction: if it's a
2416  token, it is our prediction and the corresponding nonterminal is this
2417  category; if it's a nonterminal, we need to process the prediction
2418  recursively, unless it is already marked, indicating direct or indirect
2419  left recursion (and allowing us to stop). Then, as long as the first i
2420  categories are nullable and the (i+1)th category exists, we repeat the
2421  process for the (i+1)th category.
2422  We optimize the common case of no epsilons to skip because we do not have
2423  to consider context-sensitive constraints in that case. */
2424  Cat cat1 = Cat();
2425  if (i.hasNext()) {
2426  cat1 = i.next();
2427  Cat effCat1 = effectiveCat(cat1);
2428  if (isToken(effCat1)) {
2429  if (validateNextTokenConstraints(effCat1, nextTokenConstraints))
2430  result[cat].insert(effCat1); // record nonterminal and token
2431  continue; // tokens are not nullable
2432  }
2433  // now effCat1 is a nonterminal
2434  /* If we have a P-constraint here, that constraint forces the category to
2435  be epsilon (because it must be identical to something already matched,
2436  which is epsilon). So don't predict inside it. */
2437  if (!mark.contains(effCat1)
2438  && !scope.hasPConstraint(cat1)) {
2439  int mcfgRuleno = -1;
2440  PseudoCatScope mcfgScope;
2441  if (scope.hasMcfgConstraint(cat1)) {
2442  QPair<int, PseudoCatScope> mcfgConstraint
2443  = scope.mcfgConstraint(cat1);
2444  mcfgRuleno = mcfgConstraint.first;
2445  mcfgScope = mcfgConstraint.second;
2446  }
2447  expandNonterminalPredictionRecurseC(effCat1, result, mark, mcfgRuleno,
2448  mcfgScope, nextTokenConstraints);
2449  }
2450  if (!nullable.contains(effCat1)) continue;
2451  }
2452  {
2453  QList<Match> currentMatches;
2454  // match cat(i-1) to epsilon first
2455  /* FIXME: We discard the parse trees here. Would it be more efficient
2456  to use a variant of matchToEpsilon which doesn't build them?
2457  OTOH, this way, we can reuse the matchToEpsilon cache. */
2458  const QList<Match> componentMatches = matchToEpsilon(cat1, scope);
2459  if (componentMatches.isEmpty())
2460  continue; // We didn't match epsilon, so we're done with this rule.
2461  /* If we have one version without context-sensitive constraints, the
2462  others are redundant. */
2463  bool haveUnconstrained = false;
2464  foreach (const Match &cm, componentMatches) {
2465  if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
2466  && cm.nextTokenConstraints.taboo.isEmpty()) {
2467  haveUnconstrained = true;
2468  break;
2469  }
2470  }
2471  if (haveUnconstrained)
2472  currentMatches.append(Match(0, Node(), 0, scope, nextTokenConstraints));
2473  else {
2474  foreach (const Match &cm, componentMatches) {
2475  currentMatches.append(Match(0, Node(), 0, cm.scope,
2476  nextTokenConstraints));
2477  if (!cm.nextTokenConstraints.expect.isEmpty())
2478  currentMatches.last().nextTokenConstraints.expect.append(
2479  cm.nextTokenConstraints.expect);
2480  if (!cm.nextTokenConstraints.taboo.isEmpty())
2481  currentMatches.last().nextTokenConstraints.taboo.append(
2482  cm.nextTokenConstraints.taboo);
2483  }
2484  }
2485  while (i.hasNext()) {
2486  // now update cati for the new i
2487  Cat cati = i.next();
2488  Cat effCati = effectiveCat(cati);
2489  if (isToken(effCati)) {
2490  bool nextTokenConstraintsPass = false;
2491  foreach (const Match &m, currentMatches)
2492  if (validateNextTokenConstraints(effCati, m.nextTokenConstraints)) {
2493  nextTokenConstraintsPass = true;
2494  break;
2495  }
2496  if (nextTokenConstraintsPass)
2497  result[cat].insert(effCati); // record nonterminal and token
2498  break; // tokens are not nullable
2499  }
2500  // now effCati is a nonterminal
2501  if (!mark.contains(effCati)) {
2502  foreach (const Match &m, currentMatches)
2503  /* If we have a P-constraint here, that constraint forces the
2504  category to be epsilon (because it must be identical to something
2505  already matched, which is epsilon). So don't predict inside it.
2506  */
2507  if (!m.scope.hasPConstraint(cati)) {
2508  int mcfgRuleno = -1;
2509  PseudoCatScope mcfgScope;
2510  if (m.scope.hasMcfgConstraint(cati)) {
2511  QPair<int, PseudoCatScope> mcfgConstraint
2512  = m.scope.mcfgConstraint(cati);
2513  mcfgRuleno = mcfgConstraint.first;
2514  mcfgScope = mcfgConstraint.second;
2515  }
2516  expandNonterminalPredictionRecurseC(effCati, result, mark,
2517  mcfgRuleno, mcfgScope,
2518  m.nextTokenConstraints);
2519  }
2520  }
2521  if (!nullable.contains(effCati)) break;
2522  int s = currentMatches.size();
2523  for (int j=0; j<s; ) {
2524  Match &m = currentMatches[j];
2525  /* FIXME: We discard the parse trees here. Would it be more efficient
2526  to use a variant of matchToEpsilon which doesn't build them?
2527  OTOH, this way, we can reuse the matchToEpsilon cache. */
2528  const QList<Match> componentMatches = matchToEpsilon(effCati,
2529  m.scope);
2530  if (componentMatches.isEmpty()) {
2531  if (j) currentMatches.swap(0, j);
2532  currentMatches.removeFirst();
2533  s--;
2534  } else {
2535  int cs = componentMatches.size();
2536  /* If we have one version without context-sensitive constraints, the
2537  others are redundant. */
2538  bool haveUnconstrained = false;
2539  for (int k=0; k<cs; k++) {
2540  const Match &cm = componentMatches.at(k);
2541  if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
2542  && cm.nextTokenConstraints.taboo.isEmpty()) {
2543  haveUnconstrained = true;
2544  break;
2545  }
2546  }
2547  if (!haveUnconstrained) {
2548  for (int k=1; k<cs; k++) {
2549  const Match &cm = componentMatches.at(k);
2550  currentMatches.append(Match(0, Node(), 0, cm.scope,
2551  m.nextTokenConstraints));
2552  if (!cm.nextTokenConstraints.expect.isEmpty())
2553  currentMatches.last().nextTokenConstraints.expect.append(
2554  cm.nextTokenConstraints.expect);
2555  if (!cm.nextTokenConstraints.taboo.isEmpty())
2556  currentMatches.last().nextTokenConstraints.taboo.append(
2557  cm.nextTokenConstraints.taboo);
2558  }
2559  const Match &cm = componentMatches.first();
2560  m.scope = cm.scope;
2561  if (!cm.nextTokenConstraints.expect.isEmpty())
2562  m.nextTokenConstraints.expect.append(
2563  cm.nextTokenConstraints.expect);
2564  if (!cm.nextTokenConstraints.taboo.isEmpty())
2565  m.nextTokenConstraints.taboo.append(
2566  cm.nextTokenConstraints.taboo);
2567  }
2568  j++;
2569  }
2570  }
2571  if (currentMatches.isEmpty())
2572  break; // We didn't match epsilon, so we're done with this rule.
2573  }
2574  }
2575  }
2576 }
2577 
2581 
2587 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat)
2588 {
2589  QHash<Cat, QSet<Cat> > result;
2590  if (isToken(cat))
2591  qWarning("trying to expand terminal prediction");
2592  else
2593  expandNonterminalPredictionRecurseC(cat, result, QSet<Cat>(), -1,
2594  PseudoCatScope(),
2595  NextTokenConstraints());
2596  return result;
2597 }
2598 
2601 
2619 MultiPredictions Parser::computeMultiPredictions(const QList<StackItem> &stacks)
2620  const
2621 {
2622  MultiPredictions predictMulti;
2623  foreach (const StackItem &stack, stacks) {
2624  if (stack.type()) { // type 6 item
2625  /* We will define the literal as the full string of the P constraint here,
2626  which is very easy to obtain. This might not strictly be a literal in
2627  the sense of the definition, since it could be obtained from a whole
2628  tree of nonterminals, but it definitely always contains the literal (in
2629  the strict sense) containing the predicted category. */
2630  const StackItemType6 *data
2631  = static_cast<const StackItemType6 *>(stack.data());
2632  QList<Node> leaves = data->leaves();
2633  int i = data->i();
2634  QList<Cat> prediction, literal;
2635  int l = leaves.size();
2636  for (int j=i; j<l; j++)
2637  prediction.append(leaves.at(j).cat);
2638  literal = prediction;
2639  for (int j=i-1; j>=0; j--)
2640  literal.prepend(leaves.at(j).cat);
2641  MultiPrediction multiPrediction(literal, data->tree().cat);
2642  if (!predictMulti.contains(prediction, multiPrediction))
2643  predictMulti.insert(prediction, multiPrediction);
2644  } else { // type 0 item
2645  const StackItemType0 *data
2646  = static_cast<const StackItemType0 *>(stack.data());
2647  Cat cat = data->effCat();
2648  if (isToken(cat)) {
2649  const QList<StackItem> &parents = data->parents();
2650  if (parents.isEmpty()) { // huh, start category is a terminal?
2651  QList<Cat> list;
2652  list << cat;
2653  MultiPrediction multiPrediction(list, cat);
2654  if (!predictMulti.contains(list, multiPrediction))
2655  predictMulti.insert(list, multiPrediction);
2656  } else {
2657  foreach (const StackItem &parent, parents) {
2658  const StackItemType3 *parentData
2659  = static_cast<const StackItemType3 *>(parent.data());
2660  Rule rule = parentData->rule();
2661  int i = parentData->i();
2662  int len = 1;
2663  while (i+len < rule.size() && isToken(rule.at(i+len))) len++;
2664  QList<Cat> literal = rule.mid(i, len);
2665  while (i > 0 && isToken(rule.at(i-1))) i--, len++;
2666  MultiPrediction multiPrediction(rule.mid(i, len),
2667  parentData->tree().cat);
2668  if (!predictMulti.contains(literal, multiPrediction))
2669  predictMulti.insert(literal, multiPrediction);
2670  }
2671  }
2672  } else {
2673  QList<Cat> list;
2674  list << cat;
2675  MultiPrediction multiPrediction(list, cat);
2676  if (!predictMulti.contains(list, multiPrediction))
2677  predictMulti.insert(list, multiPrediction);
2678  }
2679  }
2680  }
2681  return predictMulti;
2682 }
2683 
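A sketch of consuming the result, assuming computeMultiPredictions() is publicly callable; only the hash keys (the predicted literals) are inspected, since MultiPrediction's members are declared in dyngenpar.h, and the helper name is invented:

#include <QtDebug>
#include "dyngenpar.h"

// Hypothetical helper: list the predicted literals (the hash keys).
void dumpPredictedLiterals(const DynGenPar::MultiPredictions &predictions)
{
  foreach (const QList<DynGenPar::Cat> &literal, predictions.uniqueKeys())
    qDebug() << "predicted literal:" << literal;
}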
2685 void Parser::expandNonterminalPredictionMultiRecurse(CatArg cat,
2686  QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> &mark) const
2687 {
2688  mark.insert(cat);
2689  foreach (const Rule &rule, rules.value(cat)) {
2690  int l = rule.size();
2691  /* For each nonempty rule (empty rules are not interesting for prediction),
2692  we handle at least the first item in the rule as a prediction: if it's a
2693  token, the literal starting with that token is our prediction and the
2694  corresponding nonterminal is this category; if it's a nonterminal, we
2695  need to process the prediction recursively, unless it is already marked,
2696  indicating direct or indirect left recursion (and allowing us to stop) or
2697  the repetition of an already tried possibility (also allowing us to stop,
2698  since we do not care about parse trees here, only the last category).
2699  Then, as long as the first i categories are nullable and the (i+1)th
2700  category exists, we repeat the process for the (i+1)th category. */
2701  for (int i = 0; i < l; i++) {
2702  Cat cati = effectiveCat(rule.at(i));
2703  if (isToken(cati)) {
2704  int len = 1;
2705  while (i+len < l && isToken(rule.at(i+len))) len++;
2706  result[cat].insert(rule.mid(i, len)); // record nonterminal and literal
2707  break; // tokens are not nullable
2708  }
2709  // now cati is a nonterminal
2710  if (!mark.contains(cati))
2711  expandNonterminalPredictionMultiRecurse(cati, result, mark);
2712  if (!nullable.contains(cati)) break;
2713  }
2714  }
2715 }
2716 
2720 
2730 QHash<Cat, QSet<QList<Cat> > >
2731 Parser::expandNonterminalPredictionMulti(CatArg cat) const
2732 {
2733  QHash<Cat, QSet<QList<Cat> > > result;
2734  QSet<Cat> mark;
2735  if (isToken(cat))
2736  qWarning("trying to expand terminal prediction");
2737  else
2738  expandNonterminalPredictionMultiRecurse(cat, result, mark);
2739  return result;
2740 }
2741 
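A sketch of the multi-token expansion in use; the helper name dumpInitialLiterals is invented and the member is assumed to be publicly callable:

#include <QtDebug>
#include <QHash>
#include "dyngenpar.h"

// Hypothetical helper: print the initial literals each reachable
// nonterminal can start with, for a given nonterminal to expand.
void dumpInitialLiterals(const DynGenPar::Parser &parser,
                         DynGenPar::CatArg cat)
{
  const QHash<DynGenPar::Cat, QSet<QList<DynGenPar::Cat> > > expanded
    = parser.expandNonterminalPredictionMulti(cat);
  QHashIterator<DynGenPar::Cat, QSet<QList<DynGenPar::Cat> > > it(expanded);
  while (it.hasNext()) {
    it.next();
    foreach (const QList<DynGenPar::Cat> &literal, it.value())
      qDebug() << it.key() << "can start with" << literal;
  }
}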
2743 void Parser::expandNonterminalPredictionMultiRecurseC(CatArg cat,
2744  QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> mark, int ruleno,
2745  const PseudoCatScope &scope, const NextTokenConstraints &nextTokenConstraints)
2746 {
2747  mark.insert(cat);
2748  QList<Rule> ruleList = rules.value(cat);
2749  int firstRule = ruleno >= 0 ? ruleno : 0;
2750  int lastRule = ruleno >= 0 ? ruleno : ruleList.size() - 1;
2751  for (int currRule = firstRule; currRule <= lastRule; currRule++) {
2752  const Rule &rule = ruleList.at(currRule);
2753  int l = rule.size();
2754  /* For each nonempty rule (empty rules are not interesting for prediction),
2755  we handle at least the first item in the rule as a prediction: if it's a
2756  token, the literal starting with that token is our prediction and the
2757  corresponding nonterminal is this category; if it's a nonterminal, we
2758  need to process the prediction recursively, unless it is already marked,
2759  indicating direct or indirect left recursion (and allowing us to stop).
2760  Then, as long as the first i categories are nullable and the (i+1)th
2761  category exists, we repeat the process for the (i+1)th category.
2762  We optimize the common case of no epsilons to skip because we do not have
2763  to consider context-sensitive constraints in that case. */
2764  Cat cat1 = Cat();
2765  if (l) {
2766  cat1 = rule.first();
2767  Cat effCat1 = effectiveCat(cat1);
2768  if (isToken(effCat1)) {
2769  if (validateNextTokenConstraints(effCat1, nextTokenConstraints)) {
2770  int len = 1;
2771  while (len < l && isToken(rule.at(len))) len++;
2772  // record nonterminal and literal
2773  result[cat].insert(rule.mid(0, len));
2774  }
2775  continue; // tokens are not nullable
2776  }
2777  // now effCat1 is a nonterminal
2778  /* If we have a P-constraint here, that constraint forces the category to
2779  be epsilon (because it must be identical to something already matched,
2780  which is epsilon). So don't predict inside it. */
2781  if (!mark.contains(effCat1)
2782  && !scope.hasPConstraint(cat1)) {
2783  int mcfgRuleno = -1;
2784  PseudoCatScope mcfgScope;
2785  if (scope.hasMcfgConstraint(cat1)) {
2786  QPair<int, PseudoCatScope> mcfgConstraint
2787  = scope.mcfgConstraint(cat1);
2788  mcfgRuleno = mcfgConstraint.first;
2789  mcfgScope = mcfgConstraint.second;
2790  }
2791  expandNonterminalPredictionMultiRecurseC(effCat1, result, mark,
2792  mcfgRuleno, mcfgScope,
2793  nextTokenConstraints);
2794  }
2795  if (!nullable.contains(effCat1)) continue;
2796  }
2797  {
2798  QList<Match> currentMatches;
2799  // match cat(i-1) to epsilon first
2800  /* FIXME: We discard the parse trees here. Would it be more efficient
2801  to use a variant of matchToEpsilon which doesn't build them?
2802  OTOH, this way, we can reuse the matchToEpsilon cache. */
2803  const QList<Match> componentMatches = matchToEpsilon(cat1, scope);
2804  if (componentMatches.isEmpty())
2805  continue; // We didn't match epsilon, so we're done with this rule.
2806  /* If we have one version without context-sensitive constraints, the
2807  others are redundant. */
2808  bool haveUnconstrained = false;
2809  foreach (const Match &cm, componentMatches) {
2810  if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
2811  && cm.nextTokenConstraints.taboo.isEmpty()) {
2812  haveUnconstrained = true;
2813  break;
2814  }
2815  }
2816  if (haveUnconstrained)
2817  currentMatches.append(Match(0, Node(), 0, scope, nextTokenConstraints));
2818  else {
2819  foreach (const Match &cm, componentMatches) {
2820  currentMatches.append(Match(0, Node(), 0, cm.scope,
2821  nextTokenConstraints));
2822  if (!cm.nextTokenConstraints.expect.isEmpty())
2823  currentMatches.last().nextTokenConstraints.expect.append(
2824  cm.nextTokenConstraints.expect);
2825  if (!cm.nextTokenConstraints.taboo.isEmpty())
2826  currentMatches.last().nextTokenConstraints.taboo.append(
2827  cm.nextTokenConstraints.taboo);
2828  }
2829  }
2830  for (int i = 1; i < l; i++) {
2831  // now update cati for the new i
2832  Cat cati = rule.at(i);
2833  Cat effCati = effectiveCat(cati);
2834  if (isToken(effCati)) {
2835  bool nextTokenConstraintsPass = false;
2836  foreach (const Match &m, currentMatches)
2837  if (validateNextTokenConstraints(effCati, m.nextTokenConstraints)) {
2838  nextTokenConstraintsPass = true;
2839  break;
2840  }
2841  if (nextTokenConstraintsPass) {
2842  int len = 1;
2843  while (i+len < l && isToken(rule.at(i+len))) len++;
2844  // record nonterminal and literal
2845  result[cat].insert(rule.mid(i, len));
2846  }
2847  break; // tokens are not nullable
2848  }
2849  // now effCati is a nonterminal
2850  if (!mark.contains(effCati)) {
2851  foreach (const Match &m, currentMatches)
2852  /* If we have a P-constraint here, that constraint forces the
2853  category to be epsilon (because it must be identical to something
2854  already matched, which is epsilon). So don't predict inside it.
2855  */
2856  if (!m.scope.hasPConstraint(cati)) {
2857  int mcfgRuleno = -1;
2858  PseudoCatScope mcfgScope;
2859  if (m.scope.hasMcfgConstraint(cati)) {
2860  QPair<int, PseudoCatScope> mcfgConstraint
2861  = m.scope.mcfgConstraint(cati);
2862  mcfgRuleno = mcfgConstraint.first;
2863  mcfgScope = mcfgConstraint.second;
2864  }
2865  expandNonterminalPredictionMultiRecurseC(effCati, result, mark,
2866  mcfgRuleno, mcfgScope,
2867  m.nextTokenConstraints);
2868  }
2869  }
2870  if (!nullable.contains(effCati)) break;
2871  int s = currentMatches.size();
2872  for (int j=0; j<s; ) {
2873  Match &m = currentMatches[j];
2874  /* FIXME: We discard the parse trees here. Would it be more efficient
2875  to use a variant of matchToEpsilon which doesn't build them?
2876  OTOH, this way, we can reuse the matchToEpsilon cache. */
2877  const QList<Match> componentMatches = matchToEpsilon(effCati,
2878  m.scope);
2879  if (componentMatches.isEmpty()) {
2880  if (j) currentMatches.swap(0, j);
2881  currentMatches.removeFirst();
2882  s--;
2883  } else {
2884  int cs = componentMatches.size();
2885  /* If we have one version without context-sensitive constraints, the
2886  others are redundant. */
2887  bool haveUnconstrained = false;
2888  for (int k=0; k<cs; k++) {
2889  const Match &cm = componentMatches.at(k);
2890  if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
2891  && cm.nextTokenConstraints.taboo.isEmpty()) {
2892  haveUnconstrained = true;
2893  break;
2894  }
2895  }
2896  if (!haveUnconstrained) {
2897  for (int k=1; k<cs; k++) {
2898  const Match &cm = componentMatches.at(k);
2899  currentMatches.append(Match(0, Node(), 0, cm.scope,
2900  m.nextTokenConstraints));
2901  if (!cm.nextTokenConstraints.expect.isEmpty())
2902  currentMatches.last().nextTokenConstraints.expect.append(
2903  cm.nextTokenConstraints.expect);
2904  if (!cm.nextTokenConstraints.taboo.isEmpty())
2905  currentMatches.last().nextTokenConstraints.taboo.append(
2906  cm.nextTokenConstraints.taboo);
2907  }
2908  const Match &cm = componentMatches.first();
2909  m.scope = cm.scope;
2910  if (!cm.nextTokenConstraints.expect.isEmpty())
2911  m.nextTokenConstraints.expect.append(
2912  cm.nextTokenConstraints.expect);
2913  if (!cm.nextTokenConstraints.taboo.isEmpty())
2914  m.nextTokenConstraints.taboo.append(
2915  cm.nextTokenConstraints.taboo);
2916  }
2917  j++;
2918  }
2919  }
2920  if (currentMatches.isEmpty())
2921  break; // We didn't match epsilon, so we're done with this rule.
2922  }
2923  }
2924  }
2925 }
2926 
2930 
2936 QHash<Cat, QSet<QList<Cat> > >
2937 Parser::expandNonterminalPredictionMultiC(CatArg cat)
2938 {
2939  QHash<Cat, QSet<QList<Cat> > > result;
2940  if (isToken(cat))
2941  qWarning("trying to expand terminal prediction");
2942  else
2943  expandNonterminalPredictionMultiRecurseC(cat, result, QSet<Cat>(), -1,
2944  PseudoCatScope(),
2945  NextTokenConstraints());
2946  return result;
2947 }
2948 
2951 
2960 ConstrainedPredictions Parser::computeConstrainedPredictions(
2961  const QList<StackItem> &stacks) const
2962 {
2963  ConstrainedPredictions predict;
2964  foreach (const StackItem &stack, stacks) {
2965  if (stack.type()) { // type 6 item
2966  const StackItemType6 *data
2967  = static_cast<const StackItemType6 *>(stack.data());
2968  CatArg cat = data->leaves().at(data->i()).cat;
2969  NextTokenConstraints nextTokenConstraints;
2970  if (!predict.contains(cat, nextTokenConstraints))
2971  predict.insert(cat, nextTokenConstraints);
2972  } else { // type 0 item
2973  const StackItemType0 *data
2974  = static_cast<const StackItemType0 *>(stack.data());
2975  Cat cat = data->effCat();
2976  const QList<StackItem> &parents = data->parents();
2977  if (parents.isEmpty()) { // start category, no constraints
2978  NextTokenConstraints nextTokenConstraints;
2979  if (!predict.contains(cat, nextTokenConstraints))
2980  predict.insert(cat, nextTokenConstraints);
2981  } else {
2982  if (isToken(cat)) {
2983  // validate the next token constraints immediately for tokens
2984  bool nextTokenConstraintsPass = false;
2985  foreach (const StackItem &parent, parents) {
2986  const StackItemType3 *parentData
2987  = static_cast<const StackItemType3 *>(parent.data());
2988  NextTokenConstraints nextTokenConstraints
2989  = parentData->nextTokenConstraints();
2990  if (validateNextTokenConstraints(cat, nextTokenConstraints)) {
2991  nextTokenConstraintsPass = true;
2992  break;
2993  }
2994  }
2995  if (nextTokenConstraintsPass) {
2996  NextTokenConstraints nextTokenConstraints;
2997  if (!predict.contains(cat, nextTokenConstraints))
2998  predict.insert(cat, nextTokenConstraints);
2999  }
3000  } else {
3001  foreach (const StackItem &parent, parents) {
3002  const StackItemType3 *parentData
3003  = static_cast<const StackItemType3 *>(parent.data());
3004  NextTokenConstraints nextTokenConstraints
3005  = parentData->nextTokenConstraints();
3006  if (!predict.contains(cat, nextTokenConstraints))
3007  predict.insert(cat, nextTokenConstraints);
3008  }
3009  }
3010  }
3011  }
3012  }
3013  return predict;
3014 }
3015 
3019 
3028 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat,
3029  const NextTokenConstraints &nextTokenConstraints)
3030 {
3031  QHash<Cat, QSet<Cat> > result;
3032  if (isToken(cat))
3033  qWarning("trying to expand terminal prediction");
3034  else
3035  expandNonterminalPredictionRecurseC(cat, result, QSet<Cat>(), -1,
3036  PseudoCatScope(),
3037  nextTokenConstraints);
3038  return result;
3039 }
3040 
3044 
3055 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat,
3056  const QList<NextTokenConstraints> &nextTokenConstraintsList)
3057 {
3058  switch (nextTokenConstraintsList.size()) {
3059  case 0: // An empty disjunctive list is false, probably not what you want!
3060  qWarning("list of next token constraints is empty");
3061  return QHash<Cat, QSet<Cat> >();
3062  case 1: // common case, handle the next token constraints directly
3063  return expandNonterminalPredictionC(cat,
3064  nextTokenConstraintsList.first());
3065  default: // general case, predict first, filter later
3066  {
3067  QHash<Cat, QSet<Cat> > result = expandNonterminalPredictionC(cat);
3068  QHash<Cat, QSet<Cat> > filteredResult;
3069  QHashIterator<Cat, QSet<Cat> > it(result);
3070  while (it.hasNext()) {
3071  it.next();
3072  foreach (CatArg cati, it.value()) {
3073  bool nextTokenConstraintsPass = false;
3074  foreach (const NextTokenConstraints &nextTokenConstraints,
3075  nextTokenConstraintsList) {
3076  if (validateNextTokenConstraints(cati, nextTokenConstraints)) {
3077  nextTokenConstraintsPass = true;
3078  break;
3079  }
3080  }
3081  if (nextTokenConstraintsPass)
3082  filteredResult[it.key()].insert(cati);
3083  }
3084  }
3085  return filteredResult;
3086  }
3087  }
3088 }
3089 
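A sketch tying the constrained prediction and expansion steps together; the helper name constrainedInitialTokens is invented and the members used are assumed to be public. Predicted categories that are already tokens expand to nothing here; a fuller helper would add them to the result directly:

#include "dyngenpar.h"

// Hypothetical helper: expand the constrained predictions from an
// incremental parse into the tokens allowed to appear next.
QSet<DynGenPar::Cat> constrainedInitialTokens(
  DynGenPar::Parser &parser, const QList<DynGenPar::StackItem> &stacks)
{
  QSet<DynGenPar::Cat> tokens;
  const DynGenPar::ConstrainedPredictions predictions
    = parser.computeConstrainedPredictions(stacks);
  foreach (const DynGenPar::Cat &cat, predictions.uniqueKeys()) {
    // pass on every constraint set collected for this category, so the
    // expansion only reports tokens at least one constraint set allows
    const QHash<DynGenPar::Cat, QSet<DynGenPar::Cat> > expanded
      = parser.expandNonterminalPredictionC(cat, predictions.values(cat));
    foreach (const QSet<DynGenPar::Cat> &tokenSet, expanded)
      tokens |= tokenSet;
  }
  return tokens;
}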
3092 
3112 ConstrainedMultiPredictions Parser::computeConstrainedMultiPredictions(
3113  const QList<StackItem> &stacks) const
3114 {
3115  ConstrainedMultiPredictions predictMulti;
3116  foreach (const StackItem &stack, stacks) {
3117  if (stack.type()) { // type 6 item
3118  /* We will define the literal as the full string of the P constraint here,
3119  which is very easy to obtain. This might not strictly be a literal in
3120  the sense of the definition, since it could be obtained from a whole
3121  tree of nonterminals, but it definitely always contains the literal (in
3122  the strict sense) containing the predicted category. */
3123  const StackItemType6 *data
3124  = static_cast<const StackItemType6 *>(stack.data());
3125  QList<Node> leaves = data->leaves();
3126  int i = data->i();
3127  QList<Cat> prediction, literal;
3128  int l = leaves.size();
3129  for (int j=i; j<l; j++)
3130  prediction.append(leaves.at(j).cat);
3131  literal = prediction;
3132  for (int j=i-1; j>=0; j--)
3133  literal.prepend(leaves.at(j).cat);
3134  ConstrainedMultiPrediction multiPrediction(literal, data->tree().cat);
3135  if (!predictMulti.contains(prediction, multiPrediction))
3136  predictMulti.insert(prediction, multiPrediction);
3137  } else { // type 0 item
3138  const StackItemType0 *data
3139  = static_cast<const StackItemType0 *>(stack.data());
3140  Cat cat = data->effCat();
3141  const QList<StackItem> &parents = data->parents();
3142  if (isToken(cat)) {
3143  if (parents.isEmpty()) { // huh, start category is a terminal?
3144  QList<Cat> list;
3145  list << cat;
3146  ConstrainedMultiPrediction multiPrediction(list, cat);
3147  if (!predictMulti.contains(list, multiPrediction))
3148  predictMulti.insert(list, multiPrediction);
3149  } else {
3150  foreach (const StackItem &parent, parents) {
3151  const StackItemType3 *parentData
3152  = static_cast<const StackItemType3 *>(parent.data());
3153  // validate the next token constraints immediately for tokens
3154  NextTokenConstraints nextTokenConstraints
3155  = parentData->nextTokenConstraints();
3156  if (!validateNextTokenConstraints(cat, nextTokenConstraints))
3157  continue;
3158  Rule rule = parentData->rule();
3159  int i = parentData->i();
3160  int len = 1;
3161  while (i+len < rule.size() && isToken(rule.at(i+len))) len++;
3162  QList<Cat> literal = rule.mid(i, len);
3163  while (i > 0 && isToken(rule.at(i-1))) i--, len++;
3164  ConstrainedMultiPrediction multiPrediction(rule.mid(i, len),
3165  parentData->tree().cat);
3166  if (!predictMulti.contains(literal, multiPrediction))
3167  predictMulti.insert(literal, multiPrediction);
3168  }
3169  }
3170  } else {
3171  if (parents.isEmpty()) { // start category, no constraints
3172  QList<Cat> list;
3173  list << cat;
3174  ConstrainedMultiPrediction multiPrediction(list, cat);
3175  if (!predictMulti.contains(list, multiPrediction))
3176  predictMulti.insert(list, multiPrediction);
3177  } else {
3178  foreach (const StackItem &parent, parents) {
3179  const StackItemType3 *parentData
3180  = static_cast<const StackItemType3 *>(parent.data());
3181  NextTokenConstraints nextTokenConstraints
3182  = parentData->nextTokenConstraints();
3183  QList<Cat> list;
3184  list << cat;
3185  ConstrainedMultiPrediction multiPrediction(list, cat,
3186  nextTokenConstraints);
3187  if (!predictMulti.contains(list, multiPrediction))
3188  predictMulti.insert(list, multiPrediction);
3189  }
3190  }
3191  }
3192  }
3193  }
3194  return predictMulti;
3195 }
3196 
3200 
3209 QHash<Cat, QSet<QList<Cat> > >
3210 Parser::expandNonterminalPredictionMultiC(CatArg cat,
3211  const NextTokenConstraints &nextTokenConstraints)
3212 {
3213  QHash<Cat, QSet<QList<Cat> > > result;
3214  if (isToken(cat))
3215  qWarning("trying to expand terminal prediction");
3216  else
3217  expandNonterminalPredictionMultiRecurseC(cat, result, QSet<Cat>(), -1,
3218  PseudoCatScope(),
3219  nextTokenConstraints);
3220  return result;
3221 }
3222 
3226 
3237 QHash<Cat, QSet<QList<Cat> > >
3238 Parser::expandNonterminalPredictionMultiC(CatArg cat,
3239  const QList<NextTokenConstraints> &nextTokenConstraintsList)
3240 {
3241  switch (nextTokenConstraintsList.size()) {
3242  case 0: // An empty disjunctive list is false, probably not what you want!
3243  qWarning("list of next token constraints is empty");
3244  return QHash<Cat, QSet<QList<Cat> > >();
3245  case 1: // common case, handle the next token constraints directly
3246  return expandNonterminalPredictionMultiC(cat,
3247  nextTokenConstraintsList.first());
3248  default: // general case, predict first, filter later
3249  {
3250  QHash<Cat, QSet<QList<Cat> > > result
3251  = expandNonterminalPredictionMultiC(cat);
3252  QHash<Cat, QSet<QList<Cat> > > filteredResult;
3253  QHashIterator<Cat, QSet<QList<Cat> > > it(result);
3254  while (it.hasNext()) {
3255  it.next();
3256  foreach (const QList<Cat> &literal, it.value()) {
3257  CatArg cati = literal.first();
3258  bool nextTokenConstraintsPass = false;
3259  foreach (const NextTokenConstraints &nextTokenConstraints,
3260  nextTokenConstraintsList) {
3261  if (validateNextTokenConstraints(cati, nextTokenConstraints)) {
3262  nextTokenConstraintsPass = true;
3263  break;
3264  }
3265  }
3266  if (nextTokenConstraintsPass)
3267  filteredResult[it.key()].insert(literal);
3268  }
3269  }
3270  return filteredResult;
3271  }
3272  }
3273 }
3274 
3275 } // end namespace
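Putting the pieces together, a purely illustrative completion helper (all names invented, incremental API assumed public) that runs an incremental parse of a partial input and lists the literals that could continue it under the collected constraints:

#include <QtDebug>
#include "dyngenpar.h"

// Hypothetical helper: parse a partial input incrementally and show the
// literals that could continue it. Only the hash keys are used, since
// ConstrainedMultiPrediction's fields are declared in dyngenpar.h.
void suggestLiteralCompletions(DynGenPar::Parser &parser)
{
  int pos = -1;                        // start a fresh incremental parse
  QList<DynGenPar::StackItem> stacks;
  QList<DynGenPar::Match> matches;
  parser.parse(0, 0, &pos, &stacks, &matches);
  const DynGenPar::ConstrainedMultiPredictions predictions
    = parser.computeConstrainedMultiPredictions(stacks);
  foreach (const QList<DynGenPar::Cat> &literal, predictions.uniqueKeys())
    qDebug() << "could continue with:" << literal;
}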