/* DynGenPar: Dynamic Generalized Parser — dyngenpar.cpp (implementation)

 Copyright (C) 2010-2012 Kevin Kofler <kevin.kofler@chello.at>
 Copyright (C) 2014-2018 DAGOPT Optimization Technologies GmbH
 written by Kevin Kofler <kofler@dagopt.com>

 Support by the Austrian Science Fund FWF under contract numbers
 P20631 and P23554 is gratefully acknowledged.

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>. */

434 #include "dyngenpar.h"
435 
437 uint qHash(const QList<DynGenPar::Cat> &list) {
438  uint result = 0, i = 0;
439  foreach (DynGenPar::CatArg cat, list)
440  result += (++i) * qHash(cat);
441  return result;
442 }
443 
444 namespace DynGenPar {
446 bool Rule::serializeLabels = true;
447 
449 bool Rule::serializeActions = true;
450 
452 uint qHash(const NextTokenConstraints &nextTokenConstraints)
453 {
454  return (qHash(nextTokenConstraints.expect) << 4)
455  + qHash(nextTokenConstraints.taboo);
456 }
457 
461 
464 static void parseTreeToPmcfgSyntaxTreeUnify(Node &dest, const Node &src)
465 {
466  if (src.cat != dest.cat || src.data != dest.data)
467  qFatal("invalid parse tree: PMCFG unification failed (mismatched nodes)");
468 
469  // check for metavariables and do the trivial unification
470  if (src.children.isEmpty()) return;
471  if (dest.children.isEmpty()) {
472  dest.children = src.children;
473  return;
474  }
475 
476  int s = dest.children.size();
477  if (src.children.size() != s)
478  qFatal("invalid parse tree: PMCFG unification failed (mismatched "
479  "alternative counts)");
480  for (int i=0; i<s; i++) {
481  const Alternative &srcAlternative = src.children.at(i);
482  Alternative &destAlternative = dest.children[i];
483  int l = destAlternative.size();
484  if (srcAlternative.size() != l
485  || srcAlternative.label() != destAlternative.label())
486  qFatal("invalid parse tree: PMCFG unification failed (mismatched "
487  "alternatives)");
488  for (int j=0; j<l; j++)
489  parseTreeToPmcfgSyntaxTreeUnify(destAlternative[j], srcAlternative.at(j));
490  }
491 }
492 
494 static void parseTreeToPmcfgSyntaxTreeRecurse(const Node &parseTree,
495  Node &syntaxTree)
496 {
497  /* first copy the data (most useful if a token was passed as an argument to a
498  PMCFG function, an extension to standard PMCFGs allowed by this
499  implementation) */
500  syntaxTree.data = parseTree.data;
501  bool isFirst = true;
502  foreach(const Alternative &alternative, parseTree.children) {
503  // create a new alternative if this is not the first one
504  if (isFirst)
505  isFirst = false;
506  else
507  syntaxTree.children.append(Alternative());
508  Alternative &newAlternative = syntaxTree.children.last();
509 
510  QVariant label = alternative.label();
511  if (label.canConvert<PmcfgComponentInfo>()) {
512  PmcfgComponentInfo componentInfo = label.value<PmcfgComponentInfo>();
513  newAlternative.setLabel(componentInfo.pmcfgRule.label());
514  int numArgs = componentInfo.pmcfgRule.size();
515  for (int i=0; i<numArgs; i++) {
516  CatArg arg = componentInfo.pmcfgRule.at(i);
517  Node newChild(arg);
518  const QVector<int> &argPositions = componentInfo.argPositions.at(i);
519  if (argPositions.isEmpty())
520  newChild.children.clear(); // no alternative == metavariable
521  else {
522  parseTreeToPmcfgSyntaxTreeRecurse(
523  alternative.at(argPositions.first()), newChild);
524  int s = argPositions.size();
525  for (int i=1; i<s; i++) {
526  Node tempChild(arg);
527  parseTreeToPmcfgSyntaxTreeRecurse(alternative.at(
528  argPositions.at(i)), tempChild);
529  parseTreeToPmcfgSyntaxTreeUnify(newChild, tempChild);
530  }
531  }
532  newAlternative.append(newChild);
533  }
534  } else newAlternative = alternative; // non-PMCFG category, copy parse tree
535  }
536 }
537 
540 {
541  Node syntaxTree(parseTree.cat);
542  parseTreeToPmcfgSyntaxTreeRecurse(parseTree, syntaxTree);
543  return syntaxTree;
544 }
545 
547 QHash<QString, ActionDeserializer *> Action::deserializers;
548 
550 
553 bool Parser::isLiteral(const QList<Cat> &list) const
554 {
555  foreach (CatArg cat, list)
556  if (!isToken(cat)) return false;
557  return true;
558 }
559 
562 
566 Cat Parser::effectiveCat(CatArg cat) const
567 {
568  if (pseudoCats.contains(cat)) // convert pseudo-category
569  return pseudoCats.value(cat).first;
570  return cat; // not a pseudo-category, just return the original category
571 }
572 
575 
579 {
580  // clear caches
581  initialGraph.clear();
582  neighborhoods.clear();
583  nullable.clear();
584  epsilonMatches.clear();
585 
586  // search for nullable categories
587  /* First consider only rules which directly expand to epsilon, i.e. with empty
588  right hand side. */
589  QHashIterator<Cat, QList<Rule> > it(rules);
590  while (it.hasNext()) {
591  it.next();
592  Cat cat = it.key();
593  foreach (const Rule &rule, it.value()) {
594  if (rule.isEmpty()) {
595  nullable.insert(cat);
596  break;
597  }
598  }
599  }
600  /* Now consider rules which expand to nullable categories, inductively until
601  no new nullables are added. This works because we do not have to consider
602  directly or indirectly recursive rules, because those can only expand to
603  epsilon if the category can already expand to epsilon without that rule. */
604  unsigned newNullables;
605  do {
606  newNullables = 0u;
607  it = rules;
608  while (it.hasNext()) {
609  it.next();
610  Cat cat = it.key();
611  if (nullable.contains(cat)) continue; // already nullable
612  foreach (const Rule &rule, it.value()) {
613  foreach (CatArg cati, rule) {
614  if (!nullable.contains(effectiveCat(cati))) goto next_rule;
615  }
616  nullable.insert(cat);
617  newNullables++;
618  break;
619 next_rule:
620  ;
621  }
622  }
623  } while (newNullables);
624 
625  // now compute the initial graph
626  it = rules;
627  while (it.hasNext()) {
628  it.next();
629  Cat cat = it.key();
630  // only keep the rule number if we need it, in order to allow unification
631  bool keepRuleNumbers = componentCats.contains(cat);
632  QList<Rule> ruleList = it.value();
633  int n = ruleList.size();
634  for (int i=0; i<n; i++) {
635  /* For each nonempty rule (empty rules do not show up in the initial
636  graph), we insert at least one edge from the first item in the rule
637  to the left hand side, with 0 epsilons skipped. Then, as long as the
638  first i categories are nullable and the (i+1)th category exists, we
639  insert an edge with i epsilons skipped from the (i+1)th category to the
640  left hand side. */
641  const Rule &rule = ruleList.at(i);
642  QListIterator<Cat> it(rule);
643  int epsilonsSkipped=0;
644  while (it.hasNext()) {
645  Cat cati = effectiveCat(it.next());
646  initialGraph.insert(cati, FullRule(cat, rule, epsilonsSkipped,
647  keepRuleNumbers ? i : 0));
648  if (!nullable.contains(cati)) break;
649  epsilonsSkipped++;
650  }
651  }
652  }
653 }
654 
657 
662 void Parser::processRule(CatArg cat, const Rule &rule, int skip, int ruleno,
663  QQueue<Cat> &nullableQueue, bool &clearEpsilonMatches)
664 {
665  /* Continue processing the rule where we stopped when building the original
666  initial graph. (If the rule is new, start at position 0.) As long as the
667  first i categories are nullable and the (i+1)th category exists, we insert
668  an edge with i epsilons skipped from the (i+1)th category to the left hand
669  side.*/
670  QListIterator<Cat> i(rule);
671  int epsilonsSkipped=skip;
672  while (i.hasNext()) {
673  Cat cati = effectiveCat(i.next());
674  initialGraph.insert(cati, FullRule(cat, rule, epsilonsSkipped, ruleno));
675  if (!nullable.contains(cati)) break;
676  epsilonsSkipped++;
677  }
678  if (epsilonsSkipped == rule.size()) { // !i.hasNext() is not sufficient!
679  /* We reached the end of the rule, so the left hand side is now nullable
680  and we have a new way to get epsilon matches. */
681  clearEpsilonMatches = true;
682  if (!nullable.contains(cat) && !nullableQueue.contains(cat))
683  nullableQueue.enqueue(cat);
684  }
685 }
686 
689 void Parser::addRule(CatArg cat, const Rule &rule)
690 {
691  // add the rule to the grammar
692  QList<Rule> &ruleList = rules[cat];
693  // only keep the rule number if we need it, in order to allow unification
694  int ruleno = componentCats.contains(cat) ? ruleList.size() : 0;
695  ruleList << rule;
696 
697  // update initialGraph and nullable
698  QQueue<Cat> nullableQueue; // new nullable categories
699  bool clearEpsilonMatches = false;
700  /* first process the rule we just added:
701  Add the new edges, if any, to the initial graph, and mark the category as
702  nullable if the rule can match the empty string. */
703  processRule(cat, rule, 0, ruleno, nullableQueue, clearEpsilonMatches);
704  // now handle categories which have become nullable
705  while (!nullableQueue.isEmpty()) {
706  Cat newNullable = nullableQueue.dequeue();
707  // mark the category as nullable
708  nullable.insert(newNullable);
709  /* now update the initial graph:
710  We need to go through all the edges going out of this category in the
711  initial graph and process the rule starting from where we stopped when
712  building the initial graph (due to the non-nullable category). If we
713  reach the end of the rule, the left hand side has become nullable as
714  well, and is added to the queue if it wasn't already nullable. */
715  foreach (const FullRule &rule, initialGraph.values(newNullable))
716  processRule(rule.cat, rule.rule, rule.epsilonsSkipped+1, rule.ruleno,
717  nullableQueue, clearEpsilonMatches);
718  }
719  if (clearEpsilonMatches) {
720  epsilonMatches.clear();
721  neighborhoods.clear();
722  } else if (!rule.isEmpty()) {
723  // remove only those neighborhoods that are no longer valid
724  typedef QPair<Cat, Cat> CatPair;
725  QHashIterator<CatPair, QList<FullRule> > it(neighborhoods);
726  QList<CatPair> invalidate;
727  CatArg ruleFirst = rule.first();
728  bool firstIsNullable = nullable.contains(ruleFirst);
729  while (it.hasNext()) {
730  it.next();
731  const CatPair &key = it.key();
732  if (reachable(cat, key.second, QSet<Cat>())
733  && (firstIsNullable || reachable(key.first, ruleFirst, QSet<Cat>()))) {
734  invalidate << key;
735  }
736  }
737  foreach (const CatPair &key, invalidate) neighborhoods.remove(key);
738  }
739 }
740 
742 
743 bool Parser::computePmcfgDimension(CatArg cat, const Rule &rule,
744  const Pmcfg &pmcfg)
745 {
746  // look the function up
747  Function function = pmcfg.lookupFunction(rule.label());
748  /* check if the dimension is consistent with existing rules for the category,
749  if any; generate a list of components if we don't have any yet */
750  int dim = function.size();
751  if (dim == 1) {
752  if (catComponents.contains(cat)) {
753  qWarning("dimension mismatch: 1D function for multidimensional category");
754  return false; // dimension mismatch
755  }
756  } else {
757  if (rules.contains(cat)) {
758  qWarning("dimension mismatch: multidimensional function for 1D category");
759  return false; // dimension mismatch
760  }
761  if (catComponents.contains(cat)) {
762  if (catComponents.value(cat).size() != dim) {
763  qWarning("dimension mismatch: %dD function for %dD category", dim,
764  catComponents.value(cat).size());
765  return false; // dimension mismatch
766  }
767  } else { // not known yet, we need to create a mapping
768  QList<Cat> components;
769  for (int i=0; i<dim; i++) {
770  // generate an internal category for the component
771 #ifdef DYNGENPAR_INTEGER_CATEGORIES
772  Cat component = generateCat();
773 #else
774  Cat component = QString("%1[%2]").arg(cat).arg(i);
775 #endif
776  components.append(component);
777  componentCats.insert(component, qMakePair(cat, i));
778  }
779  catComponents.insert(cat, components);
780  }
781  }
782  return true;
783 }
784 
786 
787 bool Parser::convertPmcfgRule(CatArg cat, const Rule &rule, const Pmcfg &pmcfg,
788  bool updateCaches)
789 {
790  // look the function up
791  Function function = pmcfg.lookupFunction(rule.label());
792  /* get the list of components to use (assume that the dimensions have been
793  already validated by computePmcfgDimension) */
794  int dim = function.size();
795  QList<Cat> components;
796  if (dim == 1)
797  components.append(cat);
798  else
799  components = catComponents.value(cat);
800 
801  // determine the number of used arguments of the function
802  int numArgs = 0;
803  foreach(const Sequence &sequence, function) {
804  foreach(const Term &term, sequence) {
805  if (term.isComponent() && term.arg >= numArgs)
806  numArgs = term.arg + 1;
807  }
808  }
809 
810  // check that enough arguments were passed to the function
811  if (rule.size() < numArgs) {
812  qWarning("not enough arguments for PMCFG function");
813  return false;
814  }
815 
816  // find the components of the arguments passed
817  QVector<QList<Cat> > argComponents(numArgs);
818  for (int i=0; i<numArgs; i++) {
819  CatArg arg = rule.at(i);
820  if (catComponents.contains(arg))
821  argComponents[i] = catComponents.value(arg);
822  else if (pmcfg.rules.contains(arg) || pmcfg.cfRules.contains(arg)
823  || pmcfg.tokens.contains(arg))
824  argComponents[i].append(arg);
825  else
826  /* unused category, ignore the unreachable rule
827  We do not know the correct dimension for the unused category. */
828  return true;
829  }
830 
831  // verify the dimensions of the arguments passed
832  foreach(const Sequence &sequence, function) {
833  foreach(const Term &term, sequence) {
834  if (term.isComponent()
835  && term.component >= argComponents.at(term.arg).size()) {
836  qWarning("dimension mismatch: attempt to use component %d of a %dD "
837  "category", term.component, argComponents.at(term.arg).size());
838  return false; // dimension mismatch
839  }
840  }
841  }
842 
843  // determine the number of times each argument is used
844  QVector<int> usageCounts(numArgs);
845  foreach(const Sequence &sequence, function) {
846  foreach(const Term &term, sequence) {
847  if (term.isComponent())
848  usageCounts[term.arg]++;
849  }
850  }
851 
852  // generate pseudo-categories
853  QVector<QHash<int, Cat> > newPseudoCats(numArgs);
854  foreach(const Sequence &sequence, function) {
855  foreach(const Term &term, sequence) {
856  if (term.isComponent() && usageCounts.at(term.arg) > 1) {
857  if (!newPseudoCats[term.arg].contains(term.component)) {
858 #ifdef DYNGENPAR_INTEGER_CATEGORIES
859  Cat pseudoCat = generateCat();
860 #else
861  Cat pseudoCat = QString("Pseudo%1").arg(generateCat());
862 #endif
863  newPseudoCats[term.arg].insert(term.component, pseudoCat);
864  }
865  }
866  }
867  }
868 
869  // record pseudo-categories
870  for (int i=0; i<numArgs; i++) {
871  const QHash<int, Cat> &pseudoCatHash = newPseudoCats.at(i);
872  if (!pseudoCatHash.isEmpty()) {
873  QList<Cat> pseudoCatList = pseudoCatHash.values();
874  QHashIterator<int, Cat> it(pseudoCatHash);
875  while (it.hasNext()) {
876  it.next();
877  Cat pseudoCat = it.value();
878  pseudoCats.insert(pseudoCat, qMakePair(argComponents.at(i).at(it.key()),
879  pseudoCatList));
880  }
881  }
882  }
883 
884  // now finish evaluating the function and record the converted rule
885  for (int i=0; i<dim; i++) {
886  const Sequence &sequence = function.at(i);
887  PmcfgComponentInfo componentInfo(rule);
888  int s = sequence.size();
889  for (int j=0; j<s; j++) {
890  const Term &term = sequence.at(j);
891  if (term.isComponent())
892  componentInfo.argPositions[term.arg].append(j);
893  }
894  Rule internalRule(QVariant::fromValue(componentInfo));
895  // copy next token constraints from the PMCFG sequence
896  internalRule.nextTokenConstraints = sequence.nextTokenConstraints;
897  /* Also try to use next token constraints from the PMCFG rule. That's not a
898  good place to write them because the constraints will affect all
899  components of a multi-dimensional rule, which is probably not what you
900  want. But at least for 1D rules, it makes sense, and it costs us almost
901  nothing to support this. (Appending an empty list is trivial.) */
902  internalRule.nextTokenConstraints.expect.append(
904  internalRule.nextTokenConstraints.taboo.append(
906  foreach (const Term &term, sequence)
907  internalRule.append(term.isToken() ? term.token
908  : newPseudoCats.at(term.arg).isEmpty()
909  ? argComponents.at(term.arg).at(term.component)
910  : newPseudoCats.at(term.arg).value(term.component));
911 
912  CatArg catComponent = components.at(i);
913  if (updateCaches)
914  addRule(catComponent, internalRule);
915  else
916  rules[catComponent].append(internalRule);
917  }
918 
919  return true;
920 }
921 
923 
932 bool Parser::loadPmcfg(const Pmcfg &pmcfg)
933 {
934  // copy tokens, startCat and cfRules
935  tokens = pmcfg.tokens;
936  startCat = pmcfg.startCat;
937  rules = pmcfg.cfRules;
938 
939  // convert functions and PMCFG rules to rules and pseudo-categories
940  pseudoCats.clear();
941  componentCats.clear();
942  catComponents.clear();
943  {
944  QHashIterator<Cat, QList<Rule> > it(pmcfg.rules);
945  while (it.hasNext()) {
946  Cat cat = it.next().key();
947  foreach (const Rule &rule, it.value())
948  if (!computePmcfgDimension(cat, rule, pmcfg)) return false;
949  }
950  }
951  {
952  QHashIterator<Cat, QList<Rule> > it(pmcfg.rules);
953  while (it.hasNext()) {
954  Cat cat = it.next().key();
955  foreach (const Rule &rule, it.value())
956  if (!convertPmcfgRule(cat, rule, pmcfg, false)) return false;
957  }
958  }
959 
960  // initialize the initial graph and the caches
961  initCaches();
962  return true;
963 }
964 
968 
974 bool Parser::addPmcfgRule(Pmcfg &pmcfg, CatArg cat, const Rule &rule)
975 {
976  if (!computePmcfgDimension(cat, rule, pmcfg)
977  || !convertPmcfgRule(cat, rule, pmcfg, true)) return false;
978  pmcfg.rules[cat].append(rule);
979  return true;
980 }
981 
983 
988 bool Parser::reachable(CatArg cat, CatArg target, QSet<Cat> mark)
989 {
990  if (cat == target) return true;
991  mark.insert(cat);
992  foreach (const FullRule &rule, initialGraph.values(cat)) {
993  if (mark.contains(rule.cat)) continue;
994  if (reachable(rule.cat, target, mark)) return true;
995  }
996  return false;
997 }
998 
1000 
1004 QList<FullRule> Parser::neighborhood(CatArg cat, CatArg target)
1005 {
1006  // use cached value if we have it
1007  QPair<Cat, Cat> key(cat, target);
1008  if (neighborhoods.contains(key)) return neighborhoods.value(key);
1009 
1010  QList<FullRule> result;
1011  foreach (const FullRule &rule, initialGraph.values(cat)) {
1012  if (reachable(rule.cat, target, QSet<Cat>())) result.append(rule);
1013  }
1014  // cache the result for future reuse
1015  neighborhoods.insert(key, result);
1016  return result;
1017 }
1018 
1020 void Parser::finalizeMatches(QList<Match> &matches, CatArg cat,
1021  const PseudoCatScope &scope)
1022 {
1023  int s = matches.size();
1024  for (int i=0; i<s; i++) {
1025  Match &m = matches[i];
1026  PseudoCatScope childScope = m.scope;
1027  m.scope = scope;
1028  m.scope.pConstraints().insert(cat,
1029  qMakePair(qMakePair(m.tree, m.nextTokenConstraints), 0));
1030  foreach (CatArg cati, pseudoCats.value(cat).second)
1031  m.scope.mcfgConstraints().insert(cati, qMakePair(m.ruleno, childScope));
1032  m.ruleno = 0; // We don't need ruleno anymore.
1033  }
1034 }
1035 
1037 void Parser::copyScope(QList<Match> &matches, const PseudoCatScope &scope)
1038 {
1039  if (scope.isNull()) return; // don't bother copying a null scope
1040  int s = matches.size();
1041  for (int i=0; i<s; i++)
1042  matches[i].scope = scope;
1043 }
1044 
1046 QList<Match> Parser::matchCFToEpsilon(CatArg cat, QSet<Cat> mark)
1047 {
1048  QList<Match> result;
1049  bool haveNextTokenConstraints = false;
1050 
1051  mark.insert(cat);
1052  bool isFirst = true;
1053  foreach (const Rule &rule, rules.value(cat)) {
1054  // only consider nullable rules with only unmarked categories
1055  foreach (CatArg cati, rule) {
1056  if (!nullable.contains(cati) || mark.contains(cati)) goto next_rule;
1057  }
1058  {
1059  QList<Match> currentMatches;
1060  Node node(cat);
1061  node.children.first().setLabel(rule.label());
1062  currentMatches.append(Match(0, node, 0, PseudoCatScope()));
1063  if (!rule.nextTokenConstraints.expect.isEmpty()) {
1064  haveNextTokenConstraints = true;
1065  currentMatches.first().nextTokenConstraints.expect =
1067  }
1068  if (!rule.nextTokenConstraints.taboo.isEmpty()) {
1069  haveNextTokenConstraints = true;
1070  currentMatches.first().nextTokenConstraints.taboo =
1072  }
1073  foreach (CatArg cati, rule) {
1074  int s = currentMatches.size();
1075  for (int i=0; i<s; ) {
1076  Match &m = currentMatches[i];
1077  const QList<Match> componentMatches
1078  = matchToEpsilonRecurse(cati, mark, m.scope);
1079  if (componentMatches.isEmpty()) {
1080  if (i) currentMatches.swap(0, i);
1081  currentMatches.removeFirst();
1082  s--;
1083  } else {
1084  int cs = componentMatches.size();
1085  for (int j=1; j<cs; j++) {
1086  const Match &cm = componentMatches.at(j);
1087  Node newTree = m.tree;
1088  newTree.children.first().append(cm.tree);
1089  currentMatches.append(Match(m.len + cm.len, newTree, 0, cm.scope,
1091  if (!cm.nextTokenConstraints.expect.isEmpty()) {
1092  haveNextTokenConstraints = true;
1093  currentMatches.last().nextTokenConstraints.expect.append(
1095  }
1096  if (!cm.nextTokenConstraints.taboo.isEmpty()) {
1097  haveNextTokenConstraints = true;
1098  currentMatches.last().nextTokenConstraints.taboo.append(
1100  }
1101  }
1102  const Match &cm = componentMatches.first();
1103  m.len += cm.len;
1104  m.tree.children.first().append(cm.tree);
1105  m.scope = cm.scope;
1106  if (!cm.nextTokenConstraints.expect.isEmpty()) {
1107  haveNextTokenConstraints = true;
1108  m.nextTokenConstraints.expect.append(
1110  }
1111  if (!cm.nextTokenConstraints.taboo.isEmpty()) {
1112  haveNextTokenConstraints = true;
1113  m.nextTokenConstraints.taboo.append(
1115  }
1116  i++;
1117  }
1118  }
1119  }
1120  if (currentMatches.isEmpty()) goto next_rule;
1121  if (haveNextTokenConstraints) {
1122  /* We cannot just unify all the matches to one in this case. We will run
1123  the full unification process at the end. But clear the scopes. */
1124  int s = currentMatches.size();
1125  for (int i=0; i<s; i++) currentMatches[i].scope = PseudoCatScope();
1126  result.append(currentMatches);
1127  goto next_rule;
1128  }
1129  /* unify the matches:
1130  * if this is the first matching rule, make the first match the current
1131  result
1132  * otherwise, append the new alternative(s) from the first match to the
1133  current result
1134  * if there is more than one match, also append the alternative(s) from
1135  the additional matches */
1136  int i=0, s=currentMatches.size();
1137  if (isFirst) {
1138  isFirst = false;
1139  result.append(currentMatches.first());
1140  result.first().scope = PseudoCatScope(); // clear scope
1141  i++;
1142  }
1143  Match &firstResult = result.first();
1144  for (; i<s; i++)
1145  firstResult.tree.children.append(currentMatches.at(i).tree.children);
1146  }
1147 next_rule:
1148  ;
1149  }
1150  /* unify the result now (as much as possible) if we have next token
1151  constraints, otherwise it is already unified to a single match */
1152  if (haveNextTokenConstraints) unify(result);
1153  return result;
1154 }
1155 
1158 QList<Match> Parser::matchEffectiveCatToEpsilon(CatArg cat, QSet<Cat> mark)
1159 {
1160  QList<Match> result;
1161 
1162  /* if the effective category is 1-dimensional, don't bother about rule numbers
1163  and child scopes - they are not needed for P constraints (the only ones we
1164  can have on pseudo-categories for 1-dimensional effective categories) */
1165  if (!componentCats.contains(cat))
1166  return matchCFToEpsilon(cat, mark);
1167 
1168  mark.insert(cat);
1169  QList<Rule> ruleList = rules.value(cat);
1170  int s = ruleList.size();
1171  for (int i=0; i<s; i++) {
1172  const Rule &rule = ruleList.at(i);
1173  // only consider nullable rules with only unmarked categories
1174  foreach (CatArg cati, rule) {
1175  if (!nullable.contains(cati) || mark.contains(cati)) goto next_rule;
1176  }
1177  {
1178  QList<Match> currentMatches;
1179  Node node(cat);
1180  node.children.first().setLabel(rule.label());
1181  currentMatches.append(Match(0, node, i, PseudoCatScope(),
1182  rule.nextTokenConstraints));
1183  foreach (CatArg cati, rule) {
1184  int s = currentMatches.size();
1185  for (int i=0; i<s; ) {
1186  Match &m = currentMatches[i];
1187  const QList<Match> componentMatches
1188  = matchToEpsilonRecurse(cati, mark, m.scope);
1189  if (componentMatches.isEmpty()) {
1190  if (i) currentMatches.swap(0, i);
1191  currentMatches.removeFirst();
1192  s--;
1193  } else {
1194  int cs = componentMatches.size();
1195  for (int j=1; j<cs; j++) {
1196  const Match &cm = componentMatches.at(j);
1197  Node newTree = m.tree;
1198  newTree.children.first().append(cm.tree);
1199  currentMatches.append(Match(m.len + cm.len, newTree, m.ruleno,
1200  cm.scope, m.nextTokenConstraints));
1201  currentMatches.last().nextTokenConstraints.expect.append(
1203  currentMatches.last().nextTokenConstraints.taboo.append(
1205  }
1206  const Match &cm = componentMatches.first();
1207  m.len += cm.len;
1208  m.tree.children.first().append(cm.tree);
1209  m.scope = cm.scope;
1210  m.nextTokenConstraints.expect.append(
1213  i++;
1214  }
1215  }
1216  }
1217  // in this case, we cannot unify, so just append the matches to the result
1218  result.append(currentMatches);
1219  }
1220 next_rule:
1221  ;
1222  }
1223  return result;
1224 }
1225 
1227 QList<Match> Parser::matchToEpsilonRecurse(CatArg cat, QSet<Cat> mark,
1228  const PseudoCatScope &scope)
1229 {
1230  QList<Match> result;
1231 
1232  if (scope.hasPConstraint(cat)) {
1233  // handle P constraint
1234  QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
1235  = scope.pConstraint(cat);
1236  if (!pConstraint.second) // only matches if length == 0
1237  result.append(Match(0, pConstraint.first.first, 0, scope,
1238  pConstraint.first.second));
1239  return result;
1240  }
1241 
1242  // if this is a pseudo-category, it is not known yet, so get the effective
1243  // category (if this is not a pseudo-category, effCat is cat itself)
1244  Cat effCat = effectiveCat(cat);
1245 
1246  if (effCat != cat) { // cat is a pseudo-category
1247  if (mark.contains(effCat)) return result; // prevent infinite recursion
1248  mark.insert(cat);
1249 
1250  if (scope.hasMcfgConstraint(cat)) {
1251  // handle MCFG constraint
1252  QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
1253  const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);
1254  /* check if we can (possibly - there could be other PMCFG constraints
1255  preventing it) produce epsilon with this rule, and make sure the
1256  categories in the rule are not marked to prevent infinite recursion */
1257  foreach (CatArg cati, rule) {
1258  if (!nullable.contains(cati) || mark.contains(cati)) return result;
1259  }
1260  Node node(effCat);
1261  node.children.first().setLabel(rule.label());
1262  result.append(Match(0, node, mcfgConstraint.first, scope,
1263  rule.nextTokenConstraints));
1264  foreach (CatArg cati, rule) {
1265  int s = result.size();
1266  for (int i=0; i<s; ) {
1267  Match &m = result[i];
1268  const QList<Match> componentMatches
1269  = matchToEpsilonRecurse(cati, mark, m.scope);
1270  if (componentMatches.isEmpty()) {
1271  if (i) result.swap(0, i);
1272  result.removeFirst();
1273  s--;
1274  } else {
1275  int cs = componentMatches.size();
1276  for (int j=1; j<cs; j++) {
1277  const Match &cm = componentMatches.at(j);
1278  Node newTree = m.tree;
1279  newTree.children.first().append(cm.tree);
1280  result.append(Match(m.len + cm.len, newTree, m.ruleno,
1281  cm.scope, m.nextTokenConstraints));
1282  result.last().nextTokenConstraints.expect.append(
1284  result.last().nextTokenConstraints.taboo.append(
1286  }
1287  const Match &cm = componentMatches.first();
1288  m.len += cm.len;
1289  m.tree.children.first().append(cm.tree);
1290  m.scope = cm.scope;
1291  m.nextTokenConstraints.expect.append(
1294  i++;
1295  }
1296  }
1297  }
1298  } else // no PMCFG constraints on this pseudo-category
1299  result = matchEffectiveCatToEpsilon(effCat, mark);
1300 
1301  finalizeMatches(result, cat, scope);
1302  } else { // cat is a true category
1303  result = matchCFToEpsilon(cat, mark);
1304  copyScope(result, scope);
1305  }
1306 
1307  return result;
1308 }
1309 
1311 
1316 QList<Match> Parser::matchToEpsilon(CatArg cat, const PseudoCatScope &scope)
1317 {
1318  QList<Match> result;
1319 
1320  if (scope.hasPConstraint(cat)) {
1321  // handle P constraint
1322  QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
1323  = scope.pConstraint(cat);
1324  if (!pConstraint.second) // only matches if length == 0
1325  result.append(Match(0, pConstraint.first.first, 0, scope,
1326  pConstraint.first.second));
1327  return result;
1328  }
1329 
1330  // if this is a pseudo-category, it is not known yet, so get the effective
1331  // category (if this is not a pseudo-category, effCat is cat itself)
1332  Cat effCat = effectiveCat(cat);
1333 
1334  if (effCat != cat) { // cat is a pseudo-category
1335  if (scope.hasMcfgConstraint(cat)) {
1336  // handle MCFG constraint
1337  QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
1338  const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);
1339  /* check if we can (possibly - there could be other PMCFG constraints
1340  preventing it) produce epsilon with this rule */
1341  foreach (CatArg cati, rule) {
1342  if (!nullable.contains(cati)) return result;
1343  }
1344  Node node(effCat);
1345  node.children.first().setLabel(rule.label());
1346  result.append(Match(0, node, mcfgConstraint.first, scope,
1347  rule.nextTokenConstraints));
1348  foreach (CatArg cati, rule) {
1349  int s = result.size();
1350  for (int i=0; i<s; ) {
1351  Match &m = result[i];
1352  const QList<Match> componentMatches = matchToEpsilon(cati, m.scope);
1353  if (componentMatches.isEmpty()) {
1354  if (i) result.swap(0, i);
1355  result.removeFirst();
1356  s--;
1357  } else {
1358  int cs = componentMatches.size();
1359  for (int j=1; j<cs; j++) {
1360  const Match &cm = componentMatches.at(j);
1361  Node newTree = m.tree;
1362  newTree.children.first().append(cm.tree);
1363  result.append(Match(m.len + cm.len, newTree, m.ruleno,
1364  cm.scope, m.nextTokenConstraints));
1365  result.last().nextTokenConstraints.expect.append(
1367  result.last().nextTokenConstraints.taboo.append(
1369  }
1370  const Match &cm = componentMatches.first();
1371  m.len += cm.len;
1372  m.tree.children.first().append(cm.tree);
1373  m.scope = cm.scope;
1374  m.nextTokenConstraints.expect.append(
1377  i++;
1378  }
1379  }
1380  }
1381  } else { // no PMCFG constraints on this pseudo-category
1382  // We cannot cache the result here because the scopes can change.
1383  /* FIXME: Well, we could cache the result with a dummy scope and then
1384  compute the union. Is that worth it? */
1385  result = matchEffectiveCatToEpsilon(effCat, QSet<Cat>());
1386  }
1387 
1388  finalizeMatches(result, cat, scope);
1389  } else { // cat is a true category
1390  // memoize the matches for efficiency
1391  // FIXME: this will probably need to get smarter (or disabled) for
1392  // context-sensitive constraints
1393  if (epsilonMatches.contains(cat))
1394  result = epsilonMatches.value(cat);
1395  else {
1396  result = matchCFToEpsilon(cat, QSet<Cat>());
1397  epsilonMatches.insert(cat, result);
1398  }
1399  copyScope(result, scope);
1400  }
1401 
1402  return result;
1403 }
1404 
1406 bool Parser::matchesTokenRecurse(CatArg cat, CatArg token, QSet<Cat> mark) const
1407 {
1408  // Handle the trivial case first.
1409  if (isToken(cat)) return cat == token;
1410 
1411  // Now cat is a nonterminal.
1412  mark.insert(cat);
1413  foreach (const Rule &rule, rules.value(cat)) {
1414  // an epsilon rule cannot match a token
1415  if (rule.isEmpty()) continue;
1416  // for each element of the rule, try matching that element to the token and
1417  // the rest to epsilon
1418  int l = rule.size();
1419  for (int i=0; i<l; i++) {
1420  CatArg cati = rule.at(i);
1421  /* Left recursion cannot match a token if it doesn't match without the
1422  recursion because tokens are atomic. */
1423  if (mark.contains(cati)) continue; // ignore left recursion
1424  for (int j=0; j<l; j++) {
1425  if (i == j) continue;
1426  if (!nullable.contains(rule.at(j))) goto skip_this_i; // match epsilon
1427  }
1428  if (matchesTokenRecurse(cati, token, mark)) return true; // match token
1429 skip_this_i: ;
1430  }
1431  }
1432 
1433  // None of the rules matched.
1434  return false;
1435 }
1436 
1438 
1447 bool Parser::matchesToken(CatArg cat, CatArg token) const
1448 {
1449  return matchesTokenRecurse(cat, token, QSet<Cat>());
1450 }
1451 
1453 
1460 void Parser::collectLeaves(const Node &tree, QList<Node> &leaves)
1461 {
1462  if (isToken(tree.cat))
1463  leaves.append(tree);
1464  else {
1465  // consider only the first alternative - they must all match the same tokens
1466  foreach (const Node &node, tree.children.first())
1467  collectLeaves(node, leaves);
1468  }
1469 }
1470 
1472 bool Parser::validateNextTokenConstraints(CatArg token,
1473  const NextTokenConstraints &nextTokenConstraints) const
1474 {
1475  foreach(CatArg expect, nextTokenConstraints.expect)
1476  if (!matchesToken(expect, token)) return false;
1477  foreach(CatArg taboo, nextTokenConstraints.taboo)
1478  if (matchesToken(taboo, token)) return false;
1479  return true;
1480 }
1481 
1483 
//! Matches a category (or pseudo-category) at the given input position.
/*! Returns only the epsilon (zero-length) matches immediately; nonempty
    matches are deferred by recording type 0 (shift) or type 6 (P constraint)
    items on nextStacks, to be continued when the next token is shifted.
    \param cat the category or pseudo-category to match
    \param pos the current input position
    \param scope the PMCFG pseudo-category scope in effect
    \param stack the parse stack below this match (type() < 0 = toplevel)
    \param nextTokenConstraints constraints the next shifted token must obey */
QList<Match> Parser::match(CatArg cat, int pos, const PseudoCatScope &scope,
                           const StackItem &stack,
                           const NextTokenConstraints &nextTokenConstraints)
{
  QList<Match> result;

  if (scope.hasPConstraint(cat)) {
    // handle P constraint: cat is pinned to one specific parse tree
    QPair<QPair<Node, NextTokenConstraints>, int> pConstraint
      = scope.pConstraint(cat);
    if (pConstraint.second) { // length != 0
      // nonempty constrained tree: its token leaves must be shifted one by
      // one, so defer the work into a type 6 item
      QList<Node> leaves;
      collectLeaves(pConstraint.first.first, leaves);
      /* check the next token constraints right now, it's no use keeping the
         type 6 item just to have the shift throw it away, and this way we don't
         have to keep 2 sets of next token constraints in the type 6 item (one
         for the first token and one for the next token after the last) */
      if (validateNextTokenConstraints(leaves.first().cat,
                                       nextTokenConstraints)) {
        StackItem type6(stack, leaves, 0, pConstraint.first.first, scope,
                        pConstraint.first.second);
        nextStacks.append(type6);
      }
    } else { // length == 0
      // epsilon constrained tree: return it directly, merging the
      // constraint's next token constraints into the current ones
      NextTokenConstraints newNextTokenConstraints = nextTokenConstraints;
      newNextTokenConstraints.expect.append(pConstraint.first.second.expect);
      newNextTokenConstraints.taboo.append(pConstraint.first.second.taboo);
      result.append(Match(0, pConstraint.first.first, 0, scope,
                          newNextTokenConstraints));
    }
    return result;
  }

  // if this is a pseudo-category, get the effective category (if this is not a
  // pseudo-category, effCat is cat itself)
  Cat effCat = effectiveCat(cat);

  if (scope.hasMcfgConstraint(cat)) {
    // handle MCFG constraint: the scope pins cat to one specific rule number
    // (and the nested scope to use while expanding it)
    QPair<int, PseudoCatScope> mcfgConstraint = scope.mcfgConstraint(cat);
    const Rule &rule = rules.value(effCat).at(mcfgConstraint.first);

    // perform a top-down expansion of rule
    {
      StackItem type5(stack, cat, scope);
      Node node(effCat);
      node.children.first().setLabel(rule.label());
      result = matchRemaining(rule, 0, pos, 0, node, mcfgConstraint.second,
                              mcfgConstraint.first, type5,
                              nextTokenConstraints);
    }

    finalizeMatches(result, cat, scope);
  } else { // no PMCFG constraints on this category or pseudo-category
    // try epsilon matches first
    if (nullable.contains(effCat)) {
      result = matchToEpsilon(cat, scope);
      // epsilon matches consume no token, so the incoming next token
      // constraints must be carried over into each of them
      if (!nextTokenConstraints.expect.isEmpty()
          || !nextTokenConstraints.taboo.isEmpty()) {
        int s = result.size();
        for (int i=0; i<s; i++) {
          NextTokenConstraints &newNextTokenConstraints
            = result[i].nextTokenConstraints;
          newNextTokenConstraints.expect.append(nextTokenConstraints.expect);
          newNextTokenConstraints.taboo.append(nextTokenConstraints.taboo);
        }
      }
    }

    // now we want a nonempty match
    // prepare the parse stack for the next shift
    /* We do not store the next token constraints in the type 0 item because
       the type 3 parent already carries them, and they can be different for the
       different type 3 parents in the DAG-structured stack. */
    if (stack.type() < 0) { // toplevel match
      StackItem type0(QList<StackItem>(), cat, effCat, pos, scope);
      nextStacks.append(type0);
    } else { // subordinate match, i.e. nonempty stack
      // memoize type 0 item indices to build a graph-structured stack
      if (scope.isNull() && type0Indices.contains(cat))
        nextStacks[type0Indices.value(cat)].addParent(stack);
      else {
        QList<StackItem> parents;
        parents << stack;
        StackItem type0(parents, cat, effCat, pos, scope);
        if (scope.isNull()) type0Indices.insert(cat, nextStacks.size());
        nextStacks.append(type0);
      }
    }
  }

  // return the result (only epsilon matches, the rest is deferred)
  return result;
}
1583 
1585 
//! Verifies lookahead tokens against the remaining elements of a rule.
/*! Reads tokens from the input source starting at element i of rule and
    checks they can be derived from the rule. The input source is NOT rewound
    here on success; callers save and restore its state as needed.
    \param rule the rule to verify against
    \param i the index of the first rule element still to verify
    \param curr in/out: current input position, advanced per token read
    \param remaining in/out: lookahead tokens still to verify; 0 = success
    \param mark in/out: categories expanded without consuming a token (cuts
           off recursion); cleared whenever a token is consumed
    \return true if the lookahead is consistent with the rule */
bool Parser::verifyLookaheadRule(const Rule &rule, int i, int &curr,
                                 int &remaining, QSet<Cat> &mark)
{
  while (i < rule.size()) {
    Cat cati = effectiveCat(rule.at(i++));
    if (isToken(cati)) {
      // a token element must literally match the next input token
      Cat lookahead = inputSource->nextToken();
      curr++;
      mark.clear(); // consuming a token breaks any recursion cycle
      if (lookahead != cati) {
        return false;
      }
      if (!(--remaining)) {
        return true;
      }
    } else { // cati is a nonterminal
      if (!mark.contains(cati)) {
        mark.insert(cati);
        QList<Rule> catiRules = rules.value(cati);
        switch (catiRules.size()) {
          case 0:
            // no rules: this nonterminal cannot derive anything
            return false;
          case 1:
            {
              // deterministic case: no need to speculate / rewind
              Rule catiRule = catiRules.first();
              if (!verifyLookaheadRule(catiRule, 0, curr, remaining, mark))
                return false;
              if (!remaining) return true;
            }
            break;
          default:
            // nondeterministic case: try each alternative speculatively on
            // copies of the state, rewinding the input after failed attempts
            foreach (const Rule &catiRule, catiRules) {
              Node parseTree = inputSource->parseTree();
              LexerState lexerState = inputSource->saveState();
              int currCopy = curr;
              int remainingCopy = remaining;
              QSet<Cat> markCopy = mark;
              if (!verifyLookaheadRule(catiRule, 0, currCopy, remainingCopy,
                                       markCopy)) {
                inputSource->rewindTo(curr, parseTree, lexerState);
                continue;
              }
              if (!remainingCopy) {
                // all lookahead verified: commit the speculative state
                curr = currCopy;
                remaining = 0;
                mark = markCopy;
                return true;
              }
              // alternative matched but more lookahead is needed: continue
              // with the rest of the current rule under the speculation
              if (verifyLookaheadRule(rule, i, currCopy, remainingCopy,
                                      markCopy)) {
                curr = currCopy;
                remaining = remainingCopy;
                mark = markCopy;
                return true;
              }
              inputSource->rewindTo(curr, parseTree, lexerState);
            }
            // no alternative worked out
            return false;
        }
      }
    }
  }
  // ran out of rule elements without contradicting the lookahead
  return true;
}
1661 
1663 
//! Verifies lookahead by walking up the parse stack.
/*! Checks whether the next nTokens input tokens are consistent with some
    continuation of the parse recorded in the stack; used to decide whether
    a rule action may safely be executed.
    \param stack the stack item to continue from
    \param cat the category just completed below this item
    \param pos the current input position (input is restored before return)
    \param nTokens the number of lookahead tokens to verify */
bool Parser::verifyLookahead(const StackItem &stack, CatArg cat, int pos,
                             int nTokens)
{
  switch (stack.type()) {
    case 0:
      qFatal("type 0 items not supported by verifyLookahead");
    case 1:
      {
        const StackItemType1 *data
          = static_cast<const StackItemType1 *>(stack.data());
        const QList<StackItem> &parents = data->parents();
        if (parents.isEmpty()) { // start category
          // at the toplevel, the lookahead is valid iff the input ends here
          Node parseTree = inputSource->parseTree();
          LexerState lexerState = inputSource->saveState();
          Cat lookahead = inputSource->nextToken();
          bool ret = IS_EPSILON(lookahead);
          inputSource->rewindTo(pos, parseTree, lexerState);
          return ret;
        }
        // DAG-structured stack: any parent may provide a valid continuation
        foreach (const StackItem &parent, parents)
          if (verifyLookahead(parent, cat, pos, nTokens))
            return true;
        return false;
      }
    case 2:
      {
        // type 2 items are plain links: just continue with the parent
        const StackItemType2 *data
          = static_cast<const StackItemType2 *>(stack.data());
        const StackItem &parent = data->parent();
        return verifyLookahead(parent, cat, pos, nTokens);
      }
    case 3:
      {
        // type 3: in the middle of a rule; verify against the remaining rule
        // elements, then (if lookahead is left over) against the parent
        const StackItemType3 *data
          = static_cast<const StackItemType3 *>(stack.data());
        const StackItem &parent = data->parent();
        Node tree = data->tree();
        Rule rule = data->rule();
        int i = data->i() + 1;
        Node parseTree = inputSource->parseTree();
        LexerState lexerState = inputSource->saveState();
        int curr = pos;
        int remaining = nTokens;
        QSet<Cat> mark;
        bool ret = verifyLookaheadRule(rule, i, curr, remaining, mark);
        inputSource->rewindTo(pos, parseTree, lexerState);
        if (!ret) return false;
        if (!remaining) return true;
        return verifyLookahead(parent, tree.cat, curr, remaining);
      }
    case 4:
      {
        // type 4: a reduction towards target is pending; try every rule in
        // the neighborhood of the effective category
        const StackItemType4 *data
          = static_cast<const StackItemType4 *>(stack.data());
        const StackItem &parent = data->parent();
        Cat target = data->target();
        Cat effCat = effectiveCat(cat);
        foreach (const FullRule &rule, neighborhood(effCat, target)) {
          Node parseTree = inputSource->parseTree();
          LexerState lexerState = inputSource->saveState();
          int i = rule.epsilonsSkipped + 1;
          int curr = pos;
          int remaining = nTokens;
          QSet<Cat> mark;
          bool ret = verifyLookaheadRule(rule.rule, i, curr, remaining, mark);
          inputSource->rewindTo(pos, parseTree, lexerState);
          if (ret) {
            if (!remaining) return true;
            // climbed one rule: repeat from this item with the new category
            if (verifyLookahead(stack, rule.cat, curr, remaining)) return true;
          }
        }
        // only if the target itself was reached may we climb to the parent
        if (effCat == target)
          return verifyLookahead(parent, target, pos, nTokens);
        else
          return false;
      }
    case 5:
      {
        // type 5 items only restore a scope: continue with the parent
        const StackItemType5 *data
          = static_cast<const StackItemType5 *>(stack.data());
        const StackItem &parent = data->parent();
        Cat dataCat = data->cat();
        return verifyLookahead(parent, dataCat, pos, nTokens);
      }
    case 6:
      qFatal("type 6 items not supported by verifyLookahead");
    default:
      qFatal("invalid stack item type");
  }
}
1763 
1764 
1766 
1770 QList<Match> Parser::matchRemaining(const Rule &rule, int len, int curr, int i,
1771  const Node &tree,
1772  const PseudoCatScope &scope, int ruleno,
1773  const StackItem &stack,
1774  const NextTokenConstraints
1775  &nextTokenConstraints)
1776 {
1777  QList<Match> result;
1778  if (i < rule.size()) {
1779  QList<Match> matches;
1780  {
1781  StackItem type3(stack, rule, len, curr, i, tree, ruleno,
1782  nextTokenConstraints);
1783  matches = match(rule.at(i), curr, scope, type3, nextTokenConstraints);
1784  }
1785  foreach (const Match &m, matches) {
1786  Node newTree = tree;
1787  newTree.children.first().append(m.tree);
1788  result.append(matchRemaining(rule, len+m.len, curr+m.len, i+1, newTree,
1789  m.scope, ruleno, stack,
1791  }
1792  } else {
1793  if (rule.action) {
1794  int lookaheadTokens = rule.action->lookaheadTokens();
1795  if (lookaheadTokens <= 0 // looking ahead 0 tokens is trivial
1796  || verifyLookahead(stack, tree.cat, inputSource->currentPosition(),
1797  lookaheadTokens)) {
1798  ActionInfo info(tree, this);
1799  rule.action->execute(info);
1800  } else return result; // reject the matches if we are not executing the
1801  // action, anything else would confuse the app
1802  }
1803  NextTokenConstraints newNextTokenConstraints = nextTokenConstraints;
1804  newNextTokenConstraints.expect.append(rule.nextTokenConstraints.expect);
1805  newNextTokenConstraints.taboo.append(rule.nextTokenConstraints.taboo);
1806  result.append(Match(len, tree, ruleno, scope, newNextTokenConstraints));
1807  }
1808  return result;
1809 }
1810 
1812 
1815 Cat Parser::findFirstToken(const Node &tree)
1816 {
1817  CatArg cat = tree.cat;
1818  if (isToken(cat))
1819  return cat;
1820  else {
1821  // consider only the first alternative - they must all match the same tokens
1822  foreach (const Node &node, tree.children.first()) {
1823  Cat result = findFirstToken(node);
1824  if (!IS_EPSILON(result)) return result;
1825  }
1826  return Cat(); // This tree matches epsilon.
1827  }
1828 }
1829 
1832 void Parser::unify(QList<Match> &matches)
1833 {
1834  int l = matches.size();
1835  if (l > 1) {
1836  /* For plain CFGs, we'd only need to match length and category here. We add
1837  the rule number and the scope for PMCFG matches and the next token
1838  constraints for scannerless parsing support. */
1839  QHash<QPair<QPair<QPair<int, Cat>, QPair<int, PseudoCatScope> >,
1840  NextTokenConstraints>, int>
1841  indexOfLenCat;
1842  {
1843  const Match &m = matches.first();
1844  indexOfLenCat.insert(qMakePair(qMakePair(qMakePair(m.len, m.tree.cat),
1845  qMakePair(m.ruleno, m.scope)),
1846  m.nextTokenConstraints), 0);
1847  }
1848  for (int i=1; i<l; ) {
1849  const Match &m = matches.at(i);
1850  QPair<QPair<QPair<int, Cat>, QPair<int, PseudoCatScope> >,
1851  NextTokenConstraints> key(qMakePair(qMakePair(m.len, m.tree.cat),
1852  qMakePair(m.ruleno, m.scope)),
1854  if (indexOfLenCat.contains(key)) {
1855  matches[indexOfLenCat.value(key)].tree.children.append(m.tree.children);
1856  matches.removeAt(i);
1857  l--;
1858  } else indexOfLenCat.insert(key, i++);
1859  }
1860  }
1861 }
1862 
1865 
1873 QList<Match> Parser::reduce(CatArg cat, CatArg target, int pos, int len,
1874  const Node &tree, const StackItem &stack,
1875  const PseudoCatScope &scope, int ruleno,
1876  const NextTokenConstraints &nextTokenConstraints,
1877  QSet<Cat> mark)
1878 {
1879  QList<Match> result;
1880  if (cat == target) result.append(Match(len, tree, ruleno, scope,
1881  nextTokenConstraints));
1882  QList<Match> matches;
1883  {
1884  StackItem type4(stack, target, pos, len);
1885  // for each rule to get towards the target category
1886  foreach (const FullRule &rule, neighborhood(cat, target)) {
1887  QList<Match> currentMatches;
1888  Node node(rule.cat);
1889  node.children.first().setLabel(rule.rule.label());
1890  int epsilonsSkipped = rule.epsilonsSkipped;
1891  // if we are reducing a pseudo-category, set the correct scope
1892  PseudoCatScope newScope;
1893  CatArg pseudoCat = rule.rule.at(epsilonsSkipped);
1894  if (pseudoCat != cat) {
1895  newScope.pConstraints().insert(pseudoCat,
1896  qMakePair(qMakePair(tree, nextTokenConstraints), len));
1897  foreach (CatArg cati, pseudoCats.value(pseudoCat).second)
1898  newScope.mcfgConstraints().insert(cati, qMakePair(ruleno, scope));
1899  }
1900  currentMatches.append(Match(0, node, rule.ruleno, newScope,
1901  nextTokenConstraints));
1902  /* Find the first token in the tree so we can validate the next token
1903  constraints of the epsilon matches. If we don't have epsilons skipped,
1904  don't bother doing this work.
1905  Also note that tree cannot be an epsilon tree because we never reduce
1906  epsilon. */
1907  Cat firstToken = Cat();
1908  if (epsilonsSkipped)
1909  firstToken = findFirstToken(tree);
1910  // match first items to epsilon
1911  for (int k=0; k<epsilonsSkipped; k++) {
1912  int s = currentMatches.size();
1913  for (int i=0; i<s; ) {
1914  Match &m = currentMatches[i];
1915  QList<Match> componentMatches = matchToEpsilon(rule.rule.at(k),
1916  m.scope);
1917  /* Here, we need to validate the next token constraints in our epsilon
1918  matches against the first token of the tree we're reducing. */
1919  /* FIXME: Would it be more efficient to do this in neighborhood? It'd
1920  help prediction in some (rare?) cases, too. But it'd make
1921  neighborhoods dependent on the first token, which hurts
1922  caching badly. */
1923  {
1924  int cs = componentMatches.size();
1925  for (int j=0; j<cs; ) {
1926  if (validateNextTokenConstraints(firstToken,
1927  componentMatches.at(j).nextTokenConstraints)) j++; else {
1928  if (j) componentMatches.swap(0, j);
1929  componentMatches.removeFirst();
1930  cs--;
1931  }
1932  }
1933  }
1934  if (componentMatches.isEmpty()) {
1935  if (i) currentMatches.swap(0, i);
1936  currentMatches.removeFirst();
1937  s--;
1938  } else {
1939  int cs = componentMatches.size();
1940  for (int j=1; j<cs; j++) {
1941  const Match &cm = componentMatches.at(j);
1942  Node newTree = m.tree;
1943  newTree.children.first().append(cm.tree);
1944  currentMatches.append(Match(m.len + cm.len, newTree, m.ruleno,
1945  cm.scope, m.nextTokenConstraints));
1946  }
1947  const Match &cm
1948  = static_cast<const QList<Match> &>(componentMatches).first();
1949  m.len += cm.len;
1950  m.tree.children.first().append(cm.tree);
1951  m.scope = cm.scope;
1952  i++;
1953  }
1954  }
1955  }
1956  foreach (const Match &m, currentMatches) {
1957  Node newTree = m.tree;
1958  newTree.children.first().append(tree);
1959  // match remaining rule items
1960  matches.append(matchRemaining(rule.rule, 0, pos, epsilonsSkipped+1,
1961  newTree, m.scope, m.ruleno, type4,
1963  }
1964  }
1965  }
1966  // unify matches to a shared representation to avoid needless bifurcation
1967  unify(matches);
1968  {
1969  StackItem type2(stack, 0);
1970  foreach (const Match &m, matches) {
1971  Cat newCat = m.tree.cat;
1972  if (m.len)
1973  qFatal("length of a reduction increased in a non-shift codepath");
1974  // reduction with unchanged length
1975  // avoid infinite loops (if we have epsilon productions or redundant
1976  // X->...->X rules)
1977  mark.insert(cat);
1978  if (mark.contains(newCat)) continue;
1979  // now reduce the rule
1980  result.append(reduce(newCat, target, pos, len, m.tree, type2, m.scope,
1981  m.ruleno, m.nextTokenConstraints, mark));
1982  }
1983  }
1984  // do another unification pass on the result
1985  unify(result);
1986  return result;
1987 }
1988 
1990 
1993 QList<Match> Parser::processStackItem(const StackItem &item,
1994  const QList<Match> &matches)
1995 {
1996  StackItem nextItem = item;
1997  QList<Match> nextMatches = matches;
1998  tailcall: {
1999  const StackItem &item = nextItem;
2000  const QList<Match> &matches = nextMatches;
2001  // Do not bother proceeding any further with an empty list of matches, as
2002  // the result will necessarily be empty as well.
2003  if (matches.isEmpty()) return matches;
2004  switch (item.type()) {
2005  case 0:
2006  qFatal("type 0 items not supported by processStackItem");
2007  case 1:
2008  {
2009  const StackItemType1 *data
2010  = static_cast<const StackItemType1 *>(item.data());
2011  const QList<StackItem> &parents = data->parents();
2012  Cat cat = data->cat();
2013  Cat effCat = data->effCat();
2014  PseudoCatScope scope = data->scope();
2015 
2016  if (effCat != cat) // cat is a pseudo-category
2017  finalizeMatches(nextMatches, cat, scope);
2018  else
2019  copyScope(nextMatches, scope);
2020 
2021  if (parents.isEmpty()) // toplevel item
2022  return nextMatches;
2023  else if (parents.count() == 1) {
2024  nextItem = parents.first();
2025  goto tailcall;
2026  } else {
2027  branched = true;
2028  QList<Match> result;
2029  foreach (const StackItem &parent, parents)
2030  result.append(processStackItem(parent, nextMatches));
2031  return result;
2032  }
2033  }
2034  break;
2035  case 2:
2036  {
2037  const StackItemType2 *data
2038  = static_cast<const StackItemType2 *>(item.data());
2039  const StackItem &parent = data->parent();
2040 
2041  // skip unification if we know there is nothing further coming to this
2042  // unification point
2043  bool present;
2044  if (branched)
2045  present = collectedMatches.contains(data);
2046  else {
2047  if (priorityQueue.isEmpty()) {
2048  type_2_skip:
2049  nextItem = parent;
2050  goto tailcall;
2051  }
2052  int headDepth = priorityQueue.head().depth();
2053  int depth = data->depth();
2054  if (headDepth < depth) goto type_2_skip;
2055  if (headDepth == depth) {
2056  present = collectedMatches.contains(data);
2057  if (!present) goto type_2_skip;
2058  } else present = collectedMatches.contains(data);
2059  }
2060 
2061  // unify matches, deferring processing
2062  if (present) {
2063  QPair<bool, QList<Match> > &entry = collectedMatches[data];
2064  entry.first = true;
2065  entry.second.append(matches);
2066  } else {
2067  collectedMatches[data] = qMakePair(false, matches);
2068  priorityQueue.enqueue(item);
2069  }
2070  return QList<Match>();
2071  }
2072  break;
2073  case 3:
2074  {
2075  const StackItemType3 *data
2076  = static_cast<const StackItemType3 *>(item.data());
2077  const StackItem &parent = data->parent();
2078  Rule rule = data->rule();
2079  int len = data->len();
2080  int curr = data->curr();
2081  int i = data->i();
2082  Node tree = data->tree();
2083  int ruleno = data->ruleno();
2084 
2085  QList<Match> result;
2086  foreach (const Match &m, matches) {
2087  Node newTree = tree;
2088  newTree.children.first().append(m.tree);
2089  result.append(matchRemaining(rule, len+m.len, curr+m.len, i+1,
2090  newTree, m.scope, ruleno, parent,
2092  }
2093 
2094  nextMatches = result;
2095  nextItem = parent;
2096  goto tailcall;
2097  }
2098  break;
2099  case 4:
2100  {
2101  const StackItemType4 *data
2102  = static_cast<const StackItemType4 *>(item.data());
2103  const StackItem &parent = data->parent();
2104  Cat target = data->target();
2105  int pos = data->pos();
2106  int len = data->len();
2107 
2108  // skip unification if we know there is nothing further coming to this
2109  // unification point
2110  bool present;
2111  if (branched)
2112  present = collectedMatches.contains(data);
2113  else {
2114  if (priorityQueue.isEmpty()) {
2115  type_4_skip:
2116  QList<Match> result;
2117  {
2118  StackItem type2(parent, 0);
2119  foreach (const Match &m, matches) {
2120  Cat newCat = m.tree.cat;
2121  if (!m.len)
2122  qFatal("reduction with unchanged length was deferred");
2123  // the length increased, reduce the rule, resetting mark
2124  result.append(reduce(newCat, target, pos+m.len, len+m.len,
2125  m.tree, type2, m.scope, m.ruleno,
2127  }
2128  }
2129  // do another unification pass on the result
2130  unify(result);
2131 
2132  nextMatches = result;
2133  nextItem = parent;
2134  goto tailcall;
2135  }
2136  int headDepth = priorityQueue.head().depth();
2137  int depth = data->depth();
2138  if (headDepth < depth) goto type_4_skip;
2139  if (headDepth == depth) {
2140  present = collectedMatches.contains(data);
2141  if (!present) goto type_4_skip;
2142  } else present = collectedMatches.contains(data);
2143  }
2144 
2145  // unify matches, deferring processing
2146  if (present) {
2147  QPair<bool, QList<Match> > &entry = collectedMatches[data];
2148  entry.first = true;
2149  entry.second.append(matches);
2150  } else {
2151  collectedMatches[data] = qMakePair(false, matches);
2152  priorityQueue.enqueue(item);
2153  }
2154  return QList<Match>();
2155  }
2156  case 5:
2157  {
2158  const StackItemType5 *data
2159  = static_cast<const StackItemType5 *>(item.data());
2160  const StackItem &parent = data->parent();
2161  Cat cat = data->cat();
2162  PseudoCatScope scope = data->scope();
2163 
2164  finalizeMatches(nextMatches, cat, scope);
2165 
2166  nextItem = parent;
2167  goto tailcall;
2168  }
2169  break;
2170  case 6:
2171  qFatal("type 6 items not supported by processStackItem");
2172  default:
2173  qFatal("invalid stack item type");
2174  }
2175  }
2176 }
2177 
2179 
//! Processes one toplevel stack entry for the token just shifted.
/*! Handles type 0 (pending shift) and type 6 (pending P constraint) items
    directly, then forwards the resulting matches up the stack via
    processStackItem. Type 2/4 items arriving here through the priority
    queue are unification points whose collected matches are released.
    \param stack the stack item dequeued from the priority queue
    \param token the token that was just shifted */
QList<Match> Parser::processStack(const StackItem &stack, CatArg token)
{
  branched = false; // reset branched flag
  switch (stack.type()) {
    // type 0 and 6 are toplevel items
    case 0:
      {
        const StackItemType0 *data
          = static_cast<const StackItemType0 *>(stack.data());
        const QList<StackItem> &parents = data->parents();
        Cat cat = data->cat();
        Cat effCat = data->effCat();
        int pos = data->pos();
        PseudoCatScope scope = data->scope();
        QList<Match> matches;

        // type 0 item: finish processing of match after a shift
        if (isToken(cat)) {
          // a token category matches iff it is the shifted token itself
          if (token == cat) matches.append(Match(1, inputSource->parseTree(), 0,
                                                 scope));
        } else {
          // reduce the matched token (and possibly additional tokens, with
          // deferred processing) to the target category
          StackItem type1(parents, cat, effCat, scope);
          // The token carries no next token constraints.
          matches = reduce(token, effCat, pos, 1, inputSource->parseTree(),
                           type1, PseudoCatScope(), 0, NextTokenConstraints());

          if (effCat != cat) // cat is a pseudo-category
            finalizeMatches(matches, cat, scope);
          else
            copyScope(matches, scope);
        }

        if (parents.isEmpty()) // toplevel item
          return matches;
        else {
          // propagate the matches to every parent in the DAG-structured stack
          QList<Match> result;
          if (parents.count() > 1) branched = true;
          foreach (const StackItem &parent, parents)
            result.append(processStackItem(parent, matches));
          return result;
        }
      }
    case 6:
      {
        const StackItemType6 *data
          = static_cast<const StackItemType6 *>(stack.data());
        const StackItem &parent = data->parent();
        QList<Node> leaves = data->leaves();
        int i = data->i();
        Node tree = data->tree();
        PseudoCatScope scope = data->scope();
        NextTokenConstraints nextTokenConstraints
          = data->nextTokenConstraints();
        QList<Match> matches;

        // type 6 item: finish processing of a P constraint
        // match the i-th leaf against the shifted token
        if (inputSource->matchParseTree(leaves.at(i++))) {
          int l = leaves.size();
          if (i == l) // we're done matching the constraint
            matches.append(Match(l, tree, 0, scope, nextTokenConstraints));
          else { // not done yet, create a new type 6 item
            StackItem type6(parent, leaves, i, tree, scope,
                            nextTokenConstraints);
            nextStacks.append(type6);
          }
        }

        return processStackItem(parent, matches);
      }
    // case 2 and 4 are unification points, so look for matches collected so far
    // and process them
    case 2:
    case 4:
      {
        // We use take (= value + remove) instead of value here to ensure that
        // processStackItem will not send us back here.
        QPair<bool, QList<Match> > entry = collectedMatches.take(stack.data());
        if (entry.first) // need unification
          unify(entry.second);
        return processStackItem(stack, entry.second);
      }
    default:
      qFatal("invalid stack (expected toplevel item or unification point)");
  }
}
2274 
2276 
//! Shifts the next input token and processes all pending stacks with it.
/*! Reads the token at position pos, discards stacks whose next token
    constraints it violates, then runs every surviving stack through
    processStack (in priority-queue order), accumulating the results in
    currentMatches and refilling nextStacks for the following shift.
    \return true if a token was consumed and parsing can continue; false at
    end of input or on a parse error (in which case errPos/errToken are set). */
bool Parser::shift(int pos)
{
  if (!inputSource->rewindTo(pos, currentLexerState)) {
    qWarning("invalid input position");
    return false;
  }
  Cat token = inputSource->nextToken();
  if (IS_EPSILON(token)) // end of input, nothing to shift
    return false;

  // delete all the stacks which don't satisfy the next token constraints
  {
    int l = nextStacks.size();
    for (int i=0; i<l; ) {
      StackItem &stack = nextStacks[i];
      // type 6 items are already filtered for next token constraints
      if (stack.type()) i++; else {
        const StackItemType0 *data
          = static_cast<const StackItemType0 *>(stack.data());
        QList<StackItem> parents = data->parents();
        // no parents = toplevel item, skip processing to avoid killing this
        if (parents.empty()) i++; else {
          // drop each (type 3) parent whose constraints reject the token
          int n = parents.size();
          for (int j=0; j<n; ) {
            const StackItem &parent = parents.at(j);
            const StackItemType3 *parentData
              = static_cast<const StackItemType3 *>(parent.data());
            if (validateNextTokenConstraints(token,
                parentData->nextTokenConstraints())) j++; else {
              // cheap removal: swap to front, pop front
              if (j) parents.swap(0, j);
              parents.removeFirst();
              n--;
            }
          }
          // no parents left, kill the type 0 item
          if (parents.isEmpty()) {
            if (i) nextStacks.swap(0, i);
            nextStacks.removeFirst();
            l--;
          } else {
            stack.setParents(parents);
            i++;
          }
        }
      }
    }
  }

  // if no more stacks, set errPos and errToken and return false
  // (this happens if we had a valid complete parse, but no continuations, and
  // now there's extra input)
  if (nextStacks.isEmpty()) {
    errPos = pos;
    errToken = token;
    return false;
  }

  // process stack
  currentMatches.clear();
  QList<StackItem> currentStacks = nextStacks;
  nextStacks.clear();
  priorityQueue = Private::PriorityQueue<StackItem>(currentStacks);
  while (!priorityQueue.isEmpty())
    currentMatches.append(processStack(priorityQueue.dequeue(), token));
  type0Indices.clear(); // memoized indices are only valid within one shift

  // if no more stacks and no current matches, set errPos and errToken, restore
  // the previous nextStacks to allow running prediction on them and return
  // false
  if (nextStacks.isEmpty() && currentMatches.isEmpty()) {
    errPos = pos;
    errToken = token;
    nextStacks = currentStacks;
    return false;
  }

  currentLexerState = inputSource->saveState();
  return true;
}
2360 
2362 
// Parser::parse -- top-level entry point: parse the input by repeatedly
// shifting tokens, with optional support for incremental (resumable) parsing.
//
// Parameters (all pointers optional, may be null):
//   errorPos/errorToken  - out: position and token of a parse error, or
//                          -1 / default-constructed Cat() on success
//   incrementalPos       - in: negative starts a fresh parse, >= 0 resumes
//                          a previous one at that position;
//                          out: position reached (error position on failure)
//   incrementalStacks    - in/out: saved shift stacks for resuming
//   incrementalMatches   - in/out: saved matches for resuming
//   lexerState           - in/out: lexer state for incremental lexing
// Returns the list of accepted matches; empty list on parse error.
//
// NOTE(review): this listing is a lossy text extraction; original line 2446
// (the continuation of the match(...) argument list) is missing here. The
// visible code text is left byte-identical.
2437 QList<Match> Parser::parse(int *errorPos, Cat *errorToken, int *incrementalPos,
2438  QList<StackItem> *incrementalStacks,
2439  QList<Match> *incrementalMatches,
2440  LexerState *lexerState)
2441 {
2442  int pos = 0;
2443 
2444  if (!incrementalPos || *incrementalPos < 0) // start a new parse
// Initial match of the start category from position 0 with an empty scope.
// (The remaining arguments of this call are on the missing line 2446.)
2445  currentMatches = match(startCat, 0, PseudoCatScope(), StackItem(),
2447  else { // continue an incremental parse
// Restore position and any saved matches/stacks from the interrupted parse.
2448  pos = *incrementalPos;
2449  if (incrementalMatches) currentMatches = *incrementalMatches;
2450  if (incrementalStacks) nextStacks = *incrementalStacks;
2451  }
2452 
2453  if (lexerState)
2454  currentLexerState = *lexerState;
2455  else
2456  currentLexerState.clear();
2457 
2458  // shift tokens while possible
2459  errPos = -1;
2460  while (shift(pos)) pos++;
2461 
2462  // error handling
// shift() records the failure position in the errPos member; a non-negative
// value here means the parse stopped on an error, so report the error state
// to the caller, reset all member state, and return an empty result.
2463  if (errPos >= 0) { // parse error
2464  if (errorPos) *errorPos = errPos;
2465  if (errorToken) *errorToken = errToken;
2466  if (incrementalPos) *incrementalPos = errPos;
2467  if (incrementalStacks) *incrementalStacks = nextStacks;
2468  if (incrementalMatches) incrementalMatches->clear();
2469  if (lexerState) *lexerState = currentLexerState;
2470  errPos = -1;
2471  errToken = Cat();
2472  nextStacks.clear();
2473  currentMatches.clear();
2474  currentLexerState.clear();
2475  return QList<Match>();
2476  }
2477  if (errorPos) *errorPos = -1;
2478  if (errorToken) *errorToken = Cat();
2479 
2480  // filter all the matches which have expect-type next token constraints
// A match still expecting a specific next token cannot be a complete parse.
2481  {
2482  int s = currentMatches.size();
2483  for (int i=0; i<s; ) {
2484  Match &m = currentMatches[i];
2485  if (m.nextTokenConstraints.expect.isEmpty()) i++; else {
// O(1) unordered removal: swap the victim to the front, then pop the front.
2486  if (i) currentMatches.swap(0, i);
2487  currentMatches.removeFirst();
2488  s--;
2489  }
2490  }
2491  }
2492 
2493  // incremental parsing
// Hand the final state back to the caller so the parse can be resumed later.
2494  if (incrementalPos) *incrementalPos = pos;
2495  if (incrementalMatches) *incrementalMatches = currentMatches;
2496  if (incrementalStacks) *incrementalStacks = nextStacks;
2497  if (lexerState) *lexerState = currentLexerState;
2498  nextStacks.clear();
2499  currentLexerState.clear();
2500 
2501  // accept current matches
2502  QList<Match> result = currentMatches;
2503  currentMatches.clear();
2504  return result;
2505 }
2506 
2509 
// Parser::computePredictions (body only -- the signature, original lines
// 2510-2515, is missing from this extraction; presumably
// "Predictions Parser::computePredictions(const QList<StackItem> &stacks)
// const", by analogy with computeConstrainedPredictions -- TODO confirm).
// Collects, from the stacks of an interrupted incremental parse, the set of
// categories that could accept the next token.
2516 {
2517  Predictions predict;
2518  foreach (const StackItem &stack, stacks) {
// A nonzero type() is a type 6 item (matching a P constraint): predict the
// category of the leaf currently being matched (index i into the leaves).
2519  if (stack.type()) { // type 6 item
2520  const StackItemType6 *data
2521  = static_cast<const StackItemType6 *>(stack.data());
2522  predict.insert(data->leaves().at(data->i()).cat);
// Otherwise a type 0 item (waiting for a token to shift): predict its
// effective category.
2523  } else // type 0 item
2524  predict.insert(static_cast<const StackItemType0 *>(stack.data())
2525  ->effCat());
2526  }
2527  return predict;
2528 }
2529 
2531 void Parser::expandNonterminalPredictionRecurse(CatArg cat,
2532  QHash<Cat, QSet<Cat> > &result,
2533  QSet<Cat> &mark) const
2534 {
2535  mark.insert(cat);
2536  foreach (const Rule &rule, rules.value(cat)) {
2537  QListIterator<Cat> i(rule);
2538  /* For each nonempty rule (empty rules are not interesting for prediction),
2539  we handle at least the first item in the rule as a prediction: if it's a
2540  token, it is our prediction and the corresponding nonterminal is this
2541  category; if it's a nonterminal, we need to process the prediction
2542  recursively, unless it is already marked, indicating direct or indirect
2543  left recursion (and allowing us to stop) or the repetition of an already
2544  tried possibility (also allowing us to stop, since we do not care about
2545  parse trees here, only the last category). Then, as long as the first i
2546  categories are nullable and the (i+1)th category exists, we repeat the
2547  process for the (i+1)th category. */
2548  while (i.hasNext()) {
2549  Cat cati = effectiveCat(i.next());
2550  if (isToken(cati)) {
2551  result[cat].insert(cati); // record nonterminal and token
2552  break; // tokens are not nullable
2553  }
2554  // now cati is a nonterminal
2555  if (!mark.contains(cati))
2556  expandNonterminalPredictionRecurse(cati, result, mark);
2557  if (!nullable.contains(cati)) break;
2558  }
2559  }
2560 }
2561 
2565 
2575 QHash<Cat, QSet<Cat> > Parser::expandNonterminalPrediction(CatArg cat) const
2576 {
2577  QHash<Cat, QSet<Cat> > result;
2578  QSet<Cat> mark;
2579  if (isToken(cat))
2580  qWarning("trying to expand terminal prediction");
2581  else
2582  expandNonterminalPredictionRecurse(cat, result, mark);
2583  return result;
2584 }
2585 
// Context-sensitive variant of expandNonterminalPredictionRecurse: in
// addition to the visited-set handling, it tracks a PseudoCatScope (P and
// MCFG constraints) and NextTokenConstraints while expanding, and can be
// restricted to a single rule (ruleno >= 0, coming from an MCFG constraint).
// Note that 'mark' is passed BY VALUE here, unlike the by-reference
// "QSet<Cat> &mark" of the context-free variant, so each recursion branch
// works on its own copy of the visited set.
// NOTE(review): this listing is a lossy text extraction; several
// continuation lines of the original file (2666, 2669, 2705, 2738, 2741,
// 2744, 2750, 2753) are missing. Code text is left byte-identical.
2587 void Parser::expandNonterminalPredictionRecurseC(CatArg cat,
2588  QHash<Cat, QSet<Cat> > &result,
2589  QSet<Cat> mark, int ruleno,
2590  const PseudoCatScope &scope,
2591  const NextTokenConstraints
2592  &nextTokenConstraints)
2593 {
2594  mark.insert(cat);
2595  QList<Rule> ruleList = rules.value(cat);
// ruleno >= 0 restricts the expansion to exactly that one rule; otherwise
// all rules of the category are considered.
2596  int firstRule = ruleno >= 0 ? ruleno : 0;
2597  int lastRule = ruleno >= 0 ? ruleno : ruleList.size() - 1;
2598  for (int currRule = firstRule; currRule <= lastRule; currRule++) {
2599  const Rule &rule = ruleList.at(currRule);
2600  QListIterator<Cat> i(rule);
2601  /* For each nonempty rule (empty rules are not interesting for prediction),
2602  we handle at least the first item in the rule as a prediction: if it's a
2603  token, it is our prediction and the corresponding nonterminal is this
2604  category; if it's a nonterminal, we need to process the prediction
2605  recursively, unless it is already marked, indicating direct or indirect
2606  left recursion (and allowing us to stop). Then, as long as the first i
2607  categories are nullable and the (i+1)th category exists, we repeat the
2608  process for the (i+1)th category.
2609  We optimize the common case of no epsilons to skip because we do not have
2610  to consider context-sensitive constraints in that case. */
2611  Cat cat1 = Cat();
2612  if (i.hasNext()) {
2613  cat1 = i.next();
2614  Cat effCat1 = effectiveCat(cat1);
2615  if (isToken(effCat1)) {
2616  if (validateNextTokenConstraints(effCat1, nextTokenConstraints))
2617  result[cat].insert(effCat1); // record nonterminal and token
2618  continue; // tokens are not nullable
2619  }
2620  // now effCat1 is a nonterminal
2621  /* If we have a P-constraint here, that constraint forces the category to
2622  be epsilon (because it must be identical to something already matched,
2623  which is epsilon). So don't predict inside it. */
2624  if (!mark.contains(effCat1)
2625  && !scope.hasPConstraint(cat1)) {
// An MCFG constraint pins the recursion to a specific rule number and scope.
2626  int mcfgRuleno = -1;
2627  PseudoCatScope mcfgScope;
2628  if (scope.hasMcfgConstraint(cat1)) {
2629  QPair<int, PseudoCatScope> mcfgConstraint
2630  = scope.mcfgConstraint(cat1);
2631  mcfgRuleno = mcfgConstraint.first;
2632  mcfgScope = mcfgConstraint.second;
2633  }
2634  expandNonterminalPredictionRecurseC(effCat1, result, mark, mcfgRuleno,
2635  mcfgScope, nextTokenConstraints);
2636  }
2637  if (!nullable.contains(effCat1)) continue;
2638  }
// Slow path: the first item is nullable, so later items can also start the
// nonterminal; track the possible scopes/constraints via epsilon matches.
2639  {
2640  QList<Match> currentMatches;
2641  // match cat(i-1) to epsilon first
2642  /* FIXME: We discard the parse trees here. Would it be more efficient
2643  to use a variant of matchToEpsilon which doesn't build them?
2644  OTOH, this way, we can reuse the matchToEpsilon cache. */
2645  const QList<Match> componentMatches = matchToEpsilon(cat1, scope);
2646  if (componentMatches.isEmpty())
2647  continue; // We didn't match epsilon, so we're done with this rule.
2648  /* If we have one version without context-sensitive constraints, the
2649  others are redundant. */
2650  bool haveUnconstrained = false;
2651  foreach (const Match &cm, componentMatches) {
2652  if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
2653  && cm.nextTokenConstraints.taboo.isEmpty()) {
2654  haveUnconstrained = true;
2655  break;
2656  }
2657  }
2658  if (haveUnconstrained)
2659  currentMatches.append(Match(0, Node(), 0, scope, nextTokenConstraints));
2660  else {
// Keep one pending match per constrained epsilon-match, merging its
// accumulated next-token constraints into ours (continuations on the
// missing lines 2666/2669).
2661  foreach (const Match &cm, componentMatches) {
2662  currentMatches.append(Match(0, Node(), 0, cm.scope,
2663  nextTokenConstraints));
2664  if (!cm.nextTokenConstraints.expect.isEmpty())
2665  currentMatches.last().nextTokenConstraints.expect.append(
2667  if (!cm.nextTokenConstraints.taboo.isEmpty())
2668  currentMatches.last().nextTokenConstraints.taboo.append(
2670  }
2671  }
2672  while (i.hasNext()) {
2673  // now update cati for the new i
2674  Cat cati = i.next();
2675  Cat effCati = effectiveCat(cati);
2676  if (isToken(effCati)) {
// The token is predicted if at least one pending match allows it.
2677  bool nextTokenConstraintsPass = false;
2678  foreach (const Match &m, currentMatches)
2679  if (validateNextTokenConstraints(effCati, m.nextTokenConstraints)) {
2680  nextTokenConstraintsPass = true;
2681  break;
2682  }
2683  if (nextTokenConstraintsPass)
2684  result[cat].insert(effCati); // record nonterminal and token
2685  break; // tokens are not nullable
2686  }
2687  // now effCati is a nonterminal
2688  if (!mark.contains(effCati)) {
2689  foreach (const Match &m, currentMatches)
2690  /* If we have a P-constraint here, that constraint forces the
2691  category to be epsilon (because it must be identical to something
2692  already matched, which is epsilon). So don't predict inside it.
2693  */
2694  if (!m.scope.hasPConstraint(cati)) {
2695  int mcfgRuleno = -1;
2696  PseudoCatScope mcfgScope;
2697  if (m.scope.hasMcfgConstraint(cati)) {
2698  QPair<int, PseudoCatScope> mcfgConstraint
2699  = m.scope.mcfgConstraint(cati);
2700  mcfgRuleno = mcfgConstraint.first;
2701  mcfgScope = mcfgConstraint.second;
2702  }
2703  expandNonterminalPredictionRecurseC(effCati, result, mark,
2704  mcfgRuleno, mcfgScope,
2706  }
2707  }
2708  if (!nullable.contains(effCati)) break;
// Advance every pending match across the nullable effCati by epsilon-
// matching it; drop matches that cannot, fork for multiple constrained
// epsilon-matches.
2709  int s = currentMatches.size();
2710  for (int j=0; j<s; ) {
2711  Match &m = currentMatches[j];
2712  /* FIXME: We discard the parse trees here. Would it be more efficient
2713  to use a variant of matchToEpsilon which doesn't build them?
2714  OTOH, this way, we can reuse the matchToEpsilon cache. */
2715  const QList<Match> componentMatches = matchToEpsilon(effCati,
2716  m.scope);
2717  if (componentMatches.isEmpty()) {
// O(1) unordered removal: swap the victim to the front, pop the front.
2718  if (j) currentMatches.swap(0, j);
2719  currentMatches.removeFirst();
2720  s--;
2721  } else {
2722  int cs = componentMatches.size();
2723  /* If we have one version without context-sensitive constraints, the
2724  others are redundant. */
2725  bool haveUnconstrained = false;
2726  for (int k=0; k<cs; k++) {
2727  const Match &cm = componentMatches.at(k);
2728  if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
2729  && cm.nextTokenConstraints.taboo.isEmpty()) {
2730  haveUnconstrained = true;
2731  break;
2732  }
2733  }
2734  if (!haveUnconstrained) {
2735  for (int k=1; k<cs; k++) {
2736  const Match &cm = componentMatches.at(k);
2737  currentMatches.append(Match(0, Node(), 0, cm.scope,
2739  if (!cm.nextTokenConstraints.expect.isEmpty())
2740  currentMatches.last().nextTokenConstraints.expect.append(
2742  if (!cm.nextTokenConstraints.taboo.isEmpty())
2743  currentMatches.last().nextTokenConstraints.taboo.append(
2745  }
2746  const Match &cm = componentMatches.first();
2747  m.scope = cm.scope;
2748  if (!cm.nextTokenConstraints.expect.isEmpty())
2749  m.nextTokenConstraints.expect.append(
2751  if (!cm.nextTokenConstraints.taboo.isEmpty())
2752  m.nextTokenConstraints.taboo.append(
2754  }
2755  j++;
2756  }
2757  }
2758  if (currentMatches.isEmpty())
2759  break; // We didn't match epsilon, so we're done with this rule.
2760  }
2761  }
2762  }
2763 }
2764 
2768 
// Parser::expandNonterminalPredictionC (body only -- the signature line 2774
// is missing from this extraction; per the documentation index it is
// "QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(CatArg cat)").
// Context-sensitive counterpart of expandNonterminalPrediction: starts the
// recursion with an empty visited set, no rule restriction (-1) and an empty
// scope. The final argument of the recursive call (original line 2782,
// presumably a default NextTokenConstraints()) is missing here.
2775 {
2776  QHash<Cat, QSet<Cat> > result;
2777  if (isToken(cat))
2778  qWarning("trying to expand terminal prediction");
2779  else
2780  expandNonterminalPredictionRecurseC(cat, result, QSet<Cat>(), -1,
2781  PseudoCatScope(),
2783  return result;
2784 }
2785 
2788 
// Parser::computeMultiPredictions (signature partially missing from this
// extraction; per the documentation index it is "MultiPredictions
// computeMultiPredictions(const QList<StackItem> &stacks) const",
// defined at dyngenpar.cpp:2806).
// Computes multi-token predictions from the stacks of an interrupted
// incremental parse: each entry maps a forward prediction (the token
// sequence expected from the current position) to a MultiPrediction
// carrying the full surrounding literal and the matched category.
2807  const
2808 {
2809  MultiPredictions predictMulti;
2810  foreach (const StackItem &stack, stacks) {
2811  if (stack.type()) { // type 6 item
2812  /* We will define the literal as the full string of the P constraint here,
2813  which is very easy to obtain. This might not strictly be a literal in
2814  the sense of the definition, since it could be obtained from a whole
2815  tree of nonterminals, but it definitely always contains the literal (in
2816  the strict sense) containing the predicted category. */
2817  const StackItemType6 *data
2818  = static_cast<const StackItemType6 *>(stack.data());
2819  QList<Node> leaves = data->leaves();
2820  int i = data->i();
2821  QList<Cat> prediction, literal;
2822  int l = leaves.size();
// prediction = leaves from the current position forward;
// literal = all leaves (prediction plus the already-matched prefix).
2823  for (int j=i; j<l; j++)
2824  prediction.append(leaves.at(j).cat);
2825  literal = prediction;
2826  for (int j=i-1; j>=0; j--)
2827  literal.prepend(leaves.at(j).cat);
2828  MultiPrediction multiPrediction(literal, data->tree().cat);
2829  if (!predictMulti.contains(prediction, multiPrediction))
2830  predictMulti.insert(prediction, multiPrediction);
2831  } else { // type 0 item
2832  const StackItemType0 *data
2833  = static_cast<const StackItemType0 *>(stack.data());
2834  Cat cat = data->effCat();
2835  if (isToken(cat)) {
2836  const QList<StackItem> &parents = data->parents();
2837  if (parents.isEmpty()) { // huh, start category is a terminal?
2838  QList<Cat> list;
2839  list << cat;
2840  MultiPrediction multiPrediction(list, cat);
2841  if (!predictMulti.contains(list, multiPrediction))
2842  predictMulti.insert(list, multiPrediction);
2843  } else {
2844  foreach (const StackItem &parent, parents) {
2845  const StackItemType3 *parentData
2846  = static_cast<const StackItemType3 *>(parent.data());
2847  Rule rule = parentData->rule();
2848  int i = parentData->i();
// Key on the run of tokens from position i forward; the stored
// MultiPrediction carries the maximal literal extended backwards over any
// tokens preceding i (mirroring the type-6 prediction/literal split above).
2849  int len = 1;
2850  while (i+len < rule.size() && isToken(rule.at(i+len))) len++;
2851  QList<Cat> literal = rule.mid(i, len);
2852  while (i > 0 && isToken(rule.at(i-1))) i--, len++;
2853  MultiPrediction multiPrediction(rule.mid(i, len),
2854  parentData->tree().cat);
2855  if (!predictMulti.contains(literal, multiPrediction))
2856  predictMulti.insert(literal, multiPrediction);
2857  }
2858  }
2859  } else {
// Nonterminal prediction: a single-element literal consisting of the
// category itself.
2860  QList<Cat> list;
2861  list << cat;
2862  MultiPrediction multiPrediction(list, cat);
2863  if (!predictMulti.contains(list, multiPrediction))
2864  predictMulti.insert(list, multiPrediction);
2865  }
2866  }
2867  }
2868  return predictMulti;
2869 }
2870 
2872 void Parser::expandNonterminalPredictionMultiRecurse(CatArg cat,
2873  QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> &mark) const
2874 {
2875  mark.insert(cat);
2876  foreach (const Rule &rule, rules.value(cat)) {
2877  int l = rule.size();
2878  /* For each nonempty rule (empty rules are not interesting for prediction),
2879  we handle at least the first item in the rule as a prediction: if it's a
2880  token, the literal starting with that token is our prediction and the
2881  corresponding nonterminal is this category; if it's a nonterminal, we
2882  need to process the prediction recursively, unless it is already marked,
2883  indicating direct or indirect left recursion (and allowing us to stop) or
2884  the repetition of an already tried possibility (also allowing us to stop,
2885  since we do not care about parse trees here, only the last category).
2886  Then, as long as the first i categories are nullable and the (i+1)th
2887  category exists, we repeat the process for the (i+1)th category. */
2888  for (int i = 0; i < l; i++) {
2889  Cat cati = effectiveCat(rule.at(i));
2890  if (isToken(cati)) {
2891  int len = 1;
2892  while (i+len < l && isToken(rule.at(i+len))) len++;
2893  result[cat].insert(rule.mid(i, len)); // record nonterminal and literal
2894  break; // tokens are not nullable
2895  }
2896  // now cati is a nonterminal
2897  if (!mark.contains(cati))
2898  expandNonterminalPredictionMultiRecurse(cati, result, mark);
2899  if (!nullable.contains(cati)) break;
2900  }
2901  }
2902 }
2903 
2907 
// Parser::expandNonterminalPredictionMulti (original line 2918 -- presumably
// "Parser::expandNonterminalPredictionMulti(CatArg cat) const" -- is missing
// from this extraction). Multi-token counterpart of
// expandNonterminalPrediction: maps each nonterminal to the set of literals
// (token runs) that can start it; warns and returns empty for a terminal.
2917 QHash<Cat, QSet<QList<Cat> > >
2919 {
2920  QHash<Cat, QSet<QList<Cat> > > result;
2921  QSet<Cat> mark;
2922  if (isToken(cat))
2923  qWarning("trying to expand terminal prediction");
2924  else
2925  expandNonterminalPredictionMultiRecurse(cat, result, mark);
2926  return result;
2927 }
2928 
// Context-sensitive variant of expandNonterminalPredictionMultiRecurse:
// records literals (token runs) rather than single tokens, tracks a
// PseudoCatScope (P and MCFG constraints) plus NextTokenConstraints, and can
// be restricted to one rule (ruleno >= 0, from an MCFG constraint). As in
// expandNonterminalPredictionRecurseC, 'mark' is passed BY VALUE, so each
// recursion branch has its own copy of the visited set.
// NOTE(review): lossy extraction -- several continuation lines of the
// original file (3011, 3014, 3054, 3087, 3090, 3093, 3099, 3102) are
// missing. Code text is left byte-identical.
2930 void Parser::expandNonterminalPredictionMultiRecurseC(CatArg cat,
2931  QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> mark, int ruleno,
2932  const PseudoCatScope &scope, const NextTokenConstraints &nextTokenConstraints)
2933 {
2934  mark.insert(cat);
2935  QList<Rule> ruleList = rules.value(cat);
// ruleno >= 0 restricts the expansion to that single rule.
2936  int firstRule = ruleno >= 0 ? ruleno : 0;
2937  int lastRule = ruleno >= 0 ? ruleno : ruleList.size() - 1;
2938  for (int currRule = firstRule; currRule <= lastRule; currRule++) {
2939  const Rule &rule = ruleList.at(currRule);
2940  int l = rule.size();
2941  /* For each nonempty rule (empty rules are not interesting for prediction),
2942  we handle at least the first item in the rule as a prediction: if it's a
2943  token, the literal starting with that token is our prediction and the
2944  corresponding nonterminal is this category; if it's a nonterminal, we
2945  need to process the prediction recursively, unless it is already marked,
2946  indicating direct or indirect left recursion (and allowing us to stop).
2947  Then, as long as the first i categories are nullable and the (i+1)th
2948  category exists, we repeat the process for the (i+1)th category.
2949  We optimize the common case of no epsilons to skip because we do not have
2950  to consider context-sensitive constraints in that case. */
2951  Cat cat1 = Cat();
2952  if (l) {
2953  cat1 = rule.first();
2954  Cat effCat1 = effectiveCat(cat1);
2955  if (isToken(effCat1)) {
2956  if (validateNextTokenConstraints(effCat1, nextTokenConstraints)) {
// Record the maximal run of consecutive tokens starting the rule.
2957  int len = 1;
2958  while (len < l && isToken(rule.at(len))) len++;
2959  // record nonterminal and literal
2960  result[cat].insert(rule.mid(0, len));
2961  }
2962  continue; // tokens are not nullable
2963  }
2964  // now effCat1 is a nonterminal
2965  /* If we have a P-constraint here, that constraint forces the category to
2966  be epsilon (because it must be identical to something already matched,
2967  which is epsilon). So don't predict inside it. */
2968  if (!mark.contains(effCat1)
2969  && !scope.hasPConstraint(cat1)) {
2970  int mcfgRuleno = -1;
2971  PseudoCatScope mcfgScope;
2972  if (scope.hasMcfgConstraint(cat1)) {
2973  QPair<int, PseudoCatScope> mcfgConstraint
2974  = scope.mcfgConstraint(cat1);
2975  mcfgRuleno = mcfgConstraint.first;
2976  mcfgScope = mcfgConstraint.second;
2977  }
2978  expandNonterminalPredictionMultiRecurseC(effCat1, result, mark,
2979  mcfgRuleno, mcfgScope,
2980  nextTokenConstraints);
2981  }
2982  if (!nullable.contains(effCat1)) continue;
2983  }
// Slow path: the first item is nullable; track possible scopes/constraints
// across epsilon matches, as in expandNonterminalPredictionRecurseC.
2984  {
2985  QList<Match> currentMatches;
2986  // match cat(i-1) to epsilon first
2987  /* FIXME: We discard the parse trees here. Would it be more efficient
2988  to use a variant of matchToEpsilon which doesn't build them?
2989  OTOH, this way, we can reuse the matchToEpsilon cache. */
2990  const QList<Match> componentMatches = matchToEpsilon(cat1, scope);
2991  if (componentMatches.isEmpty())
2992  continue; // We didn't match epsilon, so we're done with this rule.
2993  /* If we have one version without context-sensitive constraints, the
2994  others are redundant. */
2995  bool haveUnconstrained = false;
2996  foreach (const Match &cm, componentMatches) {
2997  if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
2998  && cm.nextTokenConstraints.taboo.isEmpty()) {
2999  haveUnconstrained = true;
3000  break;
3001  }
3002  }
3003  if (haveUnconstrained)
3004  currentMatches.append(Match(0, Node(), 0, scope, nextTokenConstraints));
3005  else {
// One pending match per constrained epsilon-match; the appended constraint
// arguments are on the missing lines 3011/3014.
3006  foreach (const Match &cm, componentMatches) {
3007  currentMatches.append(Match(0, Node(), 0, cm.scope,
3008  nextTokenConstraints));
3009  if (!cm.nextTokenConstraints.expect.isEmpty())
3010  currentMatches.last().nextTokenConstraints.expect.append(
3012  if (!cm.nextTokenConstraints.taboo.isEmpty())
3013  currentMatches.last().nextTokenConstraints.taboo.append(
3015  }
3016  }
3017  for (int i = 1; i < l; i++) {
3018  // now update cati for the new i
3019  Cat cati = rule.at(i);
3020  Cat effCati = effectiveCat(cati);
3021  if (isToken(effCati)) {
3022  bool nextTokenConstraintsPass = false;
3023  foreach (const Match &m, currentMatches)
3024  if (validateNextTokenConstraints(effCati, m.nextTokenConstraints)) {
3025  nextTokenConstraintsPass = true;
3026  break;
3027  }
3028  if (nextTokenConstraintsPass) {
// Record the maximal token run starting at position i.
3029  int len = 1;
3030  while (i+len < l && isToken(rule.at(i+len))) len++;
3031  // record nonterminal and literal
3032  result[cat].insert(rule.mid(i, len));
3033  }
3034  break; // tokens are not nullable
3035  }
3036  // now effCati is a nonterminal
3037  if (!mark.contains(effCati)) {
3038  foreach (const Match &m, currentMatches)
3039  /* If we have a P-constraint here, that constraint forces the
3040  category to be epsilon (because it must be identical to something
3041  already matched, which is epsilon). So don't predict inside it.
3042  */
3043  if (!m.scope.hasPConstraint(cati)) {
3044  int mcfgRuleno = -1;
3045  PseudoCatScope mcfgScope;
3046  if (m.scope.hasMcfgConstraint(cati)) {
3047  QPair<int, PseudoCatScope> mcfgConstraint
3048  = m.scope.mcfgConstraint(cati);
3049  mcfgRuleno = mcfgConstraint.first;
3050  mcfgScope = mcfgConstraint.second;
3051  }
3052  expandNonterminalPredictionMultiRecurseC(effCati, result, mark,
3053  mcfgRuleno, mcfgScope,
3055  }
3056  }
3057  if (!nullable.contains(effCati)) break;
// Advance all pending matches across the nullable effCati: drop those that
// cannot epsilon-match, fork for multiple constrained epsilon-matches.
3058  int s = currentMatches.size();
3059  for (int j=0; j<s; ) {
3060  Match &m = currentMatches[j];
3061  /* FIXME: We discard the parse trees here. Would it be more efficient
3062  to use a variant of matchToEpsilon which doesn't build them?
3063  OTOH, this way, we can reuse the matchToEpsilon cache. */
3064  const QList<Match> componentMatches = matchToEpsilon(effCati,
3065  m.scope);
3066  if (componentMatches.isEmpty()) {
// O(1) unordered removal: swap the victim to the front, pop the front.
3067  if (j) currentMatches.swap(0, j);
3068  currentMatches.removeFirst();
3069  s--;
3070  } else {
3071  int cs = componentMatches.size();
3072  /* If we have one version without context-sensitive constraints, the
3073  others are redundant. */
3074  bool haveUnconstrained = false;
3075  for (int k=0; k<cs; k++) {
3076  const Match &cm = componentMatches.at(k);
3077  if (cm.scope.isNull() && cm.nextTokenConstraints.expect.isEmpty()
3078  && cm.nextTokenConstraints.taboo.isEmpty()) {
3079  haveUnconstrained = true;
3080  break;
3081  }
3082  }
3083  if (!haveUnconstrained) {
3084  for (int k=1; k<cs; k++) {
3085  const Match &cm = componentMatches.at(k);
3086  currentMatches.append(Match(0, Node(), 0, cm.scope,
3088  if (!cm.nextTokenConstraints.expect.isEmpty())
3089  currentMatches.last().nextTokenConstraints.expect.append(
3091  if (!cm.nextTokenConstraints.taboo.isEmpty())
3092  currentMatches.last().nextTokenConstraints.taboo.append(
3094  }
3095  const Match &cm = componentMatches.first();
3096  m.scope = cm.scope;
3097  if (!cm.nextTokenConstraints.expect.isEmpty())
3098  m.nextTokenConstraints.expect.append(
3100  if (!cm.nextTokenConstraints.taboo.isEmpty())
3101  m.nextTokenConstraints.taboo.append(
3103  }
3104  j++;
3105  }
3106  }
3107  if (currentMatches.isEmpty())
3108  break; // We didn't match epsilon, so we're done with this rule.
3109  }
3110  }
3111  }
3112 }
3113 
3117 
// Parser::expandNonterminalPredictionMultiC (original line 3124 -- the
// "Parser::expandNonterminalPredictionMultiC(CatArg cat)" name line -- is
// missing from this extraction). Context-sensitive counterpart of
// expandNonterminalPredictionMulti: starts the recursion with an empty
// visited set, no rule restriction (-1) and an empty scope. The final
// argument of the recursive call (original line 3132, presumably a default
// NextTokenConstraints()) is also missing here.
3123 QHash<Cat, QSet<QList<Cat> > >
3125 {
3126  QHash<Cat, QSet<QList<Cat> > > result;
3127  if (isToken(cat))
3128  qWarning("trying to expand terminal prediction");
3129  else
3130  expandNonterminalPredictionMultiRecurseC(cat, result, QSet<Cat>(), -1,
3131  PseudoCatScope(),
3133  return result;
3134 }
3135 
3138 
// Parser::computeConstrainedPredictions (the first signature line, original
// 3147 -- presumably "ConstrainedPredictions
// Parser::computeConstrainedPredictions(" -- is missing from this
// extraction). Like computePredictions, but each predicted category is
// paired with the NextTokenConstraints under which it is predicted; for
// predicted tokens the constraints are validated immediately instead.
3148  const QList<StackItem> &stacks) const
3149 {
3150  ConstrainedPredictions predict;
3151  foreach (const StackItem &stack, stacks) {
// Type 6 item (P constraint match): predict the current leaf's category
// with empty (default) constraints.
3152  if (stack.type()) { // type 6 item
3153  const StackItemType6 *data
3154  = static_cast<const StackItemType6 *>(stack.data());
3155  CatArg cat = data->leaves().at(data->i()).cat;
3156  NextTokenConstraints nextTokenConstraints;
3157  if (!predict.contains(cat, nextTokenConstraints))
3158  predict.insert(cat, nextTokenConstraints);
3159  } else { // type 0 item
3160  const StackItemType0 *data
3161  = static_cast<const StackItemType0 *>(stack.data());
3162  Cat cat = data->effCat();
3163  const QList<StackItem> &parents = data->parents();
3164  if (parents.isEmpty()) { // start category, no constraints
3165  NextTokenConstraints nextTokenConstraints;
3166  if (!predict.contains(cat, nextTokenConstraints))
3167  predict.insert(cat, nextTokenConstraints);
3168  } else {
3169  if (isToken(cat)) {
3170  // validate the next token constraints immediately for tokens
// A token is predicted (with empty constraints) if at least one parent's
// constraints allow it.
3171  bool nextTokenConstraintsPass = false;
3172  foreach (const StackItem &parent, parents) {
3173  const StackItemType3 *parentData
3174  = static_cast<const StackItemType3 *>(parent.data());
3175  NextTokenConstraints nextTokenConstraints
3176  = parentData->nextTokenConstraints();
3177  if (validateNextTokenConstraints(cat, nextTokenConstraints)) {
3178  nextTokenConstraintsPass = true;
3179  break;
3180  }
3181  }
3182  if (nextTokenConstraintsPass) {
3183  NextTokenConstraints nextTokenConstraints;
3184  if (!predict.contains(cat, nextTokenConstraints))
3185  predict.insert(cat, nextTokenConstraints);
3186  }
3187  } else {
// Nonterminal: record one entry per distinct parent constraint set, to be
// validated later during expansion.
3188  foreach (const StackItem &parent, parents) {
3189  const StackItemType3 *parentData
3190  = static_cast<const StackItemType3 *>(parent.data());
3191  NextTokenConstraints nextTokenConstraints
3192  = parentData->nextTokenConstraints();
3193  if (!predict.contains(cat, nextTokenConstraints))
3194  predict.insert(cat, nextTokenConstraints);
3195  }
3196  }
3197  }
3198  }
3199  }
3200  return predict;
3201 }
3202 
3206 
// Parser::expandNonterminalPredictionC overload taking explicit
// NextTokenConstraints (the first signature line, original 3215 --
// presumably "QHash<Cat, QSet<Cat> > Parser::expandNonterminalPredictionC(
// CatArg cat," -- is missing from this extraction). Expands the prediction
// under the given constraints; warns and returns empty for a terminal.
3216  const NextTokenConstraints &nextTokenConstraints)
3217 {
3218  QHash<Cat, QSet<Cat> > result;
3219  if (isToken(cat))
3220  qWarning("trying to expand terminal prediction");
3221  else
3222  expandNonterminalPredictionRecurseC(cat, result, QSet<Cat>(), -1,
3223  PseudoCatScope(),
3224  nextTokenConstraints);
3225  return result;
3226 }
3227 
3231 
// Parser::expandNonterminalPredictionC overload taking a disjunctive list of
// NextTokenConstraints (the first signature line, original 3242, is missing
// from this extraction). A prediction passes if ANY list entry allows it:
// size 0 warns and yields nothing, size 1 delegates to the single-constraint
// overload, otherwise predict unconstrained first and filter afterwards.
3243  const QList<NextTokenConstraints> &nextTokenConstraintsList)
3244 {
3245  switch (nextTokenConstraintsList.size()) {
3246  case 0: // An empty disjunctive list is false, probably not what you want!
3247  qWarning("list of next token constraints is empty");
3248  return QHash<Cat, QSet<Cat> >();
3249  case 1: // common case, handle the next token constraints directly
3250  return expandNonterminalPredictionC(cat,
3251  nextTokenConstraintsList.first());
3252  default: // general case, predict first, filter later
3253  {
3254  QHash<Cat, QSet<Cat> > result = expandNonterminalPredictionC(cat);
3255  QHash<Cat, QSet<Cat> > filteredResult;
3256  QHashIterator<Cat, QSet<Cat> > it(result);
3257  while (it.hasNext()) {
3258  it.next();
// Keep a predicted token only if at least one constraint set accepts it.
3259  foreach (CatArg cati, it.value()) {
3260  bool nextTokenConstraintsPass = false;
3261  foreach (const NextTokenConstraints &nextTokenConstraints,
3262  nextTokenConstraintsList) {
3263  if (validateNextTokenConstraints(cati, nextTokenConstraints)) {
3264  nextTokenConstraintsPass = true;
3265  break;
3266  }
3267  }
3268  if (nextTokenConstraintsPass)
3269  filteredResult[it.key()].insert(cati);
3270  }
3271  }
3272  return filteredResult;
3273  }
3274  }
3275 }
3276 
3279 
// Parser::computeConstrainedMultiPredictions (first signature line, original
// 3299, missing here; per the documentation index it is
// "ConstrainedMultiPredictions computeConstrainedMultiPredictions(
// const QList<StackItem> &stacks) const"). Combines computeMultiPredictions
// with next-token-constraint tracking: token predictions are validated
// immediately against the parent's constraints, nonterminal predictions
// carry the constraints along in the ConstrainedMultiPrediction.
3300  const QList<StackItem> &stacks) const
3301 {
3302  ConstrainedMultiPredictions predictMulti;
3303  foreach (const StackItem &stack, stacks) {
3304  if (stack.type()) { // type 6 item
3305  /* We will define the literal as the full string of the P constraint here,
3306  which is very easy to obtain. This might not strictly be a literal in
3307  the sense of the definition, since it could be obtained from a whole
3308  tree of nonterminals, but it definitely always contains the literal (in
3309  the strict sense) containing the predicted category. */
3310  const StackItemType6 *data
3311  = static_cast<const StackItemType6 *>(stack.data());
3312  QList<Node> leaves = data->leaves();
3313  int i = data->i();
3314  QList<Cat> prediction, literal;
3315  int l = leaves.size();
// prediction = leaves from the current position forward;
// literal = all leaves (prediction plus the already-matched prefix).
3316  for (int j=i; j<l; j++)
3317  prediction.append(leaves.at(j).cat);
3318  literal = prediction;
3319  for (int j=i-1; j>=0; j--)
3320  literal.prepend(leaves.at(j).cat);
3321  ConstrainedMultiPrediction multiPrediction(literal, data->tree().cat);
3322  if (!predictMulti.contains(prediction, multiPrediction))
3323  predictMulti.insert(prediction, multiPrediction);
3324  } else { // type 0 item
3325  const StackItemType0 *data
3326  = static_cast<const StackItemType0 *>(stack.data());
3327  Cat cat = data->effCat();
3328  const QList<StackItem> &parents = data->parents();
3329  if (isToken(cat)) {
3330  if (parents.isEmpty()) { // huh, start category is a terminal?
3331  QList<Cat> list;
3332  list << cat;
3333  ConstrainedMultiPrediction multiPrediction(list, cat);
3334  if (!predictMulti.contains(list, multiPrediction))
3335  predictMulti.insert(list, multiPrediction);
3336  } else {
3337  foreach (const StackItem &parent, parents) {
3338  const StackItemType3 *parentData
3339  = static_cast<const StackItemType3 *>(parent.data());
3340  // validate the next token constraints immediately for tokens
3341  NextTokenConstraints nextTokenConstraints
3342  = parentData->nextTokenConstraints();
3343  if (!validateNextTokenConstraints(cat, nextTokenConstraints))
3344  continue;
3345  Rule rule = parentData->rule();
3346  int i = parentData->i();
// Key on the run of tokens from position i forward; the stored prediction
// carries the maximal literal extended backwards over preceding tokens
// (same prediction/literal split as in the type-6 branch above).
3347  int len = 1;
3348  while (i+len < rule.size() && isToken(rule.at(i+len))) len++;
3349  QList<Cat> literal = rule.mid(i, len);
3350  while (i > 0 && isToken(rule.at(i-1))) i--, len++;
3351  ConstrainedMultiPrediction multiPrediction(rule.mid(i, len),
3352  parentData->tree().cat);
3353  if (!predictMulti.contains(literal, multiPrediction))
3354  predictMulti.insert(literal, multiPrediction);
3355  }
3356  }
3357  } else {
3358  if (parents.isEmpty()) { // start category, no constraints
3359  QList<Cat> list;
3360  list << cat;
3361  ConstrainedMultiPrediction multiPrediction(list, cat);
3362  if (!predictMulti.contains(list, multiPrediction))
3363  predictMulti.insert(list, multiPrediction);
3364  } else {
// Nonterminal with parents: keep the parent's constraints in the
// prediction so they can be checked during later expansion.
3365  foreach (const StackItem &parent, parents) {
3366  const StackItemType3 *parentData
3367  = static_cast<const StackItemType3 *>(parent.data());
3368  NextTokenConstraints nextTokenConstraints
3369  = parentData->nextTokenConstraints();
3370  QList<Cat> list;
3371  list << cat;
3372  ConstrainedMultiPrediction multiPrediction(list, cat,
3373  nextTokenConstraints);
3374  if (!predictMulti.contains(list, multiPrediction))
3375  predictMulti.insert(list, multiPrediction);
3376  }
3377  }
3378  }
3379  }
3380  }
3381  return predictMulti;
3382 }
3383 
3387 
// Parser::expandNonterminalPredictionMultiC overload taking explicit
// NextTokenConstraints (the name line, original 3397 -- presumably
// "Parser::expandNonterminalPredictionMultiC(CatArg cat," -- is missing from
// this extraction). Multi-token counterpart of the constrained
// expandNonterminalPredictionC; warns and returns empty for a terminal.
3396 QHash<Cat, QSet<QList<Cat> > >
3398  const NextTokenConstraints &nextTokenConstraints)
3399 {
3400  QHash<Cat, QSet<QList<Cat> > > result;
3401  if (isToken(cat))
3402  qWarning("trying to expand terminal prediction");
3403  else
3404  expandNonterminalPredictionMultiRecurseC(cat, result, QSet<Cat>(), -1,
3405  PseudoCatScope(),
3406  nextTokenConstraints);
3407  return result;
3408 }
3409 
3413 
// Parser::expandNonterminalPredictionMultiC overload taking a disjunctive
// list of NextTokenConstraints (the name line, original 3425, is missing
// from this extraction, as are lines 3433 and 3438 -- the delegating call
// and the unconstrained-expansion initializer). Mirrors the single-token
// list overload: size 0 warns and yields nothing, size 1 delegates to the
// single-constraint overload, otherwise predict first and filter each
// literal by its first token against the constraint list.
3424 QHash<Cat, QSet<QList<Cat> > >
3426  const QList<NextTokenConstraints> &nextTokenConstraintsList)
3427 {
3428  switch (nextTokenConstraintsList.size()) {
3429  case 0: // An empty disjunctive list is false, probably not what you want!
3430  qWarning("list of next token constraints is empty");
3431  return QHash<Cat, QSet<QList<Cat> > >();
3432  case 1: // common case, handle the next token constraints directly
3434  nextTokenConstraintsList.first());
3435  default: // general case, predict first, filter later
3436  {
3437  QHash<Cat, QSet<QList<Cat> > > result
3439  QHash<Cat, QSet<QList<Cat> > > filteredResult;
3440  QHashIterator<Cat, QSet<QList<Cat> > > it(result);
3441  while (it.hasNext()) {
3442  it.next();
// A literal passes if its FIRST token satisfies at least one entry of the
// disjunctive constraint list.
3443  foreach (const QList<Cat> &literal, it.value()) {
3444  CatArg cati = literal.first();
3445  bool nextTokenConstraintsPass = false;
3446  foreach (const NextTokenConstraints &nextTokenConstraints,
3447  nextTokenConstraintsList) {
3448  if (validateNextTokenConstraints(cati, nextTokenConstraints)) {
3449  nextTokenConstraintsPass = true;
3450  break;
3451  }
3452  }
3453  if (nextTokenConstraintsPass)
3454  filteredResult[it.key()].insert(literal);
3455  }
3456  }
3457  return filteredResult;
3458  }
3459  }
3460 }
3461 
3462 } // end namespace
const StackItem & parent() const
Definition: dyngenpar.h:725
PseudoCatScope scope() const
Definition: dyngenpar.h:729
ConstrainedMultiPredictions computeConstrainedMultiPredictions(const QList< StackItem > &stacks) const
compute a set of multi-token predictions from the stacks produced by an incremental parse ...
Definition: dyngenpar.cpp:3299
QHash< Cat, QPair< Cat, int > > componentCats
maps categories which represent components of a multi-component category to the category and componen...
Definition: dyngenpar.h:1294
const StackItem & parent() const
Definition: dyngenpar.h:669
PseudoCatScope scope() const
Definition: dyngenpar.h:551
type 6 item: during match, we&#39;re matching a P constraint
Definition: dyngenpar.h:708
virtual LexerState saveState()
saves the current lexer state, by default a null LexerState
Definition: dyngenpar.h:881
QPair< QPair< Node, NextTokenConstraints >, int > pConstraint(CatArg cat) const
Definition: dyngenpar.h:372
MultiPredictions computeMultiPredictions(const QList< StackItem > &stacks) const
compute a set of multi-token predictions from the stacks produced by an incremental parse ...
Definition: dyngenpar.cpp:2806
const QList< StackItem > & parents() const
Definition: dyngenpar.h:583
node in the parse tree
Definition: dyngenpar.h:320
QHash< Cat, QSet< Cat > > expandNonterminalPrediction(CatArg cat) const
expand a nonterminal prediction to the possible initial tokens and the nonterminals they immediately ...
Definition: dyngenpar.cpp:2575
data passed to parser actions
Definition: dyngenpar.h:415
TokenSet tokens
tokens
Definition: dyngenpar.h:1260
uint qHash(const NextTokenConstraints &nextTokenConstraints)
simple hash function for next token constraints
Definition: dyngenpar.cpp:452
bool hasPConstraint(CatArg cat) const
Definition: dyngenpar.h:366
virtual int lookaheadTokens() const
the number of tokens to look ahead before deciding to execute the action
Definition: dyngenpar.h:447
NextTokenConstraints nextTokenConstraints
Definition: dyngenpar.h:409
QList< Cat > expect
list of context-free categories the next token MUST match
Definition: dyngenpar.h:95
QString Cat
Category type: string or integer depending on DYNGENPAR_INTEGER_CATEGORIES.
Definition: dyngenpar.h:71
QHash< Cat, QPair< QPair< Node, NextTokenConstraints >, int > > & pConstraints()
Definition: dyngenpar.h:358
NextTokenConstraints nextTokenConstraints() const
Definition: dyngenpar.h:730
type 0 item: during match, we're waiting for a token to shift
Definition: dyngenpar.h:520
term in the expression of a component of a PMCFG function
Definition: dyngenpar.h:955
PseudoCatScope scope
Definition: dyngenpar.h:408
component of a PMCFG function, a sequence of terms
Definition: dyngenpar.h:976
const QList< StackItem > & parents() const
Definition: dyngenpar.h:547
(possibly partial) match
Definition: dyngenpar.h:397
#define IS_EPSILON(cat)
Definition: dyngenpar.h:72
QList< Match > parse(int *errorPos=0, Cat *errorToken=0, int *incrementalPos=0, QList< StackItem > *incrementalStacks=0, QList< Match > *incrementalMatches=0, LexerState *lexerState=0)
parse the input string
Definition: dyngenpar.cpp:2437
RuleSet cfRules
optional context-free rules
Definition: dyngenpar.h:1088
QHash< Cat, QSet< Cat > > expandNonterminalPredictionC(CatArg cat)
expand a nonterminal prediction to the possible initial tokens and the nonterminals they immediately ...
Definition: dyngenpar.cpp:2774
Node parseTree()
get the parse tree for the last shifted token
Definition: dyngenpar.h:834
QHash< Cat, QPair< int, PseudoCatScope > > & mcfgConstraints()
Definition: dyngenpar.h:362
multi-token predictions
Definition: dyngenpar.h:217
const StackItem & parent() const
Definition: dyngenpar.h:697
TokenSet tokens
set of true tokens
Definition: dyngenpar.h:1076
QVariant label() const
Definition: dyngenpar.h:140
void addRule(CatArg cat, const Rule &rule)
adds a new rule to the grammar, updates the nullable categories and the initial graph and clears the ...
Definition: dyngenpar.cpp:689
QHash< Cat, QList< Cat > > catComponents
maps multi-component categories to the list of their components
Definition: dyngenpar.h:1300
static bool serializeActions
whether the operator<<(QDataStream &, const Rule &) should serialize actions
Definition: dyngenpar.h:132
Function lookupFunction(const QVariant &id) const
Definition: dyngenpar.h:1101
virtual void execute(const ActionInfo &info)=0
Cat startCat
start category
Definition: dyngenpar.h:1079
type 3 item: during matchRemaining, we're executing a match
Definition: dyngenpar.h:616
uint qHash(const QList< DynGenPar::Cat > &list)
simple hash function for lists of categories
Definition: dyngenpar.cpp:437
attached to the parse trees as rule labels to allow obtaining syntax trees
Definition: dyngenpar.h:1113
bool isToken(CatArg cat) const
Definition: dyngenpar.h:1164
type 1 item: during type 0 item processing, we're executing a reduce
Definition: dyngenpar.h:561
bool hasMcfgConstraint(CatArg cat) const
Definition: dyngenpar.h:369
QList< Cat > taboo
list of context-free categories the next token MUST NOT match
Definition: dyngenpar.h:103
virtual bool rewindTo(int pos, const LexerState &=LexerState())
rewind to an older position (requires buffering)
Definition: dyngenpar.h:862
QHash< Cat, QSet< QList< Cat > > > expandNonterminalPredictionMulti(CatArg cat) const
expand a nonterminal prediction to the possible initial nonempty literals (strings of one or more tok...
Definition: dyngenpar.cpp:2918
PseudoCatScope scope() const
Definition: dyngenpar.h:586
TokenSource * inputSource
input source
Definition: dyngenpar.h:1305
Action * action
Definition: dyngenpar.h:143
QSet< Cat > Predictions
Definition: dyngenpar.h:214
bool isLiteral(const QList< Cat > &list) const
is a given list of categories a literal?
Definition: dyngenpar.cpp:553
QMultiHash< QList< Cat >, ConstrainedMultiPrediction > ConstrainedMultiPredictions
Definition: dyngenpar.h:258
QMultiHash< Cat, NextTokenConstraints > ConstrainedPredictions
Definition: dyngenpar.h:233
QHash< Cat, QSet< QList< Cat > > > expandNonterminalPredictionMultiC(CatArg cat)
expand a nonterminal prediction to the possible initial nonempty literals (strings of one or more tok...
Definition: dyngenpar.cpp:3124
RuleSet rules
set of PMCFG rules
Definition: dyngenpar.h:1073
int currentPosition()
get the current input position
Definition: dyngenpar.h:849
Cat startCat
start category
Definition: dyngenpar.h:1262
int ruleno
used for PMCFGs
Definition: dyngenpar.h:406
int type() const
Definition: dyngenpar.h:506
bool loadPmcfg(const Pmcfg &pmcfg)
loads a PMCFG in standard form, converting it to the internal representation
Definition: dyngenpar.cpp:932
QMultiHash< QList< Cat >, MultiPrediction > MultiPredictions
Definition: dyngenpar.h:232
type 4 item: during reduce, we're executing a matchRemaining
Definition: dyngenpar.h:655
RuleSet rules
grammar rules
Definition: dyngenpar.h:1258
multi-token predictions with next token constraints
Definition: dyngenpar.h:236
type 2 item: during reduce, we're recursively executing another reduce
Definition: dyngenpar.h:596
const StackItem & parent() const
Definition: dyngenpar.h:609
const Cat & CatArg
Category type (string or integer) when passed as an argument.
Definition: dyngenpar.h:83
virtual bool matchParseTree(const Node &treeToMatch)
match the parse tree for the last shifted token against the given tree
Definition: dyngenpar.h:845
Predictions computePredictions(const QList< StackItem > &stacks) const
compute a set of predictions from the stacks produced by an incremental parse
Definition: dyngenpar.cpp:2515
bool isComponent() const
Definition: dyngenpar.h:965
QHash< Cat, QPair< Cat, QList< Cat > > > pseudoCats
pseudo-categories, used to represent PMCFGs internally
Definition: dyngenpar.h:1288
void setLabel(const QVariant &label)
Definition: dyngenpar.h:288
ConstrainedPredictions computeConstrainedPredictions(const QList< StackItem > &stacks) const
compute a set of predictions from the stacks produced by an incremental parse
Definition: dyngenpar.cpp:3147
QList< Node > leaves() const
Definition: dyngenpar.h:726
void setParents(const QList< StackItem > &parents)
Definition: dyngenpar.h:509
int ruleno
needed for PMCFGs (to match components of rules to each other)
Definition: dyngenpar.h:272
PMCFG function.
Definition: dyngenpar.h:1015
QVariant label() const
Definition: dyngenpar.h:287
void initCaches()
clears all caches, then computes the nullable categories and the initial graph
Definition: dyngenpar.cpp:578
PseudoCatScope scope() const
Definition: dyngenpar.h:699
bool isToken() const
Definition: dyngenpar.h:966
QVector< QVector< int > > argPositions
Definition: dyngenpar.h:1118
full rule as found in the initial graph
Definition: dyngenpar.h:261
const StackItemData * data() const
Definition: dyngenpar.h:510
static bool serializeLabels
whether the operator<<(QDataStream &, const Rule &) should serialize labels
Definition: dyngenpar.h:131
QVariant data
Definition: dyngenpar.h:327
NextTokenConstraints nextTokenConstraints
Definition: dyngenpar.h:142
type 5 item: during match (of an MCFG constraint), we're executing a matchRemaining ...
Definition: dyngenpar.h:682
rule constraints affecting the next token, for scannerless parsing
Definition: dyngenpar.h:87
QList< Alternative > children
Definition: dyngenpar.h:328
Node parseTreeToPmcfgSyntaxTree(const Node &parseTree)
converts a parse tree obtained from a PMCFG to a PMCFG syntax tree
Definition: dyngenpar.cpp:539
QPair< int, PseudoCatScope > mcfgConstraint(CatArg cat) const
Definition: dyngenpar.h:377
Cat nextToken()
get the next token from the input, increment current position, save parse tree
Definition: dyngenpar.h:822
bool addPmcfgRule(Pmcfg &pmcfg, CatArg cat, const Rule &rule)
adds a new rule to the grammar (both the PMCFG and the internal representation), updates the nullable...
Definition: dyngenpar.cpp:974
const StackItem & parent() const
Definition: dyngenpar.h:633
NextTokenConstraints nextTokenConstraints
Definition: dyngenpar.h:978
NextTokenConstraints nextTokenConstraints() const
Definition: dyngenpar.h:640