|
DynGenPar
Dynamic Generalized Parser
|
00001 /* DynGenPar: Dynamic Generalized Parser 00002 Copyright (C) 2010-2012 Kevin Kofler <kevin.kofler@chello.at> 00003 00004 Support by the Austrian Science Fund FWF under contract numbers 00005 P20631 and P23554 is gratefully acknowledged. 00006 00007 This program is free software: you can redistribute it and/or modify 00008 it under the terms of the GNU General Public License as published by 00009 the Free Software Foundation, either version 2 of the License, or 00010 (at your option) any later version. 00011 00012 This program is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU General Public License for more details. 00016 00017 You should have received a copy of the GNU General Public License 00018 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 00019 00020 // Don't use pragma once here, Qt Jambi doesn't support it. 00021 #ifndef DYNGENPAR_H 00022 #define DYNGENPAR_H 00023 00024 #include <QString> 00025 #include <QList> 00026 #include <QStringList> 00027 #include <QHash> 00028 #include <QMultiHash> 00029 #include <QSet> 00030 #include <QPair> 00031 #include <QQueue> 00032 #include <QSharedData> 00033 #include <QVariant> 00034 #include <QVector> 00035 00036 // Tell GCC that qFatal does not return. 00037 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4)) 00038 #define qFatal(x...) (qFatal(x), __builtin_unreachable()) 00039 #endif 00040 00041 namespace DynGenPar { 00042 00043 // --- general data structures (both context-free grammars and PMCFGs) --- 00044 00045 #ifdef DYNGENPAR_INTEGER_CATEGORIES 00046 typedef int Cat; 00047 #define IS_EPSILON(cat) (!(cat)) 00048 typedef int CatArg; 00049 #else 00050 00051 00057 typedef QString Cat; 00058 #define IS_EPSILON(cat) ((cat).isNull()) 00059 00060 00069 typedef const Cat &CatArg; 00070 #endif 00071 00073 template<class MultiHashType> 00074 QHash<typename MultiHashType::key_type, 00075 QList<typename MultiHashType::mapped_type> > 00076 multiHashToListHash(const MultiHashType &multiHash) 00077 { 00078 QHash<typename MultiHashType::key_type, 00079 QList<typename MultiHashType::mapped_type> > result; 00080 QHashIterator<typename MultiHashType::key_type, 00081 typename MultiHashType::mapped_type> it(multiHash); 00082 while (it.hasNext()) { 00083 it.next(); 00084 result[it.key()].append(it.value()); 00085 } 00086 return result; 00087 } 00088 00090 struct NextTokenConstraints { 00092 00098 QList<Cat> expect; 00100 00106 QList<Cat> taboo; 00108 bool operator==(const NextTokenConstraints &other) const { 00109 return expect == other.expect && taboo == other.taboo; 00110 } 00111 }; 00112 uint qHash(const NextTokenConstraints &nextTokenConstraints); 00113 00115 class Action; 00116 class Rule : public QList<Cat> { 00117 public: 00118 Rule() : QList<Cat>(), action(0) {} 00119 explicit Rule(const QVariant &label) 00120 : QList<Cat>(), action(0), lbl(label) {} 00121 explicit Rule(const QList<Cat> &list) 00122 : QList<Cat>(list), action(0), lbl() {} 00123 Rule(const QList<Cat> &list, const QVariant &label) 00124 : QList<Cat>(list), action(0), lbl(label) {} 00125 QVariant label() const {return lbl;} 00126 void setLabel(const QVariant &label) {lbl = label;} 00127 NextTokenConstraints nextTokenConstraints; 00128 Action *action; 00129 Rule &operator+=(const QList<Cat> &other) { 00130 QList<Cat>::operator+=(other); 00131 return *this; 00132 } 00133 Rule &operator+=(const Cat &value) { // QList always wants a const & 00134 QList<Cat>::operator+=(value); 00135 return *this; 00136 } 00137 Rule &operator<<(const QList<Cat> &other) { 00138 QList<Cat>::operator<<(other); 00139 return *this; 00140 } 00141 Rule &operator<<(const Cat &value) { // QList always wants a const & 00142 QList<Cat>::operator<<(value); 00143 return *this; 00144 } 00146 void add(const Cat &value) { // QList always wants a const & 00147 append(value); 00148 } 00150 QList<Cat> &toList() { return (QList<Cat> &) *this; } 00151 00152 private: 00153 QVariant lbl; 00154 }; 00155 00156 typedef QHash<Cat, QList<Rule> > RuleSet; 00157 typedef QSet<Cat> TokenSet; 00158 00160 00163 struct Cfg { 00165 Cfg() : rules(), tokens(), startCat() {} 00166 Cfg(const RuleSet &r, const TokenSet &t, CatArg sc) 00167 : rules(r), tokens(t), startCat(sc) {} 00168 RuleSet rules; 00169 TokenSet tokens; 00170 Cat startCat; 00171 bool isToken(CatArg cat) const {return tokens.contains(cat);} 00172 void addToken(CatArg cat) {tokens.insert(cat);} 00173 }; 00174 00175 typedef QSet<Cat> Predictions; 00176 00178 struct MultiPrediction { 00180 MultiPrediction() : fullLiteral(), cat() {} 00181 MultiPrediction(const QList<Cat> &fullLit, CatArg c) 00182 : fullLiteral(fullLit), cat(c) {} 00183 QList<Cat> fullLiteral; 00184 Cat cat; 00185 00187 bool operator==(const MultiPrediction &other) const { 00188 return fullLiteral == other.fullLiteral && cat == other.cat; 00189 } 00190 }; 00191 typedef QMultiHash<QList<Cat>, MultiPrediction> MultiPredictions; 00192 typedef QMultiHash<Cat, NextTokenConstraints> ConstrainedPredictions; 00193 00195 struct ConstrainedMultiPrediction { 00197 ConstrainedMultiPrediction() 00198 : fullLiteral(), cat(), nextTokenConstraints() {} 00199 ConstrainedMultiPrediction(const QList<Cat> &fullLit, CatArg c) 00200 : fullLiteral(fullLit), cat(c), nextTokenConstraints() {} 00201 ConstrainedMultiPrediction(const QList<Cat> &fullLit, CatArg c, 00202 NextTokenConstraints ntc) 00203 : fullLiteral(fullLit), cat(c), nextTokenConstraints(ntc) {} 00204 QList<Cat> fullLiteral; 00205 Cat cat; 00206 NextTokenConstraints nextTokenConstraints; 00207 00209 bool operator==(const ConstrainedMultiPrediction &other) const { 00210 return fullLiteral == other.fullLiteral && cat == other.cat 00211 && nextTokenConstraints == other.nextTokenConstraints; 00212 } 00213 }; 00214 typedef QMultiHash<QList<Cat>, ConstrainedMultiPrediction> 00215 ConstrainedMultiPredictions; 00216 00218 struct FullRule { 00220 FullRule() : cat(), rule(), epsilonsSkipped(0), ruleno(0) {} 00221 FullRule(CatArg c, const Rule &r, int epsSkipped, int n) 00222 : cat(c), rule(r), epsilonsSkipped(epsSkipped), ruleno(n) {} 00223 Cat cat; 00224 Rule rule; 00225 int epsilonsSkipped; 00227 00229 int ruleno; 00230 }; 00231 00233 struct Node; 00234 class Alternative : public QList<Node> { 00235 public: 00236 Alternative() : QList<DynGenPar::Node>() {} 00237 explicit Alternative(const QVariant &label) 00238 : QList<DynGenPar::Node>(), lbl(label) {} 00239 explicit Alternative(const QList<DynGenPar::Node> &list) 00240 : QList<DynGenPar::Node>(list), lbl() {} 00241 Alternative(const QList<DynGenPar::Node> &list, const QVariant &label) 00242 : QList<DynGenPar::Node>(list), lbl(label) {} 00243 QVariant label() const {return lbl;} 00244 void setLabel(const QVariant &label) {lbl = label;} 00245 Alternative &operator+=(const QList<DynGenPar::Node> &other) { 00246 QList<DynGenPar::Node>::operator+=(other); 00247 return *this; 00248 } 00249 Alternative &operator+=(const DynGenPar::Node &value) { 00250 QList<DynGenPar::Node>::operator+=(value); 00251 return *this; 00252 } 00253 Alternative &operator<<(const QList<DynGenPar::Node> &other) { 00254 QList<DynGenPar::Node>::operator<<(other); 00255 return *this; 00256 } 00257 Alternative &operator<<(const DynGenPar::Node &value) { 00258 QList<DynGenPar::Node>::operator<<(value); 00259 return *this; 00260 } 00262 void add(const DynGenPar::Node &value) { 00263 append(value); 00264 } 00266 QList<DynGenPar::Node> &toList() { 00267 return (QList<DynGenPar::Node> &) *this; 00268 } 00269 00270 private: 00271 QVariant lbl; 00272 }; 00273 00275 struct Node { 00276 Node() {} 00277 Node(CatArg c) : cat(c) {children.append(Alternative());} 00278 Node(CatArg c, const QVariant &d) : cat(c), data(d) { 00279 children.append(Alternative()); 00280 } 00281 Cat cat; 00282 QVariant data; 00283 QList<Alternative> children; 00285 bool operator==(const Node &other) const { 00286 return cat == other.cat && data == other.data && children == other.children; 00287 } 00288 }; 00289 00291 class PseudoCatScope; 00292 class PseudoCatScopeData : public QSharedData { 00293 public: 00295 00300 QHash<Cat, QPair<QPair<Node, NextTokenConstraints>, int> > pConstraints; 00302 00305 QHash<Cat, QPair<int, PseudoCatScope> > mcfgConstraints; 00306 }; 00307 class PseudoCatScope { 00308 public: 00309 PseudoCatScope() : d() {} 00310 QHash<Cat, QPair<QPair<Node, NextTokenConstraints>, int> > &pConstraints() { 00311 ensureNonNull(); 00312 return d->pConstraints; 00313 }; 00314 QHash<Cat, QPair<int, PseudoCatScope> > &mcfgConstraints() { 00315 ensureNonNull(); 00316 return d->mcfgConstraints; 00317 }; 00318 bool hasPConstraint(CatArg cat) const { 00319 return d ? d->pConstraints.contains(cat) : false; 00320 }; 00321 bool hasMcfgConstraint(CatArg cat) const { 00322 return d ? d->mcfgConstraints.contains(cat) : false; 00323 }; 00324 QPair<QPair<Node, NextTokenConstraints>, int> pConstraint(CatArg cat) const 00325 { 00326 return d ? d->pConstraints.value(cat) 00327 : QPair<QPair<Node, NextTokenConstraints>, int>(); 00328 }; 00329 QPair<int, PseudoCatScope> mcfgConstraint(CatArg cat) const { 00330 return d ? d->mcfgConstraints.value(cat) : QPair<int, PseudoCatScope>(); 00331 }; 00332 bool isNull() const { return !d.constData(); } 00334 const PseudoCatScopeData *data() const { return d.constData(); } 00335 bool operator==(const PseudoCatScope &other) const { return d == other.d; } 00336 private: 00337 QSharedDataPointer<PseudoCatScopeData> d; 00338 void ensureNonNull() { 00339 if (!d.constData()) d = new PseudoCatScopeData; 00340 } 00341 }; 00342 inline uint qHash(const PseudoCatScope &scope) { 00343 return qHash(scope.data()); 00344 } 00345 00347 struct Match { 00349 Match() : len(0), tree(), ruleno(0), scope(), nextTokenConstraints() {} 00350 Match(int l, Node t, int n, PseudoCatScope s) 00351 : len(l), tree(t), ruleno(n), scope(s), nextTokenConstraints() {} 00352 Match(int l, Node t, int n, PseudoCatScope s, const NextTokenConstraints &nt) 00353 : len(l), tree(t), ruleno(n), scope(s), nextTokenConstraints(nt) {} 00354 int len; 00355 Node tree; 00356 int ruleno; 00357 00358 PseudoCatScope scope; 00359 NextTokenConstraints nextTokenConstraints; 00360 }; 00361 00363 struct ActionInfo { 00365 ActionInfo() : tree() {} 00366 ActionInfo(const Node &t) : tree(t) {} 00367 Node tree; 00368 }; 00369 00371 class Action { 00372 public: 00373 virtual ~Action() {} 00374 virtual void execute(const ActionInfo &info) = 0; 00375 }; 00376 00377 00378 // --- data structures for DAG-structured parse stacks --- 00379 00381 class StackItem; 00382 class StackItemData : public QSharedData { 00383 public: 00384 virtual ~StackItemData() {} 00385 virtual StackItemData *clone() = 0; 00386 virtual int type() const = 0; 00387 virtual void addParent(const StackItem &parent) = 0; 00388 virtual void setParents(const QList<StackItem> &parents) = 0; 00389 }; 00390 00391 class UnifiedStackItemData : public StackItemData { 00392 public: 00393 UnifiedStackItemData() : usageCount(0) {} 00394 virtual ~UnifiedStackItemData() {} 00402 unsigned refUsage() const {return usageCount++;} 00403 unsigned derefUsage() const {return --usageCount;} 00405 private: 00407 mutable unsigned usageCount; 00408 }; 00409 00410 class StackItem { 00411 public: 00412 StackItem() : d() {} 00413 StackItem(const QList<StackItem> &parents, CatArg cat, CatArg effCat, 00414 int pos, const PseudoCatScope &scope); 00415 StackItem(const QList<StackItem> &parents, CatArg cat, CatArg effCat, 00416 const PseudoCatScope &scope); 00417 StackItem(const StackItem &parent, int dummy); 00418 StackItem(const StackItem &parent, const Rule &rule, int len, int curr, 00419 int i, const Node &tree, const PseudoCatScope &scope, int ruleno, 00420 const NextTokenConstraints &nextTokenConstraints); 00421 StackItem(const StackItem &parent, CatArg target, int pos, int len); 00423 StackItem(const StackItem &parent, CatArg cat, 00424 const PseudoCatScope &scope); 00425 StackItem(const StackItem &parent, const QList<Node> &leaves, int i, 00426 const Node &tree, const PseudoCatScope &scope, 00427 const NextTokenConstraints &nextTokenConstraints); 00428 int type() const {return d ? d->type() : -1;} 00429 void addParent(const StackItem &parent) {d->addParent(parent);} 00430 void setParents(const QList<StackItem> &parents) {d->setParents(parents);} 00431 const StackItemData *data() const {return d.constData();} 00432 private: 00433 QSharedDataPointer<StackItemData> d; 00434 }; 00435 00437 class StackItemType0 : public StackItemData { 00438 public: 00439 StackItemType0(const QList<StackItem> &parents, CatArg cat, CatArg effCat, 00440 int pos, const PseudoCatScope &scope) 00441 : m_parents(parents), m_cat(cat), m_effCat(effCat), m_pos(pos), 00442 m_scope(scope) {} 00443 virtual ~StackItemType0() {} 00444 virtual StackItemData *clone() {return new StackItemType0(*this);} 00445 virtual int type() const {return 0;} 00446 virtual void addParent(const StackItem &parent) {m_parents.append(parent);} 00447 virtual void setParents(const QList<StackItem> &parents) { 00448 m_parents = parents; 00449 } 00450 const QList<StackItem> &parents() const {return m_parents;} 00451 Cat cat() const {return m_cat;} 00452 Cat effCat() const {return m_effCat;} 00453 int pos() const {return m_pos;} 00454 PseudoCatScope scope() const {return m_scope;} 00455 private: 00456 QList<StackItem> m_parents; 00457 Cat m_cat, m_effCat; 00458 int m_pos; 00459 PseudoCatScope m_scope; 00460 }; 00461 00463 class StackItemType1 : public StackItemData { 00464 public: 00465 StackItemType1(const QList<StackItem> &parents, CatArg cat, CatArg effCat, 00466 const PseudoCatScope &scope) : 00467 m_parents(parents), m_cat(cat), m_effCat(effCat), m_scope(scope) {} 00468 virtual ~StackItemType1() {} 00469 virtual StackItemData *clone() {return new StackItemType1(*this);} 00470 virtual int type() const {return 1;} 00471 virtual void addParent(const StackItem &parent) {m_parents.append(parent);} 00472 virtual void setParents(const QList<StackItem> &parents) { 00473 m_parents = parents; 00474 } 00475 const QList<StackItem> &parents() const {return m_parents;} 00476 Cat cat() const {return m_cat;} 00477 Cat effCat() const {return m_effCat;} 00478 PseudoCatScope scope() const {return m_scope;} 00479 private: 00480 QList<StackItem> m_parents; 00481 Cat m_cat, m_effCat; 00482 PseudoCatScope m_scope; 00483 }; 00484 00486 00487 class StackItemType2 : public UnifiedStackItemData { 00488 public: 00489 StackItemType2(const StackItem &parent) : m_parent(parent) {} 00490 virtual ~StackItemType2() {} 00491 virtual StackItemData *clone() {return new StackItemType2(*this);} 00492 virtual int type() const {return 2;} 00493 virtual void addParent(const StackItem &) { 00494 qFatal("cannot call addParent on type 2 item"); 00495 } 00496 virtual void setParents(const QList<StackItem> &) { 00497 qFatal("cannot call setParents on type 2 item"); 00498 } 00499 const StackItem &parent() const {return m_parent;} 00500 private: 00501 StackItem m_parent; 00502 }; 00503 00505 class StackItemType3 : public StackItemData { 00506 public: 00507 StackItemType3(const StackItem &parent, const Rule &rule, int len, int curr, 00508 int i, const Node &tree, const PseudoCatScope &scope, 00509 int ruleno, const NextTokenConstraints &nextTokenConstraints) 00510 : m_parent(parent), m_rule(rule), m_len(len), m_curr(curr), m_i(i), 00511 m_tree(tree), m_scope(scope), m_ruleno(ruleno), 00512 m_nextTokenConstraints(nextTokenConstraints) {} 00513 virtual ~StackItemType3() {} 00514 virtual StackItemData *clone() {return new StackItemType3(*this);} 00515 virtual int type() const {return 3;} 00516 virtual void addParent(const StackItem &) { 00517 qFatal("cannot call addParent on type 3 item"); 00518 } 00519 virtual void setParents(const QList<StackItem> &) { 00520 qFatal("cannot call setParents on type 3 item"); 00521 } 00522 const StackItem &parent() const {return m_parent;} 00523 Rule rule() const {return m_rule;} 00524 int len() const {return m_len;} 00525 int curr() const {return m_curr;} 00526 int i() const {return m_i;} 00527 Node tree() const {return m_tree;} 00528 PseudoCatScope scope() const {return m_scope;} 00529 int ruleno() const {return m_ruleno;} 00530 NextTokenConstraints nextTokenConstraints() const { 00531 return m_nextTokenConstraints; 00532 } 00533 private: 00534 StackItem m_parent; 00535 Rule m_rule; 00536 int m_len, m_curr, m_i; 00537 Node m_tree; 00538 PseudoCatScope m_scope; 00539 int m_ruleno; 00540 NextTokenConstraints m_nextTokenConstraints; 00541 }; 00542 00544 00545 class StackItemType4 : public UnifiedStackItemData { 00546 public: 00547 StackItemType4(const StackItem &parent, CatArg target, int pos, int len) 00548 : m_parent(parent), m_target(target), m_pos(pos), m_len(len) {} 00549 virtual ~StackItemType4() {} 00550 virtual StackItemData *clone() {return new StackItemType4(*this);} 00551 virtual int type() const {return 4;} 00552 virtual void addParent(const StackItem &) { 00553 qFatal("cannot call addParent on type 4 item"); 00554 } 00555 virtual void setParents(const QList<StackItem> &) { 00556 qFatal("cannot call setParents on type 4 item"); 00557 } 00558 const StackItem &parent() const {return m_parent;} 00559 Cat target() const {return m_target;} 00560 int pos() const {return m_pos;} 00561 int len() const {return m_len;} 00562 private: 00563 StackItem m_parent; 00564 Cat m_target; 00565 int m_pos, m_len; 00566 }; 00567 00570 class StackItemType5 : public StackItemData { 00571 public: 00572 StackItemType5(const StackItem &parent, CatArg cat, 00573 const PseudoCatScope &scope) 00574 : m_parent(parent), m_cat(cat), m_scope(scope) {} 00575 virtual ~StackItemType5() {} 00576 virtual StackItemData *clone() {return new StackItemType5(*this);} 00577 virtual int type() const {return 5;} 00578 virtual void addParent(const StackItem &) { 00579 qFatal("cannot call addParent on type 5 item"); 00580 } 00581 virtual void setParents(const QList<StackItem> &) { 00582 qFatal("cannot call setParents on type 5 item"); 00583 } 00584 const StackItem &parent() const {return m_parent;} 00585 Cat cat() const {return m_cat;} 00586 PseudoCatScope scope() const {return m_scope;} 00587 private: 00588 StackItem m_parent; 00589 Cat m_cat; 00590 PseudoCatScope m_scope; 00591 }; 00592 00594 class StackItemType6 : public StackItemData { 00595 public: 00596 StackItemType6(const StackItem &parent, const QList<Node> &leaves, int i, 00597 const Node &tree, const PseudoCatScope &scope, 00598 const NextTokenConstraints &nextTokenConstraints) 00599 : m_parent(parent), m_leaves(leaves), m_i(i), m_tree(tree), 00600 m_scope(scope), m_nextTokenConstraints(nextTokenConstraints) {} 00601 virtual ~StackItemType6() {} 00602 virtual StackItemData *clone() {return new StackItemType6(*this);} 00603 virtual int type() const {return 6;} 00604 virtual void addParent(const StackItem &) { 00605 qFatal("cannot call addParent on type 6 item"); 00606 } 00607 virtual void setParents(const QList<StackItem> &) { 00608 qFatal("cannot call setParents on type 6 item"); 00609 } 00610 const StackItem &parent() const {return m_parent;} 00611 QList<Node> leaves() const {return m_leaves;} 00612 int i() const {return m_i;} 00613 Node tree() const {return m_tree;} 00614 PseudoCatScope scope() const {return m_scope;} 00615 NextTokenConstraints nextTokenConstraints() const { 00616 return m_nextTokenConstraints; 00617 } 00618 private: 00619 StackItem m_parent; 00620 QList<Node> m_leaves; 00621 int m_i; 00622 Node m_tree; 00623 PseudoCatScope m_scope; 00624 NextTokenConstraints m_nextTokenConstraints; 00625 }; 00626 00627 inline StackItem::StackItem(const QList<StackItem> &parents, CatArg cat, 00628 CatArg effCat, int pos, 00629 const PseudoCatScope &scope) 00630 { 00631 d = new StackItemType0(parents, cat, effCat, pos, scope); 00632 } 00633 00634 inline StackItem::StackItem(const QList<StackItem> &parents, CatArg cat, 00635 CatArg effCat, const PseudoCatScope &scope) 00636 { 00637 d = new StackItemType1(parents, cat, effCat, scope); 00638 } 00639 00640 inline StackItem::StackItem(const StackItem &parent, int) 00641 { 00642 d = new StackItemType2(parent); 00643 } 00644 00645 inline StackItem::StackItem(const StackItem &parent, const Rule &rule, int len, 00646 int curr, int i, const Node &tree, 00647 const PseudoCatScope &scope, int ruleno, 00648 const NextTokenConstraints &nextTokenConstraints) 00649 { 00650 d = new StackItemType3(parent, rule, len, curr, i, tree, scope, ruleno, 00651 nextTokenConstraints); 00652 } 00653 00654 inline StackItem::StackItem(const StackItem &parent, CatArg target, int pos, 00655 int len) 00656 { 00657 d = new StackItemType4(parent, target, pos, len); 00658 } 00659 00660 inline StackItem::StackItem(const StackItem &parent, CatArg cat, 00661 const PseudoCatScope &scope) 00662 { 00663 d = new StackItemType5(parent, cat, scope); 00664 } 00665 00666 inline StackItem::StackItem(const StackItem &parent, const QList<Node> &leaves, 00667 int i, const Node &tree, 00668 const PseudoCatScope &scope, 00669 const NextTokenConstraints &nextTokenConstraints) 00670 { 00671 d = new StackItemType6(parent, leaves, i, tree, scope, nextTokenConstraints); 00672 } 00673 00674 00675 // --- token source abstraction --- 00676 00678 class AbstractLexerStateData : public QSharedData { 00679 public: 00680 virtual ~AbstractLexerStateData() {} 00681 virtual AbstractLexerStateData *clone() = 0; 00682 }; 00683 00684 class LexerState { 00685 public: 00686 LexerState() : d() {} 00687 LexerState(AbstractLexerStateData *data) : d(data) {} 00688 void clear() { d = 0; } 00689 bool isNull() const { return !d.constData(); } 00690 const AbstractLexerStateData *data() const { return d.constData(); } 00691 bool operator==(const LexerState &other) const { return d == other.d; } 00692 private: 00693 QSharedDataPointer<AbstractLexerStateData> d; 00694 }; 00695 00696 class TokenSource { 00697 public: 00698 TokenSource() : currPos(0) {} 00699 virtual ~TokenSource() {} 00702 00705 Cat nextToken() { 00706 tree = Node(); 00707 Cat result = readToken(); 00708 if (!IS_EPSILON(result)) { 00709 // If the implementation of readToken didn't create a parse tree, create 00710 // the default one: a leaf node. 00711 if (tree.children.isEmpty()) tree = Node(result); 00712 currPos++; 00713 } 00714 return result; 00715 } 00717 Node parseTree() { 00718 return tree; 00719 } 00721 00728 virtual bool matchParseTree(const Node &treeToMatch) { 00729 return treeToMatch.cat == tree.cat; 00730 } 00732 int currentPosition() {return currPos;} 00734 00741 virtual bool rewindTo(int pos, const LexerState & = LexerState()) { 00742 tree = Node(); 00743 return (pos == currPos); 00744 } 00746 virtual LexerState saveState() {return LexerState();} 00747 protected: 00749 virtual Cat readToken() = 0; 00751 00752 bool simpleRewind(int pos) { 00753 tree = Node(); // destroy saved parse tree 00754 if (pos > currPos) // cannot rewind forward 00755 return false; 00756 currPos = pos; 00757 return true; 00758 } 00759 int currPos; 00761 00767 Node tree; 00768 }; 00769 00770 class ListTokenSource : public TokenSource { 00771 public: 00772 ListTokenSource() : TokenSource() {} 00773 virtual ~ListTokenSource() {} 00774 QList<Cat> inputTokens; 00776 virtual bool rewindTo(int pos, const LexerState & = LexerState()) { 00777 return simpleRewind(pos); 00778 } 00779 protected: 00781 virtual Cat readToken() { 00782 if (currPos < inputTokens.size()) 00783 return inputTokens.at(currPos); 00784 return Cat(); 00785 } 00786 }; 00787 00789 00792 struct TextPosition { 00793 TextPosition() : line(0), col(0) {} 00794 TextPosition(int l, int c) : line(l), col(c) {} 00795 00796 void reset() { line = col = 0; } 00797 00799 void countCharacter(unsigned char c) { 00800 // ignore CR 00801 if (c == '\r') return; 00802 00803 if (c == '\n') { // LF 00804 line++; 00805 col = 0; 00806 } else col++; // horizontal character 00807 } 00808 00809 int line; 00810 int col; 00811 }; 00812 00813 00814 // --- data structures for PMCFGs (in standard representation) --- 00815 00817 struct Term { 00819 Term() : arg(-1), component(0), token() {} 00820 Term(int a, int c) : arg(a), component(c), token() {} 00821 Term(CatArg t) : arg(-1), component(0), token(t) {} 00822 int arg, component; 00826 Cat token; 00827 bool isComponent() const {return arg >= 0;} 00828 bool isToken() const {return arg < 0;} 00830 bool operator==(const Term &other) const { 00831 return arg == other.arg && component == other.component 00832 && token == other.token; 00833 } 00834 }; 00835 00837 class Sequence : public QList<Term> { 00838 public: 00839 NextTokenConstraints nextTokenConstraints; 00840 00841 Sequence() : QList<Term>(), nextTokenConstraints() {} 00842 explicit Sequence(const NextTokenConstraints &ntc) 00843 : QList<Term>(), nextTokenConstraints(ntc) {} 00844 explicit Sequence(const QList<Term> &list) 00845 : QList<Term>(list), nextTokenConstraints() {} 00846 Sequence(const QList<Term> &list, const NextTokenConstraints &ntc) 00847 : QList<Term>(list), nextTokenConstraints(ntc) {} 00848 Sequence &operator+=(const QList<Term> &other) { 00849 QList<Term>::operator+=(other); 00850 return *this; 00851 } 00852 Sequence &operator+=(const Term &value) { 00853 QList<Term>::operator+=(value); 00854 return *this; 00855 } 00856 Sequence &operator<<(const QList<Term> &other) { 00857 QList<Term>::operator<<(other); 00858 return *this; 00859 } 00860 Sequence &operator<<(const Term &value) { 00861 QList<Term>::operator<<(value); 00862 return *this; 00863 } 00865 void add(const Term &value) { 00866 append(value); 00867 } 00869 QList<Term> &toList() { 00870 return (QList<Term> &) *this; 00871 } 00872 }; 00873 00875 class Function: public QList<Sequence> { 00876 public: 00877 Function() : QList<Sequence>() {} 00878 explicit Function(const QList<Sequence> &list) : QList<Sequence>(list) {} 00879 Function &operator+=(const QList<Sequence> &other) { 00880 QList<Sequence>::operator+=(other); 00881 return *this; 00882 } 00883 Function &operator+=(const Sequence &value) { 00884 QList<Sequence>::operator+=(value); 00885 return *this; 00886 } 00887 Function &operator<<(const QList<Sequence> &other) { 00888 QList<Sequence>::operator<<(other); 00889 return *this; 00890 } 00891 Function &operator<<(const Sequence &value) { 00892 QList<Sequence>::operator<<(value); 00893 return *this; 00894 } 00896 void add(const Sequence &value) { 00897 append(value); 00898 } 00900 QList<Sequence> &toList() { 00901 return (QList<Sequence> &) *this; 00902 } 00903 }; 00904 00906 struct Pmcfg { 00908 00909 00910 00913 QList<Function> functions; 00915 00932 RuleSet rules; 00934 00935 TokenSet tokens; 00937 00938 Cat startCat; 00940 00947 RuleSet cfRules; 00949 00951 00952 QHash<int, QString> functionNames; 00953 QHash<QString, int> functionIndices; 00954 void addFunction(const QString &name, const Function &function) { 00955 int index = functions.size(); 00956 functions.append(function); 00957 functionNames.insert(index, name); 00958 functionIndices.insert(name, index); 00959 } 00960 Function lookupFunction(const QVariant &id) const { 00961 if (id.type() == QVariant::Int) return functions.at(id.toInt()); 00962 00963 QString name = id.toString(); 00964 if (!functionIndices.contains(name)) return Function(); 00965 return functions.at(functionIndices.value(name)); 00966 } 00968 }; 00969 00971 struct PmcfgComponentInfo { 00972 PmcfgComponentInfo() {} 00973 PmcfgComponentInfo(const Rule &rule) 00974 : pmcfgRule(rule), argPositions(rule.size()) {} 00975 Rule pmcfgRule; 00976 QVector<QVector<int> > argPositions; 00980 }; 00981 00982 Node parseTreeToPmcfgSyntaxTree(const Node &parseTree); 00983 00984 00985 // --- parse state struct, for bindings --- 00986 00988 struct ParseState { 00989 ParseState() 00990 : errorPos(-1), errorToken(), incrementalPos(-1), incrementalStacks(), 00991 incrementalMatches(), lexerState() {} 00992 // explicitly implement the default copy constructor to get it bound 00993 ParseState(const ParseState &other) 00994 : errorPos(other.errorPos), errorToken(other.errorToken), 00995 incrementalPos(other.incrementalPos), 00996 incrementalStacks(other.incrementalStacks), 00997 incrementalMatches(other.incrementalMatches), lexerState(other.lexerState) 00998 {} 00999 01000 int errorPos; 01001 Cat errorToken; 01002 int incrementalPos; 01003 QList<StackItem> incrementalStacks; 01004 QList<Match> incrementalMatches; 01005 LexerState lexerState; 01006 01007 void reset() { *this = ParseState(); } 01008 }; 01009 01010 01011 // --- main class --- 01012 01014 class Parser { 01015 public: 01016 // interface methods 01017 Parser(TokenSource *tokenSource) 01018 : inputSource(tokenSource), lastGeneratedCat(0) {} 01019 virtual ~Parser() {} 01020 bool isToken(CatArg cat) const {return tokens.contains(cat);} 01021 void addToken(CatArg cat) {tokens.insert(cat);} 01022 bool isLiteral(const QList<Cat> &list) const; 01023 void initCaches(); 01024 void addRule(CatArg cat, const Rule &rule); 01025 void loadCfg(const Cfg &cfg) { 01026 rules = cfg.rules; 01027 tokens = cfg.tokens; 01028 startCat = cfg.startCat; 01029 initCaches(); 01030 } 01031 bool loadPmcfg(const Pmcfg &pmcfg); 01032 bool addPmcfgRule(Pmcfg &pmcfg, CatArg cat, const Rule &rule); 01033 QList<Match> parse(int *errorPos = 0, Cat *errorToken = 0, 01034 int *incrementalPos = 0, 01035 QList<StackItem> *incrementalStacks = 0, 01036 QList<Match> *incrementalMatches = 0, 01037 LexerState *lexerState = 0); 01039 QList<Match> parse(ParseState *parseState) { 01040 return parse(&parseState->errorPos, &parseState->errorToken, 01041 &parseState->incrementalPos, &parseState->incrementalStacks, 01042 &parseState->incrementalMatches, &parseState->lexerState); 01043 } 01044 Predictions computePredictions(const QList<StackItem> &stacks) const; 01046 Predictions computePredictions(const ParseState &parseState) const { 01047 return computePredictions(parseState.incrementalStacks); 01048 } 01049 QHash<Cat, QSet<Cat> > expandNonterminalPrediction(CatArg cat) const; 01050 QHash<Cat, QSet<Cat> > expandNonterminalPredictionC(CatArg cat); 01051 MultiPredictions computeMultiPredictions(const QList<StackItem> &stacks) 01052 const; 01054 MultiPredictions computeMultiPredictions(const ParseState &parseState) const 01055 { 01056 return computeMultiPredictions(parseState.incrementalStacks); 01057 } 01059 01065 QHash<QList<Cat>, QList<MultiPrediction> > 01066 computeMultiPredictionsJava(const ParseState &parseState) const 01067 { 01068 return multiHashToListHash(computeMultiPredictions(parseState)); 01069 } 01070 QHash<Cat, QSet<QList<Cat> > > 01071 expandNonterminalPredictionMulti(CatArg cat) const; 01072 QHash<Cat, QSet<QList<Cat> > > 01073 expandNonterminalPredictionMultiC(CatArg cat); 01074 ConstrainedPredictions computeConstrainedPredictions( 01075 const QList<StackItem> &stacks) const; 01077 ConstrainedPredictions computeConstrainedPredictions( 01078 const ParseState &parseState) const { 01079 return computeConstrainedPredictions(parseState.incrementalStacks); 01080 } 01082 01088 QHash<Cat, QList<NextTokenConstraints> > 01089 computeConstrainedPredictionsJava(const ParseState &parseState) const 01090 { 01091 return multiHashToListHash(computeConstrainedPredictions(parseState)); 01092 } 01093 QHash<Cat, QSet<Cat> > expandNonterminalPredictionC(CatArg cat, 01094 const NextTokenConstraints &nextTokenConstraints); 01095 QHash<Cat, QSet<Cat> > expandNonterminalPredictionC(CatArg cat, 01096 const QList<NextTokenConstraints> &nextTokenConstraintsList); 01097 ConstrainedMultiPredictions computeConstrainedMultiPredictions( 01098 const QList<StackItem> &stacks) const; 01100 ConstrainedMultiPredictions computeConstrainedMultiPredictions( 01101 const ParseState &parseState) const 01102 { 01103 return computeConstrainedMultiPredictions(parseState.incrementalStacks); 01104 } 01106 01112 QHash<QList<Cat>, QList<ConstrainedMultiPrediction> > 01113 computeConstrainedMultiPredictionsJava(const ParseState &parseState) const 01114 { 01115 return multiHashToListHash(computeConstrainedMultiPredictions( 01116 parseState)); 01117 } 01118 QHash<Cat, QSet<QList<Cat> > > 01119 expandNonterminalPredictionMultiC(CatArg cat, 01120 const NextTokenConstraints &nextTokenConstraints); 01121 QHash<Cat, QSet<QList<Cat> > > 01122 expandNonterminalPredictionMultiC(CatArg cat, 01123 const QList<NextTokenConstraints> &nextTokenConstraintsList); 01124 01126 01127 01128 01131 RuleSet rules; 01133 TokenSet tokens; 01135 Cat startCat; 01137 01139 01140 01141 01161 QHash<Cat, QPair<Cat, QList<Cat> > > pseudoCats; 01164 01167 QHash<Cat, QPair<Cat, int> > componentCats; 01169 01173 QHash<Cat, QList<Cat> > catComponents; 01175 01176 protected: 01178 TokenSource *inputSource; 01179 01180 private: 01181 // helper methods 01182 Cat effectiveCat(CatArg cat) const; 01183 void processRule(CatArg cat, const Rule &rule, int skip, int ruleno, 01184 QQueue<Cat> &nullableQueue, bool &clearEpsilonMatches); 01185 bool computePmcfgDimension(CatArg cat, const Rule &rule, 01186 const Pmcfg &pmcfg); 01187 bool convertPmcfgRule(CatArg cat, const Rule &rule, const Pmcfg &pmcfg, 01188 bool updateCaches); 01189 bool reachable(CatArg cat, CatArg target, QSet<Cat> mark); 01190 QList<FullRule> neighborhood(CatArg cat, CatArg target); 01191 void finalizeMatches(QList<Match> &matches, CatArg cat, 01192 const PseudoCatScope &scope); 01193 void copyScope(QList<Match> &matches, const PseudoCatScope &scope); 01194 QList<Match> matchCFToEpsilon(CatArg cat, QSet<Cat> mark); 01195 QList<Match> matchEffectiveCatToEpsilon(CatArg cat, QSet<Cat> mark); 01196 QList<Match> matchToEpsilonRecurse(CatArg cat, QSet<Cat> mark, 01197 const PseudoCatScope &scope); 01198 QList<Match> matchToEpsilon(CatArg cat, const PseudoCatScope &scope); 01199 bool matchesTokenRecurse(CatArg cat, CatArg token, QSet<Cat> mark) const; 01200 bool matchesToken(CatArg cat, CatArg token) const; 01201 void collectLeaves(const Node &tree, QList<Node> &leaves); 01202 bool validateNextTokenConstraints(CatArg token, 01203 const NextTokenConstraints &nextTokenConstraints) const; 01204 QList<Match> match(CatArg cat, int pos, const PseudoCatScope &scope, 01205 const StackItem &stack, 01206 const NextTokenConstraints &nextTokenConstraints); 01207 QList<Match> matchRemaining(const Rule &rule, int len, int curr, int i, 01208 const Node &tree, const PseudoCatScope &scope, 01209 int ruleno, const StackItem &stack, 01210 const NextTokenConstraints 01211 &nextTokenConstraints); 01212 Cat findFirstToken(const Node &tree); 01213 void unify(QList<Match> &matches); 01214 QList<Match> reduce(CatArg cat, CatArg target, int pos, int len, 01215 const Node &tree, const StackItem &stack, 01216 const PseudoCatScope &scope, int ruleno, 01217 const NextTokenConstraints &nextTokenConstraints, 01218 QSet<Cat> mark = QSet<Cat>()); 01219 QList<Match> processStackItem(const StackItem &item, 01220 const QList<Match> &matches); 01221 QList<Match> processStack(const StackItem &stack, CatArg token); 01222 void computeUsageCounts(const StackItem &item); 01223 bool shift(int pos); 01224 void expandNonterminalPredictionRecurse(CatArg cat, 01225 QHash<Cat, QSet<Cat> > &result, 01226 QSet<Cat> &mark) const; 01227 void expandNonterminalPredictionRecurseC(CatArg cat, 01228 QHash<Cat, QSet<Cat> > &result, 01229 QSet<Cat> mark, int ruleno, 01230 const PseudoCatScope &scope, 01231 const NextTokenConstraints 01232 &nextTokenConstraints); 01233 void expandNonterminalPredictionMultiRecurse(CatArg cat, 01234 QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> &mark) const; 01235 void expandNonterminalPredictionMultiRecurseC(CatArg cat, 01236 QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> mark, int ruleno, 01237 const PseudoCatScope &scope, 01238 const NextTokenConstraints &nextTokenConstraints); 01239 int generateCat() { return --lastGeneratedCat; } 01240 01242 QMultiHash<Cat, FullRule> initialGraph; 01244 QHash<QPair<Cat, Cat>, QList<FullRule> > neighborhoods; 01246 QSet<Cat> nullable; 01248 QHash<Cat, QList<Match> > epsilonMatches; 01249 01251 QList<StackItem> nextStacks; 01253 QList<Match> currentMatches; 01255 LexerState currentLexerState; 01257 QHash<Cat, int> type0Indices; 01259 01263 QHash<const StackItemData *, QList<Match> > collectedMatches; 01265 int errPos; 01267 Cat errToken; 01269 int lastGeneratedCat; 01270 }; 01271 01272 } // end namespace 01273 01274 01275 // --- utility functions in the global namespace --- 01276 01277 Q_DECLARE_METATYPE(DynGenPar::PmcfgComponentInfo) 01278 01279 uint qHash(const QList<DynGenPar::Cat> &list); 01280 01281 template<> 01282 inline DynGenPar::StackItemData 01283 *QSharedDataPointer<DynGenPar::StackItemData>::clone() 01284 { 01285 return d->clone(); 01286 } 01287 01288 template<> 01289 inline DynGenPar::AbstractLexerStateData 01290 *QSharedDataPointer<DynGenPar::AbstractLexerStateData>::clone() 01291 { 01292 return d->clone(); 01293 } 01294 01295 #endif // DYNGENPAR_H
1.7.4