DynGenPar
Dynamic Generalized Parser
dyngenpar.h
Go to the documentation of this file.
00001 /* DynGenPar: Dynamic Generalized Parser
00002    Copyright (C) 2010-2012 Kevin Kofler <kevin.kofler@chello.at>
00003 
00004    Support by the Austrian Science Fund FWF under contract numbers
00005    P20631 and P23554 is gratefully acknowledged.
00006 
00007    This program is free software: you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation, either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
00019 
00020 // Don't use pragma once here, Qt Jambi doesn't support it.
00021 #ifndef DYNGENPAR_H
00022 #define DYNGENPAR_H
00023 
00024 #include <QString>
00025 #include <QList>
00026 #include <QStringList>
00027 #include <QHash>
00028 #include <QMultiHash>
00029 #include <QSet>
00030 #include <QPair>
00031 #include <QQueue>
00032 #include <QSharedData>
00033 #include <QVariant>
00034 #include <QVector>
00035 
00036 // Tell GCC that qFatal does not return.
00037 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4))
00038 #define qFatal(x...) (qFatal(x), __builtin_unreachable())
00039 #endif
00040 
00041 namespace DynGenPar {
00042 
00043 // --- general data structures (both context-free grammars and PMCFGs) ---
00044 
00045 #ifdef DYNGENPAR_INTEGER_CATEGORIES
00046 typedef int Cat;
00047 #define IS_EPSILON(cat) (!(cat))
00048 typedef int CatArg;
00049 #else
00050 
00051 
00057 typedef QString Cat;
00058 #define IS_EPSILON(cat) ((cat).isNull())
00059 
00060 
00069 typedef const Cat &CatArg;
00070 #endif
00071 
00073 template<class MultiHashType>
00074   QHash<typename MultiHashType::key_type,
00075         QList<typename MultiHashType::mapped_type> >
00076   multiHashToListHash(const MultiHashType &multiHash)
00077 {
00078   QHash<typename MultiHashType::key_type,
00079         QList<typename MultiHashType::mapped_type> > result;
00080   QHashIterator<typename MultiHashType::key_type,
00081                 typename MultiHashType::mapped_type> it(multiHash);
00082   while (it.hasNext()) {
00083     it.next();
00084     result[it.key()].append(it.value());
00085   }
00086   return result;
00087 }
00088 
00090 struct NextTokenConstraints {
00092 
00098   QList<Cat> expect;
00100 
00106   QList<Cat> taboo;
00108   bool operator==(const NextTokenConstraints &other) const {
00109     return expect == other.expect && taboo == other.taboo;
00110   }
00111 };
00112 uint qHash(const NextTokenConstraints &nextTokenConstraints);
00113 
00115 class Action;
00116 class Rule : public QList<Cat> {
00117   public:
00118     Rule() : QList<Cat>(), action(0) {}
00119     explicit Rule(const QVariant &label)
00120       : QList<Cat>(), action(0), lbl(label) {}
00121     explicit Rule(const QList<Cat> &list)
00122       : QList<Cat>(list), action(0), lbl() {}
00123     Rule(const QList<Cat> &list, const QVariant &label)
00124       : QList<Cat>(list), action(0), lbl(label) {}
00125     QVariant label() const {return lbl;}
00126     void setLabel(const QVariant &label) {lbl = label;}
00127     NextTokenConstraints nextTokenConstraints;
00128     Action *action;
00129     Rule &operator+=(const QList<Cat> &other) {
00130       QList<Cat>::operator+=(other);
00131       return *this;
00132     }
00133     Rule &operator+=(const Cat &value) { // QList always wants a const &
00134       QList<Cat>::operator+=(value);
00135       return *this;
00136     }
00137     Rule &operator<<(const QList<Cat> &other) {
00138       QList<Cat>::operator<<(other);
00139       return *this;
00140     }
00141     Rule &operator<<(const Cat &value) { // QList always wants a const &
00142       QList<Cat>::operator<<(value);
00143       return *this;
00144     }
00146     void add(const Cat &value) { // QList always wants a const &
00147       append(value);
00148     }
00150     QList<Cat> &toList() { return (QList<Cat> &) *this; }
00151 
00152   private:
00153     QVariant lbl;
00154 };
00155 
00156 typedef QHash<Cat, QList<Rule> > RuleSet;
00157 typedef QSet<Cat> TokenSet;
00158 
00160 
00163 struct Cfg {
00165   Cfg() : rules(), tokens(), startCat() {}
00166   Cfg(const RuleSet &r, const TokenSet &t, CatArg sc)
00167     : rules(r), tokens(t), startCat(sc) {}
00168   RuleSet rules;
00169   TokenSet tokens;
00170   Cat startCat;
00171   bool isToken(CatArg cat) const {return tokens.contains(cat);}
00172   void addToken(CatArg cat) {tokens.insert(cat);}
00173 };
00174 
00175 typedef QSet<Cat> Predictions;
00176 
00178 struct MultiPrediction {
00180   MultiPrediction() : fullLiteral(), cat() {}
00181   MultiPrediction(const QList<Cat> &fullLit, CatArg c)
00182     : fullLiteral(fullLit), cat(c) {}
00183   QList<Cat> fullLiteral; 
00184   Cat cat; 
00185 
00187   bool operator==(const MultiPrediction &other) const {
00188     return fullLiteral == other.fullLiteral && cat == other.cat;
00189   }
00190 };
00191 typedef QMultiHash<QList<Cat>, MultiPrediction> MultiPredictions;
00192 typedef QMultiHash<Cat, NextTokenConstraints> ConstrainedPredictions;
00193 
00195 struct ConstrainedMultiPrediction {
00197   ConstrainedMultiPrediction()
00198     : fullLiteral(), cat(), nextTokenConstraints() {}
00199   ConstrainedMultiPrediction(const QList<Cat> &fullLit, CatArg c)
00200     : fullLiteral(fullLit), cat(c), nextTokenConstraints() {}
00201   ConstrainedMultiPrediction(const QList<Cat> &fullLit, CatArg c,
00202                              NextTokenConstraints ntc)
00203     : fullLiteral(fullLit), cat(c), nextTokenConstraints(ntc) {}
00204   QList<Cat> fullLiteral; 
00205   Cat cat; 
00206   NextTokenConstraints nextTokenConstraints; 
00207 
00209   bool operator==(const ConstrainedMultiPrediction &other) const {
00210     return fullLiteral == other.fullLiteral && cat == other.cat
00211            && nextTokenConstraints == other.nextTokenConstraints;
00212   }
00213 };
00214 typedef QMultiHash<QList<Cat>, ConstrainedMultiPrediction>
00215   ConstrainedMultiPredictions;
00216 
00218 struct FullRule {
00220   FullRule() : cat(), rule(), epsilonsSkipped(0), ruleno(0) {}
00221   FullRule(CatArg c, const Rule &r, int epsSkipped, int n)
00222     : cat(c), rule(r), epsilonsSkipped(epsSkipped), ruleno(n) {}
00223   Cat cat;
00224   Rule rule;
00225   int epsilonsSkipped;
00227 
00229   int ruleno;
00230 };
00231 
00233 struct Node;
00234 class Alternative : public QList<Node> {
00235   public:
00236     Alternative() : QList<DynGenPar::Node>() {}
00237     explicit Alternative(const QVariant &label)
00238       : QList<DynGenPar::Node>(), lbl(label) {}
00239     explicit Alternative(const QList<DynGenPar::Node> &list)
00240       : QList<DynGenPar::Node>(list), lbl() {}
00241     Alternative(const QList<DynGenPar::Node> &list, const QVariant &label)
00242       : QList<DynGenPar::Node>(list), lbl(label) {}
00243     QVariant label() const {return lbl;}
00244     void setLabel(const QVariant &label) {lbl = label;}
00245     Alternative &operator+=(const QList<DynGenPar::Node> &other) {
00246       QList<DynGenPar::Node>::operator+=(other);
00247       return *this;
00248     }
00249     Alternative &operator+=(const DynGenPar::Node &value) {
00250       QList<DynGenPar::Node>::operator+=(value);
00251       return *this;
00252     }
00253     Alternative &operator<<(const QList<DynGenPar::Node> &other) {
00254       QList<DynGenPar::Node>::operator<<(other);
00255       return *this;
00256     }
00257     Alternative &operator<<(const DynGenPar::Node &value) {
00258       QList<DynGenPar::Node>::operator<<(value);
00259       return *this;
00260     }
00262     void add(const DynGenPar::Node &value) {
00263       append(value);
00264     }
00266     QList<DynGenPar::Node> &toList() {
00267       return (QList<DynGenPar::Node> &) *this;
00268     }
00269 
00270   private:
00271     QVariant lbl;
00272 };
00273 
00275 struct Node {
00276   Node() {} 
00277   Node(CatArg c) : cat(c) {children.append(Alternative());}
00278   Node(CatArg c, const QVariant &d) : cat(c), data(d) {
00279     children.append(Alternative());
00280   }
00281   Cat cat;
00282   QVariant data;
00283   QList<Alternative> children;
00285   bool operator==(const Node &other) const {
00286     return cat == other.cat && data == other.data && children == other.children;
00287   }
00288 };
00289 
00291 class PseudoCatScope;
00292 class PseudoCatScopeData : public QSharedData {
00293   public:
00295 
00300     QHash<Cat, QPair<QPair<Node, NextTokenConstraints>, int> > pConstraints;
00302 
00305     QHash<Cat, QPair<int, PseudoCatScope> > mcfgConstraints;
00306 };
00307 class PseudoCatScope {
00308   public:
00309     PseudoCatScope() : d() {}
00310     QHash<Cat, QPair<QPair<Node, NextTokenConstraints>, int> > &pConstraints() {
00311       ensureNonNull();
00312       return d->pConstraints;
00313     };
00314     QHash<Cat, QPair<int, PseudoCatScope> > &mcfgConstraints() {
00315       ensureNonNull();
00316       return d->mcfgConstraints;
00317     };
00318     bool hasPConstraint(CatArg cat) const {
00319       return d ? d->pConstraints.contains(cat) : false;
00320     };
00321     bool hasMcfgConstraint(CatArg cat) const {
00322       return d ? d->mcfgConstraints.contains(cat) : false;
00323     };
00324     QPair<QPair<Node, NextTokenConstraints>, int> pConstraint(CatArg cat) const
00325     {
00326       return d ? d->pConstraints.value(cat)
00327                : QPair<QPair<Node, NextTokenConstraints>, int>();
00328     };
00329     QPair<int, PseudoCatScope> mcfgConstraint(CatArg cat) const {
00330       return d ? d->mcfgConstraints.value(cat) : QPair<int, PseudoCatScope>();
00331     };
00332     bool isNull() const { return !d.constData(); }
00334     const PseudoCatScopeData *data() const { return d.constData(); }
00335     bool operator==(const PseudoCatScope &other) const { return d == other.d; }
00336   private:
00337     QSharedDataPointer<PseudoCatScopeData> d;
00338     void ensureNonNull() {
00339       if (!d.constData()) d = new PseudoCatScopeData;
00340     }
00341 };
00342 inline uint qHash(const PseudoCatScope &scope) {
00343   return qHash(scope.data());
00344 }
00345 
00347 struct Match {
00349   Match() : len(0), tree(), ruleno(0), scope(), nextTokenConstraints() {}
00350   Match(int l, Node t, int n, PseudoCatScope s)
00351     : len(l), tree(t), ruleno(n), scope(s), nextTokenConstraints() {}
00352   Match(int l, Node t, int n, PseudoCatScope s, const NextTokenConstraints &nt)
00353     : len(l), tree(t), ruleno(n), scope(s), nextTokenConstraints(nt) {}
00354   int len;
00355   Node tree;
00356   int ruleno; 
00357 
00358   PseudoCatScope scope;
00359   NextTokenConstraints nextTokenConstraints;
00360 };
00361 
00363 struct ActionInfo {
00365   ActionInfo() : tree() {}
00366   ActionInfo(const Node &t) : tree(t) {}
00367   Node tree;
00368 };
00369 
00371 class Action {
00372   public:
00373     virtual ~Action() {}
00374     virtual void execute(const ActionInfo &info) = 0;
00375 };
00376 
00377 
00378 // --- data structures for DAG-structured parse stacks ---
00379 
00381 class StackItem;
00382 class StackItemData : public QSharedData {
00383   public:
00384     virtual ~StackItemData() {}
00385     virtual StackItemData *clone() = 0;
00386     virtual int type() const = 0;
00387     virtual void addParent(const StackItem &parent) = 0;
00388     virtual void setParents(const QList<StackItem> &parents) = 0;
00389 };
00390 
00391 class UnifiedStackItemData : public StackItemData {
00392   public:
00393     UnifiedStackItemData() : usageCount(0) {}
00394     virtual ~UnifiedStackItemData() {}
00402     unsigned refUsage() const {return usageCount++;}
00403     unsigned derefUsage() const {return --usageCount;}
00405   private:
00407     mutable unsigned usageCount;
00408 };
00409 
00410 class StackItem {
00411   public:
00412     StackItem() : d() {} 
00413     StackItem(const QList<StackItem> &parents, CatArg cat, CatArg effCat,
00414               int pos, const PseudoCatScope &scope); 
00415     StackItem(const QList<StackItem> &parents, CatArg cat, CatArg effCat,
00416               const PseudoCatScope &scope); 
00417     StackItem(const StackItem &parent, int dummy); 
00418     StackItem(const StackItem &parent, const Rule &rule, int len, int curr,
00419               int i, const Node &tree, const PseudoCatScope &scope, int ruleno,
00420               const NextTokenConstraints &nextTokenConstraints); 
00421     StackItem(const StackItem &parent, CatArg target, int pos, int len);
00423     StackItem(const StackItem &parent, CatArg cat,
00424               const PseudoCatScope &scope); 
00425     StackItem(const StackItem &parent, const QList<Node> &leaves, int i,
00426               const Node &tree, const PseudoCatScope &scope,
00427               const NextTokenConstraints &nextTokenConstraints); 
00428     int type() const {return d ? d->type() : -1;}
00429     void addParent(const StackItem &parent) {d->addParent(parent);}
00430     void setParents(const QList<StackItem> &parents) {d->setParents(parents);}
00431     const StackItemData *data() const {return d.constData();}
00432   private:
00433     QSharedDataPointer<StackItemData> d;
00434 };
00435 
00437 class StackItemType0 : public StackItemData {
00438   public:
00439     StackItemType0(const QList<StackItem> &parents, CatArg cat, CatArg effCat,
00440                    int pos, const PseudoCatScope &scope)
00441       : m_parents(parents), m_cat(cat), m_effCat(effCat), m_pos(pos),
00442         m_scope(scope) {}
00443     virtual ~StackItemType0() {}
00444     virtual StackItemData *clone() {return new StackItemType0(*this);}
00445     virtual int type() const {return 0;}
00446     virtual void addParent(const StackItem &parent) {m_parents.append(parent);}
00447     virtual void setParents(const QList<StackItem> &parents) {
00448       m_parents = parents;
00449     }
00450     const QList<StackItem> &parents() const {return m_parents;}
00451     Cat cat() const {return m_cat;}
00452     Cat effCat() const {return m_effCat;}
00453     int pos() const {return m_pos;}
00454     PseudoCatScope scope() const {return m_scope;}
00455   private:
00456     QList<StackItem> m_parents;
00457     Cat m_cat, m_effCat;
00458     int m_pos;
00459     PseudoCatScope m_scope;
00460 };
00461 
00463 class StackItemType1 : public StackItemData {
00464   public:
00465     StackItemType1(const QList<StackItem> &parents, CatArg cat, CatArg effCat,
00466                    const PseudoCatScope &scope) :
00467       m_parents(parents), m_cat(cat), m_effCat(effCat), m_scope(scope) {}
00468     virtual ~StackItemType1() {}
00469     virtual StackItemData *clone() {return new StackItemType1(*this);}
00470     virtual int type() const {return 1;}
00471     virtual void addParent(const StackItem &parent) {m_parents.append(parent);}
00472     virtual void setParents(const QList<StackItem> &parents) {
00473       m_parents = parents;
00474     }
00475     const QList<StackItem> &parents() const {return m_parents;}
00476     Cat cat() const {return m_cat;}
00477     Cat effCat() const {return m_effCat;}
00478     PseudoCatScope scope() const {return m_scope;}
00479   private:
00480     QList<StackItem> m_parents;
00481     Cat m_cat, m_effCat;
00482     PseudoCatScope m_scope;
00483 };
00484 
00486 
00487 class StackItemType2 : public UnifiedStackItemData {
00488   public:
00489     StackItemType2(const StackItem &parent) : m_parent(parent) {}
00490     virtual ~StackItemType2() {}
00491     virtual StackItemData *clone() {return new StackItemType2(*this);}
00492     virtual int type() const {return 2;}
00493     virtual void addParent(const StackItem &) {
00494       qFatal("cannot call addParent on type 2 item");
00495     }
00496     virtual void setParents(const QList<StackItem> &) {
00497       qFatal("cannot call setParents on type 2 item");
00498     }
00499     const StackItem &parent() const {return m_parent;}
00500   private:
00501     StackItem m_parent;
00502 };
00503 
00505 class StackItemType3 : public StackItemData {
00506   public:
00507     StackItemType3(const StackItem &parent, const Rule &rule, int len, int curr,
00508                    int i, const Node &tree, const PseudoCatScope &scope,
00509                    int ruleno, const NextTokenConstraints &nextTokenConstraints)
00510       : m_parent(parent), m_rule(rule), m_len(len), m_curr(curr), m_i(i),
00511         m_tree(tree), m_scope(scope), m_ruleno(ruleno),
00512         m_nextTokenConstraints(nextTokenConstraints) {}
00513     virtual ~StackItemType3() {}
00514     virtual StackItemData *clone() {return new StackItemType3(*this);}
00515     virtual int type() const {return 3;}
00516     virtual void addParent(const StackItem &) {
00517       qFatal("cannot call addParent on type 3 item");
00518     }
00519     virtual void setParents(const QList<StackItem> &) {
00520       qFatal("cannot call setParents on type 3 item");
00521     }
00522     const StackItem &parent() const {return m_parent;}
00523     Rule rule() const {return m_rule;}
00524     int len() const {return m_len;}
00525     int curr() const {return m_curr;}
00526     int i() const {return m_i;}
00527     Node tree() const {return m_tree;}
00528     PseudoCatScope scope() const {return m_scope;}
00529     int ruleno() const {return m_ruleno;}
00530     NextTokenConstraints nextTokenConstraints() const {
00531       return m_nextTokenConstraints;
00532     }
00533   private:
00534     StackItem m_parent;
00535     Rule m_rule;
00536     int m_len, m_curr, m_i;
00537     Node m_tree;
00538     PseudoCatScope m_scope;
00539     int m_ruleno;
00540     NextTokenConstraints m_nextTokenConstraints;
00541 };
00542 
00544 
00545 class StackItemType4 : public UnifiedStackItemData {
00546   public:
00547     StackItemType4(const StackItem &parent, CatArg target, int pos, int len)
00548       : m_parent(parent), m_target(target), m_pos(pos), m_len(len) {}
00549     virtual ~StackItemType4() {}
00550     virtual StackItemData *clone() {return new StackItemType4(*this);}
00551     virtual int type() const {return 4;}
00552     virtual void addParent(const StackItem &) {
00553       qFatal("cannot call addParent on type 4 item");
00554     }
00555     virtual void setParents(const QList<StackItem> &) {
00556       qFatal("cannot call setParents on type 4 item");
00557     }
00558     const StackItem &parent() const {return m_parent;}
00559     Cat target() const {return m_target;}
00560     int pos() const {return m_pos;}
00561     int len() const {return m_len;}
00562   private:
00563     StackItem m_parent;
00564     Cat m_target;
00565     int m_pos, m_len;
00566 };
00567 
00570 class StackItemType5 : public StackItemData {
00571   public:
00572     StackItemType5(const StackItem &parent, CatArg cat,
00573                    const PseudoCatScope &scope)
00574       : m_parent(parent), m_cat(cat), m_scope(scope) {}
00575     virtual ~StackItemType5() {}
00576     virtual StackItemData *clone() {return new StackItemType5(*this);}
00577     virtual int type() const {return 5;}
00578     virtual void addParent(const StackItem &) {
00579       qFatal("cannot call addParent on type 5 item");
00580     }
00581     virtual void setParents(const QList<StackItem> &) {
00582       qFatal("cannot call setParents on type 5 item");
00583     }
00584     const StackItem &parent() const {return m_parent;}
00585     Cat cat() const {return m_cat;}
00586     PseudoCatScope scope() const {return m_scope;}
00587   private:
00588     StackItem m_parent;
00589     Cat m_cat;
00590     PseudoCatScope m_scope;
00591 };
00592 
00594 class StackItemType6 : public StackItemData {
00595   public:
00596     StackItemType6(const StackItem &parent, const QList<Node> &leaves, int i,
00597                    const Node &tree, const PseudoCatScope &scope,
00598                    const NextTokenConstraints &nextTokenConstraints)
00599       : m_parent(parent), m_leaves(leaves), m_i(i), m_tree(tree),
00600         m_scope(scope), m_nextTokenConstraints(nextTokenConstraints) {}
00601     virtual ~StackItemType6() {}
00602     virtual StackItemData *clone() {return new StackItemType6(*this);}
00603     virtual int type() const {return 6;}
00604     virtual void addParent(const StackItem &) {
00605       qFatal("cannot call addParent on type 6 item");
00606     }
00607     virtual void setParents(const QList<StackItem> &) {
00608       qFatal("cannot call setParents on type 6 item");
00609     }
00610     const StackItem &parent() const {return m_parent;}
00611     QList<Node> leaves() const {return m_leaves;}
00612     int i() const {return m_i;}
00613     Node tree() const {return m_tree;}
00614     PseudoCatScope scope() const {return m_scope;}
00615     NextTokenConstraints nextTokenConstraints() const {
00616       return m_nextTokenConstraints;
00617     }
00618   private:
00619     StackItem m_parent;
00620     QList<Node> m_leaves;
00621     int m_i;
00622     Node m_tree;
00623     PseudoCatScope m_scope;
00624     NextTokenConstraints m_nextTokenConstraints;
00625 };
00626 
00627 inline StackItem::StackItem(const QList<StackItem> &parents, CatArg cat,
00628                             CatArg effCat, int pos,
00629                             const PseudoCatScope &scope)
00630 {
00631   d = new StackItemType0(parents, cat, effCat, pos, scope);
00632 }
00633 
00634 inline StackItem::StackItem(const QList<StackItem> &parents, CatArg cat,
00635                             CatArg effCat, const PseudoCatScope &scope)
00636 {
00637   d = new StackItemType1(parents, cat, effCat, scope);
00638 }
00639 
00640 inline StackItem::StackItem(const StackItem &parent, int)
00641 {
00642   d = new StackItemType2(parent);
00643 }
00644 
00645 inline StackItem::StackItem(const StackItem &parent, const Rule &rule, int len,
00646                             int curr, int i, const Node &tree,
00647                             const PseudoCatScope &scope, int ruleno,
00648                             const NextTokenConstraints &nextTokenConstraints)
00649 {
00650   d = new StackItemType3(parent, rule, len, curr, i, tree, scope, ruleno,
00651                          nextTokenConstraints);
00652 }
00653 
00654 inline StackItem::StackItem(const StackItem &parent, CatArg target, int pos,
00655                             int len)
00656 {
00657   d = new StackItemType4(parent, target, pos, len);
00658 }
00659 
00660 inline StackItem::StackItem(const StackItem &parent, CatArg cat,
00661                             const PseudoCatScope &scope)
00662 {
00663   d = new StackItemType5(parent, cat, scope);
00664 }
00665 
00666 inline StackItem::StackItem(const StackItem &parent, const QList<Node> &leaves,
00667                             int i, const Node &tree,
00668                             const PseudoCatScope &scope,
00669                             const NextTokenConstraints &nextTokenConstraints)
00670 {
00671   d = new StackItemType6(parent, leaves, i, tree, scope, nextTokenConstraints);
00672 }
00673 
00674 
00675 // --- token source abstraction ---
00676 
00678 class AbstractLexerStateData : public QSharedData {
00679   public:
00680     virtual ~AbstractLexerStateData() {}
00681     virtual AbstractLexerStateData *clone() = 0;
00682 };
00683 
00684 class LexerState {
00685   public:
00686     LexerState() : d() {}
00687     LexerState(AbstractLexerStateData *data) : d(data) {}
00688     void clear() { d = 0; }
00689     bool isNull() const { return !d.constData(); }
00690     const AbstractLexerStateData *data() const { return d.constData(); }
00691     bool operator==(const LexerState &other) const { return d == other.d; }
00692   private:
00693     QSharedDataPointer<AbstractLexerStateData> d;
00694 };
00695 
00696 class TokenSource {
00697   public:
00698     TokenSource() : currPos(0) {}
00699     virtual ~TokenSource() {}
00702 
00705     Cat nextToken() {
00706       tree = Node();
00707       Cat result = readToken();
00708       if (!IS_EPSILON(result)) {
00709         // If the implementation of readToken didn't create a parse tree, create
00710         // the default one: a leaf node.
00711         if (tree.children.isEmpty()) tree = Node(result);
00712         currPos++;
00713       }
00714       return result;
00715     }
00717     Node parseTree() {
00718       return tree;
00719     }
00721 
00728     virtual bool matchParseTree(const Node &treeToMatch) {
00729       return treeToMatch.cat == tree.cat;
00730     }
00732     int currentPosition() {return currPos;}
00734 
00741     virtual bool rewindTo(int pos, const LexerState & = LexerState()) {
00742       tree = Node();
00743       return (pos == currPos);
00744     }
00746     virtual LexerState saveState() {return LexerState();}
00747   protected:
00749     virtual Cat readToken() = 0;
00751 
00752     bool simpleRewind(int pos) {
00753       tree = Node(); // destroy saved parse tree
00754       if (pos > currPos) // cannot rewind forward
00755         return false;
00756       currPos = pos;
00757       return true;
00758     }
00759     int currPos;
00761 
00767     Node tree;
00768 };
00769 
00770 class ListTokenSource : public TokenSource {
00771   public:
00772     ListTokenSource() : TokenSource() {}
00773     virtual ~ListTokenSource() {}
00774     QList<Cat> inputTokens;
00776     virtual bool rewindTo(int pos, const LexerState & = LexerState()) {
00777       return simpleRewind(pos);
00778     }
00779   protected:
00781     virtual Cat readToken() {
00782       if (currPos < inputTokens.size())
00783         return inputTokens.at(currPos);
00784       return Cat();
00785     }
00786 };
00787 
00789 
00792 struct TextPosition {
00793   TextPosition() : line(0), col(0) {}
00794   TextPosition(int l, int c) : line(l), col(c) {}
00795 
00796   void reset() { line = col = 0; }
00797 
00799   void countCharacter(unsigned char c) {
00800     // ignore CR
00801     if (c == '\r') return;
00802 
00803     if (c == '\n') { // LF
00804       line++;
00805       col = 0;
00806     } else col++; // horizontal character
00807   }
00808 
00809   int line; 
00810   int col; 
00811 };
00812 
00813 
00814 // --- data structures for PMCFGs (in standard representation) ---
00815 
00817 struct Term {
00819   Term() : arg(-1), component(0), token() {}
00820   Term(int a, int c) : arg(a), component(c), token() {}
00821   Term(CatArg t) : arg(-1), component(0), token(t) {}
00822   int arg, component;
00826   Cat token;
00827   bool isComponent() const {return arg >= 0;}
00828   bool isToken() const {return arg < 0;}
00830   bool operator==(const Term &other) const {
00831     return arg == other.arg && component == other.component
00832            && token == other.token;
00833   }
00834 };
00835 
00837 class Sequence : public QList<Term> {
00838   public:
00839     NextTokenConstraints nextTokenConstraints;
00840 
00841     Sequence() : QList<Term>(), nextTokenConstraints() {}
00842     explicit Sequence(const NextTokenConstraints &ntc)
00843       : QList<Term>(), nextTokenConstraints(ntc) {}
00844     explicit Sequence(const QList<Term> &list)
00845       : QList<Term>(list), nextTokenConstraints() {}
00846     Sequence(const QList<Term> &list, const NextTokenConstraints &ntc)
00847       : QList<Term>(list), nextTokenConstraints(ntc) {}
00848     Sequence &operator+=(const QList<Term> &other) {
00849       QList<Term>::operator+=(other);
00850       return *this;
00851     }
00852     Sequence &operator+=(const Term &value) {
00853       QList<Term>::operator+=(value);
00854       return *this;
00855     }
00856     Sequence &operator<<(const QList<Term> &other) {
00857       QList<Term>::operator<<(other);
00858       return *this;
00859     }
00860     Sequence &operator<<(const Term &value) {
00861       QList<Term>::operator<<(value);
00862       return *this;
00863     }
00865     void add(const Term &value) {
00866       append(value);
00867     }
00869     QList<Term> &toList() {
00870       return (QList<Term> &) *this;
00871     }
00872 };
00873 
00875 class Function: public QList<Sequence> {
00876   public:
00877     Function() : QList<Sequence>() {}
00878     explicit Function(const QList<Sequence> &list) : QList<Sequence>(list) {}
00879     Function &operator+=(const QList<Sequence> &other) {
00880       QList<Sequence>::operator+=(other);
00881       return *this;
00882     }
00883     Function &operator+=(const Sequence &value) {
00884       QList<Sequence>::operator+=(value);
00885       return *this;
00886     }
00887     Function &operator<<(const QList<Sequence> &other) {
00888       QList<Sequence>::operator<<(other);
00889       return *this;
00890     }
00891     Function &operator<<(const Sequence &value) {
00892       QList<Sequence>::operator<<(value);
00893       return *this;
00894     }
00896     void add(const Sequence &value) {
00897       append(value);
00898     }
00900     QList<Sequence> &toList() {
00901       return (QList<Sequence> &) *this;
00902     }
00903 };
00904 
00906 struct Pmcfg {
00908 
00909 
00910 
00913   QList<Function> functions;
00915 
00932   RuleSet rules;
00934 
00935   TokenSet tokens;
00937 
00938   Cat startCat;
00940 
00947   RuleSet cfRules;
00949 
00951 
00952   QHash<int, QString> functionNames;
00953   QHash<QString, int> functionIndices;
00954   void addFunction(const QString &name, const Function &function) {
00955     int index = functions.size();
00956     functions.append(function);
00957     functionNames.insert(index, name);
00958     functionIndices.insert(name, index);
00959   }
00960   Function lookupFunction(const QVariant &id) const {
00961     if (id.type() == QVariant::Int) return functions.at(id.toInt());
00962 
00963     QString name = id.toString();
00964     if (!functionIndices.contains(name)) return Function();
00965     return functions.at(functionIndices.value(name));
00966   }
00968 };
00969 
00971 struct PmcfgComponentInfo {
00972   PmcfgComponentInfo() {}
00973   PmcfgComponentInfo(const Rule &rule)
00974     : pmcfgRule(rule), argPositions(rule.size()) {}
00975   Rule pmcfgRule;
00976   QVector<QVector<int> > argPositions; 
00980 };
00981 
00982 Node parseTreeToPmcfgSyntaxTree(const Node &parseTree);
00983 
00984 
00985 // --- parse state struct, for bindings ---
00986 
00988 struct ParseState {
00989   ParseState()
00990     : errorPos(-1), errorToken(), incrementalPos(-1), incrementalStacks(),
00991       incrementalMatches(), lexerState() {}
00992   // explicitly implement the default copy constructor to get it bound
00993   ParseState(const ParseState &other)
00994     : errorPos(other.errorPos), errorToken(other.errorToken),
00995       incrementalPos(other.incrementalPos),
00996       incrementalStacks(other.incrementalStacks),
00997       incrementalMatches(other.incrementalMatches), lexerState(other.lexerState)
00998       {}
00999 
01000   int errorPos;
01001   Cat errorToken;
01002   int incrementalPos;
01003   QList<StackItem> incrementalStacks;
01004   QList<Match> incrementalMatches;
01005   LexerState lexerState;
01006 
01007   void reset() { *this = ParseState(); }
01008 };
01009 
01010 
01011 // --- main class ---
01012 
01014 class Parser {
01015   public:
01016     // interface methods
01017     Parser(TokenSource *tokenSource)
01018       : inputSource(tokenSource), lastGeneratedCat(0) {}
01019     virtual ~Parser() {}
01020     bool isToken(CatArg cat) const {return tokens.contains(cat);}
01021     void addToken(CatArg cat) {tokens.insert(cat);}
01022     bool isLiteral(const QList<Cat> &list) const;
01023     void initCaches();
01024     void addRule(CatArg cat, const Rule &rule);
01025     void loadCfg(const Cfg &cfg) {
01026       rules = cfg.rules;
01027       tokens = cfg.tokens;
01028       startCat = cfg.startCat;
01029       initCaches();
01030     }
01031     bool loadPmcfg(const Pmcfg &pmcfg);
01032     bool addPmcfgRule(Pmcfg &pmcfg, CatArg cat, const Rule &rule);
01033     QList<Match> parse(int *errorPos = 0, Cat *errorToken = 0,
01034                        int *incrementalPos = 0,
01035                        QList<StackItem> *incrementalStacks = 0,
01036                        QList<Match> *incrementalMatches = 0,
01037                        LexerState *lexerState = 0);
01039     QList<Match> parse(ParseState *parseState) {
01040       return parse(&parseState->errorPos, &parseState->errorToken,
01041                    &parseState->incrementalPos, &parseState->incrementalStacks,
01042                    &parseState->incrementalMatches, &parseState->lexerState);
01043     }
01044     Predictions computePredictions(const QList<StackItem> &stacks) const;
01046     Predictions computePredictions(const ParseState &parseState) const {
01047       return computePredictions(parseState.incrementalStacks);
01048     }
01049     QHash<Cat, QSet<Cat> > expandNonterminalPrediction(CatArg cat) const;
01050     QHash<Cat, QSet<Cat> > expandNonterminalPredictionC(CatArg cat);
01051     MultiPredictions computeMultiPredictions(const QList<StackItem> &stacks)
01052       const;
01054     MultiPredictions computeMultiPredictions(const ParseState &parseState) const
01055     {
01056       return computeMultiPredictions(parseState.incrementalStacks);
01057     }
01059 
01065     QHash<QList<Cat>, QList<MultiPrediction> >
01066       computeMultiPredictionsJava(const ParseState &parseState) const
01067     {
01068       return multiHashToListHash(computeMultiPredictions(parseState));
01069     }
01070     QHash<Cat, QSet<QList<Cat> > >
01071       expandNonterminalPredictionMulti(CatArg cat) const;
01072     QHash<Cat, QSet<QList<Cat> > >
01073       expandNonterminalPredictionMultiC(CatArg cat);
01074     ConstrainedPredictions computeConstrainedPredictions(
01075       const QList<StackItem> &stacks) const;
01077     ConstrainedPredictions computeConstrainedPredictions(
01078       const ParseState &parseState) const {
01079       return computeConstrainedPredictions(parseState.incrementalStacks);
01080     }
01082 
01088     QHash<Cat, QList<NextTokenConstraints> >
01089       computeConstrainedPredictionsJava(const ParseState &parseState) const
01090     {
01091       return multiHashToListHash(computeConstrainedPredictions(parseState));
01092     }
01093     QHash<Cat, QSet<Cat> > expandNonterminalPredictionC(CatArg cat,
01094       const NextTokenConstraints &nextTokenConstraints);
01095     QHash<Cat, QSet<Cat> > expandNonterminalPredictionC(CatArg cat,
01096       const QList<NextTokenConstraints> &nextTokenConstraintsList);
01097     ConstrainedMultiPredictions computeConstrainedMultiPredictions(
01098       const QList<StackItem> &stacks) const;
01100     ConstrainedMultiPredictions computeConstrainedMultiPredictions(
01101       const ParseState &parseState) const
01102     {
01103       return computeConstrainedMultiPredictions(parseState.incrementalStacks);
01104     }
01106 
01112     QHash<QList<Cat>, QList<ConstrainedMultiPrediction> >
01113       computeConstrainedMultiPredictionsJava(const ParseState &parseState) const
01114     {
01115       return multiHashToListHash(computeConstrainedMultiPredictions(
01116                                    parseState));
01117     }
01118     QHash<Cat, QSet<QList<Cat> > >
01119       expandNonterminalPredictionMultiC(CatArg cat,
01120       const NextTokenConstraints &nextTokenConstraints);
01121     QHash<Cat, QSet<QList<Cat> > >
01122       expandNonterminalPredictionMultiC(CatArg cat,
01123       const QList<NextTokenConstraints> &nextTokenConstraintsList);
01124 
01126 
01127 
01128 
01131     RuleSet rules;
01133     TokenSet tokens;
01135     Cat startCat;
01137 
01139 
01140 
01141 
01161     QHash<Cat, QPair<Cat, QList<Cat> > > pseudoCats;
01164 
01167     QHash<Cat, QPair<Cat, int> > componentCats;
01169 
01173     QHash<Cat, QList<Cat> > catComponents;
01175 
01176   protected:
01178     TokenSource *inputSource;
01179 
01180   private:
01181     // helper methods
01182     Cat effectiveCat(CatArg cat) const;
01183     void processRule(CatArg cat, const Rule &rule, int skip, int ruleno,
01184                      QQueue<Cat> &nullableQueue, bool &clearEpsilonMatches);
01185     bool computePmcfgDimension(CatArg cat, const Rule &rule,
01186                                const Pmcfg &pmcfg);
01187     bool convertPmcfgRule(CatArg cat, const Rule &rule, const Pmcfg &pmcfg,
01188                           bool updateCaches);
01189     bool reachable(CatArg cat, CatArg target, QSet<Cat> mark);
01190     QList<FullRule> neighborhood(CatArg cat, CatArg target);
01191     void finalizeMatches(QList<Match> &matches, CatArg cat,
01192                          const PseudoCatScope &scope);
01193     void copyScope(QList<Match> &matches, const PseudoCatScope &scope);
01194     QList<Match> matchCFToEpsilon(CatArg cat, QSet<Cat> mark);
01195     QList<Match> matchEffectiveCatToEpsilon(CatArg cat, QSet<Cat> mark);
01196     QList<Match> matchToEpsilonRecurse(CatArg cat, QSet<Cat> mark,
01197                                        const PseudoCatScope &scope);
01198     QList<Match> matchToEpsilon(CatArg cat, const PseudoCatScope &scope);
01199     bool matchesTokenRecurse(CatArg cat, CatArg token, QSet<Cat> mark) const;
01200     bool matchesToken(CatArg cat, CatArg token) const;
01201     void collectLeaves(const Node &tree, QList<Node> &leaves);
01202     bool validateNextTokenConstraints(CatArg token,
01203       const NextTokenConstraints &nextTokenConstraints) const;
01204     QList<Match> match(CatArg cat, int pos, const PseudoCatScope &scope,
01205                        const StackItem &stack,
01206                        const NextTokenConstraints &nextTokenConstraints);
01207     QList<Match> matchRemaining(const Rule &rule, int len, int curr, int i,
01208                                 const Node &tree, const PseudoCatScope &scope,
01209                                 int ruleno, const StackItem &stack,
01210                                 const NextTokenConstraints
01211                                   &nextTokenConstraints);
01212     Cat findFirstToken(const Node &tree);
01213     void unify(QList<Match> &matches);
01214     QList<Match> reduce(CatArg cat, CatArg target, int pos, int len,
01215                         const Node &tree, const StackItem &stack,
01216                         const PseudoCatScope &scope, int ruleno,
01217                         const NextTokenConstraints &nextTokenConstraints,
01218                         QSet<Cat> mark = QSet<Cat>());
01219     QList<Match> processStackItem(const StackItem &item,
01220                                   const QList<Match> &matches);
01221     QList<Match> processStack(const StackItem &stack, CatArg token);
01222     void computeUsageCounts(const StackItem &item);
01223     bool shift(int pos);
01224     void expandNonterminalPredictionRecurse(CatArg cat,
01225                                             QHash<Cat, QSet<Cat> > &result,
01226                                             QSet<Cat> &mark) const;
01227     void expandNonterminalPredictionRecurseC(CatArg cat,
01228                                              QHash<Cat, QSet<Cat> > &result,
01229                                              QSet<Cat> mark, int ruleno,
01230                                              const PseudoCatScope &scope,
01231                                              const NextTokenConstraints
01232                                                &nextTokenConstraints);
01233     void expandNonterminalPredictionMultiRecurse(CatArg cat,
01234       QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> &mark) const;
01235     void expandNonterminalPredictionMultiRecurseC(CatArg cat,
01236       QHash<Cat, QSet<QList<Cat> > > &result, QSet<Cat> mark, int ruleno,
01237       const PseudoCatScope &scope,
01238       const NextTokenConstraints &nextTokenConstraints);
01239     int generateCat() { return --lastGeneratedCat; }
01240 
01242     QMultiHash<Cat, FullRule> initialGraph;
01244     QHash<QPair<Cat, Cat>, QList<FullRule> > neighborhoods;
01246     QSet<Cat> nullable;
01248     QHash<Cat, QList<Match> > epsilonMatches;
01249 
01251     QList<StackItem> nextStacks;
01253     QList<Match> currentMatches;
01255     LexerState currentLexerState;
01257     QHash<Cat, int> type0Indices;
01259 
01263     QHash<const StackItemData *, QList<Match> > collectedMatches;
01265     int errPos;
01267     Cat errToken;
01269     int lastGeneratedCat;
01270 };
01271 
01272 } // end namespace
01273 
01274 
01275 // --- utility functions in the global namespace ---
01276 
01277 Q_DECLARE_METATYPE(DynGenPar::PmcfgComponentInfo)
01278 
01279 uint qHash(const QList<DynGenPar::Cat> &list);
01280 
01281 template<>
01282   inline DynGenPar::StackItemData
01283     *QSharedDataPointer<DynGenPar::StackItemData>::clone()
01284 {
01285   return d->clone();
01286 }
01287 
01288 template<>
01289   inline DynGenPar::AbstractLexerStateData
01290     *QSharedDataPointer<DynGenPar::AbstractLexerStateData>::clone()
01291 {
01292   return d->clone();
01293 }
01294 
01295 #endif // DYNGENPAR_H
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines