DynGenPar
Dynamic Generalized Parser
pgf.cpp
Go to the documentation of this file.
1 /* DynGenPar: Dynamic Generalized Parser - PGF import
2  Copyright (C) 2010-2012 Kevin Kofler <kevin.kofler@chello.at>
3  Copyright (C) 2014-2016 DAGOPT Optimization Technologies GmbH
4  written by Kevin Kofler <kofler@dagopt.com>
5 
6  Support by the Austrian Science Fund FWF under contract numbers
7  P20631 and P23554 is gratefully acknowledged.
8 
9  This program is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 2 of the License, or
12  (at your option) any later version.
13 
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 
22 #include "pgf.h"
23 
24 #include <cstdio>
25 #include <cmath>
26 #include <cctype>
27 #include <QIODevice>
28 #include <QFile>
29 #include <QBuffer>
30 
31 namespace DynGenPar {
32 
34 namespace PgfInternals {
35 
36 // Convenience macro to avoid copying&pasting namespace boilerplate
// Expands to: close namespace PgfInternals, apply DYNGENPAR_DECLARE_TYPEINFO
// to the qualified name PgfInternals::type, then reopen namespace PgfInternals.
// It must therefore only be used directly inside namespace PgfInternals.
37 #define PGFINTERNALS_DECLARE_TYPEINFO(type, typeclass) \
38  } /* end namespace PgfInternals */ \
39  DYNGENPAR_DECLARE_TYPEINFO(PgfInternals::type, typeclass); \
40  namespace PgfInternals {
41 
45 class HaskellDataStream {
46  public:
47  enum Status {Ok, ReadPastEnd, ReadCorruptData, IOError};
48  HaskellDataStream(QIODevice *d) : iodevice(d), m_status(Ok), m_version(0),
49  m_minorVersion(0) {}
50  bool atEnd() const {return iodevice->atEnd();}
51  QIODevice *device() const {return iodevice;}
52  void setStatus (Status status) {m_status = status;}
53  Status status() const {return m_status;}
54 
55  void readVersion();
56  unsigned short version() {return m_version;}
57  unsigned short minorVersion() {return m_minorVersion;}
58 
59  HaskellDataStream &operator>>(bool &b);
60  HaskellDataStream &operator>>(char &c);
61  HaskellDataStream &operator>>(int &i);
62  HaskellDataStream &operator>>(double &d);
63  HaskellDataStream &operator>>(QByteArray &bytes);
64  HaskellDataStream &operator>>(QString &s);
65 
66  private:
67  char nextChar();
68  unsigned char nextUChar() {return (unsigned char) nextChar();}
69 
70  QIODevice *iodevice;
71  Status m_status;
72 
73  unsigned short m_version;
74  unsigned short m_minorVersion;
75 };
76 PGFINTERNALS_DECLARE_TYPEINFO(HaskellDataStream, Q_MOVABLE_TYPE);
77 PGFINTERNALS_DECLARE_TYPEINFO(HaskellDataStream::Status,
78  Q_PRIMITIVE_TYPE);
79 
80 char HaskellDataStream::nextChar()
81 {
82  if (m_status != Ok) return 0;
83  if (iodevice->atEnd()) {
84  m_status = ReadPastEnd;
85  return 0;
86  }
87  char c;
88  if (!iodevice->getChar(&c)) {
89  m_status = IOError;
90  return 0;
91  }
92  return c;
93 }
94 
96 void HaskellDataStream::readVersion()
97 {
98  unsigned char c1 = nextUChar();
99  unsigned char c2 = nextUChar();
100  m_version = (c1 << 8) + c2;
101  if (m_version != 1 && m_version != 2 && m_status == Ok)
102  m_status = ReadCorruptData;
103  c1 = nextUChar();
104  c2 = nextUChar();
105  m_minorVersion = (c1 << 8) + c2;
106  if (((m_version == 1 && m_minorVersion)
107  || (m_version == 2 && m_minorVersion > 1))
108  && m_status == Ok) m_status = ReadCorruptData;
109 }
110 
111 HaskellDataStream &HaskellDataStream::operator>>(bool &b)
112 {
113  unsigned char c = nextUChar();
114  if (c > 1) {
115  m_status = ReadCorruptData;
116  b = false;
117  return *this;
118  }
119 
120  b = (bool) c;
121  return *this;
122 }
123 
124 HaskellDataStream &HaskellDataStream::operator>>(char &c)
125 {
126  c = nextChar();
127  return *this;
128 }
129 
130 HaskellDataStream &HaskellDataStream::operator>>(int &i)
131 {
132  unsigned char c = nextUChar();
133  unsigned u = c & 0x7Fu;
134  unsigned shift = 0;
135 
136  while (c & 0x80u) {
137  c = nextUChar();
138  u |= (c & 0x7Fu) << (shift += 7u);
139  }
140 
141  i = (int) u;
142 
143  return *this;
144 }
145 
146 HaskellDataStream &HaskellDataStream::operator>>(double &d)
147 {
148  // read an IEEE 754 big-endian double-precision floating point number
149  unsigned char c1 = nextUChar();
150  unsigned char c2 = nextUChar();
151  bool negative = (c1 & 0x80);
152  int exponent = (((c1 & 0x7f) << 4) + (c2 >> 4));
153  unsigned char c3 = nextUChar();
154  unsigned char c4 = nextUChar();
155  unsigned char c5 = nextUChar();
156  unsigned char c6 = nextUChar();
157  unsigned char c7 = nextUChar();
158  unsigned char c8 = nextUChar();
159  if (!exponent) {
160  d = 0.;
161  return *this;
162  }
163  exponent -= (1023 + 52);
164  unsigned long long mantissa = (1ull << 52)
165  + ((unsigned long long) (c2 & 0xf) << 48)
166  + ((unsigned long long) c3 << 40)
167  + ((unsigned long long) c4 << 32)
168  + ((unsigned long long) c5 << 24)
169  + ((unsigned long long) c6 << 16)
170  + ((unsigned long long) c7 << 8)
171  + (unsigned long long) c8;
172 
173  d = std::ldexp((double) mantissa, exponent);
174  if (negative) d = -d;
175 
176  return *this;
177 }
178 
179 HaskellDataStream &HaskellDataStream::operator>>(QByteArray &bytes)
180 {
181  bytes = QByteArray();
182 
183  int len;
184  operator>>(len);
185 
186  if (len < 0) {
187  m_status = ReadCorruptData;
188  return *this;
189  }
190 
191  for (int i=0; i<len; i++) {
192  bytes.append(nextChar());
193  }
194 
195  return *this;
196 }
197 
198 HaskellDataStream &HaskellDataStream::operator>>(QString &s)
199 {
200  int len;
201  operator>>(len);
202 
203  if (len < 0) {
204  m_status = ReadCorruptData;
205  s = QString();
206  return *this;
207  }
208 
209  /* We need to count Unicode codepoints for the UTF-8 bytes read.
210  Why don't they put a byte count in the serialization format??? */
211  QByteArray bytes;
212  for (int i=0; i<len; i++) {
213  char c = nextChar();
214  bytes.append(c);
215  if (c >= 0) continue;
216  if (!(c & 0x40)) {
217  m_status = ReadCorruptData;
218  s = QString();
219  return *this;
220  }
221  int n = 1;
222  if (c & 0x20) {
223  n++;
224  if (c & 0x10) {
225  n++;
226  if (c & 8) {
227  m_status = ReadCorruptData;
228  s = QString();
229  return *this;
230  }
231  }
232  }
233  for (int j=0; j<n; j++) {
234  c = nextChar();
235  if ((c & 0xc0) != 0x80) {
236  m_status = ReadCorruptData;
237  s = QString();
238  return *this;
239  }
240  bytes.append(c);
241  }
242  }
243 
244  s = QString::fromUtf8(bytes);
245 
246  return *this;
247 }
248 
250 HaskellDataStream &operator>>(HaskellDataStream &stream, QVariant &v)
251 {
252  char type;
253  stream >> type;
254 
255  switch (type) {
256  case 0: // string
257  {
258  QString s;
259  stream >> s;
260  v = s;
261  }
262  break;
263  case 1: // int
264  {
265  int i;
266  stream >> i;
267  v = i;
268  }
269  break;
270  case 2: // double
271  {
272  double d;
273  stream >> d;
274  v = d;
275  }
276  break;
277  default:
278  stream.setStatus(HaskellDataStream::ReadCorruptData);
279  v = QVariant();
280  break;
281  };
282 
283  return stream;
284 }
285 
286 template <typename T1, typename T2> HaskellDataStream
287  &operator>>(HaskellDataStream &stream, QPair<T1, T2> &p)
288 {
289  return (stream >> p.first >> p.second);
290 }
291 
292 template <typename T> HaskellDataStream &operator>>(HaskellDataStream &stream,
293  QList<T> &l)
294 {
295  int n;
296  stream >> n;
297 
298  if (n < 0) {
299  stream.setStatus(HaskellDataStream::ReadCorruptData);
300  l = QList<T>();
301  return stream;
302  }
303 
304  for (int i=0; i<n; i++) {
305  T entry;
306  stream >> entry;
307  l.append(entry);
308  }
309 
310  return stream;
311 }
312 
313 template <typename T1, typename T2> HaskellDataStream
314  &operator>>(HaskellDataStream &stream, QHash<T1, T2> &h)
315 {
316  int n;
317  stream >> n;
318 
319  if (n < 0) {
320  stream.setStatus(HaskellDataStream::ReadCorruptData);
321  h = QHash<T1, T2>();
322  return stream;
323  }
324 
325  for (int i=0; i<n; i++) {
326  T1 key;
327  stream >> key;
328  T2 value;
329  stream >> value;
330  h.insert(key, value);
331  }
332 
333  return stream;
334 }
335 
336 struct Type {
337  // dummy structure, we don't need abstract types
338 };
339 PGFINTERNALS_DECLARE_TYPEINFO(Type, Q_PRIMITIVE_TYPE);
340 
341 HaskellDataStream &operator>>(HaskellDataStream &stream, Type &dummy);
342 
343 struct Hypo {
344  // dummy structure, we don't need abstract types
345 };
346 PGFINTERNALS_DECLARE_TYPEINFO(Hypo, Q_PRIMITIVE_TYPE);
347 
348 HaskellDataStream &operator>>(HaskellDataStream &stream, Hypo &dummy);
349 
350 struct Expr {
351  // dummy structure, we don't need abstract types
352 };
353 PGFINTERNALS_DECLARE_TYPEINFO(Expr, Q_PRIMITIVE_TYPE);
354 
355 HaskellDataStream &operator>>(HaskellDataStream &, Expr &)
356 {
357  qFatal("dependent types not supported");
358 }
359 
360 HaskellDataStream &operator>>(HaskellDataStream &stream, Type &)
361 {
362  QList<Hypo> hypos;
363  QByteArray cat;
364  QList<Expr> exps;
365  return (stream >> hypos >> cat >> exps);
366 }
367 
368 HaskellDataStream &operator>>(HaskellDataStream &stream, Hypo &)
369 {
370  bool bindtype;
371  QByteArray cat;
372  Type type;
373  return (stream >> bindtype >> cat >> type);
374 }
375 
376 struct Equation {
377  // dummy structure, we don't need abstract types
378 };
379 PGFINTERNALS_DECLARE_TYPEINFO(Equation, Q_PRIMITIVE_TYPE);
380 
381 HaskellDataStream &operator>>(HaskellDataStream &, Equation &)
382 {
383  qFatal("dependent types not supported");
384 }
385 
386 struct AbsFun {
387  // dummy structure, we don't need abstract types
388 };
389 PGFINTERNALS_DECLARE_TYPEINFO(AbsFun, Q_PRIMITIVE_TYPE);
390 
391 HaskellDataStream &operator>>(HaskellDataStream &stream, AbsFun &)
392 {
393  Type type;
394  int arity;
395  bool haveEquations;
396  stream >> type >> arity >> haveEquations;
397  if (haveEquations) {
398  QList<Equation> equations;
399  stream >> equations;
400  }
401  double probability;
402  return (stream >> probability);
403 }
404 
// Abstract category as read from the PGF file.
// NOTE(review): the AbsCat reader also fills a `functions` member that does
// not appear in this listing - confirm its declaration in the original file.
405 struct AbsCat {
406  QList<Hypo> context;
408 };
409 PGFINTERNALS_DECLARE_TYPEINFO(AbsCat, Q_MOVABLE_TYPE);
410 
411 HaskellDataStream &operator>>(HaskellDataStream &stream, AbsCat &absCat)
412 {
413  if (stream.version() >= 2) {
414  double probability; // ignored
415  return (stream >> absCat.context >> absCat.functions >> probability);
416  } else return (stream >> absCat.context >> absCat.functions);
417 }
418 
// One symbol of a concrete-syntax sequence as read from the PGF file.
// The enumerator order of SymType matters: the Symbol reader compares the tag
// byte from the stream directly against these values. Invalid (-1) is what
// the reader stores for an unknown or unsupported tag.
// NOTE(review): the Symbol reader also fills an `alternatives` member that
// does not appear in this listing - confirm its declaration in the original
// file.
419 struct Symbol {
420  enum SymType {Cat, Lit, Var, KS, KP, BIND, SOFT_BIND, NE, SOFT_SPACE, CAPIT,
421  ALL_CAPIT, Invalid = -1};
422  SymType symtype;
  // n and l are only read for Cat, Lit and Var symbols
423  int n, l;
  // tokens are filled for KS and KP symbols
424  QStringList tokens;
426 };
427 PGFINTERNALS_DECLARE_TYPEINFO(Symbol, Q_MOVABLE_TYPE);
428 PGFINTERNALS_DECLARE_TYPEINFO(Symbol::SymType, Q_PRIMITIVE_TYPE);
429 
// Reads one symbol: a tag byte selecting the SymType, followed by a payload
// that depends on the tag and on the format version of the stream.
// Tags that are unknown, or not allowed in the stream's format version, mark
// the stream as ReadCorruptData and leave symbol.symtype at Symbol::Invalid.
430 HaskellDataStream &operator>>(HaskellDataStream &stream, Symbol &symbol)
431 {
432  char symtype;
433  stream >> symtype;
434  switch (symtype) {
435  case (int) Symbol::Cat:
436  case (int) Symbol::Lit:
437  case (int) Symbol::Var:
  // payload: two ints
438  stream >> symbol.n >> symbol.l;
439  break;
440  case (int) Symbol::KS:
  // version 2 stores a single token, version 1 a list of tokens
441  if (stream.version() >= 2) {
442  QString token;
443  stream >> token;
444  symbol.tokens = QStringList(token);
445  } else stream >> symbol.tokens;
446  break;
447  case (int) Symbol::KP:
  // version 2 stores nested symbols, which are flattened here to the
  // token-list form that version 1 stores directly
448  if (stream.version() >= 2) {
449  QList<Symbol> symbols;
450  typedef QPair<QList<Symbol>, QStringList> SymAlternative;
451  QList<SymAlternative> symAlternatives;
452  stream >> symbols >> symAlternatives;
453  symbol.tokens.clear();
  // the default branch may only consist of KS symbols
454  foreach (const Symbol &sym, symbols) {
455  if (sym.symtype == Symbol::KS) {
456  symbol.tokens << sym.tokens;
457  } else {
458  qFatal("non-token symbols in pre prefixes not supported");
459  }
460  }
461  symbol.alternatives.clear();
462  foreach (const SymAlternative &symAlternative, symAlternatives) {
463  QPair<QStringList, QStringList> alternative;
464  foreach (const Symbol & sym, symAlternative.first) {
465  switch (sym.symtype) {
466  case (int) Symbol::KS:
467  alternative.first << sym.tokens;
468  break;
469  case (int) Symbol::BIND:
  // a BIND symbol is represented by the PreludeBind token
470  alternative.first << PreludeBind;
471  break;
472  case (int) Symbol::SOFT_BIND:
473  case (int) Symbol::SOFT_SPACE:
474  case (int) Symbol::CAPIT:
475  case (int) Symbol::ALL_CAPIT:
476  // ignore
477  break;
478  default:
479  qFatal("non-token, non-bind symbols in pre not supported");
480  }
481  }
482  alternative.second = symAlternative.second;
483  symbol.alternatives << alternative;
484  }
485  } else stream >> symbol.tokens >> symbol.alternatives;
486  break;
487  case (int) Symbol::BIND:
488  case (int) Symbol::SOFT_BIND:
489  case (int) Symbol::NE:
  // no payload; only valid from format version 2 on
490  if (stream.version() >= 2) break; else goto corrupt;
491  case (int) Symbol::SOFT_SPACE:
492  case (int) Symbol::CAPIT:
493  case (int) Symbol::ALL_CAPIT:
  // no payload; only valid from format version 2.1 on
494  if (stream.version() > 2
495  || (stream.version() == 2 && stream.minorVersion() >= 1)) break;
496  // else fall through
497  default:
498  corrupt:
499  stream.setStatus(HaskellDataStream::ReadCorruptData);
  // -1 is Symbol::Invalid
500  symtype = -1;
501  break;
502  };
503  symbol.symtype = (Symbol::SymType) symtype;
504 
505  return stream;
506 }
507 
508 struct CncFun {
509  QByteArray fun;
510  QList<int> lins;
511 };
512 PGFINTERNALS_DECLARE_TYPEINFO(CncFun, Q_MOVABLE_TYPE);
513 
514 HaskellDataStream &operator>>(HaskellDataStream &stream, CncFun &cncfun)
515 {
516  return (stream >> cncfun.fun >> cncfun.lins);
517 }
518 
519 struct PArg {
520  // QList<QPair<int, int> > hypos; // dependent types
521  int fid;
522 };
523 PGFINTERNALS_DECLARE_TYPEINFO(PArg, Q_PRIMITIVE_TYPE);
524 
525 HaskellDataStream &operator>>(HaskellDataStream &stream, PArg &parg)
526 {
527  int numHypos;
528  stream >> numHypos;
529  if (numHypos)
530  qFatal("dependent types not supported");
531 
532  return (stream >> parg.fid);
533 }
534 
535 struct Production {
536  bool isCoercion;
537  int id; // of the function for an apply, of the category for a coercion
538  QList<PArg> args;
539 };
540 PGFINTERNALS_DECLARE_TYPEINFO(Production, Q_MOVABLE_TYPE);
541 
542 HaskellDataStream &operator>>(HaskellDataStream &stream, Production &production)
543 {
544  stream >> production.isCoercion >> production.id;
545 
546  if (!production.isCoercion)
547  stream >> production.args;
548 
549  return stream;
550 }
551 
552 struct CncCat {
553  int s, e;
554  QStringList labels;
555 };
556 PGFINTERNALS_DECLARE_TYPEINFO(CncCat, Q_MOVABLE_TYPE);
557 
558 HaskellDataStream &operator>>(HaskellDataStream &stream, CncCat &cnccat)
559 {
560  return (stream >> cnccat.s >> cnccat.e >> cnccat.labels);
561 }
562 
563 struct Concr {
564  QHash<QByteArray, QVariant> cflags;
565  QHash<QByteArray, QString> printnames;
566  QList<QList<Symbol> > sequences;
567  QList<CncFun> cncfuns;
568  QHash<int, QList<int> > lindefs;
569  QHash<int, QList<Production> > productions;
570  QHash<QByteArray, CncCat> cnccats;
571  int totalCats;
572 };
573 PGFINTERNALS_DECLARE_TYPEINFO(Concr, Q_MOVABLE_TYPE);
574 
575 HaskellDataStream &operator>>(HaskellDataStream &stream, Concr &concrete)
576 {
577  if (stream.version() >= 2) {
578  QHash<int, QList<int> > linrefs; // ignored
579  return (stream >> concrete.cflags >> concrete.printnames
580  >> concrete.sequences >> concrete.cncfuns >> concrete.lindefs
581  >> linrefs >> concrete.productions >> concrete.cnccats
582  >> concrete.totalCats);
583  } else {
584  return (stream >> concrete.cflags >> concrete.printnames
585  >> concrete.sequences >> concrete.cncfuns >> concrete.lindefs
586  >> concrete.productions >> concrete.cnccats
587  >> concrete.totalCats);
588  }
589 }
590 
591 } // end namespace PgfInternals
593 namespace {} // dummy anonymous namespace to capture Doxygen comments
594 
596 
602 Pgf::Pgf(const QString &fileName, const QString &concreteName)
603 {
604  using namespace PgfInternals;
605  /* negative categories and categories 0 and 1 are reserved, so we add
606  CAT_OFFSET to all GF category IDs */
607  static const int CAT_OFFSET = 6;
608 
609  // deserialize the Haskell byte stream
610  QFile file(fileName);
611  file.open(QIODevice::ReadOnly);
612  HaskellDataStream stream(&file);
613 
614 #define CHECK_STATUS() if (stream.status() != HaskellDataStream::Ok) \
615  qFatal("invalid PGF file or wrong version of GF")
616 
617  stream.readVersion();
618  CHECK_STATUS();
619 
620  QHash<QByteArray, QVariant> gflags;
621  stream >> gflags;
622  CHECK_STATUS();
623 
624  QByteArray absname;
625  stream >> absname;
626  CHECK_STATUS();
627 
628  QHash<QByteArray, QVariant> aflags;
629  stream >> aflags;
630  CHECK_STATUS();
631 
632  QHash<QByteArray, AbsFun> funs;
633  stream >> funs;
634  CHECK_STATUS();
635 
636  QHash<QByteArray, AbsCat> cats;
637  stream >> cats;
638  CHECK_STATUS();
639 
640  QHash<QByteArray, Concr> concretes;
641  stream >> concretes;
642  CHECK_STATUS();
643 
644  if (!stream.atEnd()) qFatal("invalid PGF file or wrong version of GF");
645 
646 #undef CHECK_STATUS
647 
648  if (concretes.isEmpty())
649  qFatal("invalid PGF file or wrong version of GF");
650 
651  Concr concrete;
652  if (concreteName.isEmpty()) {
653  if (concretes.size() > 1)
654  qFatal("must specify the name of the concrete grammar");
655  QHashIterator<QByteArray, Concr> it(concretes);
656  concrete = it.next().value();
657  } else {
658  QByteArray concreteName8Bit = concreteName.toLocal8Bit();
659  if (!concretes.contains(concreteName8Bit))
660  qFatal("concrete grammar '%s' not found in PGF file",
661  concreteName8Bit.data());
662  concrete = concretes.value(concreteName8Bit);
663  }
664 
665  // find the start category
666  if (!aflags.contains("startcat")) qFatal("no start category specified");
667  QByteArray startcatName = aflags.value("startcat").toString().toLocal8Bit();
668  CncCat startcat = concrete.cnccats.value(startcatName);
669  if (startcat.s != startcat.e)
670  qFatal("start category '%s' has parameters", startcatName.data());
671  pmcfg.startCat = startcat.s + CAT_OFFSET;
672 
673  // fetch category and component names
674  catNames << "EPSILON" << "LEX_ERROR";
675  while (catNames.size() < concrete.totalCats + CAT_OFFSET)
676  catNames.append(QString());
677  {
678  QHashIterator<QByteArray, CncCat> it(concrete.cnccats);
679  while (it.hasNext()) {
680  CncCat cnccat = it.next().value();
681  QString cat = QString::fromLocal8Bit(it.key());
682  for (int i=cnccat.s; i<=cnccat.e; i++)
683  catNames[i + CAT_OFFSET] = cat;
684  componentNames.insert(cat, cnccat.labels);
685  }
686  }
687 
688  // mark the special GF categories as tokens
689  for (int i=2; i<CAT_OFFSET; i++) pmcfg.tokens.insert(i);
690 
691  // convert the sequences and tokens
692  QList<Sequence> sequences;
693  typedef QPair<QString, int> PrefixMatch;
694  typedef QPair<int, QList<PrefixMatch> > PrefixInfo;
695  QList<PrefixInfo> prefixInfo;
696  int nonExistCat = 0; // dummy category generated for Prelude.nonExist
697  foreach (const QList<Symbol> &gfsequence, concrete.sequences) {
698  Sequence sequence;
699  bool isSuffix = false;
700  foreach (const Symbol &symbol, gfsequence) {
701  switch (symbol.symtype) {
702  case Symbol::Cat: // category
703  case Symbol::Lit: // literal
704  case Symbol::Var: // variable
705  if (isSuffix) {
706  qWarning("ignoring unsupported use of Prelude.Bind (only suffix "
707  "tokens supported)");
708  isSuffix = false;
709  }
710  sequence.append(Term(symbol.n, symbol.l));
711  break;
712  case Symbol::KS: // string of tokens
713  {
714  QStringList tokens = symbol.tokens;
715  foreach (const QString &token, tokens) {
716  if (token == PreludeBind) {
717  if (isSuffix)
718  qWarning("ignoring duplicate Prelude.Bind");
719  isSuffix = true;
720  continue;
721  }
722  if (isSuffix) {
723  typedef QPair<QString, int> Suffix;
724  int id = 0;
725  foreach (const Suffix &suffix, suffixes) {
726  if (suffix.first == token) {
727  id = suffix.second;
728  break;
729  }
730  }
731  if (!id) {
732  id = catNames.size();
733  catNames.append(QString(PreludeBind) + ' ' + token);
734  suffixes.append(qMakePair(token, id));
735  pmcfg.tokens.insert(id);
736  }
737  sequence.append(id);
738  isSuffix = false;
739  } else {
740  /* Some PGF files put '.' at the end of a token, which confuses
741  the lexer. (Those PGF grammars really ought to be fixed!)
742  Work around that. */
743  QStringList splitTokens;
744  if (token.endsWith('.') && token != ".") {
745  QString choppedToken = token;
746  choppedToken.chop(1);
747  splitTokens.append(choppedToken);
748  splitTokens.append(".");
749  } else splitTokens.append(token);
750  foreach (const QString &splitToken, splitTokens) {
751  if (tokenHash.contains(splitToken))
752  sequence.append(tokenHash.value(splitToken));
753  else {
754  int id = catNames.size();
755  catNames.append(splitToken);
756  tokenHash.insert(splitToken, id);
757  pmcfg.tokens.insert(id);
758  sequence.append(id);
759  }
760  }
761  }
762  }
763  }
764  break;
765  case Symbol::KP: // pre
766  {
767  if (isSuffix) {
768  qWarning("ignoring unsupported use of Prelude.Bind (only suffix "
769  "tokens supported)");
770  isSuffix = false;
771  }
772  // convert pre to a context-free rule accepting all the alternatives
773  // record the information needed to build next token constraints
774  QList<QStringList> uniqueAlternatives;
775  uniqueAlternatives.append(symbol.tokens);
776  QList<PrefixMatch> info;
777  typedef QPair<QStringList, QStringList> PreAlternative;
778  QList<int> constraintIds;
779  foreach (const PreAlternative &alternative, symbol.alternatives) {
780  int index = uniqueAlternatives.indexOf(alternative.first);
781  if (index > 0) {
782  QString &constraintName = catNames[constraintIds[index-1]];
783  constraintName.append('|');
784  constraintName.append(alternative.second.join("*|"));
785  constraintName.append('*');
786  } else if (index < 0) {
787  index = uniqueAlternatives.size();
788  uniqueAlternatives.append(alternative.first);
789  constraintIds.append(catNames.size());
790  catNames.append(alternative.second.join("*|")+'*');
791  }
792  foreach (const QString &prefix, alternative.second)
793  info.append(qMakePair(prefix, index));
794  }
795  QStringList uniqueAlternativeNames;
796  foreach (const QStringList &uniqueAlternative, uniqueAlternatives)
797  uniqueAlternativeNames.append(uniqueAlternative.join(" "));
798  QString name = "pre {" + uniqueAlternativeNames.join("; ") + '}';
799  int id = catNames.size();
800  catNames.append(name);
801  QList<Rule> cfRules;
802  int index = 0;
803  foreach (const QStringList &uniqueAlternative, uniqueAlternatives) {
804  Rule rule;
805  foreach (const QString &token, uniqueAlternative) {
806  if (token == PreludeBind) {
807  qWarning("ignoring unsupported use of Prelude.Bind in pre");
808  continue;
809  }
810  if (tokenHash.contains(token))
811  rule.append(tokenHash.value(token));
812  else {
813  int tokenId = catNames.size();
814  catNames.append(token);
815  tokenHash.insert(token, tokenId);
816  pmcfg.tokens.insert(tokenId);
817  rule.append(tokenId);
818  }
819  }
820  if (index++)
821  rule.nextTokenConstraints.expect.append(constraintIds[index-2]);
822  cfRules.append(rule);
823  }
824  pmcfg.cfRules[id] = cfRules;
825  sequence.append(id);
826  prefixInfo.append(qMakePair(id, info));
827  }
828  break;
829  case Symbol::BIND: // Prelude.BIND (since GF 3.6)
830  if (isSuffix)
831  qWarning("ignoring duplicate Prelude.Bind");
832  isSuffix = true;
833  break;
834  case Symbol::SOFT_BIND: // Prelude.SOFT_BIND (since GF 3.6)
835  case Symbol::SOFT_SPACE: // Prelude.SOFT_SPACE (since GF 3.7)
836  case Symbol::CAPIT: // Prelude.CAPIT (since GF 3.7)
837  case Symbol::ALL_CAPIT: // Prelude.ALL_CAPIT (since GF 3.7)
838  // We have no use for this at the moment. Ignore it.
839  break;
840  case Symbol::NE: // Prelude.nonExist (since GF 3.6)
841  // Generate an unreachable dummy category for non-existing forms.
842  // Reuse the same category for all of them.
843  if (!nonExistCat) {
844  nonExistCat = catNames.size();
845  catNames.append("nonExist");
846  }
847  sequence.append(nonExistCat);
848  isSuffix = false;
849  break;
850  default:
851  qFatal("invalid symbol");
852  }
853  }
854  if (isSuffix)
855  qWarning("ignoring unsupported use of Prelude.Bind (only suffix tokens "
856  "supported)");
857  sequences.append(sequence);
858  }
859 
860  // build next token constraints
861  foreach (const PrefixInfo &info, prefixInfo) {
862  QList<Rule> &cfRules = pmcfg.cfRules[info.first];
863  Rule &defaultRule = cfRules.first();
864  int s = cfRules.size();
865  QHashIterator<QString, int> it(tokenHash);
866  while (it.hasNext()) {
867  it.next();
868  const QString &token = it.key();
869  if (token.startsWith('$') || token.startsWith('`')) {
870  // allow any article in front of formulas
871  const int &tokenId = it.value();
872  for (int i=1; i<s; i++)
873  pmcfg.cfRules[cfRules[i].nextTokenConstraints.expect.first()]
874  .append(Rule() << tokenId);
875  } else {
876  int ruleno = 0;
877  foreach (const PrefixMatch &prefix, info.second) {
878  if (token.startsWith(prefix.first)) {
879  ruleno = prefix.second;
880  break;
881  }
882  }
883  if (ruleno) {
884  const int &tokenId = it.value();
885  pmcfg.cfRules[cfRules[ruleno].nextTokenConstraints.expect.first()]
886  .append(Rule() << tokenId);
887  defaultRule.nextTokenConstraints.taboo.append(tokenId);
888  }
889  }
890  }
891  }
892 
893  // convert the lindefs to coercion functions
894  QHash<QString, int> coercionFunctions;
895  {
896  QHashIterator<int, QList<int> > it (concrete.lindefs);
897  while (it.hasNext()) {
898  const QList<int> lindefs = it.next().value();
899  if (lindefs.size() != 1) qFatal("expected exactly 1 lindef per category");
900  QString cat = catNames.at(it.key() + CAT_OFFSET);
901  coercionFunctions.insert(cat, lindefs.first());
902  }
903  }
904  // In format version 1, firstFunction was always concrete.lindefs.size().
905  // Format version 2 has one or more corresponding linrefs for every lindef.
906  // (Usually one, but in the English resource grammar, verb categories have two
907  // linref functions each.) So we have to loop to find the first function.
908  int i = 0;
909  int numFunctions = concrete.cncfuns.size();
910  for (; i<numFunctions; i++) {
911  const CncFun &cncfun = concrete.cncfuns.at(i);
912  QString name = QString::fromLocal8Bit(cncfun.fun);
913  if (!name.startsWith("lindef ")) {
914  // We found the first actual function.
915  break;
916  }
917  int dim = cncfun.lins.size();
918  Function function;
919  for (int j=0; j<dim; j++)
920  function.append(Sequence() << Term(0,j));
921  pmcfg.functions.append(function);
922  functionNames.append(name.replace("lindef ", "coerce "));
923  }
924  firstFunction = i;
925 
926  // convert the functions (actual ones, skip lindefs)
927  for (; i<numFunctions; i++) {
928  const CncFun &cncfun = concrete.cncfuns.at(i);
929  Function function;
930  foreach (int lin, cncfun.lins) function.append(sequences.at(lin));
931  pmcfg.functions.append(function);
932  functionNames.append(QString::fromLocal8Bit(cncfun.fun));
933  }
934 
935  // convert the productions
936  {
937  // fill in a map needed for nested coercions
938  QHash<int, int> coercions;
939  QHashIterator<int, QList<Production> > it (concrete.productions);
940  while (it.hasNext()) {
941  const QList<Production> productions = it.next().value();
942  int lhs = it.key() + CAT_OFFSET;
943  foreach (const Production &production, productions)
944  if (production.isCoercion) {
945  int rhs = production.id + CAT_OFFSET;
946  if (!coercions.contains(lhs) || rhs < coercions.value(lhs))
947  coercions.insert(lhs, rhs);
948  }
949  }
950  it = concrete.productions;
951  while (it.hasNext()) {
952  const QList<Production> productions = it.next().value();
953  int lhs = it.key() + CAT_OFFSET;
954  QList<Rule> &rules = pmcfg.rules[lhs];
955  foreach (const Production &production, productions) {
956  if (production.isCoercion) {
957  int cat = production.id + CAT_OFFSET;
958  QString catName = catNames.at(cat);
959  // support nested coercions
960  while (catName.isEmpty()
961  || catName.endsWith("(coerced)")) {
962  if (!coercions.contains(cat))
963  qFatal("failed to look up real category for C%d", production.id);
964  cat = coercions.value(cat);
965  catName = catNames.at(cat);
966  }
967  QString catNameCoerced = catName + "(coerced)";
968  catNames[lhs] = catNameCoerced;
969  if (!componentNames.contains(catNameCoerced))
970  componentNames.insert(catNameCoerced,
971  componentNames.value(catName));
972  Rule rule(coercionFunctions.value(catName));
973  rule.append(cat);
974  rules.append(rule);
975  } else {
976  Rule rule(production.id);
977  foreach (PArg parg, production.args)
978  rule.append(parg.fid + CAT_OFFSET);
979  rules.append(rule);
980  }
981  }
982  }
983  }
984 }
985 
987 class PgfLexerStateData : public AbstractLexerStateData {
988  public:
989  virtual AbstractLexerStateData *clone() {
990  return new PgfLexerStateData(*this);
991  }
992  qint64 streamPos;
993  bool inFormula;
994  bool pastFormula;
995  QList<int> suffixes;
996 };
997 DYNGENPAR_DECLARE_TYPEINFO(PgfLexerStateData, Q_MOVABLE_TYPE);
998 
1000 class PgfTokenSource : public TokenSource {
1001  public:
1002  // The PGF is owned by the parser, so we keep only a pointer in this class.
1003  PgfTokenSource(Pgf *p)
1004  : TokenSource(), pgf_p(p), stream(new QFile()), inFormula(false),
1005  pastFormula(false)
1006  {static_cast<QFile *>(stream)->open(stdin, QIODevice::ReadOnly);}
1007  virtual ~PgfTokenSource() {delete stream;}
1008  // We also require the data to match where there is any.
1009  virtual bool matchParseTree(const Node &treeToMatch) {
1010  return (treeToMatch.cat == tree.cat) && (treeToMatch.data == tree.data);
1011  }
1012  virtual bool rewindTo(int pos,
1013  const LexerState &lexerState = LexerState()) {
1014  /* Sequential streams cannot be rewound at all.
1015  For other streams, we need a non-null lexer state to rewind to a
1016  non-zero position. */
1017  if (stream->isSequential() || (pos && lexerState.isNull()))
1018  return TokenSource::rewindTo(pos);
1019  else if (!pos && currPos) { // rewind to position zero = reset
1020  stream->reset();
1021  reset();
1022  return true;
1023  } else {
1024  if (pos != currPos) {
1025  const PgfLexerStateData *data
1026  = static_cast<const PgfLexerStateData *>(lexerState.data());
1027  if (!stream->seek(data->streamPos)) {
1028  tree = Node();
1029  return false;
1030  }
1031  inFormula = data->inFormula;
1032  pastFormula = data->pastFormula;
1033  suffixes = data->suffixes;
1034  }
1035  return simpleRewind(pos);
1036  }
1037  }
1038  virtual LexerState saveState() {
1039  /* We cannot rewind if the stream is sequential, so don't bother saving a
1040  lexer state in that case. */
1041  if (stream->isSequential()) return LexerState();
1042  PgfLexerStateData *data = new PgfLexerStateData;
1043  data->streamPos = stream->pos();
1044  data->inFormula = inFormula;
1045  data->pastFormula = pastFormula;
1046  data->suffixes = suffixes;
1047  return LexerState(data);
1048  }
1049  void setInputStdin() {
1050  delete stream;
1051  stream = new QFile();
1052  static_cast<QFile *>(stream)->open(stdin, QIODevice::ReadOnly);
1053  reset();
1054  }
1055  void setInputFile(const QString &fileName) {
1056  delete stream;
1057  stream = new QFile(fileName);
1058  stream->open(QIODevice::ReadOnly);
1059  reset();
1060  }
1061  void setInputBytes(const QByteArray &bytes) {
1062  delete stream;
1063  stream = new QBuffer();
1064  static_cast<QBuffer *>(stream)->setData(bytes);
1065  stream->open(QIODevice::ReadOnly);
1066  reset();
1067  }
1068  void setInputString(const QString &string) {
1069  setInputBytes(string.toLocal8Bit());
1070  }
1071  void setInputBuffer(QByteArray *buffer) {
1072  delete stream;
1073  stream = new QBuffer(buffer);
1074  stream->open(QIODevice::ReadOnly);
1075  reset();
1076  }
1077  protected:
1078  virtual Cat readToken();
1079  private:
1080  void reset() {
1081  tree = Node();
1082  currPos = 0;
1083  inFormula = pastFormula = false;
1084  suffixes.clear();
1085  }
// Grammar data; readToken() looks up words in pgf_p->tokenHash and tries the
// suffixes listed in pgf_p->suffixes.
1086  Pgf *pgf_p;
// Input device the tokens are read from; replaced (and the old one deleted)
// by the setInput* methods.
1087  QIODevice *stream;
// Set when an opening '$' has been returned: the next readToken() call reads
// everything up to the next '$' as a single String token.
1088  bool inFormula;
// Set once the formula text has been returned: the next '$' then starts an
// ordinary word token instead of opening another formula.
1089  bool pastFormula;
// Tokens of suffixes already split off a word; readToken() hands these out
// before reading any further input.
1090  QList<int> suffixes;
1091 };
1092 DYNGENPAR_DECLARE_TYPEINFO(PgfTokenSource, Q_MOVABLE_TYPE);
1093 
/* Returns true if c can be part of a word: an ASCII letter, any byte of a
   non-ASCII UTF-8 sequence (i.e. any byte with the high bit set), or one of
   '_', '\'' and '\\'.
   The high-bit test is done on the unsigned value of the byte, so the result
   does not depend on whether plain char is signed on the target platform. */
static bool isTextChar(char c)
{
  const unsigned char byte = static_cast<unsigned char>(c);
  return (c >= 'a' && c <= 'z')
         || (c >= 'A' && c <= 'Z')
         || byte >= 0x80 // non-ASCII UTF-8 characters
         || c == '_' || c == '\'' || c == '\\';
}
1101 
1102 Cat PgfTokenSource::readToken()
1103 {
1104  /* Check for saved suffixes before checking anything else, even the end of the
1105  stream. The suffixes are already matched from previous stream input, thus
1106  the input only counts as consumed when the suffixes are consumed. */
1107  if (!suffixes.isEmpty())
1108  return suffixes.takeFirst();
1109 
1110  if (stream->atEnd()) return PgfTokenEpsilon; // end of input
1111 
1112  char c;
1113  QByteArray tokenBytes;
1114 
1115  if (inFormula) {
1116  while (true) {
1117  if (stream->atEnd()) break; /* allows incremental parsing and predicting
1118  '$' rather than failing on "unexpected
1119  'LEX_ERROR'" */
1120  if (!stream->getChar(&c)) return PgfTokenLexError;
1121  if (c == '$') {
1122  stream->ungetChar(c);
1123  break;
1124  }
1125  tokenBytes.append(c);
1126  if (c == '\\') {
1127  if (stream->atEnd()) break; /* allows incremental parsing and predicting
1128  '$' rather than failing on "unexpected
1129  'LEX_ERROR'" */
1130  if (!stream->getChar(&c)) return PgfTokenLexError;
1131  tokenBytes.append(c);
1132  }
1133  }
1134  tree = Node(PgfTokenString);
1135  tree.data = QString::fromLocal8Bit(tokenBytes.data()).trimmed();
1136  inFormula = false;
1137  pastFormula = true;
1138  return PgfTokenString;
1139  }
1140 
1141  int state = 0;
1142  bool continueLoop = true;
1143  bool isNumber = false;
1144  while (continueLoop) {
1145  if (!stream->getChar(&c)) return PgfTokenLexError;
1146  switch (state) {
1147  default: // no token yet
1148  if (std::isspace(c)) {
1149  if (stream->atEnd()) return PgfTokenEpsilon; // end of input
1150  } else if (isTextChar(c)
1151  || (pastFormula && c == '$')) {
1152  pastFormula = false;
1153  tokenBytes.append(c);
1154  state = 1;
1155  if (stream->atEnd()) continueLoop = false; // end of input
1156  } else if (c == '$') {
1157  inFormula = true;
1158  tokenBytes.append(c);
1159  continueLoop = false;
1160  } else if (c == '-') {
1161  tokenBytes.append(c);
1162  state = 2;
1163  if (stream->atEnd()) continueLoop = false; // end of input
1164  } else if (c >= '0' && c <= '9') {
1165  tokenBytes.append(c);
1166  isNumber = true;
1167  state = 3;
1168  if (stream->atEnd()) continueLoop = false; // end of input
1169  } else {
1170  tokenBytes.append(c);
1171  continueLoop = false;
1172  }
1173  break;
1174  case 1: // parsing a word
1175  if (isTextChar(c) || c == '-' || (c >= '0' && c <= '9')) {
1176  tokenBytes.append(c);
1177  if (stream->atEnd()) continueLoop = false; // end of input
1178  } else {
1179  stream->ungetChar(c);
1180  continueLoop = false;
1181  }
1182  break;
1183  case 2: // dash
1184  if (c == '-') {
1185  tokenBytes.append(c);
1186  if (stream->atEnd()) continueLoop = false; // end of input
1187  } else if (isTextChar(c)) {
1188  tokenBytes.append(c);
1189  state = 1;
1190  if (stream->atEnd()) continueLoop = false; // end of input
1191  } else if (c >= '0' && c <= '9') {
1192  tokenBytes.append(c);
1193  isNumber = true;
1194  state = 3;
1195  if (stream->atEnd()) continueLoop = false; // end of input
1196  } else {
1197  stream->ungetChar(c);
1198  continueLoop = false;
1199  }
1200  break;
1201  case 3: // parsing a number
1202  if ((c >= '0' && c <= '9') || c == 'e' || c == '-') {
1203  tokenBytes.append(c);
1204  if (stream->atEnd()) continueLoop = false; // end of input
1205  } else if (c == '.') { // dot, could be integer + period or float
1206  if (stream->atEnd()) { // period at end of file, number stops here
1207  stream->ungetChar(c);
1208  continueLoop = false;
1209  } else {
1210  char c2;
1211  if (stream->peek(&c2, 1) < 1) return PgfTokenLexError;
1212  if (c2 >= '0' && c2 <= '9') // float, accept the dot
1213  tokenBytes.append(c);
1214  else { // period, number stops here
1215  stream->ungetChar(c);
1216  continueLoop = false;
1217  }
1218  }
1219  } else {
1220  stream->ungetChar(c);
1221  continueLoop = false;
1222  }
1223  break;
1224  }
1225  }
1226 
1227  QString tokenString = QString::fromLocal8Bit(tokenBytes.data());
1228  if (isNumber) {
1229  if (tokenString.contains('.') || tokenString.contains('e')) { // float
1230  bool ok;
1231  double d = tokenString.toDouble(&ok);
1232  if (!ok) return PgfTokenLexError;
1233  tree = Node(PgfTokenFloat);
1234  tree.data = d;
1235  return PgfTokenFloat;
1236  } else { // integer
1237  bool ok;
1238  int i = tokenString.toInt(&ok);
1239  if (!ok) return PgfTokenLexError;
1240  tree = Node(PgfTokenInt);
1241  tree.data = i;
1242  return PgfTokenInt;
1243  }
1244  } else { // word
1245  /* default = string (will be returned if the word is not known)
1246  TODO: Do we want to make this configurable? If a String isn't acceptable
1247  at this place, we will get a parse error anyway, it will just be an
1248  unexpected token "String" rather than "LEX_ERROR". But we may
1249  want/need to support more ways of handling String tokens in the
1250  future. (For example, what about strings which are also other
1251  tokens? When do we want to return them as String? Do we use
1252  prediction information?) */
1253  int token = PgfTokenString;
1254  if (pgf_p->tokenHash.contains(tokenString))
1255  token = pgf_p->tokenHash.value(tokenString);
1256  else {
1257  // try lowercasing the first character
1258  QString tokenStringLower = tokenString;
1259  tokenStringLower[0] = tokenStringLower[0].toLower();
1260  if (pgf_p->tokenHash.contains(tokenStringLower))
1261  token = pgf_p->tokenHash.value(tokenStringLower);
1262  else { // now try suffixes
1263  QString tokenStringChopped = tokenString;
1264  do {
1265  typedef QPair<QString, int> Suffix;
1266  bool suffixMatched = false;
1267  foreach (const Suffix &suffix, pgf_p->suffixes) {
1268  if (tokenStringChopped.endsWith(suffix.first)) {
1269  suffixMatched = true;
1270  suffixes.prepend(suffix.second);
1271  int suffixLen = suffix.first.size();
1272  tokenStringChopped.chop(suffixLen);
1273  tokenStringLower.chop(suffixLen);
1274  break;
1275  }
1276  }
1277  if (!suffixMatched) break;
1278  if (tokenStringChopped.isEmpty()) /* can happen during incremental
1279  parsing */
1280  token = suffixes.takeFirst();
1281  if (pgf_p->tokenHash.contains(tokenStringChopped))
1282  token = pgf_p->tokenHash.value(tokenStringChopped);
1283  else if (pgf_p->tokenHash.contains(tokenStringLower))
1284  token = pgf_p->tokenHash.value(tokenStringLower);
1285  } while (token == PgfTokenString);
1286  if (token == PgfTokenString) {
1287  /* If we don't know the word, we return it as a String (see TODO
1288  above). */
1289  suffixes.clear();
1290  tree = Node(PgfTokenString);
1291  tree.data = tokenString;
1292  }
1293  }
1294  }
1295  return token;
1296  }
1297 }
1298 
1299 PgfParser::PgfParser(const Pgf &p) : Parser(new PgfTokenSource(&pgf)), pgf(p)
1300 {
1301  init();
1302 }
1303 
1304 PgfParser::PgfParser(const QString &fileName, const QString &concreteName)
1305  : Parser(new PgfTokenSource(&pgf)), pgf(fileName, concreteName)
1306 {
1307  init();
1308 }
1309 
1311 {
1312  static_cast<PgfTokenSource *>(inputSource)->setInputStdin();
1313 }
1314 
1315 void PgfParser::setInputFile(const QString &fileName)
1316 {
1317  static_cast<PgfTokenSource *>(inputSource)->setInputFile(fileName);
1318 }
1319 
1320 void PgfParser::setInputBytes(const QByteArray &bytes)
1321 {
1322  static_cast<PgfTokenSource *>(inputSource)->setInputBytes(bytes);
1323 }
1324 
1325 void PgfParser::setInputString(const QString &string)
1326 {
1327  static_cast<PgfTokenSource *>(inputSource)->setInputString(string);
1328 }
1329 
1330 void PgfParser::setInputBuffer(QByteArray *buffer)
1331 {
1332  static_cast<PgfTokenSource *>(inputSource)->setInputBuffer(buffer);
1333 }
1334 
1335 QString PgfParser::catName(int cat) const
1336 {
1337  if (cat < 0) { // generated category
1338  if (pseudoCats.contains(cat)) {
1339  // pseudo-category, get the effective one
1340  cat = pseudoCats.value(cat).first;
1341  if (cat >= 0) return pgf.catNames.at(cat);
1342  }
1343  if (componentCats.contains(cat)) {
1344  QPair<Cat, int> component = componentCats.value(cat);
1345  QString name = pgf.catNames.at(component.first);
1346  if (pgf.componentNames.contains(name))
1347  return QString("%1[%2]").arg(name).arg(pgf.componentNames.value(name)
1348  .at(component.second));
1349  else
1350  return QString("%1[%2]").arg(name).arg(component.second);
1351  } else return QString::number(cat); // unknown generated category
1352  } else return pgf.catNames.at(cat);
1353 }
1354 
1356 {
1357  for (int i=0; i<tree.children.size(); ) {
1358  const Alternative &child = tree.children.at(i);
1359  QVariant label = child.label();
1360  if (label.type() == QVariant::Int && label.toInt() < pgf.firstFunction) {
1361  tree.children.append(child.first().children);
1362  tree.children.removeAt(i);
1363  } else i++;
1364  }
1365  int s = tree.children.size();
1366  for (int i=0; i<s; i++) {
1367  Alternative &child = tree.children[i];
1368  int l = child.size();
1369  for (int j=0; j<l; j++)
1371  }
1372 }
1373 
1374 } // end namespace
PgfParser(const Pgf &p)
Definition: pgf.cpp:1299
void setInputString(const QString &string)
Definition: pgf.cpp:1325
QDataStream & operator>>(QDataStream &stream, DynGenPar::Action *&data)
Definition: dyngenpar.h:1471
QHash< Cat, QPair< Cat, int > > componentCats
maps categories which represent components of a multi-component category to the category and component index
Definition: dyngenpar.h:1294
void setInputBytes(const QByteArray &bytes)
Definition: pgf.cpp:1320
node in the parse tree
Definition: dyngenpar.h:320
representation of the information in .pgf files in a format we can process
Definition: pgf.h:52
#define CHECK_STATUS()
QList< Cat > expect
list of context-free categories the next token MUST match
Definition: dyngenpar.h:95
QString Cat
Category type: string or integer depending on DYNGENPAR_INTEGER_CATEGORIES.
Definition: dyngenpar.h:71
QStringList catNames
names of categories, in general not unique
Definition: pgf.h:59
term in the expression of a component of a PMCFG function
Definition: dyngenpar.h:955
component of a PMCFG function, a sequence of terms
Definition: dyngenpar.h:976
QHash< QString, QStringList > componentNames
names of category components
Definition: pgf.h:64
QString catName(int cat) const
Definition: pgf.cpp:1335
void setInputStdin()
Definition: pgf.cpp:1310
void setInputFile(const QString &fileName)
Definition: pgf.cpp:1315
QList< Cat > taboo
list of context-free categories the next token MUST NOT match
Definition: dyngenpar.h:103
virtual bool rewindTo(int pos, const LexerState &=LexerState())
rewind to an older position (requires buffering)
Definition: dyngenpar.h:862
TokenSource * inputSource
input source
Definition: dyngenpar.h:1305
int firstFunction
the function ID of the first non-coercion function
Definition: pgf.h:65
API for stateful lexers to save their state for rewinding.
Definition: dyngenpar.h:793
void setInputBuffer(QByteArray *buffer)
Definition: pgf.cpp:1330
void filterCoercionsFromSyntaxTree(Node &tree) const
Definition: pgf.cpp:1355
QHash< Cat, QPair< Cat, QList< Cat > > > pseudoCats
pseudo-categories, used to represent PMCFGs internally
Definition: dyngenpar.h:1288
PMCFG function.
Definition: dyngenpar.h:1015
QVariant label() const
Definition: dyngenpar.h:287
Pgf()
dummy default constructor for bindings
Definition: pgf.h:54
QVariant data
Definition: dyngenpar.h:327
NextTokenConstraints nextTokenConstraints
Definition: dyngenpar.h:142
QList< Alternative > children
Definition: dyngenpar.h:328
main class
Definition: dyngenpar.h:1158
STATIC const char *const PreludeBind
Definition: pgf.h:48