DynGenPar
Dynamic Generalized Parser
bytetokensource.h
Go to the documentation of this file.
1 /* DynGenPar: Dynamic Generalized Parser - Byte token source
2  Copyright (C) 2011-2012 Kevin Kofler <kevin.kofler@chello.at>
3  Copyright (C) 2015 DAGOPT Optimization Technologies GmbH
4  written by Kevin Kofler <kofler@dagopt.com>
5 
6  Support by the Austrian Science Fund FWF under contract numbers
7  P20631 and P23554 is gratefully acknowledged.
8 
9  This program is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation, either version 2 of the License, or
12  (at your option) any later version.
13 
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 
22 #pragma once
23 
24 #ifndef DYNGENPAR_INTEGER_CATEGORIES
25 #define DYNGENPAR_INTEGER_CATEGORIES
26 #endif
27 
28 #include <cstdio>
29 #include <QIODevice>
30 #include <QFile>
31 #include <QBuffer>
32 
33 #include "dyngenpar.h"
34 
35 enum ByteTokens {
37  ByteTokenNul = 256,
39 };
40 Q_DECLARE_TYPEINFO(ByteTokens, Q_PRIMITIVE_TYPE);
41 
42 namespace DynGenPar {
43 
44 class ByteTokenSource : public TokenSource {
45  public:
46  ByteTokenSource() : TokenSource(), stream(new QFile()) {
47  static_cast<QFile *>(stream)->open(stdin, QIODevice::ReadOnly);
48  }
49  ByteTokenSource(const QString &fileName)
50  : TokenSource(), stream(new QFile(fileName)) {
51  stream->open(QIODevice::ReadOnly);
52  }
53  virtual ~ByteTokenSource() {delete stream;}
54  virtual bool rewindTo(int pos, const LexerState & = LexerState()) {
55  // Sequential streams cannot be rewound.
56  if (stream->isSequential())
57  return TokenSource::rewindTo(pos);
58  else {
59  if (pos != currPos && !stream->seek(pos)) {
60  tree = Node();
61  return false;
62  }
63  return simpleRewind(pos);
64  }
65  }
66  void setInputStdin() {
67  delete stream;
68  stream = new QFile();
69  static_cast<QFile *>(stream)->open(stdin, QIODevice::ReadOnly);
70  reset();
71  }
72  void setInputFile(const QString &fileName) {
73  delete stream;
74  stream = new QFile(fileName);
75  stream->open(QIODevice::ReadOnly);
76  reset();
77  }
78  void setInputBytes(const QByteArray &bytes) {
79  delete stream;
80  stream = new QBuffer();
81  static_cast<QBuffer *>(stream)->setData(bytes);
82  stream->open(QIODevice::ReadOnly | QIODevice::Unbuffered);
83  reset();
84  }
85  void setInputString(const QString &string) {
86  setInputBytes(string.toLocal8Bit());
87  }
88  void setInputBuffer(QByteArray *buffer) {
89  delete stream;
90  stream = new QBuffer(buffer);
91  stream->open(QIODevice::ReadOnly | QIODevice::Unbuffered);
92  reset();
93  }
94  protected:
95  virtual Cat readToken() {
96  if (stream->atEnd()) return ByteTokenEpsilon;
97  char c;
98  if (!stream->getChar(&c)) return ByteTokenError;
99  if (c) return (unsigned char)c; else return ByteTokenNul;
100  }
101  QIODevice *stream;
102  virtual void reset() {
103  tree = Node();
104  currPos = 0;
105  }
106 };
107 DYNGENPAR_DECLARE_TYPEINFO(ByteTokenSource, Q_MOVABLE_TYPE);
108 
110 
117  public:
118  TextByteLexerStateData(qint64 streamPosition, TextPosition textPosition)
119  : AbstractLexerStateData(), streamPos(streamPosition),
120  textPos(textPosition) {}
122  return new TextByteLexerStateData(*this);
123  }
124  qint64 streamPos;
126 };
127 DYNGENPAR_DECLARE_TYPEINFO(TextByteLexerStateData, Q_MOVABLE_TYPE);
128 
130  public:
132  TextByteTokenSource(const QString &fileName) : ByteTokenSource(fileName) {}
133  virtual ~TextByteTokenSource() {}
135  virtual bool rewindTo(int pos,
136  const LexerState &lexerState = LexerState()) {
137  /* Sequential streams cannot be rewound at all.
138  For other streams, we need a non-null lexer state to rewind to a
139  non-zero position. */
140  if (stream->isSequential() || (pos && lexerState.isNull()))
141  return TokenSource::rewindTo(pos);
142  else if (!pos && currPos) { // rewind to position zero = reset
143  stream->reset();
144  reset();
145  return true;
146  } else {
147  if (pos != currPos) {
148  const TextByteLexerStateData *data
149  = static_cast<const TextByteLexerStateData *>(lexerState.data());
150  if (!stream->seek(data->streamPos)) {
151  tree = Node();
152  return false;
153  }
154  textPos = data->textPos;
155  }
156  return simpleRewind(pos);
157  }
158  }
161  virtual LexerState saveState() {
163  = new TextByteLexerStateData(stream->pos(), textPos);
164  return LexerState(data);
165  }
167  static TextPosition textPosition(const LexerState &lexerState) {
168  return lexerState.isNull() ? TextPosition() :
169  static_cast<const TextByteLexerStateData *>(lexerState.data())
170  ->textPos;
171  }
172  protected:
174  virtual Cat readToken() {
175  Cat token;
176  do {
177  token = ByteTokenSource::readToken();
178  } while (token == '\r');
179  if (token && token <= ByteTokenNul)
180  textPos.countCharacter((unsigned char) token);
181  return token;
182  }
183  virtual void reset() {
185  textPos.reset();
186  }
187  private:
188  TextPosition textPos;
189 };
190 DYNGENPAR_DECLARE_TYPEINFO(TextByteTokenSource, Q_MOVABLE_TYPE);
191 
192 } // end namespace
we have to remap this because 0 is epsilon
const AbstractLexerStateData * data() const
Definition: dyngenpar.h:806
void setInputString(const QString &string)
static TextPosition textPosition(const LexerState &lexerState)
Retrieves the text position (line and column) stored in the lexer state.
void setInputFile(const QString &fileName)
ByteTokens
void setInputBytes(const QByteArray &bytes)
Q_DECLARE_TYPEINFO(ByteTokens, Q_PRIMITIVE_TYPE)
node in the parse tree
Definition: dyngenpar.h:320
virtual Cat readToken()
Overrides readToken to strip CRs.
virtual Cat readToken()
get the next token from the input, to be implemented by subclasses
QString Cat
Category type: string or integer depending on DYNGENPAR_INTEGER_CATEGORIES.
Definition: dyngenpar.h:71
TextByteTokenSource(const QString &fileName)
ByteTokenSource(const QString &fileName)
You should not have to use this class directly, ever.
bool simpleRewind(int pos, bool rewindOnly=false)
basic implementation of rewindTo for subclasses which support it
Definition: dyngenpar.h:887
text position
Definition: dyngenpar.h:929
TextByteLexerStateData(qint64 streamPosition, TextPosition textPosition)
virtual bool rewindTo(int pos, const LexerState &=LexerState())
rewind to an older position (requires buffering)
Definition: dyngenpar.h:862
API for stateful lexers to save their state for rewinding.
Definition: dyngenpar.h:793
Node tree
sub-parse-tree for hierarchical parsing
Definition: dyngenpar.h:902
virtual AbstractLexerStateData * clone()
bool isNull() const
Definition: dyngenpar.h:805
virtual bool rewindTo(int pos, const LexerState &=LexerState())
rewind to an older position (requires buffering)
virtual bool rewindTo(int pos, const LexerState &lexerState=LexerState())
We can only rewind if we have a lexer state with the true position.
void setInputBuffer(QByteArray *buffer)
virtual LexerState saveState()
Saves the true stream position (including CRs) and the text position in lines and columns into a lexer state.