Parser.h
Go to the documentation of this file.
1 /****
2  * Parser.h
3  *
4  * Copyright 2021 mikee47 <mike@sillyhouse.net>
5  *
6  * This file is part of the CsvReader Library
7  *
8  * This library is free software: you can redistribute it and/or modify it under the terms of the
9  * GNU General Public License as published by the Free Software Foundation, version 3 or later.
10  *
11  * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
12  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13  * See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along with this library.
16  * If not, see <https://www.gnu.org/licenses/>.
17  *
18  ****/
19 
20 #pragma once
21 
22 #include <Delegate.h>
23 #include <Data/CStringArray.h>
25 
26 namespace CSV
27 {
31 struct Cursor {
32  int start;
33  unsigned end;
34 
38  size_t length() const
39  {
40  return (start < 0) ? 0 : end - unsigned(start);
41  }
42 
46  operator String() const
47  {
48  String s;
49  s += '[';
50  s += start;
51  s += ',';
52  s += length();
53  s += ']';
54  return s;
55  }
56 };
57 
80 class Parser
81 {
82 public:
86  struct Options {
90  const char* commentChars = nullptr;
94  uint16_t lineLength = 256;
99  char fieldSeparator = ',';
103  bool parseEscape = false;
107  bool wantComments = false;
108  };
109 
110  static constexpr int BOF{-1};
111 
116  Parser(const Options& options) : options(options)
117  {
118  }
119 
126  bool push(Stream& source);
127 
136  bool push(const char* data, size_t length, size_t& offset);
137 
143  bool flush();
144 
150  bool readRow(IDataSourceStream& source);
151 
157  void reset(int offset = BOF);
158 
162  const CStringArray& getRow() const
163  {
164  return row;
165  }
166 
173  int tell() const
174  {
175  return cursor.start;
176  }
177 
181  const Cursor& getCursor() const
182  {
183  return cursor;
184  }
185 
189  unsigned getStreamPos() const
190  {
191  return sourcePos - taillen;
192  }
193 
194  const Options& getOptions() const
195  {
196  return options;
197  }
198 
199 private:
200  size_t fillBuffer(Stream* source);
201  bool parseRow(bool eof);
202 
203  Options options;
204  CStringArray row;
205  String buffer;
206  Cursor cursor{BOF};
207  unsigned sourcePos{0};
208  uint16_t tailpos{0};
209  uint16_t taillen{0};
210 };
211 
212 } // namespace CSV
Class to parse a CSV file.
Definition: Parser.h:81
bool readRow(IDataSourceStream &source)
Read a single data row using data from provided DataSourceStream.
Parser(const Options &options)
Construct a CSV parser.
Definition: Parser.h:116
bool push(const char *data, size_t length, size_t &offset)
Read a single data row, taking data if required from provided buffer.
bool flush()
Call to read additional rows after all data pushed.
const CStringArray & getRow() const
Get current row.
Definition: Parser.h:162
bool push(Stream &source)
Read a single data row, taking data if required from provided Stream.
const Cursor & getCursor() const
Get cursor position for current row.
Definition: Parser.h:181
int tell() const
Get cursor position for current row.
Definition: Parser.h:173
static constexpr int BOF
Indicates 'Before First Record'.
Definition: Parser.h:110
unsigned getStreamPos() const
Get stream position where next record will be read from.
Definition: Parser.h:189
void reset(int offset=BOF)
Reset parser to initial conditions.
const Options & getOptions() const
Definition: Parser.h:194
Class to manage a double null-terminated list of strings, such as "one\0two\0three\0".
Definition: CStringArray.h:22
Base class for read-only stream.
Definition: DataSourceStream.h:46
Base Stream class.
Definition: Wiring/Stream.h:33
The String class.
Definition: WString.h:133
Definition: Parser.h:27
BitSet< uint8_t, Option, 2 > Options
Definition: Components/IFS/src/include/IFS/Debug.h:33
Contains location details of the current record in the source stream.
Definition: Parser.h:31
size_t length() const
Get number of source characters in record data.
Definition: Parser.h:38
int start
BOF if there is no current record.
Definition: Parser.h:32
unsigned end
One-past end of record.
Definition: Parser.h:33
Parsing options.
Definition: Parser.h:86
uint16_t lineLength
Definition: Parser.h:94
bool wantComments
Set to true to return comment lines, otherwise they're discarded.
Definition: Parser.h:107
const char * commentChars
Definition: Parser.h:90
bool parseEscape
Set to true to handle escape sequences (\n, \t, etc.)
Definition: Parser.h:103
char fieldSeparator
Definition: Parser.h:99