10#include <unordered_map>
11#include <unordered_set>
15#include "../external/mio.hpp"
16#include "col_names.hpp"
/** Sentinel assigned to `field_start` when it is declared and again each time
 *  a new chunk is about to be read, i.e. before any field start has been recorded.
 */
24 constexpr const int UNINITIALIZED_FIELD = -1;
28 template<
typename OutArray,
typename T =
typename OutArray::type>
/** Whether or not we have reached the end of source.
 *
 *  @return The current value of the `_eof` flag.
 */
100 bool eof() {
return this->_eof; }
108 CONSTEXPR_17
ParseFlags parse_flag(
const char ch)
const noexcept {
112 CONSTEXPR_17
ParseFlags compound_parse_flag(
const char ch)
const noexcept {
/** Set the row collection this parser refers to via `_records`.
 *
 *  Only the address of `rows` is stored (no copy is made), so the caller
 *  must keep `rows` alive for as long as the parser may use it.
 *
 *  @param rows Collection whose address is stored in `_records`.
 */
119 void set_output(
RowCollection& rows) { this->_records = &rows; }
125 RawCSVDataPtr data_ptr =
nullptr;
126 ColNamesPtr _col_names =
nullptr;
127 CSVFieldList* fields =
nullptr;
128 int field_start = UNINITIALIZED_FIELD;
129 size_t field_length = 0;
159 bool quote_escape =
false;
160 bool field_has_double_quote =
false;
166 bool unicode_bom_scan =
false;
167 bool _utf8_bom =
false;
172 CONSTEXPR_17
bool ws_flag(
const char ch)
const noexcept {
/** Mutable access to the `data_start` member of the row currently being built.
 *
 *  @return Reference to `current_row.data_start`; writes through it modify the row.
 */
176 size_t& current_row_start() {
177 return this->current_row.data_start;
189 void trim_utf8_bom();
208 template<
typename TStream>
215 const ColNamesPtr& col_names =
nullptr
/** Parse the next block of data from the stream source.
 *
 *  Reads at most `bytes` bytes starting at `stream_pos`, copies them into a
 *  freshly allocated std::string owned by `data_ptr`, and hands that chunk
 *  to `parse()`.
 *
 *  @param bytes Upper bound on the number of bytes read in this call.
 *
 *  NOTE(review): parts of this function (including closing braces and the
 *  body of the final `if`) are not visible in this excerpt.
 */
228 void next(
size_t bytes = ITERATION_CHUNK_SIZE)
override {
// Nothing left to read once the end of the source has been reached.
229 if (this->eof())
return;
// Reset per-chunk field state and start a fresh data block backed by
// a new, empty std::string.
232 this->field_start = UNINITIALIZED_FIELD;
233 this->field_length = 0;
234 this->reset_data_ptr();
235 this->data_ptr->_data = std::make_shared<std::string>();
// source_size == 0 is treated as "size not measured yet": measure it as
// the distance from the current position to the end, then rewind to the
// beginning of the stream.
// NOTE(review): tellg() returns -1 on failure (e.g. non-seekable streams);
// that case is not checked here - confirm callers only pass seekable streams.
237 if (source_size == 0) {
238 const auto start = _source.tellg();
239 _source.seekg(0, std::ios::end);
240 const auto end = _source.tellg();
241 _source.seekg(0, std::ios::beg);
243 source_size = end - start;
// Read whichever is smaller: the bytes remaining in the source or the
// requested chunk size.
// NOTE(review): `source_size - stream_pos` is an unsigned subtraction and
// would wrap if stream_pos ever exceeded source_size - confirm it cannot.
247 size_t length = std::min(source_size - stream_pos, bytes);
// Stage the chunk in a temporary buffer, then copy it into the shared string.
248 std::unique_ptr<char[]> buff(
new char[length]);
249 _source.seekg(stream_pos, std::ios::beg);
// NOTE(review): read() may deliver fewer than `length` bytes; gcount() is
// not consulted, so the assign() below always copies `length` bytes.
250 _source.read(buff.get(), length);
251 stream_pos = _source.tellg();
// `_data` is accessed through C-style casts to std::string*; presumably it
// is a type-erased shared pointer - confirm against RawCSVData.
252 ((std::string*)(this->data_ptr->_data.get()))->assign(buff.get(), length);
// Point `data` at the contents of the string that was just filled
// (presumably a string_view over it - confirm).
255 this->data_ptr->data = *((std::string*)this->data_ptr->_data.get());
// Start a new row bound to this chunk and parse it.
258 this->current_row =
CSVRow(this->data_ptr);
259 size_t remainder = this->
parse();
// Whole source consumed, or the source does not need chunked reads
// (the body of this branch is not visible in this excerpt).
261 if (stream_pos == source_size || no_chunk()) {
// Move stream_pos back by (length - remainder) - presumably the unparsed
// tail of this chunk, so the next call re-reads it; confirm against parse().
266 this->stream_pos -= (length - remainder);
272 size_t stream_pos = 0;
288 const ColNamesPtr& col_names =
nullptr
290 this->_filename = filename.data();
291 this->source_size = get_file_size(filename);
296 void next(
size_t bytes)
override;
299 std::string _filename;
Data structure for representing CSV rows.
Abstract base class which provides CSV parsing logic.
CONSTEXPR bool no_chunk() const
Whether or not source needs to be read in chunks.
ParseFlagMap _parse_flags
An array where the (i + 128)th slot gives the ParseFlags for ASCII character i.
void reset_data_ptr()
Create a new RawCSVDataPtr for a new chunk of data.
bool eof()
Whether or not we have reached the end of source.
void end_feed()
Indicate the last block of data has been parsed.
size_t parse()
Parse the current chunk of data.
virtual void next(size_t bytes)=0
Parse the next block of data.
size_t source_size
The size of the incoming CSV.
CONSTEXPR bool utf8_bom() const
Whether or not this CSV has a UTF-8 byte order mark.
Parser for memory-mapped files.
A class for parsing CSV data from a std::stringstream or a std::ifstream.
void next(size_t bytes=ITERATION_CHUNK_SIZE) override
Parse the next block of data.
A std::deque wrapper which allows multiple read and write threads to concurrently access it along with providing read threads the ability to wait for the deque to become populated.
A standalone header file containing shared code.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Defines the data type used for storing information about a CSV row.
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
constexpr size_t ITERATION_CHUNK_SIZE
Chunk size for lazy-loading large CSV files.
CSV_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter)
Create a vector v where each index i corresponds to the ASCII number for a character and, v[i + 128] labels it according to the ParseFlags enum.
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
@ NOT_SPECIAL
Characters with no special meaning or escaped delimiters and newlines.
@ NEWLINE
Characters which signify a new row.
@ QUOTE
Characters which may signify a quote escape.
@ DELIMITER
Characters which signify a new field.
constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept
Transform the ParseFlags given the context of whether or not the current field is quote escaped.
CSV_CONST CONSTEXPR_17 WhitespaceMap make_ws_flags(const char *ws_chars, size_t n_chars)
Create a vector v where each index i corresponds to the ASCII number for a character c and, v[i + 128] is true if c is a whitespace character.
CSV_CONST CONSTEXPR_17 OutArray arrayToDefault(T &&value)
Helper constexpr function to initialize an array with all the elements set to value.
The all encompassing namespace.
CSVReader parse(csv::string_view in, CSVFormat format)
Shorthand function for parsing an in-memory CSV string.
constexpr unsigned CHAR_OFFSET
Offset to convert char into array index.
nonstd::string_view string_view
The string_view class used by this library.
Thread-safe deque for producer-consumer patterns.