22#include "parser/mmap.hpp"
23#include "parser/scheduler.hpp"
24#include "parser/stream.hpp"
69 #ifndef DOXYGEN_SHOULD_SKIP_THIS
// Iterator trait aliases. Only part of the alias set is present in this excerpt.
// Distance between two iterators is expressed as std::ptrdiff_t.
71 using difference_type = std::ptrdiff_t;
// Dereferencing the iterator yields a (non-const) CSVRow reference.
73 using reference =
CSVRow & ;
// The iterator is tagged as an input iterator.
74 using iterator_category = std::input_iterator_tag;
/**
 * Access the CSVRow held by the iterator.
 *
 * @return Mutable reference to this iterator's `row` member.
 */
82 CONSTEXPR_14 reference
operator*() {
return this->row; }
/**
 * Const overload of the dereference operator.
 *
 * Returns the same mutable `reference` type as the non-const overload: inside
 * a const member function `this->row` is const, so the constness is cast away.
 *
 * NOTE(review): writing through the result is only well-defined when the
 * iterator object itself was not declared const.
 *
 * @return Reference to this iterator's `row` member.
 */
83 CONSTEXPR_14 reference
operator*()
const {
return const_cast<reference
>(this->row); }
/**
 * Return a pointer to the CSVRow the iterator has stopped at.
 *
 * @return Address of this iterator's `row` member, as the iterator's
 *         `pointer` type (alias declared outside this excerpt).
 */
86 CONSTEXPR_14 pointer
operator->() {
return &(this->row); }
/**
 * Const overload of the member-access operator.
 *
 * Takes the address of `row` (const in this context) and casts it to the
 * iterator's `pointer` type, matching what the non-const overload returns.
 *
 * @return Address of this iterator's `row` member.
 */
87 CONSTEXPR_14 pointer
operator->()
const {
return const_cast<pointer
>(&(this->row)); }
96 return (this->daddy == other.daddy) && (this->i == other.i);
124 read_scheduler_(format.is_threading_enabled()) {
125#if defined(__EMSCRIPTEN__)
127 new std::ifstream(std::string(filename), std::ios::binary)
131 internals::throw_cannot_open_file(filename);
136 this->init_parser(std::unique_ptr<internals::parser::CSVParserDriverBase>(
137 new internals::parser::MmapParser(filename, format, this->
col_names)
154 template<
typename TStream,
155 csv::enable_if_t<std::is_base_of<std::istream, TStream>::value,
int> = 0>
158 read_scheduler_(format.is_threading_enabled()) {
159 this->init_from_stream(source, format);
171 read_scheduler_(format.is_threading_enabled()) {
173 throw std::invalid_argument(internals::ERROR_READER_NULL_STREAM);
192 read_scheduler_(other._format.is_threading_enabled()) {
193 other.read_scheduler_.join();
194 this->move_state_from(other);
202 if (
this == &other) {
206 this->read_scheduler_.join();
207 other.read_scheduler_.join();
208 this->move_state_from(other);
214 this->read_scheduler_.join();
/**
 * Read up to max_rows rows into a caller-owned batch buffer.
 *
 * @param out      Vector that receives the rows.
 * @param max_rows Upper bound on the number of rows delivered by this call.
 * @return NOTE(review): presumably true while rows were produced; the
 *         definition is not part of this excerpt, so confirm there.
 */
260 bool read_chunk(std::vector<CSVRow>& out,
size_t max_rows);
/**
 * A placeholder for the imaginary past-the-end row in a CSV.
 *
 * @return The end iterator for this reader.
 */
262 CSV_CONST iterator
end() const noexcept;
/**
 * Returns true if we have reached end of file.
 *
 * Simply forwards to the parser's own eof().
 *
 * NOTE(review): `parser` is dereferenced here without a null check, while
 * speculative_diagnostics() and parse_worker_count() test `this->parser`
 * before using it. Confirm `parser` can never be null when this is called
 * (for example on a moved-from reader).
 */
265 bool eof() const noexcept {
return this->
parser->eof(); }
279 static const std::vector<std::string> empty_col_names;
290 return this->
col_names->index_of(col_name);
312 return this->
parser ? this->
parser->speculative_diagnostics()
313 : internals::SpeculativeParseDiagnostics();
318 return this->
parser ? this->
parser->parse_worker_count() : 1;
/** Pointer to an object containing column information.
 *  Starts out as a freshly allocated, shared ColNames instance.
 */
341 internals::ColNamesPtr
col_names = std::make_shared<internals::ColNames>();
/** Helper class which actually does the parsing.
 *  Null until a parser driver is installed.
 */
344 std::unique_ptr<internals::parser::CSVParserDriverBase>
parser =
nullptr;
/** Queue of parsed CSV rows.
 *  NOTE(review): the meaning of the constructor argument 100 is not visible
 *  in this excerpt (possibly an initial capacity) - confirm in RowCollection.
 */
347 std::unique_ptr<RowCollection>
records{
new RowCollection(100)};
/**
 * Read a chunk of CSV data.
 *
 * @param bytes Size of the chunk to read; defaults to
 *              internals::CSV_CHUNK_SIZE_DEFAULT. initial_read() passes
 *              `_chunk_size` here.
 * @return NOTE(review): meaning of the boolean result is defined with the
 *         implementation, which is outside this excerpt.
 */
364 bool read_csv(
size_t bytes = internals::CSV_CHUNK_SIZE_DEFAULT);
// Cleared again by reset_after_move().
// NOTE(review): presumably records that header fields were already trimmed - confirm.
371 bool header_trimmed =
false;
// Byte count that initial_read() hands to read_csv(); starts at the library default.
375 size_t _chunk_size = internals::CSV_CHUNK_SIZE_DEFAULT;
// Cleared again by reset_after_move().
// NOTE(review): presumably set once a read has been requested - confirm against the definitions.
376 bool _read_requested =
false;
// Scheduler used to run read_csv() (see initial_read()); every constructor
// initializes it from the format's is_threading_enabled() setting.
377 internals::CSVReadScheduler read_scheduler_;
380 void move_state_from(
CSVReader& other)
noexcept {
381 this->_format = std::move(other._format);
382 this->col_names = std::move(other.col_names);
383 this->parser = std::move(other.parser);
384 this->
records = std::move(other.records);
385 this->owned_stream = std::move(other.owned_stream);
386 this->n_cols = other.n_cols;
387 this->_n_rows = other._n_rows;
388 this->header_trimmed = other.header_trimmed;
389 this->_chunk_size = other._chunk_size;
390 this->_read_requested = other._read_requested;
391 this->read_scheduler_.set_threading_enabled(this->_format.is_threading_enabled());
392 this->read_scheduler_.adopt_exception(other.read_scheduler_.take_exception());
393 other.reset_after_move();
396 void reset_after_move() noexcept {
399 this->header_trimmed =
false;
400 this->_read_requested =
false;
401 this->_chunk_size = internals::CSV_CHUNK_SIZE_DEFAULT;
/**
 * Hand a parser driver over to this reader.
 *
 * The driver is passed by std::unique_ptr, so ownership transfers to the
 * callee. The filename constructor calls this with a MmapParser.
 *
 * @param parser Parser driver to take over.
 */
412 void init_parser(std::unique_ptr<internals::parser::CSVParserDriverBase>
parser);
414 template<
typename TStream,
415 csv::enable_if_t<std::is_base_of<std::istream, TStream>::value,
int> = 0>
416 void init_from_stream(TStream& source, CSVFormat format) {
418 std::unique_ptr<internals::parser::CSVParserDriverBase>(
419 new internals::parser::StreamParser<TStream>(source, format, this->col_names)
425 void initial_read() {
426 this->read_scheduler_.run([
this] { this->
read_csv(this->_chunk_size); });
427 this->read_scheduler_.join();
428 this->read_scheduler_.rethrow_exception_if_any();
/**
 * Process one candidate row.
 *
 * NOTE(review): the definition is outside this excerpt. The signature
 * suggests the row is moved into either `single_row` or `batch_rows`,
 * whichever destination the caller supplies - confirm before relying on it.
 *
 * @param candidate   Row to consume (taken by rvalue reference).
 * @param single_row  Destination pointer for a single row.
 * @param batch_rows  Destination pointer for a batch of rows.
 */
440 bool accept_row(CSVRow&& candidate, CSVRow* single_row, std::vector<CSVRow>* batch_rows);
/**
 * NOTE(review): defined outside this excerpt. Presumably reports whether
 * rows are available to hand out - confirm against the definition.
 */
448 bool check_for_rows();
/**
 * NOTE(review): defined outside this excerpt. Presumably moves up to
 * `max_rows` already-parsed rows into `out` (same parameter shape as
 * read_chunk()) - confirm against the definition.
 *
 * @param out      Vector that receives the rows.
 * @param max_rows Upper bound on the number of rows to move.
 */
455 void drain_rows_into_chunk(std::vector<CSVRow>& out,
size_t max_rows);
461 internals::csv_write_rows_input_range<CSVReader>,
462 "CSVReader must remain compatible with csv::DelimWriter::write_rows()."
An input iterator capable of handling large files.
CONSTEXPR bool operator==(const iterator &other) const noexcept
Returns true if iterators were constructed from the same CSVReader and point to the same row.
iterator & operator++()
Pre-increment iterator.
CONSTEXPR_14 reference operator*()
Access the CSVRow held by the iterator.
CONSTEXPR_14 pointer operator->()
Return a pointer to the CSVRow the iterator has stopped at.
Main class for parsing CSVs from files and in-memory sources.
CONSTEXPR bool empty() const noexcept
Whether or not the file or stream contains valid CSV rows, not including the header.
bool read_chunk(std::vector< CSVRow > &out, size_t max_rows)
Read up to max_rows rows into a caller-owned batch buffer.
bool utf8_bom() const noexcept
Whether or not the CSV was prefixed with a UTF-8 BOM.
internals::SpeculativeParseDiagnostics speculative_diagnostics() const noexcept
Return speculative-parsing counters for filename-backed readers.
CSVFormat get_format() const
Return the resolved parsing format for this CSV source.
internals::ConstColNamesPtr col_names_ptr() const noexcept
Internal accessor for preserving resolved column-name lookup policy across helper types.
int index_of(csv::string_view col_name) const
Return the index of col_name, or csv::CSV_NOT_FOUND if absent.
size_t parse_worker_count() const noexcept
Return the number of parser worker threads used by the active parser.
const std::vector< std::string > & get_col_names() const
Return the active column names in CSV order.
CSVReader & operator=(const CSVReader &)=delete
Not copyable.
CSV_CONST iterator end() const noexcept
A placeholder for the imaginary past-the-end row in a CSV.
CONSTEXPR size_t n_rows() const noexcept
Retrieves the number of rows that have been read so far.
CSVReader(std::unique_ptr< std::istream > source, const CSVFormat &format=CSVFormat::guess_csv())
Construct CSVReader from an owned std::istream.
bool eof() const noexcept
Returns true if we have reached end of file.
CSVReader & operator=(CSVReader &&other) noexcept
Move assignment.
CSVReader(CSVReader &&other) noexcept
Move constructor.
bool read_row(CSVRow &row)
Retrieve the next CSV row, returning true while more rows are available.
CSVReader(TStream &source, CSVFormat format=CSVFormat::guess_csv())
Construct CSVReader from std::istream.
CSVReader(const CSVReader &)=delete
Not copyable.
iterator begin()
Return an iterator to the first row in the reader.
CSVReader(csv::string_view filename, const CSVFormat &format=CSVFormat::guess_csv())
Construct CSVReader from filename.
Data structure for representing CSV rows.
A standalone header file containing shared code.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
Shared exception message templates and throw helpers.
CSV scalar type classification adapter.
std::unique_ptr< std::istream > owned_stream
Optional owned stream used by two paths: 1) Emscripten filename-constructor fallback to stream parsin...
std::unique_ptr< RowCollection > records
Queue of parsed CSV rows.
size_t _n_rows
How many rows (minus header) have been read so far.
bool read_csv(size_t bytes=internals::CSV_CHUNK_SIZE_DEFAULT)
Read a chunk of CSV data.
internals::ColNamesPtr col_names
Pointer to an object containing column information.
void set_col_names(const std::vector< std::string > &)
Sets this reader's column names and associated data.
std::unique_ptr< internals::parser::CSVParserDriverBase > parser
Helper class which actually does the parsing.
size_t n_cols
The number of columns in this CSV.
The all encompassing namespace.
std::string_view string_view
The string_view class used by this library.