9#pragma region Reading helpers
12 if (!this->
records->empty())
return true;
14 if (this->read_scheduler_.wait_if_active(
15 [
this] { return this->records->is_waitable(); },
16 [
this] { this->records->wait(); }
21 this->read_scheduler_.join();
22 this->read_scheduler_.rethrow_exception_if_any();
24 if (this->
parser->eof())
return false;
26 if (this->_read_requested && this->
records->empty()) {
27 internals::throw_row_too_large_for_chunk(this->_chunk_size);
30 this->read_scheduler_.run(
31 [
this] { this->
read_csv(this->_chunk_size); },
32 [
this] { this->
records->notify_all(); }
34 this->read_scheduler_.rethrow_exception_if_any();
35 this->_read_requested =
true;
40#pragma endregion Reading helpers
44#pragma region Format and header helpers
47 std::unique_ptr<internals::parser::CSVParserDriverBase> parser_impl
49 auto resolved = parser_impl->get_resolved_format();
50 this->_format = resolved.format;
51 this->_chunk_size = this->_format.get_chunk_size();
52 this->
n_cols = resolved.n_cols;
54 if (!this->_format.col_names.empty()) {
58 this->
parser = std::move(parser_impl);
68 new_format.col_names = this->
col_names->get_col_names();
69 new_format.header = this->_format.header;
75 if (!this->header_trimmed) {
76 for (
int i = 0; i <= this->_format.header && !this->
records->empty(); i++) {
77 if (i == this->_format.header && this->col_names->empty()) {
85 this->header_trimmed =
true;
92 this->
col_names->set_policy(this->_format.get_column_name_policy());
94 this->
n_cols = names.size();
97#pragma endregion Format and header helpers
101#pragma region Reading helpers
103 CSV_INLINE bool CSVReader::accept_row(
CSVRow&& candidate,
CSVRow* single_row, std::vector<CSVRow>* batch_rows) {
104 const auto policy = this->_format.variable_column_policy;
105 const size_t next_row_size = candidate.size();
107 if (policy == VariableColumnPolicy::KEEP_NON_EMPTY && next_row_size == 0) {
111 if (next_row_size != this->
n_cols &&
112 (policy == VariableColumnPolicy::THROW || policy == VariableColumnPolicy::IGNORE_ROW)) {
113 if (policy == VariableColumnPolicy::THROW) {
114 if (candidate.size() < this->n_cols) {
115 internals::throw_line_too_short(candidate.raw_str());
118 internals::throw_line_too_long(candidate.raw_str());
124 if (single_row !=
nullptr) {
125 *single_row = std::move(candidate);
126 }
else if (batch_rows !=
nullptr) {
127 batch_rows->push_back(std::move(candidate));
133 this->_read_requested =
false;
137 CSV_INLINE void CSVReader::drain_rows_into_chunk(std::vector<CSVRow>& out,
size_t max_rows) {
138 std::vector<CSVRow> drained;
139 drained.reserve(max_rows - out.size());
140 this->
records->drain_front(drained, max_rows - out.size());
142 for (
size_t i = 0; i < drained.size(); ++i) {
143 this->accept_row(std::move(drained[i]),
nullptr, &out);
147#pragma endregion Reading helpers
151#pragma region Worker reading methods
182 this->
parser->next(bytes);
184 if (!this->header_trimmed) {
200 while (this->check_for_rows()) {
204 if (this->accept_row(this->
records->pop_front(), &row,
nullptr))
218 while (out.size() < max_rows) {
219 if (check_for_rows()) {
224 const size_t before_size = out.size();
225 this->drain_rows_into_chunk(out, max_rows);
227 if (out.size() == before_size) {
231 else return !out.empty();
237#pragma endregion Worker reading methods
bool read_chunk(std::vector< CSVRow > &out, size_t max_rows)
Read up to max_rows rows into a caller-owned batch buffer.
CSVFormat get_format() const
Return the resolved parsing format for this CSV source.
bool read_row(CSVRow &row)
Retrieve the next CSV row, returning true while more rows are available.
Data structure for representing CSV rows.
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Defines functionality needed for basic CSV parsing.
std::unique_ptr< RowCollection > records
Queue of parsed CSV rows.
size_t _n_rows
How many rows (minus header) have been read so far.
bool read_csv(size_t bytes=internals::CSV_CHUNK_SIZE_DEFAULT)
Read a chunk of CSV data.
internals::ColNamesPtr col_names
Pointer to an object containing column information.
void set_col_names(const std::vector< std::string > &)
Sets this reader's column names and associated data.
std::unique_ptr< internals::parser::CSVParserDriverBase > parser
Helper class which actually does the parsing.
size_t n_cols
The number of columns in this CSV.
The all-encompassing namespace.