11 std::stringstream
ret;
12 for (
size_t i = 0; i < row.size(); i++) {
14 if (i + 1 < row.size())
ret << delim;
30 auto trim_chars =
format.get_trim_chars();
35 parser.set_output(rows);
43 std::unordered_map<size_t, size_t>
row_tally = { { 0, 0 } };
46 std::unordered_map<size_t, size_t>
row_when = { { 0, 0 } };
53 parser.set_output(rows);
56 for (
size_t i = 0; i < rows.size(); i++) {
72 size_t header_row = 0;
139 auto head = internals::get_csv_head(filename);
142 if (format.guess_delim()) {
143 auto guess_result =
guess_format(filename, format.get_possible_delims());
152 auto head = internals::get_csv_head(filename);
168 auto head = internals::get_csv_head(filename);
171 this->_chunk_size = format.get_chunk_size();
173 if (format.guess_delim()) {
179 if (format.header != -1 || !format.col_names.empty()) {
180 format.header = guess_result.header_row;
183 this->_format = format;
186 if (!format.col_names.empty())
189 this->
parser = std::unique_ptr<Parser>(
new Parser(filename, format, this->
col_names));
190 this->initial_read();
200 new_format.col_names = this->
col_names->get_col_names();
201 new_format.header = this->_format.header;
212 return std::vector<std::string>();
220 for (
size_t i = 0; i < _col_names.size(); i++)
221 if (_col_names[i] == col_name)
return (
int)i;
227 if (!this->header_trimmed) {
228 for (
int i = 0; i <= this->_format.header && !this->
records->empty(); i++) {
229 if (i == this->_format.header && this->col_names->empty()) {
237 this->header_trimmed =
true;
247 this->
n_cols = names.size();
278 this->
parser->next(bytes);
280 if (!this->header_trimmed) {
287 this->set_read_csv_exception(std::current_exception());
313 if (this->
records->is_waitable()) {
320 if (this->read_csv_worker.joinable())
321 this->read_csv_worker.join();
324 this->rethrow_read_csv_exception_if_any();
336 if (this->_read_requested && this->
records->empty()) {
337 throw std::runtime_error(
338 "End of file not reached and no more records parsed. "
339 "This likely indicates a CSV row larger than the chunk size of " +
340 std::to_string(this->_chunk_size) +
" bytes. "
341 "Use CSVFormat::chunk_size() to increase the chunk size."
350 this->_read_requested =
true;
353 else if (this->
records->front().size() != this->n_cols &&
354 this->_format.variable_column_policy != VariableColumnPolicy::KEEP) {
355 auto errored_row = this->
records->pop_front();
357 if (this->_format.variable_column_policy == VariableColumnPolicy::THROW) {
358 if (errored_row.size() < this->n_cols)
365 row = this->
records->pop_front();
367 this->_read_requested =
false;
CSVFormat get_format() const
Return the format of the original raw CSV.
int index_of(csv::string_view col_name) const
Return the index of the column name if found or csv::CSV_NOT_FOUND otherwise.
bool read_row(CSVRow &row)
Retrieve rows as CSVRow objects, returning true if more rows are available.
std::vector< std::string > get_col_names() const
Return the CSV's column names as a vector of strings.
CSVReader(csv::string_view filename, CSVFormat format=CSVFormat::guess_csv())
Construct CSVReader from filename using memory-mapped I/O.
Data structure for representing CSV rows.
Parser for memory-mapped files.
A class for parsing CSV data from a std::stringstream or a std::ifstream.
void next(size_t bytes=ITERATION_CHUNK_SIZE) override
Parse the next block of data.
A std::deque wrapper which allows multiple read and write threads to concurrently access it along wit...
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Defines functionality needed for basic CSV parsing.
std::unique_ptr< RowCollection > records
Queue of parsed CSV rows.
size_t _n_rows
How many rows (minus header) have been read so far.
bool read_csv(size_t bytes=internals::ITERATION_CHUNK_SIZE)
Read a chunk of CSV data.
internals::ColNamesPtr col_names
Pointer to an object containing column information.
void set_col_names(const std::vector< std::string > &)
Sets this reader's column names and associated data.
std::unique_ptr< internals::IBasicCSVParser > parser
Helper class which actually does the parsing.
size_t n_cols
The number of columns in this CSV.
std::vector< std::string > _get_col_names(csv::string_view head, CSVFormat format)
Return a CSV's column names.
std::string format_row(const std::vector< std::string > &row, csv::string_view delim)
CSVGuessResult _guess_format(csv::string_view head, const std::vector< char > &delims)
Guess the delimiter used by a delimiter-separated values file.
CSV_CONST CONSTEXPR_17 OutArray arrayToDefault(T &&value)
Helper constexpr function to initialize an array with all the elements set to value.
The all encompassing namespace.
std::vector< std::string > get_col_names(csv::string_view filename, CSVFormat format)
Return a CSV's column names.
internals::ThreadSafeDeque< CSVRow > RowCollection
Standard type for storing collection of rows.
constexpr int CSV_NOT_FOUND
Integer indicating a requested column wasn't found.
CSVGuessResult guess_format(csv::string_view filename, const std::vector< char > &delims)
Guess the delimiter used by a delimiter-separated values file.
nonstd::string_view string_view
The string_view class used by this library.
Stores the inferred format of a CSV file.