19#include "../external/mio.hpp"
68 const std::vector<char>& delims = {
',',
'|',
'\t',
';',
'^',
'~' });
119 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Member type aliases describing this iterator: it yields CSVRow values,
 *  exposes them through CSVRow* / CSVRow&, measures distances with
 *  std::ptrdiff_t, and is tagged as an input iterator.
 */
120 using value_type =
CSVRow;
121 using difference_type = std::ptrdiff_t;
122 using pointer =
CSVRow * ;
123 using reference =
CSVRow & ;
124 using iterator_category = std::input_iterator_tag;
/** Access the CSVRow held by the iterator.
 *
 *  @return A reference to the iterator's own `row` member.
 */
132 CONSTEXPR_14 reference
operator*() {
return this->row; }
/** Const overload of the dereference operator.
 *
 *  Casts away the constness of the iterator's `row` member so that the
 *  same (non-const) `reference` type is returned as by the non-const overload.
 *
 *  NOTE(review): writing through the returned reference is only well-defined
 *  when the iterator object itself was not declared const -- confirm callers.
 */
133 CONSTEXPR_14 reference
operator*()
const {
return const_cast<reference
>(this->row); }
/** Const overload of the member-access operator.
 *
 *  Takes the address of the iterator's `row` member and casts away its
 *  constness so that the (non-const) `pointer` type is returned.
 *
 *  NOTE(review): writing through the returned pointer is only well-defined
 *  when the iterator object itself was not declared const -- confirm callers.
 */
137 CONSTEXPR_14 pointer
operator->()
const {
return const_cast<pointer
>(&(this->row)); }
146 return (this->daddy == other.daddy) && (this->i == other.i);
181 template<
typename TStream,
182 csv::enable_if_t<std::is_base_of<std::istream, TStream>::value,
int> = 0>
184 auto head = internals::get_csv_head(source);
188 this->_chunk_size = format.get_chunk_size();
190 if (format.guess_delim()) {
192 format.delimiter(guess_result.delim);
196 if (format.header != -1 || !format.col_names.empty()) {
197 format.header = guess_result.header_row;
199 this->_format = format;
202 if (!format.col_names.empty())
205 this->
parser = std::unique_ptr<Parser>(
207 this->initial_read();
216 if (this->read_csv_worker.joinable()) {
217 this->read_csv_worker.join();
/** A placeholder for the imaginary past-the-end row in a CSV.
 *  Declared here only; the definition is not part of this listing.
 */
225 CSV_CONST iterator
end() const noexcept;
/** Returns true if we have reached end of file.
 *
 *  Forwards directly to the parser's own eof() check.
 *
 *  NOTE(review): `parser` is default-initialized to nullptr and is
 *  dereferenced here without a check -- presumably every constructor assigns
 *  it before this can be called; confirm.
 */
228 bool eof() const noexcept {
return this->
parser->eof(); }
/** Pointer to an object containing column information; starts out as a
 *  freshly allocated, shared internals::ColNames instance.
 */
274 internals::ColNamesPtr
col_names = std::make_shared<internals::ColNames>();
/** Helper class which actually does the parsing; null until one is assigned. */
277 std::unique_ptr<internals::IBasicCSVParser>
parser =
nullptr;
/** NOTE(review): presumably records whether the header row has already been
 *  stripped from the parsed rows -- confirm against the code that sets it.
 */
294 bool header_trimmed =
false;
/** Worker thread for reading CSV data; it is joined by initial_read(). */
298 std::thread read_csv_worker;
/** NOTE(review): presumably set once a read has been requested from the
 *  worker -- its readers/writers are outside this listing; confirm.
 */
300 bool _read_requested =
false;
/** Exception pending rethrow: stored by set_read_csv_exception() and
 *  fetched-and-cleared by take_read_csv_exception(). Null when none is pending.
 */
304 std::exception_ptr read_csv_exception =
nullptr;
/** Mutex locked by the setter and taker above while they touch read_csv_exception. */
305 std::mutex read_csv_exception_lock;
307 void set_read_csv_exception(std::exception_ptr eptr) {
308 std::lock_guard<std::mutex> lock(this->read_csv_exception_lock);
309 this->read_csv_exception = std::move(eptr);
312 std::exception_ptr take_read_csv_exception() {
313 std::lock_guard<std::mutex> lock(this->read_csv_exception_lock);
314 auto eptr = this->read_csv_exception;
315 this->read_csv_exception =
nullptr;
319 void rethrow_read_csv_exception_if_any() {
320 if (
auto eptr = this->take_read_csv_exception()) {
321 std::rethrow_exception(eptr);
326 void initial_read() {
328 this->read_csv_worker.join();
329 this->rethrow_read_csv_exception_if_any();
Contains the main CSV parsing algorithm and various utility functions.
An input iterator capable of handling large files.
CONSTEXPR bool operator==(const iterator &other) const noexcept
Returns true if iterators were constructed from the same CSVReader and point to the same row.
iterator & operator++()
Pre-increment iterator.
CONSTEXPR_14 reference operator*()
Access the CSVRow held by the iterator.
CONSTEXPR_14 pointer operator->()
Return a pointer to the CSVRow the iterator has stopped at.
Main class for parsing CSVs from files and in-memory sources.
CONSTEXPR bool empty() const noexcept
Whether or not the file or stream contains valid CSV rows, not including the header.
bool utf8_bom() const noexcept
Whether or not the CSV was prefixed with a UTF-8 BOM.
CSVFormat get_format() const
Return the format of the original raw CSV.
CSVReader & operator=(CSVReader &&)=delete
Not movable: contains std::mutex.
int index_of(csv::string_view col_name) const
Return the index of the column name if found or csv::CSV_NOT_FOUND otherwise.
CSVReader & operator=(const CSVReader &)=delete
Not copyable.
CSV_CONST iterator end() const noexcept
A placeholder for the imaginary past-the-end row in a CSV.
CONSTEXPR size_t n_rows() const noexcept
Retrieves the number of rows that have been read so far.
bool eof() const noexcept
Returns true if we have reached end of file.
bool read_row(CSVRow &row)
Retrieve rows as CSVRow objects, returning true if more rows are available.
std::vector< std::string > get_col_names() const
Return the CSV's column names as a vector of strings.
CSVReader(TStream &source, CSVFormat format=CSVFormat::guess_csv())
Construct CSVReader from std::istream.
CSVReader(CSVReader &&)=delete
Not movable: contains std::mutex.
CSVReader(const CSVReader &)=delete
Not copyable.
iterator begin()
Return an iterator to the first row in the reader.
Data structure for representing CSV rows.
A class for parsing CSV data from a std::stringstream or a std::ifstream.
A standalone header file containing shared code.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Implements data type parsing functionality.
std::unique_ptr< RowCollection > records
Queue of parsed CSV rows.
size_t _n_rows
How many rows (minus header) have been read so far.
bool read_csv(size_t bytes=internals::ITERATION_CHUNK_SIZE)
Read a chunk of CSV data.
internals::ColNamesPtr col_names
Pointer to an object containing column information.
void set_col_names(const std::vector< std::string > &)
Sets this reader's column names and associated data.
std::unique_ptr< internals::IBasicCSVParser > parser
Helper class which actually does the parsing.
size_t n_cols
The number of columns in this CSV.
constexpr size_t ITERATION_CHUNK_SIZE
Chunk size for lazy-loading large CSV files.
std::vector< std::string > _get_col_names(csv::string_view head, CSVFormat format)
Return a CSV's column names.
std::string format_row(const std::vector< std::string > &row, csv::string_view delim)
CSVGuessResult _guess_format(csv::string_view head, const std::vector< char > &delims)
Guess the delimiter used by a delimiter-separated values file.
CSV_CONST CONSTEXPR_17 OutArray arrayToDefault(T &&value)
Helper constexpr function to initialize an array with all the elements set to value.
The all-encompassing namespace.
std::vector< std::string > get_col_names(csv::string_view filename, CSVFormat format)
Return a CSV's column names.
internals::ThreadSafeDeque< CSVRow > RowCollection
Standard type for storing collection of rows.
CSVGuessResult guess_format(csv::string_view filename, const std::vector< char > &delims)
Guess the delimiter used by a delimiter-separated values file.
nonstd::string_view string_view
The string_view class used by this library.
Stores the inferred format of a CSV file.