Vince's CSV Parser
Loading...
Searching...
No Matches
csv_row.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <cmath>
7#include <iterator>
8#include <memory> // For CSVField
9#include <limits> // For CSVField
10#include <mutex>
11#include <unordered_set>
12#include <string>
13#include <sstream>
14#include <vector>
15
16#include "common.hpp"
17#include "data_type.hpp"
18#include "parse_hex.hpp"
19#include "raw_csv_data.hpp"
20
21namespace csv {
22 namespace internals {
23 class IBasicCSVParser;
24
25 static const std::string ERROR_NAN = "Not a number.";
26 static const std::string ERROR_OVERFLOW = "Overflow error.";
27 static const std::string ERROR_FLOAT_TO_INT =
28 "Attempted to convert a floating point value to an integral type.";
29 static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types.";
30
31 std::string json_escape_string(csv::string_view s) noexcept;
32 }
33
39 class CSVField {
40 public:
42 constexpr explicit CSVField(csv::string_view _sv) noexcept : sv(_sv) {}
43
44 operator std::string() const {
45 return std::string("<CSVField> ") + std::string(this->sv);
46 }
47
76 template<typename T = std::string> T get() {
77 IF_CONSTEXPR(std::is_arithmetic<T>::value) {
78 // Note: this->type() also converts the CSV value to float
79 if (this->type() <= DataType::CSV_STRING) {
80 throw std::runtime_error(internals::ERROR_NAN);
81 }
82 }
83
84 IF_CONSTEXPR(std::is_integral<T>::value) {
85 // Note: this->is_float() also converts the CSV value to float
86 if (this->is_float()) {
87 throw std::runtime_error(internals::ERROR_FLOAT_TO_INT);
88 }
89
90 IF_CONSTEXPR(std::is_unsigned<T>::value) {
91 if (this->value < 0) {
92 throw std::runtime_error(internals::ERROR_NEG_TO_UNSIGNED);
93 }
94 }
95 }
96
97 // Allow fallthrough from previous if branch
98 IF_CONSTEXPR(!std::is_floating_point<T>::value) {
99 IF_CONSTEXPR(std::is_unsigned<T>::value) {
100 // Quick hack to perform correct unsigned integer boundary checks
101 if (this->value > internals::get_uint_max<sizeof(T)>()) {
102 throw std::runtime_error(internals::ERROR_OVERFLOW);
103 }
104 }
105 else if (internals::type_num<T>() < this->_type) {
106 throw std::runtime_error(internals::ERROR_OVERFLOW);
107 }
108 }
109
110 return static_cast<T>(this->value);
111 }
112
148 template<typename T = std::string>
149 bool try_get(T& out) noexcept {
150 IF_CONSTEXPR(std::is_arithmetic<T>::value) {
151 // Check if value is numeric
152 if (this->type() <= DataType::CSV_STRING) {
153 return false;
154 }
155 }
156
157 IF_CONSTEXPR(std::is_integral<T>::value) {
158 // Check for float-to-int conversion
159 if (this->is_float()) {
160 return false;
161 }
162
163 IF_CONSTEXPR(std::is_unsigned<T>::value) {
164 if (this->value < 0) {
165 return false;
166 }
167 }
168 }
169
170 // Check for overflow
171 IF_CONSTEXPR(!std::is_floating_point<T>::value) {
172 IF_CONSTEXPR(std::is_unsigned<T>::value) {
173 if (this->value > internals::get_uint_max<sizeof(T)>()) {
174 return false;
175 }
176 }
177 else if (internals::type_num<T>() < this->_type) {
178 return false;
179 }
180 }
181
182 out = static_cast<T>(this->value);
183 return true;
184 }
185
189 template<typename T = long long>
190 bool try_parse_hex(T& parsedValue) {
191 static_assert(std::is_integral<T>::value,
192 "try_parse_hex only works with integral types (int, long, long long, etc.)");
193 return internals::try_parse_hex(this->sv, parsedValue);
194 }
195
202 bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.');
203
217 template<typename T>
218 CONSTEXPR_14 bool operator==(T other) const noexcept
219 {
220 static_assert(std::is_arithmetic<T>::value,
221 "T should be a numeric value.");
222
223 if (this->_type != DataType::UNKNOWN) {
224 if (this->_type == DataType::CSV_STRING) {
225 return false;
226 }
227
228 return internals::is_equal(value, static_cast<long double>(other), 0.000001L);
229 }
230
231 long double out = 0;
232 if (internals::data_type(this->sv, &out) == DataType::CSV_STRING) {
233 return false;
234 }
235
236 return internals::is_equal(out, static_cast<long double>(other), 0.000001L);
237 }
238
240 CONSTEXPR csv::string_view get_sv() const noexcept { return this->sv; }
241
243 CONSTEXPR_14 bool is_null() noexcept { return type() == DataType::CSV_NULL; }
244
246 CONSTEXPR_14 bool is_str() noexcept { return type() == DataType::CSV_STRING; }
247
249 CONSTEXPR_14 bool is_num() noexcept { return type() >= DataType::CSV_INT8; }
250
252 CONSTEXPR_14 bool is_int() noexcept {
253 return (type() >= DataType::CSV_INT8) && (type() <= DataType::CSV_INT64);
254 }
255
257 CONSTEXPR_14 bool is_float() noexcept { return type() == DataType::CSV_DOUBLE; }
258
260 CONSTEXPR_14 DataType type() noexcept {
261 this->get_value();
262 return _type;
263 }
264
265 private:
266 long double value = 0;
267 csv::string_view sv = "";
268 DataType _type = DataType::UNKNOWN;
269 CONSTEXPR_14 void get_value() noexcept {
270 /* Check to see if value has been cached previously, if not
271 * evaluate it
272 */
273 if ((int)_type < 0) {
274 this->_type = internals::data_type(this->sv, &this->value);
275 }
276 }
277 };
278
280 class CSVRow {
281 public:
283
284 CSVRow() = default;
285
287 CSVRow(internals::RawCSVDataPtr _data) : data(_data) {}
288 CSVRow(internals::RawCSVDataPtr _data, size_t _data_start, size_t _field_bounds)
289 : data(_data), data_start(_data_start), fields_start(_field_bounds) {}
290 CSVRow(internals::RawCSVDataPtr _data, size_t _data_start, size_t _field_bounds, size_t _row_length)
291 : data(_data), data_start(_data_start), fields_start(_field_bounds), row_length(_row_length) {}
292
294 CONSTEXPR bool empty() const noexcept { return this->size() == 0; }
295
297 CONSTEXPR size_t size() const noexcept { return row_length; }
298
301 CSVField operator[](size_t n) const;
302 CSVField operator[](const std::string&) const;
303 std::string to_json(const std::vector<std::string>& subset = {}) const;
304 std::string to_json_array(const std::vector<std::string>& subset = {}) const;
305
307 std::vector<std::string> get_col_names() const {
308 return this->data->col_names->get_col_names();
309 }
310
314 std::unordered_map<std::string, std::string> to_unordered_map() const;
315
321 std::unordered_map<std::string, std::string> to_unordered_map(
322 const std::vector<std::string>& subset
323 ) const;
324
333 operator std::vector<std::string>() const;
335
339 class iterator {
340 public:
341#ifndef DOXYGEN_SHOULD_SKIP_THIS
342 using value_type = CSVField;
343 using difference_type = int;
344 using pointer = std::shared_ptr<CSVField>;
345 using reference = CSVField & ;
346 using iterator_category = std::random_access_iterator_tag;
347#endif
348 iterator(const CSVRow*, int i);
349
350 reference operator*() const;
351 pointer operator->() const;
352
353 iterator operator++(int);
354 iterator& operator++();
355 iterator operator--(int);
356 iterator& operator--();
357 iterator operator+(difference_type n) const;
358 iterator operator-(difference_type n) const;
359
361 CONSTEXPR bool operator==(const iterator& other) const noexcept {
362 return this->i == other.i;
363 };
364
365 CONSTEXPR bool operator!=(const iterator& other) const noexcept { return !operator==(other); }
366
367#ifndef NDEBUG
368 friend CSVRow;
369#endif
370
371 private:
372 const CSVRow * daddy = nullptr; // Pointer to parent
373 internals::RawCSVDataPtr data = nullptr; // Keep data alive for lifetime of iterator
374 std::shared_ptr<CSVField> field = nullptr; // Current field pointed at
375 int i = 0; // Index of current field
376 };
377
379 using reverse_iterator = std::reverse_iterator<iterator>;
380
385 iterator begin() const;
386 iterator end() const noexcept;
387 reverse_iterator rbegin() const noexcept;
388 reverse_iterator rend() const;
390
391 private:
393 inline csv::string_view get_field_impl(size_t index, const internals::RawCSVDataPtr& _data) const {
395
396 if (index >= this->size())
397 throw std::runtime_error("Index out of bounds.");
398
399 const size_t field_index = this->fields_start + index;
400 auto field = _data->fields[field_index];
401 auto field_str = csv::string_view(_data->data).substr(this->data_start + field.start);
402
403 if (field.has_double_quote) {
404 auto& value = _data->double_quote_fields[field_index];
405 // Double-check locking: minimize lock contention by checking before acquiring lock
406 if (value.empty()) {
407 std::lock_guard<std::mutex> lock(_data->double_quote_init_lock);
408
409 // Check again after acquiring lock in case another thread initialized it
410 if (value.empty()) {
411 bool prev_ch_quote = false;
412 for (size_t i = 0; i < field.length; i++) {
413 if (_data->parse_flags[field_str[i] + CHAR_OFFSET] == ParseFlags::QUOTE) {
414 if (prev_ch_quote) {
415 prev_ch_quote = false;
416 continue;
417 }
418 else {
419 prev_ch_quote = true;
420 }
421 }
422
423 value += field_str[i];
424 }
425 }
426 }
427
428 return csv::string_view(value);
429 }
430
431 return field_str.substr(0, field.length);
432 }
433
435 csv::string_view get_field(size_t index) const;
436
440 csv::string_view get_field_safe(size_t index, internals::RawCSVDataPtr _data) const;
441
442 internals::RawCSVDataPtr data;
443
445 size_t data_start = 0;
446
448 size_t fields_start = 0;
449
451 size_t row_length = 0;
452 };
453
454#ifdef _MSC_VER
455#pragma region CSVField::get Specializations
456#endif
458 template<>
459 inline std::string CSVField::get<std::string>() {
460 return std::string(this->sv);
461 }
462
468 template<>
469 CONSTEXPR_14 csv::string_view CSVField::get<csv::string_view>() {
470 return this->sv;
471 }
472
474 template<>
475 CONSTEXPR_14 long double CSVField::get<long double>() {
476 if (!is_num())
477 throw std::runtime_error(internals::ERROR_NAN);
478
479 return this->value;
480 }
481
483 template<>
484 inline bool CSVField::try_get<std::string>(std::string& out) noexcept {
485 out = std::string(this->sv);
486 return true;
487 }
488
490 template<>
491 CONSTEXPR_14 bool CSVField::try_get<csv::string_view>(csv::string_view& out) noexcept {
492 out = this->sv;
493 return true;
494 }
495
497 template<>
498 CONSTEXPR_14 bool CSVField::try_get<long double>(long double& out) noexcept {
499 if (!is_num())
500 return false;
501
502 out = this->value;
503 return true;
504 }
505#ifdef _MSC_VER
506#pragma endregion CSVField::get Specializations
507#endif
508
510 template<>
511 CONSTEXPR bool CSVField::operator==(const char * other) const noexcept
512 {
513 return this->sv == other;
514 }
515
517 template<>
519 {
520 return this->sv == other;
521 }
522}
523
529inline std::ostream& operator << (std::ostream& os, csv::CSVField const& value) {
530 os << std::string(value);
531 return os;
532}
Data type representing individual CSV values.
Definition csv_row.hpp:39
CONSTEXPR_14 bool is_num() noexcept
Returns true if field is an integer or float.
Definition csv_row.hpp:249
bool try_parse_decimal(long double &dVal, const char decimalSymbol='.')
Attempts to parse a decimal (or integer) value using the given symbol, returning true if the value is...
Definition csv_row.cpp:91
CONSTEXPR_14 bool is_str() noexcept
Returns true if field is a non-numeric, non-empty string.
Definition csv_row.hpp:246
CONSTEXPR_14 bool is_int() noexcept
Returns true if field is an integer.
Definition csv_row.hpp:252
CONSTEXPR_14 bool is_null() noexcept
Returns true if field is an empty string or string of whitespace characters.
Definition csv_row.hpp:243
constexpr CSVField(csv::string_view _sv) noexcept
Constructs a CSVField from a string_view.
Definition csv_row.hpp:42
CONSTEXPR_14 DataType type() noexcept
Return the type of the underlying CSV data.
Definition csv_row.hpp:260
T get()
Returns the value casted to the requested type, performing type checking before.
Definition csv_row.hpp:76
CONSTEXPR_14 bool operator==(T other) const noexcept
Compares the contents of this field to a numeric value.
Definition csv_row.hpp:218
bool try_get(T &out) noexcept
Attempts to retrieve the value as the requested type without throwing exceptions.
Definition csv_row.hpp:149
CONSTEXPR_14 bool is_float() noexcept
Returns true if field is a floating point value.
Definition csv_row.hpp:257
CONSTEXPR csv::string_view get_sv() const noexcept
Return a string view over the field's contents.
Definition csv_row.hpp:240
bool try_parse_hex(T &parsedValue)
Parse a hexadecimal value, returning false if the value is not hex.
Definition csv_row.hpp:190
A random access iterator over the contents of a CSV row.
Definition csv_row.hpp:339
CONSTEXPR bool operator==(const iterator &other) const noexcept
Two iterators are equal if they point to the same field.
Definition csv_row.hpp:361
Data structure for representing CSV rows.
Definition csv_row.hpp:280
iterator end() const noexcept
Return an iterator pointing to just after the end of the CSVRow.
Definition csv_row.cpp:125
std::reverse_iterator< iterator > reverse_iterator
A reverse iterator over the contents of a CSVRow.
Definition csv_row.hpp:379
std::string to_json(const std::vector< std::string > &subset={}) const
Convert a CSV row to a JSON object, i.e. {"col1":"value1","col2":"value2"}.
CONSTEXPR bool empty() const noexcept
Indicates whether row is empty or not.
Definition csv_row.hpp:294
std::string to_json_array(const std::vector< std::string > &subset={}) const
Convert a CSV row to a JSON array, i.e. ["value1","value2"].
std::unordered_map< std::string, std::string > to_unordered_map() const
Convert this CSVRow into an unordered map.
Definition csv_row.cpp:52
CONSTEXPR size_t size() const noexcept
Return the number of fields in this row.
Definition csv_row.hpp:297
std::vector< std::string > get_col_names() const
Retrieve this row's associated column names.
Definition csv_row.hpp:307
CSVField operator[](size_t n) const
Return a CSVField object corresponding to the nth value in the row.
Definition csv_row.cpp:20
iterator begin() const
Return an iterator pointing to the first field.
Definition csv_row.cpp:116
CSVRow(internals::RawCSVDataPtr _data)
Construct a CSVRow from a RawCSVDataPtr.
Definition csv_row.hpp:287
Abstract base class which provides CSV parsing logic.
A standalone header file containing shared code.
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
Definition common.hpp:132
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
Definition common.hpp:173
std::ostream & operator<<(std::ostream &os, csv::CSVField const &value)
Stream insertion helper for CSVField.
Definition csv_row.hpp:529
Implements data type parsing functionality.
CONSTEXPR_14 DataType data_type(csv::string_view in, long double *const out, const char decimalSymbol)
Distinguishes numeric from other text values.
bool is_equal(T a, T b, T epsilon=0.001)
Definition common.hpp:217
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
Definition common.hpp:229
CSV_CONST CONSTEXPR_17 OutArray arrayToDefault(T &&value)
Helper constexpr function to initialize an array with all the elements set to value.
CONSTEXPR_14 long double get_uint_max()
Given a byte size, return the largest number that can be stored in an unsigned integer of that size.
The all encompassing namespace.
DataType
Enumerates the different CSV field types that are recognized by this library.
Definition data_type.hpp:20
@ CSV_INT64
64-bit integer (long long on MSVC/GCC)
@ CSV_DOUBLE
Floating point value.
@ CSV_NULL
Empty string.
@ CSV_INT8
8-bit integer
@ CSV_STRING
Non-numeric string.
constexpr unsigned CHAR_OFFSET
Offset to convert char into array index.
Definition common.hpp:273
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:123
Implements Functions related to hexadecimal parsing.
Internal data structures for CSV parsing.