Vince's CSV Parser
Loading...
Searching...
No Matches
csv_row.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <cmath>
7#include <iterator>
8#include <memory> // For CSVField
9#include <limits> // For CSVField
10#if !defined(CSV_ENABLE_THREADS) || CSV_ENABLE_THREADS
11#include <mutex>
12#endif
13#include <unordered_set>
14#include <string>
15#include <sstream>
16#include <vector>
17
18#include "common.hpp"
19#ifdef CSV_HAS_CXX20
20#include <ranges>
21#endif
22#include "data_type.hpp"
23#include "parse_hex.hpp"
24#include "raw_csv_data.hpp"
25
/*
 * CSV_INIT_WITH_OPTIONAL_DCL(data_ref, value_ref, ...)
 *
 * Executes the statements passed as the variadic arguments only when
 * (value_ref).empty() is true.
 *
 * - When CSV_ENABLE_THREADS is non-zero, emptiness is tested once without the
 *   lock and once more after (data_ref).double_quote_init_lock has been
 *   acquired through a std::lock_guard; the statements run inside that
 *   locked scope.
 * - Otherwise no lock is taken and data_ref is merely cast to void
 *   (presumably so the argument still counts as used).
 *
 * The do { ... } while (0) wrapper makes the expansion usable as a single
 * statement. The macro is #undef'd at the end of this header.
 */
26#if CSV_ENABLE_THREADS
27#define CSV_INIT_WITH_OPTIONAL_DCL(data_ref, value_ref, ...) \
28 do { \
29 if ((value_ref).empty()) { \
30 std::lock_guard<std::mutex> lock((data_ref).double_quote_init_lock); \
31 if ((value_ref).empty()) { \
32 __VA_ARGS__ \
33 } \
34 } \
35 } while (0)
36#else
37#define CSV_INIT_WITH_OPTIONAL_DCL(data_ref, value_ref, ...) \
38 do { \
39 (void)(data_ref); \
40 if ((value_ref).empty()) { \
41 __VA_ARGS__ \
42 } \
43 } while (0)
44#endif
45
46namespace csv {
// Implementation details shared by CSVField and CSVRow.
47 namespace internals {
// Forward declaration only; the class is defined elsewhere.
48 class IBasicCSVParser;
49
// Messages reported by CSVField when a conversion is rejected
// (returned by CSVField::check_convert() and thrown by CSVField::get()).
50 static const std::string ERROR_NAN = "Not a number.";
51 static const std::string ERROR_OVERFLOW = "Overflow error.";
52 static const std::string ERROR_FLOAT_TO_INT =
53 "Attempted to convert a floating point value to an integral type.";
54 static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types.";
55
// NOTE(review): declared here, defined elsewhere; presumably returns `s`
// escaped for embedding in JSON output -- confirm against the definition.
56 std::string json_escape_string(csv::string_view s) noexcept;
57
58 // Called by CSVRow::get_field_impl() on a field's text when whitespace trimming is enabled
// NOTE(review): presumably returns `sv` with the characters flagged in
// `ws_flags` removed from both ends -- confirm against the definition.
59 csv::string_view get_trimmed(csv::string_view sv, const WhitespaceMap& ws_flags) noexcept;
60 }
61
/**
 * A single CSV value: a csv::string_view over the field's text plus a lazily
 * computed, cached numeric interpretation (`value` / `_type`).
 *
 * The field only holds a view, so the storage the view points into must stay
 * alive for as long as the field is used.
 */
67 class CSVField {
68 public:
/** Wrap the given view. Nothing is parsed here; see get_value(). */
70 constexpr explicit CSVField(csv::string_view _sv) noexcept : sv(_sv) {}
71
/** Implicit conversion to the underlying view. */
72 operator csv::string_view() const noexcept {
73 return this->sv;
74 }
75
/** Implicit conversion to an owning copy of the field's text. */
76 operator std::string() const {
77 return std::string(this->sv);
78 }
79
/**
 * Return the field converted to T.
 *
 * A value-initialized T is passed to check_convert(); when that reports an
 * error message, a std::runtime_error carrying the message is thrown.
 * Explicit specializations for std::string, csv::string_view and long double
 * follow the class definition.
 */
108 template<typename T = std::string> T get() {
109 T out{};
110 if (const auto* err = check_convert(out)) throw std::runtime_error(err);
111 return out;
112 }
113
/**
 * Non-throwing counterpart of get(): returns true when the conversion
 * succeeded. `out` is assigned only on success.
 */
129 template<typename T = std::string>
130 bool try_get(T& out) noexcept {
131 return check_convert(out) == nullptr;
132 }
133
/**
 * Forward the field's text to internals::try_parse_hex().
 * Only integral T is accepted (enforced at compile time).
 */
137 template<typename T = long long>
138 bool try_parse_hex(T& parsedValue) {
139 static_assert(std::is_integral<T>::value,
140 "try_parse_hex only works with integral types (int, long, long long, etc.)");
141 return internals::try_parse_hex(this->sv, parsedValue);
142 }
143
/** Declared here, defined out of line; `decimalSymbol` defaults to '.'. */
150 bool try_parse_decimal(long double& dVal, const char decimalSymbol = '.');
151
/**
 * Compare the field with an arithmetic value, using an absolute tolerance of
 * 0.000001 (via internals::is_equal).
 *
 * A field whose type is not numeric never compares equal to a number. The
 * test is `<= DataType::CSV_STRING`, the same one check_convert() applies, so
 * that an empty/null field is rejected as well instead of falling through to
 * the numeric comparison (where it could compare equal to 0).
 *
 * The function is const: when the type has not been cached yet, the text is
 * parsed into a local and the cache is left untouched.
 */
165 template<typename T>
166 CONSTEXPR_14 bool operator==(T other) const noexcept
167 {
168 static_assert(std::is_arithmetic<T>::value,
169 "T should be a numeric value.");
170
// Cached interpretation available: reuse it.
171 if (this->_type != DataType::UNKNOWN) {
172 if (this->_type <= DataType::CSV_STRING) {
173 return false;
174 }
175
176 return internals::is_equal(value, static_cast<long double>(other), 0.000001L);
177 }
178
// Not cached: parse into a temporary.
179 long double out = 0;
180 if (internals::data_type(this->sv, &out) <= DataType::CSV_STRING) {
181 return false;
182 }
183
184 return internals::is_equal(out, static_cast<long double>(other), 0.000001L);
185 }
186
/** Return the view over the field's text. */
188 CONSTEXPR csv::string_view get_sv() const noexcept { return this->sv; }
189
/** True when type() is DataType::CSV_NULL. */
191 CONSTEXPR_14 bool is_null() noexcept { return type() == DataType::CSV_NULL; }
192
/** True when type() is DataType::CSV_STRING. */
194 CONSTEXPR_14 bool is_str() noexcept { return type() == DataType::CSV_STRING; }
195
/** True when type() is DataType::CSV_INT8 or any later enumerator. */
197 CONSTEXPR_14 bool is_num() noexcept { return type() >= DataType::CSV_INT8; }
198
/** True when type() lies in [CSV_INT8, CSV_INT64]. */
200 CONSTEXPR_14 bool is_int() noexcept {
201 return (type() >= DataType::CSV_INT8) && (type() <= DataType::CSV_INT64);
202 }
203
/** True when type() is DataType::CSV_DOUBLE. */
205 CONSTEXPR_14 bool is_float() noexcept { return type() == DataType::CSV_DOUBLE; }
206
/** Return the field's data type, evaluating and caching it on first use. */
208 CONSTEXPR_14 DataType type() noexcept {
209 this->get_value();
210 return _type;
211 }
212
213 private:
// Cached numeric value; written by get_value().
214 long double value = 0;
// Text of the field.
215 csv::string_view sv = "";
// Cached type; DataType::UNKNOWN until get_value() has run.
216 DataType _type = DataType::UNKNOWN;
/**
 * Validate that the field can be represented as T and, if so, store the
 * converted value in `out`.
 *
 * Returns nullptr on success, otherwise a pointer to one of the
 * internals::ERROR_* messages:
 *  - arithmetic T, field type <= CSV_STRING          -> ERROR_NAN
 *  - integral T, field is a float                    -> ERROR_FLOAT_TO_INT
 *  - unsigned T, value < 0                           -> ERROR_NEG_TO_UNSIGNED
 *  - unsigned T, value > get_uint_max<sizeof(T)>()   -> ERROR_OVERFLOW
 *  - other non-floating T, type_num<T>() < field type -> ERROR_OVERFLOW
 * `out` is left untouched when an error is returned.
 */
222 template<typename T>
223 const char* check_convert(T& out) noexcept {
224 IF_CONSTEXPR(std::is_arithmetic<T>::value) {
225 if (this->type() <= DataType::CSV_STRING)
226 return internals::ERROR_NAN.c_str();
227 }
228
229 IF_CONSTEXPR(std::is_integral<T>::value) {
230 if (this->is_float())
231 return internals::ERROR_FLOAT_TO_INT.c_str();
232
233 IF_CONSTEXPR(std::is_unsigned<T>::value) {
234 if (this->value < 0)
235 return internals::ERROR_NEG_TO_UNSIGNED.c_str();
236 }
237 }
238
239 IF_CONSTEXPR(!std::is_floating_point<T>::value) {
240 IF_CONSTEXPR(std::is_unsigned<T>::value) {
241 if (this->value > internals::get_uint_max<sizeof(T)>())
242 return internals::ERROR_OVERFLOW.c_str();
243 }
244 else if (internals::type_num<T>() < this->_type) {
245 return internals::ERROR_OVERFLOW.c_str();
246 }
247 }
248
249 out = static_cast<T>(this->value);
250 return nullptr;
251 }
252
/**
 * Populate `_type` and `value` from the text via internals::data_type(),
 * unless that has been done already.
 */
253 CONSTEXPR_14 void get_value() noexcept {
254 /* Check to see if value has been cached previously, if not
255 * evaluate it
256 */
// NOTE(review): a negative enumerator value is treated as "not evaluated";
// presumably DataType::UNKNOWN is the only negative one -- confirm.
257 if ((int)_type < 0) {
258 this->_type = internals::data_type(this->sv, &this->value);
259 }
260 }
261 };
262
/**
 * A row of CSV fields, expressed as a window into shared parsed storage
 * (`data`): `data_start` offsets into the character buffer, `fields_start`
 * offsets into the field table, and `row_length` is the number of fields.
 */
264 class CSVRow {
265 public:
267
/** Row with no storage attached and zero fields. */
268 CSVRow() = default;
269
/** Attach the row to `_data`; the remaining members keep their defaults unless given. */
271 CSVRow(internals::RawCSVDataPtr _data) : data(_data) {}
272 CSVRow(internals::RawCSVDataPtr _data, size_t _data_start, size_t _field_bounds)
273 : data(_data), data_start(_data_start), fields_start(_field_bounds) {}
274 CSVRow(internals::RawCSVDataPtr _data, size_t _data_start, size_t _field_bounds, size_t _row_length)
275 : data(_data), data_start(_data_start), fields_start(_field_bounds), row_length(_row_length) {}
276
/** True when the row has no fields. */
278 CONSTEXPR bool empty() const noexcept { return this->size() == 0; }
279
/** Number of fields in the row. */
281 CONSTEXPR size_t size() const noexcept { return row_length; }
282
// The following members are declared here and defined out of line.
285 CSVField operator[](size_t n) const;
287 std::string to_json(const std::vector<std::string>& subset = {}) const;
288 std::string to_json_array(const std::vector<std::string>& subset = {}) const;
289
/**
 * Column names, as reported by data->col_names.
 * NOTE(review): `data` is dereferenced without a check; confirm this is
 * never called on a default-constructed row.
 */
291 std::vector<std::string> get_col_names() const {
292 return this->data->col_names->get_col_names();
293 }
294
// Declared here, defined out of line.
298 std::unordered_map<std::string, std::string> to_unordered_map() const;
299
301 std::unordered_map<std::string, std::string> to_unordered_map(
302 const std::vector<std::string>& subset
303 ) const;
304
305 #ifdef CSV_HAS_CXX20
/**
 * Lazy range yielding get_field(i) for i in [0, size()).
 * The lambda captures `this`, so the row must outlive the returned range.
 */
307 auto to_sv_range() const {
308 return std::views::iota(size_t{0}, this->size())
309 | std::views::transform([this](size_t i) { return this->get_field(i); });
310 }
311 #endif
312
// Declared here, defined out of line.
321 operator std::vector<std::string>() const;
322
330 csv::string_view raw_str() const noexcept;
332
/** Iterator over the fields of a CSVRow; member functions are defined out of line. */
336 class iterator {
337 public:
338#ifndef DOXYGEN_SHOULD_SKIP_THIS
339 using value_type = CSVField;
340 using difference_type = int;
341 using pointer = std::shared_ptr<CSVField>;
342 using reference = CSVField & ;
343 using iterator_category = std::random_access_iterator_tag;
344#endif
345 iterator(const CSVRow*, int i);
346
347 reference operator*() const;
348 pointer operator->() const;
349
350 iterator operator++(int);
351 iterator& operator++();
352 iterator operator--(int);
353 iterator& operator--();
354 iterator operator+(difference_type n) const;
355 iterator operator-(difference_type n) const;
356
/** Equality compares the field index only; the parent row is not compared. */
358 CONSTEXPR bool operator==(const iterator& other) const noexcept {
359 return this->i == other.i;
360 };
361
362 CONSTEXPR bool operator!=(const iterator& other) const noexcept { return !operator==(other); }
363
364#ifndef NDEBUG
365 friend CSVRow;
366#endif
367
368 private:
369 const CSVRow * daddy = nullptr; // Pointer to parent
370 internals::RawCSVDataPtr data = nullptr; // Keep data alive for lifetime of iterator
371 std::shared_ptr<CSVField> field = nullptr; // Current field pointed at
372 int i = 0; // Index of current field
373 };
374
376 using reverse_iterator = std::reverse_iterator<iterator>;
377
// Declared here, defined out of line.
382 iterator begin() const;
383 iterator end() const noexcept;
384 reverse_iterator rbegin() const noexcept;
385 reverse_iterator rend() const;
387
388 private:
/**
 * Return the text of field `index`, resolved against `_data`.
 *
 * Throws std::runtime_error("Index out of bounds.") when index >= size().
 *
 * For fields flagged has_double_quote, the text is rebuilt once into
 * _data->double_quote_fields[field_index] and the returned view refers to
 * that stored string. While rebuilding, a QUOTE character is dropped when the
 * flag set by an earlier, kept QUOTE is still set; the flag is cleared only
 * when a QUOTE is dropped. When _data->has_ws_trimming is set, the result is
 * passed through internals::get_trimmed().
 */
390 inline csv::string_view get_field_impl(size_t index, const internals::RawCSVDataPtr& _data) const {
392
393 if (index >= this->size())
394 throw std::runtime_error("Index out of bounds.");
395
// Locate the field record and slice its raw text out of the shared buffer.
396 const size_t field_index = this->fields_start + index;
397 auto field = _data->fields[field_index];
398 auto field_str = csv::string_view(_data->data).substr(this->data_start + field.start, field.length);
399
400 if (field.has_double_quote) {
401 auto& value = _data->double_quote_fields[field_index];
// Fill `value` only if it is still empty (the macro handles the optional locking).
402 CSV_INIT_WITH_OPTIONAL_DCL((*_data), value,
403 bool prev_ch_quote = false;
404 for (size_t i = 0; i < field.length; i++) {
405 if (_data->parse_flags[field_str[i] + CHAR_OFFSET] == ParseFlags::QUOTE) {
406 if (prev_ch_quote) {
407 prev_ch_quote = false;
408 continue;
409 }
410 else {
411 prev_ch_quote = true;
412 }
413 }
414
415 value += field_str[i];
416 }
417 );
418
419 if (_data->has_ws_trimming)
420 return internals::get_trimmed(csv::string_view(value), _data->ws_flags);
421 return value;
422 }
423 else if (_data->has_ws_trimming) {
424 field_str = internals::get_trimmed(field_str, _data->ws_flags);
425 }
426
427 return field_str;
428 }
429
// Declared here, defined out of line.
431 csv::string_view get_field(size_t index) const;
432
436 csv::string_view get_field_safe(size_t index, internals::RawCSVDataPtr _data) const;
437
// Shared parsed storage this row refers to.
438 internals::RawCSVDataPtr data;
439
// Offset of this row's text within data->data.
441 size_t data_start = 0;
442
// Index of this row's first entry within data->fields.
444 size_t fields_start = 0;
445
// Number of fields in this row.
447 size_t row_length = 0;
448 };
449
450#ifdef _MSC_VER
451#pragma region CSVField::get Specializations
452#endif
// get() specialization for std::string: returns an owning copy of the
// field's text; no type check is performed.
454 template<>
455 inline std::string CSVField::get<std::string>() {
456 return std::string(this->sv);
457 }
458
// get() specialization for csv::string_view: returns the field's view as-is
// (no copy, no type check).
464 template<>
465 CONSTEXPR_14 csv::string_view CSVField::get<csv::string_view>() {
466 return this->sv;
467 }
468
// get() specialization for long double: returns the cached numeric value.
// Throws std::runtime_error(internals::ERROR_NAN) when the field is not numeric.
470 template<>
471 CONSTEXPR_14 long double CSVField::get<long double>() {
// is_num() evaluates and caches the field's type/value on first use.
472 if (!is_num())
473 throw std::runtime_error(internals::ERROR_NAN);
474
475 return this->value;
476 }
477
// try_get() specialization for std::string: copies the field's text into
// `out` and always reports success.
// NOTE(review): declared noexcept although building a std::string may
// allocate; confirm that terminating on allocation failure is acceptable.
479 template<>
480 inline bool CSVField::try_get<std::string>(std::string& out) noexcept {
481 out = std::string(this->sv);
482 return true;
483 }
484
// try_get() specialization for csv::string_view: stores the field's view in
// `out` and always reports success.
486 template<>
487 CONSTEXPR_14 bool CSVField::try_get<csv::string_view>(csv::string_view& out) noexcept {
488 out = this->sv;
489 return true;
490 }
491
// try_get() specialization for long double: writes the cached numeric value
// to `out` and returns true, or returns false (leaving `out` untouched) when
// the field is not numeric.
493 template<>
494 CONSTEXPR_14 bool CSVField::try_get<long double>(long double& out) noexcept {
495 if (!is_num())
496 return false;
497
498 out = this->value;
499 return true;
500 }
501#ifdef _MSC_VER
502#pragma endregion CSVField::get Specializations
503#endif
504
// operator== specialization for C strings: compares the field's text with
// `other`; no numeric parsing is involved.
506 template<>
507 CONSTEXPR bool CSVField::operator==(const char * other) const noexcept
508 {
509 return this->sv == other;
510 }
511
// operator== specialization for csv::string_view: compares the field's text
// with `other`; no numeric parsing is involved.
513 template<>
514 CONSTEXPR bool CSVField::operator==(csv::string_view other) const noexcept
515 {
516 return this->sv == other;
517 }
518}
519
520#undef CSV_INIT_WITH_OPTIONAL_DCL
Data type representing individual CSV values.
Definition csv_row.hpp:67
CONSTEXPR_14 bool is_num() noexcept
Returns true if field is an integer or float.
Definition csv_row.hpp:197
bool try_parse_decimal(long double &dVal, const char decimalSymbol='.')
Attempts to parse a decimal (or integer) value using the given symbol, returning true if the value is...
Definition csv_row.cpp:113
CONSTEXPR_14 bool is_str() noexcept
Returns true if field is a non-numeric, non-empty string.
Definition csv_row.hpp:194
CONSTEXPR_14 bool is_int() noexcept
Returns true if field is an integer.
Definition csv_row.hpp:200
CONSTEXPR_14 bool is_null() noexcept
Returns true if field is an empty string or string of whitespace characters.
Definition csv_row.hpp:191
constexpr CSVField(csv::string_view _sv) noexcept
Constructs a CSVField from a string_view.
Definition csv_row.hpp:70
CONSTEXPR_14 DataType type() noexcept
Return the type of the underlying CSV data.
Definition csv_row.hpp:208
T get()
Returns the value cast to the requested type, performing type checking beforehand.
Definition csv_row.hpp:108
CONSTEXPR_14 bool operator==(T other) const noexcept
Compares the contents of this field to a numeric value.
Definition csv_row.hpp:166
bool try_get(T &out) noexcept
Non-throwing equivalent of get().
Definition csv_row.hpp:130
CONSTEXPR_14 bool is_float() noexcept
Returns true if field is a floating point value.
Definition csv_row.hpp:205
CONSTEXPR csv::string_view get_sv() const noexcept
Return a string view over the field's contents.
Definition csv_row.hpp:188
bool try_parse_hex(T &parsedValue)
Parse a hexadecimal value, returning false if the value is not hex.
Definition csv_row.hpp:138
A random access iterator over the contents of a CSV row.
Definition csv_row.hpp:336
CONSTEXPR bool operator==(const iterator &other) const noexcept
Two iterators are equal if they point to the same field.
Definition csv_row.hpp:358
Data structure for representing CSV rows.
Definition csv_row.hpp:264
iterator end() const noexcept
Return an iterator pointing to just after the end of the CSVRow.
Definition csv_row.cpp:147
std::reverse_iterator< iterator > reverse_iterator
A reverse iterator over the contents of a CSVRow.
Definition csv_row.hpp:376
std::string to_json(const std::vector< std::string > &subset={}) const
Convert a CSV row to a JSON object, i.e.
CONSTEXPR bool empty() const noexcept
Indicates whether row is empty or not.
Definition csv_row.hpp:278
csv::string_view raw_str() const noexcept
Return a string_view of the raw bytes of this row as they appear in the underlying parse buffer,...
Definition csv_row.cpp:66
std::string to_json_array(const std::vector< std::string > &subset={}) const
Convert a CSV row to a JSON array, i.e.
auto to_sv_range() const
Convert this CSVRow into a std::ranges::input_range of string_views.
Definition csv_row.hpp:307
std::unordered_map< std::string, std::string > to_unordered_map() const
Convert this CSVRow into an unordered map.
Definition csv_row.cpp:78
CONSTEXPR size_t size() const noexcept
Return the number of fields in this row.
Definition csv_row.hpp:281
std::vector< std::string > get_col_names() const
Retrieve this row's associated column names.
Definition csv_row.hpp:291
CSVField operator[](size_t n) const
Return a CSVField object corresponding to the nth value in the row.
Definition csv_row.cpp:37
iterator begin() const
Return an iterator pointing to the first field.
Definition csv_row.cpp:138
CSVRow(internals::RawCSVDataPtr _data)
Construct a CSVRow view over parsed row storage.
Definition csv_row.hpp:271
Abstract base class which provides CSV parsing logic.
A standalone header file containing shared code.
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
Definition common.hpp:144
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
Definition common.hpp:250
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
Definition common.hpp:187
Implements data type parsing functionality.
The all encompassing namespace.
DataType
Enumerates the different CSV field types that are recognized by this library.
Definition data_type.hpp:20
@ CSV_INT64
64-bit integer (long long on MSVC/GCC)
@ CSV_DOUBLE
Floating point value.
@ CSV_NULL
Empty string.
@ CSV_INT8
8-bit integer
@ CSV_STRING
Non-numeric string.
constexpr unsigned CHAR_OFFSET
Offset to convert char into array index.
Definition common.hpp:299
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:135
Implements Functions related to hexadecimal parsing.
Internal data structures for CSV parsing.