12#include <unordered_set>
22#if defined(__has_include)
23#if __has_include(<expected>)
25#ifdef __cpp_lib_expected
26#define CSV_HAS_STD_EXPECTED
36#include "../external/classify_scalar.hpp"
42 template<
typename RowSink,
typename ParsePolicy,
typename FieldPolicy,
typename RowPolicy>
44 struct CSVRowRowPolicy;
46 class CSVParserDriverBase;
48 namespace speculative {
49 struct CSVRowFragment;
/** Message text: the field is not a number. */
static const std::string ERROR_NAN("Not a number.");

/** Message text: the value overflowed. */
static const std::string ERROR_OVERFLOW("Overflow error.");

/** Message text: a floating point value was requested as an integral type. */
static const std::string ERROR_FLOAT_TO_INT(
    "Attempted to convert a floating point value to an integral type.");

/** Message text: a negative number was requested as an unsigned type. */
static const std::string ERROR_NEG_TO_UNSIGNED(
    "Negative numbers cannot be converted to unsigned types.");
/** C-string type used for the entries of CSV_CONVERSION_ERROR_MESSAGES. */
using csv_error_message = const char*;
92 static CONSTEXPR_VALUE_14 csv_error_message CSV_CONVERSION_ERROR_MESSAGES[] = {
96 "Attempted to convert a floating point value to an integral type.",
97 "Negative numbers cannot be converted to unsigned types."
103 const size_t index =
static_cast<size_t>(error);
104 return index < (
sizeof(internals::CSV_CONVERSION_ERROR_MESSAGES) /
sizeof(internals::CSV_CONVERSION_ERROR_MESSAGES[0]))
105 ? internals::CSV_CONVERSION_ERROR_MESSAGES[index]
123 switch (scalar.type) {
128 this->value_.integer = scalar.integer;
132 this->value_.floating = scalar.floating;
135 this->value_.timestamp = scalar.timestamp;
138 this->value_.boolean = scalar.boolean;
149 operator std::string()
const {
150 return std::string(this->sv);
180 template<
typename T = std::
string> T
get() {
187#ifdef CSV_HAS_STD_EXPECTED
199 template<
typename T = std::
string>
200 std::expected<T, CSVConversionError>
as() {
204 ? std::expected<T, CSVConversionError>(std::unexpected(err))
205 : std::expected<T, CSVConversionError>(out);
224 template<
typename T = std::
string>
239 operator std::optional<T>() {
241 return try_get(out) ? std::optional<T>(out) : std::nullopt;
248 template<
typename T =
long long>
250 static_assert(std::is_integral<T>::value,
251 "try_parse_hex only works with integral types (int, long, long long, etc.)");
253 return classify_scalar::parse_hex(this->sv.data(), this->sv.data() + this->sv.size(), parsedValue);
272#ifdef DOXYGEN_SHOULD_SKIP_THIS
277 internals::enable_if_t<
278 std::is_integral<T>::value && std::is_unsigned<T>::value && !std::is_same<T, bool>::value
279 && (
sizeof(T) >=
sizeof(std::uint64_t)),
283 std::uint64_t milliseconds = 0;
287 out =
static_cast<T
>(milliseconds);
293 template<
typename Rep,
typename Period>
295 std::uint64_t milliseconds = 0;
299 out = std::chrono::duration_cast<std::chrono::duration<Rep, Period>>(
300 std::chrono::milliseconds(milliseconds));
305 template<
typename Duration>
307 std::uint64_t milliseconds = 0;
311 out = std::chrono::time_point<std::chrono::system_clock, Duration>(
312 std::chrono::duration_cast<Duration>(std::chrono::milliseconds(milliseconds)));
332 static_assert(std::is_arithmetic<T>::value,
333 "T should be a numeric value.");
335 const_cast<CSVField*
>(
this)->get_value();
340 return internals::is_equal(this->numeric_value_as_long_double(),
static_cast<long double>(other), 0.000001L);
381#if defined(__GNUC__) && !defined(__clang__)
383 constexpr FieldValue() noexcept
384 : integer(0), floating(0), timestamp(0),
boolean(false) {}
386 std::int64_t integer;
387 long double floating;
388 std::uint64_t timestamp;
393 constexpr FieldValue() noexcept : floating(0) {}
395 std::int64_t integer;
396 long double floating;
397 std::uint64_t timestamp;
402 struct FieldValueOutput {
405 template<
classify_scalar::ScalarKind Kind>
406 typename std::enable_if<
407 Kind == classify_scalar::scalar_int8
408 || Kind == classify_scalar::scalar_int16
409 || Kind == classify_scalar::scalar_int32
410 || Kind == classify_scalar::scalar_int64,
411 void>
::type set(std::int64_t parsed)
const noexcept {
412 value.integer = parsed;
415 template<
classify_scalar::ScalarKind Kind>
416 typename std::enable_if<Kind == classify_scalar::scalar_float, void>::type set(
long double parsed)
const noexcept {
417 value.floating = parsed;
420 template<
classify_scalar::ScalarKind Kind>
421 typename std::enable_if<Kind == classify_scalar::scalar_bool, void>::type set(
bool parsed)
const noexcept {
422 value.boolean = parsed;
425 template<
classify_scalar::ScalarKind Kind>
426 typename std::enable_if<Kind == classify_scalar::scalar_timestamp, void>::type set(std::uint64_t parsed)
const noexcept {
427 value.timestamp = parsed;
435 CONSTEXPR_14
bool stores_integral() const noexcept {
439 CONSTEXPR_14
long double numeric_value_as_long_double() const noexcept {
440 return stores_integral()
441 ?
static_cast<long double>(value_.integer)
445 CONSTEXPR_14
void cache_parsed_value(
DataType parsed_type,
long double parsed_value)
noexcept {
449 value_.integer =
static_cast<std::int64_t
>(parsed_value);
452 value_.floating = parsed_value;
463 out = this->value_.boolean;
467 template<
typename Rep,
typename Period>
472 out = std::chrono::duration_cast<std::chrono::duration<Rep, Period>>(
473 std::chrono::milliseconds(this->value_.timestamp));
477 template<
typename Duration>
478 CSVConversionError check_convert(std::chrono::time_point<std::chrono::system_clock, Duration>& out)
noexcept {
482 out = std::chrono::time_point<std::chrono::system_clock, Duration>(
483 std::chrono::duration_cast<Duration>(std::chrono::milliseconds(this->value_.timestamp)));
501 if (this->numeric_value_as_long_double() < 0)
507 const long double value = this->numeric_value_as_long_double();
508 if (value <
static_cast<long double>(std::numeric_limits<T>::min())
509 || value >
static_cast<long double>(std::numeric_limits<T>::max())) {
514 out = this->stores_integral()
515 ?
static_cast<T
>(this->value_.integer)
516 : static_cast<T>(this->value_.floating);
521 inline void get_value() noexcept {
522 if ((
int)type_ < 0) {
523 if (this->sv.empty()) {
528 const char* first = this->sv.data();
529 const char* last = first + this->sv.size();
530 typedef classify_scalar::policy_pack<
531 classify_scalar::builtin_numeric_policy<
'.',
false>,
532 classify_scalar::builtin_timestamp_policy,
533 classify_scalar::builtin_bool_policy
534 > csv_field_policy_pack;
536 type_ = classify_scalar::classify_scalar<
538 true>(first, last, FieldValueOutput{ this->value_ }, csv_field_policy_pack());
546 template<
typename RowSink,
typename ParsePolicy,
typename FieldPolicy,
typename RowPolicy>
548 friend struct internals::CSVRowRowPolicy;
549 friend internals::parser::CSVParserDriverBase;
550 friend struct internals::speculative::CSVRowFragment;
555 CSVRow(internals::RawCSVDataPtr _data) : data(_data) {}
556 CSVRow(internals::RawCSVDataPtr _data,
size_t _data_start,
size_t _field_bounds)
557 : data(_data), data_start(_data_start), fields_start(_field_bounds) {}
558 CSVRow(internals::RawCSVDataPtr _data,
size_t _data_start,
size_t _field_bounds,
size_t _row_length)
559 : data(_data), data_start(_data_start), fields_start(_field_bounds), row_length(_row_length) {}
571 inline std::string to_json(
const std::vector<std::string>& subset = {})
const {
572 const auto* converter = this->get_json_converter();
573 return converter ==
nullptr ?
"{}"
574 : converter->row_to_json(this->
size(), [
this](
size_t i) {
return this->get_field(i); }, subset);
576 inline std::string to_json_array(
const std::vector<std::string>& subset = {})
const {
577 const auto* converter = this->get_json_converter();
578 return converter ==
nullptr ?
"[]"
579 : converter->row_to_json_array(this->
size(), [
this](
size_t i) {
return this->get_field(i); }, subset);
584 return this->data->col_names->get_col_names();
589 return this->data->col_names;
599 const std::vector<std::string>& subset
608 return std::views::iota(
size_t{0}, this->
size())
609 | std::views::transform([
this](
size_t i) {
return this->get_field(i); });
621 operator std::vector<std::string>()
const;
638#ifndef DOXYGEN_SHOULD_SKIP_THIS
640 using difference_type = int;
641 using pointer = std::shared_ptr<CSVField>;
643 using iterator_category = std::random_access_iterator_tag;
647 reference operator*()
const;
648 pointer operator->()
const;
654 iterator operator+(difference_type n)
const;
655 iterator operator-(difference_type n)
const;
656 iterator& operator+=(difference_type n);
657 iterator& operator-=(difference_type n);
658 difference_type operator-(
const iterator& other)
const noexcept;
662 return this->i == other.i;
665 CONSTEXPR bool operator!=(
const iterator& other)
const noexcept {
return !operator==(other); }
672 const CSVRow * daddy =
nullptr;
673 internals::RawCSVDataPtr data =
nullptr;
674 std::shared_ptr<CSVField> field =
nullptr;
693 inline
csv::
string_view get_field_impl(
size_t index, const internals::RawCSVDataPtr& _data)
const {
694 if (index >= this->
size())
695 throw std::runtime_error(internals::CSV_ERROR_INDEX_OUT_OF_BOUNDS);
697 const size_t field_index = this->fields_start + index;
698 const auto field = _data->fields[field_index];
700 if (field.has_realized_storage()) {
701 field_str = _data->quote_arena.view(field.start, field.length);
704 field_str =
csv::string_view(_data->data).substr(this->data_start + field.start, field.length);
707 if (_data->has_ws_trimming) {
708 field_str = internals::get_trimmed(field_str, _data->ws_flags);
714 CSVField make_field(
size_t index,
const internals::RawCSVDataPtr& _data)
const;
722 csv::string_view get_field_safe(
size_t index, internals::RawCSVDataPtr _data)
const;
724 const internals::JsonConverter* get_json_converter()
const {
725 if (this->data.get() ==
nullptr) {
729 return &this->data->json_converter.get_or_create([
this]() {
730 const std::vector<std::string> columns = this->data->col_names
731 ? this->data->col_names->get_col_names()
732 : std::vector<std::string>();
733 return std::make_shared<internals::JsonConverter>(columns);
737 internals::RawCSVDataPtr data;
740 size_t data_start = 0;
743 size_t fields_start = 0;
746 size_t row_length = 0;
749 size_t data_end = (std::numeric_limits<size_t>::max)();
753#pragma region CSVField::get Specializations
757 inline std::string CSVField::get<std::string>() {
758 return std::string(this->sv);
773 inline long double CSVField::get<long double>() {
775 throw std::runtime_error(internals::ERROR_NAN);
777 return this->numeric_value_as_long_double();
782 inline bool CSVField::try_get<std::string>(std::string& out)
noexcept {
783 out = std::string(this->sv);
796 inline bool CSVField::try_get<long double>(
long double& out)
noexcept {
800 out = this->numeric_value_as_long_double();
804#pragma endregion CSVField::get Specializations
811 return this->sv == other;
818 return this->sv == other;
Data type representing individual CSV values.
bool try_parse_timestamp(std::uint64_t &out) noexcept
Parse this field as Unix milliseconds.
bool try_parse_decimal(long double &dVal, const char decimalSymbol='.')
Attempts to parse a decimal (or integer) value using the given symbol, returning true if the value is...
DataType type() noexcept
Return the type of the underlying CSV data.
bool try_parse_timestamp(std::chrono::duration< Rep, Period > &out) noexcept
Parse this field as a timestamp duration since the Unix epoch.
bool operator==(T other) const noexcept
Compares the contents of this field to a numeric value.
std::expected< T, CSVConversionError > as()
Return this field as T, preserving conversion failure as CSVConversionError.
bool is_bool() noexcept
Returns true if field is a boolean value.
bool is_str() noexcept
Returns true if field is a non-numeric, non-empty string.
constexpr CSVField(csv::string_view _sv) noexcept
Constructs a CSVField from a string_view.
T get()
Returns the value cast to the requested type, performing type checking beforehand.
bool try_parse_timestamp(T &out) noexcept
Parse this field as Unix milliseconds in a 64-bit unsigned integer.
bool try_get(T &out) noexcept
Non-throwing equivalent of get().
bool is_null() noexcept
Returns true if field is an empty string or string of whitespace characters.
CONSTEXPR csv::string_view get_sv() const noexcept
Return a string view over the field's contents.
bool is_float() noexcept
Returns true if field is a floating point value.
bool try_parse_timestamp(std::chrono::time_point< std::chrono::system_clock, Duration > &out) noexcept
Parse this field as a std::chrono::system_clock time point.
bool is_num() noexcept
Returns true if field is an integer or float.
bool is_int() noexcept
Returns true if field is an integer.
bool is_timestamp() noexcept
Returns true if field is a timestamp value.
bool try_parse_hex(T &parsedValue)
Parse a hexadecimal value, returning false if the value is not hex.
A random access iterator over the contents of a CSV row.
CONSTEXPR bool operator==(const iterator &other) const noexcept
Two iterators are equal if they point to the same field.
Data structure for representing CSV rows.
iterator end() const noexcept
Return an iterator pointing to just after the end of the CSVRow.
std::reverse_iterator< iterator > reverse_iterator
A reverse iterator over the contents of a CSVRow.
CONSTEXPR bool empty() const noexcept
Indicates whether row is empty or not.
csv::string_view raw_str() const noexcept
Return a string_view of the raw bytes of this row as they appear in the underlying parse buffer,...
const std::vector< std::string > & get_col_names() const
Retrieve this row's associated column names.
auto to_sv_range() const
Convert this CSVRow into a std::ranges::input_range of string_views.
std::unordered_map< std::string, std::string > to_unordered_map() const
Convert this CSVRow into an unordered map.
CONSTEXPR size_t size() const noexcept
Return the number of fields in this row.
CSVField operator[](size_t n) const
Return a CSVField object corresponding to the nth value in the row.
internals::ConstColNamesPtr col_names_ptr() const noexcept
Internal accessor for preserving resolved column-name lookup policy across helper types.
iterator begin() const
Return an iterator pointing to the first field.
CSVRow(internals::RawCSVDataPtr _data)
Construct a CSVRow view over parsed row storage.
A standalone header file containing shared code.
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
Shared exception message templates and throw helpers.
CSV scalar type classification adapter.
Internal JSON serialization helpers for row-like CSV data.
The all-encompassing namespace.
DataType
Enumerates the different CSV field types recognized by this library.
@ CSV_TIMESTAMP
Timestamp value.
@ CSV_INT64
64-bit integer
@ CSV_DOUBLE
Floating point value.
@ CSV_BIGINT
Integer too large to fit in 64 bits.
@ CSV_INT16
16-bit integer
@ CSV_INT32
32-bit integer
@ CSV_STRING
Non-scalar string.
CSVConversionError
Non-throwing CSVField conversion result.
@ FloatToInt
A floating point field was requested as an integral type.
@ Overflow
The parsed value does not fit in the requested target type.
@ NegativeToUnsigned
A negative value was requested as an unsigned type.
@ None
Conversion succeeded.
@ NotANumber
The field is not compatible with the requested target type.
const char * csv_conversion_error_message(CSVConversionError error) noexcept
Return a stable human-readable description for a CSVConversionError.
std::string_view string_view
The string_view class used by this library.
Internal data structures for CSV parsing.
Cached scalar classification and parsed value for one CSV field.