Vince's CSV Parser
Loading...
Searching...
No Matches
csv_row.cpp
Go to the documentation of this file.
1
5#include <cassert>
6#include <functional>
7#include "csv_row.hpp"
8#include "csv_exceptions.hpp"
9
10namespace csv {
11 namespace internals {
12 CSV_INLINE csv::string_view get_trimmed(csv::string_view sv, const WhitespaceMap& ws_flags) noexcept
13 {
14 // Lazy trim only when requested
15 size_t start = 0;
16 while (start < sv.size() && ws_flags[sv[start] + CHAR_OFFSET]) {
17 ++start;
18 }
19
20 size_t end = sv.size();
21 while (end > start && ws_flags[sv[end - 1] + CHAR_OFFSET]) {
22 --end;
23 }
24
25 return sv.substr(start, end - start);
26 }
27 }
28
39 return this->make_field(n, this->data);
40 }
41
50 auto & col_names = this->data->col_names;
51 auto col_pos = col_names->index_of(col_name);
52 if (col_pos > -1) {
53 return this->operator[](col_pos);
54 }
55
56 internals::throw_column_not_found(col_name);
57 }
58 CSV_INLINE CSVRow::operator std::vector<std::string>() const {
59 std::vector<std::string> ret;
60 for (size_t i = 0; i < size(); i++)
61 ret.push_back(std::string(this->get_field(i)));
62
63 return ret;
64 }
65
67 if (!data) return csv::string_view();
68 const csv::string_view full = data->data;
69 if (data_start >= full.size()) return csv::string_view();
70
71 if (data_end != (std::numeric_limits<size_t>::max)()
72 && data_end >= data_start
73 && data_end <= full.size()) {
74 return full.substr(data_start, data_end - data_start);
75 }
76
77 const size_t end = full.find('\n', data_start);
78 const size_t len = (end == csv::string_view::npos)
79 ? (full.size() - data_start)
80 : (end - data_start);
81 return full.substr(data_start, len);
82 }
83
85 CSV_INLINE std::unordered_map<std::string, std::string> CSVRow::to_unordered_map() const {
86 std::unordered_map<std::string, std::string> row_map;
87 row_map.reserve(this->size());
88
89 for (size_t i = 0; i < this->size(); i++) {
90 auto col_name = (*this->data->col_names)[i];
91 row_map[col_name] = this->operator[](i).get<std::string>();
92 }
93
94 return row_map;
95 }
96
98 CSV_INLINE std::unordered_map<std::string, std::string> CSVRow::to_unordered_map(
99 const std::vector<std::string>& subset
100 ) const {
101 std::unordered_map<std::string, std::string> row_map;
102 row_map.reserve(subset.size());
103
104 for (const auto& col_name : subset)
105 row_map[col_name] = this->operator[](col_name).get<std::string>();
106
107 return row_map;
108 }
109
110 CSV_INLINE csv::string_view CSVRow::get_field(size_t index) const
111 {
112 return this->get_field_impl(index, this->data);
113 }
114
115 CSV_INLINE csv::string_view CSVRow::get_field_safe(size_t index, internals::RawCSVDataPtr _data) const
116 {
117 return this->get_field_impl(index, _data);
118 }
119
120 CSV_INLINE CSVField CSVRow::make_field(size_t index, const internals::RawCSVDataPtr& _data) const
121 {
122 const csv::string_view field = this->get_field_impl(index, _data);
123 const size_t field_index = this->fields_start + index;
124 if (_data->has_field_scalars() && field_index < _data->field_scalars.size()) {
125 return CSVField(field, _data->field_scalars[field_index]);
126 }
127
128 return CSVField(field);
129 }
130
131 CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) {
132 // If field has already been parsed to empty, no need to do it aagin:
133 if (this->type_ == DataType::CSV_NULL)
134 return false;
135
136 if (this->type_ == DataType::UNKNOWN)
137 this->get_value();
138
139 if (this->type_ == DataType::CSV_NULL)
140 return false;
141
142 if (this->type_ == DataType::CSV_STRING || this->type_ == DataType::CSV_DOUBLE) {
143 double parsed_value = 0;
144 if (!classify_scalar::parse_float(this->sv.data(), this->sv.data() + this->sv.size(), parsed_value, decimalSymbol)) {
145 if (this->type_ == DataType::CSV_DOUBLE)
146 this->type_ = DataType::CSV_STRING;
147 return false;
148 }
149
150 this->cache_parsed_value(DataType::CSV_DOUBLE, parsed_value);
151 }
152
153 // Integral types are not affected by decimalSymbol and need not be parsed again
154
155 // Either we already had an integral type before, or we we just got any numeric type now.
156 if (this->type_ >= DataType::CSV_INT8 && this->type_ <= DataType::CSV_DOUBLE) {
157 dVal = this->numeric_value_as_long_double();
158 return true;
159 }
160
161 // CSV_NULL or CSV_STRING, not numeric
162 return false;
163 }
164
165 CSV_INLINE bool CSVField::try_parse_timestamp(std::uint64_t& out) noexcept {
166 if (this->type_ == DataType::UNKNOWN)
167 this->get_value();
168
169 if (this->type_ == DataType::CSV_TIMESTAMP) {
170 out = this->value_.timestamp;
171 return true;
172 }
173
174 if (this->stores_integral() && this->value_.integer >= 0) {
175 out = static_cast<std::uint64_t>(this->value_.integer);
176 return true;
177 }
178
179 return false;
180 }
181
182#ifdef _MSC_VER
183#pragma region CSVRow Iterator
184#endif
187 return CSVRow::iterator(this, 0);
188 }
189
196 return CSVRow::iterator(this, (int)this->size());
197 }
198
199 CSV_INLINE CSVRow::reverse_iterator CSVRow::rbegin() const noexcept {
200 return std::reverse_iterator<CSVRow::iterator>(this->end());
201 }
202
203 CSV_INLINE CSVRow::reverse_iterator CSVRow::rend() const {
204 return std::reverse_iterator<CSVRow::iterator>(this->begin());
205 }
206
207 CSV_INLINE CSV_NON_NULL(2)
208 CSVRow::iterator::iterator(const CSVRow* _reader, int _i)
209 : daddy(_reader), data(_reader->data), i(_i) {
210 if (_i < (int)this->daddy->size())
211 this->field = std::make_shared<CSVField>(
212 this->daddy->make_field(_i, this->data));
213 else
214 this->field = nullptr;
215 }
216
217 CSV_INLINE CSVRow::iterator::reference CSVRow::iterator::operator*() const {
218 return *(this->field.get());
219 }
220
221 CSV_INLINE CSVRow::iterator::pointer CSVRow::iterator::operator->() const {
222 return this->field;
223 }
224
225 CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator++() {
226 // Pre-increment operator
227 this->i++;
228 if (this->i < (int)this->daddy->size())
229 this->field = std::make_shared<CSVField>(
230 this->daddy->make_field(i, this->data));
231 else // Reached the end of row
232 this->field = nullptr;
233 return *this;
234 }
235
236 CSV_INLINE CSVRow::iterator CSVRow::iterator::operator++(int) {
237 // Post-increment operator
238 auto temp = *this;
239 this->operator++();
240 return temp;
241 }
242
243 CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator--() {
244 // Pre-decrement operator
245 this->i--;
246 this->field = std::make_shared<CSVField>(
247 this->daddy->make_field(this->i, this->data));
248 return *this;
249 }
250
251 CSV_INLINE CSVRow::iterator CSVRow::iterator::operator--(int) {
252 // Post-decrement operator
253 auto temp = *this;
254 this->operator--();
255 return temp;
256 }
257
258 CSV_INLINE CSVRow::iterator CSVRow::iterator::operator+(difference_type n) const {
259 // Allows for iterator arithmetic
260 return CSVRow::iterator(this->daddy, i + (int)n);
261 }
262
263 CSV_INLINE CSVRow::iterator CSVRow::iterator::operator-(difference_type n) const {
264 // Allows for iterator arithmetic
265 return CSVRow::iterator::operator+(-n);
266 }
267
268 CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator+=(difference_type n) {
269 *this = *this + n;
270 return *this;
271 }
272
273 CSV_INLINE CSVRow::iterator& CSVRow::iterator::operator-=(difference_type n) {
274 *this = *this - n;
275 return *this;
276 }
277
278 CSV_INLINE CSVRow::iterator::difference_type CSVRow::iterator::operator-(const iterator& other) const noexcept {
279 return this->i - other.i;
280 }
281#ifdef _MSC_VER
282#pragma endregion CSVRow Iterator
283#endif
284}
Data type representing individual CSV values.
Definition csv_row.hpp:114
bool try_parse_timestamp(std::uint64_t &out) noexcept
Parse this field as Unix milliseconds.
Definition csv_row.cpp:165
bool try_parse_decimal(long double &dVal, const char decimalSymbol='.')
Attempts to parse a decimal (or integer) value using the given symbol, returning true if the value is...
Definition csv_row.cpp:131
T get()
Returns the value casted to the requested type, performing type checking before.
Definition csv_row.hpp:180
A random access iterator over the contents of a CSV row.
Definition csv_row.hpp:636
iterator end() const noexcept
Return an iterator pointing to just after the end of the CSVRow.
Definition csv_row.cpp:195
std::reverse_iterator< iterator > reverse_iterator
A reverse iterator over the contents of a CSVRow.
Definition csv_row.hpp:679
csv::string_view raw_str() const noexcept
Return a string_view of the raw bytes of this row as they appear in the underlying parse buffer,...
Definition csv_row.cpp:66
std::unordered_map< std::string, std::string > to_unordered_map() const
Convert this CSVRow into an unordered map.
Definition csv_row.cpp:85
CONSTEXPR size_t size() const noexcept
Return the number of fields in this row.
Definition csv_row.hpp:565
CSVField operator[](size_t n) const
Return a CSVField object corresponding to the nth value in the row.
Definition csv_row.cpp:38
iterator begin() const
Return an iterator pointing to the first field.
Definition csv_row.cpp:186
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Definition common.hpp:31
Shared exception message templates and throw helpers.
Defines the data type used for storing information about a CSV row.
The all encompassing namespace.
@ CSV_TIMESTAMP
Timestamp value.
@ CSV_DOUBLE
Floating point value.
@ CSV_NULL
Empty string.
@ CSV_INT8
8-bit integer
@ CSV_STRING
Non-scalar string.
constexpr unsigned CHAR_OFFSET
Offset to convert char into array index.
Definition common.hpp:482
std::string_view string_view
The string_view class used by this library.
Definition common.hpp:174