Vince's CSV Parser
Loading...
Searching...
No Matches
data_type.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <cmath>
7#include <cctype>
8#include <string>
9#include <cassert>
10
11#include "common.hpp"
12
13namespace csv {
/** Enumerates the different CSV field types that are recognized by this library.
 *
 *  The enumerators are deliberately ordered so that relational comparisons
 *  are meaningful: strings come before every numeric type, narrower integer
 *  types come before wider ones, and all integer types come before
 *  floating point values.
 */
enum class DataType {
    UNKNOWN = -1,
    CSV_NULL,   /**< Empty string */
    CSV_STRING, /**< Non-numeric string */
    CSV_INT8,   /**< 8-bit integer */
    CSV_INT16,  /**< 16-bit integer (short on MSVC/GCC) */
    CSV_INT32,  /**< 32-bit integer (int on MSVC/GCC) */
    CSV_INT64,  /**< 64-bit integer (long long on MSVC/GCC) */
    CSV_BIGINT, /**< Value too big to fit in a 64-bit int */
    CSV_DOUBLE  /**< Floating point value */
};
31
32 static_assert(DataType::CSV_STRING < DataType::CSV_INT8, "String type should come before numeric types.");
33 static_assert(DataType::CSV_INT8 < DataType::CSV_INT64, "Smaller integer types should come before larger integer types.");
34 static_assert(DataType::CSV_INT64 < DataType::CSV_DOUBLE, "Integer types should come before floating point value types.");
35
36 namespace internals {
43 template<typename T>
44 CSV_CONST CONSTEXPR_14
45 long double pow10(const T& n) noexcept {
46 static_assert(std::is_integral<T>::value, "pow10 only supports integral exponents");
47
48 long double multiplicand = n > 0 ? 10 : 0.1,
49 ret = 1;
50
51 // Make all numbers positive
52 T iterations = n > 0 ? n : -n;
53
54 for (T i = 0; i < iterations; i++) {
55 ret *= multiplicand;
56 }
57
58 return ret;
59 }
60
62 template<>
63 CSV_CONST CONSTEXPR_14
64 long double pow10(const unsigned& n) noexcept {
65 long double multiplicand = n > 0 ? 10 : 0.1,
66 ret = 1;
67
68 for (unsigned i = 0; i < n; i++) {
69 ret *= multiplicand;
70 }
71
72 return ret;
73 }
74
75#ifndef DOXYGEN_SHOULD_SKIP_THIS
77 constexpr DataType int_type_arr[8] = {
80 DataType::UNKNOWN,
82 DataType::UNKNOWN,
83 DataType::UNKNOWN,
84 DataType::UNKNOWN,
86 };
87
88 template<typename T>
89 inline DataType type_num() {
90 static_assert(std::is_integral<T>::value, "T should be an integral type.");
91 static_assert(sizeof(T) <= 8, "Byte size must be no greater than 8.");
92 return int_type_arr[sizeof(T) - 1];
93 }
94
95 template<> inline DataType type_num<float>() { return DataType::CSV_DOUBLE; }
96 template<> inline DataType type_num<double>() { return DataType::CSV_DOUBLE; }
97 template<> inline DataType type_num<long double>() { return DataType::CSV_DOUBLE; }
98 template<> inline DataType type_num<std::nullptr_t>() { return DataType::CSV_NULL; }
99 template<> inline DataType type_num<std::string>() { return DataType::CSV_STRING; }
100
101 CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr,
102 const char decimalsymbol = '.');
103#endif
104
111 template<size_t Bytes>
112 CONSTEXPR_14 long double get_int_max() {
113 static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
114 "Bytes must be a power of 2 below 8.");
115
116 CSV_MSVC_PUSH_DISABLE(4127)
117 IF_CONSTEXPR (sizeof(signed char) == Bytes) {
118 return (long double)std::numeric_limits<signed char>::max();
119 }
120 else IF_CONSTEXPR (sizeof(short) == Bytes) {
121 return (long double)std::numeric_limits<short>::max();
122 }
123 else IF_CONSTEXPR (sizeof(int) == Bytes) {
124 return (long double)std::numeric_limits<int>::max();
125 }
126 else IF_CONSTEXPR (sizeof(long int) == Bytes) {
127 return (long double)std::numeric_limits<long int>::max();
128 }
129 else {
130 return (long double)std::numeric_limits<long long int>::max();
131 }
132 CSV_MSVC_POP
133 }
134
138 template<size_t Bytes>
139 CONSTEXPR_14 long double get_uint_max() {
140 static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
141 "Bytes must be a power of 2 below 8.");
142
143 CSV_MSVC_PUSH_DISABLE(4127)
144 IF_CONSTEXPR(sizeof(unsigned char) == Bytes) {
145 return (long double)std::numeric_limits<unsigned char>::max();
146 }
147 else IF_CONSTEXPR(sizeof(unsigned short) == Bytes) {
148 return (long double)std::numeric_limits<unsigned short>::max();
149 }
150 else IF_CONSTEXPR(sizeof(unsigned int) == Bytes) {
151 return (long double)std::numeric_limits<unsigned int>::max();
152 }
153 else IF_CONSTEXPR(sizeof(unsigned long int) == Bytes) {
154 return (long double)std::numeric_limits<unsigned long int>::max();
155 }
156 else {
157 return (long double)std::numeric_limits<unsigned long long int>::max();
158 }
159 CSV_MSVC_POP
160 }
161
163 CONSTEXPR_VALUE_14 long double CSV_INT8_MAX = get_int_max<1>();
164
166 CONSTEXPR_VALUE_14 long double CSV_INT16_MAX = get_int_max<2>();
167
169 CONSTEXPR_VALUE_14 long double CSV_INT32_MAX = get_int_max<4>();
170
172 CONSTEXPR_VALUE_14 long double CSV_INT64_MAX = get_int_max<8>();
173
175 CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX = get_uint_max<1>();
176
178 CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX = get_uint_max<2>();
179
181 CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX = get_uint_max<4>();
182
184 CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX = get_uint_max<8>();
185
190 CSV_PRIVATE CONSTEXPR_14
192 csv::string_view exponential_part,
193 const long double& coeff,
194 long double * const out) {
195 long double exponent = 0;
196 auto result = data_type(exponential_part, &exponent);
197
198 // Exponents in scientific notation should not be decimal numbers
199 if (result >= DataType::CSV_INT8 && result < DataType::CSV_DOUBLE) {
200 if (out) *out = coeff * pow10(static_cast<long long>(exponent));
202 }
203
205 }
206
210 CSV_PRIVATE CSV_PURE CONSTEXPR_14
211 DataType _determine_integral_type(const long double& number) noexcept {
212 // We can assume number is always non-negative
213 assert(number >= 0);
214
215 if (number <= internals::CSV_INT8_MAX)
216 return DataType::CSV_INT8;
217 else if (number <= internals::CSV_INT16_MAX)
218 return DataType::CSV_INT16;
219 else if (number <= internals::CSV_INT32_MAX)
220 return DataType::CSV_INT32;
221 else if (number <= internals::CSV_INT64_MAX)
222 return DataType::CSV_INT64;
223 else // Conversion to long long will cause an overflow
225 }
226
240 CONSTEXPR_14
241 DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) {
242 // Empty string --> NULL
243 if (in.size() == 0)
244 return DataType::CSV_NULL;
245
246 bool ws_allowed = true,
247 dot_allowed = true,
248 digit_allowed = true,
249 is_negative = false,
250 has_digit = false,
251 prob_float = false;
252
253 unsigned places_after_decimal = 0;
254 long double integral_part = 0,
255 decimal_part = 0;
256
257 for (size_t i = 0, ilen = in.size(); i < ilen; i++) {
258 const char& current = in[i];
259
260 switch (current) {
261 case ' ':
262 if (!ws_allowed) {
263 if (isdigit(in[i - 1])) {
264 digit_allowed = false;
265 ws_allowed = true;
266 }
267 else {
268 // Ex: '510 123 4567'
270 }
271 }
272 break;
273 case '+':
274 if (!ws_allowed) {
276 }
277
278 break;
279 case '-':
280 if (!ws_allowed) {
281 // Ex: '510-123-4567'
283 }
284
285 is_negative = true;
286 break;
287 // case decimalSymbol: not allowed because decimalSymbol is not a literal,
288 // it is handled in the default block
289 case 'e':
290 case 'E':
291 // Process scientific notation
292 if (prob_float || (i && i + 1 < ilen && isdigit(in[i - 1]))) {
293 size_t exponent_start_idx = i + 1;
294 prob_float = true;
295
296 // Strip out plus sign
297 if (in[i + 1] == '+') {
298 exponent_start_idx++;
299 }
300
302 in.substr(exponent_start_idx),
303 is_negative ? -(integral_part + decimal_part) : integral_part + decimal_part,
304 out
305 );
306 }
307
309 break;
310 default:
311 short digit = static_cast<short>(current - '0');
312 if (digit >= 0 && digit <= 9) {
313 // Process digit
314 has_digit = true;
315
316 if (!digit_allowed)
318 else if (ws_allowed) // Ex: '510 456'
319 ws_allowed = false;
320
321 // Build current number
322 if (prob_float)
323 decimal_part += digit / pow10(++places_after_decimal);
324 else
325 integral_part = (integral_part * 10) + digit;
326 }
327 // case decimalSymbol: not allowed because decimalSymbol is not a literal.
328 else if (dot_allowed && current == decimalSymbol) {
329 dot_allowed = false;
330 prob_float = true;
331 }
332 else {
334 }
335 }
336 }
337
338 // No non-numeric/non-whitespace characters found
339 if (has_digit) {
340 long double number = integral_part + decimal_part;
341 if (out) {
342 *out = is_negative ? -number : number;
343 }
344
345 return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number);
346 }
347
348 // Just whitespace
349 return DataType::CSV_NULL;
350 }
351 }
352}
A standalone header file containing shared code.
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
Definition common.hpp:144
CONSTEXPR_VALUE_14 long double CSV_INT16_MAX
Largest number that can be stored in a 16-bit integer.
CONSTEXPR_VALUE_14 long double CSV_INT32_MAX
Largest number that can be stored in a 32-bit integer.
CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX
Largest number that can be stored in a 16-bit unsigned integer.
CONSTEXPR_14 DataType data_type(csv::string_view in, long double *const out, const char decimalSymbol)
Distinguishes numeric from other text values.
CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX
Largest number that can be stored in a 32-bit unsigned integer.
CSV_CONST CONSTEXPR_14 long double pow10(const T &n) noexcept
Compute 10 to the power of n.
Definition data_type.hpp:45
CONSTEXPR_VALUE_14 long double CSV_INT64_MAX
Largest number that can be stored in a 64-bit integer.
CONSTEXPR_VALUE_14 long double CSV_INT8_MAX
Largest number that can be stored in an 8-bit integer.
CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX
Largest number that can be stored in a 64-bit unsigned integer.
CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX
Largest number that can be stored in an 8-bit unsigned integer.
CONSTEXPR_14 long double get_uint_max()
Given a byte size, return the largest number that can be stored in an unsigned integer of that size.
CSV_PRIVATE CONSTEXPR_14 DataType _process_potential_exponential(csv::string_view exponential_part, const long double &coeff, long double *const out)
Given a pointer to the start of the exponential part of a number (possibly) written in scientific notation ...
CONSTEXPR_14 long double get_int_max()
Given a byte size, return the largest number that can be stored in an integer of that size.
CSV_PRIVATE CSV_PURE CONSTEXPR_14 DataType _determine_integral_type(const long double &number) noexcept
Given the absolute value of an integer, determine what numeric type it fits in.
The all encompassing namespace.
DataType
Enumerates the different CSV field types that are recognized by this library.
Definition data_type.hpp:20
@ CSV_INT64
64-bit integer (long long on MSVC/GCC)
@ CSV_DOUBLE
Floating point value.
@ CSV_NULL
Empty string.
@ CSV_BIGINT
Value too big to fit in a 64-bit int.
@ CSV_INT16
16-bit integer (short on MSVC/GCC)
@ CSV_INT32
32-bit integer (int on MSVC/GCC)
@ CSV_INT8
8-bit integer
@ CSV_STRING
Non-numeric string.
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:135