Vince's CSV Parser
Loading...
Searching...
No Matches
data_type.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <cmath>
7#include <cctype>
8#include <string>
9#include <cassert>
10
11#include "common.hpp"
12
13namespace csv {
/** Enumerates the different CSV field types that are recognized by this library.
 *
 *  @note The numeric order of the enumerators is significant: the string type
 *        must precede the numeric types, smaller integer types must precede
 *        larger ones, and integer types must precede the floating point type
 *        (see the static_asserts that follow this enum).
 */
enum class DataType {
    UNKNOWN = -1,
    CSV_NULL,   /**< Empty string */
    CSV_STRING, /**< Non-numeric string */
    CSV_INT8,   /**< 8-bit integer */
    CSV_INT16,  /**< 16-bit integer (short on MSVC/GCC) */
    CSV_INT32,  /**< 32-bit integer (int on MSVC/GCC) */
    CSV_INT64,  /**< 64-bit integer (long long on MSVC/GCC) */
    CSV_BIGINT, /**< Value too big to fit in a 64-bit int */
    CSV_DOUBLE  /**< Floating point value */
};
31
32 static_assert(DataType::CSV_STRING < DataType::CSV_INT8, "String type should come before numeric types.");
33 static_assert(DataType::CSV_INT8 < DataType::CSV_INT64, "Smaller integer types should come before larger integer types.");
34 static_assert(DataType::CSV_INT64 < DataType::CSV_DOUBLE, "Integer types should come before floating point value types.");
35
36 namespace internals {
38 template<typename T>
39 CSV_CONST CONSTEXPR_14
40 long double pow10(const T& n) noexcept {
41 long double multiplicand = n > 0 ? 10 : 0.1,
42 ret = 1;
43
44 // Make all numbers positive
45 T iterations = n > 0 ? n : -n;
46
47 for (T i = 0; i < iterations; i++) {
49 }
50
51 return ret;
52 }
53
55 template<>
56 CSV_CONST CONSTEXPR_14
57 long double pow10(const unsigned& n) noexcept {
58 long double multiplicand = n > 0 ? 10 : 0.1,
59 ret = 1;
60
61 for (unsigned i = 0; i < n; i++) {
63 }
64
65 return ret;
66 }
67
68#ifndef DOXYGEN_SHOULD_SKIP_THIS
70 constexpr DataType int_type_arr[8] = {
73 DataType::UNKNOWN,
75 DataType::UNKNOWN,
76 DataType::UNKNOWN,
77 DataType::UNKNOWN,
79 };
80
81 template<typename T>
82 inline DataType type_num() {
83 static_assert(std::is_integral<T>::value, "T should be an integral type.");
84 static_assert(sizeof(T) <= 8, "Byte size must be no greater than 8.");
85 return int_type_arr[sizeof(T) - 1];
86 }
87
// Explicit specializations of type_num() for non-integral types. These replace
// the generic body (and its integral-only static_assert): every floating point
// type reports CSV_DOUBLE, std::nullptr_t reports CSV_NULL and std::string
// reports CSV_STRING.
88 template<> inline DataType type_num<float>() { return DataType::CSV_DOUBLE; }
89 template<> inline DataType type_num<double>() { return DataType::CSV_DOUBLE; }
90 template<> inline DataType type_num<long double>() { return DataType::CSV_DOUBLE; }
91 template<> inline DataType type_num<std::nullptr_t>() { return DataType::CSV_NULL; }
92 template<> inline DataType type_num<std::string>() { return DataType::CSV_STRING; }
93
// Forward declaration of data_type(), which is defined further down in this
// file. The default arguments live here: `out` defaults to nullptr (no parsed
// value is written back) and the decimal symbol defaults to '.'.
94 CONSTEXPR_14 DataType data_type(csv::string_view in, long double* const out = nullptr,
95 const char decimalsymbol = '.');
96#endif
97
104 template<size_t Bytes>
105 CONSTEXPR_14 long double get_int_max() {
106 static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
107 "Bytes must be a power of 2 below 8.");
108
109 IF_CONSTEXPR (sizeof(signed char) == Bytes) {
110 return (long double)std::numeric_limits<signed char>::max();
111 }
112 else IF_CONSTEXPR (sizeof(short) == Bytes) {
113 return (long double)std::numeric_limits<short>::max();
114 }
115 else IF_CONSTEXPR (sizeof(int) == Bytes) {
116 return (long double)std::numeric_limits<int>::max();
117 }
118 else IF_CONSTEXPR (sizeof(long int) == Bytes) {
119 return (long double)std::numeric_limits<long int>::max();
120 }
121 else {
122 return (long double)std::numeric_limits<long long int>::max();
123 }
124 }
125
129 template<size_t Bytes>
130 CONSTEXPR_14 long double get_uint_max() {
131 static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
132 "Bytes must be a power of 2 below 8.");
133
134 IF_CONSTEXPR(sizeof(unsigned char) == Bytes) {
135 return (long double)std::numeric_limits<unsigned char>::max();
136 }
137 else IF_CONSTEXPR(sizeof(unsigned short) == Bytes) {
138 return (long double)std::numeric_limits<unsigned short>::max();
139 }
140 else IF_CONSTEXPR(sizeof(unsigned int) == Bytes) {
141 return (long double)std::numeric_limits<unsigned int>::max();
142 }
143 else IF_CONSTEXPR(sizeof(unsigned long int) == Bytes) {
144 return (long double)std::numeric_limits<unsigned long int>::max();
145 }
146 else {
147 return (long double)std::numeric_limits<unsigned long long int>::max();
148 }
149 }
150
/** Largest number that can be stored in an 8-bit integer */
152 CONSTEXPR_VALUE_14 long double CSV_INT8_MAX = get_int_max<1>();
153
/** Largest number that can be stored in a 16-bit integer */
155 CONSTEXPR_VALUE_14 long double CSV_INT16_MAX = get_int_max<2>();
156
/** Largest number that can be stored in a 32-bit integer */
158 CONSTEXPR_VALUE_14 long double CSV_INT32_MAX = get_int_max<4>();
159
/** Largest number that can be stored in a 64-bit integer */
161 CONSTEXPR_VALUE_14 long double CSV_INT64_MAX = get_int_max<8>();
162
/** Largest number that can be stored in an 8-bit unsigned integer */
164 CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX = get_uint_max<1>();
165
/** Largest number that can be stored in a 16-bit unsigned integer */
167 CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX = get_uint_max<2>();
168
/** Largest number that can be stored in a 32-bit unsigned integer */
170 CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX = get_uint_max<4>();
171
/** Largest number that can be stored in a 64-bit unsigned integer */
173 CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX = get_uint_max<8>();
174
179 CSV_PRIVATE CONSTEXPR_14
182 const long double& coeff,
183 long double * const out) {
184 long double exponent = 0;
186
187 // Exponents in scientific notation should not be decimal numbers
189 if (out) *out = coeff * pow10(exponent);
191 }
192
194 }
195
199 CSV_PRIVATE CSV_PURE CONSTEXPR_14
200 DataType _determine_integral_type(const long double& number) noexcept {
201 // We can assume number is always non-negative
202 assert(number >= 0);
203
205 return DataType::CSV_INT8;
207 return DataType::CSV_INT16;
209 return DataType::CSV_INT32;
211 return DataType::CSV_INT64;
212 else // Conversion to long long will cause an overflow
214 }
215
// Distinguishes numeric from other text values.
//
// Scans `in` one character at a time, tracking which character classes are
// still permitted (whitespace, decimal symbol, digits) while accumulating the
// integral and fractional parts of the number.
//
// Parameters:
//   in            - text of a single field
//   out           - if non-null, receives the parsed value (negated when a
//                   '-' was seen) once at least one digit has been found
//   decimalSymbol - character accepted as the decimal separator
//
// Returns DataType::CSV_NULL for an empty string or one with no digits.
//
// NOTE(review): this listing has lost several lines (its embedded line
// numbers skip 258, 264, 271, 287, 290, 292, 297, 306, 312, 314, 322 and
// 334), so the remaining return paths are not visible here. Each gap is
// marked below; confirm the missing statements against the original header.
229 CONSTEXPR_14
230 DataType data_type(csv::string_view in, long double* const out, const char decimalSymbol) {
231 // Empty string --> NULL
232 if (in.size() == 0)
233 return DataType::CSV_NULL;
234
// State flags: what may legally appear next, and what has been seen so far
235 bool ws_allowed = true,
236 dot_allowed = true,
237 digit_allowed = true,
238 is_negative = false,
239 has_digit = false,
240 prob_float = false;
241
242 unsigned places_after_decimal = 0;
243 long double integral_part = 0,
244 decimal_part = 0;
245
246 for (size_t i = 0, ilen = in.size(); i < ilen; i++) {
247 const char& current = in[i];
248
249 switch (current) {
250 case ' ':
251 if (!ws_allowed) {
// A space directly after a digit ends the number: only more spaces may follow
252 if (isdigit(in[i - 1])) {
253 digit_allowed = false;
254 ws_allowed = true;
255 }
256 else {
257 // Ex: '510 123 4567'
// NOTE(review): listing line 258 is missing; presumably the field is
// rejected as non-numeric here - confirm
259 }
260 }
261 break;
262 case '+':
263 if (!ws_allowed) {
// NOTE(review): listing line 264 is missing; presumably a '+' after the
// number has started rejects the field as non-numeric - confirm
265 }
266
267 break;
268 case '-':
269 if (!ws_allowed) {
270 // Ex: '510-123-4567'
// NOTE(review): listing line 271 is missing; presumably the field is
// rejected as non-numeric here - confirm
272 }
273
274 is_negative = true;
275 break;
276 // case decimalSymbol: not allowed because decimalSymbol is not a literal,
277 // it is handled in the default block
278 case 'e':
279 case 'E':
280 // Process scientific notation
// Taken when a decimal symbol was already seen, or when the 'e' is preceded
// by a digit and followed by at least one more character
281 if (prob_float || (i && i + 1 < ilen && isdigit(in[i - 1]))) {
282 size_t exponent_start_idx = i + 1;
283 prob_float = true;
284
285 // Strip out plus sign
// NOTE(review): when prob_float was already true the `i + 1 < ilen` check
// above is short-circuited, so in[i + 1] can index one past the end for
// input ending in 'e' (e.g. "1.5e") - confirm and guard if so
286 if (in[i + 1] == '+') {
// NOTE(review): listing line 287 is missing; presumably it advances
// exponent_start_idx past the '+' - confirm
288 }
289
// NOTE(review): listing lines 290 and 292 are missing; the surviving
// arguments suggest a call that hands the exponent substring, a coefficient
// and `out` to a helper - confirm
291 in.substr(exponent_start_idx),
293 out
294 );
295 }
296
// NOTE(review): listing line 297 is missing - confirm what happens when an
// 'e'/'E' does not qualify as scientific notation
298 break;
299 default:
// Map the character onto 0-9 when it is a decimal digit
300 short digit = static_cast<short>(current - '0');
301 if (digit >= 0 && digit <= 9) {
302 // Process digit
303 has_digit = true;
304
305 if (!digit_allowed)
// NOTE(review): listing line 306 is missing (the statement run when a digit
// follows trailing whitespace) - confirm
307 else if (ws_allowed) // Ex: '510 456'
308 ws_allowed = false;
309
310 // Build current number
311 if (prob_float)
// NOTE(review): listing lines 312 and 314 are missing; presumably they fold
// the digit into decimal_part and integral_part respectively - confirm
313 else
315 }
316 // case decimalSymbol: not allowed because decimalSymbol is not a literal.
// Only the first decimal symbol is accepted; it switches to fractional digits
317 else if (dot_allowed && current == decimalSymbol) {
318 dot_allowed = false;
319 prob_float = true;
320 }
321 else {
// NOTE(review): listing line 322 is missing; presumably any other character
// rejects the field as non-numeric - confirm
323 }
324 }
325 }
326
327 // No non-numeric/non-whitespace characters found
328 if (has_digit) {
329 long double number = integral_part + decimal_part;
330 if (out) {
331 *out = is_negative ? -number : number;
332 }
333
// NOTE(review): listing line 334 is missing; this is where the numeric type
// would be returned - confirm
335 }
336
337 // Just whitespace
338 return DataType::CSV_NULL;
339 }
340 }
341}
A standalone header file containing shared code.
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
Definition common.hpp:108
CONSTEXPR_VALUE_14 long double CSV_INT16_MAX
Largest number that can be stored in a 16-bit integer.
CONSTEXPR_VALUE_14 long double CSV_INT32_MAX
Largest number that can be stored in a 32-bit integer.
CONSTEXPR_VALUE_14 long double CSV_UINT16_MAX
Largest number that can be stored in a 16-bit unsigned integer.
CONSTEXPR_14 DataType data_type(csv::string_view in, long double *const out, const char decimalSymbol)
Distinguishes numeric from other text values.
CONSTEXPR_VALUE_14 long double CSV_UINT32_MAX
Largest number that can be stored in a 32-bit unsigned integer.
CSV_CONST CONSTEXPR_14 long double pow10(const T &n) noexcept
Compute 10 to the power of n.
Definition data_type.hpp:40
CONSTEXPR_VALUE_14 long double CSV_INT64_MAX
Largest number that can be stored in a 64-bit integer.
CONSTEXPR_VALUE_14 long double CSV_INT8_MAX
Largest number that can be stored in an 8-bit integer.
CONSTEXPR_VALUE_14 long double CSV_UINT64_MAX
Largest number that can be stored in a 64-bit unsigned integer.
CONSTEXPR_VALUE_14 long double CSV_UINT8_MAX
Largest number that can be stored in an 8-bit unsigned integer.
CSV_CONST CONSTEXPR_17 OutArray arrayToDefault(T &&value)
Helper constexpr function to initialize an array with all the elements set to value.
CONSTEXPR_14 long double get_uint_max()
Given a byte size, return the largest number that can be stored in an unsigned integer of that size.
CSV_PRIVATE CONSTEXPR_14 DataType _process_potential_exponential(csv::string_view exponential_part, const long double &coeff, long double *const out)
Given a pointer to the start of the exponential part of a number written (possibly) ...
CONSTEXPR_14 long double get_int_max()
Given a byte size, return the largest number that can be stored in an integer of that size.
CSV_PRIVATE CSV_PURE CONSTEXPR_14 DataType _determine_integral_type(const long double &number) noexcept
Given the absolute value of an integer, determine what numeric type it fits in.
The all encompassing namespace.
DataType
Enumerates the different CSV field types that are recognized by this library.
Definition data_type.hpp:20
@ CSV_INT64
64-bit integer (long long on MSVC/GCC)
@ CSV_DOUBLE
Floating point value.
@ CSV_NULL
Empty string.
@ CSV_BIGINT
Value too big to fit in a 64-bit int.
@ CSV_INT16
16-bit integer (short on MSVC/GCC)
@ CSV_INT32
32-bit integer (int on MSVC/GCC)
@ CSV_INT8
8-bit integer
@ CSV_STRING
Non-numeric string.
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:99