Vince's CSV Parser
Loading...
Searching...
No Matches
common.hpp
Go to the documentation of this file.
1
5#pragma once
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <deque>
#include <functional>
#include <limits>
#include <memory>
#include <utility>
#if !defined(CSV_ENABLE_THREADS) || CSV_ENABLE_THREADS
#include <mutex>
#endif
16
17#if defined(_WIN32)
18# ifndef WIN32_LEAN_AND_MEAN
19# define WIN32_LEAN_AND_MEAN
20# endif
21# include <windows.h>
22# undef max
23# undef min
24#elif defined(__linux__)
25# include <unistd.h>
26#endif
27
31#define CSV_INLINE
32#include <type_traits>
33
34#if defined(__EMSCRIPTEN__)
35#undef CSV_ENABLE_THREADS
36#define CSV_ENABLE_THREADS 0
37#elif !defined(CSV_ENABLE_THREADS)
38#define CSV_ENABLE_THREADS 1
39#endif
40
41// Minimal portability macros (Hedley subset) with CSV_ prefix.
42#if defined(CSV_CODE_COVERAGE)
43 #define CSV_CONST
44 #define CSV_PURE
45 #define CSV_FORCE_INLINE inline
46 #define CSV_PRIVATE
47 #define CSV_NON_NULL(...)
48#elif defined(__clang__) || defined(__GNUC__)
49 #define CSV_CONST __attribute__((__const__))
50 #define CSV_PURE __attribute__((__pure__))
51 #define CSV_FORCE_INLINE inline __attribute__((__always_inline__))
52 #if defined(_WIN32)
53 #define CSV_PRIVATE
54 #else
55 #define CSV_PRIVATE __attribute__((__visibility__("hidden")))
56 #endif
57 #define CSV_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__)))
58#elif defined(_MSC_VER)
59 #define CSV_CONST
60 #define CSV_PURE
61 #define CSV_FORCE_INLINE __forceinline
62 #define CSV_PRIVATE
63 #define CSV_NON_NULL(...)
64#else
65 #define CSV_CONST
66 #define CSV_PURE
67 #define CSV_FORCE_INLINE inline
68 #define CSV_PRIVATE
69 #define CSV_NON_NULL(...)
70#endif
71
72// MSVC-specific warning suppression helpers. Use __pragma so the macros
73// work inside other macro bodies (where #pragma is not allowed).
74#ifdef _MSC_VER
75# define CSV_MSVC_PUSH_DISABLE(w) __pragma(warning(push)) __pragma(warning(disable: w))
76# define CSV_MSVC_POP __pragma(warning(pop))
77#else
78# define CSV_MSVC_PUSH_DISABLE(w)
79# define CSV_MSVC_POP
80#endif
81
82// This library uses C++ exceptions for error reporting in public APIs.
83#if defined(__cpp_exceptions) || defined(_CPPUNWIND) || defined(__EXCEPTIONS)
84 #define CSV_EXCEPTIONS_ENABLED 1
85#else
86 #define CSV_EXCEPTIONS_ENABLED 0
87#endif
88
89#if !CSV_EXCEPTIONS_ENABLED
90 #error "csv-parser requires C++ exceptions. Enable exception handling (for example, remove -fno-exceptions or use /EHsc)."
91#endif
92
93// Detect C++ standard version BEFORE namespace to properly include string_view
94// MSVC: __cplusplus == 199711L unless /Zc:__cplusplus is set; use _MSVC_LANG instead.
95#if defined(_MSVC_LANG) && _MSVC_LANG > __cplusplus
96# define CSV_CPLUSPLUS _MSVC_LANG
97#else
98# define CSV_CPLUSPLUS __cplusplus
99#endif
100
101#if CSV_CPLUSPLUS >= 202002L
102#define CSV_HAS_CXX20
103#endif
104
105#if CSV_CPLUSPLUS >= 202302L
106#define CSV_HAS_CXX23
107#endif
108
109#if CSV_CPLUSPLUS >= 201703L
110#define CSV_HAS_CXX17
111#endif
112
113#if CSV_CPLUSPLUS >= 201402L
114#define CSV_HAS_CXX14
115#endif
116
117// Annotate intentional switch fallthroughs in parser hot loops without
118// reshaping the control flow just to appease compiler diagnostics.
119#if defined(CSV_HAS_CXX17)
120#define CSV_FALLTHROUGH [[fallthrough]]
121#elif defined(__clang__) && defined(__has_cpp_attribute)
122#if __has_cpp_attribute(clang::fallthrough)
123#define CSV_FALLTHROUGH [[clang::fallthrough]]
124#else
125#define CSV_FALLTHROUGH ((void)0)
126#endif
127#elif defined(__GNUC__) && __GNUC__ >= 7
128#define CSV_FALLTHROUGH __attribute__((fallthrough))
129#else
130#define CSV_FALLTHROUGH ((void)0)
131#endif
132
133// Include string_view BEFORE csv namespace to avoid namespace pollution issues
134#ifdef CSV_HAS_CXX17
135#include <string_view>
136#else
137#include "../external/string_view.hpp"
138#endif
139
140#ifdef CSV_HAS_CXX20
141#include <ranges>
142#endif
143
144namespace csv {
145#ifdef _MSC_VER
146#pragma region Compatibility Macros
147#endif
161// Allows static assertions without specifying a message
162#define STATIC_ASSERT(x) static_assert(x, "Assertion failed")
163
164#ifdef NDEBUG
165 #define CSV_DEBUG_ASSERT(x) ((void)sizeof(x), (void)0)
166#else
167 #define CSV_DEBUG_ASSERT(x) assert(x)
168#endif
169
170#ifdef CSV_HAS_CXX17
174 using string_view = std::string_view;
175#else
179 using string_view = nonstd::string_view;
180#endif
181
182#ifdef CSV_HAS_CXX17
183 #define IF_CONSTEXPR if constexpr
184 #define CONSTEXPR_VALUE constexpr
185
186 #define CONSTEXPR_17 constexpr
187#else
188 #define IF_CONSTEXPR if
189 #define CONSTEXPR_VALUE const
190
191 #define CONSTEXPR_17 inline
192#endif
193
194#ifdef CSV_HAS_CXX14
195 template<bool B, class T = void>
196 using enable_if_t = std::enable_if_t<B, T>;
197
198 #define CONSTEXPR_14 constexpr
199 #define CONSTEXPR_VALUE_14 constexpr
200#else
201 template<bool B, class T = void>
202 using enable_if_t = typename std::enable_if<B, T>::type;
203
204 #define CONSTEXPR_14 inline
205 #define CONSTEXPR_VALUE_14 const
206#endif
207
208 namespace internals {
209 template<bool B, class T = void>
210 using enable_if_t = csv::enable_if_t<B, T>;
211 }
212
#ifdef CSV_HAS_CXX17
    /** Result type of invoking F with Args... (backed by std::invoke_result). */
    template<typename F, typename... Args>
    using invoke_result_t = typename std::invoke_result<F, Args...>::type;
#else
    /** Result type of invoking F with Args... (backed by std::result_of before C++17). */
    template<typename F, typename... Args>
    using invoke_result_t = typename std::result_of<F(Args...)>::type;
#endif

    namespace internals {
        /** Helper that makes void_t depend on its arguments (see void_t below). */
        template<typename... Ts>
        struct make_void { using type = void; };
    }

    /**
     * Maps any sequence of well-formed types to void.
     *
     * Routed through a dependent helper instead of `using void_t = void;`:
     * with the direct alias the arguments are unused, and compilers that
     * predate the CWG 1558 resolution may not treat an ill-formed argument
     * as a substitution failure, which silently breaks detection traits.
     */
    template<typename... Ts>
    using void_t = typename internals::make_void<Ts...>::type;

    /** Primary template: selected when F cannot be invoked with Args... */
    template<typename F, typename ReturnType, typename Enable, typename... Args>
    struct is_invocable_returning_impl : std::false_type {};

    /**
     * Selected when invoke_result_t<F, Args...> is well-formed; the value is
     * true only if that result is convertible to ReturnType.
     */
    template<typename F, typename ReturnType, typename... Args>
    struct is_invocable_returning_impl<
        F,
        ReturnType,
        void_t<invoke_result_t<F, Args...>>,
        Args...
    > : std::integral_constant<
        bool,
        std::is_convertible<invoke_result_t<F, Args...>, ReturnType>::value
    > {};

    /**
     * Trait whose value is true when F is invocable with Args... and the
     * result converts to ReturnType, and false otherwise.
     */
    template<typename F, typename ReturnType, typename... Args>
    struct is_invocable_returning : is_invocable_returning_impl<F, ReturnType, void, Args...> {};
240
241 // Resolves g++ bug with regard to constexpr methods.
242 // Keep this gated to C++17+, since C++11/14 pedantic mode rejects constexpr
243 // non-static members when the enclosing class is non-literal.
244 // See: https://stackoverflow.com/questions/36489369/constexpr-non-static-member-function-with-non-constexpr-constructor-gcc-clang-d
245#if defined(__GNUC__) && !defined(__clang__)
246 #if defined(CSV_HAS_CXX17) && (((__GNUC__ == 7) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 8))
247 #define CONSTEXPR constexpr
248 #endif
249#else
250 #ifdef CSV_HAS_CXX17
251 #define CONSTEXPR constexpr
252 #endif
253#endif
254
255#ifndef CONSTEXPR
256#define CONSTEXPR inline
257#endif
258
259#ifdef _MSC_VER
260#pragma endregion
261#endif
262
263 namespace internals {
264 template<typename T>
266 private:
267 template<typename U>
268 static auto test(int) -> decltype(
269 std::hash<U>{}(std::declval<const U&>()),
270 std::true_type{}
271 );
272
273 template<typename>
274 static std::false_type test(...);
275
276 public:
277 static constexpr bool value = decltype(test<T>(0))::value;
278 };
279
280 template<typename T>
282 private:
283 template<typename U>
284 static auto test(int) -> decltype(
285 std::declval<const U&>() == std::declval<const U&>(),
286 std::true_type{}
287 );
288
289 template<typename>
290 static std::false_type test(...);
291
292 public:
293 static constexpr bool value = decltype(test<T>(0))::value;
294 };
295
/**
 * Move-only holder for a std::shared_ptr<T> that is created on first use.
 *
 * When CSV_ENABLE_THREADS is non-zero, creation is guarded by std::call_once;
 * otherwise a plain null check is used.
 *
 * NOTE(review): after a move the source holder keeps its own once_flag but no
 * value, so calling get_or_create() on a moved-from holder whose flag already
 * fired would dereference null -- confirm callers never do this.
 */
template<typename T>
class lazy_shared_ptr {
public:
    lazy_shared_ptr() = default;
    lazy_shared_ptr(const lazy_shared_ptr&) = delete;
    lazy_shared_ptr& operator=(const lazy_shared_ptr&) = delete;

    /** Takes over the other holder's value; the once_flag itself is not movable. */
    lazy_shared_ptr(lazy_shared_ptr&& other) noexcept : value_(std::move(other.value_)) {}

    /** Takes over the other holder's value (self-assignment is a no-op). */
    lazy_shared_ptr& operator=(lazy_shared_ptr&& other) noexcept {
        if (this != &other) {
            value_ = std::move(other.value_);
        }

        return *this;
    }

    /**
     * Return the held value, creating it with factory() if none exists yet.
     *
     * @param factory Callable whose result is assignable to std::shared_ptr<T>.
     * @return Reference to the held value.
     */
    template<typename Factory>
    T& get_or_create(Factory&& factory) const {
#if CSV_ENABLE_THREADS
        std::call_once(init_once_, [this, &factory]() {
            // A moved-into holder already owns a value but has a fresh
            // once_flag; keep that value, as the non-threaded branch does.
            if (!value_) {
                value_ = factory();
            }
        });
#else
        if (!value_) {
            value_ = factory();
        }
#endif
        return *value_;
    }

    /** Raw pointer to the held value, or nullptr if nothing has been created. */
    T* get() const noexcept {
        return value_.get();
    }

private:
    // mutable: get_or_create() is const but fills the value in lazily.
    mutable std::shared_ptr<T> value_ = nullptr;
#if CSV_ENABLE_THREADS
    mutable std::once_flag init_once_;
#endif
};
337
338 #ifdef CSV_HAS_CXX20
339 #ifdef _MSC_VER
340 #pragma region CXX20 Concepts
341 #endif
342
343 template<typename T>
345 std::ranges::input_range<std::remove_reference_t<T>>
346 && std::convertible_to<
347 std::ranges::range_reference_t<std::remove_reference_t<T>>,
349 >;
350
351 template<typename T>
352 concept has_to_sv_range = requires(const std::remove_reference_t<T>& value) {
353 { value.to_sv_range() } -> std::ranges::input_range;
354 requires std::convertible_to<
355 std::ranges::range_reference_t<decltype(value.to_sv_range())>,
357 >;
358 };
359
360 template<typename T>
362
363 template<typename T>
365 std::ranges::input_range<std::remove_reference_t<T>>
366 && csv_row_like<
367 std::ranges::range_reference_t<std::remove_reference_t<T>>
368 >;
369
370 #ifdef _MSC_VER
371 #pragma endregion
372 #endif
373 #endif
374
// PAGE_SIZE may already be defined as a macro by the host system's headers;
// drop it so the constant below can be declared.
#if defined(PAGE_SIZE)
#undef PAGE_SIZE
#endif

// Get operating system specific details
#if defined(_WIN32)
/** Windows stand-in for POSIX getpagesize(): the larger of the page size and the allocation granularity. */
inline int getpagesize() {
    SYSTEM_INFO sys_info = {};
    GetSystemInfo(&sys_info);
    // Both fields are unsigned DWORDs; convert explicitly instead of relying
    // on an implicit narrowing conversion at the return.
    return static_cast<int>(std::max(sys_info.dwPageSize, sys_info.dwAllocationGranularity));
}

/** Size of a memory page in bytes. */
const int PAGE_SIZE = getpagesize();
#elif defined(__linux__)
/** Size of a memory page in bytes. */
const int PAGE_SIZE = getpagesize();
#else
/** Size of a memory page in bytes (fixed fallback for other platforms). */
const int PAGE_SIZE = 4096;
#endif
397
408 constexpr size_t CSV_CHUNK_SIZE_DEFAULT = 10000000; // 10MB
409
415 constexpr size_t CSV_CHUNK_SIZE_FLOOR = 500 * 1024; // 500KB
416
418 constexpr size_t CSV_SPECULATIVE_PARALLEL_MIN_BYTES = 50ull * 1024ull * 1024ull; // 50MB
419
/**
 * Approximate floating point comparison.
 *
 * @param a       First value.
 * @param b       Second value.
 * @param epsilon Tolerance; values whose absolute difference is strictly
 *                below it are considered equal.
 * @return true if a and b are exactly equal or differ by less than epsilon.
 */
template<typename T>
inline bool is_equal(T a, T b, T epsilon = static_cast<T>(0.001)) {
    static_assert(std::is_floating_point<T>::value, "T must be a floating point type.");
    // The exact check handles equal infinities (inf - inf is NaN, which never
    // compares below epsilon) and identical values with a zero tolerance.
    return a == b || std::abs(a - b) < epsilon;
}
426
/**
 * An enum used for describing the significance of each character with
 * respect to CSV parsing.
 *
 * The numeric layout is load-bearing: quote_escape_flag() clears the two low
 * bits (the QUOTE mask), and the parser relies on
 * DELIMITER < CARRIAGE_RETURN < NEWLINE.
 */
enum class ParseFlags {
    QUOTE_ESCAPE_QUOTE = 0, /**< A quote inside or terminating a quote_escaped field */
    QUOTE = 2 | 1,          /**< Characters which may signify a quote escape */
    NOT_SPECIAL = 4,        /**< Characters with no special meaning or escaped delimiters and newlines */
    DELIMITER = 4 | 1,      /**< Characters which signify a new field */
    CARRIAGE_RETURN = 4 | 2, /**< Characters which signify a carriage return */
    NEWLINE = 4 | 2 | 1     /**< Characters which signify a new row */
};

/**
 * Transform the ParseFlags given the context of whether or not the current
 * field is quote escaped.
 *
 * @param flag         Flag of the current character.
 * @param quote_escape true if the current field is quote escaped.
 * @return flag unchanged when quote_escape is false; otherwise flag with the
 *         QUOTE bits cleared, so QUOTE becomes QUOTE_ESCAPE_QUOTE and
 *         delimiters and line endings become NOT_SPECIAL.
 */
constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept {
    // Single return expression keeps this valid as a C++11 constexpr function.
    return static_cast<ParseFlags>(
        static_cast<int>(flag) & ~(quote_escape ? static_cast<int>(ParseFlags::QUOTE) : 0)
    );
}
447
448 // Assumed to be true by parsing functions: allows for testing
449 // if an item is DELIMITER or NEWLINE with a >= statement
450 STATIC_ASSERT(ParseFlags::DELIMITER < ParseFlags::CARRIAGE_RETURN);
451 STATIC_ASSERT(ParseFlags::DELIMITER < ParseFlags::NEWLINE);
452 STATIC_ASSERT(ParseFlags::CARRIAGE_RETURN < ParseFlags::NEWLINE);
453
459 STATIC_ASSERT(quote_escape_flag(ParseFlags::NOT_SPECIAL, false) == ParseFlags::NOT_SPECIAL);
460 STATIC_ASSERT(quote_escape_flag(ParseFlags::QUOTE, false) == ParseFlags::QUOTE);
461 STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, false) == ParseFlags::DELIMITER);
462 STATIC_ASSERT(quote_escape_flag(ParseFlags::CARRIAGE_RETURN, false) == ParseFlags::CARRIAGE_RETURN);
463 STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, false) == ParseFlags::NEWLINE);
464
465 STATIC_ASSERT(quote_escape_flag(ParseFlags::NOT_SPECIAL, true) == ParseFlags::NOT_SPECIAL);
466 STATIC_ASSERT(quote_escape_flag(ParseFlags::QUOTE, true) == ParseFlags::QUOTE_ESCAPE_QUOTE);
467 STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, true) == ParseFlags::NOT_SPECIAL);
468 STATIC_ASSERT(quote_escape_flag(ParseFlags::CARRIAGE_RETURN, true) == ParseFlags::NOT_SPECIAL);
469 STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, true) == ParseFlags::NOT_SPECIAL);
470
472 using ParseFlagMap = std::array<ParseFlags, 256>;
473
475 using WhitespaceMap = std::array<bool, 256>;
476 }
477
479 constexpr int CSV_NOT_FOUND = -1;
480
482 constexpr unsigned CHAR_OFFSET = std::numeric_limits<char>::is_signed ? 128 : 0;
483}
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
Definition common.hpp:472
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
Definition common.hpp:475
bool is_equal(T a, T b, T epsilon=0.001)
Definition common.hpp:421
const int PAGE_SIZE
Size of a memory page in bytes.
Definition common.hpp:395
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
Definition common.hpp:433
@ QUOTE_ESCAPE_QUOTE
A quote inside or terminating a quote_escaped field.
@ NOT_SPECIAL
Characters with no special meaning or escaped delimiters and newlines.
@ NEWLINE
Characters which signify a new row.
@ CARRIAGE_RETURN
Characters which signify a carriage return.
@ QUOTE
Characters which may signify a quote escape.
@ DELIMITER
Characters which signify a new field.
constexpr size_t CSV_CHUNK_SIZE_FLOOR
Minimum supported custom chunk size for CSVFormat::chunk_size().
Definition common.hpp:415
constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept
Transform the ParseFlags given the context of whether or not the current field is quote escaped.
Definition common.hpp:444
constexpr size_t CSV_SPECULATIVE_PARALLEL_MIN_BYTES
Default minimum source size before speculative parallel parsing is considered.
Definition common.hpp:418
constexpr size_t CSV_CHUNK_SIZE_DEFAULT
Default chunk size for lazy-loading large CSV files.
Definition common.hpp:408
The all-encompassing namespace.
constexpr int CSV_NOT_FOUND
Integer indicating a requested column wasn't found.
Definition common.hpp:479
constexpr unsigned CHAR_OFFSET
Offset to convert char into array index.
Definition common.hpp:482
std::string_view string_view
The string_view class used by this library.
Definition common.hpp:174