Vince's CSV Parser
Loading...
Searching...
No Matches
common.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <algorithm>
7#include <array>
8#include <cassert>
9#include <cmath>
10#include <cstdint>
11#include <cstdlib>
12#include <deque>
13#include <limits>
14#include <memory>
15#if !defined(CSV_ENABLE_THREADS) || CSV_ENABLE_THREADS
16#include <mutex>
17#endif
18
19#if defined(_WIN32)
20# ifndef WIN32_LEAN_AND_MEAN
21# define WIN32_LEAN_AND_MEAN
22# endif
23# include <windows.h>
24# undef max
25# undef min
26#elif defined(__linux__)
27# include <unistd.h>
28#endif
29
33#define CSV_INLINE
34#include <type_traits>
35
36#if defined(__EMSCRIPTEN__)
37#undef CSV_ENABLE_THREADS
38#define CSV_ENABLE_THREADS 0
39#elif !defined(CSV_ENABLE_THREADS)
40#define CSV_ENABLE_THREADS 1
41#endif
42
// Minimal portability macros (Hedley subset) with CSV_ prefix.
//
//   CSV_CONST          GCC/Clang `const` function attribute, empty elsewhere.
//   CSV_PURE           GCC/Clang `pure` function attribute, empty elsewhere.
//   CSV_FORCE_INLINE   `always_inline` on GCC/Clang, `__forceinline` on MSVC,
//                      plain `inline` otherwise.
//   CSV_PRIVATE        Hidden symbol visibility on non-Windows GCC/Clang,
//                      empty elsewhere.
//   CSV_NON_NULL(...)  GCC/Clang `nonnull` attribute for the listed
//                      parameter indices, empty elsewhere.
//
// Defining CSV_CODE_COVERAGE switches every attribute off and reduces
// CSV_FORCE_INLINE to plain `inline`, regardless of compiler.
#if !defined(CSV_CODE_COVERAGE) && (defined(__clang__) || defined(__GNUC__))
    #define CSV_CONST __attribute__((__const__))
    #define CSV_PURE __attribute__((__pure__))
    #define CSV_FORCE_INLINE inline __attribute__((__always_inline__))
    #if defined(_WIN32)
        #define CSV_PRIVATE
    #else
        #define CSV_PRIVATE __attribute__((__visibility__("hidden")))
    #endif
    #define CSV_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__)))
#else
    // Coverage builds, MSVC and unknown compilers: no function attributes.
    #define CSV_CONST
    #define CSV_PURE
    #define CSV_PRIVATE
    #define CSV_NON_NULL(...)
    #if !defined(CSV_CODE_COVERAGE) && defined(_MSC_VER)
        #define CSV_FORCE_INLINE __forceinline
    #else
        #define CSV_FORCE_INLINE inline
    #endif
#endif
73
// MSVC-specific warning suppression helpers. Use __pragma so the macros
// work inside other macro bodies (where #pragma is not allowed).
//
//   CSV_MSVC_PUSH_DISABLE(w)  push the warning state, then disable warning(s) w
//   CSV_MSVC_POP              restore the state saved by the matching push
//
// Both expand to nothing on every other compiler.
#ifdef _MSC_VER
    #define CSV_MSVC_PUSH_DISABLE(w) __pragma(warning(push)) __pragma(warning(disable: w))
    #define CSV_MSVC_POP __pragma(warning(pop))
#else
    #define CSV_MSVC_PUSH_DISABLE(w)
    #define CSV_MSVC_POP
#endif
83
// This library uses C++ exceptions for error reporting in public APIs.
//
// CSV_EXCEPTIONS_ENABLED is 1 when any of the exception feature macros
// (__cpp_exceptions, _CPPUNWIND, __EXCEPTIONS) is defined and 0 otherwise.
// A build without exceptions is rejected outright.
#if defined(__cpp_exceptions) || defined(_CPPUNWIND) || defined(__EXCEPTIONS)
    #define CSV_EXCEPTIONS_ENABLED 1
#else
    #define CSV_EXCEPTIONS_ENABLED 0
    #error "csv-parser requires C++ exceptions. Enable exception handling (for example, remove -fno-exceptions or use /EHsc)."
#endif
94
// Detect C++ standard version BEFORE namespace to properly include string_view
// MSVC: __cplusplus == 199711L unless /Zc:__cplusplus is set; use _MSVC_LANG instead.
//
// CSV_CPLUSPLUS holds whichever of _MSVC_LANG / __cplusplus reports the newer
// standard. The CSV_HAS_CXXnn macros are cumulative: a C++20 build defines
// CSV_HAS_CXX14, CSV_HAS_CXX17 and CSV_HAS_CXX20.
#if defined(_MSVC_LANG) && _MSVC_LANG > __cplusplus
    #define CSV_CPLUSPLUS _MSVC_LANG
#else
    #define CSV_CPLUSPLUS __cplusplus
#endif

#if CSV_CPLUSPLUS >= 202302L
    #define CSV_HAS_CXX23
#endif

#if CSV_CPLUSPLUS >= 202002L
    #define CSV_HAS_CXX20
#endif

#if CSV_CPLUSPLUS >= 201703L
    #define CSV_HAS_CXX17
#endif

#if CSV_CPLUSPLUS >= 201402L
    #define CSV_HAS_CXX14
#endif
118
// Annotate intentional switch fallthroughs in parser hot loops without
// reshaping the control flow just to appease compiler diagnostics.
//
// Selection order:
//   1. C++17 or newer         -> [[fallthrough]]
//   2. Clang with the attribute -> [[clang::fallthrough]]
//   3. GCC 7 or newer         -> __attribute__((fallthrough))
//   4. anything else          -> ((void)0), a harmless no-op statement
//
// Use as a statement: `CSV_FALLTHROUGH;`
#if defined(CSV_HAS_CXX17)
    #define CSV_FALLTHROUGH [[fallthrough]]
#elif defined(__clang__) && defined(__has_cpp_attribute)
    #if __has_cpp_attribute(clang::fallthrough)
        #define CSV_FALLTHROUGH [[clang::fallthrough]]
    #endif
#elif defined(__GNUC__) && __GNUC__ >= 7
    #define CSV_FALLTHROUGH __attribute__((fallthrough))
#endif

// No branch above supplied an annotation: fall back to the no-op.
#ifndef CSV_FALLTHROUGH
    #define CSV_FALLTHROUGH ((void)0)
#endif
134
135// Include string_view BEFORE csv namespace to avoid namespace pollution issues
136#ifdef CSV_HAS_CXX17
137#include <string_view>
138#else
139#include "../external/string_view.hpp"
140#endif
141
142#ifdef CSV_HAS_CXX20
143#include <ranges>
144#endif
145
146namespace csv {
147#ifdef _MSC_VER
148#pragma region Compatibility Macros
149#endif
// Allows static assertions without specifying a message.
// Variadic so that conditions containing top-level commas, such as
// STATIC_ASSERT(std::is_same<A, B>::value), reach static_assert intact.
#define STATIC_ASSERT(...) static_assert((__VA_ARGS__), "Assertion failed")

// CSV_DEBUG_ASSERT(x) is assert(x) in debug builds. With NDEBUG defined the
// expression only appears inside sizeof, so it is not evaluated but the
// names it mentions still count as used.
#ifdef NDEBUG
    #define CSV_DEBUG_ASSERT(x) ((void)sizeof(x), (void)0)
#else
    #define CSV_DEBUG_ASSERT(x) assert(x)
#endif
171
172#ifdef CSV_HAS_CXX17
176 using string_view = std::string_view;
177#else
181 using string_view = nonstd::string_view;
182#endif
183
// C++17 feature shims.
//
//   IF_CONSTEXPR     `if constexpr` on C++17, plain `if` before
//   CONSTEXPR_VALUE  `constexpr` on C++17, `const` before
//   CONSTEXPR_17     `constexpr` on C++17, `inline` before
#ifdef CSV_HAS_CXX17
    #define IF_CONSTEXPR if constexpr
    #define CONSTEXPR_VALUE constexpr
    #define CONSTEXPR_17 constexpr
#else
    #define IF_CONSTEXPR if
    #define CONSTEXPR_VALUE const
    #define CONSTEXPR_17 inline
#endif
195
// Backport of std::enable_if_t. Spelled through std::enable_if so a single
// definition serves every language standard the library supports.
template<bool B, class T = void>
using enable_if_t = typename std::enable_if<B, T>::type;

// C++14 feature shims.
//
//   CONSTEXPR_14        `constexpr` on C++14 and newer, `inline` before
//   CONSTEXPR_VALUE_14  `constexpr` on C++14 and newer, `const` before
#ifdef CSV_HAS_CXX14
    #define CONSTEXPR_14 constexpr
    #define CONSTEXPR_VALUE_14 constexpr
#else
    #define CONSTEXPR_14 inline
    #define CONSTEXPR_VALUE_14 const
#endif
209
210 namespace internals {
211 template<bool B, class T = void>
212 using enable_if_t = csv::enable_if_t<B, T>;
213 }
214
// Result type of invoking F with Args...
// Uses std::invoke_result on C++17 and newer, and std::result_of before that.
#ifdef CSV_HAS_CXX17
template<typename F, typename... Args>
using invoke_result_t = typename std::invoke_result<F, Args...>::type;
#else
template<typename F, typename... Args>
using invoke_result_t = typename std::result_of<F(Args...)>::type;
#endif
222
// Helper for void_t: naming ::type forces every Ts to be substituted.
template<typename... Ts>
struct make_void {
    using type = void;
};

// Maps any list of well-formed types to void, for SFINAE detection.
// Routed through make_void rather than `using void_t = void` because an
// alias that ignores its arguments need not trigger substitution failure
// on compilers that predate the CWG 1558 resolution.
template<typename... Ts>
using void_t = typename make_void<Ts...>::type;
225
226 template<typename F, typename ReturnType, typename Enable, typename... Args>
227 struct is_invocable_returning_impl : std::false_type {};
228
229 template<typename F, typename ReturnType, typename... Args>
231 F,
232 ReturnType,
233 void_t<invoke_result_t<F, Args...>>,
234 Args...
235 > : std::integral_constant<
236 bool,
237 std::is_convertible<invoke_result_t<F, Args...>, ReturnType>::value
238 > {};
239
240 template<typename F, typename ReturnType, typename... Args>
241 struct is_invocable_returning : is_invocable_returning_impl<F, ReturnType, void, Args...> {};
242
// Resolves g++ bug with regard to constexpr methods.
// Keep this gated to C++17+, since C++11/14 pedantic mode rejects constexpr
// non-static members when the enclosing class is non-literal.
// See: https://stackoverflow.com/questions/36489369/constexpr-non-static-member-function-with-non-constexpr-constructor-gcc-clang-d
//
// CONSTEXPR expands to `constexpr` only on C++17 builds whose compiler is
// either not GCC (Clang counts as not GCC here) or GCC 7.2 and newer.
// Everywhere else it expands to `inline`.
#if defined(CSV_HAS_CXX17)
    #if !defined(__GNUC__) || defined(__clang__) \
        || (__GNUC__ >= 8) || ((__GNUC__ == 7) && (__GNUC_MINOR__ >= 2))
        #define CONSTEXPR constexpr
    #endif
#endif

#ifndef CONSTEXPR
    #define CONSTEXPR inline
#endif
260
261#ifdef _MSC_VER
262#pragma endregion
263#endif
264
265 namespace internals {
266 template<typename T>
268 private:
269 template<typename U>
270 static auto test(int) -> decltype(
271 std::hash<U>{}(std::declval<const U&>()),
272 std::true_type{}
273 );
274
275 template<typename>
276 static std::false_type test(...);
277
278 public:
279 static constexpr bool value = decltype(test<T>(0))::value;
280 };
281
282 template<typename T>
284 private:
285 template<typename U>
286 static auto test(int) -> decltype(
287 std::declval<const U&>() == std::declval<const U&>(),
288 std::true_type{}
289 );
290
291 template<typename>
292 static std::false_type test(...);
293
294 public:
295 static constexpr bool value = decltype(test<T>(0))::value;
296 };
297
298 template<typename T>
300 public:
301 lazy_shared_ptr() = default;
302 lazy_shared_ptr(const lazy_shared_ptr&) = delete;
303 lazy_shared_ptr& operator=(const lazy_shared_ptr&) = delete;
304
305 lazy_shared_ptr(lazy_shared_ptr&& other) noexcept : value_(std::move(other.value_)) {}
306
307 lazy_shared_ptr& operator=(lazy_shared_ptr&& other) noexcept {
308 if (this != &other) {
309 value_ = std::move(other.value_);
310 }
311
312 return *this;
313 }
314
315 template<typename Factory>
316 T& get_or_create(Factory&& factory) const {
317#if CSV_ENABLE_THREADS
318 std::call_once(init_once_, [this, &factory]() {
319 value_ = factory();
320 });
321#else
322 if (!value_) {
323 value_ = factory();
324 }
325#endif
326 return *value_;
327 }
328
329 T* get() const noexcept {
330 return value_.get();
331 }
332
333 private:
334 mutable std::shared_ptr<T> value_ = nullptr;
335#if CSV_ENABLE_THREADS
336 mutable std::once_flag init_once_;
337#endif
338 };
339
340 #ifdef CSV_HAS_CXX20
341 #ifdef _MSC_VER
342 #pragma region CXX20 Concepts
343 #endif
344
345 template<typename T>
347 std::ranges::input_range<std::remove_reference_t<T>>
348 && std::convertible_to<
349 std::ranges::range_reference_t<std::remove_reference_t<T>>,
351 >;
352
353 template<typename T>
354 concept has_to_sv_range = requires(const std::remove_reference_t<T>& value) {
355 { value.to_sv_range() } -> std::ranges::input_range;
356 requires std::convertible_to<
357 std::ranges::range_reference_t<decltype(value.to_sv_range())>,
359 >;
360 };
361
362 template<typename T>
364
365 template<typename T>
367 std::ranges::input_range<std::remove_reference_t<T>>
368 && csv_row_like<
369 std::ranges::range_reference_t<std::remove_reference_t<T>>
370 >;
371
372 #ifdef _MSC_VER
373 #pragma endregion
374 #endif
375 #endif
376
// PAGE_SIZE macro could be already defined by the host system.
#if defined(PAGE_SIZE)
#undef PAGE_SIZE
#endif

// Get operating system specific details
#if defined(_WIN32)
/**
 * Windows stand-in for the POSIX getpagesize().
 *
 * @return The larger of the system page size and the allocation
 *         granularity, as reported by GetSystemInfo().
 */
inline int getpagesize() {
    SYSTEM_INFO sys_info = {};
    GetSystemInfo(&sys_info);
    // Both fields are DWORD; convert explicitly instead of relying on an
    // implicit unsigned-to-int conversion. The parentheses keep a `max`
    // macro from being expanded here.
    return static_cast<int>((std::max)(sys_info.dwPageSize, sys_info.dwAllocationGranularity));
}

/** Size of a memory page in bytes. */
const int PAGE_SIZE = getpagesize();
#elif defined(__linux__)
/** Size of a memory page in bytes. */
const int PAGE_SIZE = getpagesize();
#else
/** Size of a memory page in bytes (fixed value on other platforms). */
const int PAGE_SIZE = 4096;
#endif
399
410 constexpr size_t CSV_CHUNK_SIZE_DEFAULT = 10000000; // 10MB
411
413 typedef std::uint32_t CSVChunkIndex;
414
415 CONSTEXPR_VALUE_14 CSVChunkIndex CSV_CHUNK_INDEX_MAX = (std::numeric_limits<CSVChunkIndex>::max)();
416
417 CONSTEXPR_VALUE_14 size_t CSV_CHUNK_SIZE_MAX = CSV_CHUNK_INDEX_MAX;
418
424 constexpr size_t CSV_CHUNK_SIZE_FLOOR = 500 * 1024; // 500KB
425
427 constexpr size_t CSV_SPECULATIVE_PARALLEL_MIN_BYTES = 50ull * 1024ull * 1024ull; // 50MB
428
/**
 * Approximate equality test for floating point values.
 *
 * @tparam T       Floating point type (enforced by a static_assert).
 * @param a        First value.
 * @param b        Second value.
 * @param epsilon  Tolerance; defaults to 0.001 converted to T.
 * @return true when the absolute difference of a and b is strictly less
 *         than epsilon.
 */
template<typename T>
inline bool is_equal(T a, T b, T epsilon = static_cast<T>(0.001)) {
    static_assert(std::is_floating_point<T>::value, "T must be a floating point type.");
    return std::abs(a - b) < epsilon;
}
435
442 enum class ParseFlags {
444 QUOTE = 2 | 1,
445 NOT_SPECIAL = 4,
446 DELIMITER = 4 | 1,
447 CARRIAGE_RETURN = 4 | 2,
448 NEWLINE = 4 | 2 | 1
449 };
450
453 constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept {
454 return (ParseFlags)((int)flag & ~((int)ParseFlags::QUOTE * quote_escape));
455 }
456
457 // Assumed to be true by parsing functions: allows for testing
458 // if an item is DELIMITER or NEWLINE with a >= statement
459 STATIC_ASSERT(ParseFlags::DELIMITER < ParseFlags::CARRIAGE_RETURN);
460 STATIC_ASSERT(ParseFlags::DELIMITER < ParseFlags::NEWLINE);
461 STATIC_ASSERT(ParseFlags::CARRIAGE_RETURN < ParseFlags::NEWLINE);
462
468 STATIC_ASSERT(quote_escape_flag(ParseFlags::NOT_SPECIAL, false) == ParseFlags::NOT_SPECIAL);
469 STATIC_ASSERT(quote_escape_flag(ParseFlags::QUOTE, false) == ParseFlags::QUOTE);
470 STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, false) == ParseFlags::DELIMITER);
471 STATIC_ASSERT(quote_escape_flag(ParseFlags::CARRIAGE_RETURN, false) == ParseFlags::CARRIAGE_RETURN);
472 STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, false) == ParseFlags::NEWLINE);
473
474 STATIC_ASSERT(quote_escape_flag(ParseFlags::NOT_SPECIAL, true) == ParseFlags::NOT_SPECIAL);
475 STATIC_ASSERT(quote_escape_flag(ParseFlags::QUOTE, true) == ParseFlags::QUOTE_ESCAPE_QUOTE);
476 STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, true) == ParseFlags::NOT_SPECIAL);
477 STATIC_ASSERT(quote_escape_flag(ParseFlags::CARRIAGE_RETURN, true) == ParseFlags::NOT_SPECIAL);
478 STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, true) == ParseFlags::NOT_SPECIAL);
479
481 using ParseFlagMap = std::array<ParseFlags, 256>;
482
484 using WhitespaceMap = std::array<bool, 256>;
485 }
486
488 constexpr int CSV_NOT_FOUND = -1;
489
491 constexpr unsigned CHAR_OFFSET = std::numeric_limits<char>::is_signed ? 128 : 0;
492}
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
Definition common.hpp:481
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
Definition common.hpp:484
bool is_equal(T a, T b, T epsilon=0.001)
Definition common.hpp:430
const int PAGE_SIZE
Size of a memory page in bytes.
Definition common.hpp:397
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
Definition common.hpp:442
@ QUOTE_ESCAPE_QUOTE
A quote inside or terminating a quote_escaped field.
@ NOT_SPECIAL
Characters with no special meaning or escaped delimiters and newlines.
@ NEWLINE
Characters which signify a new row.
@ CARRIAGE_RETURN
Characters which signify a carriage return.
@ QUOTE
Characters which may signify a quote escape.
@ DELIMITER
Characters which signify a new field.
constexpr size_t CSV_CHUNK_SIZE_FLOOR
Minimum supported custom chunk size for CSVFormat::chunk_size().
Definition common.hpp:424
constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept
Transform the ParseFlags given the context of whether or not the current field is quote escaped.
Definition common.hpp:453
std::uint32_t CSVChunkIndex
Type used to represent the location of a CSV byte within a larger chunk.
Definition common.hpp:413
constexpr size_t CSV_SPECULATIVE_PARALLEL_MIN_BYTES
Default minimum source size before speculative parallel parsing is considered.
Definition common.hpp:427
constexpr size_t CSV_CHUNK_SIZE_DEFAULT
Default chunk size for lazy-loading large CSV files.
Definition common.hpp:410
The all-encompassing namespace.
constexpr int CSV_NOT_FOUND
Integer indicating a requested column wasn't found.
Definition common.hpp:488
constexpr unsigned CHAR_OFFSET
Offset to convert char into array index.
Definition common.hpp:491
std::string_view string_view
The string_view class used by this library.
Definition common.hpp:176