Vince's CSV Parser
Loading...
Searching...
No Matches
common.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <algorithm>
7#include <array>
8#include <cmath>
9#include <cstdlib>
10#include <deque>
11
12#if defined(_WIN32)
13# ifndef WIN32_LEAN_AND_MEAN
14# define WIN32_LEAN_AND_MEAN
15# endif
16# include <windows.h>
17# undef max
18# undef min
19#elif defined(__linux__)
20# include <unistd.h>
21#endif
22
26#define CSV_INLINE
27#include <type_traits>
28
29#if defined(__EMSCRIPTEN__)
30#undef CSV_ENABLE_THREADS
31#define CSV_ENABLE_THREADS 0
32#elif !defined(CSV_ENABLE_THREADS)
33#define CSV_ENABLE_THREADS 1
34#endif
35
36// Minimal portability macros (Hedley subset) with CSV_ prefix.
37#if defined(__clang__) || defined(__GNUC__)
38 #define CSV_CONST __attribute__((__const__))
39 #define CSV_PURE __attribute__((__pure__))
40 #if defined(_WIN32)
41 #define CSV_PRIVATE
42 #else
43 #define CSV_PRIVATE __attribute__((__visibility__("hidden")))
44 #endif
45 #define CSV_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__)))
46#elif defined(_MSC_VER)
47 #define CSV_CONST
48 #define CSV_PURE
49 #define CSV_PRIVATE
50 #define CSV_NON_NULL(...)
51#else
52 #define CSV_CONST
53 #define CSV_PURE
54 #define CSV_PRIVATE
55 #define CSV_NON_NULL(...)
56#endif
57
58// MSVC-specific warning suppression helpers. Use __pragma so the macros
59// work inside other macro bodies (where #pragma is not allowed).
60#ifdef _MSC_VER
61# define CSV_MSVC_PUSH_DISABLE(w) __pragma(warning(push)) __pragma(warning(disable: w))
62# define CSV_MSVC_POP __pragma(warning(pop))
63#else
64# define CSV_MSVC_PUSH_DISABLE(w)
65# define CSV_MSVC_POP
66#endif
67
68// This library uses C++ exceptions for error reporting in public APIs.
69#if defined(__cpp_exceptions) || defined(_CPPUNWIND) || defined(__EXCEPTIONS)
70 #define CSV_EXCEPTIONS_ENABLED 1
71#else
72 #define CSV_EXCEPTIONS_ENABLED 0
73#endif
74
75#if !CSV_EXCEPTIONS_ENABLED
76 #error "csv-parser requires C++ exceptions. Enable exception handling (for example, remove -fno-exceptions or use /EHsc)."
77#endif
78
79// Detect C++ standard version BEFORE namespace to properly include string_view
80// MSVC: __cplusplus == 199711L unless /Zc:__cplusplus is set; use _MSVC_LANG instead.
81#if defined(_MSVC_LANG) && _MSVC_LANG > __cplusplus
82# define CSV_CPLUSPLUS _MSVC_LANG
83#else
84# define CSV_CPLUSPLUS __cplusplus
85#endif
86
87#if CSV_CPLUSPLUS >= 202002L
88#define CSV_HAS_CXX20
89#endif
90
91#if CSV_CPLUSPLUS >= 201703L
92#define CSV_HAS_CXX17
93#endif
94
95#if CSV_CPLUSPLUS >= 201402L
96#define CSV_HAS_CXX14
97#endif
98
99// Include string_view BEFORE csv namespace to avoid namespace pollution issues
100#ifdef CSV_HAS_CXX17
101#include <string_view>
102#else
103#include "../external/string_view.hpp"
104#endif
105
106namespace csv {
107#ifdef _MSC_VER
108#pragma region Compatibility Macros
109#endif
123// Allows static assertions without specifying a message
124#define STATIC_ASSERT(x) static_assert(x, "Assertion failed")
125
126#ifdef CSV_HAS_CXX17
130 using string_view = std::string_view;
131#else
135 using string_view = nonstd::string_view;
136#endif
137
138#ifdef CSV_HAS_CXX17
139 #define IF_CONSTEXPR if constexpr
140 #define CONSTEXPR_VALUE constexpr
141
142 #define CONSTEXPR_17 constexpr
143#else
144 #define IF_CONSTEXPR if
145 #define CONSTEXPR_VALUE const
146
147 #define CONSTEXPR_17 inline
148#endif
149
150#ifdef CSV_HAS_CXX14
151 template<bool B, class T = void>
152 using enable_if_t = std::enable_if_t<B, T>;
153
154 #define CONSTEXPR_14 constexpr
155 #define CONSTEXPR_VALUE_14 constexpr
156#else
157 template<bool B, class T = void>
158 using enable_if_t = typename std::enable_if<B, T>::type;
159
160 #define CONSTEXPR_14 inline
161 #define CONSTEXPR_VALUE_14 const
162#endif
163
164#ifdef CSV_HAS_CXX17
165 template<typename F, typename... Args>
166 using invoke_result_t = typename std::invoke_result<F, Args...>::type;
167#else
168 template<typename F, typename... Args>
169 using invoke_result_t = typename std::result_of<F(Args...)>::type;
170#endif
171
172 // Resolves g++ bug with regard to constexpr methods.
173 // Keep this gated to C++17+, since C++11/14 pedantic mode rejects constexpr
174 // non-static members when the enclosing class is non-literal.
175 // See: https://stackoverflow.com/questions/36489369/constexpr-non-static-member-function-with-non-constexpr-constructor-gcc-clang-d
176#if defined(__GNUC__) && !defined(__clang__)
177 #if defined(CSV_HAS_CXX17) && (((__GNUC__ == 7) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 8))
178 #define CONSTEXPR constexpr
179 #endif
180#else
181 #ifdef CSV_HAS_CXX17
182 #define CONSTEXPR constexpr
183 #endif
184#endif
185
186#ifndef CONSTEXPR
187#define CONSTEXPR inline
188#endif
189
190#ifdef _MSC_VER
191#pragma endregion
192#endif
193
194 namespace internals {
195 // PAGE_SIZE macro could be already defined by the host system.
196#if defined(PAGE_SIZE)
197#undef PAGE_SIZE
198#endif
199
200// Get operating system specific details
201#if defined(_WIN32)
202 inline int getpagesize() {
203 _SYSTEM_INFO sys_info = {};
204 GetSystemInfo(&sys_info);
205 return std::max(sys_info.dwPageSize, sys_info.dwAllocationGranularity);
206 }
207
208 const int PAGE_SIZE = getpagesize();
209#elif defined(__linux__)
210 const int PAGE_SIZE = getpagesize();
211#else
215 const int PAGE_SIZE = 4096;
216#endif
217
/** Default chunk size for lazy-loading large CSV files (10,000,000 bytes, i.e. 10MB). */
constexpr size_t CSV_CHUNK_SIZE_DEFAULT = 10 * 1000 * 1000;

/** Minimum supported custom chunk size for CSVFormat::chunk_size() (500KB). */
constexpr size_t CSV_CHUNK_SIZE_FLOOR = 1024 * 500;
236
/**
 * Approximate equality test for floating point values.
 *
 * @tparam T       A floating point type (enforced at compile time).
 * @param  a       First value.
 * @param  b       Second value.
 * @param  epsilon Tolerance; the values compare equal only when their
 *                 absolute difference is strictly less than this.
 * @return true if |a - b| < epsilon, false otherwise (including when the
 *         difference is NaN).
 */
template<typename T>
inline bool is_equal(T a, T b, T epsilon = 0.001) {
    static_assert(std::is_floating_point<T>::value, "T must be a floating point type.");

    const T distance = std::fabs(a - b);
    return distance < epsilon;
}
243
/**
 * An enum used for describing the significance of each character with
 * respect to CSV parsing.
 *
 * The numeric values are deliberate bit patterns:
 *  - quote_escape_flag() clears the QUOTE bits (2 | 1) while inside a quoted
 *    field, which turns QUOTE into QUOTE_ESCAPE_QUOTE (0) and turns
 *    DELIMITER, CARRIAGE_RETURN and NEWLINE into NOT_SPECIAL (4).
 *  - DELIMITER < CARRIAGE_RETURN < NEWLINE, an ordering the static
 *    assertions following this enum rely on.
 */
enum class ParseFlags {
    QUOTE_ESCAPE_QUOTE = 0, /**< A quote inside or terminating a quote_escaped field */
    QUOTE = 2 | 1,          /**< Characters which may signify a quote escape */
    NOT_SPECIAL = 4,        /**< Characters with no special meaning or escaped delimiters and newlines */
    DELIMITER = 4 | 1,      /**< Characters which signify a new field */
    CARRIAGE_RETURN = 4 | 2, /**< Characters which signify a carriage return */
    NEWLINE = 4 | 2 | 1     /**< Characters which signify a new row */
};
258
261 constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept {
262 return (ParseFlags)((int)flag & ~((int)ParseFlags::QUOTE * quote_escape));
263 }
264
265 // Assumed to be true by parsing functions: allows for testing
266 // if an item is DELIMITER or NEWLINE with a >= statement
267 STATIC_ASSERT(ParseFlags::DELIMITER < ParseFlags::CARRIAGE_RETURN);
268 STATIC_ASSERT(ParseFlags::DELIMITER < ParseFlags::NEWLINE);
269 STATIC_ASSERT(ParseFlags::CARRIAGE_RETURN < ParseFlags::NEWLINE);
270
// Compile-time checks of quote_escape_flag() when quote_escape is false:
// every flag must come back unchanged.
276 STATIC_ASSERT(quote_escape_flag(ParseFlags::NOT_SPECIAL, false) == ParseFlags::NOT_SPECIAL);
277 STATIC_ASSERT(quote_escape_flag(ParseFlags::QUOTE, false) == ParseFlags::QUOTE);
278 STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, false) == ParseFlags::DELIMITER);
279 STATIC_ASSERT(quote_escape_flag(ParseFlags::CARRIAGE_RETURN, false) == ParseFlags::CARRIAGE_RETURN);
280 STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, false) == ParseFlags::NEWLINE);
281
// Compile-time checks of quote_escape_flag() when quote_escape is true:
// QUOTE must become QUOTE_ESCAPE_QUOTE, NOT_SPECIAL stays as it is, and
// DELIMITER, CARRIAGE_RETURN and NEWLINE must all become NOT_SPECIAL.
282 STATIC_ASSERT(quote_escape_flag(ParseFlags::NOT_SPECIAL, true) == ParseFlags::NOT_SPECIAL);
283 STATIC_ASSERT(quote_escape_flag(ParseFlags::QUOTE, true) == ParseFlags::QUOTE_ESCAPE_QUOTE);
284 STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, true) == ParseFlags::NOT_SPECIAL);
285 STATIC_ASSERT(quote_escape_flag(ParseFlags::CARRIAGE_RETURN, true) == ParseFlags::NOT_SPECIAL);
286 STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, true) == ParseFlags::NOT_SPECIAL);
287
289 using ParseFlagMap = std::array<ParseFlags, 256>;
290
292 using WhitespaceMap = std::array<bool, 256>;
293 }
294
/** Integer indicating a requested column wasn't found. */
constexpr int CSV_NOT_FOUND = -1;

/**
 * Offset to convert char into array index.
 *
 * 128 when plain char is a signed type on this platform, 0 when it is
 * unsigned. NOTE(review): presumably added to a char before indexing the
 * 256-entry ParseFlagMap / WhitespaceMap so the index is never negative;
 * confirm against the call sites.
 *
 * Signedness is queried with std::is_signed from <type_traits>, which this
 * header already includes, instead of std::numeric_limits, whose header
 * <limits> is not among this file's includes.
 */
constexpr unsigned CHAR_OFFSET = std::is_signed<char>::value ? 128 : 0;
300}
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
Definition common.hpp:289
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
Definition common.hpp:292
bool is_equal(T a, T b, T epsilon=0.001)
Definition common.hpp:238
const int PAGE_SIZE
Size of a memory page in bytes.
Definition common.hpp:215
ParseFlags
An enum used for describing the significance of each character with respect to CSV parsing.
Definition common.hpp:250
@ QUOTE_ESCAPE_QUOTE
A quote inside or terminating a quote_escaped field.
@ NOT_SPECIAL
Characters with no special meaning or escaped delimiters and newlines.
@ NEWLINE
Characters which signify a new row.
@ CARRIAGE_RETURN
Characters which signify a carriage return.
@ QUOTE
Characters which may signify a quote escape.
@ DELIMITER
Characters which signify a new field.
constexpr size_t CSV_CHUNK_SIZE_FLOOR
Minimum supported custom chunk size for CSVFormat::chunk_size().
Definition common.hpp:235
constexpr ParseFlags quote_escape_flag(ParseFlags flag, bool quote_escape) noexcept
Transform the ParseFlags given the context of whether or not the current field is quote escaped.
Definition common.hpp:261
constexpr size_t CSV_CHUNK_SIZE_DEFAULT
Default chunk size for lazy-loading large CSV files.
Definition common.hpp:228
The all-encompassing namespace.
constexpr int CSV_NOT_FOUND
Integer indicating a requested column wasn't found.
Definition common.hpp:296
constexpr unsigned CHAR_OFFSET
Offset to convert char into array index.
Definition common.hpp:299
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:135