Vince's CSV Parser
Loading...
Searching...
No Matches
basic_csv_parser_simd.hpp
Go to the documentation of this file.
1#pragma once
17#include <array>
18#include <cstdint>
19
20#include "common.hpp"
21
// ---------------------------------------------------------------------------
// SIMD backend selection
//
// At most one of CSV_SIMD_AVX2, CSV_SIMD_SSE2 or CSV_SIMD_NEON is defined,
// tested in that priority order. Defining CSV_NO_SIMD before this point
// suppresses all three.
// ---------------------------------------------------------------------------
#if !defined(CSV_NO_SIMD)
#  if defined(__AVX2__) || (defined(_MSC_VER) && defined(_M_AVX) && _M_AVX >= 2)
#    define CSV_SIMD_AVX2 1
#  elif defined(__SSE2__) || (defined(_MSC_VER) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
#    define CSV_SIMD_SSE2 1
#  elif defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
#    define CSV_SIMD_NEON 1
#  endif
#endif

#if defined(CSV_SIMD_AVX2) || defined(CSV_SIMD_SSE2)
#  include <immintrin.h>
// CSV_TZCNT32(x): number of trailing zero bits in a 32-bit value.
//
// _tzcnt_u32 in GCC/Clang headers is __attribute__(__target__("bmi")), which
// requires -mbmi at the call site. __builtin_ctz has no such restriction and
// emits BSF/TZCNT as the optimizer sees fit. MSVC's _tzcnt_u32 has no
// equivalent restriction, so keep it there.
//
// __builtin_ctz(0) is undefined, so callers must only pass a non-zero value.
#  ifdef _MSC_VER
#    define CSV_TZCNT32(x) _tzcnt_u32(x)
#  else
#    define CSV_TZCNT32(x) static_cast<unsigned>(__builtin_ctz(x))
#  endif
#endif

#if defined(CSV_SIMD_NEON)
// MSVC ships the NEON intrinsics under a different header name.
#  if defined(_MSC_VER)
#    include <arm64_neon.h>
#  else
#    include <arm_neon.h>
#  endif
#endif
50
51namespace csv {
52 namespace internals {
// Precomputed byte vectors for the four CSV sentinel bytes
// (delimiter, quote character, LF and CR).
//
// Constructed once per parser instance and passed by const-ref into
// find_next_non_special, amortizing fill cost across every field scan.
//
// Layout rules:
//  * Keep the layout independent of the consuming target's ISA macros.
//    CSVReader constructors are header-defined while parser methods live
//    in csv.lib, so a consumer TU and the library TU must agree on this
//    member layout even if only the library target was compiled with AVX2.
//  * Store byte arrays instead of __m256i/__m128i members so parser objects
//    do not carry over-aligned SIMD members on MSVC. The scan function uses
//    unaligned SIMD loads from these arrays.
//
// When no_quote mode is active, set quote_char = delimiter so that
// quote bytes are not mistakenly treated as sentinels (they are
// NOT_SPECIAL in that mode and must not cause SIMD to stop early).
struct SentinelVecs {
    // Default configuration: ',' as delimiter and '"' as quote character.
    SentinelVecs() noexcept : SentinelVecs(',', '"') {}

    // Broadcasts each sentinel byte across its own 32-byte array.
    // LF and CR are always sentinels; only the delimiter and quote
    // character are configurable.
    SentinelVecs(char delimiter, char quote_char) noexcept {
        v_delim.fill(delimiter);
        v_quote.fill(quote_char);
        v_lf.fill('\n');
        v_cr.fill('\r');
    }

    // Each array is 32 bytes: exactly one AVX2 load. The 16-byte SSE2 and
    // NEON loads read only the first half.
    std::array<char, 32> v_delim;
    std::array<char, 32> v_quote;
    std::array<char, 32> v_lf;
    std::array<char, 32> v_cr;
};

// Compile-time guards for the layout rules described above.
static_assert(sizeof(SentinelVecs) == 128, "SentinelVecs layout must stay ISA-independent.");
static_assert(alignof(SentinelVecs) <= alignof(void*), "SentinelVecs must not require over-aligned allocation.");
84
85 // Free function — easy to unit test independently of CSVParserCore.
86 //
87 // SIMD-only fast-forward: skips pos forward past any bytes that are
88 // definitely not one of the four CSV sentinel characters. Stops as
89 // soon as a sentinel byte is found OR fewer bytes remain than one
90 // SIMD lane. The caller is responsible for the scalar tail loop.
91 //
92 // State-agnostic by design: stops conservatively at any sentinel byte
93 // regardless of quote_escape. Inside a quoted field, delimiter and
94 // newline bytes are NOT_SPECIAL under compound_parse_flag, so the
95 // outer DFA loop re-enters parse_field immediately at zero cost.
96 inline size_t find_next_non_special(
98 size_t pos,
99 const SentinelVecs& sentinels
100 ) noexcept
101 {
102#if defined(CSV_SIMD_AVX2)
103 const __m256i v_delim = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(sentinels.v_delim.data()));
104 const __m256i v_quote = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(sentinels.v_quote.data()));
105 const __m256i v_lf = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(sentinels.v_lf.data()));
106 const __m256i v_cr = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(sentinels.v_cr.data()));
107
108 while (pos + 32 <= data.size()) {
109 __m256i bytes = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(data.data() + pos));
110 __m256i special = _mm256_cmpeq_epi8(bytes, v_delim);
111 special = _mm256_or_si256(special, _mm256_cmpeq_epi8(bytes, v_quote));
112 special = _mm256_or_si256(special, _mm256_cmpeq_epi8(bytes, v_lf));
113 special = _mm256_or_si256(special, _mm256_cmpeq_epi8(bytes, v_cr));
114 int mask = _mm256_movemask_epi8(special);
115
116 if (mask != 0)
117 return pos + CSV_TZCNT32(static_cast<unsigned>(mask));
118 pos += 32;
119 }
120#elif defined(CSV_SIMD_SSE2)
121 const __m128i v_delim = _mm_loadu_si128(reinterpret_cast<const __m128i*>(sentinels.v_delim.data()));
122 const __m128i v_quote = _mm_loadu_si128(reinterpret_cast<const __m128i*>(sentinels.v_quote.data()));
123 const __m128i v_lf = _mm_loadu_si128(reinterpret_cast<const __m128i*>(sentinels.v_lf.data()));
124 const __m128i v_cr = _mm_loadu_si128(reinterpret_cast<const __m128i*>(sentinels.v_cr.data()));
125
126 while (pos + 16 <= data.size()) {
127 __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data.data() + pos));
128 __m128i special = _mm_cmpeq_epi8(bytes, v_delim);
129 special = _mm_or_si128(special, _mm_cmpeq_epi8(bytes, v_quote));
130 special = _mm_or_si128(special, _mm_cmpeq_epi8(bytes, v_lf));
131 special = _mm_or_si128(special, _mm_cmpeq_epi8(bytes, v_cr));
132 int mask = _mm_movemask_epi8(special);
133
134 if (mask != 0)
135 return pos + CSV_TZCNT32(static_cast<unsigned>(mask));
136 pos += 16;
137 }
138#elif defined(CSV_SIMD_NEON)
139 const uint8x16_t v_delim = vld1q_u8(reinterpret_cast<const uint8_t*>(sentinels.v_delim.data()));
140 const uint8x16_t v_quote = vld1q_u8(reinterpret_cast<const uint8_t*>(sentinels.v_quote.data()));
141 const uint8x16_t v_lf = vld1q_u8(reinterpret_cast<const uint8_t*>(sentinels.v_lf.data()));
142 const uint8x16_t v_cr = vld1q_u8(reinterpret_cast<const uint8_t*>(sentinels.v_cr.data()));
143
144 while (pos + 16 <= data.size()) {
145 const uint8x16_t bytes = vld1q_u8(reinterpret_cast<const uint8_t*>(data.data() + pos));
146 uint8x16_t special = vceqq_u8(bytes, v_delim);
147 special = vorrq_u8(special, vceqq_u8(bytes, v_quote));
148 special = vorrq_u8(special, vceqq_u8(bytes, v_lf));
149 special = vorrq_u8(special, vceqq_u8(bytes, v_cr));
150
151#if defined(__aarch64__) || defined(_M_ARM64)
152 if (vmaxvq_u8(special) == 0) {
153 pos += 16;
154 continue;
155 }
156#endif
157
158 uint8_t lanes[16];
159 vst1q_u8(lanes, special);
160 for (size_t i = 0; i < 16; ++i) {
161 if (lanes[i] != 0)
162 return pos + i;
163 }
164 pos += 16;
165 }
166#else
167 (void)data; (void)sentinels;
168#endif
169 return pos;
170 }
171 }
172}
A standalone header file containing shared code.
The all encompassing namespace.
std::string_view string_view
The string_view class used by this library.
Definition common.hpp:174