Vince's CSV Parser
Loading...
Searching...
No Matches
raw_csv_data.hpp
Go to the documentation of this file.
1
10#pragma once
11#include <cassert>
12#include <memory>
13#if !defined(CSV_ENABLE_THREADS) || CSV_ENABLE_THREADS
14#include <mutex>
15#endif
16#include <unordered_map>
17#include <string>
18#include <vector>
19
20#include "common.hpp"
21#include "col_names.hpp"
22
23namespace csv {
24 namespace internals {
/** A barebones class used for describing CSV fields.
 *
 *  Holds only the position and size of a field plus a flag for escaped
 *  quotes. The defaulted constructor performs no member initialization,
 *  so a default-initialized RawCSVField has indeterminate values until
 *  it is assigned.
 */
struct RawCSVField {
    RawCSVField() = default;

    /** Construct a field descriptor.
     *
     *  @param _start        Start of the field, relative to the beginning of the row
     *  @param _length       Length of the field, ignoring quote escape characters
     *  @param _double_quote Whether the field contains an escaped quote
     */
    RawCSVField(size_t _start, size_t _length, bool _double_quote = false)
        : start(_start), length(_length), has_double_quote(_double_quote) {}

    /** The start of the field, relative to the beginning of the row */
    size_t start;

    /** The length of the field, ignoring quote escape characters */
    size_t length;

    /** Whether or not the field contains an escaped quote */
    bool has_double_quote;
};
43
66 public:
69 _single_buffer_capacity(single_buffer_capacity) {
71 _block_capacity = (max_fields + _single_buffer_capacity - 1) / _single_buffer_capacity;
72 _blocks = std::unique_ptr<RawCSVField*[]>(new RawCSVField*[_block_capacity]());
73
74 this->allocate();
75 }
76
77 // No copy constructor
78 CSVFieldList(const CSVFieldList& other) = delete;
79
80 // CSVFieldList objects may be moved
82 _single_buffer_capacity(other._single_buffer_capacity),
83 _block_capacity(other._block_capacity) {
84
85 this->_blocks = std::move(other._blocks);
86 this->_owned_blocks = std::move(other._owned_blocks);
87 _current_buffer_size = other._current_buffer_size;
88 _current_block = other._current_block;
89
90 // Recalculate _back pointer to point into OUR blocks, not the moved-from ones
91 if (this->_blocks) {
92 RawCSVField* block = this->_blocks[_current_block];
93 _back = block ? (block + _current_buffer_size) : nullptr;
94 } else {
95 _back = nullptr;
96 }
97
98 // Invalidate moved-from state to prevent use-after-move bugs
99 other._back = nullptr;
100 other._current_buffer_size = 0;
101 other._current_block = 0;
102 other._block_capacity = 0;
103 }
104
105 template <class... Args>
106 void emplace_back(Args&&... args) {
107 if (this->_current_buffer_size == this->_single_buffer_capacity) {
108 this->allocate();
109 }
110
111 assert(_back != nullptr);
112 *(_back++) = RawCSVField(std::forward<Args>(args)...);
113 _current_buffer_size++;
114 }
115
116 size_t size() const noexcept {
117 return this->_current_buffer_size + (_current_block * this->_single_buffer_capacity);
118 }
119
120 RawCSVField& operator[](size_t n) const;
121
122 private:
123 const size_t _single_buffer_capacity;
124
127 std::unique_ptr<RawCSVField*[]> _blocks = nullptr;
128
130 std::vector<std::unique_ptr<RawCSVField[]>> _owned_blocks = {};
131 // _owned_blocks may reallocate, but RawCSVField[] allocations stay put;
132 // _blocks holds raw pointers to those allocations, so readers remain valid.
133
135 size_t _current_buffer_size = 0;
136
138 size_t _current_block = 0;
139
141 size_t _block_capacity = 0;
142
144 RawCSVField* _back = nullptr;
145
147 void allocate();
148 };
149
// A class for storing raw CSV data and associated metadata.
// NOTE(review): the line numbering of this listing has gaps (159-164, 172),
// so members may exist that are not shown here.
155 struct RawCSVData {
// Type-erased shared handle; presumably keeps alive the storage that `data` views -- confirm.
156 std::shared_ptr<void> _data = nullptr;
// Non-owning view of the CSV text; defaults to the empty string.
157 csv::string_view data = "";
158
160
// Cached unescaped field values for fields with escaped quotes.
// NOTE(review): the meaning of the size_t key is not visible in this listing -- confirm.
165 std::unordered_map<size_t, std::string> double_quote_fields = {};
// The mutex is only a member when CSV_ENABLE_THREADS evaluates to non-zero.
166#if CSV_ENABLE_THREADS
// `mutable` so it can be locked through a const RawCSVData;
// presumably guards double_quote_fields -- confirm against the users of this struct.
167 mutable std::mutex double_quote_init_lock;
168#endif
169
// Column names handle (ColNamesPtr is not declared in this listing); null by default.
170 internals::ColNamesPtr col_names = nullptr;
// Array which maps ASCII chars to a parsing flag.
171 internals::ParseFlagMap parse_flags;
173 };
174
175 using RawCSVDataPtr = std::shared_ptr<RawCSVData>;
176 }
177}
A class used for efficiently storing RawCSVField objects and expanding as necessary.
CSVFieldList(size_t single_buffer_capacity=(size_t)(internals::PAGE_SIZE/sizeof(RawCSVField)))
Construct a CSVFieldList which allocates blocks of a certain size.
A standalone header file containing shared code.
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
Definition common.hpp:274
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
Definition common.hpp:277
constexpr size_t ITERATION_CHUNK_SIZE
Chunk size for lazy-loading large CSV files.
Definition common.hpp:225
const int PAGE_SIZE
Size of a memory page in bytes.
Definition common.hpp:212
CSV_CONST CONSTEXPR_17 OutArray arrayToDefault(T &&value)
Helper constexpr function to initialize an array with all the elements set to value.
The all encompassing namespace.
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:134
A class for storing raw CSV data and associated metadata.
std::unordered_map< size_t, std::string > double_quote_fields
Cached unescaped field values for fields with escaped quotes.
A barebones class used for describing CSV fields.
size_t start
The start of the field, relative to the beginning of the row.
bool has_double_quote
Whether or not the field contains an escaped quote.
size_t length
The length of the field, ignoring quote escape characters.