Vince's CSV Parser
Loading...
Searching...
No Matches
raw_csv_data.hpp
Go to the documentation of this file.
1
10#pragma once
11#include <cassert>
12#include <memory>
13#include <mutex>
14#include <unordered_map>
15#include <string>
16#include <vector>
17
18#include "common.hpp"
19#include "col_names.hpp"
20
21namespace csv {
22 namespace internals {
// A barebones record describing one CSV field by its position within a row.
// NOTE(review): the listing numbers embedded in this dump skip 28-29 and 38-39.
// The constructor takes _length and _double_quote but only the assignment of
// `start` is visible, and the index text at the end of this listing mentions a
// `bool has_double_quote` member that is not shown here. Presumably those lines
// were dropped from this view -- verify against the original header.
24 struct RawCSVField {
25 RawCSVField() = default;
26 RawCSVField(size_t _start, size_t _length, bool _double_quote = false) {
27 start = _start;
30 }
31
// The start of the field, relative to the beginning of the row.
33 size_t start;
34
// The length of the field, ignoring quote escape characters.
36 size_t length;
37
40 };
41
64 public:
// Constructor of CSVFieldList: member-initializer list and body.
// NOTE(review): the signature line is not visible in this listing; `max_fields`
// used below is defined outside the visible lines.
67 _single_buffer_capacity(single_buffer_capacity) {
// Ceiling division: how many blocks of _single_buffer_capacity entries are
// needed to hold max_fields entries.
69 _block_capacity = (max_fields + _single_buffer_capacity - 1) / _single_buffer_capacity;
// Table of raw block pointers; the trailing `()` value-initializes the array,
// so every slot starts out as nullptr.
70 _blocks = std::unique_ptr<RawCSVField*[]>(new RawCSVField*[_block_capacity]());
71
// allocate() is defined elsewhere; presumably it sets up the first block so
// that emplace_back() has a slot to write to -- confirm against its definition.
72 this->allocate();
73 }
74
75 // No copy constructor: the block storage is held through unique_ptr members.
76 CSVFieldList(const CSVFieldList& other) = delete;
77
78 // CSVFieldList objects may be moved (copy construction is deleted)
// NOTE(review): the move constructor's signature line is not visible in this
// listing; only its member-initializer list and body follow.
80 _single_buffer_capacity(other._single_buffer_capacity),
81 _block_capacity(other._block_capacity) {
82
// Take over both the pointer table and the owning storage from `other`.
83 this->_blocks = std::move(other._blocks);
84 this->_owned_blocks = std::move(other._owned_blocks);
// Copy the current write position (plain integers).
85 _current_buffer_size = other._current_buffer_size;
86 _current_block = other._current_block;
87
88 // Recalculate _back pointer to point into OUR blocks, not the moved-from ones
89 if (this->_blocks) {
90 RawCSVField* block = this->_blocks[_current_block];
// Next free slot = start of the current block + entries already written to it.
91 _back = block ? (block + _current_buffer_size) : nullptr;
92 } else {
93 _back = nullptr;
94 }
95
96 // Invalidate moved-from state to prevent use-after-move bugs
97 other._back = nullptr;
98 other._current_buffer_size = 0;
99 other._current_block = 0;
100 other._block_capacity = 0;
101 }
102
// Construct a RawCSVField from `args` and append it at the end of the list.
// `args` are perfectly forwarded to a RawCSVField constructor.
103 template <class... Args>
104 void emplace_back(Args&&... args) {
// The current block is full: ask allocate() for more room first. allocate() is
// defined elsewhere; presumably it switches to a fresh block and resets
// _current_buffer_size and _back -- confirm against its definition.
105 if (this->_current_buffer_size == this->_single_buffer_capacity) {
106 this->allocate();
107 }
108
// _back must point at a writable slot (it is nullptr in a moved-from list).
109 assert(_back != nullptr);
// Write the new field into the slot, then advance to the next slot.
110 *(_back++) = RawCSVField(std::forward<Args>(args)...);
111 _current_buffer_size++;
112 }
113
// Number of fields stored: the entries written to the current block plus
// _single_buffer_capacity entries for each of the _current_block blocks before it.
114 size_t size() const noexcept {
115 return this->_current_buffer_size + (_current_block * this->_single_buffer_capacity);
116 }
117
// Access the field at index n (definition is not in this header listing).
// Note: const-qualified, yet returns a non-const reference to the stored field.
118 RawCSVField& operator[](size_t n) const;
119
120 private:
// Number of RawCSVField entries per block; emplace_back() calls allocate()
// once the current block holds this many.
121 const size_t _single_buffer_capacity;
122
// Table of raw pointers to the blocks, created with _block_capacity slots.
125 std::unique_ptr<RawCSVField*[]> _blocks = nullptr;
126
// Owning storage for the block allocations.
128 std::vector<std::unique_ptr<RawCSVField[]>> _owned_blocks = {};
129 // _owned_blocks may reallocate, but RawCSVField[] allocations stay put;
130 // _blocks holds raw pointers to those allocations, so readers remain valid.
131
// Number of entries already written to the current block.
133 size_t _current_buffer_size = 0;
134
// Index into _blocks of the block currently being written to.
136 size_t _current_block = 0;
137
// Number of slots in the _blocks table.
139 size_t _block_capacity = 0;
140
// Next free slot in the current block; emplace_back() writes here and advances it.
142 RawCSVField* _back = nullptr;
143
// Called by the constructor and by emplace_back() when the current block is
// full; defined outside this listing.
145 void allocate();
146 };
146 };
147
// Storage for raw CSV data and its associated metadata.
// NOTE(review): the listing numbers embedded in this dump skip some lines
// (e.g. 168), so members may be missing from this view -- verify against the
// original header.
153 struct RawCSVData {
// Type-erased shared handle; presumably keeps the buffer viewed by `data`
// alive -- confirm against the code that populates it.
154 std::shared_ptr<void> _data = nullptr;
// View of the raw CSV text.
155 csv::string_view data = "";
156
158
// Cached unescaped field values for fields with escaped quotes.
163 std::unordered_map<size_t, std::string> double_quote_fields = {};
// Protects lazy initialization only; `mutable` so it can be locked through a
// const RawCSVData.
164 mutable std::mutex double_quote_init_lock;
165
// Column names (ColNamesPtr comes from col_names.hpp).
166 internals::ColNamesPtr col_names = nullptr;
// Table mapping each of the 256 char values to a parsing flag.
167 internals::ParseFlagMap parse_flags;
169 };
170
// Shared-ownership pointer to a RawCSVData.
171 using RawCSVDataPtr = std::shared_ptr<RawCSVData>;
172 }
173}
A class used for efficiently storing RawCSVField objects and expanding as necessary.
CSVFieldList(size_t single_buffer_capacity=(size_t)(internals::PAGE_SIZE/sizeof(RawCSVField)))
Construct a CSVFieldList which allocates blocks of a certain size.
A standalone header file containing shared code.
std::array< ParseFlags, 256 > ParseFlagMap
An array which maps ASCII chars to a parsing flag.
Definition common.hpp:239
std::array< bool, 256 > WhitespaceMap
An array which maps ASCII chars to a flag indicating if it is whitespace.
Definition common.hpp:242
constexpr size_t ITERATION_CHUNK_SIZE
Chunk size for lazy-loading large CSV files.
Definition common.hpp:190
const int PAGE_SIZE
Size of a memory page in bytes.
Definition common.hpp:177
CSV_CONST CONSTEXPR_17 OutArray arrayToDefault(T &&value)
Helper constexpr function to initialize an array with all the elements set to value.
The all encompassing namespace.
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:99
A class for storing raw CSV data and associated metadata.
std::mutex double_quote_init_lock
Protects lazy initialization only.
std::unordered_map< size_t, std::string > double_quote_fields
Cached unescaped field values for fields with escaped quotes.
A barebones class used for describing CSV fields.
size_t start
The start of the field, relative to the beginning of the row.
bool has_double_quote
Whether or not the field contains an escaped quote.
size_t length
The length of the field, ignoring quote escape characters.