Vince's CSV Parser
Loading...
Searching...
No Matches
csv_format.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <iterator>
7#include <stdexcept>
8#include <string>
9#include <vector>
10
11#include "common.hpp"
12
13namespace csv {
// Forward declarations only: CSVFormat names both of these types as friends
// further down in this header, which requires the names but not the full
// class definitions.
14 namespace internals {
15 class IBasicCSVParser;
16 }
17
18 class CSVReader;
19
/** Determines how to handle rows that are shorter or longer than the majority.
 *
 *  The numeric values are part of the contract: CSVFormat::variable_columns(bool)
 *  maps `false` to IGNORE_ROW (0) and `true` to KEEP (1).
 */
enum class VariableColumnPolicy {
    THROW = -1,
    IGNORE_ROW = 0,
    KEEP = 1,
    KEEP_NON_EMPTY = 2
};
27
/** Determines how column name lookups are performed. */
enum class ColumnNamePolicy {
    EXACT = 0,            ///< Case-sensitive match (default)
    // NOTE(review): this enumerator's line was missing from the extracted
    // listing; the explicit value 1 is reconstructed — confirm against the
    // original header.
    CASE_INSENSITIVE = 1  ///< Case-insensitive match
};
33
// NOTE(review): the opening line of this aggregate (listing line 35, the
// `struct <name> {` line) is absent from this extract, so the type name is
// not visible here. Presumably this is the type described elsewhere on the
// page as "Stores the inferred format of a CSV file." — confirm against the
// original header before relying on that.
// Members: a delimiter character, a header row index, and a column count.
36 char delim;
37 int header_row;
38 size_t n_cols;
39 };
40
44 class CSVFormat {
45 public:
47 CSVFormat() = default;
48
57 CSVFormat& delimiter(char delim);
58
65 CSVFormat& delimiter(const std::vector<char> & delim);
66
71 CSVFormat& trim(const std::vector<char> & ws);
72
77 CSVFormat& quote(char quote);
78
83 CSVFormat& column_names(const std::vector<std::string>& names);
84
90 CSVFormat& header_row(int row);
91
98 this->header_row(-1);
99 return *this;
100 }
101
103 CSVFormat& quote(bool use_quote) {
104 this->no_quote = !use_quote;
105 return *this;
106 }
107
109 CONSTEXPR_14 CSVFormat& variable_columns(VariableColumnPolicy policy = VariableColumnPolicy::IGNORE_ROW) {
110 this->variable_column_policy = policy;
111 return *this;
112 }
113
115 CONSTEXPR_14 CSVFormat& variable_columns(bool policy) {
116 this->variable_column_policy = (VariableColumnPolicy)policy;
117 return *this;
118 }
119
127 this->_column_name_policy = policy;
128 return *this;
129 }
130
140 CSVFormat& chunk_size(size_t size);
141
142 #ifndef DOXYGEN_SHOULD_SKIP_THIS
143 char get_delim() const {
144 // This error should never be received by end users.
145 if (this->possible_delimiters.size() > 1) {
146 throw std::runtime_error("There is more than one possible delimiter.");
147 }
148
149 return this->possible_delimiters.at(0);
150 }
151
152 CONSTEXPR bool is_quoting_enabled() const { return !this->no_quote; }
153 CONSTEXPR char get_quote_char() const { return this->quote_char; }
154 CONSTEXPR int get_header() const { return this->header; }
155 std::vector<char> get_possible_delims() const { return this->possible_delimiters; }
156 std::vector<char> get_trim_chars() const { return this->trim_chars; }
157 const std::vector<std::string>& get_col_names() const { return this->col_names; }
158 CONSTEXPR VariableColumnPolicy get_variable_column_policy() const { return this->variable_column_policy; }
159 CONSTEXPR ColumnNamePolicy get_column_name_policy() const { return this->_column_name_policy; }
160 CONSTEXPR size_t get_chunk_size() const { return this->_chunk_size; }
161 #endif
162
165 CSVFormat format;
166 format.delimiter({ ',', '|', '\t', ';', '^' })
167 .quote('"');
168 // Assign header directly rather than via header_row() so that
169 // header_explicitly_set_ remains false — the guesser must be free
170 // to detect the real header row at construction time.
171 format.header = 0;
172
173 return format;
174 }
175
176 bool guess_delim() const {
177 return this->possible_delimiters.size() > 1;
178 }
179
180 friend CSVReader;
181 friend internals::IBasicCSVParser;
182
183 private:
185 void assert_no_char_overlap();
186
188 std::vector<char> possible_delimiters = { ',' };
189
191 std::vector<char> trim_chars = {};
192
194 int header = 0;
195
197 bool header_explicitly_set_ = false;
198
200 bool no_quote = false;
201
203 char quote_char = '"';
204
206 std::vector<std::string> col_names = {};
207
209 bool col_names_explicitly_set_ = false;
210
212 VariableColumnPolicy variable_column_policy = VariableColumnPolicy::IGNORE_ROW;
213
215 ColumnNamePolicy _column_name_policy = ColumnNamePolicy::EXACT;
216
218 size_t _chunk_size = internals::CSV_CHUNK_SIZE_DEFAULT;
219 };
220}
Stores information about how to parse a CSV file.
CSVFormat & column_names(const std::vector< std::string > &names)
Sets the column names.
CONSTEXPR_14 CSVFormat & variable_columns(VariableColumnPolicy policy=VariableColumnPolicy::IGNORE_ROW)
Tells the parser how to handle rows of a different length than the others.
CONSTEXPR_14 CSVFormat & column_names_policy(ColumnNamePolicy policy)
Sets the column name lookup policy.
CSVFormat()=default
Settings for parsing an RFC 4180 CSV file.
static CSVFormat guess_csv()
CSVFormat preset for delimiter inference with header/n_cols inference enabled.
CSVFormat & chunk_size(size_t size)
Sets the chunk size used when reading the CSV.
CSVFormat & trim(const std::vector< char > &ws)
Sets the whitespace characters to be trimmed.
CSVFormat & delimiter(char delim)
Sets the delimiter of the CSV file.
CSVFormat & quote(bool use_quote)
Turn quoting on or off.
CONSTEXPR_14 CSVFormat & variable_columns(bool policy)
Tells the parser how to handle rows of a different length than the others.
CSVFormat & no_header()
Tells the parser that this CSV has no header row.
CSVFormat & header_row(int row)
Sets the header row.
CSVFormat & quote(char quote)
Sets the quote character.
A standalone header file containing shared code.
#define CONSTEXPR
Expands to constexpr in decent compilers and inline otherwise.
Definition common.hpp:187
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single header version.
Definition common.hpp:26
The all encompassing namespace.
ColumnNamePolicy
Determines how column name lookups are performed.
@ CASE_INSENSITIVE
Case-insensitive match.
@ EXACT
Case-sensitive match (default)
VariableColumnPolicy
Determines how to handle rows that are shorter or longer than the majority.
std::vector< std::string > get_col_names(csv::string_view filename, const CSVFormat &format=CSVFormat::guess_csv())
Get the column names of a CSV file using just the first 500KB.
Stores the inferred format of a CSV file.