2#include "string_view_stream.hpp"
4#include <unordered_map>
10 std::unordered_map<size_t, size_t> row_tally = { { 0, 0 } };
13 std::unordered_map<size_t, size_t> row_when = { { 0, 0 } };
18 internals::StringViewStream source(head);
21 const auto parse_flags = format.is_quoting_enabled()
22 ? internals::make_parse_flags(format.get_delim(), format.get_quote_char())
24 const auto ws_flags = internals::make_ws_flags(format.get_trim_chars());
25 StreamParser<internals::StringViewStream> parser(source, parse_flags, ws_flags);
26 parser.set_output(rows);
29 for (
size_t i = 0; i < rows.size(); i++) {
34 if (row_tally.find(row.size()) != row_tally.end()) {
35 row_tally[row.size()]++;
38 row_tally[row.size()] = 1;
39 row_when[row.size()] = i;
44 double final_score = 0;
45 size_t header_row = 0;
46 size_t mode_row_length = 0;
49 for (
auto& pair : row_tally) {
50 const size_t row_size = pair.first;
51 const size_t row_count = pair.second;
52 const double score = (double)(row_size * row_count);
53 if (score > final_score) {
55 mode_row_length = row_size;
56 header_row = row_when[row_size];
61 size_t first_row_length = rows.size() > 0 ? rows[0].size() : 0;
62 if (first_row_length >= mode_row_length && first_row_length > 0) {
66 return { header_row, mode_row_length, final_score };
80 char current_delim = delims[0];
82 for (
char cand_delim : delims) {
83 auto result = calculate_score(head, format.
delimiter(cand_delim));
85 if ((
size_t)result.score > max_score) {
86 max_score = (size_t)result.score;
87 current_delim = cand_delim;
88 header = result.header;
89 n_cols = result.mode_row_length;
93 return { current_delim, (int)header, n_cols };
Contains the main CSV parsing algorithm and various utility functions.
CSV_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter)
Create a vector v where each index i corresponds to the ASCII number for a character and,...
CSVGuessResult guess_format(csv::string_view head, const std::vector< char > &delims={ ',', '|', '\t', ';', '^', '~' })
Guess the delimiter used by a delimiter-separated values file.
#define CSV_INLINE
Helper macro which should be #defined as "inline" in the single-header version.
The all-encompassing namespace.
internals::ThreadSafeDeque< CSVRow > RowCollection
Standard type for storing collection of rows.
nonstd::string_view string_view
The string_view class used by this library.
Stores the inferred format of a CSV file.