1#include "csv_utility.hpp"
2#include "data_frame.hpp"
6 std::unordered_map<std::string, DataType> csv_dtypes;
7 const auto col_names = reader.get_col_names();
8 std::vector<std::unordered_map<DataType, size_t>> type_counts(col_names.size());
9 constexpr size_t TYPE_CHUNK_SIZE = 5000;
13 for (
size_t row_index = 0; row_index < column.
size(); ++row_index) {
14 counts[internals::data_type(column.
get_sv(row_index))]++;
20 for (
size_t i = 0; i < col_names.size(); i++) {
21 auto& col = type_counts[i];
22 auto& col_name = col_names[i];
Main class for parsing CSVs from files and in-memory sources.
Lightweight non-owning view over one DataFrame column.
csv::string_view get_sv(size_t row_index) const
Access a visible cell value as a string_view without materializing a DataFrameCell.
size_t size() const noexcept
Number of rows in the parent batch.
#define CSV_INLINE
Helper macro that should be #defined as "inline" in the single-header version.
The all-encompassing namespace.
@ CSV_TIMESTAMP
Timestamp value.
@ CSV_INT64
64-bit integer.
@ CSV_DOUBLE
Floating point value.
@ CSV_INT16
16-bit integer.
@ CSV_INT32
32-bit integer.
@ CSV_STRING
Non-scalar string.
void chunk_parallel_apply(CSVReader &reader, DataFrameExecutor &executor, std::vector< State > &states, Fn &&fn, size_t chunk_size=50000)
Apply a per-column batch function over a CSVReader using a reusable executor.
std::unordered_map< std::string, DataType > csv_data_types(CSVReader &reader)
Infer SQL-friendly column data types from an existing CSVReader.