// Maps a column-name string to a DataType.
// NOTE(review): presumably the final inferred type per column; the code that fills it is not visible here — confirm.
6 std::unordered_map<std::string, DataType> csv_dtypes;
// Column names as returned by the reader, held in a const local.
7 const auto col_names = reader.get_col_names();
// One DataType -> count map per column; sized to col_names so both can be indexed with the same position.
8 std::vector<std::unordered_map<DataType, size_t>> type_counts(col_names.size());
// Compile-time chunk size.
// NOTE(review): presumably the number of rows processed per batch; its use is outside this view — confirm.
9 constexpr size_t TYPE_CHUNK_SIZE = 5000;
12 [](DataFrame<>::column_type column, std::unordered_map<DataType, size_t>& counts) {
13 for (
size_t row_index = 0; row_index < column.size(); ++row_index) {
14 counts[internals::data_type(column.get_sv(row_index))]++;
20 for (
size_t i = 0; i < col_names.size(); i++) {
21 auto& col = type_counts[i];
22 auto& col_name = col_names[i];
void chunk_parallel_apply(CSVReader &reader, DataFrameExecutor &executor, std::vector< State > &states, Fn &&fn, size_t chunk_size=50000)
Apply a per-column batch function over a CSVReader using a reusable executor.