6#include "data_frame.hpp"
8#include "string_view_stream.hpp"
14#include <unordered_map>
26 internals::SpeculativeParseDiagnostics speculative_diagnostics;
43 std::unique_ptr<std::istream> ss(
new std::stringstream(std::string(in)));
58 return CSVReader(std::move(stream), format);
65 return parse(in, format);
76 inline CSVReader operator ""_csv(
const char* in,
size_t n) {
85 inline CSVReader operator ""_csv_no_header(
const char* in,
size_t n) {
99 std::unordered_map<std::string, DataType>
csv_data_types(CSVReader& reader);
118 typename... ReaderArgs,
119 csv::enable_if_t<std::is_constructible<CSVReader, ReaderArgs...>::value,
int> = 0
121 inline std::unordered_map<std::string, DataType>
csv_data_types(ReaderArgs&&... reader_args) {
122 CSVReader reader(std::forward<ReaderArgs>(reader_args)...);
138 template<
typename State,
typename Fn>
142 std::vector<State>& states,
144 size_t chunk_size = 50000
146 if (chunk_size == 0) {
147 throw std::invalid_argument(internals::ERROR_CHUNK_PARALLEL_APPLY_ZERO);
150 std::vector<CSVRow> rows;
151 while (reader.read_chunk(rows, chunk_size)) {
152 DataFrame<> batch(std::move(rows));
153 batch.column_parallel_apply(executor, states, std::forward<Fn>(fn));
162 template<
typename State,
typename Fn>
165 std::vector<State>& states,
167 size_t chunk_size = 50000
179 CSVReader reader(filename, reader_format);
182 std::vector<CSVRow> rows;
183 while (reader.read_chunk(rows, 50000)) {}
187 reader.get_col_names(),
190 reader.get_col_names().size(),
191 reader.parse_worker_count(),
192 reader.speculative_diagnostics()
200 auto head = internals::parser::get_csv_head(filename);
209 std::distance(col_names.begin(), std::find(col_names.begin(), col_names.end(), col_name));
Main class for parsing CSVs from files and in-memory sources.
const std::vector< std::string > & get_col_names() const
Return the active column names in CSV order.
Persistent execution backend for batch-oriented DataFrame column work.
Lightweight istream over csv::string_view with zero copy.
A standalone header file containing shared code.
Shared exception message templates and throw helpers.
Defines functionality needed for basic CSV parsing.
CSV scalar type classification adapter.
The all-encompassing namespace.
long long get_col_pos(csv::string_view filename, csv::string_view col_name, const CSVFormat &format=CSVFormat::guess_csv())
Find the position of a column in a CSV file, or return CSV_NOT_FOUND if the column is not present.
CSVReader parse_unsafe(csv::string_view in, CSVFormat format=CSVFormat::guess_csv())
Parse CSV from an in-memory view with zero copy.
CSVFileInfo get_file_info(const std::string &filename)
Get basic information about a CSV file.
void chunk_parallel_apply(CSVReader &reader, DataFrameExecutor &executor, std::vector< State > &states, Fn &&fn, size_t chunk_size=50000)
Apply a per-column batch function over a CSVReader using a reusable executor.
CSVReader parse(csv::string_view in, const CSVFormat &format=CSVFormat::guess_csv())
Parse CSV from a string view, copying the input into an owned buffer.
std::vector< std::string > get_col_names(csv::string_view filename, const CSVFormat &format=CSVFormat::guess_csv())
Get the column names of a CSV file using just the first 500KB.
CSVReader parse_no_header(csv::string_view in)
Parses a CSV string with no headers.
constexpr int CSV_NOT_FOUND
Integer indicating a requested column wasn't found.
std::unordered_map< std::string, DataType > csv_data_types(CSVReader &reader)
Infer SQL-friendly column data types from an existing CSVReader.
std::string_view string_view
The string_view class used by this library.
Returned by get_file_info()
size_t n_cols
Number of columns in a CSV.
std::vector< std::string > col_names
CSV column names.
size_t parse_worker_count
Number of parser worker threads used.
char delim
Delimiting character.
std::string filename
Filename.
size_t n_rows
Number of rows in a file.