Vince's CSV Parser
Loading...
Searching...
No Matches
csv::DataFrame< KeyType > Class Template Reference

Classes

struct  const_row_accessor
 
struct  mutable_row_accessor
 

Public Types

using row_type = DataFrameRow< KeyType >
 
using column_type = DataFrameColumn< KeyType >
 
using iterator = internals::indexed_proxy_iterator< DataFrame< KeyType >, DataFrameRow< KeyType >, mutable_row_accessor >
 Row-wise iterator over DataFrameRow entries.
 
using const_iterator = internals::indexed_proxy_iterator< const DataFrame< KeyType >, DataFrameRow< KeyType >, const_row_accessor >
 Row-wise const iterator over DataFrameRow entries.
 
using DuplicateKeyPolicy = DataFrameOptions::DuplicateKeyPolicy
 

Public Member Functions

 DataFrame ()=default
 Construct an empty DataFrame.
 
 DataFrame (CSVReader &reader)
 Construct an unkeyed DataFrame from a CSV reader.
 
 DataFrame (std::vector< CSVRow > rows)
 Construct an unkeyed DataFrame from an existing batch of rows.
 
 DataFrame (CSVReader &reader, const DataFrameOptions &options)
 Construct a keyed DataFrame from a CSV reader with options.
 
 DataFrame (csv::string_view filename, const DataFrameOptions &options, CSVFormat format=CSVFormat::guess_csv())
 Construct a keyed DataFrame directly from a CSV file.
 
 DataFrame (CSVReader &reader, const std::string &_key_column, DuplicateKeyPolicy policy=DuplicateKeyPolicy::OVERWRITE, bool throw_on_missing_key=true)
 Construct a keyed DataFrame using a column name as the key.
 
template<typename KeyFunc , csv::enable_if_t< csv::is_invocable_returning< KeyFunc, KeyType, const CSVRow & >::value, int > = 0>
 DataFrame (CSVReader &reader, KeyFunc key_func, DuplicateKeyPolicy policy=DuplicateKeyPolicy::OVERWRITE)
 Construct a keyed DataFrame using a custom key function.
 
template<typename KeyFunc , csv::enable_if_t< csv::is_invocable_returning< KeyFunc, KeyType, const CSVRow & >::value, int > = 0>
 DataFrame (CSVReader &reader, KeyFunc key_func, const DataFrameOptions &options)
 Construct a keyed DataFrame using a custom key function with options.
 
size_t size () const noexcept
 Get the number of rows in the DataFrame.
 
bool empty () const noexcept
 Check if the DataFrame is empty (has no rows).
 
size_t n_rows () const noexcept
 Get the number of rows in the DataFrame.
 
size_t n_cols () const noexcept
 Get the number of columns in the DataFrame.
 
bool has_column (const std::string &name) const
 Check if a column exists in the DataFrame.
 
int index_of (const std::string &name) const
 Get the index of a column by name.
 
const std::vector< std::string > & columns () const noexcept
 Get the column names in order.
 
DataFrame selected_rows (const std::vector< std::uint8_t > &include_rows) const
 Build an unkeyed DataFrame containing rows whose corresponding mask entry is true.
 
DataFrameColumn< KeyType > column_view (size_t col_index) const
 Access a column view by position.
 
DataFrameColumn< KeyType > column_view (const std::string &name) const
 Access a column view by name.
 
template<typename K = KeyType, csv::enable_if_t<!std::is_integral< K >::value, int > = 0>
DataFrameRow< KeyType > operator[] (size_t i)
 Access a row by position (unchecked).
 
template<typename K = KeyType, csv::enable_if_t<!std::is_integral< K >::value, int > = 0>
DataFrameRow< KeyType > operator[] (size_t i) const
 Access a row by position (unchecked, const version).
 
DataFrameRow< KeyType > at (size_t i)
 Access a row by position with bounds checking.
 
DataFrameRow< KeyType > at (size_t i) const
 Access a row by position with bounds checking (const version).
 
DataFrameRow< KeyType > operator[] (const KeyType &key)
 Access a row by its key.
 
DataFrameRow< KeyType > operator[] (const KeyType &key) const
 Access a row by its key (const version).
 
bool contains (const KeyType &key) const
 Check if a key exists in the DataFrame.
 
template<typename T = std::string>
std::vector< T > column (const std::string &name) const
 Extract all values from a column with type conversion.
 
template<typename State , typename Fn >
void column_parallel_apply (DataFrameExecutor &executor, std::vector< State > &states, Fn &&fn) const
 Apply a batch-oriented function to each column, potentially in parallel.
 
template<typename State , typename Fn >
void column_parallel_apply (DataFrameExecutor &executor, const std::vector< size_t > &column_indices, std::vector< State > &states, Fn &&fn) const
 Apply a batch-oriented function to a selected subset of columns, potentially in parallel.
 
template<typename Fn >
void column_parallel_apply (DataFrameExecutor &executor, Fn &&fn) const
 Apply a read-only batch function to each column, potentially in parallel.
 
template<typename Fn >
void column_parallel_apply (DataFrameExecutor &executor, const std::vector< size_t > &column_indices, Fn &&fn) const
 Apply a read-only batch function to a selected subset of columns, potentially in parallel.
 
template<typename GroupFunc , typename GroupKey = invoke_result_t<GroupFunc, DataFrameRow<KeyType>>, csv::enable_if_t< internals::is_hashable< GroupKey >::value &&internals::is_equality_comparable< GroupKey >::value, int > = 0>
std::unordered_map< GroupKey, std::vector< size_t > > group_by (GroupFunc group_func) const
 Group row positions using an arbitrary grouping function.
 
std::unordered_map< std::string, std::vector< size_t > > group_by (const std::string &name) const
 Group row positions by the value of a column.
 
iterator begin ()
 Get iterator to the first row.
 
iterator end ()
 Get iterator past the last row.
 
const_iterator begin () const
 Get const iterator to the first row.
 
const_iterator end () const
 Get const iterator past the last row.
 
const_iterator cbegin () const
 Get const iterator to the first row (explicit).
 
const_iterator cend () const
 Get const iterator past the last row (explicit).
 

Friends

class DataFrameRow< KeyType >
 
class DataFrameColumn< KeyType >
 

Detailed Description

template<typename KeyType = std::string>
class csv::DataFrame< KeyType >

Definition at line 853 of file data_frame.hpp.

Member Typedef Documentation

◆ column_type

template<typename KeyType = std::string>
using csv::DataFrame< KeyType >::column_type = DataFrameColumn<KeyType>

Definition at line 858 of file data_frame.hpp.

◆ const_iterator

template<typename KeyType = std::string>
using csv::DataFrame< KeyType >::const_iterator = internals::indexed_proxy_iterator<const DataFrame<KeyType>, DataFrameRow<KeyType>, const_row_accessor>

Row-wise const iterator over DataFrameRow entries.

Provides read-only access to rows with edit support.

Definition at line 876 of file data_frame.hpp.

◆ DuplicateKeyPolicy

template<typename KeyType = std::string>
using csv::DataFrame< KeyType >::DuplicateKeyPolicy = DataFrameOptions::DuplicateKeyPolicy

Definition at line 893 of file data_frame.hpp.

◆ iterator

template<typename KeyType = std::string>
using csv::DataFrame< KeyType >::iterator = internals::indexed_proxy_iterator<DataFrame<KeyType>, DataFrameRow<KeyType>, mutable_row_accessor>

Row-wise iterator over DataFrameRow entries.

Provides access to rows with edit support.

Definition at line 873 of file data_frame.hpp.

◆ row_type

template<typename KeyType = std::string>
using csv::DataFrame< KeyType >::row_type = DataFrameRow<KeyType>

Definition at line 857 of file data_frame.hpp.

Constructor & Destructor Documentation

◆ DataFrame() [1/7]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( CSVReader & reader)
inline explicit

Construct an unkeyed DataFrame from a CSV reader.

Rows are accessible by position only.

Definition at line 902 of file data_frame.hpp.

◆ DataFrame() [2/7]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( std::vector< CSVRow > rows)
inline explicit

Construct an unkeyed DataFrame from an existing batch of rows.

Definition at line 907 of file data_frame.hpp.

◆ DataFrame() [3/7]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( CSVReader & reader,
const DataFrameOptions & options 
)
inline explicit

Construct a keyed DataFrame from a CSV reader with options.

Exceptions
std::runtime_error if key column is empty or not found

Definition at line 915 of file data_frame.hpp.

◆ DataFrame() [4/7]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( csv::string_view  filename,
const DataFrameOptions & options,
CSVFormat  format = CSVFormat::guess_csv() 
)
inline

Construct a keyed DataFrame directly from a CSV file.

Exceptions
std::runtime_error if key column is empty or not found

Definition at line 923 of file data_frame.hpp.

◆ DataFrame() [5/7]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( CSVReader & reader,
const std::string &  _key_column,
DuplicateKeyPolicy  policy = DuplicateKeyPolicy::OVERWRITE,
bool  throw_on_missing_key = true 
)
inline

Construct a keyed DataFrame using a column name as the key.

Exceptions
std::runtime_error if key column is empty or not found

Definition at line 936 of file data_frame.hpp.

◆ DataFrame() [6/7]

template<typename KeyType = std::string>
template<typename KeyFunc , csv::enable_if_t< csv::is_invocable_returning< KeyFunc, KeyType, const CSVRow & >::value, int > = 0>
csv::DataFrame< KeyType >::DataFrame ( CSVReader & reader,
KeyFunc  key_func,
DuplicateKeyPolicy  policy = DuplicateKeyPolicy::OVERWRITE 
)
inline

Construct a keyed DataFrame using a custom key function.

Exceptions
std::runtime_error if policy is THROW and duplicate keys are encountered

Definition at line 957 of file data_frame.hpp.

◆ DataFrame() [7/7]

template<typename KeyType = std::string>
template<typename KeyFunc , csv::enable_if_t< csv::is_invocable_returning< KeyFunc, KeyType, const CSVRow & >::value, int > = 0>
csv::DataFrame< KeyType >::DataFrame ( CSVReader & reader,
KeyFunc  key_func,
const DataFrameOptions & options 
)
inline

Construct a keyed DataFrame using a custom key function with options.

Definition at line 971 of file data_frame.hpp.

Member Function Documentation

◆ at() [1/2]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::at ( size_t  i)
inline

Access a row by position with bounds checking.

Exceptions
std::out_of_range if index is out of bounds

Definition at line 1073 of file data_frame.hpp.

◆ at() [2/2]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::at ( size_t  i) const
inline

Access a row by position with bounds checking (const version).

Definition at line 1080 of file data_frame.hpp.

◆ begin() [1/2]

template<typename KeyType = std::string>
iterator csv::DataFrame< KeyType >::begin ( )
inline

Get iterator to the first row.

Definition at line 1270 of file data_frame.hpp.

◆ begin() [2/2]

template<typename KeyType = std::string>
const_iterator csv::DataFrame< KeyType >::begin ( ) const
inline

Get const iterator to the first row.

Definition at line 1276 of file data_frame.hpp.

◆ cbegin()

template<typename KeyType = std::string>
const_iterator csv::DataFrame< KeyType >::cbegin ( ) const
inline

Get const iterator to the first row (explicit).

Definition at line 1282 of file data_frame.hpp.

◆ cend()

template<typename KeyType = std::string>
const_iterator csv::DataFrame< KeyType >::cend ( ) const
inline

Get const iterator past the last row (explicit).

Definition at line 1285 of file data_frame.hpp.

◆ column()

template<typename KeyType = std::string>
template<typename T = std::string>
std::vector< T > csv::DataFrame< KeyType >::column ( const std::string &  name) const
inline

Extract all values from a column with type conversion.

Accounts for edited values in the overlay.

Template Parameters
T: Target type for conversion (default: std::string)
Exceptions
std::out_of_range if column is not found

Definition at line 1125 of file data_frame.hpp.

◆ column_parallel_apply() [1/4]

template<typename KeyType = std::string>
template<typename Fn >
void csv::DataFrame< KeyType >::column_parallel_apply ( DataFrameExecutor & executor,
const std::vector< size_t > &  column_indices,
Fn &&  fn 
) const
inline

Apply a read-only batch function to a selected subset of columns, potentially in parallel.

This overload is for callers who want to process only specific columns and prefer to manage any output storage externally.

Exceptions
std::out_of_range if any column index is invalid

Definition at line 1216 of file data_frame.hpp.

◆ column_parallel_apply() [2/4]

template<typename KeyType = std::string>
template<typename State , typename Fn >
void csv::DataFrame< KeyType >::column_parallel_apply ( DataFrameExecutor & executor,
const std::vector< size_t > &  column_indices,
std::vector< State > &  states,
Fn &&  fn 
) const
inline

Apply a batch-oriented function to a selected subset of columns, potentially in parallel.

The callback receives a lightweight column view plus a mutable per-selected-column state object from states.

Exceptions
std::invalid_argument if states.size() != column_indices.size()
std::out_of_range if any column index is invalid

Definition at line 1174 of file data_frame.hpp.

◆ column_parallel_apply() [3/4]

template<typename KeyType = std::string>
template<typename Fn >
void csv::DataFrame< KeyType >::column_parallel_apply ( DataFrameExecutor & executor,
Fn &&  fn 
) const
inline

Apply a read-only batch function to each column, potentially in parallel.

This overload is for callers who do not need one explicit mutable state object per column and prefer to manage any output storage externally.

Definition at line 1199 of file data_frame.hpp.

◆ column_parallel_apply() [4/4]

template<typename KeyType = std::string>
template<typename State , typename Fn >
void csv::DataFrame< KeyType >::column_parallel_apply ( DataFrameExecutor & executor,
std::vector< State > &  states,
Fn &&  fn 
) const
inline

Apply a batch-oriented function to each column, potentially in parallel.

The callback receives a lightweight column view plus a mutable per-column state object from states.

Callbacks may safely perform read-only access through the provided column view and any explicit read-only references they already hold to this batch-scoped DataFrame. Sparse-overlay cell edits through DataFrameRow or DataFrameCell are synchronized at row granularity, but structural mutations such as erase() are not thread-safe.

Exceptions
std::invalid_argument if states.size() != n_cols()

Definition at line 1151 of file data_frame.hpp.

◆ column_view() [1/2]

template<typename KeyType = std::string>
DataFrameColumn< KeyType > csv::DataFrame< KeyType >::column_view ( const std::string &  name) const
inline

Access a column view by name.

Definition at line 1037 of file data_frame.hpp.

◆ column_view() [2/2]

template<typename KeyType = std::string>
DataFrameColumn< KeyType > csv::DataFrame< KeyType >::column_view ( size_t  col_index) const
inline

Access a column view by position.

Definition at line 1028 of file data_frame.hpp.

◆ columns()

template<typename KeyType = std::string>
const std::vector< std::string > & csv::DataFrame< KeyType >::columns ( ) const
inline noexcept

Get the column names in order.

Definition at line 1004 of file data_frame.hpp.

◆ contains()

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::contains ( const KeyType &  key) const
inline

Check if a key exists in the DataFrame.

Exceptions
std::runtime_error if the DataFrame was not created with a key column

Definition at line 1111 of file data_frame.hpp.

◆ empty()

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::empty ( ) const
inline noexcept

Check if the DataFrame is empty (has no rows).

Definition at line 983 of file data_frame.hpp.

◆ end() [1/2]

template<typename KeyType = std::string>
iterator csv::DataFrame< KeyType >::end ( )
inline

Get iterator past the last row.

Definition at line 1273 of file data_frame.hpp.

◆ end() [2/2]

template<typename KeyType = std::string>
const_iterator csv::DataFrame< KeyType >::end ( ) const
inline

Get const iterator past the last row.

Definition at line 1279 of file data_frame.hpp.

◆ group_by() [1/2]

template<typename KeyType = std::string>
std::unordered_map< std::string, std::vector< size_t > > csv::DataFrame< KeyType >::group_by ( const std::string &  name) const
inline

Group row positions by the value of a column.

Exceptions
std::out_of_range if column is not found

Definition at line 1258 of file data_frame.hpp.

◆ group_by() [2/2]

template<typename KeyType = std::string>
template<typename GroupFunc , typename GroupKey = invoke_result_t<GroupFunc, DataFrameRow<KeyType>>, csv::enable_if_t< internals::is_hashable< GroupKey >::value &&internals::is_equality_comparable< GroupKey >::value, int > = 0>
std::unordered_map< GroupKey, std::vector< size_t > > csv::DataFrame< KeyType >::group_by ( GroupFunc  group_func) const
inline

Group row positions using an arbitrary grouping function.

Template Parameters
GroupFunc: Callable that takes a DataFrameRow and returns a hashable key

Definition at line 1242 of file data_frame.hpp.

◆ has_column()

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::has_column ( const std::string &  name) const
inline

Check if a column exists in the DataFrame.

Definition at line 994 of file data_frame.hpp.

◆ index_of()

template<typename KeyType = std::string>
int csv::DataFrame< KeyType >::index_of ( const std::string &  name) const
inline

Get the index of a column by name.

Definition at line 999 of file data_frame.hpp.

◆ n_cols()

template<typename KeyType = std::string>
size_t csv::DataFrame< KeyType >::n_cols ( ) const
inline noexcept

Get the number of columns in the DataFrame.

Definition at line 991 of file data_frame.hpp.

◆ n_rows()

template<typename KeyType = std::string>
size_t csv::DataFrame< KeyType >::n_rows ( ) const
inline noexcept

Get the number of rows in the DataFrame.

Alias for size().

Definition at line 988 of file data_frame.hpp.

◆ operator[]() [1/4]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::operator[] ( const KeyType &  key)
inline

Access a row by its key.

Exceptions
std::runtime_error if the DataFrame was not created with a key column
std::out_of_range if the key is not found

Definition at line 1092 of file data_frame.hpp.

◆ operator[]() [2/4]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::operator[] ( const KeyType &  key) const
inline

Access a row by its key (const version).

Definition at line 1099 of file data_frame.hpp.

◆ operator[]() [3/4]

template<typename KeyType = std::string>
template<typename K = KeyType, csv::enable_if_t<!std::is_integral< K >::value, int > = 0>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::operator[] ( size_t  i)
inline

Access a row by position (unchecked).

Note
Disabled when KeyType is an integral type to prevent ambiguity with operator[](const KeyType&). Use at(size_t) for positional access on integer-keyed DataFrames.
Exceptions
std::out_of_range if index is out of bounds (via std::vector::at)

Definition at line 1052 of file data_frame.hpp.

◆ operator[]() [4/4]

template<typename KeyType = std::string>
template<typename K = KeyType, csv::enable_if_t<!std::is_integral< K >::value, int > = 0>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::operator[] ( size_t  i) const
inline

Access a row by position (unchecked, const version).

Disabled when KeyType is an integral type — use at(size_t) instead.

Definition at line 1062 of file data_frame.hpp.

◆ selected_rows()

template<typename KeyType = std::string>
DataFrame csv::DataFrame< KeyType >::selected_rows ( const std::vector< std::uint8_t > &  include_rows) const
inline

Build an unkeyed DataFrame containing rows whose corresponding mask entry is true.

CSVRow copies share the underlying parsed row storage, so this is intended for filtered document views that should avoid reparsing or rematerializing fields.

Definition at line 1011 of file data_frame.hpp.

◆ size()

template<typename KeyType = std::string>
size_t csv::DataFrame< KeyType >::size ( ) const
inline noexcept

Get the number of rows in the DataFrame.

Definition at line 978 of file data_frame.hpp.

Friends And Related Symbol Documentation

◆ DataFrameColumn< KeyType >

template<typename KeyType = std::string>
friend class DataFrameColumn< KeyType >
friend

Definition at line 753 of file data_frame.hpp.

◆ DataFrameRow< KeyType >

template<typename KeyType = std::string>
friend class DataFrameRow< KeyType >
friend

Definition at line 753 of file data_frame.hpp.


The documentation for this class was generated from the following file: