Vince's CSV Parser
Loading...
Searching...
No Matches
csv::DataFrame< KeyType > Class Template Reference

Classes

class  const_iterator
 Row-wise const iterator over DataFrameRow entries. More...
 
class  iterator
 Row-wise iterator over DataFrameRow entries. More...
 

Public Types

using row_entry = std::pair< KeyType, CSVRow >
 Type alias for internal row storage: pair of key and CSVRow.
 
using DuplicateKeyPolicy = DataFrameOptions::DuplicateKeyPolicy
 

Public Member Functions

 DataFrame ()=default
 Construct an empty DataFrame.
 
 DataFrame (CSVReader &reader)
 Construct an unkeyed DataFrame from a CSV reader.
 
 DataFrame (CSVReader &reader, const DataFrameOptions &options)
 Construct a keyed DataFrame from a CSV reader with options.
 
 DataFrame (csv::string_view filename, const DataFrameOptions &options, CSVFormat format=CSVFormat::guess_csv())
 Construct a keyed DataFrame directly from a CSV file.
 
 DataFrame (CSVReader &reader, const std::string &_key_column, DuplicateKeyPolicy policy=DuplicateKeyPolicy::OVERWRITE, bool throw_on_missing_key=true)
 Construct a keyed DataFrame using a column name as the key.
 
template<typename KeyFunc , typename ResultType = invoke_result_t<KeyFunc, const CSVRow&>, csv::enable_if_t< std::is_convertible< ResultType, KeyType >::value, int > = 0>
 DataFrame (CSVReader &reader, KeyFunc key_func, DuplicateKeyPolicy policy=DuplicateKeyPolicy::OVERWRITE)
 Construct a keyed DataFrame using a custom key function.
 
template<typename KeyFunc , typename ResultType = invoke_result_t<KeyFunc, const CSVRow&>, csv::enable_if_t< std::is_convertible< ResultType, KeyType >::value, int > = 0>
 DataFrame (CSVReader &reader, KeyFunc key_func, const DataFrameOptions &options)
 Construct a keyed DataFrame using a custom key function with options.
 
size_t size () const noexcept
 Get the number of rows in the DataFrame.
 
bool empty () const noexcept
 Check if the DataFrame is empty (has no rows).
 
size_t n_rows () const noexcept
 Get the number of rows in the DataFrame.
 
size_t n_cols () const noexcept
 Get the number of columns in the DataFrame.
 
bool has_column (const std::string &name) const
 Check if a column exists in the DataFrame.
 
int index_of (const std::string &name) const
 Get the index of a column by name.
 
const std::vector< std::string > & columns () const noexcept
 Get the column names in order.
 
const std::string & key_name () const noexcept
 Get the name of the key column (empty string if unkeyed).
 
template<typename K = KeyType, csv::enable_if_t<!std::is_integral< K >::value, int > = 0>
DataFrameRow< KeyType > operator[] (size_t i)
 Access a row by position (unchecked).
 
template<typename K = KeyType, csv::enable_if_t<!std::is_integral< K >::value, int > = 0>
DataFrameRow< KeyType > operator[] (size_t i) const
 Access a row by position (unchecked, const version).
 
DataFrameRow< KeyType > at (size_t i)
 Access a row by position with bounds checking.
 
DataFrameRow< KeyType > at (size_t i) const
 Access a row by position with bounds checking (const version).
 
DataFrameRow< KeyType > operator[] (const KeyType &key)
 Access a row by its key.
 
DataFrameRow< KeyType > operator[] (const KeyType &key) const
 Access a row by its key (const version).
 
DataFrameRow< KeyType > iloc (size_t i)
 Access a row by position (iloc-style, pandas naming).
 
DataFrameRow< KeyType > iloc (size_t i) const
 Access a row by position (const version).
 
bool try_get (size_t i, DataFrameRow< KeyType > &out)
 Attempt to access a row by position without throwing.
 
bool try_get (size_t i, DataFrameRow< KeyType > &out) const
 Attempt to access a row by position without throwing (const version).
 
const KeyType & key_at (size_t i) const
 Get the key for a row at a given position.
 
bool contains (const KeyType &key) const
 Check if a key exists in the DataFrame.
 
DataFrameRow< KeyType > at (const KeyType &key)
 Access a row by its key with bounds checking.
 
DataFrameRow< KeyType > at (const KeyType &key) const
 Access a row by its key with bounds checking (const version).
 
bool try_get (const KeyType &key, DataFrameRow< KeyType > &out)
 Attempt to access a row by key without throwing.
 
bool try_get (const KeyType &key, DataFrameRow< KeyType > &out) const
 Attempt to access a row by key without throwing (const version).
 
std::string get (const KeyType &key, const std::string &column) const
 Get a cell value as a string, accounting for edits.
 
void set (const KeyType &key, const std::string &column, const std::string &value)
 Set a cell value (stored in edit overlay).
 
bool erase_row (const KeyType &key)
 Remove a row by its key.
 
bool erase_row_at (size_t i)
 Remove a row by its position.
 
void set_at (size_t i, const std::string &column, const std::string &value)
 Set a cell value by position (stored in edit overlay).
 
template<typename T = std::string>
std::vector< T > column (const std::string &name) const
 Extract all values from a column with type conversion.
 
template<typename GroupFunc , typename GroupKey = invoke_result_t<GroupFunc, const CSVRow&>, csv::enable_if_t< internals::is_hashable< GroupKey >::value &&internals::is_equality_comparable< GroupKey >::value, int > = 0>
std::unordered_map< GroupKey, std::vector< size_t > > group_by (GroupFunc group_func) const
 Group row positions using an arbitrary grouping function.
 
std::unordered_map< std::string, std::vector< size_t > > group_by (const std::string &name, bool use_edits=true) const
 Group row positions by the value of a column.
 
iterator begin ()
 Get iterator to the first row.
 
iterator end ()
 Get iterator past the last row.
 
const_iterator begin () const
 Get const iterator to the first row.
 
const_iterator end () const
 Get const iterator past the last row.
 
const_iterator cbegin () const
 Get const iterator to the first row (explicit).
 
const_iterator cend () const
 Get const iterator past the last row (explicit).
 

Detailed Description

template<typename KeyType = std::string>
class csv::DataFrame< KeyType >

Definition at line 190 of file data_frame.hpp.

Member Typedef Documentation

◆ DuplicateKeyPolicy

template<typename KeyType = std::string>
using csv::DataFrame< KeyType >::DuplicateKeyPolicy = DataFrameOptions::DuplicateKeyPolicy

Definition at line 312 of file data_frame.hpp.

◆ row_entry

template<typename KeyType = std::string>
using csv::DataFrame< KeyType >::row_entry = std::pair<KeyType, CSVRow>

Type alias for internal row storage: pair of key and CSVRow.

Definition at line 193 of file data_frame.hpp.

Constructor & Destructor Documentation

◆ DataFrame() [1/6]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( CSVReader &  reader)
inline explicit

Construct an unkeyed DataFrame from a CSV reader.

Rows are accessible by position only.

Definition at line 321 of file data_frame.hpp.

◆ DataFrame() [2/6]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( CSVReader &  reader,
const DataFrameOptions &  options 
)
inline explicit

Construct a keyed DataFrame from a CSV reader with options.

Parameters
reader — CSV reader to consume
options — Configuration including key column and duplicate policies
Exceptions
std::runtime_error — if key column is empty or not found

Definition at line 332 of file data_frame.hpp.

◆ DataFrame() [3/6]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( csv::string_view  filename,
const DataFrameOptions &  options,
CSVFormat  format = CSVFormat::guess_csv() 
)
inline

Construct a keyed DataFrame directly from a CSV file.

Parameters
filename — Path to the CSV file
options — Configuration including key column and duplicate policies
format — CSV format specification (defaults to auto-detection)
Exceptions
std::runtime_error — if key column is empty or not found

Definition at line 344 of file data_frame.hpp.

◆ DataFrame() [4/6]

template<typename KeyType = std::string>
csv::DataFrame< KeyType >::DataFrame ( CSVReader &  reader,
const std::string &  _key_column,
DuplicateKeyPolicy  policy = DuplicateKeyPolicy::OVERWRITE,
bool  throw_on_missing_key = true 
)
inline

Construct a keyed DataFrame using a column name as the key.

Parameters
reader — CSV reader to consume
_key_column — Name of the column to use as the key
policy — How to handle duplicate keys (default: OVERWRITE)
throw_on_missing_key — Whether to throw if a key value cannot be parsed (default: true)
Exceptions
std::runtime_error — if key column is empty or not found

Definition at line 362 of file data_frame.hpp.

◆ DataFrame() [5/6]

template<typename KeyType = std::string>
template<typename KeyFunc , typename ResultType = invoke_result_t<KeyFunc, const CSVRow&>, csv::enable_if_t< std::is_convertible< ResultType, KeyType >::value, int > = 0>
csv::DataFrame< KeyType >::DataFrame ( CSVReader &  reader,
KeyFunc  key_func,
DuplicateKeyPolicy  policy = DuplicateKeyPolicy::OVERWRITE 
)
inline

Construct a keyed DataFrame using a custom key function.

Parameters
reader — CSV reader to consume
key_func — Function that extracts a key from each row
policy — How to handle duplicate keys (default: OVERWRITE)
Exceptions
std::runtime_error — if policy is THROW and duplicate keys are encountered

Definition at line 388 of file data_frame.hpp.

◆ DataFrame() [6/6]

template<typename KeyType = std::string>
template<typename KeyFunc , typename ResultType = invoke_result_t<KeyFunc, const CSVRow&>, csv::enable_if_t< std::is_convertible< ResultType, KeyType >::value, int > = 0>
csv::DataFrame< KeyType >::DataFrame ( CSVReader &  reader,
KeyFunc  key_func,
const DataFrameOptions &  options 
)
inline

Construct a keyed DataFrame using a custom key function with options.

Parameters
reader — CSV reader to consume
key_func — Function that extracts a key from each row
options — Configuration for duplicate key policy

Definition at line 409 of file data_frame.hpp.

Member Function Documentation

◆ at() [1/4]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::at ( const KeyType &  key)
inline

Access a row by its key with bounds checking.

Parameters
key — The row key to look up
Returns
DataFrameRow proxy with edit support
Exceptions
std::runtime_error — if the DataFrame was not created with a key column
std::out_of_range — if the key is not found

Definition at line 640 of file data_frame.hpp.

◆ at() [2/4]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::at ( const KeyType &  key) const
inline

Access a row by its key with bounds checking (const version).

Definition at line 650 of file data_frame.hpp.

◆ at() [3/4]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::at ( size_t  i)
inline

Access a row by position with bounds checking.

Parameters
i — Row index (0-based)
Returns
DataFrameRow proxy with edit support
Exceptions
std::out_of_range — if index is out of bounds

Definition at line 500 of file data_frame.hpp.

◆ at() [4/4]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::at ( size_t  i) const
inline

Access a row by position with bounds checking (const version).

Definition at line 510 of file data_frame.hpp.

◆ begin() [1/2]

template<typename KeyType = std::string>
iterator csv::DataFrame< KeyType >::begin ( )
inline

Get iterator to the first row.

Definition at line 897 of file data_frame.hpp.

◆ begin() [2/2]

template<typename KeyType = std::string>
const_iterator csv::DataFrame< KeyType >::begin ( ) const
inline

Get const iterator to the first row.

Definition at line 903 of file data_frame.hpp.

◆ cbegin()

template<typename KeyType = std::string>
const_iterator csv::DataFrame< KeyType >::cbegin ( ) const
inline

Get const iterator to the first row (explicit).

Definition at line 909 of file data_frame.hpp.

◆ cend()

template<typename KeyType = std::string>
const_iterator csv::DataFrame< KeyType >::cend ( ) const
inline

Get const iterator past the last row (explicit).

Definition at line 912 of file data_frame.hpp.

◆ column()

template<typename KeyType = std::string>
template<typename T = std::string>
std::vector< T > csv::DataFrame< KeyType >::column ( const std::string &  name) const
inline

Extract all values from a column with type conversion.

Accounts for edited values in the overlay.

Template Parameters
T — Target type for conversion (default: std::string)
Parameters
name — Column name
Returns
Vector of values converted to type T
Exceptions
std::runtime_error — if column is not found

Definition at line 800 of file data_frame.hpp.

◆ columns()

template<typename KeyType = std::string>
const std::vector< std::string > & csv::DataFrame< KeyType >::columns ( ) const
inline noexcept

Get the column names in order.

Definition at line 455 of file data_frame.hpp.

◆ contains()

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::contains ( const KeyType &  key) const
inline

Check if a key exists in the DataFrame.

Parameters
key — The key to check
Returns
true if the key exists, false otherwise
Exceptions
std::runtime_error — if the DataFrame was not created with a key column

Definition at line 626 of file data_frame.hpp.

◆ empty()

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::empty ( ) const
inline noexcept

Check if the DataFrame is empty (has no rows).

Definition at line 421 of file data_frame.hpp.

◆ end() [1/2]

template<typename KeyType = std::string>
iterator csv::DataFrame< KeyType >::end ( )
inline

Get iterator past the last row.

Definition at line 900 of file data_frame.hpp.

◆ end() [2/2]

template<typename KeyType = std::string>
const_iterator csv::DataFrame< KeyType >::end ( ) const
inline

Get const iterator past the last row.

Definition at line 906 of file data_frame.hpp.

◆ erase_row()

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::erase_row ( const KeyType &  key)
inline

Remove a row by its key.

Parameters
key — The row key to remove
Returns
true if the row was removed, false if not found
Exceptions
std::runtime_error — if the DataFrame was not created with a key column

Definition at line 741 of file data_frame.hpp.

◆ erase_row_at()

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::erase_row_at ( size_t  i)
inline

Remove a row by its position.

Parameters
i — Row index (0-based)
Returns
true if the row was removed, false if index out of bounds

Definition at line 762 of file data_frame.hpp.

◆ get()

template<typename KeyType = std::string>
std::string csv::DataFrame< KeyType >::get ( const KeyType &  key,
const std::string &  column 
) const
inline

Get a cell value as a string, accounting for edits.

Parameters
key — The row key
column — The column name
Returns
Cell value as a string (edited value if present, otherwise original)
Exceptions
std::runtime_error — if the DataFrame was not created with a key column
std::out_of_range — if the key is not found

Definition at line 705 of file data_frame.hpp.

◆ group_by() [1/2]

template<typename KeyType = std::string>
std::unordered_map< std::string, std::vector< size_t > > csv::DataFrame< KeyType >::group_by ( const std::string &  name,
bool  use_edits = true 
) const
inline

Group row positions by the value of a column.

Parameters
name — Column to group by
use_edits — If true, use edited values when present (default: true)
Returns
Map of column value -> vector of row indices with that value
Exceptions
std::runtime_error — if column is not found

Definition at line 861 of file data_frame.hpp.

◆ group_by() [2/2]

template<typename KeyType = std::string>
template<typename GroupFunc , typename GroupKey = invoke_result_t<GroupFunc, const CSVRow&>, csv::enable_if_t< internals::is_hashable< GroupKey >::value &&internals::is_equality_comparable< GroupKey >::value, int > = 0>
std::unordered_map< GroupKey, std::vector< size_t > > csv::DataFrame< KeyType >::group_by ( GroupFunc  group_func) const
inline

Group row positions using an arbitrary grouping function.

Template Parameters
GroupFunc — Callable that takes a CSVRow and returns a hashable key
Parameters
group_func — Function to extract group key from each row
Returns
Map of group key -> vector of row indices belonging to that group

Definition at line 842 of file data_frame.hpp.

◆ has_column()

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::has_column ( const std::string &  name) const
inline

Check if a column exists in the DataFrame.

Parameters
name — Column name to check
Returns
true if the column exists, false otherwise

Definition at line 437 of file data_frame.hpp.

◆ iloc() [1/2]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::iloc ( size_t  i)
inline

Access a row by position (iloc-style, pandas naming).

Parameters
i — Row index (0-based)
Returns
DataFrameRow proxy with edit support
Exceptions
std::out_of_range — if index is out of bounds

Definition at line 553 of file data_frame.hpp.

◆ iloc() [2/2]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::iloc ( size_t  i) const
inline

Access a row by position (const version).

Definition at line 563 of file data_frame.hpp.

◆ index_of()

template<typename KeyType = std::string>
int csv::DataFrame< KeyType >::index_of ( const std::string &  name) const
inline

Get the index of a column by name.

Parameters
name — Column name to find
Returns
Column index (0-based) or CSV_NOT_FOUND if not found

Definition at line 447 of file data_frame.hpp.

◆ key_at()

template<typename KeyType = std::string>
const KeyType & csv::DataFrame< KeyType >::key_at ( size_t  i) const
inline

Get the key for a row at a given position.

Parameters
i — Row index (0-based)
Returns
Reference to the key
Exceptions
std::runtime_error — if the DataFrame was not created with a key column
std::out_of_range — if index is out of bounds

Definition at line 614 of file data_frame.hpp.

◆ key_name()

template<typename KeyType = std::string>
const std::string & csv::DataFrame< KeyType >::key_name ( ) const
inline noexcept

Get the name of the key column (empty string if unkeyed).

Definition at line 460 of file data_frame.hpp.

◆ n_cols()

template<typename KeyType = std::string>
size_t csv::DataFrame< KeyType >::n_cols ( ) const
inline noexcept

Get the number of columns in the DataFrame.

Definition at line 429 of file data_frame.hpp.

◆ n_rows()

template<typename KeyType = std::string>
size_t csv::DataFrame< KeyType >::n_rows ( ) const
inline noexcept

Get the number of rows in the DataFrame.

Alias for size().

Definition at line 426 of file data_frame.hpp.

◆ operator[]() [1/4]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::operator[] ( const KeyType &  key)
inline

Access a row by its key.

Parameters
key — The row key to look up
Returns
DataFrameRow proxy with edit support
Exceptions
std::runtime_error — if the DataFrame was not created with a key column
std::out_of_range — if the key is not found

Definition at line 527 of file data_frame.hpp.

◆ operator[]() [2/4]

template<typename KeyType = std::string>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::operator[] ( const KeyType &  key) const
inline

Access a row by its key (const version).

Definition at line 537 of file data_frame.hpp.

◆ operator[]() [3/4]

template<typename KeyType = std::string>
template<typename K = KeyType, csv::enable_if_t<!std::is_integral< K >::value, int > = 0>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::operator[] ( size_t  i)
inline

Access a row by position (unchecked).

Note
Disabled when KeyType is an integral type to prevent ambiguity with operator[](const KeyType&). Use iloc() for positional access on integer-keyed DataFrames.
Parameters
i — Row index (0-based)
Returns
DataFrameRow proxy with edit support
Exceptions
std::out_of_range — if index is out of bounds (via std::vector::at)

Definition at line 477 of file data_frame.hpp.

◆ operator[]() [4/4]

template<typename KeyType = std::string>
template<typename K = KeyType, csv::enable_if_t<!std::is_integral< K >::value, int > = 0>
DataFrameRow< KeyType > csv::DataFrame< KeyType >::operator[] ( size_t  i) const
inline

Access a row by position (unchecked, const version).

Disabled when KeyType is an integral type — use iloc() instead.

Definition at line 487 of file data_frame.hpp.

◆ set()

template<typename KeyType = std::string>
void csv::DataFrame< KeyType >::set ( const KeyType &  key,
const std::string &  column,
const std::string &  value 
)
inline

Set a cell value (stored in edit overlay).

Parameters
key — The row key
column — The column name
value — The new value as a string
Exceptions
std::runtime_error — if the DataFrame was not created with a key column
std::out_of_range — if the key is not found

Definition at line 728 of file data_frame.hpp.

◆ set_at()

template<typename KeyType = std::string>
void csv::DataFrame< KeyType >::set_at ( size_t  i,
const std::string &  column,
const std::string &  value 
)
inline

Set a cell value by position (stored in edit overlay).

Parameters
i — Row index (0-based)
column — The column name
value — The new value as a string
Exceptions
std::runtime_error — if the DataFrame was not created with a key column
std::out_of_range — if index is out of bounds

Definition at line 780 of file data_frame.hpp.

◆ size()

template<typename KeyType = std::string>
size_t csv::DataFrame< KeyType >::size ( ) const
inline noexcept

Get the number of rows in the DataFrame.

Definition at line 416 of file data_frame.hpp.

◆ try_get() [1/4]

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::try_get ( const KeyType &  key,
DataFrameRow< KeyType > &  out 
)
inline

Attempt to access a row by key without throwing.

Parameters
key — The row key to look up
out — Output parameter that receives the DataFrameRow if found
Returns
true if the key exists, false otherwise
Exceptions
std::runtime_error — if the DataFrame was not created with a key column

Definition at line 667 of file data_frame.hpp.

◆ try_get() [2/4]

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::try_get ( const KeyType &  key,
DataFrameRow< KeyType > &  out 
) const
inline

Attempt to access a row by key without throwing (const version).

Definition at line 682 of file data_frame.hpp.

◆ try_get() [3/4]

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::try_get ( size_t  i,
DataFrameRow< KeyType > &  out 
)
inline

Attempt to access a row by position without throwing.

Parameters
i — Row index (0-based)
out — Output parameter that receives the DataFrameRow if found
Returns
true if the row exists, false otherwise

Definition at line 579 of file data_frame.hpp.

◆ try_get() [4/4]

template<typename KeyType = std::string>
bool csv::DataFrame< KeyType >::try_get ( size_t  i,
DataFrameRow< KeyType > &  out 
) const
inline

Attempt to access a row by position without throwing (const version).

Definition at line 593 of file data_frame.hpp.


The documentation for this class was generated from the following file: