Vince's CSV Parser
Loading...
Searching...
No Matches
csv_stat.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <unordered_map>
7#include <sstream>
8#include <vector>
9#include "csv_reader.hpp"
10
11namespace csv {
18 class CSVStat {
19 public:
20 using FreqCount = std::unordered_map<std::string, size_t>;
21 using TypeCount = std::unordered_map<DataType, size_t>;
22
23 std::vector<long double> get_mean() const;
24 std::vector<long double> get_variance() const;
25 std::vector<long double> get_mins() const;
26 std::vector<long double> get_maxes() const;
27 std::vector<FreqCount> get_counts() const;
28 std::vector<TypeCount> get_dtypes() const;
29
30 std::vector<std::string> get_col_names() const {
31 return this->reader.get_col_names();
32 }
33
35 CSVStat(std::stringstream& source, CSVFormat format = CSVFormat());
36 private:
37 // An array of rolling averages
38 // Each index corresponds to the rolling mean for the column at said index
39 std::vector<long double> rolling_means;
40 std::vector<long double> rolling_vars;
41 std::vector<long double> mins;
42 std::vector<long double> maxes;
43 std::vector<FreqCount> counts;
44 std::vector<TypeCount> dtypes;
45 std::vector<long double> n;
46
47 // Statistic calculators
48 void variance(const long double&, const size_t&);
49 void count(CSVField&, const size_t&);
50 void min_max(const long double&, const size_t&);
51 void dtype(CSVField&, const size_t&);
52
53 void calc();
54 void calc_chunk();
55 void calc_worker(const size_t&);
56
57 CSVReader reader;
58 std::deque<CSVRow> records = {};
59 };
60}
Data type representing individual CSV values.
Definition csv_row.hpp:39
Stores information about how to parse a CSV file.
static CSVFormat guess_csv()
CSVFormat for guessing the delimiter.
Main class for parsing CSVs from files and in-memory sources.
std::vector< std::string > get_col_names() const
Return the CSV's column names as a vector of strings.
Class for calculating statistics from CSV files and in-memory sources.
Definition csv_stat.hpp:18
std::vector< long double > get_mean() const
Return current means.
Definition csv_stat.cpp:25
std::vector< long double > get_variance() const
Return current variances.
Definition csv_stat.cpp:34
std::vector< long double > get_mins() const
Return current mins.
Definition csv_stat.cpp:43
std::vector< TypeCount > get_dtypes() const
Get data type counts for each column.
Definition csv_stat.cpp:70
std::vector< long double > get_maxes() const
Return current maxes.
Definition csv_stat.cpp:52
std::vector< FreqCount > get_counts() const
Get counts for each column.
Definition csv_stat.cpp:61
Defines functionality needed for basic CSV parsing.
The all-encompassing namespace.
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:99