CSV Writing Guide
This page summarizes write-side APIs and practical usage patterns for emitting CSV/TSV data.
Core Writer APIs
Use csv::make_csv_writer() for comma-delimited output and csv::make_tsv_writer() for tab-delimited output.
Writing Containers with <tt>operator<<</tt>
Any row-like container of string-convertible values can be streamed directly.
// Verifies operator<< for several row-like containers of strings.
// Each container type is instantiated once via TEMPLATE_TEST_CASE.
TEMPLATE_TEST_CASE("CSV/TSV Writer - operator <<", "[test_csv_operator<<]",
    std::vector<std::string>, std::deque<std::string>, std::list<std::string>) {
    std::stringstream output, correct_comma, correct_tab;

    // Expected output: the CSV writer must quote "1,1" (it contains the
    // delimiter), while the TSV writer can emit it verbatim (no embedded tab).
    correct_comma << "A,B,C" << std::endl << "\"1,1\",2,3" << std::endl;
    correct_tab << "A\tB\tC" << std::endl << "1,1\t2\t3" << std::endl;

    auto test_row_1 = TestType({ "A", "B", "C" }),
        test_row_2 = TestType({ "1,1", "2", "3" });

    SECTION("CSV Writer") {
        // Fix: the writer must actually be constructed over `output`;
        // the original snippet streamed into an undeclared `csv_writer`.
        auto csv_writer = make_csv_writer(output);
        csv_writer << test_row_1 << test_row_2;
        REQUIRE(output.str() == correct_comma.str());
    }

    SECTION("TSV Writer") {
        // Fix: same as above for the tab-delimited writer.
        auto tsv_writer = make_tsv_writer(output);
        tsv_writer << test_row_1 << test_row_2;
        REQUIRE(output.str() == correct_tab.str());
    }
}
TSVWriter< OutputStream > make_tsv_writer(OutputStream &out, bool quote_minimal=true)
Return a csv::TSVWriter over the output stream.
CSVWriter< OutputStream > make_csv_writer(OutputStream &out, bool quote_minimal=true)
Return a csv::CSVWriter over the output stream.
Writing Tuples and Custom Types
DelimWriter can also serialize tuples and custom types that provide a string conversion.
// Minimal custom type demonstrating DelimWriter's requirement:
// any type convertible to std::string can be written as a field.
struct Time {
    std::string hour;
    std::string minute;

    // Render as "H:M", e.g. {"5", "30"} -> "5:30".
    operator std::string() const {
        return hour + ":" + minute;
    }
};
// NOTE(review): presumably excluded under Clang due to a tuple/compiler
// incompatibility in this test — confirm against the project's CI notes.
#ifndef __clang__
TEST_CASE("CSV Tuple", "[test_csv_tuple]") {
#ifdef CSV_HAS_CXX17
    // With C++17, exercise the custom string-convertible Time type.
    Time time = { "5", "30" };
#else
    // Pre-C++17 fallback: a plain string with the same rendered value.
    std::string time = "5:30";
#endif
    std::stringstream output, correct_output;

    // Fix: construct the writer over `output` before streaming into it;
    // the original snippet used an undeclared `csv_writer`.
    auto csv_writer = make_csv_writer(output);

    csv_writer << std::make_tuple("One", 2, "Three", 4.0, time)
        << std::make_tuple("One", (short)2, "Three", 4.0f, time)
        << std::make_tuple(-1, -2.0)
        << std::make_tuple(20.2, -20.3, -20.123)
        << std::make_tuple(0.0, 0.0f, 0);

    // Expected formatting of ints, shorts, floats, doubles, and the
    // string-convertible Time value.
    correct_output << "One,2,Three,4.0,5:30" << std::endl
        << "One,2,Three,4.0,5:30" << std::endl
        << "-1,-2.0" << std::endl
        << "20.20000,-20.30000,-20.12300" << std::endl
        << "0.0,0.0,0" << std::endl;

    REQUIRE(output.str() == correct_output.str());
}
#endif
Using <tt>write_row()</tt>
The write_row() method can be used to write rows with arbitrary fields and mixed types without having to construct a container first.
Through the magic of SFINAE, write_row() also supports any of the operations of operator<<.
// write_row() accepts a mixed-type variadic field list directly,
// with no intermediate container.
TEST_CASE("CSV Writer - write_row() with variadic fields", "[test_csv_write_row_variadic]") {
    std::stringstream out;
    auto writer = make_csv_writer(out);

    // Use one decimal place while writing the floating-point scores.
    set_decimal_places(1);
    writer.write_row("Name", "Age", "Score");
    writer.write_row("Alice", 30, 95.5);
    writer.write_row("Bob", 25, 87.5);
    writer.write_row("Charlie", 35, 92.5);
    // Restore precision to 5 (presumably the library default) so
    // subsequent tests are unaffected.
    set_decimal_places(5);

    const std::string expected =
        "Name,Age,Score\n"
        "Alice,30,95.5\n"
        "Bob,25,87.5\n"
        "Charlie,35,92.5\n";
    REQUIRE(out.str() == expected);
}
Data Reordering Workflow
For read-transform-write pipelines, csv::CSVRow supports conversion to std::vector<std::string>, which makes it straightforward to reorder/select fields before writing.
Typical flow:
- Read with CSVReader
- Convert each row to std::vector&lt;std::string&gt;
- Reorder/select fields
- Emit with CSVWriter
// Read-transform-write: read rows, select/reorder columns C and A,
// then emit the result through a CSVWriter.
TEST_CASE("CSV Writer - Reorder Columns", "[test_csv_reorder]") {
    auto rows = "A,B,C\r\n"
        "1,2,3\r\n"
        "4,5,6"_csv;

    std::stringstream output, correct;
    auto writer = make_csv_writer(output);

    // New header: columns C and A only, in that order.
    writer << std::vector<std::string>({ "C", "A" });

    for (auto& row : rows) {
        // Fix: the reordered field values were missing — the snippet
        // streamed an empty vector, which could never produce "3,1"/"6,4".
        writer << std::vector<std::string>({
            row["C"].get<std::string>(), row["A"].get<std::string>()
        });
    }

    correct << "C,A" << std::endl
        << "3,1" << std::endl
        << "6,4" << std::endl;

    REQUIRE(output.str() == correct.str());
}
csv::string_view (an alias of nonstd::string_view) is the string_view class used throughout this library.
C++20 Ranges Version
With C++20, you can use std::ranges::views to elegantly reorder fields in a single expression:
#ifdef CSV_HAS_CXX20
#include <ranges>

// C++20 variant of the reorder workflow: a lazy std::views::transform
// maps column names to this row's fields, and the view is streamed
// straight into the writer — no intermediate container.
TEST_CASE("CSV Writer - Reorder with Ranges", "[test_csv_reorder_ranges]") {
    auto rows = "A,B,C\r\n"
        "1,2,3\r\n"
        "4,5,6"_csv;

    std::stringstream output, correct;
    auto writer = make_csv_writer(output);

    // Emit the new header first: columns C and A, in that order.
    writer << std::vector<std::string>({ "C", "A" });

    for (auto& row : rows) {
        // Map each desired column name onto its field in this row.
        std::vector<std::string_view> wanted = { "C", "A" };
        writer << (wanted | std::views::transform(
            [&row](std::string_view name) { return row[name]; }));
    }

    correct << "C,A" << std::endl
        << "3,1" << std::endl
        << "6,4" << std::endl;

    REQUIRE(output.str() == correct.str());
}
#endif
DataFrame with Sparse Overlay
When working with DataFrames, you can efficiently update specific cells without reconstructing entire rows. The overlay mechanism stores only the changed cells and writes them correctly:
// A DataFrame overlay stores only the changed cells; unchanged data is
// served from the original rows when the frame is written back out.
TEST_CASE("DataFrame - Write with Sparse Overlay", "[test_dataframe_sparse_overlay_write]") {
    auto reader =
        "id,name,age,occupation,react_experience_years,favorite_hook,quote\n"
        "1,Chad Hooks,28,Senior React Engineer,5,useCallback,\"My useCallback has 12 dependencies and I'm scared to remove any\"\n"
        "2,Tailwind Tim,24,Frontend Architect,3,useEffect,\"I fixed the infinite loop by adding another useEffect\"\n"
        "3,Dan Abramov Disciple,31,Principal React Engineer,7,useMemo,\"If it's not memoized it's not React\"\n"
        "6,Class Component Carl,42,Legacy React Dev,12,none,\"Remember when React was fun? Pepperidge Farm remembers.\""_csv;

    auto options = DataFrameOptions();
    options.set_key_column("id");

    // Fix: `df` was used without ever being declared.
    // NOTE(review): assumed a DataFrame(reader, options) constructor here —
    // confirm against the DataFrame API before publishing.
    auto df = DataFrame(reader, options);

    // Sparse overlay updates: only these three cells are stored.
    df.set("1", "age", "29");
    df.set("3", "react_experience_years", "8");
    df.set("6", "quote", "Everything is fine in production");

    std::stringstream output;
    // Fix: `writer` was also used without being declared.
    auto writer = make_csv_writer(output);

    writer << df.columns();
    for (auto& row : df) {
#ifdef CSV_HAS_CXX20
        // Zero-copy path: stream a string_view range over the row.
        writer << row.to_sv_range();
#else
        writer << std::vector<std::string>(row);
#endif
    }

    std::string result = output.str();
    REQUIRE(result.find("1,Chad Hooks,29,") != std::string::npos);
    REQUIRE(result.find("3,Dan Abramov Disciple,31,Principal React Engineer,8,") != std::string::npos);
    REQUIRE(result.find("Everything is fine in production") != std::string::npos);
}
End-to-End Round-Trip Integrity Example
The following test is intentionally write-first then read/verify, but it validates the same data-integrity guarantee as read-transform-write user workflows.
// Write-first round trip: every field in the file is a distinct integer,
// so any parsing/writing corruption shows up as a value mismatch.
TEST_CASE("Round Trip with Distinct Field Values", "[test_roundtrip_distinct]") {
    auto filename = "round_trip_distinct.csv";
    FileGuard cleanup(filename);

    const size_t total_rows = 500000;

    // Write phase — scoped so the ofstream is closed (and flushed)
    // before any reader opens the file.
    {
        std::ofstream outfile(filename, std::ios::binary);
        auto writer = make_csv_writer(outfile);
        writer << std::vector<std::string>({ "col_A", "col_B", "col_C", "col_D", "col_E" });

        for (size_t row = 0; row < total_rows; row++) {
            const size_t base = row * 5;
            auto f0 = internals::to_string(base + 0);
            auto f1 = internals::to_string(base + 1);
            auto f2 = internals::to_string(base + 2);
            auto f3 = internals::to_string(base + 3);
            auto f4 = internals::to_string(base + 4);
            // string_views refer to the locals above, which outlive the write.
            writer << std::array<csv::string_view, 5>({ f0, f1, f2, f3, f4 });
        }
    }

    // Shared validation: both reader construction paths must see
    // identical data.
    auto validate_reader = [&](CSVReader& reader) {
        size_t row_idx = 0;
        for (auto& row : reader) {
            REQUIRE(row.size() == 5);
            const size_t base = row_idx * 5;
            REQUIRE(row["col_A"].get<size_t>() == base + 0);
            REQUIRE(row["col_B"].get<size_t>() == base + 1);
            REQUIRE(row["col_C"].get<size_t>() == base + 2);
            REQUIRE(row["col_D"].get<size_t>() == base + 3);
            REQUIRE(row["col_E"].get<size_t>() == base + 4);

            // No field should have absorbed a delimiter or newline.
            for (auto& field : row) {
                auto sv = field.get_sv();
                REQUIRE(sv.find('\n') == std::string::npos);
                REQUIRE(sv.find(',') == std::string::npos);
            }
            row_idx++;
        }
        REQUIRE(reader.n_rows() == total_rows);
    };

    SECTION("Memory-mapped file path") {
        CSVReader reader(filename);
        validate_reader(reader);
    }

    SECTION("std::ifstream path (issue #281)") {
        std::ifstream infile(filename, std::ios::binary);
        CSVReader reader(infile, CSVFormat());
        validate_reader(reader);
    }
}