Vince's CSV Parser
Loading...
Searching...
No Matches
CSV Writing Guide

CSV Writing Guide

This page summarizes write-side APIs and practical usage patterns for emitting CSV/TSV data.

Core Writer APIs

Use csv::make_csv_writer() for comma-delimited output and csv::make_tsv_writer() for tab-delimited output.

Writing Containers with operator<<

Any row-like container of string-convertible values can be streamed directly.

// Verify that any sequence container of strings can be streamed into both
// the comma-delimited and tab-delimited writers via operator<<.
TEMPLATE_TEST_CASE("CSV/TSV Writer - operator <<", "[test_csv_operator<<]",
    std::vector<std::string>, std::deque<std::string>, std::list<std::string>) {
    std::stringstream output, correct_comma, correct_tab;

    // Expected serializations: the CSV writer must quote the field that
    // contains its own delimiter ("1,1"); the TSV writer can leave it bare.
    correct_comma << "A,B,C" << std::endl << "\"1,1\",2,3" << std::endl;
    correct_tab << "A\tB\tC" << std::endl << "1,1\t2\t3" << std::endl;

    // Sample rows: a plain header plus a row whose first field embeds a comma.
    auto header_row = TestType({ "A", "B", "C" }),
        data_row = TestType({ "1,1", "2", "3" });

    SECTION("CSV Writer") {
        auto writer = make_csv_writer(output);
        writer << header_row << data_row;
        REQUIRE(output.str() == correct_comma.str());
    }

    SECTION("TSV Writer") {
        auto writer = make_tsv_writer(output);
        writer << header_row << data_row;
        REQUIRE(output.str() == correct_tab.str());
    }
}
TSVWriter< OutputStream > make_tsv_writer(OutputStream &out, bool quote_minimal=true)
Return a csv::TSVWriter over the output stream.
CSVWriter< OutputStream > make_csv_writer(OutputStream &out, bool quote_minimal=true)
Return a csv::CSVWriter over the output stream.

Writing Tuples and Custom Types

DelimWriter can also serialize tuples and custom types that provide a string conversion.

// Minimal user-defined type demonstrating that the writer accepts any type
// providing an implicit conversion to std::string.
struct Time {
    std::string hour;
    std::string minute;

    // Serialize as "hour:minute".
    operator std::string() const {
        return hour + ":" + minute;
    }
};
// Skipped on Clang (NOTE(review): presumably a historical Clang build issue
// with the tuple serialization path -- confirm whether the guard is still
// required).
#ifndef __clang__
TEST_CASE("CSV Tuple", "[test_csv_tuple]") {
// Under C++17 the last tuple element exercises the custom Time type
// (implicit std::string conversion); otherwise fall back to a plain
// std::string carrying the same serialized value.
#ifdef CSV_HAS_CXX17
Time time = { "5", "30" };
#else
std::string time = "5:30";
#endif
std::stringstream output, correct_output;
auto csv_writer = make_csv_writer(output);
// Heterogeneous tuples: strings, integers (int and short), and floating
// point (double and float) all serialize through the same operator<<.
csv_writer << std::make_tuple("One", 2, "Three", 4.0, time)
<< std::make_tuple("One", (short)2, "Three", 4.0f, time)
<< std::make_tuple(-1, -2.0)
<< std::make_tuple(20.2, -20.3, -20.123)
<< std::make_tuple(0.0, 0.0f, 0);
// Expected formatting: integers print without a decimal point; values with
// fractional parts use the 5-decimal-place default (see set_decimal_places
// elsewhere in this guide), while exact values such as 4.0/-2.0/0.0 print
// with a single trailing digit.
correct_output << "One,2,Three,4.0,5:30" << std::endl
<< "One,2,Three,4.0,5:30" << std::endl
<< "-1,-2.0" << std::endl
<< "20.20000,-20.30000,-20.12300" << std::endl
<< "0.0,0.0,0" << std::endl;
REQUIRE(output.str() == correct_output.str());
}
#endif

Using write_row()

The write_row() method can be used to write rows with arbitrary fields and mixed types without having to construct a container first.

Through the magic of SFINAE, write_row() also supports any of the operations of operator<<.

// write_row() accepts a variadic list of mixed-type fields, so a row can be
// emitted without building a container first.
TEST_CASE("CSV Writer - write_row() with variadic fields", "[test_csv_write_row_variadic]") {
    std::stringstream output, expected;
    auto writer = make_csv_writer(output);

    // Important! Call this or else the default will be used
    set_decimal_places(1);

    // Mixed string/int/double fields in a single call each.
    writer.write_row("Name", "Age", "Score");
    writer.write_row("Alice", 30, 95.5);
    // Use values with binary-exact fractional parts for a stable user-facing example.
    writer.write_row("Bob", 25, 87.5);
    writer.write_row("Charlie", 35, 92.5);

    // Reset to default
    set_decimal_places(5);

    expected << "Name,Age,Score" << std::endl
             << "Alice,30,95.5" << std::endl
             << "Bob,25,87.5" << std::endl
             << "Charlie,35,92.5" << std::endl;

    REQUIRE(output.str() == expected.str());
}

Data Reordering Workflow

For read-transform-write pipelines, csv::CSVRow supports conversion to std::vector<std::string>, which makes it straightforward to reorder/select fields before writing.

Typical flow:

  1. Read with CSVReader
  2. Convert row to std::vector<std::string>
  3. Reorder/select fields
  4. Emit with CSVWriter
// Read-transform-write: parse a small CSV, then re-emit only columns C and A
// (in that order) by converting each selected field to std::string.
TEST_CASE("CSV Writer - Reorder Columns", "[test_csv_reorder]") {
    auto rows = "A,B,C\r\n"
                "1,2,3\r\n"
                "4,5,6"_csv;

    std::stringstream output, expected;
    auto writer = make_csv_writer(output);

    // Emit the reordered header first.
    writer << std::vector<std::string>({ "C", "A" });

    // For every parsed row, pull out fields C and A and write them as a
    // fresh two-field row.
    for (auto& row : rows) {
        std::vector<std::string> reordered = {
            row[csv::string_view("C")].get<std::string>(),
            row[csv::string_view("A")].get<std::string>()
        };
        writer << reordered;
    }

    expected << "C,A" << std::endl
             << "3,1" << std::endl
             << "6,4" << std::endl;

    REQUIRE(output.str() == expected.str());
}
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:135

C++20 Ranges Version

With C++20, you can use std::ranges::views to elegantly reorder fields in a single expression:

#ifdef CSV_HAS_CXX20
#include <ranges>

// Reorder columns lazily with C++20 ranges: a transform view maps each
// desired column name to the row's field value and the writer consumes the
// view directly -- no intermediate vector is materialized.
TEST_CASE("CSV Writer - Reorder with Ranges", "[test_csv_reorder_ranges]") {
    auto rows = "A,B,C\r\n"
                "1,2,3\r\n"
                "4,5,6"_csv;

    std::stringstream output, correct;
    auto writer = make_csv_writer(output);

    // Write header: C, A
    writer << std::vector<std::string>({ "C", "A" });

    // Reorder columns using ranges::views::transform with string_view
    for (auto& row : rows) {
        std::vector<std::string_view> field_names = { "C", "A" };
        auto reordered = field_names
            | std::views::transform([&row](std::string_view field) {
                // Extract the field's value as std::string -- matching the
                // non-ranges example above -- so the view yields the
                // string-convertible values the writer expects.
                return row[field].get<std::string>();
            });
        writer << reordered;
    }

    correct << "C,A" << std::endl
            << "3,1" << std::endl
            << "6,4" << std::endl;

    REQUIRE(output.str() == correct.str());
}
#endif

DataFrame with Sparse Overlay

When working with DataFrames, you can efficiently update specific cells without reconstructing entire rows. The overlay mechanism stores only the changed cells and writes them correctly:

// DataFrame round trip: apply sparse cell edits via set() and confirm the
// writer emits the overlaid values rather than the originally parsed ones.
TEST_CASE("DataFrame - Write with Sparse Overlay", "[test_dataframe_sparse_overlay_write]") {
auto reader =
"id,name,age,occupation,react_experience_years,favorite_hook,quote\n"
"1,Chad Hooks,28,Senior React Engineer,5,useCallback,\"My useCallback has 12 dependencies and I'm scared to remove any\"\n"
"2,Tailwind Tim,24,Frontend Architect,3,useEffect,\"I fixed the infinite loop by adding another useEffect\"\n"
"3,Dan Abramov Disciple,31,Principal React Engineer,7,useMemo,\"If it's not memoized it's not React\"\n"
"6,Class Component Carl,42,Legacy React Dev,12,none,\"Remember when React was fun? Pepperidge Farm remembers.\""_csv;
// Key rows by the "id" column so set() can address cells as (key, column).
auto options = DataFrameOptions();
options.set_key_column("id");
csv::DataFrame<std::string> df(reader, options);
// Make sparse edits to specific cells using the overlay
df.set("1", "age", "29"); // Chad Hooks has a birthday
df.set("3", "react_experience_years", "8"); // Dan got one more year
df.set("6", "quote", "Everything is fine in production"); // Updated quote
// Write the modified DataFrame back
std::stringstream output;
auto writer = csv::make_csv_writer(output);
writer << df.columns();
for (auto& row : df) {
#ifdef CSV_HAS_CXX20
// More efficient version with C++20 ranges
writer << row.to_sv_range();
#else
writer << std::vector<std::string>(row);
#endif
}
// Verify the sparse edits are in the output
std::string result = output.str();
REQUIRE(result.find("1,Chad Hooks,29,") != std::string::npos); // age updated
REQUIRE(result.find("3,Dan Abramov Disciple,31,Principal React Engineer,8,") != std::string::npos); // experience updated
REQUIRE(result.find("Everything is fine in production") != std::string::npos); // quote updated
}

End-to-End Round-Trip Integrity Example

The following test is intentionally write-first then read/verify, but it validates the same data-integrity guarantee as read-transform-write user workflows.

TEST_CASE("Round Trip with Distinct Field Values", "[test_roundtrip_distinct]") {
    // User-facing note:
    // This test is intentionally "inverted" (write first, then read/verify),
    // but it validates the same round-trip guarantee users care about: every
    // field survives serialization and parsing without being shifted into a
    // neighboring column. Giving each column a DISTINCT value makes any
    // boundary corruption fail an assertion immediately.
    auto filename = "round_trip_distinct.csv";
    FileGuard cleanup(filename);

    const size_t n_rows = 500000; // Enough to cross 10MB chunk boundary

    // Phase 1: write the CSV file.
    {
        std::ofstream outfile(filename, std::ios::binary);
        auto writer = make_csv_writer(outfile);
        writer << std::vector<std::string>({ "col_A", "col_B", "col_C", "col_D", "col_E" });

        for (size_t i = 0; i < n_rows; i++) {
            // Row i carries the five consecutive values i*5 .. i*5+4,
            // one per column.
            auto a = internals::to_string(i * 5 + 0);
            auto b = internals::to_string(i * 5 + 1);
            auto c = internals::to_string(i * 5 + 2);
            auto d = internals::to_string(i * 5 + 3);
            auto e = internals::to_string(i * 5 + 4);
            writer << std::array<csv::string_view, 5>({ a, b, c, d, e });
        }
    }

    const size_t expected_rows = 500000;

    // Phase 2: shared validation, run against both CSVReader construction
    // paths (memory-mapped file and std::istream) so a regression that only
    // affects one parser backend is still caught here.
    auto validate_reader = [&](CSVReader& reader) {
        size_t row_idx = 0;
        for (auto& row : reader) {
            // Field count must be intact.
            REQUIRE(row.size() == 5);

            // Every field must hold its own distinct value.
            REQUIRE(row["col_A"].get<size_t>() == row_idx * 5 + 0);
            REQUIRE(row["col_B"].get<size_t>() == row_idx * 5 + 1);
            REQUIRE(row["col_C"].get<size_t>() == row_idx * 5 + 2);
            REQUIRE(row["col_D"].get<size_t>() == row_idx * 5 + 3);
            REQUIRE(row["col_E"].get<size_t>() == row_idx * 5 + 4);

            // Fields must be clean numeric tokens, untouched by
            // delimiter/newline handling bugs.
            for (auto& field : row) {
                auto sv = field.get_sv();
                REQUIRE(sv.find('\n') == std::string::npos);
                REQUIRE(sv.find(',') == std::string::npos);
            }

            row_idx++;
        }

        REQUIRE(reader.n_rows() == expected_rows);
    };

    SECTION("Memory-mapped file path") {
        CSVReader reader(filename);
        validate_reader(reader);
    }

    SECTION("std::ifstream path (issue #281)") {
        // Issue #281 was specific to the stream constructor
        std::ifstream infile(filename, std::ios::binary);
        CSVReader reader(infile, CSVFormat());
        validate_reader(reader);
    }
}