Vince's CSV Parser
Loading...
Searching...
No Matches
CSV Writing Guide

CSV Writing Guide

This page summarizes write-side APIs and practical usage patterns for emitting CSV/TSV data.

Core Writer APIs

Use csv::make_csv_writer() for comma-delimited output and csv::make_tsv_writer() for tab-delimited output.

Writing Containers with operator<<

Any row-like container of string-convertible values can be streamed directly.

// Verify that any sequence container of strings can be streamed into both
// the comma-delimited and tab-delimited writers via operator<<.
TEMPLATE_TEST_CASE("CSV/TSV Writer - operator <<", "[test_csv_operator<<]",
    std::vector<std::string>, std::deque<std::string>, std::list<std::string>) {
    std::stringstream output, correct_comma, correct_tab;

    // Expected serializations: the CSV writer must quote the field that
    // contains its own delimiter ("1,1"); the TSV writer can leave it bare.
    correct_comma << "A,B,C" << std::endl << "\"1,1\",2,3" << std::endl;
    correct_tab << "A\tB\tC" << std::endl << "1,1\t2\t3" << std::endl;

    // Sample rows: a plain header plus a row whose first field embeds a comma.
    auto header_row = TestType({ "A", "B", "C" }),
        data_row = TestType({ "1,1", "2", "3" });

    SECTION("CSV Writer") {
        auto writer = make_csv_writer(output);
        writer << header_row << data_row;
        REQUIRE(output.str() == correct_comma.str());
    }

    SECTION("TSV Writer") {
        auto writer = make_tsv_writer(output);
        writer << header_row << data_row;
        REQUIRE(output.str() == correct_tab.str());
    }
}
TSVWriter< OutputStream > make_tsv_writer(OutputStream &out, bool quote_minimal=true)
Return a csv::TSVWriter over the output stream.
CSVWriter< OutputStream > make_csv_writer(OutputStream &out, bool quote_minimal=true)
Return a csv::CSVWriter over the output stream.

Writing Tuples and Custom Types

DelimWriter can also serialize tuples and custom types that provide a string conversion.

// Minimal user-defined type demonstrating that the writer accepts any type
// providing an implicit conversion to std::string.
struct Time {
    std::string hour;
    std::string minute;

    // Serialize as "hour:minute".
    operator std::string() const {
        return hour + ":" + minute;
    }
};
// Skipped on Clang (NOTE(review): presumably a historical Clang build issue
// with the tuple serialization path -- confirm whether the guard is still
// required).
#ifndef __clang__
TEST_CASE("CSV Tuple", "[test_csv_tuple]") {
// Under C++17 the last tuple element exercises the custom Time type
// (implicit std::string conversion); otherwise fall back to a plain
// std::string carrying the same serialized value.
#ifdef CSV_HAS_CXX17
Time time = { "5", "30" };
#else
std::string time = "5:30";
#endif
std::stringstream output, correct_output;
auto csv_writer = make_csv_writer(output);
// Heterogeneous tuples: strings, integers (int and short), and floating
// point (double and float) all serialize through the same operator<<.
csv_writer << std::make_tuple("One", 2, "Three", 4.0, time)
<< std::make_tuple("One", (short)2, "Three", 4.0f, time)
<< std::make_tuple(-1, -2.0)
<< std::make_tuple(20.2, -20.3, -20.123)
<< std::make_tuple(0.0, 0.0f, 0);
// Expected formatting: integers print without a decimal point; values with
// fractional parts use the 5-decimal-place default (see set_decimal_places
// elsewhere in this guide), while exact values such as 4.0/-2.0/0.0 print
// with a single trailing digit.
correct_output << "One,2,Three,4.0,5:30" << std::endl
<< "One,2,Three,4.0,5:30" << std::endl
<< "-1,-2.0" << std::endl
<< "20.20000,-20.30000,-20.12300" << std::endl
<< "0.0,0.0,0" << std::endl;
REQUIRE(output.str() == correct_output.str());
}
#endif

Using write_row()

The write_row() method can be used to write rows with arbitrary fields and mixed types without having to construct a container first.

Through the magic of SFINAE, write_row() also supports any of the operations of operator<<.

// write_row() accepts a variadic list of mixed-type fields, so a row can be
// emitted without building a container first.
TEST_CASE("CSV Writer - write_row() with variadic fields", "[test_csv_write_row_variadic]") {
    std::stringstream output, expected;
    auto writer = make_csv_writer(output);

    // Important! Call this or else the default will be used
    set_decimal_places(1);

    // Mixed string/int/double fields in a single call each.
    writer.write_row("Name", "Age", "Score");
    writer.write_row("Alice", 30, 95.5);
    // Use values with binary-exact fractional parts for a stable user-facing example.
    writer.write_row("Bob", 25, 87.5);
    writer.write_row("Charlie", 35, 92.5);

    // Reset to default
    set_decimal_places(5);

    expected << "Name,Age,Score" << std::endl
             << "Alice,30,95.5" << std::endl
             << "Bob,25,87.5" << std::endl
             << "Charlie,35,92.5" << std::endl;

    REQUIRE(output.str() == expected.str());
}

Data Reordering Workflow

For read-transform-write pipelines, csv::CSVRow supports conversion to std::vector<std::string>, which makes it straightforward to reorder/select fields before writing.

Typical flow:

  1. Read with CSVReader
  2. Convert row to std::vector<std::string>
  3. Reorder/select fields
  4. Emit with CSVWriter
// Read-transform-write: parse a small CSV, then re-emit only columns C and A
// (in that order) by converting each selected field to std::string.
TEST_CASE("CSV Writer - Reorder Columns", "[test_csv_reorder]") {
    auto rows = "A,B,C\r\n"
                "1,2,3\r\n"
                "4,5,6"_csv;

    std::stringstream output, expected;
    auto writer = make_csv_writer(output);

    // Emit the reordered header first.
    writer << std::vector<std::string>({ "C", "A" });

    // For every parsed row, pull out fields C and A and write them as a
    // fresh two-field row.
    for (auto& row : rows) {
        std::vector<std::string> reordered = {
            row[csv::string_view("C")].get<std::string>(),
            row[csv::string_view("A")].get<std::string>()
        };
        writer << reordered;
    }

    expected << "C,A" << std::endl
             << "3,1" << std::endl
             << "6,4" << std::endl;

    REQUIRE(output.str() == expected.str());
}
nonstd::string_view string_view
The string_view class used by this library.
Definition common.hpp:135

C++20 Ranges Version

With C++20, you can use std::ranges::views to elegantly reorder fields in a single expression:

#ifdef CSV_HAS_CXX20
#include <ranges>

// Reorder columns lazily with C++20 ranges: a transform view maps each
// desired column name to the row's field value and the writer consumes the
// view directly -- no intermediate vector is materialized.
TEST_CASE("CSV Writer - Reorder with Ranges", "[test_csv_reorder_ranges]") {
    auto rows = "A,B,C\r\n"
                "1,2,3\r\n"
                "4,5,6"_csv;

    std::stringstream output, correct;
    auto writer = make_csv_writer(output);

    // Write header: C, A
    writer << std::vector<std::string>({ "C", "A" });

    // Reorder columns using ranges::views::transform with string_view
    for (auto& row : rows) {
        std::vector<std::string_view> field_names = { "C", "A" };
        auto reordered = field_names
            | std::views::transform([&row](std::string_view field) {
                // Extract the field's value as std::string -- matching the
                // non-ranges example above -- so the view yields the
                // string-convertible values the writer expects.
                return row[field].get<std::string>();
            });
        writer << reordered;
    }

    correct << "C,A" << std::endl
            << "3,1" << std::endl
            << "6,4" << std::endl;

    REQUIRE(output.str() == correct.str());
}
#endif

DataFrame with Sparse Overlay

When working with DataFrames, you can efficiently update specific cells without reconstructing entire rows. The overlay mechanism stores only the changed cells and writes them correctly:

// DataFrame round trip: apply sparse cell edits via set() and confirm the
// writer emits the overlaid values rather than the originally parsed ones.
TEST_CASE("DataFrame - Write with Sparse Overlay", "[test_dataframe_sparse_overlay_write]") {
auto reader =
"id,name,age,occupation,react_experience_years,favorite_hook,quote\n"
"1,Chad Hooks,28,Senior React Engineer,5,useCallback,\"My useCallback has 12 dependencies and I'm scared to remove any\"\n"
"2,Tailwind Tim,24,Frontend Architect,3,useEffect,\"I fixed the infinite loop by adding another useEffect\"\n"
"3,Dan Abramov Disciple,31,Principal React Engineer,7,useMemo,\"If it's not memoized it's not React\"\n"
"6,Class Component Carl,42,Legacy React Dev,12,none,\"Remember when React was fun? Pepperidge Farm remembers.\""_csv;
// Key rows by the "id" column so set() can address cells as (key, column).
auto options = DataFrameOptions();
options.set_key_column("id");
csv::DataFrame<std::string> df(reader, options);
// Make sparse edits to specific cells using the overlay
df.set("1", "age", "29"); // Chad Hooks has a birthday
df.set("3", "react_experience_years", "8"); // Dan got one more year
df.set("6", "quote", "Everything is fine in production"); // Updated quote
// Write the modified DataFrame back
std::stringstream output;
auto writer = csv::make_csv_writer(output);
writer << df.columns();
for (auto& row : df) {
#ifdef CSV_HAS_CXX20
// More efficient version with C++20 ranges
writer << row.to_sv_range();
#else
writer << std::vector<std::string>(row);
#endif
}
// Verify the sparse edits are in the output
std::string result = output.str();
REQUIRE(result.find("1,Chad Hooks,29,") != std::string::npos); // age updated
REQUIRE(result.find("3,Dan Abramov Disciple,31,Principal React Engineer,8,") != std::string::npos); // experience updated
REQUIRE(result.find("Everything is fine in production") != std::string::npos); // quote updated
}

End-to-End Round-Trip Integrity Example

The following test is intentionally write-first then read/verify, but it validates the same data-integrity guarantee as read-transform-write user workflows.

TEST_CASE("Round Trip with Distinct Field Values", "[test_roundtrip_distinct]") {
    // User-facing note:
    // This test is intentionally "inverted" (write first, then read/verify),
    // but it validates the same round-trip guarantee users care about: every
    // field survives serialization and parsing without being shifted into a
    // neighboring column. Giving each column a DISTINCT value makes any
    // boundary corruption fail an assertion immediately.
    auto filename = "round_trip_distinct.csv";
    FileGuard cleanup(filename);

    const size_t n_rows = 500000; // Enough to cross 10MB chunk boundary

    // Phase 1: write the CSV file.
    {
        std::ofstream outfile(filename, std::ios::binary);
        auto writer = make_csv_writer(outfile);
        writer << std::vector<std::string>({ "col_A", "col_B", "col_C", "col_D", "col_E" });

        for (size_t i = 0; i < n_rows; i++) {
            // Row i carries the five consecutive values i*5 .. i*5+4,
            // one per column.
            auto a = internals::to_string(i * 5 + 0);
            auto b = internals::to_string(i * 5 + 1);
            auto c = internals::to_string(i * 5 + 2);
            auto d = internals::to_string(i * 5 + 3);
            auto e = internals::to_string(i * 5 + 4);
            writer << std::array<csv::string_view, 5>({ a, b, c, d, e });
        }
    }

    const size_t expected_rows = 500000;

    // Phase 2: shared validation, run against both CSVReader construction
    // paths (memory-mapped file and std::istream) so a regression that only
    // affects one parser backend is still caught here.
    auto validate_reader = [&](CSVReader& reader) {
        size_t row_idx = 0;
        for (auto& row : reader) {
            // Field count must be intact.
            REQUIRE(row.size() == 5);

            // Every field must hold its own distinct value.
            REQUIRE(row["col_A"].get<size_t>() == row_idx * 5 + 0);
            REQUIRE(row["col_B"].get<size_t>() == row_idx * 5 + 1);
            REQUIRE(row["col_C"].get<size_t>() == row_idx * 5 + 2);
            REQUIRE(row["col_D"].get<size_t>() == row_idx * 5 + 3);
            REQUIRE(row["col_E"].get<size_t>() == row_idx * 5 + 4);

            // Fields must be clean numeric tokens, untouched by
            // delimiter/newline handling bugs.
            for (auto& field : row) {
                auto sv = field.get_sv();
                REQUIRE(sv.find('\n') == std::string::npos);
                REQUIRE(sv.find(',') == std::string::npos);
            }

            row_idx++;
        }

        REQUIRE(reader.n_rows() == expected_rows);
    };

    SECTION("Memory-mapped file path") {
        CSVReader reader(filename);
        validate_reader(reader);
    }

    SECTION("std::ifstream path (issue #281)") {
        // Issue #281 was specific to the stream constructor
        std::ifstream infile(filename, std::ios::binary);
        CSVReader reader(infile, CSVFormat());
        validate_reader(reader);
    }
}