Vince's CSV Parser
Loading...
Searching...
No Matches
csv_writer.hpp
Go to the documentation of this file.
1
5#pragma once
6#include <cmath>
7#include <fstream>
8#include <iostream>
9#include <memory>
10#ifdef CSV_HAS_CXX20
11#include <ranges>
12#endif
13#include <stdexcept>
14#include <string>
15#include <tuple>
16#include <type_traits>
17#include <vector>
18
20#include "common.hpp"
21#include "csv_exceptions.hpp"
22
23namespace csv {
24namespace internals {
25 static int DECIMAL_PLACES = 5;
26
28 template<typename T>
29 CSV_CONST CONSTEXPR_14
30 long double pow10(const T& n) noexcept {
31 static_assert(std::is_integral<T>::value, "pow10 only supports integral exponents");
32
33 long double multiplicand = n > 0 ? 10 : 0.1,
34 ret = 1;
35 T iterations = n > 0 ? n : -n;
36
37 for (T i = 0; i < iterations; i++) {
38 ret *= multiplicand;
39 }
40
41 return ret;
42 }
43
44 template<>
45 CSV_CONST CONSTEXPR_14
46 long double pow10(const unsigned& n) noexcept {
47 long double multiplicand = n > 0 ? 10 : 0.1,
48 ret = 1;
49
50 for (unsigned i = 0; i < n; i++) {
51 ret *= multiplicand;
52 }
53
54 return ret;
55 }
56
/**
 * Calculate the absolute value of a number.
 *
 * The generic version works on the value in its own type instead of routing
 * it through the C `abs(int)` function, which would truncate wider integers
 * and corrupt large unsigned values. Unsigned inputs are returned unchanged.
 * Common signed and floating point types are specialized below to use the
 * matching C library routine.
 */
template<typename T = int>
inline T csv_abs(T x) {
    // Written without `x < 0` or unary minus so it is also well-formed
    // (and warning-free) for unsigned T.
    return (x > T(0)) ? x : static_cast<T>(T(0) - x);
}

/** Absolute value of an int. */
template<>
inline int csv_abs(int x) {
    return abs(x);
}

/** Absolute value of a long int. */
template<>
inline long int csv_abs(long int x) {
    return labs(x);
}

/** Absolute value of a long long int. */
template<>
inline long long int csv_abs(long long int x) {
    return llabs(x);
}

/** Absolute value of a float. */
template<>
inline float csv_abs(float x) {
    return fabsf(x);
}

/** Absolute value of a double. */
template<>
inline double csv_abs(double x) {
    return fabs(x);
}

/** Absolute value of a long double. */
template<>
inline long double csv_abs(long double x) {
    return fabsl(x);
}
94
98 template<
99 typename T,
100 csv::enable_if_t<std::is_arithmetic<T>::value, int> = 0
101 >
102 int num_digits(T x)
103 {
104 x = csv_abs(x);
105 int digits = 0;
106 for (; x >= 1; digits++)
107 x /= 10;
108
109 return (x == 0) ? 1 : digits;
110 }
111
113 template<typename T,
114 csv::enable_if_t<std::is_unsigned<T>::value, int> = 0>
115 inline std::string to_string(T value) {
116 std::string digits_reverse = "";
117 if (value == 0) return "0";
118
119 for (; value > 0; value /= 10)
120 digits_reverse += (char)('0' + (value % 10));
121
122 return std::string(digits_reverse.rbegin(), digits_reverse.rend());
123 }
124
126 template<
127 typename T,
128 csv::enable_if_t<std::is_integral<T>::value && std::is_signed<T>::value, int> = 0
129 >
130 inline std::string to_string(T value) {
131 return (value >= 0) ? to_string((size_t)value)
132 : "-" + to_string((size_t)(value * -1));
133 }
134
136 template<
137 typename T,
138 csv::enable_if_t<std::is_floating_point<T>::value, int> = 0
139 >
140 inline std::string to_string(T value) {
141 std::string result = "";
142
143 long double integral_part;
144 long double fractional_part = csv_abs(std::modf((long double)value, &integral_part));
145
146 const long double scale = pow10(DECIMAL_PLACES);
147 long double rounded_fractional = std::round(fractional_part * scale);
148
149 // Work with the absolute value of the integral part so digit extraction
150 // and carry both work correctly for negative numbers.
151 long double abs_integral = csv_abs(integral_part);
152
153 // Carry rounding overflow from fractional digits into integral digits.
154 if (rounded_fractional >= scale) {
155 abs_integral += 1;
156 rounded_fractional = 0;
157 }
158
159 // Integral part
160 if (value < 0) result = "-";
161
162 if (abs_integral == 0) {
163 result += "0";
164 }
165 else {
166 for (int n_digits = num_digits(abs_integral); n_digits > 0; n_digits --) {
167 int digit = (int)(std::fmod(abs_integral, pow10(n_digits)) / pow10(n_digits - 1));
168 result += (char)('0' + digit);
169 }
170 }
171
172 // Decimal part
173 result += ".";
174
175 if (rounded_fractional > 0) {
176 for (int n_digits = DECIMAL_PLACES; n_digits > 0; n_digits--) {
177 int digit = (int)(std::fmod(rounded_fractional, pow10(n_digits)) / pow10(n_digits - 1));
178 result += (char)('0' + digit);
179 }
180 }
181 else {
182 result += "0";
183 }
184
185 return result;
186 }
187}
188
190inline static void set_decimal_places(int precision) {
191 internals::DECIMAL_PLACES = precision;
192}
193
194namespace internals {
/**
 * SFINAE trait: true when std::begin() and std::end() can be called on a
 * const reference to T.
 */
template<typename T, typename = void>
struct is_iterable : std::false_type {};

/** Specialization selected for the defaulted (void) second argument. */
template<typename T>
struct is_iterable<T, void> {
private:
    // Fallback chosen when the expression-based overload is not viable.
    template<typename>
    static std::false_type probe(...);

    // Preferred overload (int beats the ellipsis); it only participates
    // when both std::begin and std::end are well-formed for const U&.
    template<typename U>
    static auto probe(int) -> decltype(
        std::begin(std::declval<const U&>()),
        std::end(std::declval<const U&>()),
        std::true_type{}
    );

public:
    static constexpr bool value = decltype(probe<T>(0))::value;
};
213
/**
 * SFINAE trait: true when std::tuple_size<T>::value is well-formed, i.e. T
 * is tuple-like.
 */
template<typename T, typename = void>
struct is_tuple : std::false_type {};

/** Specialization selected for the defaulted (void) second argument. */
template<typename T>
struct is_tuple<T, void> {
private:
    // Fallback chosen when std::tuple_size<U> has no usable ::value.
    template<typename>
    static std::false_type probe(...);

    // Preferred overload (int beats the ellipsis).
    template<typename U>
    static auto probe(int) -> decltype(std::tuple_size<U>::value, std::true_type{});

public:
    static constexpr bool value = decltype(probe<T>(0))::value;
};
228
229}
230
233
254 template<class OutputStream, char Delim, char Quote>
256 public:
259 DelimWriter(OutputStream& _out, bool _quote_minimal = true)
260 : out(&_out), quote_minimal(_quote_minimal) {}
261
263 template<typename T = OutputStream,
264 csv::enable_if_t<std::is_same<T, std::ofstream>::value, int> = 0>
265 DelimWriter(const std::string& filename, bool _quote_minimal = true)
266 : owned_out(new std::ofstream(filename, std::ios::out)),
267 out(owned_out.get()),
268 quote_minimal(_quote_minimal) {
269 if (!owned_out->is_open())
270 internals::throw_failed_open_for_writing(filename);
271 }
272
273 DelimWriter(const DelimWriter&) = delete;
274 DelimWriter& operator=(const DelimWriter&) = delete;
275
276 DelimWriter(DelimWriter&& other) noexcept
277 : owned_out(std::move(other.owned_out)),
278 out(other.out),
279 quote_minimal(other.quote_minimal),
280 auto_flush_(other.auto_flush_),
281 batch_buffer_(std::move(other.batch_buffer_)) {
282 if (owned_out) {
283 out = owned_out.get();
284 }
285 other.out = nullptr;
286 other.quote_minimal = true;
287 }
288
289 DelimWriter& operator=(DelimWriter&& other) noexcept {
290 if (this == &other) return *this;
291
292 owned_out = std::move(other.owned_out);
293 out = other.out;
294 quote_minimal = other.quote_minimal;
295 auto_flush_ = other.auto_flush_;
296 batch_buffer_ = std::move(other.batch_buffer_);
297
298 if (owned_out) {
299 out = owned_out.get();
300 }
301
302 other.out = nullptr;
303 other.quote_minimal = true;
304 return *this;
305 }
306
308 DelimWriter& set_auto_flush(bool value) & noexcept {
309 this->auto_flush_ = value;
310 return *this;
311 }
312
314 DelimWriter&& set_auto_flush(bool value) && noexcept {
315 this->auto_flush_ = value;
316 return std::move(*this);
317 }
318
320 bool get_auto_flush() const noexcept {
321 return this->auto_flush_;
322 }
323
326 if (out) {
327 flush_batch();
328 out->flush();
329 }
330 }
331
333 template<typename T, size_t N>
334 DelimWriter& operator<<(const T (&record)[N]) {
335 write_range_impl(record);
336 return *this;
337 }
338
340 template<typename T, size_t N>
341 DelimWriter& operator<<(const std::array<T, N>& record) {
342 write_range_impl(record);
343 return *this;
344 }
345
357 template<typename T>
358 auto write_row(T&& record) -> decltype(*this << std::forward<T>(record)) {
359 return *this << std::forward<T>(record);
360 }
361
371 template<typename T, typename U, typename... Rest>
372 DelimWriter& write_row(T&& first, U&& second, Rest&&... rest) {
373 this->write_tuple<0>(std::forward_as_tuple(
374 std::forward<T>(first), std::forward<U>(second), std::forward<Rest>(rest)...));
375 return *this;
376 }
377
378#ifdef CSV_HAS_CXX20
391 template<std::ranges::input_range Rows>
393 DelimWriter& write_rows(Rows&& rows) {
394 for (auto&& row : rows) {
395 append_row_like(row);
396 flush_batch_if_needed();
397 }
398
399 finish_write_call();
400 return *this;
401 }
402
409 template<std::ranges::input_range Range>
410 DelimWriter& operator<<(Range&& container)
411 requires std::ranges::input_range<Range>
412 && std::convertible_to<std::ranges::range_reference_t<Range>, csv::string_view> {
413 write_range_impl(container);
414 return *this;
415 }
416
418 template<typename RowLike>
419 DelimWriter& operator<<(const RowLike& row)
422 append_row_like(row);
423 finish_write_call();
424 return *this;
425 }
426#else
435 template<typename Range>
436 typename std::enable_if<
439 && !std::is_same<Range, std::string>::value
440 && !std::is_same<Range, csv::string_view>::value,
442 >::type operator<<(const Range& record) {
443 write_range_impl(record);
444 return *this;
445 }
446#endif
447
449 template<typename... T>
450 DelimWriter& operator<<(const std::tuple<T...>& record) {
451 this->write_tuple<0, T...>(record);
452 return *this;
453 }
454
456 void flush() {
457 flush_batch();
458 out->flush();
459 }
460
461 private:
467 template<typename Range>
468 inline void append_range_fields(Range&& record) {
469 auto it = std::begin(record);
470 auto end = std::end(record);
471
472 if (it != end) {
473 write_field(*it);
474 ++it;
475 }
476
477 for (; it != end; ++it) {
478 batch_buffer_.push_back(Delim);
479 write_field(*it);
480 }
481 }
482
484 size_t find_first_special_for_writer(csv::string_view in) const {
485 size_t pos = internals::find_next_non_special(in, 0, simd_sentinels_);
486
487 for (; pos < in.size(); ++pos) {
488 char ch = in[pos];
489 if (ch == Quote || ch == Delim || ch == '\r' || ch == '\n')
490 return pos;
491 }
492
493 return in.size();
494 }
495
499 template<typename Range>
500 inline void write_range_impl(const Range& record) {
501 append_range_fields(record);
502
503 end_record();
504 finish_write_call();
505 }
506
507#ifdef CSV_HAS_CXX20
508 template<typename Row>
509 void append_row_like(Row&& row) {
510 IF_CONSTEXPR(internals::csv_string_field_range<Row>) {
511 append_range_fields(std::forward<Row>(row));
512 }
513 else {
514 append_range_fields(row.to_sv_range());
515 }
516
517 end_record();
518 }
519#endif
520
521 template<
522 typename T,
523 csv::enable_if_t<
524 !std::is_convertible<T, std::string>::value
525 && !std::is_convertible<T, csv::string_view>::value
526 , int> = 0
527 >
528 void write_field(T in) {
529 const std::string serialized = internals::to_string(in);
530 write_raw(serialized);
531 }
532
533 template<
534 typename T,
535 csv::enable_if_t<
536 std::is_convertible<T, std::string>::value
537 || std::is_convertible<T, csv::string_view>::value
538 , int> = 0
539 >
540 void write_field(T in) {
541 IF_CONSTEXPR(std::is_convertible<T, csv::string_view>::value) {
542 write_escaped_field(in);
543 }
544 else {
545 const std::string serialized(in);
546 write_escaped_field(serialized);
547 }
548 }
549
550 void write_raw(csv::string_view in) {
551 if (!in.empty())
552 batch_buffer_.append(in.data(), in.size());
553 }
554
555 void write_escaped_field(csv::string_view in) {
556 const size_t first_special = find_first_special_for_writer(in);
557
558 if (first_special == in.size()) {
559 if (!quote_minimal) {
560 batch_buffer_.push_back(Quote);
561 write_raw(in);
562 batch_buffer_.push_back(Quote);
563 } else {
564 write_raw(in);
565 }
566 return;
567 }
568
569 write_quoted_field(in, first_special);
570 }
571
572 void write_quoted_field(csv::string_view in, size_t first_special) {
573 batch_buffer_.push_back(Quote);
574
575 size_t chunk_start = 0;
576 size_t pos = first_special;
577 while (pos < in.size()) {
578 if (in[pos] == Quote) {
579 write_raw(in.substr(chunk_start, pos - chunk_start));
580 batch_buffer_.push_back(Quote);
581 batch_buffer_.push_back(Quote);
582 chunk_start = pos + 1;
583 }
584
585 ++pos;
586 }
587
588 write_raw(in.substr(chunk_start));
589 batch_buffer_.push_back(Quote);
590 }
591
593 template<size_t Index = 0, typename... T>
594 typename std::enable_if<Index < sizeof...(T), void>::type write_tuple(const std::tuple<T...>& record) {
595 write_field(std::get<Index>(record));
596
597 CSV_MSVC_PUSH_DISABLE(4127)
598 IF_CONSTEXPR (Index + 1 < sizeof...(T)) batch_buffer_.push_back(Delim);
599 CSV_MSVC_POP
600
601 this->write_tuple<Index + 1>(record);
602 }
603
605 template<size_t Index = 0, typename... T>
606 typename std::enable_if<Index == sizeof...(T), void>::type write_tuple(const std::tuple<T...>& record) {
607 (void)record;
608 end_record();
609 finish_write_call();
610 }
611
613 void end_record() {
614 batch_buffer_.push_back('\n');
615 }
616
617 void finish_write_call() {
618 if (this->auto_flush_) {
619 flush_batch();
620 out->flush();
621 return;
622 }
623
624 flush_batch_if_needed();
625 }
626
627 void flush_batch() {
628 if (batch_buffer_.empty()) return;
629
630 out->write(batch_buffer_.data(), static_cast<std::streamsize>(batch_buffer_.size()));
631 batch_buffer_.clear();
632 }
633
634 void flush_batch_if_needed() {
635 if (batch_buffer_.size() >= batch_flush_threshold_)
636 flush_batch();
637 }
638
644 std::unique_ptr<OutputStream> owned_out;
645
647 OutputStream* out;
648
649 bool quote_minimal;
650 bool auto_flush_ = true;
651 static constexpr size_t batch_flush_threshold_ = 64 * 1024;
652 std::string batch_buffer_;
653 internals::SentinelVecs simd_sentinels_{Delim, Quote};
654 };
655
663 template<class OutputStream>
664 using CSVWriter = DelimWriter<OutputStream, ',', '"'>;
665
674 template<class OutputStream>
675 using TSVWriter = DelimWriter<OutputStream, '\t', '"'>;
676
678 template<class OutputStream>
679 inline CSVWriter<OutputStream> make_csv_writer(OutputStream& out, bool quote_minimal=true) {
680 return CSVWriter<OutputStream>(out, quote_minimal);
681 }
682
684 template<class OutputStream>
685 inline TSVWriter<OutputStream> make_tsv_writer(OutputStream& out, bool quote_minimal=true) {
686 return TSVWriter<OutputStream>(out, quote_minimal);
687 }
689}
SIMD-accelerated skip for runs of non-special CSV bytes.
Class for writing delimiter separated values files.
DelimWriter & write_row(T &&first, U &&second, Rest &&... rest)
Write a row from a variadic list of mixed-type values.
~DelimWriter()
Destructor will flush remaining data.
DelimWriter & operator<<(const std::array< T, N > &record)
Write a std::array of strings as one delimited row.
DelimWriter && set_auto_flush(bool value) &&noexcept
Configure whether each write operation flushes the underlying stream.
DelimWriter & set_auto_flush(bool value) &noexcept
Configure whether each write operation flushes the underlying stream.
DelimWriter(const std::string &filename, bool _quote_minimal=true)
Construct a DelimWriter that owns an output file stream.
auto write_row(T &&record) -> decltype(*this<< std::forward< T >(record))
Write a row from any single argument accepted by operator<< (std::vector, std::array,...
DelimWriter & write_rows(Rows &&rows)
Write many rows using a shared batch buffer.
DelimWriter & operator<<(const T(&record)[N])
Write a C-style array of strings as one delimited row.
DelimWriter & operator<<(const RowLike &row)
Write a row-like object that exposes to_sv_range().
bool get_auto_flush() const noexcept
Return whether each write operation flushes the underlying stream.
DelimWriter & operator<<(const std::tuple< T... > &record)
Write a std::tuple of mixed-type values as one delimited row.
void flush()
Flushes the written data.
DelimWriter(OutputStream &_out, bool _quote_minimal=true)
Construct a DelimWriter over the specified output stream.
DelimWriter & operator<<(Range &&container)
Write a range of string-like fields as one delimited row.
A standalone header file containing shared code.
#define IF_CONSTEXPR
Expands to if constexpr in C++17 and if otherwise.
Definition common.hpp:183
Shared exception message templates and throw helpers.
T csv_abs(T x)
Calculate the absolute value of a number.
CSV_CONST CONSTEXPR_14 long double pow10(const T &n) noexcept
Compute 10 to the power of an integral exponent.
int num_digits(T x)
Calculate the number of digits in a number.
std::string to_string(T value)
to_string() for unsigned integers
The all encompassing namespace.
TSVWriter< OutputStream > make_tsv_writer(OutputStream &out, bool quote_minimal=true)
Return a csv::TSVWriter over the output stream.
CSVWriter< OutputStream > make_csv_writer(OutputStream &out, bool quote_minimal=true)
Return a csv::CSVWriter over the output stream.
DelimWriter< OutputStream, ',', '"'> CSVWriter
An alias for csv::DelimWriter for writing standard CSV files.
DelimWriter< OutputStream, '\t', '"'> TSVWriter
Class for writing tab-separated values files.
std::string_view string_view
The string_view class used by this library.
Definition common.hpp:174
SFINAE trait: detects if a type is iterable (has std::begin/end).
SFINAE trait: detects if a type is tuple-like (std::tuple_size<T>::value is well-formed).