CommonLibVR
Loading...
Searching...
No Matches
csv.h
Go to the documentation of this file.
1#pragma warning(disable: 4083 244 267 458)
2// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
3// License: BSD-3
4//
5// All rights reserved.
6//
7// Redistribution and use in source and binary forms, with or without
8// modification, are permitted provided that the following conditions are met:
9//
10// 1. Redistributions of source code must retain the above copyright notice,
11// this list of conditions and the following disclaimer.
12//
13// 2. Redistributions in binary form must reproduce the above copyright notice,
14// this list of conditions and the following disclaimer in the documentation
15// and/or other materials provided with the distribution.
16//
17// 3. Neither the name of the copyright holder nor the names of its contributors
18// may be used to endorse or promote products derived from this software
19// without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31// POSSIBILITY OF SUCH DAMAGE.
32
33#ifndef CSV_H
34# define CSV_H
35
36# include <algorithm>
37# include <cstdio>
38# include <cstring>
39# include <exception>
40# include <string>
41# include <utility>
42# include <vector>
43# ifndef CSV_IO_NO_THREAD
44# include <condition_variable>
45# include <mutex>
46# include <thread>
47# endif
48# include <cassert>
49# include <cerrno>
50# include <istream>
51# include <limits>
52# include <memory>
53
54namespace io
55{
57 // LineReader //
59
60 namespace error
61 {
62 struct base : std::exception
63 {
64 virtual void format_error_message() const = 0;
65
66 const char* what() const noexcept override
67 {
70 }
71
72 mutable char error_message_buffer[512];
73 };
74
75 const int max_file_name_length = 255;
76
78 {
80 {
81 std::memset(file_name, 0, sizeof(file_name));
82 }
83
// Stores a (possibly truncated) copy of the given name in this->file_name.
// A null pointer resets the stored name to the empty string.
84 void set_file_name(const char* file_name)
85 {
86 if (file_name != nullptr) {
87 // This call to strncpy has parentheses around it
88 // to silence the GCC -Wstringop-truncation warning
89 (strncpy(this->file_name, file_name, sizeof(this->file_name)));
// strncpy leaves the buffer unterminated when the source fills it, so the
// last slot is always overwritten with NUL.
90 this->file_name[sizeof(this->file_name) - 1] = '\0';
91 } else {
92 this->file_name[0] = '\0';
93 }
94 }
95
97 };
98
100 {
102 {
103 file_line = -1;
104 }
105
107 {
108 this->file_line = file_line;
109 }
110
112 };
113
115 {
117 {
118 errno_value = 0;
119 }
120
122 {
123 this->errno_value = errno_value;
124 }
125
127 };
128
130 base,
133 {
// Writes the "Can not open file" text into error_message_buffer.
// The strerror() description is included only when errno_value is non-zero.
134 void format_error_message() const override
135 {
136 if (errno_value != 0)
137 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
138 "Can not open file \"%s\" because \"%s\".", file_name, std::strerror(errno_value));
139 else
140 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
141 "Can not open file \"%s\".", file_name);
142 }
143 };
144
146 base,
149 {
150 void format_error_message() const override
151 {
152 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
153 "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1.", file_line, file_name);
154 }
155 };
156 }
157
159 {
160 public:
161 virtual int read(char* buffer, int size) = 0;
162 virtual ~ByteSourceBase() {}
163 };
164
165 namespace detail
166 {
168 {
169 public:
// Wraps an already opened FILE* and switches the stream to unbuffered mode
// (_IONBF).
// NOTE(review): `file` is passed to setvbuf without a null check - presumably
// callers guarantee a valid stream; confirm.
170 explicit OwningStdIOByteSourceBase(FILE* file) :
171 file(file)
172 {
173 // Tell the std library that we want to do the buffering ourselves.
174 std::setvbuf(file, 0, _IONBF, 0);
175 }
176
// Reads up to `size` bytes from the wrapped FILE* into `buffer`.
// Returns the element count reported by std::fread (element size 1, so a
// byte count), converted from size_t to int.
177 int read(char* buffer, int size)
178 {
179 return std::fread(buffer, 1, size, file);
180 }
181
183 {
184 std::fclose(file);
185 }
186
187 private:
188 FILE* file;
189 };
190
192 {
193 public:
194 explicit NonOwningIStreamByteSource(std::istream& in) :
195 in(in) {}
196
// Reads up to `size` bytes from the referenced std::istream into `buffer`.
// Returns in.gcount(), i.e. the number of characters extracted by the
// preceding in.read() call, converted to int.
197 int read(char* buffer, int size)
198 {
199 in.read(buffer, size);
200 return in.gcount();
201 }
202
204
205 private:
206 std::istream& in;
207 };
208
210 {
211 public:
212 NonOwningStringByteSource(const char* str, long long size) :
213 str(str), remaining_byte_count(size) {}
214
// Copies the next chunk of the in-memory string into `buffer`.
// At most `desired_byte_count` bytes are copied, fewer when less than that
// remains; the source pointer and the remaining count are advanced by the
// amount copied, which is also the return value (0 once exhausted).
215 int read(char* buffer, int desired_byte_count)
216 {
217 int to_copy_byte_count = desired_byte_count;
// Clamp the request to what is still available.
218 if (remaining_byte_count < to_copy_byte_count)
219 to_copy_byte_count = remaining_byte_count;
220 std::memcpy(buffer, str, to_copy_byte_count);
221 remaining_byte_count -= to_copy_byte_count;
222 str += to_copy_byte_count;
223 return to_copy_byte_count;
224 }
225
227
228 private:
229 const char* str;
230 long long remaining_byte_count;
231 };
232
233# ifndef CSV_IO_NO_THREAD
235 {
236 public:
// Takes ownership of the byte source, resets the request state and starts the
// worker thread that performs the reads.
//
// `guard` keeps `lock` held until this function returns, so the worker (which
// locks the same mutex first thing) cannot proceed before initialisation is
// complete.
237 void init(std::unique_ptr<ByteSourceBase> arg_byte_source)
238 {
239 std::unique_lock<std::mutex> guard(lock);
240 byte_source = std::move(arg_byte_source);
// -1 means "no read requested"; the worker's wait predicate tests for it.
241 desired_byte_count = -1;
242 termination_requested = false;
243 worker = std::thread(
244 [&] {
245 std::unique_lock<std::mutex> guard(lock);
246 try {
247 for (;;) {
// Sleep until a read is requested or termination is asked for.
248 read_requested_condition.wait(
249 guard,
250 [&] {
251 return desired_byte_count != -1 || termination_requested;
252 });
// Termination leaves the thread without sending a notification.
253 if (termination_requested)
254 return;
255
256 read_byte_count = byte_source->read(buffer, desired_byte_count);
// Mark the request as consumed.
257 desired_byte_count = -1;
// A read of 0 bytes ends the loop; the waiter is woken by the notify after
// the try block.
258 if (read_byte_count == 0)
259 break;
260 read_finished_condition.notify_one();
261 }
262 } catch (...) {
// Keep the exception so that it can be rethrown on the requesting side.
263 read_error = std::current_exception();
264 }
265 read_finished_condition.notify_one();
266 });
267 }
268
// True once init() has stored a non-null byte source.
269 bool is_valid() const
270 {
271 return byte_source != nullptr;
272 }
273
// Posts a read request for the worker thread: under the lock it records the
// destination buffer and the byte count, resets read_byte_count to -1 and
// signals read_requested_condition.
274 void start_read(char* arg_buffer, int arg_desired_byte_count)
275 {
276 std::unique_lock<std::mutex> guard(lock);
277 buffer = arg_buffer;
278 desired_byte_count = arg_desired_byte_count;
// -1 marks the result as not yet available.
279 read_byte_count = -1;
280 read_requested_condition.notify_one();
281 }
282
284 {
285 std::unique_lock<std::mutex> guard(lock);
286 read_finished_condition.wait(
287 guard,
288 [&] {
289 return read_byte_count != -1 || read_error;
290 });
291 if (read_error)
292 std::rethrow_exception(read_error);
293 else
294 return read_byte_count;
295 }
296
298 {
299 if (byte_source != nullptr) {
300 {
301 std::unique_lock<std::mutex> guard(lock);
302 termination_requested = true;
303 }
304 read_requested_condition.notify_one();
305 worker.join();
306 }
307 }
308
309 private:
310 std::unique_ptr<ByteSourceBase> byte_source;
311
312 std::thread worker;
313
314 bool termination_requested;
315 std::exception_ptr read_error;
316 char* buffer;
317 int desired_byte_count;
318 int read_byte_count;
319
320 std::mutex lock;
321 std::condition_variable read_finished_condition;
322 std::condition_variable read_requested_condition;
323 };
324# endif
325
327 {
328 public:
// Takes ownership of the byte source; no thread is involved in this reader.
329 void init(std::unique_ptr<ByteSourceBase> arg_byte_source)
330 {
331 byte_source = std::move(arg_byte_source);
332 }
333
// True once init() has stored a non-null byte source.
334 bool is_valid() const
335 {
336 return byte_source != nullptr;
337 }
338
// Only records where to read to and how much; no read happens in this method.
339 void start_read(char* arg_buffer, int arg_desired_byte_count)
340 {
341 buffer = arg_buffer;
342 desired_byte_count = arg_desired_byte_count;
343 }
344
346 {
347 return byte_source->read(buffer, desired_byte_count);
348 }
349
350 private:
351 std::unique_ptr<ByteSourceBase> byte_source;
352 char* buffer;
353 int desired_byte_count;
354 };
355 }
356
358 {
359 private:
360 static const int block_len = 1 << 20;
361 std::unique_ptr<char[]> buffer; // must be constructed before (and thus destructed after) the reader!
362# ifdef CSV_IO_NO_THREAD
364# else
366# endif
367 int data_begin;
368 int data_end;
369
370 char file_name[error::max_file_name_length + 1];
371 unsigned file_line;
372
373 static std::unique_ptr<ByteSourceBase> open_file(const char* file_name)
374 {
375 // We open the file in binary mode as it makes no difference under *nix
376 // and under Windows we handle \r\n newlines ourself.
377 FILE* file = std::fopen(file_name, "rb");
378 if (file == 0) {
379 int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
381 err.set_errno(x);
382 err.set_file_name(file_name);
383 throw err;
384 }
385 return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
386 }
387
// Common constructor tail: allocates the 3 * block_len buffer, fills the first
// two blocks synchronously and, if more data may follow, hands the source to
// `reader` and starts a read into the third block.
388 void init(std::unique_ptr<ByteSourceBase> byte_source)
389 {
390 file_line = 0;
391
392 buffer = std::unique_ptr<char[]>(new char[3 * block_len]);
393 data_begin = 0;
394 data_end = byte_source->read(buffer.get(), 2 * block_len);
395
396 // Ignore UTF-8 BOM
397 if (data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
398 data_begin = 3;
399
// Only a completely filled first read is taken as "there may be more".
// Otherwise `byte_source` is not moved and is destroyed when this function
// returns.
// NOTE(review): this treats a short first read as end of input - presumably
// byte sources are expected to fill the request unless exhausted; confirm.
400 if (data_end == 2 * block_len) {
401 reader.init(std::move(byte_source));
402 reader.start_read(buffer.get() + 2 * block_len, block_len);
403 }
404 }
405
406 public:
407 LineReader() = delete;
408 LineReader(const LineReader&) = delete;
409 LineReader& operator=(const LineReader&) = delete;
410
// Constructors. Every one stores the file name via set_file_name() and then
// calls init() with a byte source; they differ only in where the bytes come
// from. Each form exists for `const char*` and for `const std::string&` names.

// Opens the named file through open_file().
411 explicit LineReader(const char* file_name)
412 {
413 set_file_name(file_name);
414 init(open_file(file_name));
415 }
416
417 explicit LineReader(const std::string& file_name)
418 {
419 set_file_name(file_name.c_str());
420 init(open_file(file_name.c_str()));
421 }
422
// Reads from a caller-supplied byte source; file_name is only stored.
423 LineReader(const char* file_name, std::unique_ptr<ByteSourceBase> byte_source)
424 {
425 set_file_name(file_name);
426 init(std::move(byte_source));
427 }
428
429 LineReader(const std::string& file_name, std::unique_ptr<ByteSourceBase> byte_source)
430 {
431 set_file_name(file_name.c_str());
432 init(std::move(byte_source));
433 }
434
// Reads from the memory range [data_begin, data_end) through a
// NonOwningStringByteSource.
435 LineReader(const char* file_name, const char* data_begin, const char* data_end)
436 {
437 set_file_name(file_name);
438 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end - data_begin)));
439 }
440
441 LineReader(const std::string& file_name, const char* data_begin, const char* data_end)
442 {
443 set_file_name(file_name.c_str());
444 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end - data_begin)));
445 }
446
// Reads from an already opened FILE* wrapped in an OwningStdIOByteSourceBase.
447 LineReader(const char* file_name, FILE* file)
448 {
449 set_file_name(file_name);
450 init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
451 }
452
453 LineReader(const std::string& file_name, FILE* file)
454 {
455 set_file_name(file_name.c_str());
456 init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
457 }
458
// Reads from a std::istream wrapped in a NonOwningIStreamByteSource, which
// keeps a reference to `in`.
459 LineReader(const char* file_name, std::istream& in)
460 {
461 set_file_name(file_name);
462 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
463 }
464
465 LineReader(const std::string& file_name, std::istream& in)
466 {
467 set_file_name(file_name.c_str());
468 init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
469 }
470
// Convenience overload; forwards to the const char* version.
471 void set_file_name(const std::string& file_name)
472 {
473 set_file_name(file_name.c_str());
474 }
475
// Stores the name in the fixed-size member array, truncating names that do not
// fit (the array holds error::max_file_name_length + 1 chars). A null pointer
// stores the empty string.
476 void set_file_name(const char* file_name)
477 {
478 if (file_name != nullptr) {
479 strncpy(this->file_name, file_name, sizeof(this->file_name));
// strncpy does not terminate when the source fills the array.
480 this->file_name[sizeof(this->file_name) - 1] = '\0';
481 } else {
482 this->file_name[0] = '\0';
483 }
484 }
485
// Returns the stored name, which set_file_name() may have cut short.
486 const char* get_truncated_file_name() const
487 {
488 return file_name;
489 }
490
// Overrides the line counter; next_line() increments it for each line returned.
491 void set_file_line(unsigned file_line)
492 {
493 this->file_line = file_line;
494 }
495
// Current value of the line counter (init() starts it at 0).
496 unsigned get_file_line() const
497 {
498 return file_line;
499 }
500
// Returns a pointer to the next line as a NUL-terminated string inside the
// internal buffer, or nullptr when no data is left. The line terminator
// ('\n', or "\r\n") is overwritten with NUL in place.
// NOTE(review): the pointer refers to the shared buffer, which is shifted by
// memcpy on later calls - presumably it is only valid until the next call.
501 char* next_line()
502 {
503 if (data_begin == data_end)
504 return nullptr;
505
506 ++file_line;
507
508 assert(data_begin < data_end);
509 assert(data_end <= block_len * 2);
510
// The first block is fully consumed: move the second block to the front and,
// if a reader is attached, append the block it has been reading and start the
// next read into the third block.
511 if (data_begin >= block_len) {
512 std::memcpy(buffer.get(), buffer.get() + block_len, block_len);
513 data_begin -= block_len;
514 data_end -= block_len;
515 if (reader.is_valid()) {
516 data_end += reader.finish_read();
517 std::memcpy(buffer.get() + block_len, buffer.get() + 2 * block_len, block_len);
518 reader.start_read(buffer.get() + 2 * block_len, block_len);
519 }
520 }
521
// Find the end of the current line (newline or end of the buffered data).
522 int line_end = data_begin;
523 while (line_end != data_end && buffer[line_end] != '\n') {
524 ++line_end;
525 }
526
// A line, counting its terminator slot, must fit into one block.
// NOTE(review): the declaration of `err` is on a line that is absent from
// this listing.
527 if (line_end - data_begin + 1 > block_len) {
529 err.set_file_name(file_name);
530 err.set_file_line(file_line);
531 throw err;
532 }
533
534 if (line_end != data_end && buffer[line_end] == '\n') {
535 buffer[line_end] = '\0';
536 } else {
537 // some files are missing the newline at the end of the
538 // last line
// data_end is bumped so that data_begin (set to line_end + 1 below) equals
// data_end and the next call returns nullptr.
539 ++data_end;
540 buffer[line_end] = '\0';
541 }
542
543 // handle windows \r\n-line breaks
544 if (line_end != data_begin && buffer[line_end - 1] == '\r')
545 buffer[line_end - 1] = '\0';
546
547 char* ret = buffer.get() + data_begin;
548 data_begin = line_end + 1;
549 return ret;
550 }
551 };
552
554 // CSV //
556
557 namespace error
558 {
561 {
563 {
564 std::memset(column_name, 0, max_column_name_length + 1);
565 }
566
// Stores a copy of the column name, truncated to max_column_name_length
// characters. A null pointer stores the empty string.
567 void set_column_name(const char* column_name)
568 {
569 if (column_name != nullptr) {
570 std::strncpy(this->column_name, column_name, max_column_name_length);
// Terminate explicitly; strncpy does not when the source is too long.
571 this->column_name[max_column_name_length] = '\0';
572 } else {
573 this->column_name[0] = '\0';
574 }
575 }
576
578 };
579
581
583 {
585 {
586 std::memset(column_content, 0, max_column_content_length + 1);
587 }
588
590 {
591 if (column_content != nullptr) {
592 std::strncpy(this->column_content, column_content, max_column_content_length);
593 this->column_content[max_column_content_length] = '\0';
594 } else {
595 this->column_content[0] = '\0';
596 }
597 }
598
600 };
601
603 base,
606 {
607 void format_error_message() const override
608 {
609 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
610 R"(Extra column "%s" in header of file "%s".)", column_name, file_name);
611 }
612 };
613
615 base,
618 {
619 void format_error_message() const override
620 {
621 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
622 R"(Missing column "%s" in header of file "%s".)", column_name, file_name);
623 }
624 };
625
627 base,
630 {
631 void format_error_message() const override
632 {
633 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
634 R"(Duplicated column "%s" in header of file "%s".)", column_name, file_name);
635 }
636 };
637
639 base,
641 {
642 void format_error_message() const override
643 {
644 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
645 "Header missing in file \"%s\".", file_name);
646 }
647 };
648
650 base,
653 {
654 void format_error_message() const override
655 {
656 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
657 "Too few columns in line %d in file \"%s\".", file_line, file_name);
658 }
659 };
660
662 base,
665 {
666 void format_error_message() const override
667 {
668 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
669 "Too many columns in line %d in file \"%s\".", file_line, file_name);
670 }
671 };
672
674 base,
677 {
678 void format_error_message() const override
679 {
680 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
681 "Escaped string was not closed in line %d in file \"%s\".", file_line, file_name);
682 }
683 };
684
686 base,
691 {
692 void format_error_message() const override
693 {
694 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
695 R"(The integer "%s" must be positive or 0 in column "%s" in file "%s" in line "%d".)", column_content, column_name, file_name, file_line);
696 }
697 };
698
699 struct no_digit :
700 base,
705 {
706 void format_error_message() const override
707 {
708 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
709 R"(The integer "%s" contains an invalid digit in column "%s" in file "%s" in line "%d".)", column_content, column_name, file_name, file_line);
710 }
711 };
712
714 base,
719 {
720 void format_error_message() const override
721 {
722 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
723 R"(The integer "%s" overflows in column "%s" in file "%s" in line "%d".)", column_content, column_name, file_name, file_line);
724 }
725 };
726
728 base,
733 {
734 void format_error_message() const override
735 {
736 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
737 R"(The integer "%s" underflows in column "%s" in file "%s" in line "%d".)", column_content, column_name, file_name, file_line);
738 }
739 };
740
742 base,
747 {
748 void format_error_message() const override
749 {
750 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
751 R"(The content "%s" of column "%s" in file "%s" in line "%d" is not a single character.)", column_content, column_name, file_name, file_line);
752 }
753 };
754 }
755
756 using ignore_column = unsigned int;
760
761 template <char... trim_char_list>
763 {
764 private:
// Recursion end: with no candidates left the character is not a trim char.
765 constexpr static bool is_trim_char(char)
766 {
767 return false;
768 }
769
// True when `c` equals `trim_char` or any of the remaining candidates.
770 template <class... OtherTrimChars>
771 constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars... other_trim_chars)
772 {
773 return c == trim_char || is_trim_char(c, other_trim_chars...);
774 }
775
776 public:
// Shrinks [str_begin, str_end) in place: str_begin is moved past leading trim
// characters, str_end is moved back over trailing ones, and a NUL is written
// at the new end.
777 static void trim(char*& str_begin, char*& str_end)
778 {
779 while (str_begin != str_end && is_trim_char(*str_begin, trim_char_list...))
780 ++str_begin;
781 while (str_begin != str_end && is_trim_char(*(str_end - 1), trim_char_list...))
782 --str_end;
783 *str_end = '\0';
784 }
785 };
786
788 {
// Comment policy hook that never classifies a line as a comment.
789 static bool is_comment(const char*)
790 {
791 return false;
792 }
793 };
794
795 template <char... comment_start_char_list>
797 {
798 private:
// Recursion end: no candidates left, so `c` does not start a comment.
799 constexpr static bool is_comment_start_char(char)
800 {
801 return false;
802 }
803
// True when `c` equals `comment_start_char` or any remaining candidate.
804 template <class... OtherCommentStartChars>
805 constexpr static bool is_comment_start_char(char c, char comment_start_char, OtherCommentStartChars... other_comment_start_chars)
806 {
807 return c == comment_start_char || is_comment_start_char(c, other_comment_start_chars...);
808 }
809
810 public:
// A line is a comment when its first character is one of
// comment_start_char_list. Only *line is inspected; leading whitespace is not
// skipped.
811 static bool is_comment(const char* line)
812 {
813 return is_comment_start_char(*line, comment_start_char_list...);
814 }
815 };
816
818 {
// Treats a line as a comment when it is empty or consists solely of spaces
// and tabs.
819 static bool is_comment(const char* line)
820 {
821 if (*line == '\0')
822 return true;
// Skip blanks; reaching the terminator means nothing else was on the line.
823 while (*line == ' ' || *line == '\t') {
824 ++line;
825 if (*line == 0)
826 return true;
827 }
828 return false;
829 }
830 };
831
832 template <char... comment_start_char_list>
834 {
835 static bool is_comment(const char* line)
836 {
838 }
839 };
840
841 template <char sep>
843 {
// Returns a pointer to the first `sep` at or after col_begin, or to the
// terminating NUL when the line holds no further separator.
844 static const char* find_next_column_end(const char* col_begin)
845 {
846 while (*col_begin != sep && *col_begin != '\0')
847 ++col_begin;
848 return col_begin;
849 }
850
// Intentionally empty: this policy does no unescaping, so the column range is
// left untouched.
851 static void unescape(char*&, char*&)
852 {
853 }
854 };
855
856 template <char sep, char quote>
858 {
859 static const char* find_next_column_end(const char* col_begin)
860 {
861 while (*col_begin != sep && *col_begin != '\0')
862 if (*col_begin != quote)
863 ++col_begin;
864 else {
865 do {
866 ++col_begin;
867 while (*col_begin != quote) {
868 if (*col_begin == '\0')
870 ++col_begin;
871 }
872 ++col_begin;
873 } while (*col_begin == quote);
874 }
875 return col_begin;
876 }
877
// Removes quoting from [col_begin, col_end) in place.
// Nothing happens unless the range has at least two characters and both its
// first and last character are `quote`. Otherwise the surrounding quotes are
// dropped, each doubled quote inside collapses to a single one, col_end is
// moved to the new end and a NUL is written there.
878 static void unescape(char*& col_begin, char*& col_end)
879 {
880 if (col_end - col_begin >= 2) {
881 if (*col_begin == quote && *(col_end - 1) == quote) {
882 ++col_begin;
883 --col_end;
// `out` trails `in`; the text only ever shrinks, so copying in place is safe.
884 char* out = col_begin;
885 for (char* in = col_begin; in != col_end; ++in) {
// Skip the first quote of a doubled pair so only one is copied.
886 if (*in == quote && (in + 1) != col_end && *(in + 1) == quote) {
887 ++in;
888 }
889 *out = *in;
890 ++out;
891 }
892 col_end = out;
893 *col_end = '\0';
894 }
895 }
896 }
897 };
898
900 {
901 template <class T>
902 static void on_overflow(T&)
903 {
905 }
906
907 template <class T>
908 static void on_underflow(T&)
909 {
911 }
912 };
913
915 {
// Overflow hook that does nothing: the value passed in is left unchanged.
916 template <class T>
917 static void on_overflow(T&)
918 {}
919
// Underflow hook that does nothing: the value passed in is left unchanged.
920 template <class T>
921 static void on_underflow(T&)
922 {}
923 };
924
926 {
// Overflow hook that replaces the value with std::numeric_limits<T>::max().
927 template <class T>
928 static void on_overflow(T& x)
929 {
930 // using (std::numeric_limits<T>::max) instead of std::numeric_limits<T>::max
931 // to make code including windows.h with its max macro happy
932 x = (std::numeric_limits<T>::max)();
933 }
934
// Underflow hook that clamps the value to the most negative value of T.
//
// lowest() is used rather than min(): both are identical for integer types,
// but for floating-point T min() is the smallest *positive* normal value,
// which would turn an underflow into a tiny positive number. lowest() is not
// affected by the windows.h min/max macros, so no parentheses are needed.
template <class T>
static void on_underflow(T& x)
{
    x = std::numeric_limits<T>::lowest();
}
940 };
941
942 namespace detail
943 {
944 template <class quote_policy>
946 char*& line, char*& col_begin, char*& col_end)
947 {
948 assert(line != nullptr);
949
950 col_begin = line;
951 // the col_begin + (... - col_begin) removes the constness
952 col_end = col_begin + (quote_policy::find_next_column_end(col_begin) - col_begin);
953
954 if (*col_end == '\0') {
955 line = nullptr;
956 } else {
957 *col_end = '\0';
958 line = col_end + 1;
959 }
960 }
961
962 template <class trim_policy, class quote_policy>
964 char* line,
965 char** sorted_col,
966 const std::vector<int>& col_order)
967 {
968 for (int i : col_order) {
969 if (line == nullptr)
970 throw ::io::error::too_few_columns();
971 char *col_begin, *col_end;
973
974 if (i != -1) {
975 trim_policy::trim(col_begin, col_end);
976 quote_policy::unescape(col_begin, col_end);
977
979 }
980 }
981 if (line != nullptr)
982 throw ::io::error::too_many_columns();
983 }
984
985 template <unsigned column_count, class trim_policy, class quote_policy>
987 char* line,
988 std::vector<int>& col_order,
989 const std::string* col_name,
991 {
992 col_order.clear();
993
994 bool found[column_count];
995 std::fill(found, found + column_count, false);
996 while (line) {
997 char *col_begin, *col_end;
999
1000 trim_policy::trim(col_begin, col_end);
1001 quote_policy::unescape(col_begin, col_end);
1002
1003 for (unsigned i = 0; i < column_count; ++i)
1004 if (col_begin == col_name[i]) {
1005 if (found[i]) {
1008 throw err;
1009 }
1010 found[i] = true;
1011 col_order.push_back(i);
1012 col_begin = 0;
1013 break;
1014 }
1015 if (col_begin) {
1017 col_order.push_back(-1);
1018 else {
1021 throw err;
1022 }
1023 }
1024 }
1026 for (unsigned i = 0; i < column_count; ++i) {
1027 if (!found[i]) {
1029 err.set_column_name(col_name[i].c_str());
1030 throw err;
1031 }
1032 }
1033 }
1034 }
1035
1036 template <class overflow_policy>
1037 void parse(char* col, char& x)
1038 {
1039 if (!*col)
1041 x = *col;
1042 ++col;
1043 if (*col)
1045 }
1046
// String-like targets: no conversion takes place and overflow_policy is unused.

// Copies the column text into the std::string.
1047 template <class overflow_policy>
1048 void parse(char* col, std::string& x)
1049 {
1050 x = col;
1051 }
1052
// Stores the pointer itself; `x` then refers to the same storage as `col`.
1053 template <class overflow_policy>
1054 void parse(char* col, const char*& x)
1055 {
1056 x = col;
1057 }
1058
// Stores the pointer itself; `x` then refers to the same storage as `col`.
1059 template <class overflow_policy>
1060 void parse(char* col, char*& x)
1061 {
1062 x = col;
1063 }
1064
// Parses a NUL-terminated run of decimal digits into `x`.
// - An empty string yields 0.
// - Any character other than '0'..'9' (including a sign) throws
//   error::no_digit.
// - When appending the next digit would exceed numeric_limits<T>::max(),
//   overflow_policy::on_overflow(x) is called and parsing stops.
1065 template <class overflow_policy, class T>
1066 void parse_unsigned_integer(const char* col, T& x)
1067 {
1068 x = 0;
1069 while (*col != '\0') {
1070 if ('0' <= *col && *col <= '9') {
1071 T y = *col - '0';
// Overflow test performed before 10 * x + y is computed.
1072 if (x > ((std::numeric_limits<T>::max)() - y) / 10) {
1073 overflow_policy::on_overflow(x);
1074 return;
1075 }
1076 x = 10 * x + y;
1077 } else
1078 throw error::no_digit();
1079 ++col;
1080 }
1081 }
1082
1083 template <class overflow_policy>
1084 void parse(char* col, unsigned char& x)
1085 {
1087 }
1088 template <class overflow_policy>
1089 void parse(char* col, unsigned short& x)
1090 {
1092 }
1093 template <class overflow_policy>
1094 void parse(char* col, unsigned int& x)
1095 {
1097 }
1098 template <class overflow_policy>
1099 void parse(char* col, unsigned long& x)
1100 {
1102 }
1103 template <class overflow_policy>
1104 void parse(char* col, unsigned long long& x)
1105 {
1107 }
1108
1109 template <class overflow_policy, class T>
1110 void parse_signed_integer(const char* col, T& x)
1111 {
1112 if (*col == '-') {
1113 ++col;
1114
1115 x = 0;
1116 while (*col != '\0') {
1117 if ('0' <= *col && *col <= '9') {
1118 T y = *col - '0';
1119 if (x < ((std::numeric_limits<T>::min)() + y) / 10) {
1120 overflow_policy::on_underflow(x);
1121 return;
1122 }
1123 x = 10 * x - y;
1124 } else
1125 throw error::no_digit();
1126 ++col;
1127 }
1128 return;
1129 } else if (*col == '+')
1130 ++col;
1132 }
1133
1134 template <class overflow_policy>
1135 void parse(char* col, signed char& x)
1136 {
1138 }
1139 template <class overflow_policy>
1140 void parse(char* col, signed short& x)
1141 {
1143 }
1144 template <class overflow_policy>
1145 void parse(char* col, signed int& x)
1146 {
1148 }
1149 template <class overflow_policy>
1150 void parse(char* col, signed long& x)
1151 {
1153 }
1154 template <class overflow_policy>
1155 void parse(char* col, signed long long& x)
1156 {
1158 }
1159
// Hand-written decimal floating-point parser.
// Accepted form: optional '+'/'-', integer digits, optional fraction
// introduced by '.' or ',', optional exponent introduced by 'e' or 'E'.
// Without an exponent, any character left over throws error::no_digit.
1160 template <class T>
1161 void parse_float(const char* col, T& x)
1162 {
1163 bool is_neg = false;
1164 if (*col == '-') {
1165 is_neg = true;
1166 ++col;
1167 } else if (*col == '+')
1168 ++col;
1169
// Integer part.
1170 x = 0;
1171 while ('0' <= *col && *col <= '9') {
1172 int y = *col - '0';
1173 x *= 10;
1174 x += y;
1175 ++col;
1176 }
1177
// Fractional part; `pos` is the weight of the current decimal place.
1178 if (*col == '.' || *col == ',') {
1179 ++col;
1180 T pos = 1;
1181 while ('0' <= *col && *col <= '9') {
1182 pos /= 10;
1183 int y = *col - '0';
1184 ++col;
1185 x += y * pos;
1186 }
1187 }
1188
1189 if (*col == 'e' || *col == 'E') {
1190 ++col;
1191 int e;
1192
// NOTE(review): the statement that assigns `e` is on a line that is absent
// from this listing - presumably the rest of the column is parsed as a signed
// integer exponent; confirm against the real header.
1194
1195 if (e != 0) {
1196 T base;
1197 if (e < 0) {
1198 base = T(0.1);
1199 e = -e;
1200 } else {
1201 base = T(10);
1202 }
1203
// Scale x by base^e: an even e squares the base and halves e, an odd e
// multiplies x by the base once and decrements e.
1204 while (e != 1) {
1205 if ((e & 1) == 0) {
1206 base = base * base;
1207 e >>= 1;
1208 } else {
1209 x *= base;
1210 --e;
1211 }
1212 }
1213 x *= base;
1214 }
1215 } else {
1216 if (*col != '\0')
1217 throw error::no_digit();
1218 }
1219
1220 if (is_neg)
1221 x = -x;
1222 }
1223
// Floating-point targets: all three overloads forward to parse_float();
// overflow_policy is not used by them.
1224 template <class overflow_policy>
1225 void parse(char* col, float& x)
1226 {
1227 parse_float(col, x);
1228 }
1229 template <class overflow_policy>
1230 void parse(char* col, double& x)
1231 {
1232 parse_float(col, x);
1233 }
1234 template <class overflow_policy>
1235 void parse(char* col, long double& x)
1236 {
1237 parse_float(col, x);
1238 }
1239
// Catch-all overload for unsupported target types: instantiating it fails the
// static_assert below, so the error appears at compile time.
1240 template <class overflow_policy, class T>
1241 void parse(char* col, T& x)
1242 {
1243 // Mute unused variable compiler warning
1244 (void)col;
1245 (void)x;
1246 // GCC evaluates "false" when reading the template and
1247 // "sizeof(T)!=sizeof(T)" only when instantiating it. This is why
1248 // this strange construct is used.
1249 static_assert(sizeof(T) != sizeof(T),
1250 "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
1251 }
1252
1253 }
1254
1255 template <unsigned column_count,
1256 class trim_policy = trim_chars<' ', '\t'>,
1257 class quote_policy = no_quote_escape<','>,
1258 class overflow_policy = throw_on_overflow,
1259 class comment_policy = no_comment>
1261 {
1262 private:
1263 LineReader in;
1264
1265 char* row[column_count];
1266 std::string column_names[column_count];
1267
1268 std::vector<int> col_order;
1269
// Stores the given names into column_names, one per recursion step.
// With k names still to come after `s`, `s` is written to index
// column_count - k - 1, so the last name lands in the last slot.
1270 template <class... ColNames>
1271 void set_column_names(std::string s, ColNames... cols)
1272 {
1273 column_names[column_count - sizeof...(ColNames) - 1] = std::move(s);
1274 set_column_names(std::forward<ColNames>(cols)...);
1275 }
1276
// Recursion end: no names left.
1277 void set_column_names() {}
1278
1279 public:
1280 CSVReader() = delete;
1281 CSVReader(const CSVReader&) = delete;
1283
// Forwards all arguments to the LineReader member `in` and sets up defaults:
// every row pointer is null, col_order is the identity mapping 0..column_count-1
// and the columns are named "col1", "col2", ...
1284 template <class... Args>
1285 explicit CSVReader(Args&&... args) :
1286 in(std::forward<Args>(args)...)
1287 {
1288 std::fill(row, row + column_count, nullptr);
1289 col_order.resize(column_count);
1290 for (unsigned i = 0; i < column_count; ++i)
1291 col_order[i] = i;
1292 for (unsigned i = 1; i <= column_count; ++i)
1293 column_names[i - 1] = "col" + std::to_string(i);
1294 }
1295
1297 {
1298 return in.next_line();
1299 }
1300
1301 template <class... ColNames>
1302 void read_header(ignore_column ignore_policy, ColNames... cols)
1303 {
1304 static_assert(sizeof...(ColNames) >= column_count, "not enough column names specified");
1305 static_assert(sizeof...(ColNames) <= column_count, "too many column names specified");
1306 try {
1307 set_column_names(std::forward<ColNames>(cols)...);
1308
1309 char* line;
1310 do {
1311 line = in.next_line();
1312 if (!line)
1313 throw error::header_missing();
1314 } while (comment_policy::is_comment(line));
1315
1316 detail::parse_header_line<column_count, trim_policy, quote_policy>(line, col_order, column_names, ignore_policy);
1317 } catch (error::with_file_name& err) {
1319 throw;
1320 }
1321 }
1322
// Sets the column names without reading anything from the input.
// Exactly column_count names are required (enforced by the two static_asserts).
// The row pointers are cleared and col_order is reset to the identity mapping.
1323 template <class... ColNames>
1324 void set_header(ColNames... cols)
1325 {
1326 static_assert(sizeof...(ColNames) >= column_count,
1327 "not enough column names specified");
1328 static_assert(sizeof...(ColNames) <= column_count,
1329 "too many column names specified");
1330 set_column_names(std::forward<ColNames>(cols)...);
1331 std::fill(row, row + column_count, nullptr);
1332 col_order.resize(column_count);
1333 for (unsigned i = 0; i < column_count; ++i)
1334 col_order[i] = i;
1335 }
1336
// Reports whether the column called `name` is mapped in col_order.
// The inner std::find yields the index of `name` within column_names (or
// column_count when the name is unknown); the outer std::find then looks for
// that index among the entries of col_order.
1337 bool has_column(const std::string& name) const
1338 {
1339 return col_order.end() != std::find(
1340 col_order.begin(), col_order.end(),
1341 std::find(std::begin(column_names), std::end(column_names), name) - std::begin(column_names));
1342 }
1343
// The following accessors simply forward to the LineReader member `in`.

// Replaces the file name stored by the line reader.
1344 void set_file_name(const std::string& file_name)
1345 {
1346 in.set_file_name(file_name);
1347 }
1348
1349 void set_file_name(const char* file_name)
1350 {
1351 in.set_file_name(file_name);
1352 }
1353
// Returns the file name as stored by the line reader.
1354 const char* get_truncated_file_name() const
1355 {
1356 return in.get_truncated_file_name();
1357 }
1358
// Overrides the line reader's line counter.
1359 void set_file_line(unsigned file_line)
1360 {
1361 in.set_file_line(file_line);
1362 }
1363
// Returns the line reader's line counter.
1364 unsigned get_file_line() const
1365 {
1366 return in.get_file_line();
1367 }
1368
1369 private:
1370 void parse_helper(std::size_t) {}
1371
1372 template <class T, class... ColType>
1373 void parse_helper(std::size_t r, T& t, ColType&... cols)
1374 {
1375 if (row[r]) {
1376 try {
1377 try {
1379 } catch (error::with_column_content& err) {
1380 err.set_column_content(row[r]);
1381 throw;
1382 }
1383 } catch (error::with_column_name& err) {
1384 err.set_column_name(column_names[r].c_str());
1385 throw;
1386 }
1387 }
1388 parse_helper(r + 1, cols...);
1389 }
1390
1391 public:
1392 template <class... ColType>
1393 bool read_row(ColType&... cols)
1394 {
1395 static_assert(sizeof...(ColType) >= column_count,
1396 "not enough columns specified");
1397 static_assert(sizeof...(ColType) <= column_count,
1398 "too many columns specified");
1399 try {
1400 try {
1401 char* line;
1402 do {
1403 line = in.next_line();
1404 if (!line)
1405 return false;
1406 } while (comment_policy::is_comment(line));
1407
1409
1410 parse_helper(0, cols...);
1411 } catch (error::with_file_name& err) {
1413 throw;
1414 }
1415 } catch (error::with_file_line& err) {
1416 err.set_file_line(in.get_file_line());
1417 throw;
1418 }
1419
1420 return true;
1421 }
1422 };
1423}
1424#endif
Definition csv.h:159
virtual int read(char *buffer, int size)=0
virtual ~ByteSourceBase()
Definition csv.h:162
Definition csv.h:1261
CSVReader(const CSVReader &)=delete
unsigned get_file_line() const
Definition csv.h:1364
void set_file_line(unsigned file_line)
Definition csv.h:1359
CSVReader & operator=(const CSVReader &)
void set_file_name(const std::string &file_name)
Definition csv.h:1344
const char * get_truncated_file_name() const
Definition csv.h:1354
bool read_row(ColType &... cols)
Definition csv.h:1393
void set_file_name(const char *file_name)
Definition csv.h:1349
char * next_line()
Definition csv.h:1296
void set_header(ColNames... cols)
Definition csv.h:1324
bool has_column(const std::string &name) const
Definition csv.h:1337
CSVReader()=delete
void read_header(ignore_column ignore_policy, ColNames... cols)
Definition csv.h:1302
CSVReader(Args &&... args)
Definition csv.h:1285
Definition csv.h:358
char * next_line()
Definition csv.h:501
LineReader(const std::string &file_name, const char *data_begin, const char *data_end)
Definition csv.h:441
void set_file_name(const std::string &file_name)
Definition csv.h:471
LineReader & operator=(const LineReader &)=delete
LineReader(const char *file_name, std::unique_ptr< ByteSourceBase > byte_source)
Definition csv.h:423
LineReader(const char *file_name, std::istream &in)
Definition csv.h:459
LineReader(const std::string &file_name, std::istream &in)
Definition csv.h:465
unsigned get_file_line() const
Definition csv.h:496
void set_file_line(unsigned file_line)
Definition csv.h:491
LineReader(const std::string &file_name, std::unique_ptr< ByteSourceBase > byte_source)
Definition csv.h:429
LineReader(const char *file_name)
Definition csv.h:411
void set_file_name(const char *file_name)
Definition csv.h:476
LineReader(const LineReader &)=delete
const char * get_truncated_file_name() const
Definition csv.h:486
LineReader(const std::string &file_name, FILE *file)
Definition csv.h:453
LineReader(const std::string &file_name)
Definition csv.h:417
LineReader()=delete
LineReader(const char *file_name, FILE *file)
Definition csv.h:447
LineReader(const char *file_name, const char *data_begin, const char *data_end)
Definition csv.h:435
Definition csv.h:235
~AsynchronousReader()
Definition csv.h:297
void init(std::unique_ptr< ByteSourceBase > arg_byte_source)
Definition csv.h:237
int finish_read()
Definition csv.h:283
void start_read(char *arg_buffer, int arg_desired_byte_count)
Definition csv.h:274
bool is_valid() const
Definition csv.h:269
~NonOwningIStreamByteSource()
Definition csv.h:203
NonOwningIStreamByteSource(std::istream &in)
Definition csv.h:194
int read(char *buffer, int size)
Definition csv.h:197
NonOwningStringByteSource(const char *str, long long size)
Definition csv.h:212
~NonOwningStringByteSource()
Definition csv.h:226
int read(char *buffer, int desired_byte_count)
Definition csv.h:215
OwningStdIOByteSourceBase(FILE *file)
Definition csv.h:170
int read(char *buffer, int size)
Definition csv.h:177
~OwningStdIOByteSourceBase()
Definition csv.h:182
Definition csv.h:327
int finish_read()
Definition csv.h:345
void start_read(char *arg_buffer, int arg_desired_byte_count)
Definition csv.h:339
bool is_valid() const
Definition csv.h:334
void init(std::unique_ptr< ByteSourceBase > arg_byte_source)
Definition csv.h:329
void parse(char *col, char &x)
Definition csv.h:1037
void parse_header_line(char *line, std::vector< int > &col_order, const std::string *col_name, ignore_column ignore_policy)
Definition csv.h:986
void parse_float(const char *col, T &x)
Definition csv.h:1161
void chop_next_column(char *&line, char *&col_begin, char *&col_end)
Definition csv.h:945
void parse_unsigned_integer(const char *col, T &x)
Definition csv.h:1066
void parse_signed_integer(const char *col, T &x)
Definition csv.h:1110
void parse_line(char *line, char **sorted_col, const std::vector< int > &col_order)
Definition csv.h:963
const int max_column_content_length
Definition csv.h:580
const int max_file_name_length
Definition csv.h:75
const int max_column_name_length
Definition csv.h:559
Definition csv.h:55
static const ignore_column ignore_no_column
Definition csv.h:757
static const ignore_column ignore_extra_column
Definition csv.h:758
unsigned int ignore_column
Definition csv.h:756
static const ignore_column ignore_missing_column
Definition csv.h:759
Definition EffectArchetypes.h:65
std::string to_string(RE::EffectArchetype a_archetype)
Definition EffectArchetypes.h:66
Definition csv.h:858
static void unescape(char *&col_begin, char *&col_end)
Definition csv.h:878
static const char * find_next_column_end(const char *col_begin)
Definition csv.h:859
Definition csv.h:818
static bool is_comment(const char *line)
Definition csv.h:819
Definition csv.h:63
char error_message_buffer[512]
Definition csv.h:72
const char * what() const noexcept override
Definition csv.h:66
virtual void format_error_message() const =0
Definition csv.h:133
void format_error_message() const override
Definition csv.h:134
void format_error_message() const override
Definition csv.h:631
void format_error_message() const override
Definition csv.h:678
void format_error_message() const override
Definition csv.h:607
Definition csv.h:641
void format_error_message() const override
Definition csv.h:642
void format_error_message() const override
Definition csv.h:692
Definition csv.h:719
void format_error_message() const override
Definition csv.h:720
Definition csv.h:733
void format_error_message() const override
Definition csv.h:734
void format_error_message() const override
Definition csv.h:748
void format_error_message() const override
Definition csv.h:150
void format_error_message() const override
Definition csv.h:619
Definition csv.h:705
void format_error_message() const override
Definition csv.h:706
Definition csv.h:653
void format_error_message() const override
Definition csv.h:654
Definition csv.h:665
void format_error_message() const override
Definition csv.h:666
Definition csv.h:583
char column_content[max_column_content_length+1]
Definition csv.h:599
with_column_content()
Definition csv.h:584
void set_column_content(const char *column_content)
Definition csv.h:589
Definition csv.h:561
with_column_name()
Definition csv.h:562
void set_column_name(const char *column_name)
Definition csv.h:567
char column_name[max_column_name_length+1]
Definition csv.h:577
Definition csv.h:115
with_errno()
Definition csv.h:116
void set_errno(int errno_value)
Definition csv.h:121
int errno_value
Definition csv.h:126
Definition csv.h:100
int file_line
Definition csv.h:111
with_file_line()
Definition csv.h:101
void set_file_line(int file_line)
Definition csv.h:106
Definition csv.h:78
char file_name[max_file_name_length+1]
Definition csv.h:96
void set_file_name(const char *file_name)
Definition csv.h:84
with_file_name()
Definition csv.h:79
Definition csv.h:915
static void on_underflow(T &)
Definition csv.h:921
static void on_overflow(T &)
Definition csv.h:917
Definition csv.h:788
static bool is_comment(const char *)
Definition csv.h:789
Definition csv.h:843
static const char * find_next_column_end(const char *col_begin)
Definition csv.h:844
static void unescape(char *&, char *&)
Definition csv.h:851
Definition csv.h:926
static void on_overflow(T &x)
Definition csv.h:928
static void on_underflow(T &x)
Definition csv.h:936
static bool is_comment(const char *line)
Definition csv.h:835
Definition csv.h:797
static bool is_comment(const char *line)
Definition csv.h:811
Definition csv.h:900
static void on_overflow(T &)
Definition csv.h:902
static void on_underflow(T &)
Definition csv.h:908
Definition csv.h:763
static void trim(char *&str_begin, char *&str_end)
Definition csv.h:777