CommonLibVR
csv.h
Go to the documentation of this file.
1 #pragma warning(disable: 4083 244 267 458)
2 // Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
3 // License: BSD-3
4 //
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // 1. Redistributions of source code must retain the above copyright notice,
11 // this list of conditions and the following disclaimer.
12 //
13 // 2. Redistributions in binary form must reproduce the above copyright notice,
14 // this list of conditions and the following disclaimer in the documentation
15 // and/or other materials provided with the distribution.
16 //
17 // 3. Neither the name of the copyright holder nor the names of its contributors
18 // may be used to endorse or promote products derived from this software
19 // without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 // POSSIBILITY OF SUCH DAMAGE.
32 
33 #ifndef CSV_H
34 # define CSV_H
35 
36 # include <algorithm>
37 # include <cstdio>
38 # include <cstring>
39 # include <exception>
40 # include <string>
41 # include <utility>
42 # include <vector>
43 # ifndef CSV_IO_NO_THREAD
44 # include <condition_variable>
45 # include <mutex>
46 # include <thread>
47 # endif
48 # include <cassert>
49 # include <cerrno>
50 # include <istream>
51 # include <limits>
52 # include <memory>
53 
54 namespace io
55 {
57  // LineReader //
59 
60  namespace error
61  {
62  struct base : std::exception
63  {
64  virtual void format_error_message() const = 0;
65 
66  const char* what() const noexcept override
67  {
69  return error_message_buffer;
70  }
71 
72  mutable char error_message_buffer[512];
73  };
74 
75  const int max_file_name_length = 255;
76 
78  {
80  {
81  std::memset(file_name, 0, sizeof(file_name));
82  }
83 
84  void set_file_name(const char* file_name)
85  {
86  if (file_name != nullptr) {
87  // This call to strncpy has parenthesis around it
88  // to silence the GCC -Wstringop-truncation warning
89  (strncpy(this->file_name, file_name, sizeof(this->file_name)));
90  this->file_name[sizeof(this->file_name) - 1] = '\0';
91  } else {
92  this->file_name[0] = '\0';
93  }
94  }
95 
97  };
98 
100  {
102  {
103  file_line = -1;
104  }
105 
107  {
108  this->file_line = file_line;
109  }
110 
112  };
113 
114  struct with_errno
115  {
117  {
118  errno_value = 0;
119  }
120 
122  {
123  this->errno_value = errno_value;
124  }
125 
127  };
128 
130  base,
132  with_errno
133  {
134  void format_error_message() const override
135  {
136  if (errno_value != 0)
137  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
138  "Can not open file \"%s\" because \"%s\".", file_name, std::strerror(errno_value));
139  else
140  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
141  "Can not open file \"%s\".", file_name);
142  }
143  };
144 
146  base,
149  {
150  void format_error_message() const override
151  {
152  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
153  "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1.", file_line, file_name);
154  }
155  };
156  }
157 
159  {
160  public:
161  virtual int read(char* buffer, int size) = 0;
162  virtual ~ByteSourceBase() {}
163  };
164 
165  namespace detail
166  {
168  {
169  public:
170  explicit OwningStdIOByteSourceBase(FILE* file) :
171  file(file)
172  {
173  // Tell the std library that we want to do the buffering ourselves.
174  std::setvbuf(file, 0, _IONBF, 0);
175  }
176 
177  int read(char* buffer, int size)
178  {
179  return std::fread(buffer, 1, size, file);
180  }
181 
183  {
184  std::fclose(file);
185  }
186 
187  private:
188  FILE* file;
189  };
190 
192  {
193  public:
194  explicit NonOwningIStreamByteSource(std::istream& in) :
195  in(in) {}
196 
197  int read(char* buffer, int size)
198  {
199  in.read(buffer, size);
200  return in.gcount();
201  }
202 
204 
205  private:
206  std::istream& in;
207  };
208 
210  {
211  public:
212  NonOwningStringByteSource(const char* str, long long size) :
213  str(str), remaining_byte_count(size) {}
214 
215  int read(char* buffer, int desired_byte_count)
216  {
217  int to_copy_byte_count = desired_byte_count;
218  if (remaining_byte_count < to_copy_byte_count)
219  to_copy_byte_count = remaining_byte_count;
220  std::memcpy(buffer, str, to_copy_byte_count);
221  remaining_byte_count -= to_copy_byte_count;
222  str += to_copy_byte_count;
223  return to_copy_byte_count;
224  }
225 
227 
228  private:
229  const char* str;
230  long long remaining_byte_count;
231  };
232 
233 # ifndef CSV_IO_NO_THREAD
235  {
236  public:
237  void init(std::unique_ptr<ByteSourceBase> arg_byte_source)
238  {
239  std::unique_lock<std::mutex> guard(lock);
240  byte_source = std::move(arg_byte_source);
241  desired_byte_count = -1;
242  termination_requested = false;
243  worker = std::thread(
244  [&] {
245  std::unique_lock<std::mutex> guard(lock);
246  try {
247  for (;;) {
248  read_requested_condition.wait(
249  guard,
250  [&] {
251  return desired_byte_count != -1 || termination_requested;
252  });
253  if (termination_requested)
254  return;
255 
256  read_byte_count = byte_source->read(buffer, desired_byte_count);
257  desired_byte_count = -1;
258  if (read_byte_count == 0)
259  break;
260  read_finished_condition.notify_one();
261  }
262  } catch (...) {
263  read_error = std::current_exception();
264  }
265  read_finished_condition.notify_one();
266  });
267  }
268 
269  bool is_valid() const
270  {
271  return byte_source != nullptr;
272  }
273 
274  void start_read(char* arg_buffer, int arg_desired_byte_count)
275  {
276  std::unique_lock<std::mutex> guard(lock);
277  buffer = arg_buffer;
278  desired_byte_count = arg_desired_byte_count;
279  read_byte_count = -1;
280  read_requested_condition.notify_one();
281  }
282 
284  {
285  std::unique_lock<std::mutex> guard(lock);
286  read_finished_condition.wait(
287  guard,
288  [&] {
289  return read_byte_count != -1 || read_error;
290  });
291  if (read_error)
292  std::rethrow_exception(read_error);
293  else
294  return read_byte_count;
295  }
296 
298  {
299  if (byte_source != nullptr) {
300  {
301  std::unique_lock<std::mutex> guard(lock);
302  termination_requested = true;
303  }
304  read_requested_condition.notify_one();
305  worker.join();
306  }
307  }
308 
309  private:
310  std::unique_ptr<ByteSourceBase> byte_source;
311 
312  std::thread worker;
313 
314  bool termination_requested;
315  std::exception_ptr read_error;
316  char* buffer;
317  int desired_byte_count;
318  int read_byte_count;
319 
320  std::mutex lock;
321  std::condition_variable read_finished_condition;
322  std::condition_variable read_requested_condition;
323  };
324 # endif
325 
327  {
328  public:
329  void init(std::unique_ptr<ByteSourceBase> arg_byte_source)
330  {
331  byte_source = std::move(arg_byte_source);
332  }
333 
334  bool is_valid() const
335  {
336  return byte_source != nullptr;
337  }
338 
339  void start_read(char* arg_buffer, int arg_desired_byte_count)
340  {
341  buffer = arg_buffer;
342  desired_byte_count = arg_desired_byte_count;
343  }
344 
346  {
347  return byte_source->read(buffer, desired_byte_count);
348  }
349 
350  private:
351  std::unique_ptr<ByteSourceBase> byte_source;
352  char* buffer;
353  int desired_byte_count;
354  };
355  }
356 
358  {
359  private:
360  static const int block_len = 1 << 20;
361  std::unique_ptr<char[]> buffer; // must be constructed before (and thus destructed after) the reader!
362 # ifdef CSV_IO_NO_THREAD
364 # else
366 # endif
367  int data_begin;
368  int data_end;
369 
370  char file_name[error::max_file_name_length + 1];
371  unsigned file_line;
372 
373  static std::unique_ptr<ByteSourceBase> open_file(const char* file_name)
374  {
375  // We open the file in binary mode as it makes no difference under *nix
376  // and under Windows we handle \r\n newlines ourselves.
377  FILE* file = std::fopen(file_name, "rb");
378  if (file == 0) {
379  int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
381  err.set_errno(x);
382  err.set_file_name(file_name);
383  throw err;
384  }
385  return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
386  }
387 
388  void init(std::unique_ptr<ByteSourceBase> byte_source)
389  {
390  file_line = 0;
391 
392  buffer = std::unique_ptr<char[]>(new char[3 * block_len]);
393  data_begin = 0;
394  data_end = byte_source->read(buffer.get(), 2 * block_len);
395 
396  // Ignore UTF-8 BOM
397  if (data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
398  data_begin = 3;
399 
400  if (data_end == 2 * block_len) {
401  reader.init(std::move(byte_source));
402  reader.start_read(buffer.get() + 2 * block_len, block_len);
403  }
404  }
405 
406  public:
407  LineReader() = delete;
408  LineReader(const LineReader&) = delete;
409  LineReader& operator=(const LineReader&) = delete;
410 
411  explicit LineReader(const char* file_name)
412  {
413  set_file_name(file_name);
414  init(open_file(file_name));
415  }
416 
417  explicit LineReader(const std::string& file_name)
418  {
419  set_file_name(file_name.c_str());
420  init(open_file(file_name.c_str()));
421  }
422 
423  LineReader(const char* file_name, std::unique_ptr<ByteSourceBase> byte_source)
424  {
425  set_file_name(file_name);
426  init(std::move(byte_source));
427  }
428 
429  LineReader(const std::string& file_name, std::unique_ptr<ByteSourceBase> byte_source)
430  {
431  set_file_name(file_name.c_str());
432  init(std::move(byte_source));
433  }
434 
435  LineReader(const char* file_name, const char* data_begin, const char* data_end)
436  {
437  set_file_name(file_name);
438  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end - data_begin)));
439  }
440 
441  LineReader(const std::string& file_name, const char* data_begin, const char* data_end)
442  {
443  set_file_name(file_name.c_str());
444  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end - data_begin)));
445  }
446 
447  LineReader(const char* file_name, FILE* file)
448  {
449  set_file_name(file_name);
450  init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
451  }
452 
453  LineReader(const std::string& file_name, FILE* file)
454  {
455  set_file_name(file_name.c_str());
456  init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
457  }
458 
459  LineReader(const char* file_name, std::istream& in)
460  {
461  set_file_name(file_name);
462  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
463  }
464 
465  LineReader(const std::string& file_name, std::istream& in)
466  {
467  set_file_name(file_name.c_str());
468  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
469  }
470 
471  void set_file_name(const std::string& file_name)
472  {
473  set_file_name(file_name.c_str());
474  }
475 
476  void set_file_name(const char* file_name)
477  {
478  if (file_name != nullptr) {
479  strncpy(this->file_name, file_name, sizeof(this->file_name));
480  this->file_name[sizeof(this->file_name) - 1] = '\0';
481  } else {
482  this->file_name[0] = '\0';
483  }
484  }
485 
486  const char* get_truncated_file_name() const
487  {
488  return file_name;
489  }
490 
491  void set_file_line(unsigned file_line)
492  {
493  this->file_line = file_line;
494  }
495 
496  unsigned get_file_line() const
497  {
498  return file_line;
499  }
500 
501  char* next_line()
502  {
503  if (data_begin == data_end)
504  return nullptr;
505 
506  ++file_line;
507 
508  assert(data_begin < data_end);
509  assert(data_end <= block_len * 2);
510 
511  if (data_begin >= block_len) {
512  std::memcpy(buffer.get(), buffer.get() + block_len, block_len);
513  data_begin -= block_len;
514  data_end -= block_len;
515  if (reader.is_valid()) {
516  data_end += reader.finish_read();
517  std::memcpy(buffer.get() + block_len, buffer.get() + 2 * block_len, block_len);
518  reader.start_read(buffer.get() + 2 * block_len, block_len);
519  }
520  }
521 
522  int line_end = data_begin;
523  while (line_end != data_end && buffer[line_end] != '\n') {
524  ++line_end;
525  }
526 
527  if (line_end - data_begin + 1 > block_len) {
529  err.set_file_name(file_name);
530  err.set_file_line(file_line);
531  throw err;
532  }
533 
534  if (line_end != data_end && buffer[line_end] == '\n') {
535  buffer[line_end] = '\0';
536  } else {
537  // some files are missing the newline at the end of the
538  // last line
539  ++data_end;
540  buffer[line_end] = '\0';
541  }
542 
543  // handle windows \r\n-line breaks
544  if (line_end != data_begin && buffer[line_end - 1] == '\r')
545  buffer[line_end - 1] = '\0';
546 
547  char* ret = buffer.get() + data_begin;
548  data_begin = line_end + 1;
549  return ret;
550  }
551  };
552 
554  // CSV //
556 
557  namespace error
558  {
559  const int max_column_name_length = 63;
561  {
563  {
564  std::memset(column_name, 0, max_column_name_length + 1);
565  }
566 
567  void set_column_name(const char* column_name)
568  {
569  if (column_name != nullptr) {
570  std::strncpy(this->column_name, column_name, max_column_name_length);
571  this->column_name[max_column_name_length] = '\0';
572  } else {
573  this->column_name[0] = '\0';
574  }
575  }
576 
578  };
579 
581 
583  {
585  {
586  std::memset(column_content, 0, max_column_content_length + 1);
587  }
588 
590  {
591  if (column_content != nullptr) {
592  std::strncpy(this->column_content, column_content, max_column_content_length);
593  this->column_content[max_column_content_length] = '\0';
594  } else {
595  this->column_content[0] = '\0';
596  }
597  }
598 
600  };
601 
603  base,
606  {
607  void format_error_message() const override
608  {
609  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
610  R"(Extra column "%s" in header of file "%s".)", column_name, file_name);
611  }
612  };
613 
615  base,
618  {
619  void format_error_message() const override
620  {
621  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
622  R"(Missing column "%s" in header of file "%s".)", column_name, file_name);
623  }
624  };
625 
627  base,
630  {
631  void format_error_message() const override
632  {
633  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
634  R"(Duplicated column "%s" in header of file "%s".)", column_name, file_name);
635  }
636  };
637 
638  struct header_missing :
639  base,
641  {
642  void format_error_message() const override
643  {
644  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
645  "Header missing in file \"%s\".", file_name);
646  }
647  };
648 
650  base,
653  {
654  void format_error_message() const override
655  {
656  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
657  "Too few columns in line %d in file \"%s\".", file_line, file_name);
658  }
659  };
660 
662  base,
665  {
666  void format_error_message() const override
667  {
668  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
669  "Too many columns in line %d in file \"%s\".", file_line, file_name);
670  }
671  };
672 
674  base,
677  {
678  void format_error_message() const override
679  {
680  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
681  "Escaped string was not closed in line %d in file \"%s\".", file_line, file_name);
682  }
683  };
684 
686  base,
691  {
692  void format_error_message() const override
693  {
694  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
695  R"(The integer "%s" must be positive or 0 in column "%s" in file "%s" in line "%d".)", column_content, column_name, file_name, file_line);
696  }
697  };
698 
699  struct no_digit :
700  base,
705  {
706  void format_error_message() const override
707  {
708  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
709  R"(The integer "%s" contains an invalid digit in column "%s" in file "%s" in line "%d".)", column_content, column_name, file_name, file_line);
710  }
711  };
712 
714  base,
719  {
720  void format_error_message() const override
721  {
722  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
723  R"(The integer "%s" overflows in column "%s" in file "%s" in line "%d".)", column_content, column_name, file_name, file_line);
724  }
725  };
726 
728  base,
733  {
734  void format_error_message() const override
735  {
736  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
737  R"(The integer "%s" underflows in column "%s" in file "%s" in line "%d".)", column_content, column_name, file_name, file_line);
738  }
739  };
740 
742  base,
747  {
748  void format_error_message() const override
749  {
750  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
751  R"(The content "%s" of column "%s" in file "%s" in line "%d" is not a single character.)", column_content, column_name, file_name, file_line);
752  }
753  };
754  }
755 
756  using ignore_column = unsigned int;
757  static const ignore_column ignore_no_column = 0;
760 
/// Trim policy that strips any of the characters in `trim_char_list` from
/// both ends of a column.
template <char... trim_char_list>
struct trim_chars
{
private:
    // End of recursion: no candidates left, so `c` is not a trim character.
    constexpr static bool is_trim_char(char)
    {
        return false;
    }

    // Compares `c` with the first candidate and recurses over the rest.
    template <class... OtherTrimChars>
    constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars... other_trim_chars)
    {
        return (c == trim_char) ? true : is_trim_char(c, other_trim_chars...);
    }

public:
    /// Narrows [str_begin, str_end) so that it neither starts nor ends with
    /// a trim character, then writes a NUL at the new `str_end`.
    static void trim(char*& str_begin, char*& str_end)
    {
        for (; str_begin != str_end; ++str_begin)
            if (!is_trim_char(*str_begin, trim_char_list...))
                break;

        for (; str_begin != str_end; --str_end)
            if (!is_trim_char(str_end[-1], trim_char_list...))
                break;

        *str_end = '\0';
    }
};
786 
/// Comment policy that never classifies a line as a comment.
struct no_comment
{
    /// Always returns false; the line itself is not inspected.
    static bool is_comment(const char* /*line*/)
    {
        const bool line_is_comment = false;
        return line_is_comment;
    }
};
794 
795  template <char... comment_start_char_list>
797  {
798  private:
799  constexpr static bool is_comment_start_char(char)
800  {
801  return false;
802  }
803 
804  template <class... OtherCommentStartChars>
805  constexpr static bool is_comment_start_char(char c, char comment_start_char, OtherCommentStartChars... other_comment_start_chars)
806  {
807  return c == comment_start_char || is_comment_start_char(c, other_comment_start_chars...);
808  }
809 
810  public:
811  static bool is_comment(const char* line)
812  {
813  return is_comment_start_char(*line, comment_start_char_list...);
814  }
815  };
816 
818  {
/// Returns true when `line` is empty or consists only of spaces and tabs.
static bool is_comment(const char* line)
{
    const char* cursor = line;
    // Skip the leading blanks; the line counts as empty exactly when
    // nothing but the terminator follows them.
    while (*cursor == ' ' || *cursor == '\t')
        ++cursor;
    return *cursor == '\0';
}
830  };
831 
832  template <char... comment_start_char_list>
834  {
835  static bool is_comment(const char* line)
836  {
838  }
839  };
840 
841  template <char sep>
843  {
844  static const char* find_next_column_end(const char* col_begin)
845  {
846  while (*col_begin != sep && *col_begin != '\0')
847  ++col_begin;
848  return col_begin;
849  }
850 
851  static void unescape(char*&, char*&)
852  {
853  }
854  };
855 
856  template <char sep, char quote>
858  {
859  static const char* find_next_column_end(const char* col_begin)
860  {
861  while (*col_begin != sep && *col_begin != '\0')
862  if (*col_begin != quote)
863  ++col_begin;
864  else {
865  do {
866  ++col_begin;
867  while (*col_begin != quote) {
868  if (*col_begin == '\0')
870  ++col_begin;
871  }
872  ++col_begin;
873  } while (*col_begin == quote);
874  }
875  return col_begin;
876  }
877 
878  static void unescape(char*& col_begin, char*& col_end)
879  {
880  if (col_end - col_begin >= 2) {
881  if (*col_begin == quote && *(col_end - 1) == quote) {
882  ++col_begin;
883  --col_end;
884  char* out = col_begin;
885  for (char* in = col_begin; in != col_end; ++in) {
886  if (*in == quote && (in + 1) != col_end && *(in + 1) == quote) {
887  ++in;
888  }
889  *out = *in;
890  ++out;
891  }
892  col_end = out;
893  *col_end = '\0';
894  }
895  }
896  }
897  };
898 
900  {
901  template <class T>
902  static void on_overflow(T&)
903  {
904  throw error::integer_overflow();
905  }
906 
907  template <class T>
908  static void on_underflow(T&)
909  {
910  throw error::integer_underflow();
911  }
912  };
913 
915  {
916  template <class T>
917  static void on_overflow(T&)
918  {}
919 
920  template <class T>
921  static void on_underflow(T&)
922  {}
923  };
924 
926  {
927  template <class T>
928  static void on_overflow(T& x)
929  {
930  // using (std::numeric_limits<T>::max) instead of std::numeric_limits<T>::max
931  // to make code including windows.h with its max macro happy
933  }
934 
935  template <class T>
936  static void on_underflow(T& x)
937  {
939  }
940  };
941 
942  namespace detail
943  {
944  template <class quote_policy>
946  char*& line, char*& col_begin, char*& col_end)
947  {
948  assert(line != nullptr);
949 
950  col_begin = line;
951  // the col_begin + (... - col_begin) removes the constness
952  col_end = col_begin + (quote_policy::find_next_column_end(col_begin) - col_begin);
953 
954  if (*col_end == '\0') {
955  line = nullptr;
956  } else {
957  *col_end = '\0';
958  line = col_end + 1;
959  }
960  }
961 
962  template <class trim_policy, class quote_policy>
964  char* line,
965  char** sorted_col,
966  const std::vector<int>& col_order)
967  {
968  for (int i : col_order) {
969  if (line == nullptr)
970  throw ::io::error::too_few_columns();
971  char *col_begin, *col_end;
972  chop_next_column<quote_policy>(line, col_begin, col_end);
973 
974  if (i != -1) {
975  trim_policy::trim(col_begin, col_end);
976  quote_policy::unescape(col_begin, col_end);
977 
978  sorted_col[i] = col_begin;
979  }
980  }
981  if (line != nullptr)
982  throw ::io::error::too_many_columns();
983  }
984 
985  template <unsigned column_count, class trim_policy, class quote_policy>
987  char* line,
988  std::vector<int>& col_order,
989  const std::string* col_name,
990  ignore_column ignore_policy)
991  {
992  col_order.clear();
993 
994  bool found[column_count];
995  std::fill(found, found + column_count, false);
996  while (line) {
997  char *col_begin, *col_end;
998  chop_next_column<quote_policy>(line, col_begin, col_end);
999 
1000  trim_policy::trim(col_begin, col_end);
1001  quote_policy::unescape(col_begin, col_end);
1002 
1003  for (unsigned i = 0; i < column_count; ++i)
1004  if (col_begin == col_name[i]) {
1005  if (found[i]) {
1007  err.set_column_name(col_begin);
1008  throw err;
1009  }
1010  found[i] = true;
1011  col_order.push_back(i);
1012  col_begin = 0;
1013  break;
1014  }
1015  if (col_begin) {
1016  if (ignore_policy & ::io::ignore_extra_column)
1017  col_order.push_back(-1);
1018  else {
1020  err.set_column_name(col_begin);
1021  throw err;
1022  }
1023  }
1024  }
1025  if (!(ignore_policy & ::io::ignore_missing_column)) {
1026  for (unsigned i = 0; i < column_count; ++i) {
1027  if (!found[i]) {
1029  err.set_column_name(col_name[i].c_str());
1030  throw err;
1031  }
1032  }
1033  }
1034  }
1035 
1036  template <class overflow_policy>
1037  void parse(char* col, char& x)
1038  {
1039  if (!*col)
1041  x = *col;
1042  ++col;
1043  if (*col)
1045  }
1046 
1047  template <class overflow_policy>
1048  void parse(char* col, std::string& x)
1049  {
1050  x = col;
1051  }
1052 
1053  template <class overflow_policy>
1054  void parse(char* col, const char*& x)
1055  {
1056  x = col;
1057  }
1058 
1059  template <class overflow_policy>
1060  void parse(char* col, char*& x)
1061  {
1062  x = col;
1063  }
1064 
1065  template <class overflow_policy, class T>
1066  void parse_unsigned_integer(const char* col, T& x)
1067  {
1068  x = 0;
1069  while (*col != '\0') {
1070  if ('0' <= *col && *col <= '9') {
1071  T y = *col - '0';
1072  if (x > ((std::numeric_limits<T>::max)() - y) / 10) {
1073  overflow_policy::on_overflow(x);
1074  return;
1075  }
1076  x = 10 * x + y;
1077  } else
1078  throw error::no_digit();
1079  ++col;
1080  }
1081  }
1082 
1083  template <class overflow_policy>
1084  void parse(char* col, unsigned char& x)
1085  {
1086  parse_unsigned_integer<overflow_policy>(col, x);
1087  }
1088  template <class overflow_policy>
1089  void parse(char* col, unsigned short& x)
1090  {
1091  parse_unsigned_integer<overflow_policy>(col, x);
1092  }
1093  template <class overflow_policy>
1094  void parse(char* col, unsigned int& x)
1095  {
1096  parse_unsigned_integer<overflow_policy>(col, x);
1097  }
1098  template <class overflow_policy>
1099  void parse(char* col, unsigned long& x)
1100  {
1101  parse_unsigned_integer<overflow_policy>(col, x);
1102  }
1103  template <class overflow_policy>
1104  void parse(char* col, unsigned long long& x)
1105  {
1106  parse_unsigned_integer<overflow_policy>(col, x);
1107  }
1108 
1109  template <class overflow_policy, class T>
1110  void parse_signed_integer(const char* col, T& x)
1111  {
1112  if (*col == '-') {
1113  ++col;
1114 
1115  x = 0;
1116  while (*col != '\0') {
1117  if ('0' <= *col && *col <= '9') {
1118  T y = *col - '0';
1119  if (x < ((std::numeric_limits<T>::min)() + y) / 10) {
1120  overflow_policy::on_underflow(x);
1121  return;
1122  }
1123  x = 10 * x - y;
1124  } else
1125  throw error::no_digit();
1126  ++col;
1127  }
1128  return;
1129  } else if (*col == '+')
1130  ++col;
1131  parse_unsigned_integer<overflow_policy>(col, x);
1132  }
1133 
1134  template <class overflow_policy>
1135  void parse(char* col, signed char& x)
1136  {
1137  parse_signed_integer<overflow_policy>(col, x);
1138  }
1139  template <class overflow_policy>
1140  void parse(char* col, signed short& x)
1141  {
1142  parse_signed_integer<overflow_policy>(col, x);
1143  }
1144  template <class overflow_policy>
1145  void parse(char* col, signed int& x)
1146  {
1147  parse_signed_integer<overflow_policy>(col, x);
1148  }
1149  template <class overflow_policy>
1150  void parse(char* col, signed long& x)
1151  {
1152  parse_signed_integer<overflow_policy>(col, x);
1153  }
1154  template <class overflow_policy>
1155  void parse(char* col, signed long long& x)
1156  {
1157  parse_signed_integer<overflow_policy>(col, x);
1158  }
1159 
1160  template <class T>
1161  void parse_float(const char* col, T& x)
1162  {
1163  bool is_neg = false;
1164  if (*col == '-') {
1165  is_neg = true;
1166  ++col;
1167  } else if (*col == '+')
1168  ++col;
1169 
1170  x = 0;
1171  while ('0' <= *col && *col <= '9') {
1172  int y = *col - '0';
1173  x *= 10;
1174  x += y;
1175  ++col;
1176  }
1177 
1178  if (*col == '.' || *col == ',') {
1179  ++col;
1180  T pos = 1;
1181  while ('0' <= *col && *col <= '9') {
1182  pos /= 10;
1183  int y = *col - '0';
1184  ++col;
1185  x += y * pos;
1186  }
1187  }
1188 
1189  if (*col == 'e' || *col == 'E') {
1190  ++col;
1191  int e;
1192 
1193  parse_signed_integer<set_to_max_on_overflow>(col, e);
1194 
1195  if (e != 0) {
1196  T base;
1197  if (e < 0) {
1198  base = T(0.1);
1199  e = -e;
1200  } else {
1201  base = T(10);
1202  }
1203 
1204  while (e != 1) {
1205  if ((e & 1) == 0) {
1206  base = base * base;
1207  e >>= 1;
1208  } else {
1209  x *= base;
1210  --e;
1211  }
1212  }
1213  x *= base;
1214  }
1215  } else {
1216  if (*col != '\0')
1217  throw error::no_digit();
1218  }
1219 
1220  if (is_neg)
1221  x = -x;
1222  }
1223 
1224  template <class overflow_policy>
1225  void parse(char* col, float& x)
1226  {
1227  parse_float(col, x);
1228  }
1229  template <class overflow_policy>
1230  void parse(char* col, double& x)
1231  {
1232  parse_float(col, x);
1233  }
1234  template <class overflow_policy>
1235  void parse(char* col, long double& x)
1236  {
1237  parse_float(col, x);
1238  }
1239 
1240  template <class overflow_policy, class T>
1241  void parse(char* col, T& x)
1242  {
1243  // Mute unused variable compiler warning
1244  (void)col;
1245  (void)x;
1246  // GCC evaluates "false" when reading the template and
1247  // "sizeof(T)!=sizeof(T)" only when instantiating it. This is why
1248  // this strange construct is used.
1249  static_assert(sizeof(T) != sizeof(T),
1250  "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
1251  }
1252 
1253  }
1254 
1255  template <unsigned column_count,
1256  class trim_policy = trim_chars<' ', '\t'>,
1257  class quote_policy = no_quote_escape<','>,
1258  class overflow_policy = throw_on_overflow,
1259  class comment_policy = no_comment>
1261  {
1262  private:
1263  LineReader in;
1264 
1265  char* row[column_count];
1266  std::string column_names[column_count];
1267 
1268  std::vector<int> col_order;
1269 
1270  template <class... ColNames>
1271  void set_column_names(std::string s, ColNames... cols)
1272  {
1273  column_names[column_count - sizeof...(ColNames) - 1] = std::move(s);
1274  set_column_names(std::forward<ColNames>(cols)...);
1275  }
1276 
1277  void set_column_names() {}
1278 
1279  public:
1280  CSVReader() = delete;
1281  CSVReader(const CSVReader&) = delete;
1283 
1284  template <class... Args>
1285  explicit CSVReader(Args&&... args) :
1286  in(std::forward<Args>(args)...)
1287  {
1288  std::fill(row, row + column_count, nullptr);
1289  col_order.resize(column_count);
1290  for (unsigned i = 0; i < column_count; ++i)
1291  col_order[i] = i;
1292  for (unsigned i = 1; i <= column_count; ++i)
1293  column_names[i - 1] = "col" + std::to_string(i);
1294  }
1295 
1296  char* next_line()
1297  {
1298  return in.next_line();
1299  }
1300 
1301  template <class... ColNames>
1302  void read_header(ignore_column ignore_policy, ColNames... cols)
1303  {
1304  static_assert(sizeof...(ColNames) >= column_count, "not enough column names specified");
1305  static_assert(sizeof...(ColNames) <= column_count, "too many column names specified");
1306  try {
1307  set_column_names(std::forward<ColNames>(cols)...);
1308 
1309  char* line;
1310  do {
1311  line = in.next_line();
1312  if (!line)
1313  throw error::header_missing();
1314  } while (comment_policy::is_comment(line));
1315 
1316  detail::parse_header_line<column_count, trim_policy, quote_policy>(line, col_order, column_names, ignore_policy);
1317  } catch (error::with_file_name& err) {
1319  throw;
1320  }
1321  }
1322 
1323  template <class... ColNames>
1324  void set_header(ColNames... cols)
1325  {
1326  static_assert(sizeof...(ColNames) >= column_count,
1327  "not enough column names specified");
1328  static_assert(sizeof...(ColNames) <= column_count,
1329  "too many column names specified");
1330  set_column_names(std::forward<ColNames>(cols)...);
1331  std::fill(row, row + column_count, nullptr);
1332  col_order.resize(column_count);
1333  for (unsigned i = 0; i < column_count; ++i)
1334  col_order[i] = i;
1335  }
1336 
1337  bool has_column(const std::string& name) const
1338  {
1339  return col_order.end() != std::find(
1340  col_order.begin(), col_order.end(),
1341  std::find(std::begin(column_names), std::end(column_names), name) - std::begin(column_names));
1342  }
1343 
1344  void set_file_name(const std::string& file_name)
1345  {
1346  in.set_file_name(file_name);
1347  }
1348 
1349  void set_file_name(const char* file_name)
1350  {
1351  in.set_file_name(file_name);
1352  }
1353 
1354  const char* get_truncated_file_name() const
1355  {
1356  return in.get_truncated_file_name();
1357  }
1358 
1359  void set_file_line(unsigned file_line)
1360  {
1361  in.set_file_line(file_line);
1362  }
1363 
1364  unsigned get_file_line() const
1365  {
1366  return in.get_file_line();
1367  }
1368 
1369  private:
1370  void parse_helper(std::size_t) {}
1371 
1372  template <class T, class... ColType>
1373  void parse_helper(std::size_t r, T& t, ColType&... cols)
1374  {
1375  if (row[r]) {
1376  try {
1377  try {
1378  ::io::detail::parse<overflow_policy>(row[r], t);
1379  } catch (error::with_column_content& err) {
1380  err.set_column_content(row[r]);
1381  throw;
1382  }
1383  } catch (error::with_column_name& err) {
1384  err.set_column_name(column_names[r].c_str());
1385  throw;
1386  }
1387  }
1388  parse_helper(r + 1, cols...);
1389  }
1390 
1391  public:
1392  template <class... ColType>
1393  bool read_row(ColType&... cols)
1394  {
1395  static_assert(sizeof...(ColType) >= column_count,
1396  "not enough columns specified");
1397  static_assert(sizeof...(ColType) <= column_count,
1398  "too many columns specified");
1399  try {
1400  try {
1401  char* line;
1402  do {
1403  line = in.next_line();
1404  if (!line)
1405  return false;
1406  } while (comment_policy::is_comment(line));
1407 
1408  detail::parse_line<trim_policy, quote_policy>(line, row, col_order);
1409 
1410  parse_helper(0, cols...);
1411  } catch (error::with_file_name& err) {
1413  throw;
1414  }
1415  } catch (error::with_file_line& err) {
1416  err.set_file_line(in.get_file_line());
1417  throw;
1418  }
1419 
1420  return true;
1421  }
1422  };
1423 }
1424 #endif
Definition: csv.h:159
virtual int read(char *buffer, int size)=0
virtual ~ByteSourceBase()
Definition: csv.h:162
Definition: csv.h:1261
CSVReader(const CSVReader &)=delete
unsigned get_file_line() const
Definition: csv.h:1364
void set_file_line(unsigned file_line)
Definition: csv.h:1359
CSVReader & operator=(const CSVReader &)
void set_file_name(const std::string &file_name)
Definition: csv.h:1344
bool read_row(ColType &... cols)
Definition: csv.h:1393
void set_file_name(const char *file_name)
Definition: csv.h:1349
void set_header(ColNames... cols)
Definition: csv.h:1324
char * next_line()
Definition: csv.h:1296
bool has_column(const std::string &name) const
Definition: csv.h:1337
CSVReader()=delete
const char * get_truncated_file_name() const
Definition: csv.h:1354
void read_header(ignore_column ignore_policy, ColNames... cols)
Definition: csv.h:1302
CSVReader(Args &&... args)
Definition: csv.h:1285
Definition: csv.h:358
LineReader(const std::string &file_name, const char *data_begin, const char *data_end)
Definition: csv.h:441
void set_file_name(const std::string &file_name)
Definition: csv.h:471
LineReader(const char *file_name, std::unique_ptr< ByteSourceBase > byte_source)
Definition: csv.h:423
LineReader(const char *file_name, std::istream &in)
Definition: csv.h:459
LineReader(const std::string &file_name, std::istream &in)
Definition: csv.h:465
unsigned get_file_line() const
Definition: csv.h:496
void set_file_line(unsigned file_line)
Definition: csv.h:491
LineReader(const std::string &file_name, std::unique_ptr< ByteSourceBase > byte_source)
Definition: csv.h:429
LineReader(const char *file_name)
Definition: csv.h:411
void set_file_name(const char *file_name)
Definition: csv.h:476
LineReader(const LineReader &)=delete
LineReader(const std::string &file_name, FILE *file)
Definition: csv.h:453
char * next_line()
Definition: csv.h:501
LineReader & operator=(const LineReader &)=delete
LineReader(const std::string &file_name)
Definition: csv.h:417
LineReader()=delete
LineReader(const char *file_name, FILE *file)
Definition: csv.h:447
const char * get_truncated_file_name() const
Definition: csv.h:486
LineReader(const char *file_name, const char *data_begin, const char *data_end)
Definition: csv.h:435
Definition: csv.h:235
~AsynchronousReader()
Definition: csv.h:297
void init(std::unique_ptr< ByteSourceBase > arg_byte_source)
Definition: csv.h:237
int finish_read()
Definition: csv.h:283
void start_read(char *arg_buffer, int arg_desired_byte_count)
Definition: csv.h:274
bool is_valid() const
Definition: csv.h:269
~NonOwningIStreamByteSource()
Definition: csv.h:203
NonOwningIStreamByteSource(std::istream &in)
Definition: csv.h:194
int read(char *buffer, int size)
Definition: csv.h:197
NonOwningStringByteSource(const char *str, long long size)
Definition: csv.h:212
~NonOwningStringByteSource()
Definition: csv.h:226
int read(char *buffer, int desired_byte_count)
Definition: csv.h:215
OwningStdIOByteSourceBase(FILE *file)
Definition: csv.h:170
int read(char *buffer, int size)
Definition: csv.h:177
~OwningStdIOByteSourceBase()
Definition: csv.h:182
Definition: csv.h:327
int finish_read()
Definition: csv.h:345
void start_read(char *arg_buffer, int arg_desired_byte_count)
Definition: csv.h:339
bool is_valid() const
Definition: csv.h:334
void init(std::unique_ptr< ByteSourceBase > arg_byte_source)
Definition: csv.h:329
NiColor min(const NiColor &a_lhs, const NiColor &a_rhs)
Definition: ColorUtil.h:63
NiColor max(const NiColor &a_lhs, const NiColor &a_rhs)
Definition: ColorUtil.h:71
string(const CharT(&)[N]) -> string< CharT, N - 1 >
void parse(char *col, char &x)
Definition: csv.h:1037
void parse_header_line(char *line, std::vector< int > &col_order, const std::string *col_name, ignore_column ignore_policy)
Definition: csv.h:986
void parse_float(const char *col, T &x)
Definition: csv.h:1161
void chop_next_column(char *&line, char *&col_begin, char *&col_end)
Definition: csv.h:945
void parse_unsigned_integer(const char *col, T &x)
Definition: csv.h:1066
void parse_signed_integer(const char *col, T &x)
Definition: csv.h:1110
void parse_line(char *line, char **sorted_col, const std::vector< int > &col_order)
Definition: csv.h:963
const int max_column_content_length
Definition: csv.h:580
const int max_file_name_length
Definition: csv.h:75
const int max_column_name_length
Definition: csv.h:559
Definition: csv.h:55
static const ignore_column ignore_no_column
Definition: csv.h:757
static const ignore_column ignore_extra_column
Definition: csv.h:758
unsigned int ignore_column
Definition: csv.h:756
static const ignore_column ignore_missing_column
Definition: csv.h:759
Definition: EffectArchetypes.h:65
std::string to_string(RE::EffectArchetype a_archetype)
Definition: EffectArchetypes.h:66
Definition: csv.h:858
static void unescape(char *&col_begin, char *&col_end)
Definition: csv.h:878
static const char * find_next_column_end(const char *col_begin)
Definition: csv.h:859
Definition: csv.h:818
static bool is_comment(const char *line)
Definition: csv.h:819
Definition: csv.h:63
const char * what() const noexcept override
Definition: csv.h:66
char error_message_buffer[512]
Definition: csv.h:72
virtual void format_error_message() const =0
Definition: csv.h:133
void format_error_message() const override
Definition: csv.h:134
void format_error_message() const override
Definition: csv.h:631
void format_error_message() const override
Definition: csv.h:678
void format_error_message() const override
Definition: csv.h:607
Definition: csv.h:641
void format_error_message() const override
Definition: csv.h:642
void format_error_message() const override
Definition: csv.h:692
Definition: csv.h:719
void format_error_message() const override
Definition: csv.h:720
Definition: csv.h:733
void format_error_message() const override
Definition: csv.h:734
void format_error_message() const override
Definition: csv.h:748
void format_error_message() const override
Definition: csv.h:150
void format_error_message() const override
Definition: csv.h:619
Definition: csv.h:705
void format_error_message() const override
Definition: csv.h:706
Definition: csv.h:653
void format_error_message() const override
Definition: csv.h:654
Definition: csv.h:665
void format_error_message() const override
Definition: csv.h:666
Definition: csv.h:583
char column_content[max_column_content_length+1]
Definition: csv.h:599
with_column_content()
Definition: csv.h:584
void set_column_content(const char *column_content)
Definition: csv.h:589
Definition: csv.h:561
with_column_name()
Definition: csv.h:562
void set_column_name(const char *column_name)
Definition: csv.h:567
char column_name[max_column_name_length+1]
Definition: csv.h:577
Definition: csv.h:115
with_errno()
Definition: csv.h:116
void set_errno(int errno_value)
Definition: csv.h:121
int errno_value
Definition: csv.h:126
Definition: csv.h:100
int file_line
Definition: csv.h:111
with_file_line()
Definition: csv.h:101
void set_file_line(int file_line)
Definition: csv.h:106
Definition: csv.h:78
char file_name[max_file_name_length+1]
Definition: csv.h:96
void set_file_name(const char *file_name)
Definition: csv.h:84
with_file_name()
Definition: csv.h:79
Definition: csv.h:915
static void on_underflow(T &)
Definition: csv.h:921
static void on_overflow(T &)
Definition: csv.h:917
Definition: csv.h:788
static bool is_comment(const char *)
Definition: csv.h:789
Definition: csv.h:843
static const char * find_next_column_end(const char *col_begin)
Definition: csv.h:844
static void unescape(char *&, char *&)
Definition: csv.h:851
Definition: csv.h:926
static void on_overflow(T &x)
Definition: csv.h:928
static void on_underflow(T &x)
Definition: csv.h:936
static bool is_comment(const char *line)
Definition: csv.h:835
Definition: csv.h:797
static bool is_comment(const char *line)
Definition: csv.h:811
Definition: csv.h:900
static void on_overflow(T &)
Definition: csv.h:902
static void on_underflow(T &)
Definition: csv.h:908
Definition: csv.h:763
static void trim(char *&str_begin, char *&str_end)
Definition: csv.h:777