/*
www.github.com/emilk/configuru

# Configuru
        Configuru, an experimental config library for C++, by Emil Ernerfeldt.

# License
        This software is in the public domain. Where that dedication is not
        recognized, you are granted a perpetual, irrevocable license to copy
        and modify this file as you see fit.

        That being said, I would appreciate credit!
        If you find this library useful, send a tweet to [@ernerfeldt](https://twitter.com/ernerfeldt)
        or mail me at emil.ernerfeldt@gmail.com.

# Version history
        0.0.0: 2014-07-21 - Initial steps
        0.1.0: 2015-11-08 - First commit as stand-alone library
        0.2.0: 2016-03-25 - check_dangling changes
        0.2.1: 2016-04-11 - mark_accessed in dump_string by default
        0.2.2: 2016-07-27 - optimizations
        0.2.3: 2016-08-09 - optimizations + add Config::emplace(key, value)
        0.2.4: 2016-08-18 - fix compilation error for when CONFIGURU_VALUE_SEMANTICS=0
        0.3.0: 2016-09-15 - Add option to not align values (object_align_values)
        0.3.1: 2016-09-19 - Fix crashes on some compilers/stdlibs
        0.3.2: 2016-09-22 - Add support for Config::array(some_container)
        0.3.3: 2017-01-10 - Add some missing iterator members
        0.3.4: 2017-01-17 - Add cast conversion to std::array
        0.4.0: 2017-04-17 - Automatic (de)serialization with serialize/deserialize with https://github.com/cbeck88/visit_struct
        0.4.1: 2017-05-21 - Make it compile on VC++

# Getting started
        For using:
                `#include <configuru.hpp>`

        And in one .cpp file:

                #define CONFIGURU_IMPLEMENTATION 1
                #include <configuru.hpp>

        For more info, please see README.md (at www.github.com/emilk/configuru).
*/
43 
44 //  dP""b8  dP"Yb  88b 88 888888 88  dP""b8 88   88 88""Yb 88   88
45 // dP   `" dP   Yb 88Yb88 88__   88 dP   `" 88   88 88__dP 88   88
46 // Yb      Yb   dP 88 Y88 88""   88 Yb  "88 Y8   8P 88"Yb  Y8   8P
47 //  YboodP  YbodP  88  Y8 88     88  YboodP `YbodP' 88  Yb `YbodP'
48 
// Disable all warnings from gcc/clang by marking this header as a system header:
#if defined(__clang__)
#pragma clang system_header
#elif defined(__GNUC__)
#pragma GCC system_header
#endif

#pragma once
57 
#include <algorithm>
#include <array>
#include <atomic>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
75 
#ifndef CONFIGURU_ONERROR
/// CONFIGURU_ONERROR(message_str) is the error hook. Unless overridden before
/// this header is included, it throws std::runtime_error(message_str).
#define CONFIGURU_ONERROR(message_str) throw std::runtime_error(message_str)
#endif // CONFIGURU_ONERROR

#ifndef CONFIGURU_ASSERT
#include <cassert>
/// CONFIGURU_ASSERT(test) defaults to the standard assert().
#define CONFIGURU_ASSERT(test) assert(test)
#endif // CONFIGURU_ASSERT

#ifndef CONFIGURU_ON_DANGLING
/// CONFIGURU_ON_DANGLING(message_str) is called by check_dangling() if there are any unaccessed keys.
/// Defaults to CONFIGURU_ONERROR.
#define CONFIGURU_ON_DANGLING(message_str) CONFIGURU_ONERROR(message_str)
#endif // CONFIGURU_ON_DANGLING
89 
// CONFIGURU_NORETURN is placed after a function declarator to mark a function
// that never returns. It is always defined (possibly empty), so declarations
// using it compile on every toolchain. It may be predefined by the user.
#ifndef CONFIGURU_NORETURN
#if defined(__GNUC__) || defined(__MINGW32__) || defined(__clang__)
#define CONFIGURU_NORETURN __attribute__((noreturn))
#else
// MSVC and unrecognised compilers: no trailing noreturn attribute is used.
#define CONFIGURU_NORETURN
#endif
#endif // CONFIGURU_NORETURN
99 
#ifndef CONFIGURU_IMPLICIT_CONVERSIONS
/// Set to 1 to allow `int x = some_cfg;`
#define CONFIGURU_IMPLICIT_CONVERSIONS 0
#endif

#ifndef CONFIGURU_VALUE_SEMANTICS
/// If set, all copies are deep clones.
/// If 0, all copies of objects and arrays are shallow (ref-counted).
#define CONFIGURU_VALUE_SEMANTICS 0
#endif

#undef Bool  // Needed on Ubuntu 14.04 with GCC 4.8.5
#undef check // Needed on OSX
113 
114 /// The Configuru namespace.
115 namespace configuru
116 {
struct DocInfo;
using DocInfo_SP = std::shared_ptr<DocInfo>;

using Index           = unsigned;
const Index BAD_INDEX = static_cast<Index>(-1); ///< Sentinel meaning "no index".

/// A (document, line) pair. DocInfo keeps a list of these in `includers`.
struct Include {
  DocInfo_SP doc;
  Index line = BAD_INDEX;

  Include() {}
  /// `d` is taken by value and moved into place, avoiding an extra ref-count round trip.
  Include(DocInfo_SP d, Index l) : doc(std::move(d)), line(l) {}
};
130 
131 /// Helper for describing a document.
132 struct DocInfo {
133   std::vector<Include> includers;
134 
135   std::string filename;
136 
DocInfoconfiguru::DocInfo137   DocInfo(const std::string &fn) : filename(fn) {}
138   void append_include_info(std::string &ret, const std::string &indent = "    ") const;
139 };
140 
141 struct BadLookupInfo;
142 
143 /// Helper: value in an object.
144 template <typename Config_T> struct Config_Entry {
145   Config_T _value;
146   Index _nr              = BAD_INDEX; ///< Size of the object prior to adding this entry
147   mutable bool _accessed = false;     ///< Set to true if accessed.
148 
Config_Entryconfiguru::Config_Entry149   Config_Entry() {}
Config_Entryconfiguru::Config_Entry150   Config_Entry(Config_T value, Index nr) : _value(std::move(value)), _nr(nr) {}
151 };
152 
using Comment  = std::string;
using Comments = std::vector<Comment>;

/// Captures the comments related to a Config value.
struct ConfigComments {
  /// Comments on preceding lines.
  /// Like this.
  Comments prefix;
  Comments postfix;       ///< After the value, on the same line. Like this.
  Comments pre_end_brace; ///< Before the closing } or ]

  ConfigComments() {}
  bool empty() const;
  void append(ConfigComments &&other);
};
168 
169 /// A dynamic config variable.
170 class Config;
171 
172 /** Overload this (in cofiguru namespace) for you own types, e.g:
173 
174         ```
175         namespace configuru {
176                 template<>
177                 inline Vector2f as(const Config& config)
178                 {
179                         auto&& array = config.as_array();
180                         config.check(array.size() == 2, "Expected Vector2f");
181                         return {(float)array[0], (float)array[1]};
182                 }
183         }
184         ```
185 */
186 template <typename T> inline T as(const configuru::Config &config);
187 
188 /// A dynamic config variable.
189 /// Acts like something out of Python or Lua.
190 /// If CONFIGURU_VALUE_SEMANTICS all copies of this will be deep copies.
191 /// If not, it will use reference-counting for objects and arrays,
192 /// meaning all copies will be shallow copies.
193 class Config
194 {
195 public:
196   enum Type {
197     Uninitialized, ///< Accessing a Config of this type is always an error.
198     BadLookupType, ///< We are the result of a key-lookup in a Object with no hit. We are in effect write-only.
199     Null,
200     Bool,
201     Int,
202     Float,
203     String,
204     Array,
205     Object
206   };
207 
208   using ObjectEntry = Config_Entry<Config>;
209 
210   using ConfigArrayImpl  = std::vector<Config>;
211   using ConfigObjectImpl = std::map<std::string, ObjectEntry>;
212   struct ConfigArray {
213 #if !CONFIGURU_VALUE_SEMANTICS
214     std::atomic<unsigned> _ref_count{1};
215 #endif
216     ConfigArrayImpl _impl;
217   };
218   struct ConfigObject;
219 
220   // ----------------------------------------
221   // Constructors:
222 
223   /// Creates an uninitialized Config.
Config()224   Config() : _type(Uninitialized) {}
Config(std::nullptr_t)225   Config(std::nullptr_t) : _type(Null) {}
Config(float f)226   Config(float f) : _type(Float) { _u.f = f; }
Config(double f)227   Config(double f) : _type(Float) { _u.f = f; }
Config(bool b)228   Config(bool b) : _type(Bool) { _u.b = b; }
Config(int i)229   Config(int i) : _type(Int) { _u.i = i; }
Config(unsigned int i)230   Config(unsigned int i) : _type(Int) { _u.i = i; }
Config(long i)231   Config(long i) : _type(Int) { _u.i = i; }
Config(unsigned long i)232   Config(unsigned long i) : Config(static_cast<unsigned long long>(i)) {}
Config(long long i)233   Config(long long i) : _type(Int) { _u.i = i; }
Config(unsigned long long i)234   Config(unsigned long long i) : _type(Int)
235   {
236     if ((i & 0x8000000000000000ull) != 0) {
237       CONFIGURU_ONERROR("Integer too large to fit into 63 bits");
238     }
239     _u.i = static_cast<int64_t>(i);
240   }
241   Config(const char *str);
242   Config(std::string str);
243 
244   /** This constructor is a short-form for Config::object(...).
245       We have no short-form for Config::array(...),
246       as that is less common and can lead to ambiguities.
247       Usage:
248 
249       ```
250                   Config cfg {
251                           { "key",          "value" },
252                           { "empty_array",  Config::array() },
253                           { "array",        Config::array({1, 2, 3}) },
254                           { "empty_object", Config::object() },
255                           { "object",       Config::object({
256                                   { "nested_key", "nested_value" },
257                           })},
258                           { "another_object", {
259                                   { "nested_key", "nested_value" },
260                           }},
261                   };
262       ```
263   */
264   Config(std::initializer_list<std::pair<std::string, Config>> values);
265 
266   /// Array constructor
Config(const std::vector<T> & values)267   template <typename T> Config(const std::vector<T> &values) : _type(Uninitialized)
268   {
269     make_array();
270     _u.array->_impl.reserve(values.size());
271     for (const auto &v : values) {
272       push_back(v);
273     }
274   }
275 
276   /// Array constructor
Config(const std::vector<bool> & values)277   Config(const std::vector<bool> &values) : _type(Uninitialized)
278   {
279     make_array();
280     _u.array->_impl.reserve(values.size());
281     for (const auto v : values) {
282       push_back(!!v);
283     }
284   }
285 
286   /// Object constructor
Config(const std::map<std::string,T> & values)287   template <typename T> Config(const std::map<std::string, T> &values) : _type(Uninitialized)
288   {
289     make_object();
290     for (const auto &p : values) {
291       (*this)[p.first] = p.second;
292     }
293   }
294 
295   /// Used by the parser - no need to use directly.
296   void make_object();
297 
298   /// Used by the parser - no need to use directly.
299   void make_array();
300 
301   /// Used by the parser - no need to use directly.
302   void tag(const DocInfo_SP &doc, Index line, Index column);
303 
304   /// Preferred way to create an empty object.
305   static Config object();
306 
307   /// Preferred way to create an object.
308   static Config object(std::initializer_list<std::pair<std::string, Config>> values);
309 
310   /// Preferred way to create an empty array.
311   static Config array();
312 
313   /// Preferred way to create an array.
314   static Config array(std::initializer_list<Config> values);
315 
316   /// Preferred way to create an array from an STL container.
317   template <typename Container>
318   static Config
array(const Container & container)319   array(const Container &container)
320   {
321     Config ret;
322     ret.make_array();
323     auto &impl = ret._u.array->_impl;
324     impl.reserve(container.size());
325     for (auto &&v : container) {
326       impl.emplace_back(v);
327     }
328     return ret;
329   }
330 
331   // ----------------------------------------
332 
333   ~Config();
334 
335   Config(const Config &o);
336   Config(Config &&o) noexcept;
337   Config &operator=(const Config &o);
338 
339   /// Will still remember file/line when assigned an object which has no file/line
340   Config &operator=(Config &&o) noexcept;
341 
342   /// Swaps file/line too.
343   void swap(Config &o) noexcept;
344 
345 #ifdef CONFIG_EXTENSION
346   CONFIG_EXTENSION
347 #endif
348 
349   // ----------------------------------------
350   // Inspectors:
351 
352   Type
type() const353   type() const
354   {
355     return _type;
356   }
357 
358   bool
is_uninitialized() const359   is_uninitialized() const
360   {
361     return _type == Uninitialized;
362   }
363   bool
is_null() const364   is_null() const
365   {
366     return _type == Null;
367   }
368   bool
is_bool() const369   is_bool() const
370   {
371     return _type == Bool;
372   }
373   bool
is_int() const374   is_int() const
375   {
376     return _type == Int;
377   }
378   bool
is_float() const379   is_float() const
380   {
381     return _type == Float;
382   }
383   bool
is_string() const384   is_string() const
385   {
386     return _type == String;
387   }
388   bool
is_object() const389   is_object() const
390   {
391     return _type == Object;
392   }
393   bool
is_array() const394   is_array() const
395   {
396     return _type == Array;
397   }
398   bool
is_number() const399   is_number() const
400   {
401     return is_int() || is_float();
402   }
403 
404   /// Returns file:line iff available.
405   std::string where() const;
406 
407   /// BAD_INDEX if not set.
408   Index
line() const409   line() const
410   {
411     return _line;
412   }
413 
414   /// Handle to document.
415   const DocInfo_SP &
doc() const416   doc() const
417   {
418     return _doc;
419   }
420   void
set_doc(const DocInfo_SP & doc)421   set_doc(const DocInfo_SP &doc)
422   {
423     _doc = doc;
424   }
425 
426 // ----------------------------------------
427 // Convertors:
428 
429 #if CONFIGURU_IMPLICIT_CONVERSIONS
430   /// Explicit casting, for overloads of as<T>
431   template <typename T> explicit operator T() const { return as<T>(*this); }
432   inline operator bool() const { return as_bool(); }
433   inline operator signed char() const { return as_integer<signed char>(); }
434   inline operator unsigned char() const { return as_integer<unsigned char>(); }
435   inline operator signed short() const { return as_integer<signed short>(); }
436   inline operator unsigned short() const { return as_integer<unsigned short>(); }
437   inline operator signed int() const { return as_integer<signed int>(); }
438   inline operator unsigned int() const { return as_integer<unsigned int>(); }
439   inline operator signed long() const { return as_integer<signed long>(); }
440   inline operator unsigned long() const { return as_integer<unsigned long>(); }
441   inline operator signed long long() const { return as_integer<signed long long>(); }
442   inline operator unsigned long long() const { return as_integer<unsigned long long>(); }
443   inline operator float() const { return as_float(); }
444   inline operator double() const { return as_double(); }
445   inline operator std::string() const { return as_string(); }
446   inline operator Config::ConfigArrayImpl() const { return as_array(); }
447   /// Convenience conversion to std::vector
448   template <typename T> operator std::vector<T>() const
449   {
450     const auto &array = as_array();
451     std::vector<T> ret;
452     ret.reserve(array.size());
453     for (auto &&config : array) {
454       ret.push_back((T)config);
455     }
456     return ret;
457   }
458 
459   /// Convenience conversion to std::array
460   template <typename T, size_t N> operator std::array<T, N>() const
461   {
462     const auto &array = as_array();
463     check(array.size() == N, "Array size mismatch.");
464     std::array<T, N> ret;
465     std::copy(array.begin(), array.end(), ret.begin());
466     return ret;
467   }
468 
469   /// Convenience conversion of an array of length 2 to an std::pair.
470   /// TODO: generalize for tuples.
471   template <typename Left, typename Right> operator std::pair<Left, Right>() const
472   {
473     const auto &array = as_array();
474     check(array.size() == 2u, "Mismatched array length.");
475     return {(Left)array[0], (Right)array[1]};
476   }
477 #else
478   /// Explicit casting, since C++ handles implicit casts real badly.
479   template <typename T> explicit operator T() const { return as<T>(*this); }
480   /// Convenience conversion to std::vector
481   template <typename T> explicit operator std::vector<T>() const
482   {
483     const auto &array = as_array();
484     std::vector<T> ret;
485     ret.reserve(array.size());
486     for (auto &&config : array) {
487       ret.push_back(static_cast<T>(config));
488     }
489     return ret;
490   }
491 
492   /// Convenience conversion to std::array
493   template <typename T, size_t N> explicit operator std::array<T, N>() const
494   {
495     const auto &array = as_array();
496     check(array.size() == N, "Array size mismatch.");
497     std::array<T, N> ret;
498     for (size_t i = 0; i < N; ++i) {
499       ret[i] = static_cast<T>(array[i]);
500     }
501     return ret;
502   }
503 
504   /// Convenience conversion of an array of length 2 to an std::pair.
505   /// TODO: generalize for tuples.
506   template <typename Left, typename Right> explicit operator std::pair<Left, Right>() const
507   {
508     const auto &array = as_array();
509     check(array.size() == 2u, "Mismatched array length.");
510     return {static_cast<Left>(array[0]), static_cast<Right>(array[1])};
511   }
512 #endif
513 
514   const std::string &
as_string() const515   as_string() const
516   {
517     assert_type(String);
518     return *_u.str;
519   }
520   const char *
c_str() const521   c_str() const
522   {
523     assert_type(String);
524     return _u.str->c_str();
525   }
526 
527   /// The Config must be a boolean.
528   bool
as_bool() const529   as_bool() const
530   {
531     assert_type(Bool);
532     return _u.b;
533   }
534 
535   template <typename IntT>
536   IntT
as_integer() const537   as_integer() const
538   {
539     static_assert(std::is_integral<IntT>::value, "Not an integer.");
540     assert_type(Int);
541     check(static_cast<int64_t>(static_cast<IntT>(_u.i)) == _u.i, "Integer out of range");
542     return static_cast<IntT>(_u.i);
543   }
544 
545   float
as_float() const546   as_float() const
547   {
548     if (_type == Int) {
549       return _u.i;
550     } else {
551       assert_type(Float);
552       return static_cast<float>(_u.f);
553     }
554   }
555 
556   double
as_double() const557   as_double() const
558   {
559     if (_type == Int) {
560       return _u.i;
561     } else {
562       assert_type(Float);
563       return _u.f;
564     }
565   }
566 
567   /// Extract the value of this Config.
568   template <typename T> T get() const;
569 
570   /// Returns the value or `default_value` if this is the result of a bad lookup.
571   template <typename T>
572   T
get_or(const T & default_value) const573   get_or(const T &default_value) const
574   {
575     if (_type == BadLookupType) {
576       return default_value;
577     } else {
578       return static_cast<T>(*this);
579     }
580   }
581 
582   // ----------------------------------------
583   // Array:
584 
585   /// Length of an array
586   size_t
array_size() const587   array_size() const
588   {
589     return as_array().size();
590   }
591 
592   /// Only use this for iterating over an array: `for (Config& e : cfg.as_array()) { ... }`
593   ConfigArrayImpl &
as_array()594   as_array()
595   {
596     assert_type(Array);
597     return _u.array->_impl;
598   }
599 
600   /// Only use this for iterating over an array: `for (Config& e : cfg.as_array()) { ... }`
601   const ConfigArrayImpl &
as_array() const602   as_array() const
603   {
604     assert_type(Array);
605     return _u.array->_impl;
606   }
607 
608   /// Array indexing
operator [](size_t ix)609   Config &operator[](size_t ix)
610   {
611     auto &&array = as_array();
612     check(ix < array.size(), "Array index out of range");
613     return array[ix];
614   }
615 
616   /// Array indexing
operator [](size_t ix) const617   const Config &operator[](size_t ix) const
618   {
619     auto &&array = as_array();
620     check(ix < array.size(), "Array index out of range");
621     return array[ix];
622   }
623 
624   /// Append a value to this array.
625   void
push_back(Config value)626   push_back(Config value)
627   {
628     as_array().push_back(std::move(value));
629   }
630 
631   // ----------------------------------------
632   // Object:
633 
634   /// Number of elementsi n this object
635   size_t object_size() const;
636 
637   /// Only use this for iterating over an object:
638   /// `for (auto& p : cfg.as_object()) { p.value() = p.key(); }`
639   ConfigObject &
as_object()640   as_object()
641   {
642     assert_type(Object);
643     return *_u.object;
644   }
645 
646   /// Only use this for iterating over an object:
647   /// `for (const auto& p : cfg.as_object()) { cout << p.key() << ": " << p.value(); }`
648   const ConfigObject &
as_object() const649   as_object() const
650   {
651     assert_type(Object);
652     return *_u.object;
653   }
654 
655   /// Look up a value in an Object. Returns a BadLookupType Config if the key does not exist.
656   const Config &operator[](const std::string &key) const;
657 
658   /// Prefer `obj.insert_or_assign(key, value);` to `obj[key] = value;` when inserting and performance is important!
659   Config &operator[](const std::string &key);
660 
661   /// For indexing with string literals:
operator [](const char (& key)[N])662   template <std::size_t N> Config &operator[](const char (&key)[N]) { return operator[](std::string(key)); }
operator [](const char (& key)[N]) const663   template <std::size_t N> const Config &operator[](const char (&key)[N]) const { return operator[](std::string(key)); }
664   /// Check if an object has a specific key.
665   bool has_key(const std::string &key) const;
666 
667   /// Like has_key, but STL compatible.
668   size_t
count(const std::string & key) const669   count(const std::string &key) const
670   {
671     return has_key(key) ? 1 : 0;
672   }
673 
674   /// Returns true iff the value was inserted, false if they key was already there.
675   bool emplace(std::string key, Config value);
676 
677   /// Like `foo[key] = value`, but faster.
678   void insert_or_assign(const std::string &key, Config &&value);
679 
680   /// Erase a key from an object.
681   bool erase(const std::string &key);
682 
683   /// Get the given value in this object.
684   template <typename T>
685   T
get(const std::string & key) const686   get(const std::string &key) const
687   {
688     return as<T>((*this)[key]);
689   }
690 
691   /// Look for the given key in this object, and return default_value on failure.
692   template <typename T> T get_or(const std::string &key, const T &default_value) const;
693 
694   /// Look for the given key in this object, and return default_value on failure.
695   std::string
get_or(const std::string & key,const char * default_value) const696   get_or(const std::string &key, const char *default_value) const
697   {
698     return get_or<std::string>(key, default_value);
699   }
700 
701   /// obj.get_or({"a", "b". "c"}, 42) - like obj["a"]["b"]["c"], but returns 42 if any of the keys are *missing*.
702   template <typename T> T get_or(std::initializer_list<std::string> keys, const T &default_value) const;
703 
704   /// obj.get_or({"a", "b". "c"}, 42) - like obj["a"]["b"]["c"], but returns 42 if any of the keys are *missing*.
705   std::string
get_or(std::initializer_list<std::string> keys,const char * default_value) const706   get_or(std::initializer_list<std::string> keys, const char *default_value) const
707   {
708     return get_or<std::string>(keys, default_value);
709   }
710 
711   // --------------------------------------------------------------------------------
712 
713   /// Compare Config values recursively.
714   static bool deep_eq(const Config &a, const Config &b);
715 
716 #if !CONFIGURU_VALUE_SEMANTICS // No need for a deep_clone method when all copies are deep clones.
717   /// Copy this Config value recursively.
718   Config deep_clone() const;
719 #endif
720 
721   // ----------------------------------------
722 
723   /// Visit dangling (unaccessed) object keys recursively.
724   void visit_dangling(const std::function<void(const std::string &key, const Config &value)> &visitor) const;
725 
726   /// Will check for dangling (unaccessed) object keys recursively and call CONFIGURU_ON_DANGLING on all found.
727   void check_dangling() const;
728 
729   /// Set the 'access' flag recursively,
730   void mark_accessed(bool v) const;
731 
732   // ----------------------------------------
733 
734   /// Was there any comments about this value in the input?
735   bool
has_comments() const736   has_comments() const
737   {
738     return _comments && !_comments->empty();
739   }
740 
741   /// Read/write of comments.
742   ConfigComments &
comments()743   comments()
744   {
745     if (!_comments) {
746       _comments.reset(new ConfigComments());
747     }
748     return *_comments;
749   }
750 
751   /// Read comments.
752   const ConfigComments &
comments() const753   comments() const
754   {
755     static const ConfigComments s_empty{};
756     if (_comments) {
757       return *_comments;
758     } else {
759       return s_empty;
760     }
761   }
762 
763   /// Returns either "true", "false", the constained string, or the type name.
764   const char *debug_descr() const;
765 
766   /// Human-readable version of the type ("integer", "bool", etc).
767   static const char *type_str(Type t);
768 
769   // ----------------------------------------
770   // Helper functions for checking the type is what we expect:
771 
772   inline void
check(bool b,const char * msg) const773   check(bool b, const char *msg) const
774   {
775     if (!b) {
776       on_error(msg);
777     }
778   }
779 
780   void assert_type(Type t) const;
781 
782   void on_error(const std::string &msg) const CONFIGURU_NORETURN;
783 
784 private:
785   void free();
786 
787   using ConfigComments_UP = std::unique_ptr<ConfigComments>;
788 
789   union {
790     bool b;
791     int64_t i;
792     double f;
793     const std::string *str;
794     ConfigObject *object;
795     ConfigArray *array;
796     BadLookupInfo *bad_lookup;
797   } _u;
798 
799   DocInfo_SP _doc; // So we can name the file
800   ConfigComments_UP _comments;
801   Index _line = BAD_INDEX; // Where in the source, or BAD_INDEX. Lines are 1-indexed.
802   Type _type  = Uninitialized;
803 };
804 
805 // ------------------------------------------------------------------------
806 
807 struct Config::ConfigObject {
808 #if !CONFIGURU_VALUE_SEMANTICS
809   std::atomic<unsigned> _ref_count{1};
810 #endif
811   ConfigObjectImpl _impl;
812 
813   class iterator
814   {
815   public:
816     iterator() = default;
iterator(ConfigObjectImpl::iterator it)817     explicit iterator(ConfigObjectImpl::iterator it) : _it(std::move(it)) {}
operator *() const818     const iterator &operator*() const
819     {
820       _it->second._accessed = true;
821       return *this;
822     }
823 
operator ++()824     iterator &operator++()
825     {
826       ++_it;
827       return *this;
828     }
829 
830     friend bool
operator ==(const iterator & a,const iterator & b)831     operator==(const iterator &a, const iterator &b)
832     {
833       return a._it == b._it;
834     }
835 
836     friend bool
operator !=(const iterator & a,const iterator & b)837     operator!=(const iterator &a, const iterator &b)
838     {
839       return a._it != b._it;
840     }
841 
842     const std::string &
key() const843     key() const
844     {
845       return _it->first;
846     }
847     Config &
value() const848     value() const
849     {
850       return _it->second._value;
851     }
852 
853   private:
854     ConfigObjectImpl::iterator _it;
855   };
856 
857   class const_iterator
858   {
859   public:
860     const_iterator() = default;
const_iterator(ConfigObjectImpl::const_iterator it)861     explicit const_iterator(ConfigObjectImpl::const_iterator it) : _it(std::move(it)) {}
operator *() const862     const const_iterator &operator*() const
863     {
864       _it->second._accessed = true;
865       return *this;
866     }
867 
operator ++()868     const_iterator &operator++()
869     {
870       ++_it;
871       return *this;
872     }
873 
874     friend bool
operator ==(const const_iterator & a,const const_iterator & b)875     operator==(const const_iterator &a, const const_iterator &b)
876     {
877       return a._it == b._it;
878     }
879 
880     friend bool
operator !=(const const_iterator & a,const const_iterator & b)881     operator!=(const const_iterator &a, const const_iterator &b)
882     {
883       return a._it != b._it;
884     }
885 
886     const std::string &
key() const887     key() const
888     {
889       return _it->first;
890     }
891     const Config &
value() const892     value() const
893     {
894       return _it->second._value;
895     }
896 
897   private:
898     ConfigObjectImpl::const_iterator _it;
899   };
900 
901   iterator
beginconfiguru::Config::ConfigObject902   begin()
903   {
904     return iterator{_impl.begin()};
905   }
906   iterator
endconfiguru::Config::ConfigObject907   end()
908   {
909     return iterator{_impl.end()};
910   }
911   const_iterator
beginconfiguru::Config::ConfigObject912   begin() const
913   {
914     return const_iterator{_impl.cbegin()};
915   }
916   const_iterator
endconfiguru::Config::ConfigObject917   end() const
918   {
919     return const_iterator{_impl.cend()};
920   }
921   const_iterator
cbeginconfiguru::Config::ConfigObject922   cbegin() const
923   {
924     return const_iterator{_impl.cbegin()};
925   }
926   const_iterator
cendconfiguru::Config::ConfigObject927   cend() const
928   {
929     return const_iterator{_impl.cend()};
930   }
931 };
932 
933 // ------------------------------------------------------------------------
934 
935 inline bool
operator ==(const Config & a,const Config & b)936 operator==(const Config &a, const Config &b)
937 {
938   return Config::deep_eq(a, b);
939 }
940 
941 inline bool
operator !=(const Config & a,const Config & b)942 operator!=(const Config &a, const Config &b)
943 {
944   return !Config::deep_eq(a, b);
945 }
946 
947 // ------------------------------------------------------------------------
948 
949 template <>
950 inline bool
get() const951 Config::get() const
952 {
953   return as_bool();
954 }
955 template <>
956 inline signed char
get() const957 Config::get() const
958 {
959   return as_integer<signed char>();
960 }
961 template <>
962 inline unsigned char
get() const963 Config::get() const
964 {
965   return as_integer<unsigned char>();
966 }
967 template <>
968 inline signed short
get() const969 Config::get() const
970 {
971   return as_integer<signed short>();
972 }
973 template <>
974 inline unsigned short
get() const975 Config::get() const
976 {
977   return as_integer<unsigned short>();
978 }
979 template <>
980 inline signed int
get() const981 Config::get() const
982 {
983   return as_integer<signed int>();
984 }
985 template <>
986 inline unsigned int
get() const987 Config::get() const
988 {
989   return as_integer<unsigned int>();
990 }
991 template <>
992 inline signed long
get() const993 Config::get() const
994 {
995   return as_integer<signed long>();
996 }
997 template <>
998 inline unsigned long
get() const999 Config::get() const
1000 {
1001   return as_integer<unsigned long>();
1002 }
1003 template <>
1004 inline signed long long
get() const1005 Config::get() const
1006 {
1007   return as_integer<signed long long>();
1008 }
1009 template <>
1010 inline unsigned long long
get() const1011 Config::get() const
1012 {
1013   return as_integer<unsigned long long>();
1014 }
1015 template <>
1016 inline float
get() const1017 Config::get() const
1018 {
1019   return as_float();
1020 }
1021 template <>
1022 inline double
get() const1023 Config::get() const
1024 {
1025   return as_double();
1026 }
1027 template <>
1028 inline const std::string &
get() const1029 Config::get() const
1030 {
1031   return as_string();
1032 }
1033 template <>
1034 inline std::string
get() const1035 Config::get() const
1036 {
1037   return as_string();
1038 }
1039 template <>
1040 inline const Config::ConfigArrayImpl &
get() const1041 Config::get() const
1042 {
1043   return as_array();
1044 }
1045 // template<> inline std::vector<std::string>     Config::get() const { return as_vector<T>();   }
1046 
1047 // ------------------------------------------------------------------------
1048 
1049 template <typename T>
1050 inline T
as(const configuru::Config & config)1051 as(const configuru::Config &config)
1052 {
1053   return config.get<T>();
1054 }
1055 
1056 template <typename T>
1057 T
get_or(const std::string & key,const T & default_value) const1058 Config::get_or(const std::string &key, const T &default_value) const
1059 {
1060   auto &&object = as_object()._impl;
1061   auto it       = object.find(key);
1062   if (it == object.end()) {
1063     return default_value;
1064   } else {
1065     const auto &entry = it->second;
1066     entry._accessed   = true;
1067     return as<T>(entry._value);
1068   }
1069 }
1070 
1071 template <typename T>
1072 T
get_or(std::initializer_list<std::string> keys,const T & default_value) const1073 Config::get_or(std::initializer_list<std::string> keys, const T &default_value) const
1074 {
1075   const Config *obj = this;
1076   for (const auto &key : keys) {
1077     if (obj->has_key(key)) {
1078       obj = &(*obj)[key];
1079     } else {
1080       return default_value;
1081     }
1082   }
1083   return as<T>(*obj);
1084 }
1085 
1086 // ------------------------------------------------------------------------
1087 
1088 /// Prints in JSON but in a fail-safe manner, allowing uninitialized keys and inf/nan.
1089 std::ostream &operator<<(std::ostream &os, const Config &cfg);
1090 
1091 // ------------------------------------------------------------------------
1092 
/// Calls `visitor` on `config` first and then, recursively, on every value
/// nested inside it: the values of an object, or the elements of an array.
template <class Config, class Visitor>
void
visit_configs(Config &&config, Visitor &&visitor)
{
  visitor(config);

  if (config.is_object()) {
    for (auto &&member : config.as_object()) {
      visit_configs(member.value(), visitor);
    }
    return;
  }

  if (config.is_array()) {
    for (auto &&element : config.as_array()) {
      visit_configs(element, visitor);
    }
  }
}
1109 
clear_doc(Config & root)1110 inline void clear_doc(
1111   Config &root) // TODO: shouldn't be needed. Replace with some info of whether a Config is the root of the document it is in.
1112 {
1113   visit_configs(root, [&](Config &cfg) { cfg.set_doc(nullptr); });
1114 }
1115 
1116 /*
1117 inline void replace_doc(Config& root, DocInfo_SP find, DocInfo_SP replacement)
1118 {
1119         visit_configs(root, [&](Config& config){
1120                 if (config.doc() == find) {
1121                         config.set_doc(replacement);
1122                 }
1123         });
1124 }
1125 
// Will try to merge from 'src' to 'dst', replacing with 'src' on any conflict.
1127 inline void merge_replace(Config& dst, const Config& src)
1128 {
1129         if (dst.is_object() && src.is_object()) {
1130                 for (auto&& p : src.as_object()) {
1131                         merge_replace(dst[p.key()], p.value());
1132                 }
1133         } else {
1134                 dst = src;
1135         }
1136 }
1137  */
1138 
1139 // ----------------------------------------------------------
1140 
1141 /// Thrown on a syntax error.
1142 class ParseError : public std::exception
1143 {
1144 public:
ParseError(const DocInfo_SP & doc,Index line,Index column,const std::string & msg)1145   ParseError(const DocInfo_SP &doc, Index line, Index column, const std::string &msg) : _line(line), _column(column)
1146   {
1147     _what = doc->filename + ":" + std::to_string(line) + ":" + std::to_string(column);
1148     doc->append_include_info(_what);
1149     _what += ": " + msg;
1150   }
1151 
1152   /// Will name the file name, line number, column and description.
1153   const char *
what() const1154   what() const noexcept override
1155   {
1156     return _what.c_str();
1157   }
1158 
1159   Index
line() const1160   line() const noexcept
1161   {
1162     return _line;
1163   }
1164   Index
column() const1165   column() const noexcept
1166   {
1167     return _column;
1168   }
1169 
1170 private:
1171   Index _line, _column;
1172   std::string _what;
1173 };
1174 
1175 // ----------------------------------------------------------
1176 
/// Collects every knob that tweaks the file format, for parsing and for writing.
/// The member defaults below are what a default-constructed FormatOptions holds.
struct FormatOptions {
  /// A single tab, several spaces, or an empty string.
  /// An empty string makes the output compact.
  std::string indentation{"\t"};
  bool enforce_indentation{true}; ///< Must have correct indentation?
  bool end_with_newline{true};    ///< End each file with a newline (unless compact).

  // Top of the file:
  bool empty_file{false};         ///< If true, an empty file is an empty object.
  bool implicit_top_object{true}; ///< Ok with key-value pairs top-level?
  bool implicit_top_array{true};  ///< Ok with several values top-level?

  // Comments:
  bool single_line_comments{true};   ///< Allow // comments like this one?
  bool block_comments{true};         ///< Allow /* block comments */?
  bool nesting_block_comments{true}; ///< Allow /* block /* comments */ that nest */?

  // Numbers:
  bool inf{true};                  ///< Allow +inf, -inf
  bool nan{true};                  ///< Allow +NaN
  bool hexadecimal_integers{true}; ///< Allow 0xff
  bool binary_integers{true};      ///< Allow 0b1010
  bool unary_plus{true};           ///< Allow +42
  bool distinct_floats{true};      ///< Print 9.0 as "9.0", not just "9". A must for round-tripping.

  // Arrays:
  bool array_omit_comma{true};     ///< Allow [1 2 3]
  bool array_trailing_comma{true}; ///< Allow [1, 2, 3,]

  // Objects:
  bool identifiers_keys{true};          ///< { is_this_ok: true }
  bool object_separator_equal{false};   ///< { "is_this_ok" = true }
  bool allow_space_before_colon{false}; ///< { "is_this_ok" : true }
  bool omit_colon_before_object{false}; ///< { "nested_object" { } }
  bool object_omit_comma{true};         ///< Allow {a:1 b:2}
  bool object_trailing_comma{true};     ///< Allow {a:1, b:2,}
  bool object_duplicate_keys{false};    ///< Allow {"a":1, "a":2}
  bool object_align_values{true};       ///< Add spaces after keys to align subsequent values.

  // Strings:
  bool str_csharp_verbatim{true};  ///< Allow @"Verbatim\strings"
  bool str_python_multiline{true}; ///< Allow """ Python\nverbatim strings """
  bool str_32bit_unicode{true};    ///< Allow "\U0030dbfd"
  bool str_allow_tab{true};        ///< Allow unescaped tab in string.

  // Special:
  bool allow_macro{true}; ///< Allow `#include "some_other_file.cfg"`

  // When writing:
  bool write_comments{true};

  /// Sort keys lexicographically. If false, keep the order in which they were added.
  bool sort_keys{false};

  /// When printing, write uninitialized values as UNINITIALIZED. Useful for debugging.
  bool write_uninitialized{false};

  /// Should dumping mark the json as accessed?
  bool mark_accessed{true};

  /// True when `indentation` is the empty string.
  bool
  compact() const
  {
    return indentation.size() == 0;
  }
};
1245 
1246 /// Returns FormatOptions that are describe a JSON file format.
1247 inline FormatOptions
make_json_options()1248 make_json_options()
1249 {
1250   FormatOptions options;
1251 
1252   options.indentation         = "\t";
1253   options.enforce_indentation = false;
1254 
1255   // Top file:
1256   options.empty_file          = false;
1257   options.implicit_top_object = false;
1258   options.implicit_top_array  = false;
1259 
1260   // Comments:
1261   options.single_line_comments   = false;
1262   options.block_comments         = false;
1263   options.nesting_block_comments = false;
1264 
1265   // Numbers:
1266   options.inf                  = false;
1267   options.nan                  = false;
1268   options.hexadecimal_integers = false;
1269   options.binary_integers      = false;
1270   options.unary_plus           = false;
1271   options.distinct_floats      = true;
1272 
1273   // Arrays
1274   options.array_omit_comma     = false;
1275   options.array_trailing_comma = false;
1276 
1277   // Objects:
1278   options.identifiers_keys         = false;
1279   options.object_separator_equal   = false;
1280   options.allow_space_before_colon = true;
1281   options.omit_colon_before_object = false;
1282   options.object_omit_comma        = false;
1283   options.object_trailing_comma    = false;
1284   options.object_duplicate_keys    = false; // To be 100% JSON compatile, this should be true, but it is error prone.
1285   options.object_align_values      = true;  // Looks better.
1286 
1287   // Strings
1288   options.str_csharp_verbatim  = false;
1289   options.str_python_multiline = false;
1290   options.str_32bit_unicode    = false;
1291   options.str_allow_tab        = false;
1292 
1293   // Special
1294   options.allow_macro = false;
1295 
1296   // When writing:
1297   options.write_comments = false;
1298   options.sort_keys      = false;
1299 
1300   return options;
1301 }
1302 
1303 /// Returns format options that allow us parsing most files.
1304 inline FormatOptions
make_forgiving_options()1305 make_forgiving_options()
1306 {
1307   FormatOptions options;
1308 
1309   options.indentation         = "\t";
1310   options.enforce_indentation = false;
1311 
1312   // Top file:
1313   options.empty_file          = true;
1314   options.implicit_top_object = true;
1315   options.implicit_top_array  = true;
1316 
1317   // Comments:
1318   options.single_line_comments   = true;
1319   options.block_comments         = true;
1320   options.nesting_block_comments = true;
1321 
1322   // Numbers:
1323   options.inf                  = true;
1324   options.nan                  = true;
1325   options.hexadecimal_integers = true;
1326   options.binary_integers      = true;
1327   options.unary_plus           = true;
1328   options.distinct_floats      = true;
1329 
1330   // Arrays
1331   options.array_omit_comma     = true;
1332   options.array_trailing_comma = true;
1333 
1334   // Objects:
1335   options.identifiers_keys         = true;
1336   options.object_separator_equal   = true;
1337   options.allow_space_before_colon = true;
1338   options.omit_colon_before_object = true;
1339   options.object_omit_comma        = true;
1340   options.object_trailing_comma    = true;
1341   options.object_duplicate_keys    = true;
1342 
1343   // Strings
1344   options.str_csharp_verbatim  = true;
1345   options.str_python_multiline = true;
1346   options.str_32bit_unicode    = true;
1347   options.str_allow_tab        = true;
1348 
1349   // Special
1350   options.allow_macro = true;
1351 
1352   // When writing:
1353   options.write_comments = false;
1354   options.sort_keys      = false;
1355 
1356   return options;
1357 }
1358 
1359 /// The CFG file format.
1360 static const FormatOptions CFG = FormatOptions();
1361 
1362 /// The JSON file format.
1363 static const FormatOptions JSON = make_json_options();
1364 
1365 /// A very forgiving file format, when parsing stuff that is not strict.
1366 static const FormatOptions FORGIVING = make_forgiving_options();
1367 
1368 struct ParseInfo {
1369   std::map<std::string, Config> parsed_files; // Two #include gives same Config tree.
1370 };
1371 
1372 /// The parser may throw ParseError.
1373 /// `str` should be a zero-ended Utf-8 encoded string of characters.
1374 /// The `name` should be something akin to a filename. It is only for error reporting.
1375 Config parse_string(const char *str, const FormatOptions &options, const char *name);
1376 Config parse_file(const std::string &path, const FormatOptions &options);
1377 
1378 /// Advanced usage:
1379 Config parse_string(const char *str, const FormatOptions &options, DocInfo _doc, ParseInfo &info);
1380 Config parse_file(const std::string &path, const FormatOptions &options, DocInfo_SP doc, ParseInfo &info);
1381 
1382 // ----------------------------------------------------------
1383 /// Writes the config as a string in the given format.
1384 /// May call CONFIGURU_ONERROR if the given config is invalid. This can happen if
/// a Config is uninitialized (and options.write_uninitialized is not set) or
1386 /// a Config contains inf/nan (and options.inf/options.nan aren't set).
1387 std::string dump_string(const Config &config, const FormatOptions &options);
1388 
/// Writes the config to a file. Like dump_string, but may also call CONFIGURU_ONERROR
1390 /// if it fails to write to the given path.
1391 void dump_file(const std::string &path, const Config &config, const FormatOptions &options);
1392 
1393 // ----------------------------------------------------------
1394 // Automatic (de)serialize of most things.
1395 // Include <visit_struct/visit_struct.hpp> (from https://github.com/cbeck88/visit_struct)
1396 // before including <configuru.hpp> to get this feature.
1397 
1398 #ifdef VISITABLE_STRUCT
/// Trait selecting the container overloads of serialize/deserialize.
/// False for every type except the specializations below.
template <typename Container>
struct is_container : std::integral_constant<bool, false> {
};

// template <typename... Ts> struct is_container<std::list<Ts...> > : std::true_type { };

/// Any std::vector counts as a container.
template <typename... Ts>
struct is_container<std::vector<Ts...>> : std::integral_constant<bool, true> {
};
1405 
1406 // ----------------------------------------------------------------------------
1407 
1408 Config serialize(const std::string &some_string);
1409 
1410 template <typename T> typename std::enable_if<std::is_arithmetic<T>::value, Config>::type serialize(const T &some_value);
1411 
1412 template <typename T, size_t N> Config serialize(T (&some_array)[N]);
1413 
1414 template <typename T> typename std::enable_if<is_container<T>::value, Config>::type serialize(const T &some_container);
1415 
1416 template <typename T>
1417 typename std::enable_if<visit_struct::traits::is_visitable<T>::value, Config>::type serialize(const T &some_struct);
1418 
1419 // ----------------------------------------------------------------------------
1420 
1421 inline Config
serialize(const std::string & some_string)1422 serialize(const std::string &some_string)
1423 {
1424   return Config(some_string);
1425 }
1426 
1427 template <typename T>
1428 typename std::enable_if<std::is_arithmetic<T>::value, Config>::type
serialize(const T & some_value)1429 serialize(const T &some_value)
1430 {
1431   return Config(some_value);
1432 }
1433 
serialize(T (& some_array)[N])1434 template <typename T, size_t N> Config serialize(T (&some_array)[N])
1435 {
1436   auto config = Config::array();
1437   for (size_t i = 0; i < N; ++i) {
1438     config.push_back(serialize(some_array[i]));
1439   }
1440   return config;
1441 }
1442 
1443 template <typename T>
1444 typename std::enable_if<is_container<T>::value, Config>::type
serialize(const T & some_container)1445 serialize(const T &some_container)
1446 {
1447   auto config = Config::array();
1448   for (const auto &value : some_container) {
1449     config.push_back(serialize(value));
1450   }
1451   return config;
1452 }
1453 
1454 template <typename T>
1455 typename std::enable_if<visit_struct::traits::is_visitable<T>::value, Config>::type
serialize(const T & some_struct)1456 serialize(const T &some_struct)
1457 {
1458   auto config = Config::object();
1459   visit_struct::apply_visitor([&config](const std::string &name, const auto &value) { config[name] = serialize(value); },
1460                               some_struct);
1461   return config;
1462 }
1463 
1464 // ----------------------------------------------------------------------------
1465 
1466 /// Called when there is a problem in deserialize.
1467 using ConversionError = std::function<void(std::string)>;
1468 
1469 void deserialize(std::string *some_string, const Config &config, const ConversionError &on_error);
1470 
1471 template <typename T>
1472 typename std::enable_if<std::is_arithmetic<T>::value>::type deserialize(T *some_value, const Config &config,
1473                                                                         const ConversionError &on_error);
1474 
1475 template <typename T, size_t N>
1476 typename std::enable_if<std::is_arithmetic<T>::value>::type deserialize(T (*some_array)[N], const Config &config,
1477                                                                         const ConversionError &on_error);
1478 
1479 template <typename T>
1480 typename std::enable_if<is_container<T>::value>::type deserialize(T *some_container, const Config &config,
1481                                                                   const ConversionError &on_error);
1482 
1483 template <typename T>
1484 typename std::enable_if<visit_struct::traits::is_visitable<T>::value>::type deserialize(T *some_struct, const Config &config,
1485                                                                                         const ConversionError &on_error);
1486 
1487 // ----------------------------------------------------------------------------
1488 
1489 inline void
deserialize(std::string * some_string,const Config & config,const ConversionError & on_error)1490 deserialize(std::string *some_string, const Config &config, const ConversionError &on_error)
1491 {
1492   *some_string = config.as_string();
1493 }
1494 
1495 template <typename T>
1496 typename std::enable_if<std::is_arithmetic<T>::value>::type
deserialize(T * some_value,const Config & config,const ConversionError & on_error)1497 deserialize(T *some_value, const Config &config, const ConversionError &on_error)
1498 {
1499   *some_value = as<T>(config);
1500 }
1501 
1502 template <typename T, size_t N>
1503 typename std::enable_if<std::is_arithmetic<T>::value>::type
deserialize(T (* some_array)[N],const Config & config,const ConversionError & on_error)1504 deserialize(T (*some_array)[N], const Config &config, const ConversionError &on_error)
1505 {
1506   if (config.array_size() != N) {
1507     if (on_error) {
1508       on_error(config.where() + "Expected array to be " + std::to_string(N) + " long.");
1509     }
1510   } else {
1511     for (size_t i = 0; i < N; ++i) {
1512       deserialize(&(*some_array)[i], config[i], on_error);
1513     }
1514   }
1515 }
1516 
1517 template <typename T>
1518 typename std::enable_if<is_container<T>::value>::type
deserialize(T * some_container,const Config & config,const ConversionError & on_error)1519 deserialize(T *some_container, const Config &config, const ConversionError &on_error)
1520 {
1521   if (!config.is_array()) {
1522     if (on_error) {
1523       on_error(config.where() + "Failed to deserialize container: config is not an array.");
1524     }
1525   } else {
1526     some_container->clear();
1527     some_container->reserve(config.array_size());
1528     for (const auto &value : config.as_array()) {
1529       some_container->push_back({});
1530       deserialize(&some_container->back(), value, on_error);
1531     }
1532   }
1533 }
1534 
1535 template <typename T>
1536 typename std::enable_if<visit_struct::traits::is_visitable<T>::value>::type
deserialize(T * some_struct,const Config & config,const ConversionError & on_error)1537 deserialize(T *some_struct, const Config &config, const ConversionError &on_error)
1538 {
1539   if (!config.is_object()) {
1540     if (on_error) {
1541       on_error(config.where() + "Failed to deserialize object: config is not an object.");
1542     }
1543   } else {
1544     visit_struct::apply_visitor(
1545       [&config, &on_error](const std::string &name, auto &value) {
1546         if (config.has_key(name)) {
1547           deserialize(&value, config[name], on_error);
1548         }
1549       },
1550       *some_struct);
1551   }
1552 }
1553 #endif // VISITABLE_STRUCT
1554 
1555 } // namespace configuru
1556 
1557 // ----------------------------------------------------------------------------
1558 // 88 8b    d8 88""Yb 88     888888 8b    d8 888888 88b 88 888888    db    888888 88  dP"Yb  88b 88
1559 // 88 88b  d88 88__dP 88     88__   88b  d88 88__   88Yb88   88     dPYb     88   88 dP   Yb 88Yb88
1560 // 88 88YbdP88 88"""  88  .o 88""   88YbdP88 88""   88 Y88   88    dP__Yb    88   88 Yb   dP 88 Y88
1561 // 88 88 YY 88 88     88ood8 888888 88 YY 88 888888 88  Y8   88   dP""""Yb   88   88  YbodP  88  Y8
1562 
1563 /* In one of your .cpp files you need to do the following:
1564 #define CONFIGURU_IMPLEMENTATION
1565 #include <configuru.hpp>
1566 
1567 This will define all the Configuru functions so that the linker may find them.
1568 */
1569 
1570 #include <algorithm>
1571 #include <limits>
1572 #include <ostream>
1573 
1574 // ----------------------------------------------------------------------------
1575 namespace configuru
1576 {
1577 void
append_include_info(std::string & ret,const std::string & indent) const1578 DocInfo::append_include_info(std::string &ret, const std::string &indent) const
1579 {
1580   if (!includers.empty()) {
1581     ret += ", included at:\n";
1582     for (auto &&includer : includers) {
1583       ret += indent + includer.doc->filename + ":" + std::to_string(includer.line);
1584       includer.doc->append_include_info(ret, indent + "    ");
1585       ret += "\n";
1586     }
1587     ret.pop_back();
1588   }
1589 }
1590 
1591 struct BadLookupInfo {
1592   const DocInfo_SP doc; // Of parent object
1593   const unsigned line;  // Of parent object
1594   const std::string key;
1595 
1596 #if !CONFIGURU_VALUE_SEMANTICS
1597   std::atomic<unsigned> _ref_count{1};
1598 #endif
1599 
BadLookupInfoconfiguru::BadLookupInfo1600   BadLookupInfo(DocInfo_SP doc_, Index line_, std::string key_) : doc(std::move(doc_)), line(line_), key(std::move(key_)) {}
1601 };
1602 
Config(const char * str)1603 Config::Config(const char *str) : _type(String)
1604 {
1605   CONFIGURU_ASSERT(str != nullptr);
1606   _u.str = new std::string(str);
1607 }
1608 
Config(std::string str)1609 Config::Config(std::string str) : _type(String)
1610 {
1611   _u.str = new std::string(move(str));
1612 }
1613 
Config(std::initializer_list<std::pair<std::string,Config>> values)1614 Config::Config(std::initializer_list<std::pair<std::string, Config>> values) : _type(Uninitialized)
1615 {
1616   make_object();
1617   for (auto &&v : values) {
1618     (*this)[v.first] = std::move(v.second);
1619   }
1620 }
1621 
1622 void
make_object()1623 Config::make_object()
1624 {
1625   assert_type(Uninitialized);
1626   _type     = Object;
1627   _u.object = new ConfigObject();
1628 }
1629 
1630 void
make_array()1631 Config::make_array()
1632 {
1633   assert_type(Uninitialized);
1634   _type    = Array;
1635   _u.array = new ConfigArray();
1636 }
1637 
1638 Config
object()1639 Config::object()
1640 {
1641   Config ret;
1642   ret.make_object();
1643   return ret;
1644 }
1645 
1646 Config
object(std::initializer_list<std::pair<std::string,Config>> values)1647 Config::object(std::initializer_list<std::pair<std::string, Config>> values)
1648 {
1649   Config ret;
1650   ret.make_object();
1651   for (auto &&p : values) {
1652     ret[static_cast<std::string>(p.first)] = std::move(p.second);
1653   }
1654   return ret;
1655 }
1656 
1657 Config
array()1658 Config::array()
1659 {
1660   Config ret;
1661   ret.make_array();
1662   return ret;
1663 }
1664 
1665 Config
array(std::initializer_list<Config> values)1666 Config::array(std::initializer_list<Config> values)
1667 {
1668   Config ret;
1669   ret.make_array();
1670   ret._u.array->_impl.reserve(values.size());
1671   for (auto &&v : values) {
1672     ret.push_back(std::move(v));
1673   }
1674   return ret;
1675 }
1676 
1677 void
tag(const DocInfo_SP & doc,Index line,Index column)1678 Config::tag(const DocInfo_SP &doc, Index line, Index column)
1679 {
1680   _doc  = doc;
1681   _line = line;
1682   (void)column; // TODO: include this info too.
1683 }
1684 
1685 // ------------------------------------------------------------------------
1686 
/// Copy constructor: starts Uninitialized and delegates to copy assignment.
Config::Config(const Config &o) : _type(Uninitialized)
{
  operator=(o);
}
1691 
Config(Config && o)1692 Config::Config(Config &&o) noexcept : _type(Uninitialized)
1693 {
1694   this->swap(o);
1695 }
1696 
1697 void
swap(Config & o)1698 Config::swap(Config &o) noexcept
1699 {
1700   if (&o == this) {
1701     return;
1702   }
1703   std::swap(_type, o._type);
1704   std::swap(_u, o._u);
1705   std::swap(_doc, o._doc);
1706   std::swap(_line, o._line);
1707   std::swap(_comments, o._comments);
1708 }
1709 
1710 Config &
operator =(Config && o)1711 Config::operator=(Config &&o) noexcept
1712 {
1713   if (&o == this) {
1714     return *this;
1715   }
1716 
1717   std::swap(_type, o._type);
1718   std::swap(_u, o._u);
1719 
1720   // Remember where we come from even when assigned a new value:
1721   if (o._doc || o._line != BAD_INDEX) {
1722     std::swap(_doc, o._doc);
1723     std::swap(_line, o._line);
1724   }
1725 
1726   if (o._comments) {
1727     std::swap(_comments, o._comments);
1728   }
1729 
1730   return *this;
1731 }
1732 
1733 Config &
operator =(const Config & o)1734 Config::operator=(const Config &o)
1735 {
1736   if (&o == this) {
1737     return *this;
1738   }
1739 
1740   free();
1741 
1742   _type = o._type;
1743 
1744 #if CONFIGURU_VALUE_SEMANTICS
1745   if (_type == String) {
1746     _u.str = new std::string(*o._u.str);
1747   } else if (_type == BadLookupType) {
1748     _u.bad_lookup = new BadLookupInfo(*o._u.bad_lookup);
1749   } else if (_type == Object) {
1750     _u.object = new ConfigObject(*o._u.object);
1751   } else if (_type == Array) {
1752     _u.array = new ConfigArray(*o._u.array);
1753   } else {
1754     memcpy(&_u, &o._u, sizeof(_u));
1755   }
1756 #else  // !CONFIGURU_VALUE_SEMANTICS:
1757   if (_type == String) {
1758     _u.str = new std::string(*o._u.str);
1759   } else {
1760     memcpy(&_u, &o._u, sizeof(_u));
1761     if (_type == BadLookupType) {
1762       ++_u.bad_lookup->_ref_count;
1763     }
1764     if (_type == Array) {
1765       ++_u.array->_ref_count;
1766     }
1767     if (_type == Object) {
1768       ++_u.object->_ref_count;
1769     }
1770   }
1771 #endif // !CONFIGURU_VALUE_SEMANTICS
1772 
1773   // Remember where we come from even when assigned a new value:
1774   if (o._doc || o._line != BAD_INDEX) {
1775     _doc  = o._doc;
1776     _line = o._line;
1777   }
1778 
1779   if (o._comments) {
1780     _comments.reset(new ConfigComments(*o._comments));
1781   }
1782 
1783 #if CONFIGURU_VALUE_SEMANTICS
1784   o.mark_accessed(true);
1785 #endif
1786 
1787   return *this;
1788 }
1789 
~Config()1790 Config::~Config()
1791 {
1792   free();
1793 }
1794 
1795 void
free()1796 Config::free()
1797 {
1798 #if CONFIGURU_VALUE_SEMANTICS
1799   if (_type == BadLookupType) {
1800     delete _u.bad_lookup;
1801   } else if (_type == Object) {
1802     delete _u.object;
1803   } else if (_type == Array) {
1804     delete _u.array;
1805   } else if (_type == String) {
1806     delete _u.str;
1807   }
1808 #else  // !CONFIGURU_VALUE_SEMANTICS:
1809   if (_type == BadLookupType) {
1810     if (--_u.bad_lookup->_ref_count == 0) {
1811       delete _u.bad_lookup;
1812     }
1813   } else if (_type == Object) {
1814     if (--_u.object->_ref_count == 0) {
1815       delete _u.object;
1816     }
1817   } else if (_type == Array) {
1818     if (--_u.array->_ref_count == 0) {
1819       delete _u.array;
1820     }
1821   } else if (_type == String) {
1822     delete _u.str;
1823   }
1824 #endif // !CONFIGURU_VALUE_SEMANTICS
1825 
1826   _type = Uninitialized;
1827 
1828   // Keep _doc, _line, _comments until overwritten/destructor.
1829 }
1830 
1831 // ------------------------------------------------------------------------
1832 
1833 size_t
object_size() const1834 Config::object_size() const
1835 {
1836   return as_object()._impl.size();
1837 }
1838 
operator [](const std::string & key) const1839 const Config &Config::operator[](const std::string &key) const
1840 {
1841   auto &&object = as_object()._impl;
1842   auto it       = object.find(key);
1843   if (it == object.end()) {
1844     on_error("Key '" + key + "' not in object");
1845   } else {
1846     const auto &entry = it->second;
1847     entry._accessed   = true;
1848     return entry._value;
1849   }
1850 }
1851 
operator [](const std::string & key)1852 Config &Config::operator[](const std::string &key)
1853 {
1854   auto &&object = as_object()._impl;
1855   auto &&entry  = object[key];
1856   if (entry._nr == BAD_INDEX) {
1857     // New entry
1858     entry._nr                  = static_cast<Index>(object.size()) - 1;
1859     entry._value._type         = BadLookupType;
1860     entry._value._u.bad_lookup = new BadLookupInfo{_doc, _line, key};
1861   } else {
1862     entry._accessed = true;
1863   }
1864   return entry._value;
1865 }
1866 
1867 bool
has_key(const std::string & key) const1868 Config::has_key(const std::string &key) const
1869 {
1870   return as_object()._impl.count(key) != 0;
1871 }
1872 
1873 bool
emplace(std::string key,Config value)1874 Config::emplace(std::string key, Config value)
1875 {
1876   auto &&object = as_object()._impl;
1877   return object.emplace(std::move(key), Config::ObjectEntry{std::move(value), (unsigned)object.size()}).second;
1878 }
1879 
1880 void
insert_or_assign(const std::string & key,Config && config)1881 Config::insert_or_assign(const std::string &key, Config &&config)
1882 {
1883   auto &&object = as_object()._impl;
1884   auto &&entry  = object[key];
1885   if (entry._nr == BAD_INDEX) {
1886     // New entry
1887     entry._nr = static_cast<Index>(object.size()) - 1;
1888   } else {
1889     entry._accessed = true;
1890   }
1891   entry._value = std::move(config);
1892 }
1893 
1894 bool
erase(const std::string & key)1895 Config::erase(const std::string &key)
1896 {
1897   auto &object = as_object()._impl;
1898   auto it      = object.find(key);
1899   if (it == object.end()) {
1900     return false;
1901   } else {
1902     object.erase(it);
1903     return true;
1904   }
1905 }
1906 
1907 bool
deep_eq(const Config & a,const Config & b)1908 Config::deep_eq(const Config &a, const Config &b)
1909 {
1910   if (a._type != b._type) {
1911     return false;
1912   }
1913   if (a._type == Null) {
1914     return true;
1915   }
1916   if (a._type == Bool) {
1917     return a._u.b == b._u.b;
1918   }
1919   if (a._type == Int) {
1920     return a._u.i == b._u.i;
1921   }
1922   if (a._type == Float) {
1923     return a._u.f == b._u.f;
1924   }
1925   if (a._type == String) {
1926     return *a._u.str == *b._u.str;
1927   }
1928   if (a._type == Object) {
1929     if (a._u.object == b._u.object) {
1930       return true;
1931     }
1932     auto &&a_object = a.as_object()._impl;
1933     auto &&b_object = b.as_object()._impl;
1934     if (a_object.size() != b_object.size()) {
1935       return false;
1936     }
1937     for (auto &&p : a_object) {
1938       auto it = b_object.find(p.first);
1939       if (it == b_object.end()) {
1940         return false;
1941       }
1942       if (!deep_eq(p.second._value, it->second._value)) {
1943         return false;
1944       }
1945     }
1946     return true;
1947   }
1948   if (a._type == Array) {
1949     if (a._u.array == b._u.array) {
1950       return true;
1951     }
1952     auto &&a_array = a.as_array();
1953     auto &&b_array = b.as_array();
1954     if (a_array.size() != b_array.size()) {
1955       return false;
1956     }
1957     for (size_t i = 0; i < a_array.size(); ++i) {
1958       if (!deep_eq(a_array[i], a_array[i])) {
1959         return false;
1960       }
1961     }
1962     return true;
1963   }
1964 
1965   return false;
1966 }
1967 
1968 #if !CONFIGURU_VALUE_SEMANTICS
1969 Config
deep_clone() const1970 Config::deep_clone() const
1971 {
1972   Config ret = *this;
1973   if (ret._type == Object) {
1974     ret = Config::object();
1975     for (auto &&p : this->as_object()._impl) {
1976       auto &dst  = ret._u.object->_impl[p.first];
1977       dst._nr    = p.second._nr;
1978       dst._value = p.second._value.deep_clone();
1979     }
1980   }
1981   if (ret._type == Array) {
1982     ret = Config::array();
1983     for (auto &&value : this->as_array()) {
1984       ret.push_back(value.deep_clone());
1985     }
1986   }
1987   return ret;
1988 }
1989 #endif
1990 
1991 void
visit_dangling(const std::function<void (const std::string & key,const Config & value)> & visitor) const1992 Config::visit_dangling(const std::function<void(const std::string &key, const Config &value)> &visitor) const
1993 {
1994   if (is_object()) {
1995     for (auto &&p : as_object()._impl) {
1996       auto &&entry = p.second;
1997       auto &&value = entry._value;
1998       if (entry._accessed) {
1999         value.check_dangling();
2000       } else {
2001         visitor(p.first, value);
2002       }
2003     }
2004   } else if (is_array()) {
2005     for (auto &&e : as_array()) {
2006       e.check_dangling();
2007     }
2008   }
2009 }
2010 
2011 void
check_dangling() const2012 Config::check_dangling() const
2013 {
2014   std::string message = "";
2015 
2016   visit_dangling([&](const std::string &key, const Config &value) {
2017     message += "\n    " + value.where() + "Key '" + key + "' never accessed.";
2018   });
2019 
2020   if (!message.empty()) {
2021     message = "Dangling keys:" + message;
2022     CONFIGURU_ON_DANGLING(message);
2023   }
2024 }
2025 
2026 void
mark_accessed(bool v) const2027 Config::mark_accessed(bool v) const
2028 {
2029   if (is_object()) {
2030     for (auto &&p : as_object()._impl) {
2031       auto &&entry    = p.second;
2032       entry._accessed = v;
2033       entry._value.mark_accessed(v);
2034     }
2035   } else if (is_array()) {
2036     for (auto &&e : as_array()) {
2037       e.mark_accessed(v);
2038     }
2039   }
2040 }
2041 
2042 const char *
debug_descr() const2043 Config::debug_descr() const
2044 {
2045   switch (_type) {
2046   case Bool:
2047     return _u.b ? "true" : "false";
2048   case String:
2049     return _u.str->c_str();
2050   default:
2051     return type_str(_type);
2052   }
2053 }
2054 
2055 const char *
type_str(Type t)2056 Config::type_str(Type t)
2057 {
2058   switch (t) {
2059   case Uninitialized:
2060     return "uninitialized";
2061   case BadLookupType:
2062     return "undefined";
2063   case Null:
2064     return "null";
2065   case Bool:
2066     return "bool";
2067   case Int:
2068     return "integer";
2069   case Float:
2070     return "float";
2071   case String:
2072     return "string";
2073   case Array:
2074     return "array";
2075   case Object:
2076     return "object";
2077   }
2078   return "BROKEN Config";
2079 }
2080 
2081 std::string
where_is(const DocInfo_SP & doc,Index line)2082 where_is(const DocInfo_SP &doc, Index line)
2083 {
2084   if (doc) {
2085     std::string ret = doc->filename;
2086     if (line != BAD_INDEX) {
2087       ret += ":" + std::to_string(line);
2088     }
2089     doc->append_include_info(ret);
2090     ret += ": ";
2091     return ret;
2092   } else if (line != BAD_INDEX) {
2093     return "line " + std::to_string(line) + ": ";
2094   } else {
2095     return "";
2096   }
2097 }
2098 
2099 std::string
where() const2100 Config::where() const
2101 {
2102   return where_is(_doc, _line);
2103 }
2104 
2105 void
on_error(const std::string & msg) const2106 Config::on_error(const std::string &msg) const
2107 {
2108   CONFIGURU_ONERROR(where() + msg);
2109   abort(); // We shouldn't get here.
2110 }
2111 
2112 void
assert_type(Type exepected) const2113 Config::assert_type(Type exepected) const
2114 {
2115   if (_type == BadLookupType) {
2116     auto where = where_is(_u.bad_lookup->doc, _u.bad_lookup->line);
2117     CONFIGURU_ONERROR(where + "Failed to find key '" + _u.bad_lookup->key + "'");
2118   } else if (_type != exepected) {
2119     const auto message = where() + "Expected " + type_str(exepected) + ", got " + type_str(_type);
2120     if (_type == Uninitialized && exepected == Object) {
2121       CONFIGURU_ONERROR(message + ". Did you forget to call Config::object()?");
2122     } else if (_type == Uninitialized && exepected == Array) {
2123       CONFIGURU_ONERROR(message + ". Did you forget to call Config::array()?");
2124     } else {
2125       CONFIGURU_ONERROR(message);
2126     }
2127   }
2128 }
2129 
2130 std::ostream &
operator <<(std::ostream & os,const Config & cfg)2131 operator<<(std::ostream &os, const Config &cfg)
2132 {
2133   auto format = JSON;
2134   // Make sure that all config types are serializable:
2135   format.inf                 = true;
2136   format.nan                 = true;
2137   format.write_uninitialized = true;
2138   format.end_with_newline    = false;
2139   format.mark_accessed       = false;
2140   return os << dump_string(cfg, format);
2141 }
2142 }
2143 
2144 // ----------------------------------------------------------------------------
2145 // 88""Yb    db    88""Yb .dP"Y8 888888 88""Yb
2146 // 88__dP   dPYb   88__dP `Ybo." 88__   88__dP
2147 // 88"""   dP__Yb  88"Yb  o.`Y8b 88""   88"Yb
2148 // 88     dP""""Yb 88  Yb 8bodP' 888888 88  Yb
2149 
2150 #include <cerrno>
2151 #include <cstdlib>
2152 
2153 namespace configuru
2154 {
2155 void
append(Comments & a,Comments && b)2156 append(Comments &a, Comments &&b)
2157 {
2158   for (auto &&entry : b) {
2159     a.emplace_back(std::move(entry));
2160   }
2161 }
2162 
2163 bool
empty() const2164 ConfigComments::empty() const
2165 {
2166   return prefix.empty() && postfix.empty() && pre_end_brace.empty();
2167 }
2168 
2169 void
append(ConfigComments && other)2170 ConfigComments::append(ConfigComments &&other)
2171 {
2172   configuru::append(this->prefix, std::move(other.prefix));
2173   configuru::append(this->postfix, std::move(other.postfix));
2174   configuru::append(this->pre_end_brace, std::move(other.pre_end_brace));
2175 }
2176 
// Appends the UTF-8 encoding of code point `c` to `dst`.
//
// Supports the original 1..6 byte UTF-8 forms, i.e. values up to 0x7FFFFFFF.
// Returns the number of bytes written, or 0 on error (value too large), in
// which case `dst` is left untouched.
size_t
encode_utf8(std::string &dst, uint64_t c)
{
  if (c <= 0x7F) // 0XXX XXXX - one byte
  {
    dst += static_cast<char>(c);
    return 1;
  }

  size_t count  = 0; // Total number of bytes in the encoding.
  uint64_t lead = 0; // Marker bits of the leading byte.
  if (c <= 0x7FF) {  // 110X XXXX - two bytes
    count = 2;
    lead  = 0xC0;
  } else if (c <= 0xFFFF) { // 1110 XXXX - three bytes
    count = 3;
    lead  = 0xE0;
  } else if (c <= 0x1FFFFF) { // 1111 0XXX - four bytes
    count = 4;
    lead  = 0xF0;
  } else if (c <= 0x3FFFFFF) { // 1111 10XX - five bytes
    count = 5;
    lead  = 0xF8;
  } else if (c <= 0x7FFFFFFF) { // 1111 110X - six bytes
    count = 6;
    lead  = 0xFC;
  } else {
    return 0; // Error
  }

  // The leading byte carries the bits above the continuation bytes; the
  // range checks above guarantee they fit beside the marker bits.
  dst += static_cast<char>(lead | (c >> (6 * (count - 1))));

  // Every continuation byte is 10XX XXXX: exactly six payload bits, always
  // masked so higher bits can never leak into the marker.
  for (size_t i = count - 1; i-- > 0;) {
    dst += static_cast<char>(0x80 | ((c >> (6 * i)) & 0x3F));
  }
  return count;
}
2224 
// Renders a character for use in error messages: NUL and space get symbolic
// names, common control characters are shown as quoted escape sequences,
// and anything else is wrapped in single quotes.
std::string
quote(char c)
{
  switch (c) {
  case 0:
    return "<eof>";
  case ' ':
    return "<space>";
  case '\n':
    return "'\\n'";
  case '\t':
    return "'\\t'";
  case '\r':
    return "'\\r'";
  case '\b':
    return "'\\b'";
  default:
    break;
  }

  std::string quoted(1, '\'');
  quoted += c;
  quoted += '\'';
  return quoted;
}
2248 
// Snapshot of a parser position, used to rewind or to report an error at an
// earlier location. Member order matters: it is brace-initialized.
struct State {
  const char *ptr;        // Read position.
  unsigned line_nr;       // Line number at `ptr`.
  const char *line_start; // First character of the line containing `ptr`.
};
2254 
2255 struct Parser {
2256   Parser(const char *str, const FormatOptions &options, DocInfo_SP doc, ParseInfo &info);
2257 
2258   bool skip_white(Comments *out_comments, int &out_indentation, bool break_on_newline);
2259 
2260   bool
skip_white_ignore_commentsconfiguru::Parser2261   skip_white_ignore_comments()
2262   {
2263     int indentation;
2264     return skip_white(nullptr, indentation, false);
2265   }
2266 
2267   bool
skip_pre_whiteconfiguru::Parser2268   skip_pre_white(Config *config, int &out_indentation)
2269   {
2270     if (!MAYBE_WHITE[static_cast<uint8_t>(_ptr[0])]) {
2271       // Early out
2272       out_indentation = -1;
2273       return false;
2274     }
2275 
2276     Comments comments;
2277     bool did_skip = skip_white(&comments, out_indentation, false);
2278     if (!comments.empty()) {
2279       append(config->comments().prefix, std::move(comments));
2280     }
2281     return did_skip;
2282   }
2283 
2284   bool
skip_post_whiteconfiguru::Parser2285   skip_post_white(Config *config)
2286   {
2287     if (!MAYBE_WHITE[static_cast<uint8_t>(_ptr[0])]) {
2288       // Early out
2289       return false;
2290     }
2291 
2292     Comments comments;
2293     int indentation;
2294     bool did_skip = skip_white(&comments, indentation, true);
2295     if (!comments.empty()) {
2296       append(config->comments().postfix, std::move(comments));
2297     }
2298     return did_skip;
2299   }
2300 
2301   Config top_level();
2302   void parse_value(Config &out, bool *out_did_skip_postwhites);
2303   void parse_array(Config &dst);
2304   void parse_array_contents(Config &dst);
2305   void parse_object(Config &dst);
2306   void parse_object_contents(Config &dst);
2307   void parse_int(Config &out);
2308   void parse_float(Config &out);
2309   void parse_finite_number(Config &dst);
2310   std::string parse_string();
2311   std::string parse_c_sharp_string();
2312   uint64_t parse_hex(int count);
2313   void parse_macro(Config &dst);
2314 
2315   void
tagconfiguru::Parser2316   tag(Config &var)
2317   {
2318     var.tag(_doc, _line_nr, column());
2319   }
2320 
2321   State
get_stateconfiguru::Parser2322   get_state() const
2323   {
2324     return {_ptr, _line_nr, _line_start};
2325   }
2326 
2327   void
set_stateconfiguru::Parser2328   set_state(State s)
2329   {
2330     _ptr        = s.ptr;
2331     _line_nr    = s.line_nr;
2332     _line_start = s.line_start;
2333   }
2334 
2335   Index
columnconfiguru::Parser2336   column() const
2337   {
2338     return static_cast<unsigned>(_ptr - _line_start + 1);
2339   }
2340 
2341   const char *
start_of_lineconfiguru::Parser2342   start_of_line() const
2343   {
2344     return _line_start;
2345   }
2346 
2347   const char *
end_of_lineconfiguru::Parser2348   end_of_line() const
2349   {
2350     const char *p = _ptr;
2351     while (*p && *p != '\r' && *p != '\n') {
2352       ++p;
2353     }
2354     return p;
2355   }
2356 
2357   void
throw_errorconfiguru::Parser2358   throw_error(const std::string &desc) CONFIGURU_NORETURN
2359   {
2360     const char *sol = start_of_line();
2361     const char *eol = end_of_line();
2362     std::string orientation;
2363     for (const char *p = sol; p != eol; ++p) {
2364       if (*p == '\t') {
2365         orientation += "    ";
2366       } else {
2367         orientation.push_back(*p);
2368       }
2369     }
2370 
2371     orientation += "\n";
2372     for (const char *p = sol; p != _ptr; ++p) {
2373       if (*p == '\t') {
2374         orientation += "    ";
2375       } else {
2376         orientation.push_back(' ');
2377       }
2378     }
2379     orientation += "^";
2380 
2381     throw ParseError(_doc, _line_nr, column(), desc + "\n" + orientation);
2382   }
2383 
2384   void
throw_indentation_errorconfiguru::Parser2385   throw_indentation_error(int found_tabs, int expected_tabs)
2386   {
2387     if (_options.enforce_indentation) {
2388       char buff[128];
2389       snprintf(buff, sizeof(buff), "Bad indentation: expected %d tabs, found %d", found_tabs, expected_tabs);
2390       throw_error(buff);
2391     }
2392   }
2393 
2394   void
parse_assertconfiguru::Parser2395   parse_assert(bool b, const char *error_msg)
2396   {
2397     if (!b) {
2398       throw_error(error_msg);
2399     }
2400   }
2401 
2402   void
parse_assertconfiguru::Parser2403   parse_assert(bool b, const char *error_msg, const State &error_state)
2404   {
2405     if (!b) {
2406       set_state(error_state);
2407       throw_error(error_msg);
2408     }
2409   }
2410 
2411   void
swallowconfiguru::Parser2412   swallow(char c)
2413   {
2414     if (_ptr[0] == c) {
2415       _ptr += 1;
2416     } else {
2417       throw_error("Expected " + quote(c));
2418     }
2419   }
2420 
2421   bool
try_swallowconfiguru::Parser2422   try_swallow(const char *str)
2423   {
2424     auto n = strlen(str);
2425     if (strncmp(str, _ptr, n) == 0) {
2426       _ptr += n;
2427       return true;
2428     } else {
2429       return false;
2430     }
2431   }
2432 
2433   void
swallowconfiguru::Parser2434   swallow(const char *str, const char *error_msg)
2435   {
2436     parse_assert(try_swallow(str), error_msg);
2437   }
2438 
2439   bool
is_reserved_identifierconfiguru::Parser2440   is_reserved_identifier(const char *ptr)
2441   {
2442     if (strncmp(ptr, "true", 4) == 0 || strncmp(ptr, "null", 4) == 0) {
2443       return !IDENT_CHARS[static_cast<uint8_t>(ptr[4])];
2444     } else if (strncmp(ptr, "false", 5) == 0) {
2445       return !IDENT_CHARS[static_cast<uint8_t>(ptr[5])];
2446     } else {
2447       return false;
2448     }
2449   }
2450 
2451 private:
2452   bool IDENT_STARTERS[256]     = {0};
2453   bool IDENT_CHARS[256]        = {0};
2454   bool MAYBE_WHITE[256]        = {0};
2455   bool SPECIAL_CHARACTERS[256] = {0};
2456 
2457 private:
2458   FormatOptions _options;
2459   DocInfo_SP _doc;
2460   ParseInfo &_info;
2461 
2462   const char *_ptr;
2463   Index _line_nr;
2464   const char *_line_start;
2465   int _indentation = 0; // Expected number of tabs between a \n and the next key/value
2466 };
2467 
2468 // --------------------------------------------
2469 
// Sets lookup[c] to true for every character c in the inclusive range
// [a, b]. Does nothing when a > b.
void
set_range(bool lookup[256], char a, char b)
{
  // Count in int rather than char: a char counter could never exceed b when
  // b is the largest char value, so the loop would not terminate.
  for (int c = a; c <= b; ++c) {
    lookup[static_cast<uint8_t>(c)] = true;
  }
}
2478 
Parser(const char * str,const FormatOptions & options,DocInfo_SP doc,ParseInfo & info)2479 Parser::Parser(const char *str, const FormatOptions &options, DocInfo_SP doc, ParseInfo &info) : _doc(doc), _info(info)
2480 {
2481   _options    = options;
2482   _line_nr    = 1;
2483   _ptr        = str;
2484   _line_start = str;
2485 
2486   IDENT_STARTERS[static_cast<uint8_t>('_')] = true;
2487   set_range(IDENT_STARTERS, 'a', 'z');
2488   set_range(IDENT_STARTERS, 'A', 'Z');
2489 
2490   IDENT_CHARS[static_cast<uint8_t>('_')] = true;
2491   set_range(IDENT_CHARS, 'a', 'z');
2492   set_range(IDENT_CHARS, 'A', 'Z');
2493   set_range(IDENT_CHARS, '0', '9');
2494 
2495   MAYBE_WHITE[static_cast<uint8_t>('\n')] = true;
2496   MAYBE_WHITE[static_cast<uint8_t>('\r')] = true;
2497   MAYBE_WHITE[static_cast<uint8_t>('\t')] = true;
2498   MAYBE_WHITE[static_cast<uint8_t>(' ')]  = true;
2499   MAYBE_WHITE[static_cast<uint8_t>('/')]  = true; // Maybe a comment
2500 
2501   SPECIAL_CHARACTERS[static_cast<uint8_t>('\0')] = true;
2502   SPECIAL_CHARACTERS[static_cast<uint8_t>('\\')] = true;
2503   SPECIAL_CHARACTERS[static_cast<uint8_t>('\"')] = true;
2504   SPECIAL_CHARACTERS[static_cast<uint8_t>('\n')] = true;
2505   SPECIAL_CHARACTERS[static_cast<uint8_t>('\t')] = true;
2506 
2507   CONFIGURU_ASSERT(_options.indentation != "" || !_options.enforce_indentation);
2508 }
2509 
2510 // Returns true if we did skip white-space.
2511 // out_indentation is the depth of indentation on the last line we did skip on.
2512 // iff out_indentation is -1 there is a non-tab on the last line.
2513 bool
skip_white(Comments * out_comments,int & out_indentation,bool break_on_newline)2514 Parser::skip_white(Comments *out_comments, int &out_indentation, bool break_on_newline)
2515 {
2516   auto start_ptr     = _ptr;
2517   out_indentation    = 0;
2518   bool found_newline = false;
2519 
2520   const std::string &indentation = _options.indentation;
2521 
2522   while (MAYBE_WHITE[static_cast<uint8_t>(_ptr[0])]) {
2523     if (_ptr[0] == '\n') {
2524       // Unix style newline
2525       _ptr += 1;
2526       _line_nr += 1;
2527       _line_start     = _ptr;
2528       out_indentation = 0;
2529       if (break_on_newline) {
2530         return true;
2531       }
2532       found_newline = true;
2533     } else if (_ptr[0] == '\r') {
2534       // CR-LF - windows style newline
2535       parse_assert(_ptr[1] == '\n', "CR with no LF. \\r only allowed before \\n."); // TODO: this is OK in JSON.
2536       _ptr += 2;
2537       _line_nr += 1;
2538       _line_start     = _ptr;
2539       out_indentation = 0;
2540       if (break_on_newline) {
2541         return true;
2542       }
2543       found_newline = true;
2544     } else if (!indentation.empty() && strncmp(_ptr, indentation.c_str(), indentation.size()) == 0) {
2545       _ptr += indentation.size();
2546       if (_options.enforce_indentation && indentation == "\t") {
2547         parse_assert(out_indentation != -1, "Tabs should only occur on the start of a line!");
2548       }
2549       ++out_indentation;
2550     } else if (_ptr[0] == '\t') {
2551       ++_ptr;
2552       if (_options.enforce_indentation) {
2553         parse_assert(out_indentation != -1, "Tabs should only occur on the start of a line!");
2554       }
2555       ++out_indentation;
2556     } else if (_ptr[0] == ' ') {
2557       if (found_newline && _options.enforce_indentation) {
2558         if (indentation == "\t") {
2559           throw_error("Found a space at beginning of a line. Indentation must be done using tabs!");
2560         } else {
2561           throw_error("Indentation should be a multiple of " + std::to_string(indentation.size()) + " spaces.");
2562         }
2563       }
2564       ++_ptr;
2565       out_indentation = -1;
2566     } else if (_ptr[0] == '/' && _ptr[1] == '/') {
2567       parse_assert(_options.single_line_comments, "Single line comments forbidden.");
2568       // Single line comment
2569       auto start = _ptr;
2570       _ptr += 2;
2571       while (_ptr[0] && _ptr[0] != '\n') {
2572         _ptr += 1;
2573       }
2574       if (out_comments) {
2575         out_comments->emplace_back(start, _ptr - start);
2576       }
2577       out_indentation = 0;
2578       if (break_on_newline) {
2579         return true;
2580       }
2581     } else if (_ptr[0] == '/' && _ptr[1] == '*') {
2582       parse_assert(_options.block_comments, "Block comments forbidden.");
2583       // Multi-line comment
2584       auto state = get_state(); // So we can point out the start if there's an error
2585       _ptr += 2;
2586       unsigned nesting = 1; // We allow nested /**/ comments
2587       do {
2588         if (_ptr[0] == 0) {
2589           set_state(state);
2590           throw_error("Non-ending /* comment");
2591         } else if (_ptr[0] == '/' && _ptr[1] == '*') {
2592           _ptr += 2;
2593           parse_assert(_options.nesting_block_comments, "Nesting comments (/* /* */ */) forbidden.");
2594           nesting += 1;
2595         } else if (_ptr[0] == '*' && _ptr[1] == '/') {
2596           _ptr += 2;
2597           nesting -= 1;
2598         } else if (_ptr[0] == '\n') {
2599           _ptr += 1;
2600           _line_nr += 1;
2601           _line_start = _ptr;
2602         } else {
2603           _ptr += 1;
2604         }
2605       } while (nesting > 0);
2606       if (out_comments) {
2607         out_comments->emplace_back(state.ptr, _ptr - state.ptr);
2608       }
2609       out_indentation = -1;
2610       if (break_on_newline) {
2611         return true;
2612       }
2613     } else {
2614       break;
2615     }
2616   }
2617 
2618   if (start_ptr == _ptr) {
2619     out_indentation = -1;
2620     return false;
2621   } else {
2622     return true;
2623   }
2624 }
2625 
2626 /*
2627 The top-level can be any value, OR the innerds of an object:
2628 foo = 1
2629 "bar": 2
2630 */
2631 Config
top_level()2632 Parser::top_level()
2633 {
2634   bool is_object = false;
2635 
2636   if (_options.implicit_top_object) {
2637     auto state = get_state();
2638     skip_white_ignore_comments();
2639 
2640     if (IDENT_STARTERS[static_cast<uint8_t>(_ptr[0])] && !is_reserved_identifier(_ptr)) {
2641       is_object = true;
2642     } else if (_ptr[0] == '"' || _ptr[0] == '@') {
2643       parse_string();
2644       skip_white_ignore_comments();
2645       is_object = (_ptr[0] == ':' || _ptr[0] == '=');
2646     }
2647 
2648     set_state(state); // restore
2649   }
2650 
2651   Config ret;
2652   tag(ret);
2653 
2654   if (is_object) {
2655     parse_object_contents(ret);
2656   } else {
2657     parse_array_contents(ret);
2658     parse_assert(ret.array_size() <= 1 || _options.implicit_top_array, "Multiple values not allowed without enclosing []");
2659   }
2660 
2661   skip_post_white(&ret);
2662 
2663   parse_assert(_ptr[0] == 0, "Expected EoF");
2664 
2665   if (!is_object && ret.array_size() == 0) {
2666     if (_options.empty_file) {
2667       auto empty_object = Config::object();
2668       if (ret.has_comments()) {
2669         empty_object.comments() = std::move(ret.comments());
2670       }
2671       return empty_object;
2672     } else {
2673       throw_error("Empty file");
2674     }
2675   }
2676 
2677   if (!is_object && ret.array_size() == 1) {
2678     // A single value - not an array after all:
2679     Config first(std::move(ret[0]));
2680     if (ret.has_comments()) {
2681       first.comments().append(std::move(ret.comments()));
2682     }
2683     return first;
2684   }
2685 
2686   return ret;
2687 }
2688 
2689 void
parse_value(Config & dst,bool * out_did_skip_postwhites)2690 Parser::parse_value(Config &dst, bool *out_did_skip_postwhites)
2691 {
2692   int line_indentation;
2693   skip_pre_white(&dst, line_indentation);
2694   tag(dst);
2695 
2696   if (line_indentation >= 0 && _indentation - 1 != line_indentation) {
2697     throw_indentation_error(_indentation - 1, line_indentation);
2698   }
2699 
2700   if (_ptr[0] == '"' || _ptr[0] == '@') {
2701     dst = parse_string();
2702   } else if (_ptr[0] == 'n') {
2703     parse_assert(_ptr[1] == 'u' && _ptr[2] == 'l' && _ptr[3] == 'l', "Expected 'null'");
2704     parse_assert(!IDENT_CHARS[static_cast<uint8_t>(_ptr[4])], "Expected 'null'");
2705     _ptr += 4;
2706     dst = nullptr;
2707   } else if (_ptr[0] == 't') {
2708     parse_assert(_ptr[1] == 'r' && _ptr[2] == 'u' && _ptr[3] == 'e', "Expected 'true'");
2709     parse_assert(!IDENT_CHARS[static_cast<uint8_t>(_ptr[4])], "Expected 'true'");
2710     _ptr += 4;
2711     dst = true;
2712   } else if (_ptr[0] == 'f') {
2713     parse_assert(_ptr[1] == 'a' && _ptr[2] == 'l' && _ptr[3] == 's' && _ptr[4] == 'e', "Expected 'false'");
2714     parse_assert(!IDENT_CHARS[static_cast<uint8_t>(_ptr[5])], "Expected 'false'");
2715     _ptr += 5;
2716     dst = false;
2717   } else if (_ptr[0] == '{') {
2718     parse_object(dst);
2719   } else if (_ptr[0] == '[') {
2720     parse_array(dst);
2721   } else if (_ptr[0] == '#') {
2722     parse_macro(dst);
2723   } else if (_ptr[0] == '+' || _ptr[0] == '-' || _ptr[0] == '.' || ('0' <= _ptr[0] && _ptr[0] <= '9')) {
2724     // Some kind of number:
2725 
2726     if (_ptr[0] == '-' && _ptr[1] == 'i' && _ptr[2] == 'n' && _ptr[3] == 'f') {
2727       parse_assert(!IDENT_CHARS[static_cast<uint8_t>(_ptr[4])], "Expected -inf");
2728       parse_assert(_options.inf, "infinity forbidden.");
2729       _ptr += 4;
2730       dst = -std::numeric_limits<double>::infinity();
2731     } else if (_ptr[0] == '+' && _ptr[1] == 'i' && _ptr[2] == 'n' && _ptr[3] == 'f') {
2732       parse_assert(!IDENT_CHARS[static_cast<uint8_t>(_ptr[4])], "Expected +inf");
2733       parse_assert(_options.inf, "infinity forbidden.");
2734       _ptr += 4;
2735       dst = std::numeric_limits<double>::infinity();
2736     } else if (_ptr[0] == '+' && _ptr[1] == 'N' && _ptr[2] == 'a' && _ptr[3] == 'N') {
2737       parse_assert(!IDENT_CHARS[static_cast<uint8_t>(_ptr[4])], "Expected +NaN");
2738       parse_assert(_options.nan, "NaN (Not a Number) forbidden.");
2739       _ptr += 4;
2740       dst = std::numeric_limits<double>::quiet_NaN();
2741     } else {
2742       parse_finite_number(dst);
2743     }
2744   } else {
2745     throw_error("Expected value");
2746   }
2747 
2748   *out_did_skip_postwhites = skip_post_white(&dst);
2749 }
2750 
2751 void
parse_array(Config & array)2752 Parser::parse_array(Config &array)
2753 {
2754   auto state = get_state();
2755 
2756   swallow('[');
2757 
2758   _indentation += 1;
2759   parse_array_contents(array);
2760   _indentation -= 1;
2761 
2762   if (_ptr[0] == ']') {
2763     _ptr += 1;
2764   } else {
2765     set_state(state);
2766     throw_error("Non-terminated array");
2767   }
2768 }
2769 
2770 void
parse_array_contents(Config & array_cfg)2771 Parser::parse_array_contents(Config &array_cfg)
2772 {
2773   array_cfg.make_array();
2774   auto &array_impl = array_cfg.as_array();
2775 
2776   Comments next_prefix_comments;
2777 
2778   for (;;) {
2779     Config value;
2780     if (!next_prefix_comments.empty()) {
2781       std::swap(value.comments().prefix, next_prefix_comments);
2782     }
2783     int line_indentation;
2784     skip_pre_white(&value, line_indentation);
2785 
2786     if (_ptr[0] == ']') {
2787       if (line_indentation >= 0 && _indentation - 1 != line_indentation) {
2788         throw_indentation_error(_indentation - 1, line_indentation);
2789       }
2790       if (value.has_comments()) {
2791         array_cfg.comments().pre_end_brace = value.comments().prefix;
2792       }
2793       break;
2794     }
2795 
2796     if (!_ptr[0]) {
2797       if (value.has_comments()) {
2798         array_cfg.comments().pre_end_brace = value.comments().prefix;
2799       }
2800       break;
2801     }
2802 
2803     if (line_indentation >= 0 && _indentation != line_indentation) {
2804       throw_indentation_error(_indentation, line_indentation);
2805     }
2806 
2807     if (IDENT_STARTERS[static_cast<uint8_t>(_ptr[0])] && !is_reserved_identifier(_ptr)) {
2808       throw_error("Found identifier; expected value. Did you mean to use a {object} rather than a [array]?");
2809     }
2810 
2811     bool has_separator;
2812     parse_value(value, &has_separator);
2813     int ignore;
2814     skip_white(&next_prefix_comments, ignore, false);
2815 
2816     auto comma_state = get_state();
2817     bool has_comma   = _ptr[0] == ',';
2818 
2819     if (has_comma) {
2820       _ptr += 1;
2821       skip_post_white(&value);
2822       has_separator = true;
2823     }
2824 
2825     array_impl.emplace_back(std::move(value));
2826 
2827     bool is_last_element = !_ptr[0] || _ptr[0] == ']';
2828 
2829     if (is_last_element) {
2830       parse_assert(!has_comma || _options.array_trailing_comma, "Trailing comma forbidden.", comma_state);
2831     } else {
2832       if (_options.array_omit_comma) {
2833         parse_assert(has_separator, "Expected a space, newline, comma or ]");
2834       } else {
2835         parse_assert(has_comma, "Expected a comma or ]");
2836       }
2837     }
2838   }
2839 }
2840 
2841 void
parse_object(Config & object)2842 Parser::parse_object(Config &object)
2843 {
2844   auto state = get_state();
2845 
2846   swallow('{');
2847 
2848   _indentation += 1;
2849   parse_object_contents(object);
2850   _indentation -= 1;
2851 
2852   if (_ptr[0] == '}') {
2853     _ptr += 1;
2854   } else {
2855     set_state(state);
2856     throw_error("Non-terminated object");
2857   }
2858 }
2859 
2860 void
parse_object_contents(Config & object)2861 Parser::parse_object_contents(Config &object)
2862 {
2863   object.make_object();
2864 
2865   Comments next_prefix_comments;
2866 
2867   for (;;) {
2868     Config value;
2869     if (!next_prefix_comments.empty()) {
2870       std::swap(value.comments().prefix, next_prefix_comments);
2871     }
2872     int line_indentation;
2873     skip_pre_white(&value, line_indentation);
2874 
2875     if (_ptr[0] == '}') {
2876       if (line_indentation >= 0 && _indentation - 1 != line_indentation) {
2877         throw_indentation_error(_indentation - 1, line_indentation);
2878       }
2879       if (value.has_comments()) {
2880         object.comments().pre_end_brace = value.comments().prefix;
2881       }
2882       break;
2883     }
2884 
2885     if (!_ptr[0]) {
2886       if (value.has_comments()) {
2887         object.comments().pre_end_brace = value.comments().prefix;
2888       }
2889       break;
2890     }
2891 
2892     if (line_indentation >= 0 && _indentation != line_indentation) {
2893       throw_indentation_error(_indentation, line_indentation);
2894     }
2895 
2896     auto pre_key_state = get_state();
2897     std::string key;
2898 
2899     if (IDENT_STARTERS[static_cast<uint8_t>(_ptr[0])] && !is_reserved_identifier(_ptr)) {
2900       parse_assert(_options.identifiers_keys, "You need to surround keys with quotes");
2901       while (IDENT_CHARS[static_cast<uint8_t>(_ptr[0])]) {
2902         key += _ptr[0];
2903         _ptr += 1;
2904       }
2905     } else if (_ptr[0] == '"' || _ptr[0] == '@') {
2906       key = parse_string();
2907     } else {
2908       throw_error("Object key expected (either an identifier or a quoted string), got " + quote(_ptr[0]));
2909     }
2910 
2911     if (!_options.object_duplicate_keys && object.has_key(key)) {
2912       set_state(pre_key_state);
2913       throw_error("Duplicate key: \"" + key + "\". Already set at " + object[key].where());
2914     }
2915 
2916     bool space_after_key = skip_white_ignore_comments();
2917 
2918     if (_ptr[0] == ':' || (_options.object_separator_equal && _ptr[0] == '=')) {
2919       parse_assert(_options.allow_space_before_colon || _ptr[0] != ':' || !space_after_key, "No space allowed before colon");
2920       _ptr += 1;
2921       skip_white_ignore_comments();
2922     } else if (_options.omit_colon_before_object && (_ptr[0] == '{' || _ptr[0] == '#')) {
2923       // Ok to omit : in this case
2924     } else {
2925       if (_options.object_separator_equal && _options.omit_colon_before_object) {
2926         throw_error("Expected one of '=', ':', '{' or '#' after object key");
2927       } else {
2928         throw_error("Expected : after object key");
2929       }
2930     }
2931 
2932     bool has_separator;
2933     parse_value(value, &has_separator);
2934     int ignore;
2935     skip_white(&next_prefix_comments, ignore, false);
2936 
2937     auto comma_state = get_state();
2938     bool has_comma   = _ptr[0] == ',';
2939 
2940     if (has_comma) {
2941       _ptr += 1;
2942       skip_post_white(&value);
2943       has_separator = true;
2944     }
2945 
2946     object.emplace(std::move(key), std::move(value));
2947 
2948     bool is_last_element = !_ptr[0] || _ptr[0] == '}';
2949 
2950     if (is_last_element) {
2951       parse_assert(!has_comma || _options.object_trailing_comma, "Trailing comma forbidden.", comma_state);
2952     } else {
2953       if (_options.object_omit_comma) {
2954         parse_assert(has_separator, "Expected a space, newline, comma or }");
2955       } else {
2956         parse_assert(has_comma, "Expected a comma or }");
2957       }
2958     }
2959   }
2960 }
2961 
2962 void
parse_int(Config & out)2963 Parser::parse_int(Config &out)
2964 {
2965   const auto start  = _ptr;
2966   const auto result = strtoll(start, const_cast<char **>(&_ptr), 10);
2967   parse_assert(start < _ptr, "Invalid integer");
2968   parse_assert(start[0] != '0' || result == 0, "Integer may not start with a zero");
2969   out = result;
2970 }
2971 
2972 void
parse_float(Config & out)2973 Parser::parse_float(Config &out)
2974 {
2975   const auto start    = _ptr;
2976   const double result = strtod(start, const_cast<char **>(&_ptr));
2977   parse_assert(start < _ptr, "Invalid number");
2978   out = result;
2979 }
2980 
2981 void
parse_finite_number(Config & out)2982 Parser::parse_finite_number(Config &out)
2983 {
2984   const auto pre_sign = _ptr;
2985   int sign            = +1;
2986 
2987   if (_ptr[0] == '+') {
2988     parse_assert(_options.unary_plus, "Prefixing numbers with + is forbidden.");
2989     _ptr += 1;
2990   }
2991   if (_ptr[0] == '-') {
2992     _ptr += 1;
2993     sign = -1;
2994   }
2995 
2996   parse_assert(_ptr[0] != '+' && _ptr[0] != '-', "Duplicate sign");
2997 
2998   // Check if it's an integer:
2999   if (_ptr[0] == '0' && _ptr[1] == 'x') {
3000     parse_assert(_options.hexadecimal_integers, "Hexadecimal numbers forbidden.");
3001     _ptr += 2;
3002     auto start = _ptr;
3003     out        = sign * static_cast<int64_t>(strtoull(start, const_cast<char **>(&_ptr), 16));
3004     parse_assert(start < _ptr, "Missing hexaxdecimal digits after 0x");
3005     return;
3006   }
3007 
3008   if (_ptr[0] == '0' && _ptr[1] == 'b') {
3009     parse_assert(_options.binary_integers, "Binary numbers forbidden.");
3010     _ptr += 2;
3011     auto start = _ptr;
3012     out        = sign * static_cast<int64_t>(strtoull(start, const_cast<char **>(&_ptr), 2));
3013     parse_assert(start < _ptr, "Missing binary digits after 0b");
3014     return;
3015   }
3016 
3017   const char *p = _ptr;
3018 
3019   while ('0' <= *p && *p <= '9') {
3020     p += 1;
3021   }
3022 
3023   if (*p == '.' || *p == 'e' || *p == 'E') {
3024     _ptr = pre_sign;
3025     return parse_float(out);
3026   }
3027 
3028   // It looks like an integer - but it may be too long to represent as one!
3029   const auto MAX_INT_STR = (sign == +1 ? "9223372036854775807" : "9223372036854775808");
3030 
3031   const auto length = p - _ptr;
3032 
3033   if (length < 19) {
3034     _ptr = pre_sign;
3035     return parse_int(out);
3036   }
3037 
3038   if (length > 19) {
3039     _ptr = pre_sign;
3040     return parse_float(out); // Uncommon case optimization
3041   }
3042 
3043   // Compare fast:
3044   for (int i = 0; i < 19; ++i) {
3045     if (_ptr[i] > MAX_INT_STR[i]) {
3046       _ptr = pre_sign;
3047       return parse_float(out);
3048     }
3049     if (_ptr[i] < MAX_INT_STR[i]) {
3050       _ptr = pre_sign;
3051       return parse_int(out);
3052     }
3053   }
3054   _ptr = pre_sign;
3055   return parse_int(out); // Exactly max int
3056 }
3057 
3058 std::string
parse_c_sharp_string()3059 Parser::parse_c_sharp_string()
3060 {
3061   // C# style verbatim string - everything until the next " except "" which is ":
3062   auto state = get_state();
3063   parse_assert(_options.str_csharp_verbatim, "C# @-style verbatim strings forbidden.");
3064   swallow('@');
3065   swallow('"');
3066 
3067   std::string str;
3068 
3069   for (;;) {
3070     if (_ptr[0] == 0) {
3071       set_state(state);
3072       throw_error("Unterminated verbatim string");
3073     } else if (_ptr[0] == '\n') {
3074       throw_error("Newline in verbatim string");
3075     } else if (_ptr[0] == '"' && _ptr[1] == '"') {
3076       // Escaped quote
3077       _ptr += 2;
3078       str.push_back('"');
3079     } else if (_ptr[0] == '"') {
3080       _ptr += 1;
3081       return str;
3082     } else {
3083       str += _ptr[0];
3084       _ptr += 1;
3085     }
3086   }
3087 }
3088 
3089 std::string
parse_string()3090 Parser::parse_string()
3091 {
3092   if (_ptr[0] == '@') {
3093     return parse_c_sharp_string();
3094   }
3095 
3096   auto state = get_state();
3097   parse_assert(_ptr[0] == '"', "Quote (\") expected");
3098 
3099   if (_ptr[1] == '"' && _ptr[2] == '"') {
3100     // Python style multiline string - everything until the next """:
3101     parse_assert(_options.str_python_multiline, "Python \"\"\"-style multiline strings forbidden.");
3102     _ptr += 3;
3103     const char *start = _ptr;
3104     for (;;) {
3105       if (_ptr[0] == 0 || _ptr[1] == 0 || _ptr[2] == 0) {
3106         set_state(state);
3107         throw_error("Unterminated multiline string");
3108       }
3109 
3110       if (_ptr[0] == '"' && _ptr[1] == '"' && _ptr[2] == '"' && _ptr[3] != '"') {
3111         std::string str(start, _ptr);
3112         _ptr += 3;
3113         return str;
3114       }
3115 
3116       if (_ptr[0] == '\n') {
3117         _ptr += 1;
3118         _line_nr += 1;
3119         _line_start = _ptr;
3120       } else {
3121         _ptr += 1;
3122       }
3123     }
3124   } else {
3125     // Normal string
3126     _ptr += 1; // Swallow quote
3127 
3128     std::string str;
3129 
3130     for (;;) {
3131       // Handle larges swats of safe characters at once:
3132       auto safe_end = _ptr;
3133       while (!SPECIAL_CHARACTERS[static_cast<uint8_t>(*safe_end)]) {
3134         ++safe_end;
3135       }
3136 
3137       if (_ptr != safe_end) {
3138         str.append(_ptr, safe_end - _ptr);
3139         _ptr = safe_end;
3140       }
3141 
3142       if (_ptr[0] == 0) {
3143         set_state(state);
3144         throw_error("Unterminated string");
3145       }
3146       if (_ptr[0] == '"') {
3147         _ptr += 1;
3148         return str;
3149       }
3150       if (_ptr[0] == '\n') {
3151         throw_error("Newline in string");
3152       }
3153       if (_ptr[0] == '\t') {
3154         parse_assert(_options.str_allow_tab, "Un-escaped tab not allowed in string");
3155       }
3156 
3157       if (_ptr[0] == '\\') {
3158         // Escape sequence
3159         _ptr += 1;
3160 
3161         if (_ptr[0] == '"') {
3162           str.push_back('"');
3163           _ptr += 1;
3164         } else if (_ptr[0] == '\\') {
3165           str.push_back('\\');
3166           _ptr += 1;
3167         } else if (_ptr[0] == '/') {
3168           str.push_back('/');
3169           _ptr += 1;
3170         } else if (_ptr[0] == 'b') {
3171           str.push_back('\b');
3172           _ptr += 1;
3173         } else if (_ptr[0] == 'f') {
3174           str.push_back('\f');
3175           _ptr += 1;
3176         } else if (_ptr[0] == 'n') {
3177           str.push_back('\n');
3178           _ptr += 1;
3179         } else if (_ptr[0] == 'r') {
3180           str.push_back('\r');
3181           _ptr += 1;
3182         } else if (_ptr[0] == 't') {
3183           str.push_back('\t');
3184           _ptr += 1;
3185         } else if (_ptr[0] == 'u') {
3186           // Four hexadecimal characters
3187           _ptr += 1;
3188           uint64_t codepoint = parse_hex(4);
3189 
3190           if (0xD800 <= codepoint && codepoint <= 0xDBFF) {
3191             // surrogate pair
3192             parse_assert(_ptr[0] == '\\' && _ptr[1] == 'u', "Missing second unicode surrogate.");
3193             _ptr += 2;
3194             uint64_t codepoint2 = parse_hex(4);
3195             parse_assert(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF, "Invalid second unicode surrogate");
3196             codepoint = (codepoint << 10) + codepoint2 - 0x35FDC00;
3197           }
3198 
3199           auto num_bytes_written = encode_utf8(str, codepoint);
3200           parse_assert(num_bytes_written > 0, "Bad unicode codepoint");
3201         } else if (_ptr[0] == 'U') {
3202           // Eight hexadecimal characters
3203           parse_assert(_options.str_32bit_unicode, "\\U 32 bit unicodes forbidden.");
3204           _ptr += 1;
3205           uint64_t unicode       = parse_hex(8);
3206           auto num_bytes_written = encode_utf8(str, unicode);
3207           parse_assert(num_bytes_written > 0, "Bad unicode codepoint");
3208         } else {
3209           throw_error("Unknown escape character " + quote(_ptr[0]));
3210         }
3211       } else {
3212         str.push_back(_ptr[0]);
3213         _ptr += 1;
3214       }
3215     }
3216   }
3217 }
3218 
3219 uint64_t
parse_hex(int count)3220 Parser::parse_hex(int count)
3221 {
3222   uint64_t ret = 0;
3223   for (int i = 0; i < count; ++i) {
3224     ret *= 16;
3225     char c = _ptr[i];
3226     if ('0' <= c && c <= '9') {
3227       ret += static_cast<uint64_t>(c - '0');
3228     } else if ('a' <= c && c <= 'f') {
3229       ret += static_cast<uint64_t>(10 + c - 'a');
3230     } else if ('A' <= c && c <= 'F') {
3231       ret += static_cast<uint64_t>(10 + c - 'A');
3232     } else {
3233       throw_error("Expected hexadecimal digit, got " + quote(_ptr[0]));
3234     }
3235   }
3236   _ptr += count;
3237   return ret;
3238 }
3239 
3240 void
parse_macro(Config & dst)3241 Parser::parse_macro(Config &dst)
3242 {
3243   parse_assert(_options.allow_macro, "#macros forbidden.");
3244 
3245   swallow("#include", "Expected '#include'");
3246   skip_white_ignore_comments();
3247 
3248   bool absolute;
3249   char terminator;
3250 
3251   if (_ptr[0] == '"') {
3252     absolute   = false;
3253     terminator = '"';
3254   } else if (_ptr[0] == '<') {
3255     absolute   = true;
3256     terminator = '>';
3257   } else {
3258     throw_error("Expected \" or <");
3259   }
3260 
3261   auto state = get_state();
3262   _ptr += 1;
3263   auto start = _ptr;
3264   std::string path;
3265   for (;;) {
3266     if (_ptr[0] == 0) {
3267       set_state(state);
3268       throw_error("Unterminated include path");
3269     } else if (_ptr[0] == terminator) {
3270       path = std::string(start, static_cast<size_t>(_ptr - start));
3271       _ptr += 1;
3272       break;
3273     } else if (_ptr[0] == '\n') {
3274       throw_error("Newline in string");
3275     } else {
3276       _ptr += 1;
3277     }
3278   }
3279 
3280   if (!absolute) {
3281     auto my_path = _doc->filename;
3282     auto pos     = my_path.find_last_of('/');
3283     if (pos != std::string::npos) {
3284       auto my_dir = my_path.substr(0, pos + 1);
3285       path        = my_dir + path;
3286     }
3287   }
3288 
3289   auto it = _info.parsed_files.find(path);
3290   if (it == _info.parsed_files.end()) {
3291     auto child_doc = std::make_shared<DocInfo>(path);
3292     child_doc->includers.emplace_back(_doc, _line_nr);
3293     dst                      = parse_file(path.c_str(), _options, child_doc, _info);
3294     _info.parsed_files[path] = dst;
3295   } else {
3296     auto child_doc = it->second.doc();
3297     child_doc->includers.emplace_back(_doc, _line_nr);
3298     dst = it->second;
3299   }
3300 }
3301 
3302 // ----------------------------------------------------------------------------------------
3303 
3304 Config
parse_string(const char * str,const FormatOptions & options,DocInfo_SP doc,ParseInfo & info)3305 parse_string(const char *str, const FormatOptions &options, DocInfo_SP doc, ParseInfo &info)
3306 {
3307   Parser p(str, options, doc, info);
3308   return p.top_level();
3309 }
3310 
3311 Config
parse_string(const char * str,const FormatOptions & options,const char * name)3312 parse_string(const char *str, const FormatOptions &options, const char *name)
3313 {
3314   ParseInfo info;
3315   return parse_string(str, options, std::make_shared<DocInfo>(name), info);
3316 }
3317 
3318 std::string
read_text_file(const char * path)3319 read_text_file(const char *path)
3320 {
3321   FILE *fp = fopen(path, "rb");
3322   if (fp == nullptr) {
3323     CONFIGURU_ONERROR(std::string("Failed to open '") + path + "' for reading: " + strerror(errno));
3324   }
3325   std::string contents;
3326   fseek(fp, 0, SEEK_END);
3327   const auto size = ftell(fp);
3328   if (size < 0) {
3329     fclose(fp);
3330     CONFIGURU_ONERROR(std::string("Failed to find out size of '") + path + "': " + strerror(errno));
3331   }
3332   contents.resize(static_cast<size_t>(size));
3333   rewind(fp);
3334   const auto num_read = fread(&contents[0], 1, contents.size(), fp);
3335   fclose(fp);
3336   if (num_read != contents.size()) {
3337     CONFIGURU_ONERROR(std::string("Failed to read from '") + path + "': " + strerror(errno));
3338   }
3339   return contents;
3340 }
3341 
3342 Config
parse_file(const std::string & path,const FormatOptions & options,DocInfo_SP doc,ParseInfo & info)3343 parse_file(const std::string &path, const FormatOptions &options, DocInfo_SP doc, ParseInfo &info)
3344 {
3345   // auto file = util::FILEWrapper::read_text_file(path);
3346   auto file = read_text_file(path.c_str());
3347   return parse_string(file.c_str(), options, doc, info);
3348 }
3349 
3350 Config
parse_file(const std::string & path,const FormatOptions & options)3351 parse_file(const std::string &path, const FormatOptions &options)
3352 {
3353   ParseInfo info;
3354   return parse_file(path, options, std::make_shared<DocInfo>(path), info);
3355 }
3356 }
3357 
3358 // ----------------------------------------------------------------------------
3359 // Yb        dP 88""Yb 88 888888 888888 88""Yb
3360 //  Yb  db  dP  88__dP 88   88   88__   88__dP
3361 //   YbdPYbdP   88"Yb  88   88   88""   88"Yb
3362 //    YP  YP    88  Yb 88   88   888888 88  Yb
3363 
3364 #include <cstdlib> // strtod
3365 
3366 namespace <