xref: /trafficserver/plugins/cachekey/configs.cc (revision db8cd14a)
1 /*
2   Licensed to the Apache Software Foundation (ASF) under one
3   or more contributor license agreements.  See the NOTICE file
4   distributed with this work for additional information
5   regarding copyright ownership.  The ASF licenses this file
6   to you under the Apache License, Version 2.0 (the
7   "License"); you may not use this file except in compliance
8   with the License.  You may obtain a copy of the License at
9 
10   http://www.apache.org/licenses/LICENSE-2.0
11 
12   Unless required by applicable law or agreed to in writing, software
13   distributed under the License is distributed on an "AS IS" BASIS,
14   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   See the License for the specific language governing permissions and
16   limitations under the License.
17 */
18 
19 /**
20  * @file configs.cc
21  * @brief Plugin configuration.
22  */
23 
24 #include <fstream>   /* std::ifstream */
25 #include <sstream>   /* std::istringstream */
26 #include <getopt.h>  /* getopt_long() */
27 #include <strings.h> /* strncasecmp() */
28 #include <cstring>   /* strlen() */
29 
30 #include "configs.h"
31 
32 template <typename ContainerType>
33 static void
commaSeparateString(ContainerType & c,const String & input)34 commaSeparateString(ContainerType &c, const String &input)
35 {
36   std::istringstream istr(input);
37   String token;
38 
39   while (std::getline(istr, token, ',')) {
40     c.insert(c.end(), token);
41   }
42 }
43 
/**
 * @brief Interprets an option value as a boolean.
 *
 * A null pointer (no value supplied) counts as true. Otherwise the value is true when it starts,
 * case-insensitively, with "true", "1" or "yes"; only the prefix is compared.
 *
 * @param arg option value, may be nullptr.
 * @return true if the value is considered "true", false otherwise.
 */
static bool
isTrue(const char *arg)
{
  if (nullptr == arg) {
    return true;
  }
  if (0 == strncasecmp("true", arg, 4)) {
    return true;
  }
  if (0 == strncasecmp("1", arg, 1)) {
    return true;
  }
  return 0 == strncasecmp("yes", arg, 3);
}
49 
50 void
setExclude(const char * arg)51 ConfigElements::setExclude(const char *arg)
52 {
53   ::commaSeparateString<StringSet>(_exclude, arg);
54 }
55 
56 void
setInclude(const char * arg)57 ConfigElements::setInclude(const char *arg)
58 {
59   ::commaSeparateString<StringSet>(_include, arg);
60 }
61 
62 static void
setPattern(MultiPattern & multiPattern,const char * arg)63 setPattern(MultiPattern &multiPattern, const char *arg)
64 {
65   Pattern *p = new Pattern();
66   if (nullptr != p && p->init(arg)) {
67     multiPattern.add(p);
68   } else {
69     delete p;
70   }
71 }
72 
73 bool
setCapture(const String & name,const String & pattern)74 ConfigElements::setCapture(const String &name, const String &pattern)
75 {
76   auto it = _captures.find(name);
77   if (_captures.end() == it) {
78     auto mp = new MultiPattern(name);
79     if (nullptr != mp) {
80       _captures[name] = mp;
81     } else {
82       return false;
83     }
84   }
85   setPattern(*_captures[name], pattern.c_str());
86   CacheKeyDebug("added capture pattern '%s' for element '%s'", pattern.c_str(), name.c_str());
87   return true;
88 }
89 
90 void
addCapture(const char * arg)91 ConfigElements::addCapture(const char *arg)
92 {
93   StringView args(arg);
94   StringView::size_type pos = args.find_first_of(':');
95   if (StringView::npos != pos) {
96     String name(args.substr(0, pos));
97     if (!name.empty()) {
98       String pattern(args.substr(pos + 1));
99       if (!pattern.empty()) {
100         if (!setCapture(name, pattern)) {
101           CacheKeyError("failed to add capture: '%s'", arg);
102         }
103       } else {
104         CacheKeyError("missing pattern in capture: '%s'", arg);
105       }
106     } else {
107       CacheKeyError("missing element name in capture: %s", arg);
108     }
109   } else {
110     CacheKeyError("invalid capture: %s, should be 'name:<capture_definition>", arg);
111   }
112 }
113 
114 void
setExcludePatterns(const char * arg)115 ConfigElements::setExcludePatterns(const char *arg)
116 {
117   setPattern(_excludePatterns, arg);
118 }
119 
120 void
setIncludePatterns(const char * arg)121 ConfigElements::setIncludePatterns(const char *arg)
122 {
123   setPattern(_includePatterns, arg);
124 }
125 
126 void
setSort(const char * arg)127 ConfigElements::setSort(const char *arg)
128 {
129   _sort = ::isTrue(arg);
130 }
131 
132 void
setRemove(const char * arg)133 ConfigElements::setRemove(const char *arg)
134 {
135   _remove = ::isTrue(arg);
136 }
137 
138 bool
toBeRemoved() const139 ConfigElements::toBeRemoved() const
140 {
141   return _remove;
142 }
143 
144 bool
toBeSkipped() const145 ConfigElements::toBeSkipped() const
146 {
147   return _skip;
148 }
149 
150 bool
toBeSorted() const151 ConfigElements::toBeSorted() const
152 {
153   return _sort;
154 }
155 
156 bool
toBeAdded(const String & element) const157 ConfigElements::toBeAdded(const String &element) const
158 {
159   /* Exclude the element if it is in the exclusion list. If the list is empty don't exclude anything. */
160   bool exclude = (!_exclude.empty() && _exclude.find(element) != _exclude.end()) ||
161                  (!_excludePatterns.empty() && _excludePatterns.match(element));
162   CacheKeyDebug("%s '%s' %s the 'exclude' rule", name().c_str(), element.c_str(), exclude ? "matches" : "does not match");
163 
164   /* Include the element only if it is in the inclusion list. If the list is empty include everything. */
165   bool include =
166     ((_include.empty() && _includePatterns.empty()) || _include.find(element) != _include.end()) || _includePatterns.match(element);
167   CacheKeyDebug("%s '%s' %s the 'include' rule", name().c_str(), element.c_str(), include ? "matches" : "do not match");
168 
169   if (include && !exclude) {
170     CacheKeyDebug("%s '%s' should be added to cache key", name().c_str(), element.c_str());
171     return true;
172   }
173 
174   CacheKeyDebug("%s '%s' should not be added to cache key", name().c_str(), element.c_str());
175   return false;
176 }
177 
178 inline bool
noIncludeExcludeRules() const179 ConfigElements::noIncludeExcludeRules() const
180 {
181   return _exclude.empty() && _excludePatterns.empty() && _include.empty() && _includePatterns.empty();
182 }
183 
~ConfigElements()184 ConfigElements::~ConfigElements()
185 {
186   for (auto &_capture : _captures) {
187     delete _capture.second;
188   }
189 }
190 
191 /**
192  * @brief finalizes the query parameters related configuration.
193  *
194  * If we don't have any inclusions or exclusions and don't have to sort, we don't need to do anything
195  * with the query string. Include the whole original query in the cache key.
196  */
197 bool
finalize()198 ConfigQuery::finalize()
199 {
200   _skip = noIncludeExcludeRules() && !_sort;
201   return true;
202 }
203 
204 const String ConfigQuery::_NAME = "query parameter";
205 inline const String &
name() const206 ConfigQuery::name() const
207 {
208   return _NAME;
209 }
210 
211 /**
212  * @briefs finalizes the headers related configuration.
213  *
214  * If the all include and exclude lists are empty, including patterns, then there is no headers to be included.
215  */
216 bool
finalize()217 ConfigHeaders::finalize()
218 {
219   _remove = noIncludeExcludeRules();
220   return true;
221 }
222 
223 const String ConfigHeaders::_NAME = "header";
224 inline const String &
name() const225 ConfigHeaders::name() const
226 {
227   return _NAME;
228 }
229 
230 /**
231  * @brief finalizes the cookies related configuration.
232  *
233  * If the all include and exclude lists are empty, including pattern, then there is no cookies to be included.
234  */
235 bool
finalize()236 ConfigCookies::finalize()
237 {
238   _remove = noIncludeExcludeRules();
239   return true;
240 }
241 
242 const String ConfigCookies::_NAME = "cookie";
243 inline const String &
name() const244 ConfigCookies::name() const
245 {
246   return _NAME;
247 }
248 
249 /**
250  * @brief Accessor method for getting include list only for headers config.
251  *
252  * We would not need to drill this hole in the design if there was an efficient way to iterate through the headers in the traffic
253  * server API (inefficiency mentioned in ts/ts.h), iterating through the "include" list should be good enough work-around.
254  */
255 const StringSet &
getInclude() const256 ConfigHeaders::getInclude() const
257 {
258   return _include;
259 }
260 
261 /**
262  * @brief Rebase a relative path onto the configuration directory.
263  */
264 static String
makeConfigPath(const String & path)265 makeConfigPath(const String &path)
266 {
267   if (path.empty() || path[0] == '/') {
268     return path;
269   }
270 
271   return String(TSConfigDirGet()) + "/" + path;
272 }
273 
274 /**
275  * @brief a helper function which loads the classifier from files.
276  * @param args classname + filename in '<classname>:<filename>' format.
277  * @param blacklist true - load as a blacklist classifier, false - whitelist.
278  * @return true if successful, false otherwise.
279  */
280 bool
loadClassifiers(const String & args,bool blacklist)281 Configs::loadClassifiers(const String &args, bool blacklist)
282 {
283   static const char *EXPECTED_FORMAT = "<classname>:<filename>";
284 
285   std::size_t d = args.find(':');
286   if (String::npos == d) {
287     CacheKeyError("failed to parse classifier string '%s', expected format: '%s'", optarg ? optarg : "null", EXPECTED_FORMAT);
288     return false;
289   }
290 
291   String classname(optarg, 0, d);
292   String filename(optarg, d + 1, String::npos);
293 
294   if (classname.empty() || filename.empty()) {
295     CacheKeyError("'<classname>' and '<filename>' in '%s' cannot be empty, expected format: '%s'", optarg ? optarg : "null",
296                   EXPECTED_FORMAT);
297     return false;
298   }
299 
300   String path(makeConfigPath(filename));
301 
302   std::ifstream ifstr;
303   String regex;
304   unsigned lineno = 0;
305 
306   ifstr.open(path.c_str());
307   if (!ifstr) {
308     CacheKeyError("failed to load classifier '%s' from '%s'", classname.c_str(), path.c_str());
309     return false;
310   }
311 
312   MultiPattern *multiPattern;
313   if (blacklist) {
314     multiPattern = new NonMatchingMultiPattern(classname);
315   } else {
316     multiPattern = new MultiPattern(classname);
317   }
318   if (nullptr == multiPattern) {
319     CacheKeyError("failed to allocate classifier '%s'", classname.c_str());
320     return false;
321   }
322 
323   CacheKeyDebug("loading classifier '%s' from '%s'", classname.c_str(), path.c_str());
324 
325   while (std::getline(ifstr, regex)) {
326     Pattern *p;
327     String::size_type pos;
328 
329     ++lineno;
330 
331     // Allow #-prefixed comments.
332     pos = regex.find_first_of('#');
333     if (pos != String::npos) {
334       regex.resize(pos);
335     }
336 
337     if (regex.empty()) {
338       continue;
339     }
340 
341     p = new Pattern();
342 
343     if (nullptr != p && p->init(regex)) {
344       if (blacklist) {
345         CacheKeyDebug("Added pattern '%s' to black list '%s'", regex.c_str(), classname.c_str());
346         multiPattern->add(p);
347       } else {
348         CacheKeyDebug("Added pattern '%s' to white list '%s'", regex.c_str(), classname.c_str());
349         multiPattern->add(p);
350       }
351     } else {
352       CacheKeyError("%s:%u: failed to parse regex '%s'", path.c_str(), lineno, regex.c_str());
353       delete p;
354     }
355   }
356 
357   ifstr.close();
358 
359   if (!multiPattern->empty()) {
360     _classifier.add(multiPattern);
361   } else {
362     delete multiPattern;
363   }
364 
365   return true;
366 }
367 
368 /**
369  * @brief initializes plugin configuration.
370  * @param argc number of plugin parameters
371  * @param argv plugin parameters
372  * @param perRemapConfig boolean showing if this is per-remap config (vs global config).
373  *
374  */
375 bool
init(int argc,const char * argv[],bool perRemapConfig)376 Configs::init(int argc, const char *argv[], bool perRemapConfig)
377 {
378   static const struct option longopt[] = {
379     {const_cast<char *>("exclude-params"), optional_argument, nullptr, 'a'},
380     {const_cast<char *>("include-params"), optional_argument, nullptr, 'b'},
381     {const_cast<char *>("include-match-params"), optional_argument, nullptr, 'c'},
382     {const_cast<char *>("exclude-match-params"), optional_argument, nullptr, 'd'},
383     {const_cast<char *>("sort-params"), optional_argument, nullptr, 'e'},
384     {const_cast<char *>("remove-all-params"), optional_argument, nullptr, 'f'},
385     {const_cast<char *>("include-headers"), optional_argument, nullptr, 'g'},
386     {const_cast<char *>("include-cookies"), optional_argument, nullptr, 'h'},
387     {const_cast<char *>("ua-capture"), optional_argument, nullptr, 'i'},
388     {const_cast<char *>("ua-whitelist"), optional_argument, nullptr, 'j'},
389     {const_cast<char *>("ua-blacklist"), optional_argument, nullptr, 'k'},
390     {const_cast<char *>("static-prefix"), optional_argument, nullptr, 'l'},
391     {const_cast<char *>("capture-prefix"), optional_argument, nullptr, 'm'},
392     {const_cast<char *>("capture-prefix-uri"), optional_argument, nullptr, 'n'},
393     {const_cast<char *>("capture-path"), optional_argument, nullptr, 'o'},
394     {const_cast<char *>("capture-path-uri"), optional_argument, nullptr, 'p'},
395     {const_cast<char *>("remove-prefix"), optional_argument, nullptr, 'q'},
396     {const_cast<char *>("remove-path"), optional_argument, nullptr, 'r'},
397     {const_cast<char *>("separator"), optional_argument, nullptr, 's'},
398     {const_cast<char *>("uri-type"), optional_argument, nullptr, 't'},
399     {const_cast<char *>("key-type"), optional_argument, nullptr, 'u'},
400     {const_cast<char *>("capture-header"), optional_argument, nullptr, 'v'},
401     {const_cast<char *>("canonical-prefix"), optional_argument, nullptr, 'w'},
402     /* reserve 'z' for 'config' files */
403     {nullptr, 0, nullptr, 0},
404   };
405 
406   bool status = true;
407 
408   /* For remap.config: argv contains the "to" and "from" URLs. Skip the first so that the second one poses as the program name.
409    * For plugin.config: argv contains the plugin shared object name. Don't skip any */
410   if (perRemapConfig) {
411     argc--;
412     argv++;
413   }
414 
415   for (;;) {
416     int opt;
417     opt = getopt_long(argc, const_cast<char *const *>(argv), "", longopt, nullptr);
418 
419     if (opt == -1) {
420       break;
421     }
422     CacheKeyDebug("processing %s", argv[optind - 1]);
423 
424     switch (opt) {
425     case 'a': /* exclude-params */
426       _query.setExclude(optarg);
427       break;
428     case 'b': /* include-params */
429       _query.setInclude(optarg);
430       break;
431     case 'c': /* include-match-params */
432       _query.setIncludePatterns(optarg);
433       break;
434     case 'd': /* exclude-match-params */
435       _query.setExcludePatterns(optarg);
436       break;
437     case 'e': /* sort-params */
438       _query.setSort(optarg);
439       break;
440     case 'f': /* remove-all-params */
441       _query.setRemove(optarg);
442       break;
443     case 'g': /* include-headers */
444       _headers.setInclude(optarg);
445       break;
446     case 'h': /* include-cookies */
447       _cookies.setInclude(optarg);
448       break;
449     case 'i': /* ua-capture */
450       if (!_uaCapture.init(optarg)) {
451         CacheKeyError("failed to initialize User-Agent capture pattern '%s'", optarg);
452         status = false;
453       }
454       break;
455     case 'j': /* ua-whitelist */
456       if (!loadClassifiers(optarg, /* blacklist = */ false)) {
457         CacheKeyError("failed to load User-Agent pattern white-list '%s'", optarg);
458         status = false;
459       }
460       break;
461     case 'k': /* ua-blacklist */
462       if (!loadClassifiers(optarg, /* blacklist = */ true)) {
463         CacheKeyError("failed to load User-Agent pattern black-list '%s'", optarg);
464         status = false;
465       }
466       break;
467     case 'l': /* static-prefix */
468       _prefix.assign(optarg);
469       CacheKeyDebug("prefix='%s'", _prefix.c_str());
470       break;
471     case 'm': /* capture-prefix */
472       if (!_prefixCapture.init(optarg)) {
473         CacheKeyError("failed to initialize prefix URI host:port capture pattern '%s'", optarg);
474         status = false;
475       }
476       break;
477     case 'n': /* capture-prefix-uri */
478       if (!_prefixCaptureUri.init(optarg)) {
479         CacheKeyError("failed to initialize prefix URI capture pattern '%s'", optarg);
480         status = false;
481       }
482       break;
483     case 'o': /* capture-path */
484       if (!_pathCapture.init(optarg)) {
485         CacheKeyError("failed to initialize path capture pattern '%s'", optarg);
486         status = false;
487       }
488       break;
489     case 'p': /* capture-path-uri */
490       if (!_pathCaptureUri.init(optarg)) {
491         CacheKeyError("failed to initialize path URI capture pattern '%s'", optarg);
492         status = false;
493       }
494       break;
495     case 'q': /* remove-prefix */
496       _prefixToBeRemoved = isTrue(optarg);
497       break;
498     case 'r': /* remove-path */
499       _pathToBeRemoved = isTrue(optarg);
500       break;
501     case 's': /* separator */
502       setSeparator(optarg);
503       break;
504     case 't': /* uri-type */
505       setUriType(optarg);
506       break;
507     case 'u': /* key-type */
508       setKeyType(optarg);
509       break;
510     case 'v': /* capture-header */
511       _headers.addCapture(optarg);
512       break;
513     case 'w': /* canonical-prefix */
514       _canonicalPrefix = isTrue(optarg);
515       break;
516     }
517   }
518 
519   status &= finalize();
520 
521   return status;
522 }
523 
524 /**
525  * @brief provides means for post-processing of the plugin parameters to finalize the configuration or to "cache" some of the
526  * decisions for later use.
527  * @return true if successful, false if failure.
528  */
529 bool
finalize()530 Configs::finalize()
531 {
532   if (_keyTypes.empty()) {
533     CacheKeyDebug("setting cache key");
534     _keyTypes = {CACHE_KEY};
535   }
536   return _query.finalize() && _headers.finalize() && _cookies.finalize();
537 }
538 
539 bool
prefixToBeRemoved()540 Configs::prefixToBeRemoved()
541 {
542   return _prefixToBeRemoved;
543 }
544 
545 bool
pathToBeRemoved()546 Configs::pathToBeRemoved()
547 {
548   return _pathToBeRemoved;
549 }
550 
551 bool
canonicalPrefix()552 Configs::canonicalPrefix()
553 {
554   return _canonicalPrefix;
555 }
556 
557 void
setSeparator(const char * arg)558 Configs::setSeparator(const char *arg)
559 {
560   if (nullptr != arg) {
561     _separator.assign(arg);
562   }
563 }
564 
565 const String &
getSeparator()566 Configs::getSeparator()
567 {
568   return _separator;
569 }
570 
571 void
setUriType(const char * arg)572 Configs::setUriType(const char *arg)
573 {
574   if (nullptr != arg) {
575     if (5 == strlen(arg) && 0 == strncasecmp(arg, "remap", 5)) {
576       _uriType = CacheKeyUriType::REMAP;
577       CacheKeyDebug("using remap URI type");
578     } else if (8 == strlen(arg) && 0 == strncasecmp(arg, "pristine", 8)) {
579       _uriType = CacheKeyUriType::PRISTINE;
580       CacheKeyDebug("using pristine URI type");
581     } else {
582       CacheKeyError("unrecognized URI type '%s', using default 'remap'", arg);
583     }
584   } else {
585     CacheKeyError("found an empty URI type, using default 'remap'");
586   }
587 }
588 
589 void
setKeyType(const char * arg)590 Configs::setKeyType(const char *arg)
591 {
592   if (nullptr != arg) {
593     StringVector types;
594     ::commaSeparateString<StringVector>(types, arg);
595 
596     for (auto type : types) {
597       if (9 == type.length() && 0 == strncasecmp(type.c_str(), "cache_key", 9)) {
598         _keyTypes.insert(CacheKeyKeyType::CACHE_KEY);
599         CacheKeyDebug("setting cache key");
600       } else if (20 == type.length() && 0 == strncasecmp(type.c_str(), "parent_selection_url", 20)) {
601         _keyTypes.insert(CacheKeyKeyType::PARENT_SELECTION_URL);
602         CacheKeyDebug("setting parent selection URL");
603       } else {
604         CacheKeyError("unrecognized key type '%s', using default 'cache_key'", arg);
605       }
606     }
607   } else {
608     CacheKeyError("found an empty key type, using default 'cache_key'");
609   }
610 }
611 
612 CacheKeyUriType
getUriType()613 Configs::getUriType()
614 {
615   return _uriType;
616 }
617 
618 CacheKeyKeyTypeSet &
getKeyType()619 Configs::getKeyType()
620 {
621   return _keyTypes;
622 }
623 
624 const char *
getCacheKeyUriTypeName(CacheKeyUriType type)625 getCacheKeyUriTypeName(CacheKeyUriType type)
626 {
627   switch (type) {
628   case REMAP:
629     return "remap";
630   case PRISTINE:
631     return "pristine";
632   default:
633     return "unknown";
634   }
635 }
636 
637 const char *
getCacheKeyKeyTypeName(CacheKeyKeyType type)638 getCacheKeyKeyTypeName(CacheKeyKeyType type)
639 {
640   switch (type) {
641   case CACHE_KEY:
642     return "cache key";
643   case PARENT_SELECTION_URL:
644     return "parent selection url";
645   default:
646     return "unknown";
647   }
648 }
649