xref: /trafficserver/src/tscore/MatcherUtils.cc (revision 4cfd5a73)
1 /** @file
2 
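  Helper routines shared by ControlMatcher and ReverseProxy: reading a file into memory, URL unescaping, IP range and duration parsing, and config line tokenizing.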
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 /*****************************************************************************
25  *
26  *  MatcherUtils.cc - Various helper routines used in ControlMatcher
27  *                    and ReverseProxy
28  *
29  *
30  ****************************************************************************/
31 
32 #include "tscore/ink_platform.h"
33 #include "tscore/Diags.h"
34 #include "tscore/ink_memory.h"
35 #include "tscore/ink_inet.h"
36 #include "tscore/ink_assert.h"
37 #include "tscore/MatcherUtils.h"
38 #include "tscore/Tokenizer.h"
39 
40 // char* readIntoBuffer(const char* file_path, const char* module_name,
41 //                          int* read_size_ptr)
42 //
43 //  Attempts to open and read arg file_path into a buffer allocated
44 //   off the heap (via ats_malloc() )  Returns a pointer to the buffer
45 //   is successful and nullptr otherwise.
46 //
47 //  CALLEE is responsible for deallocating the buffer via ats_free()
48 //
49 char *
readIntoBuffer(const char * file_path,const char * module_name,int * read_size_ptr)50 readIntoBuffer(const char *file_path, const char *module_name, int *read_size_ptr)
51 {
52   int fd;
53   struct stat file_info;
54   char *file_buf, *buf;
55   int read_size = 0;
56   int file_size;
57 
58   if (read_size_ptr != nullptr) {
59     *read_size_ptr = 0;
60   }
61   // Open the file for Blocking IO.  We will be reading this
62   //   at start up and infrequently afterward
63   if ((fd = open(file_path, O_RDONLY)) < 0) {
64     Error("%s Can not open %s file : %s", module_name, file_path, strerror(errno));
65     return nullptr;
66   }
67 
68   if (fstat(fd, &file_info) < 0) {
69     Error("%s Can not stat %s file : %s", module_name, file_path, strerror(errno));
70     close(fd);
71     return nullptr;
72   }
73 
74   file_size = file_info.st_size; // number of bytes in file
75 
76   if (file_size < 0) {
77     Error("%s Can not get correct file size for %s file : %" PRId64 "", module_name, file_path, (int64_t)file_info.st_size);
78     close(fd);
79     return nullptr;
80   }
81 
82   ink_assert(file_size >= 0);
83 
84   // Allocate a buffer large enough to hold the entire file
85   //   File size should be small and this makes it easy to
86   //   do two passes on the file
87   file_buf = static_cast<char *>(ats_malloc(file_size + 1));
88   // Null terminate the buffer so that string operations will work
89   file_buf[file_size] = '\0';
90 
91   int ret = 0;
92   buf     = file_buf; // working pointer
93 
94   // loop over read, trying to read in as much as we can each time.
95   while (file_size > read_size) {
96     ret = read(fd, buf, file_size - read_size);
97     if (ret <= 0) {
98       break;
99     }
100 
101     buf += ret;
102     read_size += ret;
103   }
104 
105   buf = nullptr; // done with. don't want to accidentally use this instead of file_buf.
106 
107   // Check to make sure that we got the whole file
108   if (ret < 0) {
109     Error("%s Read of %s file failed : %s", module_name, file_path, strerror(errno));
110     ats_free(file_buf);
111     file_buf = nullptr;
112   } else if (read_size < file_size) {
113     // Didn't get the whole file, drop everything. We don't want to return
114     //   something partially read because, ie. with configs, the behaviour
115     //   is undefined.
116     Error("%s Only able to read %d bytes out %d for %s file", module_name, read_size, file_size, file_path);
117     ats_free(file_buf);
118     file_buf = nullptr;
119   }
120 
121   if (file_buf && read_size_ptr) {
122     *read_size_ptr = read_size;
123   }
124 
125   close(fd);
126 
127   return file_buf;
128 }
129 
// int unescapifyStr(char* buffer)
//
//   Unescapifies a URL without making a copy.
//    The passed in string is modified in place:
//      - "%XY", where X and Y are both hex digits, becomes the
//        single byte with that value
//      - '+' becomes ' '
//      - everything else, including a '%' that is not followed by
//        two hex digits, is copied through unchanged
//
//   Returns the length of the resulting string.
//
int
unescapifyStr(char *buffer)
{
  char *src = buffer;
  char *dst = buffer;

  while (*src != '\0') {
    // src[1] is always readable here (at worst it is the terminator) and
    //   src[2] is only examined when src[1] is a hex digit, so neither
    //   test can run past the end of the string.
    if (*src == '%' && isxdigit(static_cast<unsigned char>(src[1])) && isxdigit(static_cast<unsigned char>(src[2]))) {
      const char hex[3] = {src[1], src[2], '\0'};
      *dst++            = static_cast<char>(strtol(hex, nullptr, 16));
      src += 3;
    } else if (*src == '+') {
      *dst++ = ' ';
      ++src;
    } else {
      *dst++ = *src++;
    }
  }
  *dst = '\0';

  return static_cast<int>(dst - buffer);
}
164 
165 const char *
ExtractIpRange(char * match_str,in_addr_t * min,in_addr_t * max)166 ExtractIpRange(char *match_str, in_addr_t *min, in_addr_t *max)
167 {
168   IpEndpoint ip_min, ip_max;
169   const char *zret = ExtractIpRange(match_str, &ip_min.sa, &ip_max.sa);
170   if (nullptr == zret) { // success
171     if (ats_is_ip4(&ip_min) && ats_is_ip4(&ip_max)) {
172       if (min) {
173         *min = ntohl(ats_ip4_addr_cast(&ip_min));
174       }
175       if (max) {
176         *max = ntohl(ats_ip4_addr_cast(&ip_max));
177       }
178     } else {
179       zret = "The addresses were not IPv4 addresses.";
180     }
181   }
182   return zret;
183 }
184 
185 //   char* ExtractIpRange(char* match_str, sockaddr* addr1,
186 //                         sockaddr* addr2)
187 //
188 //   Attempts to extract either an Ip Address or an IP Range
189 //     from match_str.  The range should be two addresses
190 //     separated by a hyphen and no spaces
191 //
192 //   If the extraction is successful, sets addr1 and addr2
193 //     to the extracted values (in the case of a single
194 //     address addr2 = addr1) and returns nullptr
195 //
196 //   If the extraction fails, returns a static string
197 //     that describes the reason for the error.
198 //
199 const char *
ExtractIpRange(char * match_str,sockaddr * addr1,sockaddr * addr2)200 ExtractIpRange(char *match_str, sockaddr *addr1, sockaddr *addr2)
201 {
202   Tokenizer rangeTok("-/");
203   bool mask = strchr(match_str, '/') != nullptr;
204   int mask_bits;
205   int mask_val;
206   int numToks;
207   IpEndpoint la1, la2;
208 
209   // Extract the IP addresses from match data
210   numToks = rangeTok.Initialize(match_str, SHARE_TOKS);
211 
212   if (numToks < 0) {
213     return "no IP address given";
214   } else if (numToks > 2) {
215     return "malformed IP range";
216   }
217 
218   if (0 != ats_ip_pton(rangeTok[0], &la1.sa)) {
219     return "malformed IP address";
220   }
221 
222   // Handle a IP range
223   if (numToks == 2) {
224     if (mask) {
225       if (!ats_is_ip4(&la1)) {
226         return "Masks supported only for IPv4";
227       }
228       // coverity[secure_coding]
229       if (sscanf(rangeTok[1], "%d", &mask_bits) != 1) {
230         return "bad mask specification";
231       }
232 
233       if (!(mask_bits >= 0 && mask_bits <= 32)) {
234         return "invalid mask specification";
235       }
236 
237       if (mask_bits == 32) {
238         mask_val = 0;
239       } else {
240         mask_val = htonl(0xffffffff >> mask_bits);
241       }
242       in_addr_t a = ats_ip4_addr_cast(&la1);
243       ats_ip4_set(&la2, a | mask_val);
244       ats_ip4_set(&la1, a & (mask_val ^ 0xffffffff));
245 
246     } else {
247       if (0 != ats_ip_pton(rangeTok[1], &la2)) {
248         return "malformed ip address at range end";
249       }
250     }
251 
252     if (1 == ats_ip_addr_cmp(&la1.sa, &la2.sa)) {
253       return "range start greater than range end";
254     }
255 
256     ats_ip_copy(addr2, &la2);
257   } else {
258     ats_ip_copy(addr2, &la1);
259   }
260 
261   ats_ip_copy(addr1, &la1);
262   return nullptr;
263 }
264 
// char* tokLine(char* buf, char** last, char cont)
//
//  Similar to strtok_r but only tokenizes on '\n'
//   and will return tokens that are empty strings
//
//  Pass the buffer on the first call and nullptr on later calls;
//   *last carries the position between calls.  The buffer is modified:
//   each terminating '\n' is overwritten with '\0'.
//
//  If cont is not '\0', a '\n' immediately preceded by cont is treated
//   as a line continuation: both characters are replaced with spaces
//   and the token keeps going.
//
//  Returns the start of the next line, or nullptr when no text is left.
//
char *
tokLine(char *buf, char **last, char cont)
{
  char *start;
  char *cur;
  char *prev = nullptr;

  if (buf != nullptr) {
    start = cur = buf;
    *last       = buf;
  } else {
    start = cur = (*last) + 1;
  }

  while (*cur != '\0') {
    if (*cur == '\n') {
      if (cont != '\0' && prev != nullptr && *prev == cont) {
        *prev = ' ';
        *cur  = ' ';
      } else {
        *cur  = '\0';
        *last = cur;
        return start;
      }
    }
    prev = cur++;
  }

  // Return the last line even if it does
  //  not end in a newline.  Compare against start, not *last + 1: on the
  //  first call *last is the buffer itself, which used to make a
  //  one character buffer look empty.
  if (cur > start) {
    *last = cur - 1;
    return start;
  }

  return nullptr;
}
307 
// Printable names of the matcher types.
//   NOTE(review): presumably indexed by matcher_type - confirm the
//   order against the enum declaration.
const char *matcher_type_str[] = {
  "invalid",   //
  "host",      //
  "domain",    //
  "ip",        //
  "url_regex", //
  "url",       //
  "host_regex" //
};
309 
// char* processDurationString(char* str, int* seconds)
//
//   Take a duration string which is composed of
//      digits followed by a unit specifier
//         w - week
//         d - day
//         h - hour
//         m - min
//         s - sec
//
//   Several digits/unit groups may follow each other ("1h30m")
//    and their values are added together.
//
//   Trailing digits without a specifier are
//    assumed to be seconds
//
//   The string is modified: every unit specifier that is
//    consumed is overwritten with '\0'.
//
//   Returns nullptr on success, with the total stored in *seconds,
//    and a static error string on failure, in which case *seconds
//    is left untouched.  A total that does not fit in an int is
//    reported as "Time too big".
//
const char *
processDurationString(char *str, int *seconds)
{
  // Largest value an int can hold (written this way to avoid needing <climits>).
  constexpr int64_t max_seconds = static_cast<int64_t>(~0U >> 1);

  if (str == nullptr) {
    return "Missing time";
  }

  int64_t total    = 0;     // running sum, clamped to max_seconds + 1 once it overflows
  int64_t pending  = 0;     // value of the digit run currently being read, clamped the same way
  bool have_digits = false; // true once the current run has at least one digit

  // The length is taken up front because the loop writes '\0' into the string.
  const size_t len = strlen(str);
  for (size_t i = 0; i < len; i++) {
    char &c = str[i];

    if (c >= '0' && c <= '9') {
      pending = pending * 10 + (c - '0');
      if (pending > max_seconds) {
        pending = max_seconds + 1;
      }
      have_digits = true;
      continue;
    }

    // Make sure there is a time to process
    if (!have_digits) {
      return "Malformed time";
    }

    int64_t multiplier = 0;
    switch (c) {
    case 'w':
      multiplier = 7 * 24 * 60 * 60;
      break;
    case 'd':
      multiplier = 24 * 60 * 60;
      break;
    case 'h':
      multiplier = 60 * 60;
      break;
    case 'm':
      multiplier = 60;
      break;
    case 's':
      multiplier = 1;
      break;
    case '-':
      return "Negative time not permitted";
    default:
      return "Invalid time unit specified";
    }

    c = '\0';

    // Both operands are clamped, so this can not overflow 64 bits.
    total += multiplier * pending;
    if (total > max_seconds) {
      total = max_seconds + 1;
    }

    pending     = 0;
    have_digits = false;
  }

  // Read any trailing seconds
  if (have_digits) {
    total += pending;
  }

  // The overflow is only reported here so that a malformed string is
  //  still diagnosed as malformed first.
  if (total > max_seconds) {
    return "Time too big";
  }

  *seconds = static_cast<int>(total);
  return nullptr;
}
409 
410 const matcher_tags http_dest_tags = {"dest_host", "dest_domain", "dest_ip", "url_regex", "url", "host_regex", true};
411 
412 const matcher_tags ip_allow_src_tags = {nullptr, nullptr, "src_ip", nullptr, nullptr, nullptr, false};
413 
414 const matcher_tags ip_allow_dest_tags = {nullptr, nullptr, "dest_ip", nullptr, nullptr, nullptr, true};
415 
416 const matcher_tags socks_server_tags = {nullptr, nullptr, "dest_ip", nullptr, nullptr, nullptr, false};
417 
418 // char* parseConfigLine(char* line, matcher_line* p_line,
419 //                       const matcher_tags* tags)
420 //
421 //   Parse out a config file line suitable for passing to
422 //    a ControlMatcher object
423 //
424 //   If successful, nullptr is returned.  If unsuccessful,
425 //     a static error string is returned
426 //
427 const char *
parseConfigLine(char * line,matcher_line * p_line,const matcher_tags * tags)428 parseConfigLine(char *line, matcher_line *p_line, const matcher_tags *tags)
429 {
430   enum pState {
431     FIND_LABEL,
432     PARSE_LABEL,
433     PARSE_VAL,
434     START_PARSE_VAL,
435     CONSUME,
436   };
437 
438   pState state      = FIND_LABEL;
439   bool inQuote      = false;
440   char *copyForward = nullptr;
441   char *copyFrom    = nullptr;
442   char *s           = line;
443   char *label       = nullptr;
444   char *val         = nullptr;
445   int num_el        = 0;
446   matcher_type type = MATCH_NONE;
447 
448   // Zero out the parsed line structure
449   memset(p_line, 0, sizeof(matcher_line));
450 
451   if (*s == '\0') {
452     return nullptr;
453   }
454 
455   do {
456     switch (state) {
457     case FIND_LABEL:
458       if (!isspace(*s)) {
459         state = PARSE_LABEL;
460         label = s;
461       }
462       s++;
463       break;
464     case PARSE_LABEL:
465       if (*s == '=') {
466         *s    = '\0';
467         state = START_PARSE_VAL;
468       }
469       s++;
470       break;
471     case START_PARSE_VAL:
472       // Init state needed for parsing values
473       copyForward = nullptr;
474       copyFrom    = nullptr;
475 
476       if (*s == '"') {
477         inQuote = true;
478         val     = s + 1;
479       } else if (*s == '\\') {
480         inQuote = false;
481         val     = s + 1;
482       } else {
483         inQuote = false;
484         val     = s;
485       }
486 
487       if (inQuote == false && (isspace(*s) || *(s + 1) == '\0')) {
488         state = CONSUME;
489       } else {
490         state = PARSE_VAL;
491       }
492 
493       s++;
494       break;
495     case PARSE_VAL:
496       if (inQuote == true) {
497         if (*s == '\\') {
498           // The next character is escaped
499           //
500           // To remove the escaped character
501           // we need to copy
502           //  the rest of the entry over it
503           //  but since we do not know where the
504           //  end is right now, defer the work
505           //  into the future
506 
507           if (copyForward != nullptr) {
508             // Perform the prior copy forward
509             int bytesCopy = s - copyFrom;
510             memcpy(copyForward, copyFrom, s - copyFrom);
511             ink_assert(bytesCopy > 0);
512 
513             copyForward += bytesCopy;
514             copyFrom = s + 1;
515           } else {
516             copyForward = s;
517             copyFrom    = s + 1;
518           }
519 
520           // Scroll past the escape character
521           s++;
522 
523           // Handle the case that places us
524           //  at the end of the file
525           if (*s == '\0') {
526             break;
527           }
528         } else if (*s == '"') {
529           state = CONSUME;
530           *s    = '\0';
531         }
532       } else if ((*s == '\\' && ParseRules::is_digit(*(s + 1))) || !ParseRules::is_char(*s)) {
533         // INKqa10511
534         // traffic server need to handle unicode characters
535         // right now ignore the entry
536         return "Unrecognized encoding scheme";
537       } else if (isspace(*s)) {
538         state = CONSUME;
539         *s    = '\0';
540       }
541 
542       s++;
543 
544       // If we are now at the end of the line,
545       //   we need to consume final data
546       if (*s == '\0') {
547         state = CONSUME;
548       }
549       break;
550     case CONSUME:
551       break;
552     }
553 
554     if (state == CONSUME) {
555       // See if there are any quote copy overs
556       //   we've pushed into the future
557       if (copyForward != nullptr) {
558         int toCopy = (s - 1) - copyFrom;
559         memcpy(copyForward, copyFrom, toCopy);
560         *(copyForward + toCopy) = '\0';
561       }
562 
563       p_line->line[0][num_el] = label;
564       p_line->line[1][num_el] = val;
565       type                    = MATCH_NONE;
566 
567       // Check to see if this the primary specifier we are looking for
568       if (tags->match_ip && strcasecmp(tags->match_ip, label) == 0) {
569         type = MATCH_IP;
570       } else if (tags->match_host && strcasecmp(tags->match_host, label) == 0) {
571         type = MATCH_HOST;
572       } else if (tags->match_domain && strcasecmp(tags->match_domain, label) == 0) {
573         type = MATCH_DOMAIN;
574       } else if (tags->match_regex && strcasecmp(tags->match_regex, label) == 0) {
575         type = MATCH_REGEX;
576       } else if (tags->match_url && strcasecmp(tags->match_url, label) == 0) {
577         type = MATCH_URL;
578       } else if (tags->match_host_regex && strcasecmp(tags->match_host_regex, label) == 0) {
579         type = MATCH_HOST_REGEX;
580       }
581       // If this a destination tag, use it
582       if (type != MATCH_NONE) {
583         // Check to see if this second destination specifier
584         if (p_line->type != MATCH_NONE) {
585           if (tags->dest_error_msg == false) {
586             return "Multiple Sources Specified";
587           } else {
588             return "Multiple Destinations Specified";
589           }
590         } else {
591           p_line->dest_entry = num_el;
592           p_line->type       = type;
593         }
594       }
595       num_el++;
596 
597       if (num_el > MATCHER_MAX_TOKENS) {
598         return "Malformed line: Too many tokens";
599       }
600 
601       state = FIND_LABEL;
602     }
603   } while (*s != '\0');
604 
605   p_line->num_el = num_el;
606 
607   if (state != CONSUME && state != FIND_LABEL) {
608     return "Malformed entry";
609   }
610 
611   if (!tags->empty() && p_line->type == MATCH_NONE) {
612     if (tags->dest_error_msg == false) {
613       return "No source specifier";
614     } else {
615       return "No destination specifier";
616     }
617   }
618 
619   return nullptr;
620 }
621