xref: /trafficserver/src/tscore/HostLookup.cc (revision 9d51c2f9)
1 /** @file
2 
3   A brief file description
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 /*****************************************************************************
25  *
26  *  HostLookup.cc - Implementation of a hostname/domainname matcher
27  *
28  *
29  ****************************************************************************/
30 
31 #include "tscore/ink_inet.h"
32 #include "tscore/ink_assert.h"
33 #include "tscore/HostLookup.h"
34 #include "tscpp/util/TextView.h"
35 
36 #include <string_view>
37 #include <array>
38 #include <memory>
39 
40 using std::string_view;
41 using ts::TextView;
42 
43 namespace
44 {
45 // bool domaincmp(const char* hostname, const char* domain)
46 //
47 //    Returns true if hostname is in domain
48 //
49 bool
domaincmp(string_view hostname,string_view domain)50 domaincmp(string_view hostname, string_view domain)
51 {
52   // Check to see if were passed empty strings for either
53   //  argument.  Empty strings do not match anything
54   //
55   if (domain.empty() || hostname.empty()) {
56     return false;
57   }
58 
59   // Walk through both strings backward - explicit declares, need these post-loop.
60   auto d_idx = domain.rbegin();
61   auto h_idx = hostname.rbegin();
62   // Trailing dots should be ignored since they are optional
63   if (*d_idx == '.') {
64     ++d_idx;
65   }
66   if (*h_idx == '.') {
67     ++h_idx;
68   }
69   while (d_idx != domain.rend() && h_idx != hostname.rend()) {
70     // If we still have characters left on both strings and they do not match, matching fails
71     if (tolower(*d_idx) != tolower(*h_idx)) {
72       return false;
73     }
74 
75     ++d_idx;
76     ++h_idx;
77   };
78 
79   // There are three possible cases that could have gotten us
80   //  here
81   //
82   //     Case 1: we ran out of both strings
83   //     Case 2: we ran out of domain but not hostname
84   //     Case 3: we ran out of hostname but not domain
85   //
86   if (d_idx == domain.rend()) {
87     // If end of hostname also, then case 1 match.
88     // Otherwise it's a case 2 match iff last character match was at a domain boundary.
89     // (ex: avoid 'www.inktomi.ecom' matching 'com')
90     // note: d_idx[-1] == '.' --> h_idx[-1] == '.' because of match check in loop.
91     return h_idx == hostname.rend() || *h_idx == '.' || *(d_idx - 1) == '.';
92   } else if (h_idx == hostname.rend()) {
93     // This covers the case 3 (a very unusual case)
94     //  ex: example.com needing to match .example.com
95     return *d_idx == '.' && d_idx + 1 == domain.rend();
96   }
97 
98   ink_assert(!"Should not get here");
99   return false;
100 }
101 
102 //   Similar to strcasecmp except that if one string has a
103 //     trailing '.' and the other one does not
104 //     then they are equal
105 //
106 //   Meant to compare to host names and
107 //     take in out account that www.example.com
108 //     and www.example.com. are the same host
109 //     since the trailing dot is optional
110 //
111 int
hostcmp(string_view lhs,string_view rhs)112 hostcmp(string_view lhs, string_view rhs)
113 {
114   ink_assert(!lhs.empty());
115   ink_assert(!rhs.empty());
116 
117   // ignore any trailing .
118   if (lhs.back() == '.') {
119     lhs.remove_suffix(1);
120   }
121   if (rhs.back() == '.') {
122     rhs.remove_suffix(1);
123   }
124 
125   auto lidx = lhs.begin();
126   auto ridx = rhs.begin();
127   while (lidx != lhs.end() && ridx != rhs.end()) {
128     char lc(tolower(*lidx));
129     char rc(tolower(*ridx));
130     if (lc < rc) {
131       return -1;
132     } else if (lc > rc) {
133       return 1;
134     }
135     ++lidx;
136     ++ridx;
137   }
138   return lidx != lhs.end() ? 1 : ridx != rhs.end() ? -1 : 0;
139 }
140 
141 } // namespace
142 
143 // static const unsigned char asciiToTable[256]
144 //
145 //   Used to Map Legal hostname characters into
146 //     to indexes used by char_table to
147 //     index strings
148 //
149 //   Legal hostname characters are 0-9, A-Z, a-z and '-'
150 //   '_' is also included although it is not in the spec (RFC 883)
151 //
152 //   Uppercase and lowercase "a-z" both map to same indexes
153 //     since hostnames are not case sensitive
154 //
155 //   Illegal characters map to 255
156 //
157 static const unsigned char asciiToTable[256] = {
158   255, 255, 255, 255, 255, 255, 255, 255, // 0 - 7
159   255, 255, 255, 255, 255, 255, 255, 255, // 8 - 15
160   255, 255, 255, 255, 255, 255, 255, 255, // 16 - 23
161   255, 255, 255, 255, 255, 255, 255, 255, // 24 - 31
162   255, 255, 255, 255, 255, 255, 255, 255, // 32 - 39
163   255, 255, 255, 255, 255, 0,   255, 255, // 40 - 47 ('-')
164   1,   2,   3,   4,   5,   6,   7,   8,   // 48 - 55 (0-7)
165   9,   10,  255, 255, 255, 255, 255, 255, // 56 - 63 (8-9)
166   255, 11,  12,  13,  14,  15,  16,  17,  // 64 - 71 (A-G)
167   18,  19,  20,  21,  22,  23,  24,  25,  // 72 - 79 (H-O)
168   26,  27,  28,  29,  30,  31,  32,  33,  // 80 - 87 (P-W)
169   34,  35,  36,  255, 255, 255, 255, 37,  // 88 - 95 (X-Z, '_')
170   255, 11,  12,  13,  14,  15,  16,  17,  // 96 - 103 (a-g)
171   18,  19,  20,  21,  22,  23,  24,  25,  // 104 - 111 (h-0)
172   26,  27,  28,  29,  30,  31,  32,  33,  // 112 - 119 (p-w)
173   34,  35,  36,  255, 255, 255, 255, 255, // 120 - 127 (x-z)
174   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
175   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
176   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
177   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
178   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};
179 
180 // Number of legal characters in the asciiToTable array
181 static const int numLegalChars = 38;
182 
183 // struct CharIndexBlock
184 //
185 //   Used by class CharIndex.  Forms a single level in CharIndex tree
186 //
187 struct CharIndexBlock {
188   struct Item {
189     HostBranch *branch{nullptr};
190     std::unique_ptr<CharIndexBlock> block;
191   };
192   std::array<Item, numLegalChars> array;
193 };
194 
195 // class CharIndex - A constant time string matcher intended for
196 //    short strings in a sparsely populated DNS partition
197 //
198 //    Creates a look up table for character in data string
199 //
200 //    Mapping from character to table index is done by
201 //      asciiToTable[]
202 //
203 //    The illegalKey hash table is side structure for any
204 //     entries that contain illegal hostname characters that
205 //     we can not index into the normal table
206 //
207 //    Example: com
208 //      c maps to 13, o maps to 25, m maps to 23
209 //
210 //                             CharIndexBlock         CharIndexBlock
211 //                             -----------         ------------
212 //                           0 |    |    |         |    |     |
213 //                           . |    |    |         |    |     |
214 //    CharIndexBlock         . |    |    |         |    |     |
215 //    ----------             . |    |    |         |    |     |
216 //  0 |   |    |             . |    |    |   |-->23| ptr|  0  |  (ptr is to the
217 //  . |   |    |   |-------->25| 0  |   -----|     |    |     |   hostBranch for
218 //  . |   |    |   |         . |    |    |         |    |     |   domain com)
219 // 13 | 0 |  --|---|         . |    |    |         |    |     |
220 //  . |   |    |             . |    |    |         |    |     |
221 //  . |   |    |               |    |    |         |    |     |
222 //  . |   |    |               |    |    |         |    |     |
223 //    |   |    |               -----------         -----------|
224 //    |   |    |
225 //    |   |    |
226 //    |   |    |
227 //    |--------|
228 //
229 //
230 //
231 class CharIndex
232 {
233 public:
234   struct iterator : public std::iterator<std::forward_iterator_tag, HostBranch, int> {
235     using self_type = iterator;
236 
237     struct State {
238       int index{-1};
239       CharIndexBlock *block{nullptr};
240     };
241 
iteratorCharIndex::iterator242     iterator() { q.reserve(HOST_TABLE_DEPTH * 2); } // was 6, guessing that was twice the table depth.
243 
244     value_type *operator->();
245     value_type &operator*();
246     bool operator==(self_type const &that) const;
247     bool operator!=(self_type const &that) const;
248     self_type &operator++();
249 
250     // Current level.
251     int cur_level{-1};
252 
253     // Where we got the last element from
254     State state;
255 
256     //  Queue of the above levels
257     std::vector<State> q;
258 
259     // internal methods
260     self_type &advance();
261   };
262 
263   ~CharIndex();
264   void Insert(string_view match_data, HostBranch *toInsert);
265   HostBranch *Lookup(string_view match_data);
266 
267   iterator begin();
268   iterator end();
269 
270 private:
271   CharIndexBlock root;
272   using Table = std::unordered_map<string_view, HostBranch *>;
273   std::unique_ptr<Table> illegalKey;
274 };
275 
~CharIndex()276 CharIndex::~CharIndex()
277 {
278   // clean up the illegal key table.
279   if (illegalKey) {
280     for (auto &item : *illegalKey) {
281       delete item.second;
282     }
283   }
284 }
285 
286 // void CharIndex::Insert(const char* match_data, HostBranch* toInsert)
287 //
288 //   Places a binding for match_data to toInsert into the index
289 //
290 void
Insert(string_view match_data,HostBranch * toInsert)291 CharIndex::Insert(string_view match_data, HostBranch *toInsert)
292 {
293   unsigned char index;
294   CharIndexBlock *cur = &root;
295 
296   ink_assert(!match_data.empty());
297 
298   if (std::any_of(match_data.begin(), match_data.end(), [](unsigned char c) { return asciiToTable[c] == 255; })) {
299     // Insert into illegals hash table
300     if (illegalKey == nullptr) {
301       illegalKey.reset(new Table);
302     }
303     toInsert->key = match_data;
304     illegalKey->emplace(toInsert->key, toInsert);
305   } else {
306     while (true) {
307       index = asciiToTable[static_cast<unsigned char>(match_data.front())];
308 
309       // Check to see if are at the level we supposed be at
310       if (match_data.size() == 1) {
311         // The slot should always be empty, no duplicate keys are allowed
312         ink_assert(cur->array[index].branch == nullptr);
313         cur->array[index].branch = toInsert;
314         break;
315       } else {
316         // We need to find the next level in the table
317 
318         CharIndexBlock *next = cur->array[index].block.get();
319 
320         // Check to see if we need to expand the table
321         if (next == nullptr) {
322           next = new CharIndexBlock;
323           cur->array[index].block.reset(next);
324         }
325         cur = next;
326       }
327       match_data.remove_prefix(1);
328     }
329   }
330 }
331 
332 // HostBranch* CharIndex::Lookup(const char* match_data)
333 //
334 //  Searches the CharIndex on key match_data
335 //    If there is a binding for match_data, returns a pointer to it
336 //    otherwise a nullptr pointer is returned
337 //
338 HostBranch *
Lookup(string_view match_data)339 CharIndex::Lookup(string_view match_data)
340 {
341   if (match_data.empty()) {
342     return nullptr;
343   }
344 
345   if (std::any_of(match_data.begin(), match_data.end(), [](unsigned char c) { return asciiToTable[c] == 255; })) {
346     if (illegalKey) {
347       auto spot = illegalKey->find(match_data);
348       if (spot != illegalKey->end()) {
349         return spot->second;
350       }
351     }
352     return nullptr;
353   }
354 
355   // Invariant: No invalid characters.
356   CharIndexBlock *cur = &root;
357   while (cur) {
358     unsigned char index = asciiToTable[static_cast<unsigned char>(match_data.front())];
359 
360     // Check to see if we are looking for the next level or
361     //    a HostBranch
362     if (match_data.size() == 1) {
363       return cur->array[index].branch;
364     } else {
365       cur = cur->array[index].block.get();
366     }
367     match_data.remove_prefix(1);
368   }
369   return nullptr;
370 }
371 
372 auto
begin()373 CharIndex::begin() -> iterator
374 {
375   iterator zret;
376   zret.state.block = &root;
377   zret.state.index = 0;
378   zret.cur_level   = 0;
379   if (root.array[0].branch == nullptr) {
380     zret.advance();
381   }
382   return zret;
383 }
384 
385 auto
end()386 CharIndex::end() -> iterator
387 {
388   return {};
389 }
390 
operator ->()391 auto CharIndex::iterator::operator-> () -> value_type *
392 {
393   ink_assert(state.block != nullptr); // clang!
394   return state.block->array[state.index].branch;
395 }
396 
operator *()397 auto CharIndex::iterator::operator*() -> value_type &
398 {
399   ink_assert(state.block != nullptr); // clang!
400   return *(state.block->array[state.index].branch);
401 }
402 
403 //
404 // HostBranch* CharIndex::iter_next(CharIndexIterState* s)
405 //
406 //    Finds the next element in the char index and returns
407 //      a pointer to it.  If there are no more elements, nullptr
408 //      is returned
409 //
410 auto
advance()411 CharIndex::iterator::advance() -> self_type &
412 {
413   bool check_branch_p{false}; // skip local branch on the first loop.
414   do {
415     // Check to see if we need to go back up a level
416     if (state.index >= numLegalChars) {
417       if (cur_level <= 0) {    // No more levels so bail out
418         state.block = nullptr; // carefully make this @c equal to the end iterator.
419         state.index = -1;
420         break;
421       } else { // Go back up to a stored level
422         state = q[--cur_level];
423         ++state.index; // did that one before descending.
424       }
425     } else if (check_branch_p && state.block->array[state.index].branch != nullptr) {
426       //  Note: we check for a branch on this level before a descending a level so that when we come back up
427       //  this level will be done with this index.
428       break;
429     } else if (state.block->array[state.index].block != nullptr) {
430       // There is a lower level block to iterate over, store our current state and descend
431       if (static_cast<int>(q.size()) <= cur_level) {
432         q.push_back(state);
433       } else {
434         q[cur_level] = state;
435       }
436       cur_level++;
437       state.block = state.block->array[state.index].block.get();
438       state.index = 0;
439     } else {
440       ++state.index;
441     }
442     check_branch_p = true;
443   } while (true);
444   return *this;
445 }
446 
447 auto
operator ++()448 CharIndex::iterator::operator++() -> self_type &
449 {
450   return this->advance();
451 }
452 
453 bool
operator ==(const self_type & that) const454 CharIndex::iterator::operator==(const self_type &that) const
455 {
456   return this->state.block == that.state.block && this->state.index == that.state.index;
457 }
458 
459 bool
operator !=(const self_type & that) const460 CharIndex::iterator::operator!=(const self_type &that) const
461 {
462   return !(*this == that);
463 }
464 
465 // class HostArray
466 //
467 //   Is a fixed size array for holding HostBranch*
468 //   Allows only sequential access to data
469 //
470 
471 class HostArray
472 {
473   /// Element of the @c HostArray.
474   struct Item {
475     HostBranch *branch{nullptr}; ///< Next branch.
476     std::string match_data;      ///< Match data for that branch.
477   };
478   using Array = std::array<Item, HOST_ARRAY_MAX>;
479 
480 public:
481   bool Insert(string_view match_data_in, HostBranch *toInsert);
482   HostBranch *Lookup(string_view match_data_in, bool bNotProcess);
483 
484   Array::iterator
begin()485   begin()
486   {
487     return array.begin();
488   }
489   Array::iterator
end()490   end()
491   {
492     return array.begin() + _size;
493   }
494 
495 private:
496   int _size{0}; // number of elements currently in the array
497   Array array;
498 };
499 
500 // bool HostArray::Insert(const char* match_data_in, HostBranch* toInsert)
501 //
502 //    Places toInsert into the array with key match_data if there
503 //     is adequate space, in which case true is returned
504 //    If space is inadequate, false is returned and nothing is inserted
505 //
506 bool
Insert(string_view match_data,HostBranch * toInsert)507 HostArray::Insert(string_view match_data, HostBranch *toInsert)
508 {
509   if (_size < static_cast<int>(array.size())) {
510     array[_size].branch     = toInsert;
511     array[_size].match_data = match_data;
512     ++_size;
513     return true;
514   }
515   return false;
516 }
517 
518 // HostBranch* HostArray::Lookup(const char* match_data_in)
519 //
520 //   Looks for key match_data_in.  If a binding is found,
521 //     returns HostBranch* found to the key, otherwise
522 //     nullptr is returned
523 //
524 HostBranch *
Lookup(string_view match_data_in,bool bNotProcess)525 HostArray::Lookup(string_view match_data_in, bool bNotProcess)
526 {
527   HostBranch *r = nullptr;
528   string_view pMD;
529 
530   for (int i = 0; i < _size; i++) {
531     pMD = array[i].match_data;
532 
533     if (bNotProcess && '!' == pMD.front()) {
534       pMD.remove_prefix(1);
535       if (pMD.empty()) {
536         continue;
537       }
538 
539       if (pMD == match_data_in) {
540         r = array[i].branch;
541       }
542     } else if (pMD == match_data_in) {
543       r = array[i].branch;
544       break;
545     }
546   }
547   return r;
548 }
549 
550 // maps enum LeafType to strings
551 const char *LeafTypeStr[] = {"Leaf Invalid", "Host (Partial)", "Host (Full)", "Domain (Full)", "Domain (Partial)"};
552 
553 // HostBranch::~HostBranch()
554 //
555 //   Recursive delete all host branches below us
556 //
~HostBranch()557 HostBranch::~HostBranch()
558 {
559   switch (type) {
560   case HOST_TERMINAL:
561     break;
562   case HOST_HASH: {
563     HostTable *ht = next_level._table;
564     for (auto &item : *ht) {
565       delete item.second;
566     }
567     delete ht;
568   } break;
569   case HOST_INDEX: {
570     CharIndex *ci = next_level._index;
571     for (auto &branch : *ci) {
572       delete &branch;
573     }
574     delete ci;
575   } break;
576   case HOST_ARRAY:
577     for (auto &item : *next_level._array) {
578       delete item.branch;
579     }
580     delete next_level._array;
581     break;
582   }
583 }
584 
HostLookup(string_view name)585 HostLookup::HostLookup(string_view name) : matcher_name(name) {}
586 
587 void
Print()588 HostLookup::Print()
589 {
590   Print([](void *) -> void {});
591 }
592 
593 void
Print(PrintFunc const & f)594 HostLookup::Print(PrintFunc const &f)
595 {
596   PrintHostBranch(&root, f);
597 }
598 
599 //
600 // void HostLookup::PrintHostBranch(HostBranch* hb, HostLookupPrintFunc f)
601 //
602 //   Recursively traverse the matching tree rooted at arg hb
603 //     and print out each element
604 //
605 void
PrintHostBranch(HostBranch * hb,PrintFunc const & f)606 HostLookup::PrintHostBranch(HostBranch *hb, PrintFunc const &f)
607 {
608   for (auto curIndex : hb->leaf_indices) {
609     auto &leaf{leaf_array[curIndex]};
610     printf("\t\t%s for %.*s\n", LeafTypeStr[leaf.type], static_cast<int>(leaf.match.size()), leaf.match.data());
611     f(leaf_array[curIndex].opaque_data);
612   }
613 
614   switch (hb->type) {
615   case HostBranch::HOST_TERMINAL:
616     ink_assert(hb->next_level._ptr == nullptr);
617     break;
618   case HostBranch::HOST_HASH:
619     for (auto &branch : *(hb->next_level._table)) {
620       PrintHostBranch(branch.second, f);
621     }
622     break;
623   case HostBranch::HOST_INDEX:
624     for (auto &branch : *(hb->next_level._index)) {
625       PrintHostBranch(&branch, f);
626     }
627     break;
628   case HostBranch::HOST_ARRAY:
629     for (auto &item : *(hb->next_level._array)) {
630       PrintHostBranch(item.branch, f);
631     }
632     break;
633   }
634 }
635 
636 //
637 // HostBranch* HostLookup::TableNewLevel(HostBranch* from, const char* level_data)
638 //
639 //    Creates the next level structure in arg from
640 //       Creates a HostBranch for level_data, inserts it in to the
641 //         the next_level structure and returns a pointer to the new
642 //         HostBranch
643 //
644 HostBranch *
TableNewLevel(HostBranch * from,string_view level_data)645 HostLookup::TableNewLevel(HostBranch *from, string_view level_data)
646 {
647   ink_assert(from->type == HostBranch::HOST_TERMINAL);
648 
649   // Use the CharIndex for high speed matching at the first level of
650   //   the table.  The first level is short strings, ie: com, edu, jp, fr
651   if (from->level_idx == 0) {
652     from->type              = HostBranch::HOST_INDEX;
653     from->next_level._index = new CharIndex;
654   } else {
655     from->type              = HostBranch::HOST_ARRAY;
656     from->next_level._array = new HostArray;
657   }
658 
659   return InsertBranch(from, level_data);
660 }
661 
662 //
663 // HostBranch* HostLookup::InsertBranch(HostBranch* insert_to, const char* level_data)
664 //
665 //
666 //    Abstraction to place a new node for level_data below node
667 //      insert to.  Inserts into any of the data types used by
668 //      by class HostMatcher
669 //
670 HostBranch *
InsertBranch(HostBranch * insert_in,string_view level_data)671 HostLookup::InsertBranch(HostBranch *insert_in, string_view level_data)
672 {
673   HostBranch *new_branch = new HostBranch;
674   new_branch->key        = level_data;
675   new_branch->type       = HostBranch::HOST_TERMINAL;
676   new_branch->level_idx  = insert_in->level_idx + 1;
677 
678   switch (insert_in->type) {
679   case HostBranch::HOST_TERMINAL:
680     // Should not happen
681     ink_release_assert(0);
682     break;
683   case HostBranch::HOST_HASH:
684     insert_in->next_level._table->emplace(new_branch->key, new_branch);
685     break;
686   case HostBranch::HOST_INDEX:
687     insert_in->next_level._index->Insert(new_branch->key, new_branch);
688     break;
689   case HostBranch::HOST_ARRAY: {
690     auto array = insert_in->next_level._array;
691     if (array->Insert(level_data, new_branch) == false) {
692       // The array is out of space, time to move to a hash table
693       auto ha = insert_in->next_level._array;
694       auto ht = new HostTable;
695       ht->emplace(new_branch->key, new_branch);
696       for (auto &item : *array) {
697         ht->emplace(item.branch->key, item.branch);
698       }
699       // Ring out the old, ring in the new
700       delete ha;
701       insert_in->next_level._table = ht;
702       insert_in->type              = HostBranch::HOST_HASH;
703     }
704   } break;
705   }
706 
707   return new_branch;
708 }
709 
710 // HostBranch* HostLookup::FindNextLevel(HostBranch* from,
711 //                                       const char* level_data)
712 //
713 //   Searches in the branch for  the next level down in the search
714 //     structure bound to level data
715 //   If found returns a pointer to the next level HostBranch,
716 //    otherwise returns nullptr
717 //
718 HostBranch *
FindNextLevel(HostBranch * from,string_view level_data,bool bNotProcess)719 HostLookup::FindNextLevel(HostBranch *from, string_view level_data, bool bNotProcess)
720 {
721   HostBranch *r = nullptr;
722 
723   switch (from->type) {
724   case HostBranch::HOST_TERMINAL:
725     // Should not happen
726     ink_assert(0);
727     break;
728   case HostBranch::HOST_HASH: {
729     auto table = from->next_level._table;
730     auto spot  = table->find(level_data);
731     r          = spot == table->end() ? nullptr : spot->second;
732   } break;
733   case HostBranch::HOST_INDEX:
734     r = from->next_level._index->Lookup(level_data);
735     break;
736   case HostBranch::HOST_ARRAY:
737     r = from->next_level._array->Lookup(level_data, bNotProcess);
738     break;
739   }
740   return r;
741 }
742 
743 // void HostLookup::TableInsert(const char* match_data, int index)
744 //
745 //   Insert the match data, index pair into domain/search table
746 //
747 //   arg index is the index into both Data and HostLeaf array for
748 //    the elements corresponding to match_data
749 //
750 void
TableInsert(string_view match_data,int index,bool domain_record)751 HostLookup::TableInsert(string_view match_data, int index, bool domain_record)
752 {
753   HostBranch *cur = &root;
754   HostBranch *next;
755   TextView match{match_data};
756 
757   // Traverse down the search structure until we either
758   //       Get beyond the fixed number depth of the host table
759   //  OR   We reach the level where the match stops
760   //
761   for (int i = 0; !match.rtrim('.').empty() && i < HOST_TABLE_DEPTH; ++i) {
762     TextView token{match.take_suffix_at('.')};
763 
764     if (cur->next_level._ptr == nullptr) {
765       cur = TableNewLevel(cur, token);
766     } else {
767       next = FindNextLevel(cur, token);
768       if (next == nullptr) {
769         cur = InsertBranch(cur, token);
770       } else {
771         cur = next;
772       }
773     }
774   }
775 
776   // Update the leaf type.  There are three types:
777   //     HOST_PARTIAL - Indicates that part of the hostname name
778   //         was not matched by traversin the search structure since
779   //         it had too elements.  A comparison must be done at the
780   //         leaf node to make sure we have a match
781   //     HOST_COMPLETE - Indicates that the entire domain name
782   //         in the match_data was matched by traversing the search
783   //         structure, no further comparison is necessary
784   //
785   //     DOMAIN_COMPLETE - Indicates that the entire domain name
786   //         in the match_data was matched by traversing the search
787   //         structure, no further comparison is necessary
788   //     DOMAIN_PARTIAL - Indicates that part of the domain name
789   //         was not matched by traversin the search structure since
790   //         it had too elements.  A comparison must be done at the
791   //         leaf node to make sure we have a match
792   if (domain_record == false) {
793     if (match.empty()) {
794       leaf_array[index].type = HostLeaf::HOST_COMPLETE;
795     } else {
796       leaf_array[index].type = HostLeaf::HOST_PARTIAL;
797     }
798   } else {
799     if (match.empty()) {
800       leaf_array[index].type = HostLeaf::DOMAIN_COMPLETE;
801     } else {
802       leaf_array[index].type = HostLeaf::DOMAIN_PARTIAL;
803     }
804   }
805 
806   // Place the index in to leaf array into the match list for this
807   //   HOST_BRANCH
808   cur->leaf_indices.push_back(index);
809 }
810 
811 // bool HostLookup::MatchArray(HostLookupState* s, void**opaque_ptr, vector<int>& array,
812 //                             bool host_done)
813 //
814 //  Helper function to iterate through arg array and update Result for each element in arg array
815 //
816 //  host_done should be passed as true if this call represents the all fields in the matched against hostname being
817 //  consumed.  Example: for www.example.com this would be true for the call matching against the "www", but neither of
818 //  the prior two fields, "inktomi" and "com"
819 //
820 
821 bool
MatchArray(HostLookupState * s,void ** opaque_ptr,LeafIndices & array,bool host_done)822 HostLookup::MatchArray(HostLookupState *s, void **opaque_ptr, LeafIndices &array, bool host_done)
823 {
824   size_t i;
825 
826   for (i = s->array_index + 1; i < array.size(); ++i) {
827     auto &leaf{leaf_array[array[i]]};
828 
829     switch (leaf.type) {
830     case HostLeaf::HOST_PARTIAL:
831       if (hostcmp(s->hostname, leaf.match) == 0) {
832         *opaque_ptr    = leaf.opaque_data;
833         s->array_index = i;
834         return true;
835       }
836       break;
837     case HostLeaf::HOST_COMPLETE:
838       // We have to have consumed the whole hostname for this to match
839       //   so that we do not match a rule for "example.com" to
840       //   "www.example.com
841       //
842       if (host_done == true) {
843         *opaque_ptr    = leaf.opaque_data;
844         s->array_index = i;
845         return true;
846       }
847       break;
848     case HostLeaf::DOMAIN_PARTIAL:
849       if (domaincmp(s->hostname, leaf.match) == false) {
850         break;
851       }
852     // FALL THROUGH
853     case HostLeaf::DOMAIN_COMPLETE:
854       *opaque_ptr    = leaf.opaque_data;
855       s->array_index = i;
856       return true;
857     case HostLeaf::LEAF_INVALID:
858       // Should not get here
859       ink_assert(0);
860       break;
861     }
862   }
863 
864   s->array_index = i;
865   return false;
866 }
867 
868 // bool HostLookup::MatchFirst(const char* host, HostLookupState* s, void** opaque_ptr)
869 //
870 //
871 bool
MatchFirst(string_view host,HostLookupState * s,void ** opaque_ptr)872 HostLookup::MatchFirst(string_view host, HostLookupState *s, void **opaque_ptr)
873 {
874   s->cur           = &root;
875   s->table_level   = 0;
876   s->array_index   = -1;
877   s->hostname      = host;
878   s->hostname_stub = s->hostname;
879 
880   return MatchNext(s, opaque_ptr);
881 }
882 
883 // bool HostLookup::MatchNext(HostLookupState* s, void** opaque_ptr)
884 //
885 //  Searches our tree and updates argresult for each element matching
886 //    arg hostname
887 //
888 bool
MatchNext(HostLookupState * s,void ** opaque_ptr)889 HostLookup::MatchNext(HostLookupState *s, void **opaque_ptr)
890 {
891   HostBranch *cur = s->cur;
892 
893   // Check to see if there is any work to be done
894   if (leaf_array.size() <= 0) {
895     return false;
896   }
897 
898   while (s->table_level <= HOST_TABLE_DEPTH) {
899     if (MatchArray(s, opaque_ptr, cur->leaf_indices, s->hostname_stub.empty())) {
900       return true;
901     }
902     // Check to see if we run out of tokens in the hostname
903     if (s->hostname_stub.empty()) {
904       break;
905     }
906     // Check to see if there are any lower levels
907     if (cur->type == HostBranch::HOST_TERMINAL) {
908       break;
909     }
910 
911     string_view token{TextView{s->hostname_stub}.suffix('.')};
912     s->hostname_stub.remove_suffix(std::min(s->hostname_stub.size(), token.size() + 1));
913     cur = FindNextLevel(cur, token, true);
914 
915     if (cur == nullptr) {
916       break;
917     } else {
918       s->cur         = cur;
919       s->array_index = -1;
920       ++(s->table_level);
921     }
922   }
923 
924   return false;
925 }
926 
927 // void HostLookup::AllocateSpace(int num_entries)
928 //
929 //   Allocate the leaf array structure
930 //
931 void
AllocateSpace(int num_entries)932 HostLookup::AllocateSpace(int num_entries)
933 {
934   leaf_array.reserve(num_entries);
935 }
936 
937 // void HostLookup::NewEntry(const char* match_data, bool domain_record, void* opaque_data_in)
938 //
939 //   Insert a new element in to the table
940 //
941 void
NewEntry(string_view match_data,bool domain_record,void * opaque_data_in)942 HostLookup::NewEntry(string_view match_data, bool domain_record, void *opaque_data_in)
943 {
944   leaf_array.emplace_back(match_data, opaque_data_in);
945   TableInsert(match_data, leaf_array.size() - 1, domain_record);
946 }
947