xref: /trafficserver/src/tscore/HostLookup.cc (revision 4cfd5a73)
1 /** @file
2 
3   A brief file description
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 /*****************************************************************************
25  *
26  *  HostLookup.cc - Implementation of a hostname/domainname matcher
27  *
28  *
29  ****************************************************************************/
30 
31 #include "tscore/ink_inet.h"
32 #include "tscore/ink_assert.h"
33 #include "tscore/HostLookup.h"
34 #include "tscpp/util/TextView.h"
35 
36 #include <string_view>
37 #include <array>
38 #include <memory>
39 
40 using std::string_view;
41 using ts::TextView;
42 
43 namespace
44 {
45 // bool domaincmp(const char* hostname, const char* domain)
46 //
47 //    Returns true if hostname is in domain
48 //
49 bool
50 domaincmp(string_view hostname, string_view domain)
51 {
52   // Check to see if were passed empty strings for either
53   //  argument.  Empty strings do not match anything
54   //
55   if (domain.empty() || hostname.empty()) {
56     return false;
57   }
58 
59   // Walk through both strings backward - explicit declares, need these post-loop.
60   auto d_idx = domain.rbegin();
61   auto h_idx = hostname.rbegin();
62   // Trailing dots should be ignored since they are optional
63   if (*d_idx == '.') {
64     ++d_idx;
65   }
66   if (*h_idx == '.') {
67     ++h_idx;
68   }
69   while (d_idx != domain.rend() && h_idx != hostname.rend()) {
70     // If we still have characters left on both strings and they do not match, matching fails
71     if (tolower(*d_idx) != tolower(*h_idx)) {
72       return false;
73     }
74 
75     ++d_idx;
76     ++h_idx;
77   };
78 
79   // There are three possible cases that could have gotten us
80   //  here
81   //
82   //     Case 1: we ran out of both strings
83   //     Case 2: we ran out of domain but not hostname
84   //     Case 3: we ran out of hostname but not domain
85   //
86   if (d_idx == domain.rend()) {
87     // If end of hostname also, then case 1 match.
88     // Otherwise it's a case 2 match iff last character match was at a domain boundary.
89     // (ex: avoid 'www.inktomi.ecom' matching 'com')
90     // note: d_idx[-1] == '.' --> h_idx[-1] == '.' because of match check in loop.
91     return h_idx == hostname.rend() || *h_idx == '.' || *(d_idx - 1) == '.';
92   } else if (h_idx == hostname.rend()) {
93     // This covers the case 3 (a very unusual case)
94     //  ex: example.com needing to match .example.com
95     return *d_idx == '.' && d_idx + 1 == domain.rend();
96   }
97 
98   ink_assert(!"Should not get here");
99   return false;
100 }
101 
102 //   Similar to strcasecmp except that if one string has a
103 //     trailing '.' and the other one does not
104 //     then they are equal
105 //
106 //   Meant to compare to host names and
107 //     take in out account that www.example.com
108 //     and www.example.com. are the same host
109 //     since the trailing dot is optional
110 //
111 int
112 hostcmp(string_view lhs, string_view rhs)
113 {
114   ink_assert(!lhs.empty());
115   ink_assert(!rhs.empty());
116 
117   // ignore any trailing .
118   if (lhs.back() == '.') {
119     lhs.remove_suffix(1);
120   }
121   if (rhs.back() == '.') {
122     rhs.remove_suffix(1);
123   }
124 
125   auto lidx = lhs.begin();
126   auto ridx = rhs.begin();
127   while (lidx != lhs.end() && ridx != rhs.end()) {
128     char lc(tolower(*lidx));
129     char rc(tolower(*ridx));
130     if (lc < rc) {
131       return -1;
132     } else if (lc > rc) {
133       return 1;
134     }
135     ++lidx;
136     ++ridx;
137   }
138   return lidx != lhs.end() ? 1 : ridx != rhs.end() ? -1 : 0;
139 }
140 
141 } // namespace
142 
143 // static const unsigned char asciiToTable[256]
144 //
145 //   Used to Map Legal hostname characters into
146 //     to indexes used by char_table to
147 //     index strings
148 //
149 //   Legal hostname characters are 0-9, A-Z, a-z and '-'
150 //   '_' is also included although it is not in the spec (RFC 883)
151 //
152 //   Uppercase and lowercase "a-z" both map to same indexes
153 //     since hostnames are not case sensitive
154 //
155 //   Illegal characters map to 255
156 //
157 static const unsigned char asciiToTable[256] = {
158   255, 255, 255, 255, 255, 255, 255, 255, // 0 - 7
159   255, 255, 255, 255, 255, 255, 255, 255, // 8 - 15
160   255, 255, 255, 255, 255, 255, 255, 255, // 16 - 23
161   255, 255, 255, 255, 255, 255, 255, 255, // 24 - 31
162   255, 255, 255, 255, 255, 255, 255, 255, // 32 - 39
163   255, 255, 255, 255, 255, 0,   255, 255, // 40 - 47 ('-')
164   1,   2,   3,   4,   5,   6,   7,   8,   // 48 - 55 (0-7)
165   9,   10,  255, 255, 255, 255, 255, 255, // 56 - 63 (8-9)
166   255, 11,  12,  13,  14,  15,  16,  17,  // 64 - 71 (A-G)
167   18,  19,  20,  21,  22,  23,  24,  25,  // 72 - 79 (H-O)
168   26,  27,  28,  29,  30,  31,  32,  33,  // 80 - 87 (P-W)
169   34,  35,  36,  255, 255, 255, 255, 37,  // 88 - 95 (X-Z, '_')
170   255, 11,  12,  13,  14,  15,  16,  17,  // 96 - 103 (a-g)
171   18,  19,  20,  21,  22,  23,  24,  25,  // 104 - 111 (h-0)
172   26,  27,  28,  29,  30,  31,  32,  33,  // 112 - 119 (p-w)
173   34,  35,  36,  255, 255, 255, 255, 255, // 120 - 127 (x-z)
174   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
175   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
176   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
177   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
178   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};
179 
180 // Number of legal characters in the asciiToTable array
181 static const int numLegalChars = 38;
182 
183 // struct CharIndexBlock
184 //
185 //   Used by class CharIndex.  Forms a single level in CharIndex tree
186 //
187 struct CharIndexBlock {
188   struct Item {
189     HostBranch *branch{nullptr};
190     std::unique_ptr<CharIndexBlock> block;
191   };
192   std::array<Item, numLegalChars> array;
193 };
194 
195 // class CharIndex - A constant time string matcher intended for
196 //    short strings in a sparsely populated DNS partition
197 //
198 //    Creates a look up table for character in data string
199 //
200 //    Mapping from character to table index is done by
201 //      asciiToTable[]
202 //
203 //    The illegalKey hash table is side structure for any
204 //     entries that contain illegal hostname characters that
205 //     we can not index into the normal table
206 //
207 //    Example: com
208 //      c maps to 13, o maps to 25, m maps to 23
209 //
210 //                             CharIndexBlock         CharIndexBlock
211 //                             -----------         ------------
212 //                           0 |    |    |         |    |     |
213 //                           . |    |    |         |    |     |
214 //    CharIndexBlock           . |    |    |         |    |     |
215 //    ----------             . |    |    |         |    |     |
216 //  0 |   |    |             . |    |    |   |-->23| ptr|  0  |  (ptr is to the
217 //  . |   |    |   |-------->25| 0  |   -----|     |    |     |   hostBranch for
218 //  . |   |    |   |         . |    |    |         |    |     |   domain com)
219 // 13 | 0 |  --|---|         . |    |    |         |    |     |
220 //  . |   |    |             . |    |    |         |    |     |
221 //  . |   |    |               |    |    |         |    |     |
222 //  . |   |    |               |    |    |         |    |     |
223 //    |   |    |               -----------         -----------|
224 //    |   |    |
225 //    |   |    |
226 //    |   |    |
227 //    |--------|
228 //
229 //
230 //
231 class CharIndex
232 {
233 public:
234   struct iterator : public std::iterator<std::forward_iterator_tag, HostBranch, int> {
235     using self_type = iterator;
236 
237     struct State {
238       int index{-1};
239       CharIndexBlock *block{nullptr};
240     };
241 
242     iterator() { q.reserve(HOST_TABLE_DEPTH * 2); } // was 6, guessing that was twice the table depth.
243 
244     value_type *operator->();
245     value_type &operator*();
246     bool operator==(self_type const &that) const;
247     bool operator!=(self_type const &that) const;
248     self_type &operator++();
249 
250     // Current level.
251     int cur_level{-1};
252 
253     // Where we got the last element from
254     State state;
255 
256     //  Queue of the above levels
257     std::vector<State> q;
258 
259     // internal methods
260     self_type &advance();
261   };
262 
263   ~CharIndex();
264   void Insert(string_view match_data, HostBranch *toInsert);
265   HostBranch *Lookup(string_view match_data);
266 
267   iterator begin();
268   iterator end();
269 
270 private:
271   CharIndexBlock root;
272   using Table = std::unordered_map<string_view, HostBranch *>;
273   std::unique_ptr<Table> illegalKey;
274 };
275 
276 CharIndex::~CharIndex()
277 {
278   // clean up the illegal key table.
279   if (illegalKey) {
280     for (auto spot = illegalKey->begin(), limit = illegalKey->end(); spot != limit; delete &*(spot++)) {
281       ; // empty
282     }
283   }
284 }
285 
286 // void CharIndex::Insert(const char* match_data, HostBranch* toInsert)
287 //
288 //   Places a binding for match_data to toInsert into the index
289 //
290 void
291 CharIndex::Insert(string_view match_data, HostBranch *toInsert)
292 {
293   unsigned char index;
294   CharIndexBlock *cur = &root;
295 
296   ink_assert(!match_data.empty());
297 
298   if (std::any_of(match_data.begin(), match_data.end(), [](unsigned char c) { return asciiToTable[c] == 255; })) {
299     // Insert into illegals hash table
300     if (illegalKey == nullptr) {
301       illegalKey.reset(new Table);
302     }
303     toInsert->key = match_data;
304     illegalKey->emplace(match_data, toInsert);
305   } else {
306     while (true) {
307       index = asciiToTable[static_cast<unsigned char>(match_data.front())];
308 
309       // Check to see if are at the level we supposed be at
310       if (match_data.size() == 1) {
311         // The slot should always be empty, no duplicate keys are allowed
312         ink_assert(cur->array[index].branch == nullptr);
313         cur->array[index].branch = toInsert;
314         break;
315       } else {
316         // We need to find the next level in the table
317 
318         CharIndexBlock *next = cur->array[index].block.get();
319 
320         // Check to see if we need to expand the table
321         if (next == nullptr) {
322           next = new CharIndexBlock;
323           cur->array[index].block.reset(next);
324         }
325         cur = next;
326       }
327       match_data.remove_prefix(1);
328     }
329   }
330 }
331 
332 // HostBranch* CharIndex::Lookup(const char* match_data)
333 //
334 //  Searches the CharIndex on key match_data
335 //    If there is a binding for match_data, returns a pointer to it
336 //    otherwise a nullptr pointer is returned
337 //
338 HostBranch *
339 CharIndex::Lookup(string_view match_data)
340 {
341   if (match_data.empty()) {
342     return nullptr;
343   }
344 
345   if (std::any_of(match_data.begin(), match_data.end(), [](unsigned char c) { return asciiToTable[c] == 255; })) {
346     if (illegalKey) {
347       auto spot = illegalKey->find(match_data);
348       if (spot != illegalKey->end()) {
349         return spot->second;
350       }
351     }
352     return nullptr;
353   }
354 
355   // Invariant: No invalid characters.
356   CharIndexBlock *cur = &root;
357   while (cur) {
358     unsigned char index = asciiToTable[static_cast<unsigned char>(match_data.front())];
359 
360     // Check to see if we are looking for the next level or
361     //    a HostBranch
362     if (match_data.size() == 1) {
363       return cur->array[index].branch;
364     } else {
365       cur = cur->array[index].block.get();
366     }
367     match_data.remove_prefix(1);
368   }
369   return nullptr;
370 }
371 
372 auto
373 CharIndex::begin() -> iterator
374 {
375   iterator zret;
376   zret.state.block = &root;
377   zret.state.index = 0;
378   zret.cur_level   = 0;
379   if (root.array[0].branch == nullptr) {
380     zret.advance();
381   }
382   return zret;
383 }
384 
385 auto
386 CharIndex::end() -> iterator
387 {
388   return {};
389 }
390 
391 auto CharIndex::iterator::operator-> () -> value_type *
392 {
393   ink_assert(state.block != nullptr); // clang!
394   return state.block->array[state.index].branch;
395 }
396 
397 auto CharIndex::iterator::operator*() -> value_type &
398 {
399   ink_assert(state.block != nullptr); // clang!
400   return *(state.block->array[state.index].branch);
401 }
402 
403 //
404 // HostBranch* CharIndex::iter_next(CharIndexIterState* s)
405 //
406 //    Finds the next element in the char index and returns
407 //      a pointer to it.  If there are no more elements, nullptr
408 //      is returned
409 //
410 auto
411 CharIndex::iterator::advance() -> self_type &
412 {
413   bool check_branch_p{false}; // skip local branch on the first loop.
414   do {
415     // Check to see if we need to go back up a level
416     if (state.index >= numLegalChars) {
417       if (cur_level <= 0) {    // No more levels so bail out
418         state.block = nullptr; // carefully make this @c equal to the end iterator.
419         state.index = -1;
420         break;
421       } else { // Go back up to a stored level
422         state = q[--cur_level];
423         ++state.index; // did that one before descending.
424       }
425     } else if (check_branch_p && state.block->array[state.index].branch != nullptr) {
426       //  Note: we check for a branch on this level before a descending a level so that when we come back up
427       //  this level will be done with this index.
428       break;
429     } else if (state.block->array[state.index].block != nullptr) {
430       // There is a lower level block to iterate over, store our current state and descend
431       q[cur_level++] = state;
432       state.block    = state.block->array[state.index].block.get();
433       state.index    = 0;
434     } else {
435       ++state.index;
436     }
437     check_branch_p = true;
438   } while (true);
439   return *this;
440 }
441 
442 auto
443 CharIndex::iterator::operator++() -> self_type &
444 {
445   return this->advance();
446 }
447 
448 bool
449 CharIndex::iterator::operator==(const self_type &that) const
450 {
451   return this->state.block == that.state.block && this->state.index == that.state.index;
452 }
453 
454 bool
455 CharIndex::iterator::operator!=(const self_type &that) const
456 {
457   return !(*this == that);
458 }
459 
460 // class HostArray
461 //
462 //   Is a fixed size array for holding HostBranch*
463 //   Allows only sequential access to data
464 //
465 
466 class HostArray
467 {
468   /// Element of the @c HostArray.
469   struct Item {
470     HostBranch *branch{nullptr}; ///< Next branch.
471     std::string match_data;      ///< Match data for that branch.
472   };
473   using Array = std::array<Item, HOST_ARRAY_MAX>;
474 
475 public:
476   bool Insert(string_view match_data_in, HostBranch *toInsert);
477   HostBranch *Lookup(string_view match_data_in, bool bNotProcess);
478 
479   Array::iterator
480   begin()
481   {
482     return array.begin();
483   }
484   Array::iterator
485   end()
486   {
487     return array.begin() + _size;
488   }
489 
490 private:
491   int _size{0}; // number of elements currently in the array
492   Array array;
493 };
494 
495 // bool HostArray::Insert(const char* match_data_in, HostBranch* toInsert)
496 //
497 //    Places toInsert into the array with key match_data if there
498 //     is adequate space, in which case true is returned
499 //    If space is inadequate, false is returned and nothing is inserted
500 //
501 bool
502 HostArray::Insert(string_view match_data, HostBranch *toInsert)
503 {
504   if (_size < static_cast<int>(array.size())) {
505     array[_size].branch     = toInsert;
506     array[_size].match_data = match_data;
507     ++_size;
508     return true;
509   }
510   return false;
511 }
512 
513 // HostBranch* HostArray::Lookup(const char* match_data_in)
514 //
515 //   Looks for key match_data_in.  If a binding is found,
516 //     returns HostBranch* found to the key, otherwise
517 //     nullptr is returned
518 //
519 HostBranch *
520 HostArray::Lookup(string_view match_data_in, bool bNotProcess)
521 {
522   HostBranch *r = nullptr;
523   string_view pMD;
524 
525   for (int i = 0; i < _size; i++) {
526     pMD = array[i].match_data;
527 
528     if (bNotProcess && '!' == pMD.front()) {
529       pMD.remove_prefix(1);
530       if (pMD.empty()) {
531         continue;
532       }
533 
534       if (pMD == match_data_in) {
535         r = array[i].branch;
536       }
537     } else if (pMD == match_data_in) {
538       r = array[i].branch;
539       break;
540     }
541   }
542   return r;
543 }
544 
545 // maps enum LeafType to strings
546 const char *LeafTypeStr[] = {"Leaf Invalid", "Host (Partial)", "Host (Full)", "Domain (Full)", "Domain (Partial)"};
547 
548 // HostBranch::~HostBranch()
549 //
550 //   Recursive delete all host branches below us
551 //
552 HostBranch::~HostBranch()
553 {
554   switch (type) {
555   case HOST_TERMINAL:
556     break;
557   case HOST_HASH: {
558     HostTable *ht = next_level._table;
559     for (auto spot = ht->begin(), limit = ht->end(); spot != limit; delete &*(spot++)) {
560     } // empty
561     delete ht;
562   } break;
563   case HOST_INDEX: {
564     CharIndex *ci = next_level._index;
565     for (auto &branch : *ci) {
566       delete &branch;
567     }
568     delete ci;
569   } break;
570   case HOST_ARRAY:
571     for (auto &item : *next_level._array) {
572       delete item.branch;
573     }
574     delete next_level._array;
575     break;
576   }
577 }
578 
579 HostLookup::HostLookup(string_view name) : matcher_name(name) {}
580 
581 void
582 HostLookup::Print()
583 {
584   Print([](void *) -> void {});
585 }
586 
587 void
588 HostLookup::Print(PrintFunc const &f)
589 {
590   PrintHostBranch(&root, f);
591 }
592 
593 //
594 // void HostLookup::PrintHostBranch(HostBranch* hb, HostLookupPrintFunc f)
595 //
596 //   Recursively traverse the matching tree rooted at arg hb
597 //     and print out each element
598 //
599 void
600 HostLookup::PrintHostBranch(HostBranch *hb, PrintFunc const &f)
601 {
602   for (auto curIndex : hb->leaf_indices) {
603     auto &leaf{leaf_array[curIndex]};
604     printf("\t\t%s for %.*s\n", LeafTypeStr[leaf.type], static_cast<int>(leaf.match.size()), leaf.match.data());
605     f(leaf_array[curIndex].opaque_data);
606   }
607 
608   switch (hb->type) {
609   case HostBranch::HOST_TERMINAL:
610     ink_assert(hb->next_level._ptr == nullptr);
611     break;
612   case HostBranch::HOST_HASH:
613     for (auto &branch : *(hb->next_level._table)) {
614       PrintHostBranch(branch.second, f);
615     }
616     break;
617   case HostBranch::HOST_INDEX:
618     for (auto &branch : *(hb->next_level._index)) {
619       PrintHostBranch(&branch, f);
620     }
621     break;
622   case HostBranch::HOST_ARRAY:
623     for (auto &item : *(hb->next_level._array)) {
624       PrintHostBranch(item.branch, f);
625     }
626     break;
627   }
628 }
629 
630 //
631 // HostBranch* HostLookup::TableNewLevel(HostBranch* from, const char* level_data)
632 //
633 //    Creates the next level structure in arg from
634 //       Creates a HostBranch for level_data, inserts it in to the
635 //         the next_level structure and returns a pointer to the new
636 //         HostBranch
637 //
638 HostBranch *
639 HostLookup::TableNewLevel(HostBranch *from, string_view level_data)
640 {
641   ink_assert(from->type == HostBranch::HOST_TERMINAL);
642 
643   // Use the CharIndex for high speed matching at the first level of
644   //   the table.  The first level is short strings, ie: com, edu, jp, fr
645   if (from->level_idx == 0) {
646     from->type              = HostBranch::HOST_INDEX;
647     from->next_level._index = new CharIndex;
648   } else {
649     from->type              = HostBranch::HOST_ARRAY;
650     from->next_level._array = new HostArray;
651   }
652 
653   return InsertBranch(from, level_data);
654 }
655 
656 //
657 // HostBranch* HostLookup::InsertBranch(HostBranch* insert_to, const char* level_data)
658 //
659 //
660 //    Abstraction to place a new node for level_data below node
661 //      insert to.  Inserts into any of the data types used by
662 //      by class HostMatcher
663 //
664 HostBranch *
665 HostLookup::InsertBranch(HostBranch *insert_in, string_view level_data)
666 {
667   HostBranch *new_branch = new HostBranch;
668   new_branch->key        = level_data;
669   new_branch->type       = HostBranch::HOST_TERMINAL;
670   new_branch->level_idx  = insert_in->level_idx + 1;
671 
672   switch (insert_in->type) {
673   case HostBranch::HOST_TERMINAL:
674     // Should not happen
675     ink_release_assert(0);
676     break;
677   case HostBranch::HOST_HASH:
678     insert_in->next_level._table->emplace(level_data, new_branch);
679     break;
680   case HostBranch::HOST_INDEX:
681     insert_in->next_level._index->Insert(level_data, new_branch);
682     break;
683   case HostBranch::HOST_ARRAY: {
684     auto array = insert_in->next_level._array;
685     if (array->Insert(level_data, new_branch) == false) {
686       // The array is out of space, time to move to a hash table
687       auto ha = insert_in->next_level._array;
688       auto ht = new HostTable;
689       ht->emplace(level_data, new_branch);
690       for (auto &item : *array) {
691         ht->emplace(item.match_data, item.branch);
692       }
693       // Ring out the old, ring in the new
694       delete ha;
695       insert_in->next_level._table = ht;
696       insert_in->type              = HostBranch::HOST_HASH;
697     }
698   } break;
699   }
700 
701   return new_branch;
702 }
703 
704 // HostBranch* HostLookup::FindNextLevel(HostBranch* from,
705 //                                       const char* level_data)
706 //
707 //   Searches in the branch for  the next level down in the search
708 //     structure bound to level data
709 //   If found returns a pointer to the next level HostBranch,
710 //    otherwise returns nullptr
711 //
712 HostBranch *
713 HostLookup::FindNextLevel(HostBranch *from, string_view level_data, bool bNotProcess)
714 {
715   HostBranch *r = nullptr;
716 
717   switch (from->type) {
718   case HostBranch::HOST_TERMINAL:
719     // Should not happen
720     ink_assert(0);
721     break;
722   case HostBranch::HOST_HASH: {
723     auto table = from->next_level._table;
724     auto spot  = table->find(level_data);
725     r          = spot == table->end() ? nullptr : spot->second;
726   } break;
727   case HostBranch::HOST_INDEX:
728     r = from->next_level._index->Lookup(level_data);
729     break;
730   case HostBranch::HOST_ARRAY:
731     r = from->next_level._array->Lookup(level_data, bNotProcess);
732     break;
733   }
734   return r;
735 }
736 
737 // void HostLookup::TableInsert(const char* match_data, int index)
738 //
739 //   Insert the match data, index pair into domain/search table
740 //
741 //   arg index is the index into both Data and HostLeaf array for
742 //    the elements corresponding to match_data
743 //
744 void
745 HostLookup::TableInsert(string_view match_data, int index, bool domain_record)
746 {
747   HostBranch *cur = &root;
748   HostBranch *next;
749   TextView match{match_data};
750 
751   // Traverse down the search structure until we either
752   //       Get beyond the fixed number depth of the host table
753   //  OR   We reach the level where the match stops
754   //
755   for (int i = 0; !match.rtrim('.').empty() && i < HOST_TABLE_DEPTH; ++i) {
756     TextView token{match.take_suffix_at('.')};
757 
758     if (cur->next_level._ptr == nullptr) {
759       cur = TableNewLevel(cur, token);
760     } else {
761       next = FindNextLevel(cur, token);
762       if (next == nullptr) {
763         cur = InsertBranch(cur, token);
764       } else {
765         cur = next;
766       }
767     }
768   }
769 
770   // Update the leaf type.  There are three types:
771   //     HOST_PARTIAL - Indicates that part of the hostname name
772   //         was not matched by traversin the search structure since
773   //         it had too elements.  A comparison must be done at the
774   //         leaf node to make sure we have a match
775   //     HOST_COMPLETE - Indicates that the entire domain name
776   //         in the match_data was matched by traversing the search
777   //         structure, no further comparison is necessary
778   //
779   //     DOMAIN_COMPLETE - Indicates that the entire domain name
780   //         in the match_data was matched by traversing the search
781   //         structure, no further comparison is necessary
782   //     DOMAIN_PARTIAL - Indicates that part of the domain name
783   //         was not matched by traversin the search structure since
784   //         it had too elements.  A comparison must be done at the
785   //         leaf node to make sure we have a match
786   if (domain_record == false) {
787     if (match.empty()) {
788       leaf_array[index].type = HostLeaf::HOST_PARTIAL;
789     } else {
790       leaf_array[index].type = HostLeaf::HOST_COMPLETE;
791     }
792   } else {
793     if (match.empty()) {
794       leaf_array[index].type = HostLeaf::DOMAIN_PARTIAL;
795     } else {
796       leaf_array[index].type = HostLeaf::DOMAIN_COMPLETE;
797     }
798   }
799 
800   // Place the index in to leaf array into the match list for this
801   //   HOST_BRANCH
802   cur->leaf_indices.push_back(index);
803 }
804 
805 // bool HostLookup::MatchArray(HostLookupState* s, void**opaque_ptr, vector<int>& array,
806 //                             bool host_done)
807 //
808 //  Helper function to iterate through arg array and update Result for each element in arg array
809 //
810 //  host_done should be passed as true if this call represents the all fields in the matched against hostname being
811 //  consumed.  Example: for www.example.com this would be true for the call matching against the "www", but neither of
812 //  the prior two fields, "inktomi" and "com"
813 //
814 
815 bool
816 HostLookup::MatchArray(HostLookupState *s, void **opaque_ptr, LeafIndices &array, bool host_done)
817 {
818   size_t i;
819 
820   for (i = s->array_index + 1; i < array.size(); ++i) {
821     auto &leaf{leaf_array[array[i]]};
822 
823     switch (leaf.type) {
824     case HostLeaf::HOST_PARTIAL:
825       if (hostcmp(s->hostname, leaf.match) == 0) {
826         *opaque_ptr    = leaf.opaque_data;
827         s->array_index = i;
828         return true;
829       }
830       break;
831     case HostLeaf::HOST_COMPLETE:
832       // We have to have consumed the whole hostname for this to match
833       //   so that we do not match a rule for "example.com" to
834       //   "www.example.com
835       //
836       if (host_done == true) {
837         *opaque_ptr    = leaf.opaque_data;
838         s->array_index = i;
839         return true;
840       }
841       break;
842     case HostLeaf::DOMAIN_PARTIAL:
843       if (domaincmp(s->hostname, leaf.match) == false) {
844         break;
845       }
846     // FALL THROUGH
847     case HostLeaf::DOMAIN_COMPLETE:
848       *opaque_ptr    = leaf.opaque_data;
849       s->array_index = i;
850       return true;
851     case HostLeaf::LEAF_INVALID:
852       // Should not get here
853       ink_assert(0);
854       break;
855     }
856   }
857 
858   s->array_index = i;
859   return false;
860 }
861 
862 // bool HostLookup::MatchFirst(const char* host, HostLookupState* s, void** opaque_ptr)
863 //
864 //
865 bool
866 HostLookup::MatchFirst(string_view host, HostLookupState *s, void **opaque_ptr)
867 {
868   s->cur           = &root;
869   s->table_level   = 0;
870   s->array_index   = -1;
871   s->hostname      = host;
872   s->hostname_stub = s->hostname;
873 
874   return MatchNext(s, opaque_ptr);
875 }
876 
877 // bool HostLookup::MatchNext(HostLookupState* s, void** opaque_ptr)
878 //
879 //  Searches our tree and updates argresult for each element matching
880 //    arg hostname
881 //
882 bool
883 HostLookup::MatchNext(HostLookupState *s, void **opaque_ptr)
884 {
885   HostBranch *cur = s->cur;
886 
887   // Check to see if there is any work to be done
888   if (leaf_array.size() <= 0) {
889     return false;
890   }
891 
892   while (s->table_level <= HOST_TABLE_DEPTH) {
893     if (MatchArray(s, opaque_ptr, cur->leaf_indices, s->hostname_stub.empty())) {
894       return true;
895     }
896     // Check to see if we run out of tokens in the hostname
897     if (s->hostname_stub.empty()) {
898       break;
899     }
900     // Check to see if there are any lower levels
901     if (cur->type == HostBranch::HOST_TERMINAL) {
902       break;
903     }
904 
905     string_view token{TextView{s->hostname_stub}.suffix('.')};
906     s->hostname_stub.remove_suffix(std::min(s->hostname_stub.size(), token.size() + 1));
907     cur = FindNextLevel(cur, token, true);
908 
909     if (cur == nullptr) {
910       break;
911     } else {
912       s->cur         = cur;
913       s->array_index = -1;
914       ++(s->table_level);
915     }
916   }
917 
918   return false;
919 }
920 
921 // void HostLookup::AllocateSpace(int num_entries)
922 //
923 //   Allocate the leaf array structure
924 //
925 void
926 HostLookup::AllocateSpace(int num_entries)
927 {
928   leaf_array.reserve(num_entries);
929 }
930 
931 // void HostLookup::NewEntry(const char* match_data, bool domain_record, void* opaque_data_in)
932 //
933 //   Insert a new element in to the table
934 //
935 void
936 HostLookup::NewEntry(string_view match_data, bool domain_record, void *opaque_data_in)
937 {
938   leaf_array.emplace_back(match_data, opaque_data_in);
939   TableInsert(match_data, leaf_array.size() - 1, domain_record);
940 }
941