xref: /trafficserver/plugins/esi/lib/EsiParser.cc (revision 4cfd5a73)
1 /** @file
2 
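3   Implementation of EsiParser, which splits a document into a list of
    DocNodes: ESI tags ("<esi:...>"), "<!--esi ... -->" HTML comments and the
    raw text found between them.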
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #include "EsiParser.h"
25 #include "Utils.h"
26 
27 #include <cctype>
28 
29 using std::string;
30 using namespace EsiLib;
31 
// Prefix shared by every ESI element tag; _findOpeningTag() scans the input for it.
32 const char *EsiParser::ESI_TAG_PREFIX   = "<esi:";
// Number of characters in ESI_TAG_PREFIX.
33 const int EsiParser::ESI_TAG_PREFIX_LEN = 5;
34 
// Attribute names looked up by _processIncludeTag(), _processWhenTag() and
// _processSpecialIncludeTag() respectively.
35 const string EsiParser::SRC_ATTR_STR("src");
36 const string EsiParser::TEST_ATTR_STR("test");
37 const string EsiParser::HANDLER_ATTR_STR("handler");
38 
// Upper bound (in chars) on the accumulated document; _setup() refuses input
// that would push the buffer past this, and the constructor reserves this much.
39 const unsigned int EsiParser::MAX_DOC_SIZE = 1024 * 1024;
40 
// Table of recognised ESI tags, walked by _parse() until the TYPE_UNKNOWN entry.
// Each entry appears to be (node type, tag text following "<esi:", its length,
// closing tag text, its length) -- confirm the argument order against
// EsiNodeInfo in EsiParser.h.
// _parse() accepts a suffix ending in '>' as soon as it matches; for any other
// suffix the next input character must be whitespace, '/' or '>'.
41 const EsiParser::EsiNodeInfo EsiParser::ESI_NODES[] = {
42   EsiNodeInfo(DocNode::TYPE_INCLUDE, "include", 7, "/>", 2),
43   EsiNodeInfo(DocNode::TYPE_REMOVE, "remove>", 7, "</esi:remove>", 13),
44   EsiNodeInfo(DocNode::TYPE_COMMENT, "comment", 7, "/>", 2),
45   EsiNodeInfo(DocNode::TYPE_VARS, "vars>", 5, "</esi:vars>", 11),
46   EsiNodeInfo(DocNode::TYPE_CHOOSE, "choose>", 7, "</esi:choose>", 13),
47   EsiNodeInfo(DocNode::TYPE_WHEN, "when", 4, "</esi:when>", 11),
48   EsiNodeInfo(DocNode::TYPE_OTHERWISE, "otherwise>", 10, "</esi:otherwise>", 16),
49   EsiNodeInfo(DocNode::TYPE_TRY, "try>", 4, "</esi:try>", 10),
50   EsiNodeInfo(DocNode::TYPE_ATTEMPT, "attempt>", 8, "</esi:attempt>", 14),
51   EsiNodeInfo(DocNode::TYPE_EXCEPT, "except>", 7, "</esi:except>", 13),
52   EsiNodeInfo(DocNode::TYPE_SPECIAL_INCLUDE, "special-include", 15, "/>", 2),
53   EsiNodeInfo(DocNode::TYPE_UNKNOWN, "", 0, "", 0) // serves as end marker
54 };
55 
// Descriptor for the "<!--esi ... -->" HTML comment form; _findOpeningTag()
// matches its tag_suffix and _parse() searches for its closing tag.
56 const EsiParser::EsiNodeInfo EsiParser::HTML_COMMENT_NODE_INFO(DocNode::TYPE_HTML_COMMENT, "<!--esi", 7, "-->", 3);
57 
EsiParser(const char * debug_tag,ComponentBase::Debug debug_func,ComponentBase::Error error_func)58 EsiParser::EsiParser(const char *debug_tag, ComponentBase::Debug debug_func, ComponentBase::Error error_func)
59   : ComponentBase(debug_tag, debug_func, error_func), _parse_start_pos(-1)
60 {
61   // do this so that object doesn't move around in memory;
62   // (because we return pointers into this object)
63   _data.reserve(MAX_DOC_SIZE);
64 }
65 
66 bool
_setup(string & data,int & parse_start_pos,size_t & orig_output_list_size,DocNodeList & node_list,const char * data_ptr,int & data_len) const67 EsiParser::_setup(string &data, int &parse_start_pos, size_t &orig_output_list_size, DocNodeList &node_list, const char *data_ptr,
68                   int &data_len) const
69 {
70   bool retval = true;
71   if (!data_ptr || !data_len) {
72     _debugLog(_debug_tag, "[%s] Returning true for empty data", __FUNCTION__);
73   } else {
74     if (data_len == -1) {
75       data_len = strlen(data_ptr);
76     }
77     if ((data.size() + data_len) > MAX_DOC_SIZE) {
78       _errorLog("[%s] Cannot allow attempted doc of size %d; Max allowed size is %d", __FUNCTION__, data.size() + data_len,
79                 MAX_DOC_SIZE);
80       retval = false;
81     } else {
82       data.append(data_ptr, data_len);
83     }
84   }
85   if (parse_start_pos == -1) { // first time this cycle that input is being provided
86     parse_start_pos       = 0;
87     orig_output_list_size = node_list.size();
88   }
89   return retval;
90 }
91 
92 bool
parseChunk(const char * data,DocNodeList & node_list,int data_len)93 EsiParser::parseChunk(const char *data, DocNodeList &node_list, int data_len /* = -1 */)
94 {
95   if (!_setup(_data, _parse_start_pos, _orig_output_list_size, node_list, data, data_len)) {
96     return false;
97   }
98   if (!_parse(_data, _parse_start_pos, node_list)) {
99     _errorLog("[%s] Failed to parse chunk of size %d starting with [%.5s]...", __FUNCTION__, data_len,
100               (data_len ? data : "(null)"));
101     return false;
102   }
103   return true;
104 }
105 
106 bool
_completeParse(string & data,int & parse_start_pos,size_t & orig_output_list_size,DocNodeList & node_list,const char * data_ptr,int data_len) const107 EsiParser::_completeParse(string &data, int &parse_start_pos, size_t &orig_output_list_size, DocNodeList &node_list,
108                           const char *data_ptr /* = 0 */, int data_len /* = -1 */) const
109 {
110   if (!_setup(data, parse_start_pos, orig_output_list_size, node_list, data_ptr, data_len)) {
111     return false;
112   }
113   if (!data.size()) {
114     _debugLog(_debug_tag, "[%s] No data to parse!", __FUNCTION__);
115     return true;
116   }
117   if (!_parse(data, parse_start_pos, node_list, true)) {
118     _errorLog("[%s] Failed to complete parse of data of total size %d starting with [%.5s]...", __FUNCTION__, data.size(),
119               (data.size() ? data.data() : "(null)"));
120     node_list.resize(orig_output_list_size);
121     return false;
122   }
123   return true;
124 }
125 
126 EsiParser::MATCH_TYPE
_searchData(const string & data,size_t start_pos,const char * str,int str_len,size_t & pos) const127 EsiParser::_searchData(const string &data, size_t start_pos, const char *str, int str_len, size_t &pos) const
128 {
129   const char *data_ptr = data.data() + start_pos;
130   int data_len         = data.size() - start_pos;
131   int i_data = 0, i_str = 0;
132 
133   while (i_data < data_len) {
134     if (data_ptr[i_data] == str[i_str]) {
135       ++i_str;
136       if (i_str == str_len) {
137         break;
138       }
139     } else {
140       i_data -= i_str;
141       i_str = 0;
142     }
143     ++i_data;
144   }
145 
146   if (i_str == str_len) {
147     pos = start_pos + i_data + 1 - i_str;
148     _debugLog(_debug_tag, "[%s] Found full match of %.*s in [%.5s...] at position %d", __FUNCTION__, str_len, str, data_ptr, pos);
149     return COMPLETE_MATCH;
150   } else if (i_str) {
151     pos = start_pos + i_data - i_str;
152     _debugLog(_debug_tag, "[%s] Found partial match of %.*s in [%.5s...] at position %d", __FUNCTION__, str_len, str, data_ptr,
153               pos);
154     return PARTIAL_MATCH;
155   } else {
156     _debugLog(_debug_tag, "[%s] Found no match of %.*s in [%.5s...]", __FUNCTION__, str_len, str, data_ptr);
157     return NO_MATCH;
158   }
159 }
160 
161 EsiParser::MATCH_TYPE
_compareData(const string & data,size_t pos,const char * str,int str_len) const162 EsiParser::_compareData(const string &data, size_t pos, const char *str, int str_len) const
163 {
164   int i_str     = 0;
165   size_t i_data = pos;
166   for (; i_data < data.size(); ++i_data) {
167     if (data[i_data] == str[i_str]) {
168       ++i_str;
169       if (i_str == str_len) {
170         _debugLog(_debug_tag, "[%s] string [%.*s] is equal to data at position %d", __FUNCTION__, str_len, str, pos);
171         return COMPLETE_MATCH;
172       }
173     } else {
174       /*
175       _debugLog(_debug_tag, "[%s] string [%.*s] is not equal to data at position %d",
176                 __FUNCTION__, str_len, str, pos);
177       */
178       return NO_MATCH;
179     }
180   }
181   _debugLog(_debug_tag, "[%s] string [%.*s] is partially equal to data at position %d", __FUNCTION__, str_len, str, pos);
182   return PARTIAL_MATCH;
183 }
184 
185 /** Finds the first opening tag in data at or after start_pos. Two target
 * strings are tracked in a single pass: ESI_TAG_PREFIX (progress kept in
 * i_esi) and HTML_COMMENT_NODE_INFO.tag_suffix (progress kept in
 * i_html_comment).
 *
 * This implementation is optimized but not completely correct.  If
 * the opening tag were to have a repeating opening sequence ('<e<esi'
 * or something like that), this will break. However that is not the
 * case for the two opening tags we are looking for.
 *
 * @param data                  text to scan
 * @param start_pos             offset at which scanning begins
 * @param opening_tag_pos       set to the offset where the (complete or
 *                              partial) tag starts; untouched on NO_MATCH
 * @param is_html_comment_node  set to true when the match is the HTML comment
 *                              form, false for the ESI prefix; untouched on
 *                              NO_MATCH
 * @return COMPLETE_MATCH when a whole target string is found (the HTML
 *         comment form additionally needs a following space, tab, CR or LF
 *         inside data); PARTIAL_MATCH when data ends while a target string is
 *         still matching (the ESI prefix is checked first); NO_MATCH otherwise
 */
189 EsiParser::MATCH_TYPE
_findOpeningTag(const string & data,size_t start_pos,size_t & opening_tag_pos,bool & is_html_comment_node) const190 EsiParser::_findOpeningTag(const string &data, size_t start_pos, size_t &opening_tag_pos, bool &is_html_comment_node) const
191 {
192   size_t i_data = start_pos;
193   int i_esi = 0, i_html_comment = 0;
194 
195   while (i_data < data.size()) {
196     if (data[i_data] == ESI_TAG_PREFIX[i_esi]) {
197       if (++i_esi == ESI_TAG_PREFIX_LEN) {
198         is_html_comment_node = false;
          // i_data is on the last matched char, so step back to the first one
199         opening_tag_pos      = i_data - i_esi + 1;
200         return COMPLETE_MATCH;
201       }
202     } else {
203       if (i_esi) {
204         i_esi = 0;
205         --i_data; // we do this to reexamine the current char as target string might start from here
          // NOTE(review): after this decrement the HTML-comment check below
          // looks at the previous character, not the one that just mismatched.
206         if (i_html_comment) {
207           --i_html_comment; // in case other target string has started matching, adjust its index
208         }
209       }
210     }
211     // doing the exact same thing for the other target string
      // (the length guard keeps the index in range when a full "<!--esi" was
      // matched earlier but not accepted; the else branch then resets it)
212     if (i_html_comment < HTML_COMMENT_NODE_INFO.tag_suffix_len &&
213         data[i_data] == HTML_COMMENT_NODE_INFO.tag_suffix[i_html_comment]) {
214       if (++i_html_comment == HTML_COMMENT_NODE_INFO.tag_suffix_len && i_data + 1 < data.size()) {
215         char ch = data[i_data + 1]; // "<!--esi" must be followed by a whitespace char
216         if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
217           is_html_comment_node = true;
218           opening_tag_pos      = i_data - i_html_comment + 1;
219           return COMPLETE_MATCH;
220         }
221       }
222     } else {
223       if (i_html_comment) {
224         i_html_comment = 0;
225         --i_data; // same comments from above applies
226         if (i_esi) {
227           --i_esi;
228         }
229       }
230     }
231     ++i_data;
232   }
233   // partial matches; with the nature of our current opening tags, the
234   // only way we can have a partial match for both target strings is
235   // if the last char of the input string is '<' and that is not
236   // enough information to differentiate the tags; Anyway, the parser
237   // takes no action for a partial match
    // (here i_data is one past the last char examined, hence no "+ 1")
238   if (i_esi) {
239     is_html_comment_node = false;
240     opening_tag_pos      = i_data - i_esi;
241     return PARTIAL_MATCH;
242   }
243   if (i_html_comment) {
244     is_html_comment_node = true;
245     opening_tag_pos      = i_data - i_html_comment;
246     return PARTIAL_MATCH;
247   }
248   return NO_MATCH;
249 }
250 
251 inline bool
_processSimpleContentTag(DocNode::TYPE node_type,const char * data,int data_len,DocNodeList & node_list) const252 EsiParser::_processSimpleContentTag(DocNode::TYPE node_type, const char *data, int data_len, DocNodeList &node_list) const
253 {
254   DocNode new_node(node_type);
255   if (!parse(new_node.child_nodes, data, data_len)) {
256     _errorLog("[%s] Could not parse simple content of [%s] node", __FUNCTION__, DocNode::type_names_[node_type]);
257     return false;
258   }
259   node_list.push_back(new_node);
260   return true;
261 }
262 
263 bool
_parse(const string & data,int & parse_start_pos,DocNodeList & node_list,bool last_chunk) const264 EsiParser::_parse(const string &data, int &parse_start_pos, DocNodeList &node_list, bool last_chunk /* = false */) const
265 {
266   size_t orig_list_size = node_list.size();
267   size_t curr_pos, end_pos;
268   const char *const data_start_ptr = data.data();
269   size_t data_size                 = data.size();
270   const EsiNodeInfo *node_info;
271   MATCH_TYPE search_result;
272   bool is_html_comment_node;
273   bool parse_result;
274 
275   while (parse_start_pos < static_cast<int>(data_size)) {
276     search_result = _findOpeningTag(data, static_cast<int>(parse_start_pos), curr_pos, is_html_comment_node);
277     if (search_result == NO_MATCH) {
278       // we could add this chunk as a PRE node, but it might be
279       // possible that the next chunk is also a PRE node, in which
280       // case it is more correct to create one PRE node than two PRE
281       // nodes even though processing would result in the same final
282       // output in either case.  we are sacrificing a little
283       // performance (we'll have to parse this chunk again next time)
284       // for correctness
285       break;
286     }
287     if (search_result == PARTIAL_MATCH) {
288       goto lPartialMatch;
289     }
290 
291     // we have a complete match of the opening tag
292     if ((curr_pos - parse_start_pos) > 0) {
293       // add text till here as a PRE node
294       _debugLog(_debug_tag, "[%s], Adding data of size %d before (newly found) ESI tag as PRE node", __FUNCTION__,
295                 curr_pos - parse_start_pos);
296       node_list.push_back(DocNode(DocNode::TYPE_PRE, data_start_ptr + parse_start_pos, curr_pos - parse_start_pos));
297       parse_start_pos = curr_pos;
298     }
299 
300     if (is_html_comment_node) {
301       _debugLog(_debug_tag, "[%s] Found html comment tag at position %d", __FUNCTION__, curr_pos);
302       node_info = &HTML_COMMENT_NODE_INFO;
303       ++curr_pos;
304     } else {
305       curr_pos += ESI_TAG_PREFIX_LEN;
306 
307       for (node_info = ESI_NODES; node_info->type != DocNode::TYPE_UNKNOWN; ++node_info) {
308         search_result = _compareData(data, curr_pos, node_info->tag_suffix, node_info->tag_suffix_len);
309         if (search_result == COMPLETE_MATCH) {
310           if (node_info->tag_suffix[node_info->tag_suffix_len - 1] == '>') {
311             _debugLog(_debug_tag, "[%s] Found [%s] tag at position %d", __FUNCTION__, DocNode::type_names_[node_info->type],
312                       curr_pos - ESI_TAG_PREFIX_LEN);
313             break;
314           } else {
315             if (curr_pos + node_info->tag_suffix_len < data_size) {
316               char ch = data_start_ptr[curr_pos + node_info->tag_suffix_len];
317               if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
318                 _debugLog(_debug_tag, "[%s] Found [%s] tag at position %d", __FUNCTION__, DocNode::type_names_[node_info->type],
319                           curr_pos - ESI_TAG_PREFIX_LEN);
320                 ++curr_pos; // skip the space char
321                 break;
322               } else if (ch == '/' || ch == '>') {
323                 _debugLog(_debug_tag, "[%s] Found [%s] tag at position %d", __FUNCTION__, DocNode::type_names_[node_info->type],
324                           curr_pos - ESI_TAG_PREFIX_LEN);
325                 break;
326               }
327             } else {
328               goto lPartialMatch;
329             }
330           }
331         } else if (search_result == PARTIAL_MATCH) {
332           goto lPartialMatch;
333         }
334       }
335       if (node_info->type == DocNode::TYPE_UNKNOWN) {
336         _errorLog("[%s] Unknown ESI tag starting with [%10s]...", __FUNCTION__, data.c_str());
337         goto lFail;
338       }
339     }
340 
341     curr_pos += node_info->tag_suffix_len;
342     search_result = _searchData(data, curr_pos, node_info->closing_tag, node_info->closing_tag_len, end_pos);
343 
344     if ((search_result == NO_MATCH) || (search_result == PARTIAL_MATCH)) {
345       if (last_chunk) {
346         _errorLog("[%s] ESI tag starting with [%10s]... has no matching closing tag [%.*s]", __FUNCTION__, data.c_str(),
347                   node_info->closing_tag_len, node_info->closing_tag);
348         goto lFail;
349       } else {
350         goto lPartialMatch;
351       }
352     }
353 
354     // now we process only complete nodes
355     switch (node_info->type) {
356     case DocNode::TYPE_INCLUDE:
357       _debugLog(_debug_tag, "[%s] Handling include tag...", __FUNCTION__);
358       parse_result = _processIncludeTag(data, curr_pos, end_pos, node_list);
359       break;
360     case DocNode::TYPE_COMMENT:
361     case DocNode::TYPE_REMOVE:
362       _debugLog(_debug_tag, "[%s] Adding node [%s]", __FUNCTION__, DocNode::type_names_[node_info->type]);
363       node_list.push_back(DocNode(node_info->type)); // no data required
364       parse_result = true;
365       break;
366     case DocNode::TYPE_WHEN:
367       _debugLog(_debug_tag, "[%s] Handling when tag...", __FUNCTION__);
368       parse_result = _processWhenTag(data, curr_pos, end_pos, node_list);
369       break;
370     case DocNode::TYPE_TRY:
371       _debugLog(_debug_tag, "[%s] Handling try tag...", __FUNCTION__);
372       parse_result = _processTryTag(data, curr_pos, end_pos, node_list);
373       break;
374     case DocNode::TYPE_CHOOSE:
375       _debugLog(_debug_tag, "[%s] Handling choose tag...", __FUNCTION__);
376       parse_result = _processChooseTag(data, curr_pos, end_pos, node_list);
377       break;
378     case DocNode::TYPE_OTHERWISE:
379     case DocNode::TYPE_ATTEMPT:
380     case DocNode::TYPE_EXCEPT:
381       _debugLog(_debug_tag, "[%s] Handling %s tag...", __FUNCTION__, DocNode::type_names_[node_info->type]);
382       parse_result = _processSimpleContentTag(node_info->type, data.data() + curr_pos, end_pos - curr_pos, node_list);
383       break;
384     case DocNode::TYPE_VARS:
385     case DocNode::TYPE_HTML_COMMENT:
386       _debugLog(_debug_tag, "[%s] added string of size %d starting with [%.5s] for node %s", __FUNCTION__, end_pos - curr_pos,
387                 data.data() + curr_pos, DocNode::type_names_[node_info->type]);
388       node_list.push_back(DocNode(node_info->type, data.data() + curr_pos, end_pos - curr_pos));
389       parse_result = true;
390       break;
391     case DocNode::TYPE_SPECIAL_INCLUDE:
392       _debugLog(_debug_tag, "[%s] Handling special include tag...", __FUNCTION__);
393       parse_result = _processSpecialIncludeTag(data, curr_pos, end_pos, node_list);
394       break;
395     default:
396       parse_result = false;
397       break;
398     }
399 
400     if (!parse_result) {
401       _errorLog("[%s] Cannot handle ESI tag [%.*s]", __FUNCTION__, node_info->tag_suffix_len, node_info->tag_suffix);
402       goto lFail;
403     }
404 
405     parse_start_pos = end_pos + node_info->closing_tag_len;
406     continue;
407 
408   lPartialMatch:
409     if (last_chunk) {
410       _debugLog(_debug_tag, "[%s] Found a partial ESI tag - will be treated as PRE text", __FUNCTION__);
411     } else {
412       _debugLog(_debug_tag, "[%s] Deferring to next chunk to find complete tag", __FUNCTION__);
413     }
414     break;
415   }
416   if (last_chunk && (parse_start_pos < static_cast<int>(data_size))) {
417     _debugLog(_debug_tag, "[%s] Adding trailing text of size %d starting at [%.5s] as a PRE node", __FUNCTION__,
418               data_size - parse_start_pos, data_start_ptr + parse_start_pos);
419     node_list.push_back(DocNode(DocNode::TYPE_PRE, data_start_ptr + parse_start_pos, data_size - parse_start_pos));
420   }
421   _debugLog(_debug_tag, "[%s] Added %d node(s) during parse", __FUNCTION__, node_list.size() - orig_list_size);
422   return true;
423 
424 lFail:
425   node_list.resize(orig_list_size); // delete whatever nodes we have added so far
426   return false;
427 }
428 
429 bool
_processIncludeTag(const string & data,size_t curr_pos,size_t end_pos,DocNodeList & node_list) const430 EsiParser::_processIncludeTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
431 {
432   Attribute src_info;
433   if (!Utils::getAttribute(data, SRC_ATTR_STR, curr_pos, end_pos, src_info)) {
434     _errorLog("[%s] Could not find src attribute", __FUNCTION__);
435     return false;
436   }
437   node_list.push_back(DocNode(DocNode::TYPE_INCLUDE));
438   node_list.back().attr_list.push_back(src_info);
439   _debugLog(_debug_tag, "[%s] Added include tag with url [%.*s]", __FUNCTION__, src_info.value_len, src_info.value);
440   return true;
441 }
442 
443 bool
_processSpecialIncludeTag(const string & data,size_t curr_pos,size_t end_pos,DocNodeList & node_list) const444 EsiParser::_processSpecialIncludeTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
445 {
446   Attribute handler_info;
447   if (!Utils::getAttribute(data, HANDLER_ATTR_STR, curr_pos, end_pos, handler_info)) {
448     _errorLog("[%s] Could not find handler attribute", __FUNCTION__);
449     return false;
450   }
451   node_list.push_back(DocNode(DocNode::TYPE_SPECIAL_INCLUDE));
452   DocNode &node = node_list.back();
453   node.attr_list.push_back(handler_info);
454   node.data     = data.data() + curr_pos;
455   node.data_len = end_pos - curr_pos;
456   _debugLog(_debug_tag, "[%s] Added special include tag with handler [%.*s] and data [%.*s]", __FUNCTION__, handler_info.value_len,
457             handler_info.value, node.data_len, node.data);
458   return true;
459 }
460 
461 inline bool
_isWhitespace(const char * data,int data_len) const462 EsiParser::_isWhitespace(const char *data, int data_len) const
463 {
464   for (int i = 0; i < data_len; ++i) {
465     if (!isspace(data[i])) {
466       return false;
467     }
468   }
469   return true;
470 }
471 
472 bool
_processWhenTag(const string & data,size_t curr_pos,size_t end_pos,DocNodeList & node_list) const473 EsiParser::_processWhenTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
474 {
475   Attribute test_expr;
476   size_t term_pos;
477   if (!Utils::getAttribute(data, TEST_ATTR_STR, curr_pos, end_pos, test_expr, &term_pos, '>')) {
478     _errorLog("[%s] Could not find test attribute", __FUNCTION__);
479     return false;
480   }
481   ++term_pos; // go past the terminator
482   const char *data_start_ptr = data.data() + term_pos;
483   int data_size              = end_pos - term_pos;
484   if (!_processSimpleContentTag(DocNode::TYPE_WHEN, data_start_ptr, data_size, node_list)) {
485     _errorLog("[%s] Could not parse when node's content", __FUNCTION__);
486     return false;
487   }
488   node_list.back().attr_list.push_back(test_expr);
489   _debugLog(_debug_tag, "[%s] Added when tag with expression [%.*s] and data starting with [%.5s]", __FUNCTION__,
490             test_expr.value_len, test_expr.value, data_start_ptr);
491   return true;
492 }
493 
494 bool
_processTryTag(const string & data,size_t curr_pos,size_t end_pos,DocNodeList & node_list) const495 EsiParser::_processTryTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
496 {
497   const char *data_start_ptr = data.data() + curr_pos;
498   int data_size              = end_pos - curr_pos;
499   DocNode try_node(DocNode::TYPE_TRY);
500   if (!parse(try_node.child_nodes, data_start_ptr, data_size)) {
501     _errorLog("[%s] Could not parse try node's content", __FUNCTION__);
502     return false;
503   }
504 
505   DocNodeList::iterator iter, end_node, attempt_node, except_node, temp_iter;
506   end_node     = try_node.child_nodes.end();
507   attempt_node = except_node = end_node;
508   iter                       = try_node.child_nodes.begin();
509   while (iter != end_node) {
510     if (iter->type == DocNode::TYPE_ATTEMPT) {
511       if (attempt_node != end_node) {
512         _errorLog("[%s] Can have exactly one attempt node in try block", __FUNCTION__);
513         return false;
514       }
515       attempt_node = iter;
516     } else if (iter->type == DocNode::TYPE_EXCEPT) {
517       if (except_node != end_node) {
518         _errorLog("[%s] Can have exactly one except node in try block", __FUNCTION__);
519         return false;
520       }
521       except_node = iter;
522     } else if (iter->type == DocNode::TYPE_PRE) {
523       if (!_isWhitespace(iter->data, iter->data_len)) {
524         _errorLog("[%s] Cannot have non-whitespace raw text as top level node in try block", __FUNCTION__);
525         return false;
526       }
527       _debugLog(_debug_tag, "[%s] Ignoring top-level whitespace raw text", __FUNCTION__);
528       temp_iter = iter;
529       ++temp_iter;
530       try_node.child_nodes.erase(iter);
531       iter = temp_iter;
532       continue; // skip the increment
533     } else {
534       _errorLog("[%s] Only attempt/except/text nodes allowed in try block; [%s] node invalid", __FUNCTION__,
535                 DocNode::type_names_[iter->type]);
536       return false;
537     }
538     ++iter;
539   }
540   if ((attempt_node == end_node) || (except_node == end_node)) {
541     _errorLog("[%s] try block must contain one each of attempt and except nodes", __FUNCTION__);
542     return false;
543   }
544   node_list.push_back(try_node);
545   _debugLog(_debug_tag, "[%s] Added try node successfully", __FUNCTION__);
546   return true;
547 }
548 
549 bool
_processChooseTag(const string & data,size_t curr_pos,size_t end_pos,DocNodeList & node_list) const550 EsiParser::_processChooseTag(const string &data, size_t curr_pos, size_t end_pos, DocNodeList &node_list) const
551 {
552   const char *data_start_ptr = data.data() + curr_pos;
553   size_t data_size           = end_pos - curr_pos;
554   DocNode choose_node(DocNode::TYPE_CHOOSE);
555   if (!parse(choose_node.child_nodes, data_start_ptr, data_size)) {
556     _errorLog("[%s] Couldn't parse choose node content", __FUNCTION__);
557     return false;
558   }
559   DocNodeList::iterator end_node       = choose_node.child_nodes.end();
560   DocNodeList::iterator otherwise_node = end_node, iter, temp_iter;
561   iter                                 = choose_node.child_nodes.begin();
562   while (iter != end_node) {
563     if (iter->type == DocNode::TYPE_OTHERWISE) {
564       if (otherwise_node != end_node) {
565         _errorLog("[%s] Cannot have more than one esi:otherwise node in an esi:choose node", __FUNCTION__);
566         return false;
567       }
568       otherwise_node = iter;
569     } else if (iter->type == DocNode::TYPE_PRE) {
570       if (!_isWhitespace(iter->data, iter->data_len)) {
571         _errorLog("[%s] Cannot have non-whitespace raw text as top-level node in choose data", __FUNCTION__,
572                   DocNode::type_names_[iter->type]);
573         return false;
574       }
575       _debugLog(_debug_tag, "[%s] Ignoring top-level whitespace raw text", __FUNCTION__);
576       temp_iter = iter;
577       ++temp_iter;
578       choose_node.child_nodes.erase(iter);
579       iter = temp_iter;
580       continue; // skip the increment
581     } else if (iter->type != DocNode::TYPE_WHEN) {
582       _errorLog("[%s] Cannot have %s as top-level node in choose data; only when/otherwise/whitespace-text "
583                 "permitted",
584                 __FUNCTION__, DocNode::type_names_[iter->type]);
585       return false;
586     }
587     ++iter;
588   }
589   node_list.push_back(choose_node);
590   return true;
591 }
592 
593 void
clear()594 EsiParser::clear()
595 {
596   _data.clear();
597   _parse_start_pos = -1;
598 }
599 
// Destructor is defaulted (defined out of line here); no custom teardown is performed.
600 EsiParser::~EsiParser() = default;
601 
602 inline void
_adjustPointers(DocNodeList::iterator node_iter,DocNodeList::iterator end,const char * ext_data_ptr,const char * int_data_start) const603 EsiParser::_adjustPointers(DocNodeList::iterator node_iter, DocNodeList::iterator end, const char *ext_data_ptr,
604                            const char *int_data_start) const
605 {
606   AttributeList::iterator attr_iter;
607   for (; node_iter != end; ++node_iter) {
608     if (node_iter->data_len) {
609       node_iter->data = ext_data_ptr + (node_iter->data - int_data_start);
610     }
611     for (attr_iter = node_iter->attr_list.begin(); attr_iter != node_iter->attr_list.end(); ++attr_iter) {
612       if (attr_iter->name_len) {
613         attr_iter->name = ext_data_ptr + (attr_iter->name - int_data_start);
614       }
615       if (attr_iter->value_len) {
616         attr_iter->value = ext_data_ptr + (attr_iter->value - int_data_start);
617       }
618     }
619     if (node_iter->child_nodes.size()) {
620       _adjustPointers(node_iter->child_nodes.begin(), node_iter->child_nodes.end(), ext_data_ptr, int_data_start);
621     }
622   }
623 }
624 
625 bool
parse(DocNodeList & node_list,const char * ext_data_ptr,int data_len) const626 EsiParser::parse(DocNodeList &node_list, const char *ext_data_ptr, int data_len /* = -1 */) const
627 {
628   string data;
629   size_t orig_output_list_size;
630   int parse_start_pos = -1;
631   bool retval         = _completeParse(data, parse_start_pos, orig_output_list_size, node_list, ext_data_ptr, data_len);
632   if (retval && (node_list.size() - orig_output_list_size)) {
633     // adjust all pointers to addresses in input parameter
634     const char *int_data_start      = data.data();
635     DocNodeList::iterator node_iter = node_list.begin();
636     for (size_t i = 0; i < orig_output_list_size; ++i, ++node_iter) {
637       ;
638     }
639     _adjustPointers(node_iter, node_list.end(), ext_data_ptr, int_data_start);
640   }
641   return retval;
642 }
643