1 /** @file
2 
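  Implementation of EsiProcessor: parses an ESI document into a node list,
  preprocesses include/try/choose/html-comment nodes and renders the output.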
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #include "EsiProcessor.h"
25 #include "Stats.h"
26 #include <cctype>
27 
// std::string is used unqualified throughout this file
using std::string;
// bring the EsiLib namespace into scope for this translation unit
using namespace EsiLib;
// Sentinel attribute name for the synthetic attribute that _preprocess() attaches
// to special include nodes. It is never dereferenced: lookups compare the pointer
// value only, so it needs to be a fixed address rather than a real string.
const char *EsiProcessor::INCLUDE_DATA_ID_ATTR = reinterpret_cast<const char *>(0xbeadface);

// Tag string; not referenced by the code visible in this file.
#define FAILURE_INFO_TAG "plugin_esi_failureInfo"
34 
EsiProcessor(const char * debug_tag,const char * parser_debug_tag,const char * expression_debug_tag,ComponentBase::Debug debug_func,ComponentBase::Error error_func,HttpDataFetcher & fetcher,Variables & variables,const HandlerManager & handler_mgr)35 EsiProcessor::EsiProcessor(const char *debug_tag, const char *parser_debug_tag, const char *expression_debug_tag,
36                            ComponentBase::Debug debug_func, ComponentBase::Error error_func, HttpDataFetcher &fetcher,
37                            Variables &variables, const HandlerManager &handler_mgr)
38   : ComponentBase(debug_tag, debug_func, error_func),
39     _curr_state(STOPPED),
40     _parser(parser_debug_tag, debug_func, error_func),
41     _n_prescanned_nodes(0),
42     _n_processed_nodes(0),
43     _n_processed_try_nodes(0),
44     _overall_len(0),
45     _fetcher(fetcher),
46     _usePackedNodeList(false),
47     _esi_vars(variables),
48     _expression(expression_debug_tag, debug_func, error_func, _esi_vars),
49     _n_try_blocks_processed(0),
50     _handler_manager(handler_mgr)
51 {
52 }
53 
54 bool
start()55 EsiProcessor::start()
56 {
57   if (_curr_state != STOPPED) {
58     _debugLog(_debug_tag, "[%s] Implicit call to stop()", __FUNCTION__);
59     stop();
60   }
61   _curr_state        = PARSING;
62   _usePackedNodeList = false;
63   return true;
64 }
65 
66 bool
addParseData(const char * data,int data_len)67 EsiProcessor::addParseData(const char *data, int data_len)
68 {
69   if (_curr_state == ERRORED) {
70     return false;
71   }
72   if (_curr_state == STOPPED) {
73     _debugLog(_debug_tag, "[%s] Implicit call to start()", __FUNCTION__);
74     start();
75   } else if (_curr_state != PARSING) {
76     _debugLog(_debug_tag, "[%s] Can only parse in parse stage", __FUNCTION__);
77     return false;
78   }
79 
80   if (!_parser.parseChunk(data, _node_list, data_len)) {
81     _errorLog("[%s] Failed to parse chunk; Stopping processor...", __FUNCTION__);
82     error();
83     Stats::increment(Stats::N_PARSE_ERRS);
84     return false;
85   }
86   if (!_preprocess(_node_list, _n_prescanned_nodes)) {
87     _errorLog("[%s] Failed to preprocess parsed nodes; Stopping processor...", __FUNCTION__);
88     error();
89     return false;
90   }
91   return true;
92 }
93 
94 bool
completeParse(const char * data,int data_len)95 EsiProcessor::completeParse(const char *data /* = 0 */, int data_len /* = -1 */)
96 {
97   if (_curr_state == ERRORED) {
98     return false;
99   }
100   if (_curr_state == STOPPED) {
101     _debugLog(_debug_tag, "[%s] Implicit call to start()", __FUNCTION__);
102     start();
103   } else if (_curr_state != PARSING) {
104     _debugLog(_debug_tag, "[%s] Can only parse in parse stage", __FUNCTION__);
105     return false;
106   }
107 
108   if (!_parser.completeParse(_node_list, data, data_len)) {
109     _errorLog("[%s] Couldn't parse ESI document", __FUNCTION__);
110     error();
111     Stats::increment(Stats::N_PARSE_ERRS);
112     return false;
113   }
114   return _handleParseComplete();
115 }
116 
117 EsiProcessor::UsePackedNodeResult
usePackedNodeList(const char * data,int data_len)118 EsiProcessor::usePackedNodeList(const char *data, int data_len)
119 {
120   if (_curr_state != STOPPED) {
121     _errorLog("[%s] Cannot use packed node list whilst processing other data", __FUNCTION__);
122     return PROCESS_IN_PROGRESS;
123   }
124   start();
125   if (!_node_list.unpack(data, data_len)) {
126     _errorLog("[%s] Could not unpack node list from provided data!", __FUNCTION__);
127     error();
128     return UNPACK_FAILURE;
129   }
130   _usePackedNodeList = true;
131   return _handleParseComplete() ? PROCESS_SUCCESS : PROCESS_FAILURE;
132 }
133 
134 bool
_handleParseComplete()135 EsiProcessor::_handleParseComplete()
136 {
137   if (_curr_state != PARSING) {
138     _debugLog(_debug_tag, "[%s] Cannot handle parse complete in state %d", __FUNCTION__, _curr_state);
139     return false;
140   }
141   if (!_preprocess(_node_list, _n_prescanned_nodes)) {
142     _errorLog("[%s] Failed to preprocess parsed nodes; Stopping processor...", __FUNCTION__);
143     error();
144     return false;
145   }
146   for (IncludeHandlerMap::iterator map_iter = _include_handlers.begin(); map_iter != _include_handlers.end(); ++map_iter) {
147     map_iter->second->handleParseComplete();
148   }
149 
150   _debugLog(_debug_tag, "[%s] Parsed ESI document with %d nodes", __FUNCTION__, _node_list.size());
151   _curr_state = WAITING_TO_PROCESS;
152 
153   return true;
154 }
155 
156 DataStatus
_getIncludeStatus(const DocNode & node)157 EsiProcessor::_getIncludeStatus(const DocNode &node)
158 {
159   _debugLog(_debug_tag, "[%s] inside getIncludeStatus", __FUNCTION__);
160   if (node.type == DocNode::TYPE_INCLUDE) {
161     const Attribute &url = node.attr_list.front();
162 
163     if (url.value_len == 0) { // allow empty url
164       return STATUS_DATA_AVAILABLE;
165     }
166 
167     string raw_url(url.value, url.value_len);
168     StringHash::iterator iter = _include_urls.find(raw_url);
169     if (iter == _include_urls.end()) {
170       _errorLog("[%s] Data not requested for URL [%.*s]; no data to include", __FUNCTION__, url.value_len, url.value);
171       return STATUS_ERROR;
172     }
173     const string &processed_url = iter->second;
174     DataStatus status           = _fetcher.getRequestStatus(processed_url);
175     _debugLog(_debug_tag, "[%s] Got status %d successfully for URL [%.*s]", __FUNCTION__, status, processed_url.size(),
176               processed_url.data());
177     return status;
178   } else if (node.type == DocNode::TYPE_SPECIAL_INCLUDE) {
179     int include_data_id            = 0;
180     SpecialIncludeHandler *handler = nullptr;
181     for (AttributeList::const_iterator attr_iter = node.attr_list.begin(); attr_iter != node.attr_list.end(); ++attr_iter) {
182       if (attr_iter->name == INCLUDE_DATA_ID_ATTR) {
183         include_data_id = attr_iter->value_len;
184         handler         = reinterpret_cast<SpecialIncludeHandler *>(const_cast<char *>(attr_iter->value));
185         break;
186       }
187     }
188     if (include_data_id == 0 || handler == nullptr) {
189       _errorLog("[%s] Fail to find the special include data id attribute", __FUNCTION__);
190       return STATUS_ERROR;
191     }
192     DataStatus status = handler->getIncludeStatus(include_data_id);
193     _debugLog(_debug_tag, "[%s] Successfully got status for special include with id %d", __FUNCTION__, status, include_data_id);
194 
195     return status;
196   }
197   _debugLog(_debug_tag, "[%s] node of type %s", __FUNCTION__, DocNode::type_names_[node.type]);
198   return STATUS_DATA_AVAILABLE;
199 }
200 
201 bool
_getIncludeData(const DocNode & node,const char ** content_ptr,int * content_len_ptr)202 EsiProcessor::_getIncludeData(const DocNode &node, const char **content_ptr /* = 0 */, int *content_len_ptr /* = 0 */)
203 {
204   if (node.type == DocNode::TYPE_INCLUDE) {
205     const Attribute &url = node.attr_list.front();
206 
207     if (url.value_len == 0) { // allow empty url
208       if (content_ptr && content_len_ptr) {
209         *content_ptr     = nullptr;
210         *content_len_ptr = 0;
211         return true;
212       } else {
213         return false;
214       }
215     }
216 
217     string raw_url(url.value, url.value_len);
218     StringHash::iterator iter = _include_urls.find(raw_url);
219     if (iter == _include_urls.end()) {
220       _errorLog("[%s] Data not requested for URL [%.*s]; no data to include", __FUNCTION__, url.value_len, url.value);
221       return false;
222     }
223     const string &processed_url = iter->second;
224     bool result;
225     if (content_ptr && content_len_ptr) {
226       result = _fetcher.getContent(processed_url, *content_ptr, *content_len_ptr);
227     } else {
228       result = (_fetcher.getRequestStatus(processed_url) == STATUS_DATA_AVAILABLE);
229     }
230     if (!result) {
231       _errorLog("[%s] Couldn't get content for URL [%.*s]", __FUNCTION__, processed_url.size(), processed_url.data());
232       Stats::increment(Stats::N_INCLUDE_ERRS);
233       return false;
234     }
235     _debugLog(_debug_tag, "[%s] Got content successfully for URL [%.*s]", __FUNCTION__, processed_url.size(), processed_url.data());
236     return true;
237   } else if (node.type == DocNode::TYPE_SPECIAL_INCLUDE) {
238     int include_data_id            = 0;
239     SpecialIncludeHandler *handler = nullptr;
240     for (AttributeList::const_iterator attr_iter = node.attr_list.begin(); attr_iter != node.attr_list.end(); ++attr_iter) {
241       if (attr_iter->name == INCLUDE_DATA_ID_ATTR) {
242         include_data_id = attr_iter->value_len;
243         handler         = reinterpret_cast<SpecialIncludeHandler *>(const_cast<char *>(attr_iter->value));
244         break;
245       }
246     }
247     if (include_data_id == 0 || handler == nullptr) {
248       _errorLog("[%s] Fail to find the special include data id attribute", __FUNCTION__);
249       Stats::increment(Stats::N_SPCL_INCLUDE_ERRS);
250       return false;
251     }
252     bool result;
253     if (content_ptr && content_len_ptr) {
254       result = handler->getData(include_data_id, *content_ptr, *content_len_ptr);
255     } else {
256       result = (handler->getIncludeStatus(include_data_id) == STATUS_DATA_AVAILABLE);
257     }
258     if (!result) {
259       _errorLog("[%s] Couldn't get content for special include with id %d", __FUNCTION__, include_data_id);
260       Stats::increment(Stats::N_SPCL_INCLUDE_ERRS);
261       return false;
262     }
263     _debugLog(_debug_tag, "[%s] Successfully got content for special include with id %d", __FUNCTION__, include_data_id);
264     return true;
265   }
266   _errorLog("[%s] Cannot get include data for node of type %s", __FUNCTION__, DocNode::type_names_[node.type]);
267   return false;
268 }
269 
270 EsiProcessor::ReturnCode
process(const char * & data,int & data_len)271 EsiProcessor::process(const char *&data, int &data_len)
272 {
273   if (_curr_state == ERRORED) {
274     return FAILURE;
275   }
276   if (_curr_state != WAITING_TO_PROCESS) {
277     _errorLog("[%s] Processor has to finish parsing via completeParse() before process() call", __FUNCTION__);
278     return FAILURE;
279   }
280   DocNodeList::iterator node_iter, iter;
281   bool attempt_succeeded;
282   TryBlockList::iterator try_iter = _try_blocks.begin();
283   for (int i = 0; i < _n_try_blocks_processed; ++i, ++try_iter) {
284     ;
285   }
286   for (; _n_try_blocks_processed < static_cast<int>(_try_blocks.size()); ++try_iter) {
287     ++_n_try_blocks_processed;
288     attempt_succeeded = true;
289     for (node_iter = try_iter->attempt_nodes.begin(); node_iter != try_iter->attempt_nodes.end(); ++node_iter) {
290       if ((node_iter->type == DocNode::TYPE_INCLUDE) || (node_iter->type == DocNode::TYPE_SPECIAL_INCLUDE)) {
291         const Attribute &url = (*node_iter).attr_list.front();
292         string raw_url(url.value, url.value_len);
293         if (!_getIncludeData(*node_iter)) {
294           attempt_succeeded = false;
295           _errorLog("[%s] attempt section errored; due to url [%s]", __FUNCTION__, raw_url.c_str());
296           break;
297         }
298       }
299     }
300 
301     if (attempt_succeeded) {
302       _debugLog(_debug_tag, "[%s] attempt section succeeded; using attempt section", __FUNCTION__);
303       _node_list.splice(try_iter->pos, try_iter->attempt_nodes);
304     } else {
305       _debugLog(_debug_tag, "[%s] attempt section errored; trying except section", __FUNCTION__);
306       int n_prescanned_nodes = 0;
307       if (!_preprocess(try_iter->except_nodes, n_prescanned_nodes)) {
308         _errorLog("[%s] Failed to preprocess except nodes", __FUNCTION__);
309         stop();
310         return FAILURE;
311       }
312       _node_list.splice(try_iter->pos, try_iter->except_nodes);
313       if (_fetcher.getNumPendingRequests()) {
314         _debugLog(_debug_tag,
315                   "[%s] New fetch requests were triggered by except block; "
316                   "Returning NEED_MORE_DATA...",
317                   __FUNCTION__);
318         return NEED_MORE_DATA;
319       }
320     }
321   }
322   _curr_state = PROCESSED;
323   for (node_iter = _node_list.begin(); node_iter != _node_list.end(); ++node_iter) {
324     DocNode &doc_node = *node_iter; // handy reference
325     _debugLog(_debug_tag, "[%s] Processing ESI node [%s] with data of size %d starting with [%.10s...]", __FUNCTION__,
326               DocNode::type_names_[doc_node.type], doc_node.data_len, (doc_node.data_len ? doc_node.data : "(null)"));
327     if (doc_node.type == DocNode::TYPE_PRE) {
328       // just copy the data
329       _output_data.append(doc_node.data, doc_node.data_len);
330     } else if (!_processEsiNode(node_iter)) {
331       _errorLog("[%s] Failed to process ESI node [%.*s]", __FUNCTION__, doc_node.data_len, doc_node.data);
332       stop();
333       return FAILURE;
334     }
335   }
336   _addFooterData();
337   data     = _output_data.c_str();
338   data_len = _output_data.size();
339   _debugLog(_debug_tag, "[%s] ESI processed document of size %d starting with [%.10s]", __FUNCTION__, data_len,
340             (data_len ? data : "(null)"));
341   return SUCCESS;
342 }
343 
344 EsiProcessor::ReturnCode
flush(string & data,int & overall_len)345 EsiProcessor::flush(string &data, int &overall_len)
346 {
347   if (_curr_state == ERRORED) {
348     return FAILURE;
349   }
350   if (_curr_state == PROCESSED) {
351     overall_len = _overall_len;
352     data.assign("");
353     return SUCCESS;
354   }
355   DocNodeList::iterator node_iter, iter;
356   bool attempt_succeeded;
357   bool attempt_pending;
358   bool node_pending;
359   _output_data.clear();
360   TryBlockList::iterator try_iter = _try_blocks.begin();
361   for (int i = 0; i < _n_try_blocks_processed; ++i, ++try_iter) {
362     ;
363   }
364   for (; _n_try_blocks_processed < static_cast<int>(_try_blocks.size()); ++try_iter) {
365     attempt_pending = false;
366     for (node_iter = try_iter->attempt_nodes.begin(); node_iter != try_iter->attempt_nodes.end(); ++node_iter) {
367       if ((node_iter->type == DocNode::TYPE_INCLUDE) || (node_iter->type == DocNode::TYPE_SPECIAL_INCLUDE)) {
368         if (_getIncludeStatus(*node_iter) == STATUS_DATA_PENDING) {
369           attempt_pending = true;
370           break;
371         }
372       }
373     }
374     if (attempt_pending) {
375       break;
376     }
377 
378     ++_n_try_blocks_processed;
379     attempt_succeeded = true;
380     for (node_iter = try_iter->attempt_nodes.begin(); node_iter != try_iter->attempt_nodes.end(); ++node_iter) {
381       if ((node_iter->type == DocNode::TYPE_INCLUDE) || (node_iter->type == DocNode::TYPE_SPECIAL_INCLUDE)) {
382         const Attribute &url = (*node_iter).attr_list.front();
383         string raw_url(url.value, url.value_len);
384         if (_getIncludeStatus(*node_iter) != STATUS_DATA_AVAILABLE) {
385           attempt_succeeded = false;
386           _errorLog("[%s] attempt section errored; due to url [%s]", __FUNCTION__, raw_url.c_str());
387           break;
388         }
389       }
390     }
391 
392     if (attempt_succeeded) {
393       _debugLog(_debug_tag, "[%s] attempt section succeeded; using attempt section", __FUNCTION__);
394       _n_prescanned_nodes = _n_prescanned_nodes + try_iter->attempt_nodes.size();
395       _node_list.splice(try_iter->pos, try_iter->attempt_nodes);
396     } else {
397       _debugLog(_debug_tag, "[%s] attempt section errored; trying except section", __FUNCTION__);
398       int n_prescanned_nodes = 0;
399       if (!_preprocess(try_iter->except_nodes, n_prescanned_nodes)) {
400         _errorLog("[%s] Failed to preprocess except nodes", __FUNCTION__);
401       }
402       _n_prescanned_nodes = _n_prescanned_nodes + try_iter->except_nodes.size();
403       _node_list.splice(try_iter->pos, try_iter->except_nodes);
404       if (_fetcher.getNumPendingRequests()) {
405         _debugLog(_debug_tag,
406                   "[%s] New fetch requests were triggered by except block; "
407                   "Returning NEED_MORE_DATA...",
408                   __FUNCTION__);
409       }
410     }
411   }
412 
413   node_pending = false;
414   node_iter    = _node_list.begin();
415   for (int i = 0; i < _n_processed_nodes; ++i, ++node_iter) {
416     ;
417   }
418   for (; node_iter != _node_list.end(); ++node_iter) {
419     DocNode &doc_node = *node_iter; // handy reference
420     _debugLog(_debug_tag, "[%s] Processing ESI node [%s] with data of size %d starting with [%.10s...]", __FUNCTION__,
421               DocNode::type_names_[doc_node.type], doc_node.data_len, (doc_node.data_len ? doc_node.data : "(null)"));
422 
423     if (_getIncludeStatus(doc_node) == STATUS_DATA_PENDING) {
424       node_pending = true;
425       break;
426     }
427 
428     _debugLog(_debug_tag, "[%s] processed node: %d, try blocks processed: %d, processed try nodes: %d", __FUNCTION__,
429               _n_processed_nodes, _n_try_blocks_processed, _n_processed_try_nodes);
430     if (doc_node.type == DocNode::TYPE_TRY) {
431       if (_n_try_blocks_processed <= _n_processed_try_nodes) {
432         node_pending = true;
433         break;
434       } else {
435         ++_n_processed_try_nodes;
436       }
437     }
438 
439     _debugLog(_debug_tag, "[%s] really Processing ESI node [%s] with data of size %d starting with [%.10s...]", __FUNCTION__,
440               DocNode::type_names_[doc_node.type], doc_node.data_len, (doc_node.data_len ? doc_node.data : "(null)"));
441 
442     if (doc_node.type == DocNode::TYPE_PRE) {
443       // just copy the data
444       _output_data.append(doc_node.data, doc_node.data_len);
445       ++_n_processed_nodes;
446     } else if (!_processEsiNode(node_iter)) {
447       _errorLog("[%s] Failed to process ESI node [%.*s]", __FUNCTION__, doc_node.data_len, doc_node.data);
448       ++_n_processed_nodes;
449     } else {
450       ++_n_processed_nodes;
451     }
452   }
453 
454   if (!node_pending && (_curr_state == WAITING_TO_PROCESS)) {
455     _curr_state = PROCESSED;
456     _addFooterData();
457   }
458   data.assign(_output_data);
459   _overall_len = _overall_len + data.size();
460   overall_len  = _overall_len;
461 
462   _debugLog(_debug_tag, "[%s] ESI processed document of size %d starting with [%.10s]", __FUNCTION__, data.size(),
463             (data.size() ? data.data() : "(null)"));
464   return SUCCESS;
465 }
466 
467 void
stop()468 EsiProcessor::stop()
469 {
470   _output_data.clear();
471   _node_list.clear();
472   _include_urls.clear();
473   _try_blocks.clear();
474   _n_prescanned_nodes     = 0;
475   _n_try_blocks_processed = 0;
476   _overall_len            = 0;
477   for (IncludeHandlerMap::iterator map_iter = _include_handlers.begin(); map_iter != _include_handlers.end(); ++map_iter) {
478     delete map_iter->second;
479   }
480   _include_handlers.clear();
481   _curr_state = STOPPED;
482 }
483 
~EsiProcessor()484 EsiProcessor::~EsiProcessor()
485 {
486   if (_curr_state != STOPPED) {
487     stop();
488   }
489 }
490 
491 bool
_processEsiNode(const DocNodeList::iterator & iter)492 EsiProcessor::_processEsiNode(const DocNodeList::iterator &iter)
493 {
494   bool retval;
495   const DocNode &node = *iter;
496   if ((node.type == DocNode::TYPE_INCLUDE) || (node.type == DocNode::TYPE_SPECIAL_INCLUDE)) {
497     const char *content;
498     int content_len;
499     if ((retval = _getIncludeData(node, &content, &content_len))) {
500       if (content_len > 0) {
501         _output_data.append(content, content_len);
502       }
503     }
504   } else if ((node.type == DocNode::TYPE_COMMENT) || (node.type == DocNode::TYPE_REMOVE) || (node.type == DocNode::TYPE_TRY) ||
505              (node.type == DocNode::TYPE_CHOOSE) || (node.type == DocNode::TYPE_HTML_COMMENT)) {
506     // choose, try and html-comment would've been dealt with earlier
507     _debugLog(_debug_tag, "[%s] No-op for [%s] node", __FUNCTION__, DocNode::type_names_[node.type]);
508     retval = true;
509   } else if (node.type == DocNode::TYPE_VARS) {
510     retval = _handleVars(node.data, node.data_len);
511   } else {
512     _errorLog("[%s] Unknown ESI Doc node type %d", __FUNCTION__, node.type);
513     retval = false;
514   }
515   if (retval) {
516     _debugLog(_debug_tag, "[%s] Processed ESI [%s] node", __FUNCTION__, DocNode::type_names_[node.type]);
517   } else {
518     _errorLog("[%s] Failed to process ESI doc node of type %d", __FUNCTION__, node.type);
519   }
520   return retval;
521 }
522 
523 inline bool
_isWhitespace(const char * data,int data_len)524 EsiProcessor::_isWhitespace(const char *data, int data_len)
525 {
526   for (int i = 0; i < data_len; ++i) {
527     if (!isspace(data[i])) {
528       return false;
529     }
530   }
531   return true;
532 }
533 
534 bool
_handleChoose(DocNodeList::iterator & curr_node)535 EsiProcessor::_handleChoose(DocNodeList::iterator &curr_node)
536 {
537   DocNodeList::iterator iter, otherwise_node, winning_node, end_node;
538   end_node       = curr_node->child_nodes.end();
539   otherwise_node = end_node;
540   for (iter = curr_node->child_nodes.begin(); iter != end_node; ++iter) {
541     if (iter->type == DocNode::TYPE_OTHERWISE) {
542       otherwise_node = iter;
543       break;
544     }
545   }
546   winning_node = end_node;
547   for (iter = curr_node->child_nodes.begin(); iter != end_node; ++iter) {
548     if (iter->type == DocNode::TYPE_WHEN) {
549       const Attribute &test_expr = iter->attr_list.front();
550       if (_expression.evaluate(test_expr.value, test_expr.value_len)) {
551         winning_node = iter;
552         break;
553       }
554     }
555   }
556   if (winning_node == end_node) {
557     _debugLog(_debug_tag, "[%s] All when nodes failed to evaluate to true", __FUNCTION__);
558     if (otherwise_node != end_node) {
559       _debugLog(_debug_tag, "[%s] Using otherwise node...", __FUNCTION__);
560       winning_node = otherwise_node;
561     } else {
562       _debugLog(_debug_tag, "[%s] No otherwise node, nothing to do...", __FUNCTION__);
563       return true;
564     }
565   }
566   // splice() inserts elements *before* given position, but we need to
567   // insert new nodes after the choose node for them to be seen by
568   // preprocess(); hence...
569   DocNodeList::iterator next_node = curr_node;
570   ++next_node;
571   _node_list.splice(next_node, winning_node->child_nodes);
572   return true;
573 }
574 
575 bool
_handleTry(DocNodeList::iterator & curr_node)576 EsiProcessor::_handleTry(DocNodeList::iterator &curr_node)
577 {
578   DocNodeList::iterator iter, end_node = curr_node->child_nodes.end();
579   DocNodeList::iterator attempt_node = end_node, except_node = end_node;
580   for (iter = curr_node->child_nodes.begin(); iter != end_node; ++iter) {
581     if (iter->type == DocNode::TYPE_ATTEMPT) {
582       attempt_node = iter;
583     } else if (iter->type == DocNode::TYPE_EXCEPT) {
584       except_node = iter;
585     }
586   }
587   TryBlock try_info(attempt_node->child_nodes, except_node->child_nodes, curr_node);
588   int n_prescanned_nodes = 0;
589   if (!_preprocess(try_info.attempt_nodes, n_prescanned_nodes)) {
590     _errorLog("[%s] Couldn't preprocess attempt node of try block", __FUNCTION__);
591     return false;
592   }
593   _try_blocks.push_back(try_info);
594   return true;
595 }
596 
597 bool
_handleVars(const char * str,int str_len)598 EsiProcessor::_handleVars(const char *str, int str_len)
599 {
600   const string &str_value = _expression.expand(str, str_len);
601   _debugLog(_debug_tag, "[%s] Vars expression [%.*s] expanded to [%.*s]", __FUNCTION__, str_len, str, str_value.size(),
602             str_value.data());
603   _output_data.append(str_value);
604   return true;
605 }
606 
607 bool
_handleHtmlComment(const DocNodeList::iterator & curr_node)608 EsiProcessor::_handleHtmlComment(const DocNodeList::iterator &curr_node)
609 {
610   DocNodeList inner_nodes;
611   if (!_parser.parse(inner_nodes, curr_node->data, curr_node->data_len)) {
612     _errorLog("[%s] Couldn't parse html comment node content", __FUNCTION__);
613     Stats::increment(Stats::N_PARSE_ERRS);
614     return false;
615   }
616   _debugLog(_debug_tag, "[%s] parsed %d inner nodes from html comment node", __FUNCTION__, inner_nodes.size());
617   DocNodeList::iterator next_node = curr_node;
618   ++next_node;
619   _node_list.splice(next_node, inner_nodes); // insert after curr node for pre-processing
620   return true;
621 }
622 
623 bool
_preprocess(DocNodeList & node_list,int & n_prescanned_nodes)624 EsiProcessor::_preprocess(DocNodeList &node_list, int &n_prescanned_nodes)
625 {
626   DocNodeList::iterator list_iter = node_list.begin();
627   StringHash::iterator hash_iter;
628   string raw_url;
629 
630   // skip previously examined nodes
631   for (int i = 0; i < n_prescanned_nodes; ++i, ++list_iter) {
632     ;
633   }
634 
635   for (; list_iter != node_list.end(); ++list_iter, ++n_prescanned_nodes) {
636     switch (list_iter->type) {
637     case DocNode::TYPE_CHOOSE:
638       if (!_handleChoose(list_iter)) {
639         _errorLog("[%s] Failed to preprocess choose node", __FUNCTION__);
640         return false;
641       }
642       _debugLog(_debug_tag, "[%s] handled choose node successfully", __FUNCTION__);
643       break;
644     case DocNode::TYPE_TRY:
645       if (!_handleTry(list_iter)) {
646         _errorLog("[%s] Failed to preprocess try node", __FUNCTION__);
647         return false;
648       }
649       _debugLog(_debug_tag, "[%s] handled try node successfully", __FUNCTION__);
650       break;
651     case DocNode::TYPE_HTML_COMMENT:
652       /**
653        * the html comment <!--esi inner text--> is a container.
654        * the esi processor will remove the starting tag "<!--esi" and the
655        * closure tag "-->", then keep the inner text (the content within it).
656        *
657        * we should call _handleHtmlComment when the node list is parsed
658        * from the content,
659        * but we should NOT call _handleHtmlComment again when the node list
660        * is unpacked from the cache because the node list has been parsed.
661        */
662       if (!_usePackedNodeList && !_handleHtmlComment(list_iter)) {
663         _errorLog("[%s] Failed to preprocess try node", __FUNCTION__);
664         return false;
665       }
666       break;
667     case DocNode::TYPE_INCLUDE: {
668       Stats::increment(Stats::N_INCLUDES);
669       const Attribute &src = list_iter->attr_list.front();
670       raw_url.assign(src.value, src.value_len);
671       _debugLog(_debug_tag, "[%s] Adding fetch request for url [%.*s]", __FUNCTION__, raw_url.size(), raw_url.data());
672       hash_iter = _include_urls.find(raw_url);
673       if (hash_iter != _include_urls.end()) { // we have already processed this URL
674         _debugLog(_debug_tag, "[%s] URL [%.*s] already processed", __FUNCTION__, raw_url.size(), raw_url.data());
675         continue;
676       }
677       const string &expanded_url = _expression.expand(raw_url);
678       if (!expanded_url.size()) {
679         _errorLog("[%s] Couldn't expand raw URL [%.*s]", __FUNCTION__, raw_url.size(), raw_url.data());
680         Stats::increment(Stats::N_INCLUDE_ERRS);
681         continue;
682       }
683 
684       if (!_fetcher.addFetchRequest(expanded_url)) {
685         _errorLog("[%s] Couldn't add fetch request for URL [%.*s]", __FUNCTION__, raw_url.size(), raw_url.data());
686         Stats::increment(Stats::N_INCLUDE_ERRS);
687         continue;
688       }
689       _include_urls.insert(StringHash::value_type(raw_url, expanded_url));
690       break;
691     }
692     case DocNode::TYPE_SPECIAL_INCLUDE: {
693       Stats::increment(Stats::N_SPCL_INCLUDES);
694       const Attribute &handler_attr = list_iter->attr_list.front();
695       string handler_id(handler_attr.value, handler_attr.value_len);
696       SpecialIncludeHandler *handler;
697       IncludeHandlerMap::const_iterator map_iter = _include_handlers.find(handler_id);
698       if (map_iter == _include_handlers.end()) {
699         handler = _handler_manager.getHandler(_esi_vars, _expression, _fetcher, handler_id);
700         if (!handler) {
701           _errorLog("[%s] Couldn't create handler with id [%s]", __FUNCTION__, handler_id.c_str());
702           Stats::increment(Stats::N_SPCL_INCLUDE_ERRS);
703           return false;
704         }
705         _include_handlers.insert(IncludeHandlerMap::value_type(handler_id, handler));
706         _debugLog(_debug_tag, "[%s] Created new special include handler object for id [%s]", __FUNCTION__, handler_id.c_str());
707       } else {
708         handler = map_iter->second;
709       }
710       int special_data_id = handler->handleInclude(list_iter->data, list_iter->data_len);
711       if (special_data_id == -1) {
712         _errorLog("[%s] Include handler [%s] couldn't process include with data [%.*s]", __FUNCTION__, handler_id.c_str(),
713                   list_iter->data_len, list_iter->data);
714         Stats::increment(Stats::N_SPCL_INCLUDE_ERRS);
715         return false;
716       }
717       // overloading this structure's members
718       // handler will be in value and include id will be in value_len of the structure
719       list_iter->attr_list.push_back(Attribute(INCLUDE_DATA_ID_ATTR, 0, reinterpret_cast<const char *>(handler), special_data_id));
720       _debugLog(_debug_tag, "[%s] Got id %d for special include at node %d from handler [%s]", __FUNCTION__, special_data_id,
721                 n_prescanned_nodes + 1, handler_id.c_str());
722     } break;
723     default:
724       break;
725     }
726   }
727 
728   return true;
729 }
730 
731 void
_addFooterData()732 EsiProcessor::_addFooterData()
733 {
734   const char *footer;
735   int footer_len;
736   for (IncludeHandlerMap::iterator iter = _include_handlers.begin(); iter != _include_handlers.end(); ++iter) {
737     iter->second->getFooter(footer, footer_len);
738     if (footer_len > 0) {
739       _output_data.append(footer, footer_len);
740     }
741   }
742 }
743