1 /** @file
2 
3   This is a command line tool that reads an ATS log in the squid
4   binary log format, and produces meaningful metrics per property.
5 
6   @section license License
7 
8   Licensed to the Apache Software Foundation (ASF) under one
9   or more contributor license agreements.  See the NOTICE file
10   distributed with this work for additional information
11   regarding copyright ownership.  The ASF licenses this file
12   to you under the Apache License, Version 2.0 (the
13   "License"); you may not use this file except in compliance
14   with the License.  You may obtain a copy of the License at
15 
16       http://www.apache.org/licenses/LICENSE-2.0
17 
18   Unless required by applicable law or agreed to in writing, software
19   distributed under the License is distributed on an "AS IS" BASIS,
20   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21   See the License for the specific language governing permissions and
22   limitations under the License.
23  */
24 
25 #include "tscore/ink_platform.h"
26 #include "tscore/ink_file.h"
27 #include "tscore/I_Layout.h"
28 #include "tscore/I_Version.h"
29 #include "tscore/HashFNV.h"
30 #include "tscore/ink_args.h"
31 #include "tscore/MatcherUtils.h"
32 #include "tscore/runroot.h"
33 
34 // Includes and namespaces etc.
35 #include "LogStandalone.cc"
36 
37 #include "LogObject.h"
38 #include "hdrs/HTTP.h"
39 
40 #include <sys/utsname.h>
41 #if defined(solaris)
42 #include <sys/types.h>
43 #include <unistd.h>
44 #endif
45 
46 #include <iostream>
47 #include <fstream>
48 #include <sstream>
49 #include <iomanip>
50 #include <string>
51 #include <algorithm>
52 #include <vector>
53 #include <list>
54 #include <cmath>
55 #include <functional>
56 #include <fcntl.h>
57 #include <unordered_map>
58 #include <unordered_set>
59 #include <string_view>
60 
61 #ifndef _XOPEN_SOURCE
62 #define _XOPEN_SOURCE 600
63 #endif
64 
65 using namespace std;
66 
67 // Constants, please update the VERSION number when you make a new build!!!
68 #define PROGRAM_NAME "traffic_logstats"
69 
// General limits and defaults.
constexpr int MAX_LOGBUFFER_SIZE = 65536;
constexpr int DEFAULT_LINE_LEN   = 78;
constexpr double LOG10_1024      = 3.0102999566398116; // log10(1024)
constexpr int MAX_ORIG_STRING    = 4096;

// Optimizations for "strcmp()": fixed length (3 or 4 bytes) strings are
// compared as a single integer instead of byte by byte.

// HTTP methods.
constexpr int GET_AS_INT  = 5522759;
constexpr int PUT_AS_INT  = 5526864;
constexpr int HEAD_AS_INT = 1145128264;
constexpr int POST_AS_INT = 1414745936;

// Content type / sub-type prefixes.
constexpr int TEXT_AS_INT = 1954047348;

constexpr int JPEG_AS_INT = 1734701162;
constexpr int JPG_AS_INT  = 6778986;
constexpr int GIF_AS_INT  = 6711655;
constexpr int PNG_AS_INT  = 6778480;
constexpr int BMP_AS_INT  = 7368034;
constexpr int CSS_AS_INT  = 7566179;
constexpr int XML_AS_INT  = 7105912;
constexpr int HTML_AS_INT = 1819112552;
constexpr int ZIP_AS_INT  = 7367034;

constexpr int JAVA_AS_INT = 1635148138; // For "javascript"
constexpr int X_JA_AS_INT = 1634348408; // For "x-javascript"
constexpr int RSSp_AS_INT = 728986482;  // For "RSS+"
constexpr int PLAI_AS_INT = 1767992432; // For "plain"
constexpr int IMAG_AS_INT = 1734438249; // For "image"
constexpr int HTTP_AS_INT = 1886680168; // For "http" followed by "s://" or "://"
100 
// Our persistent "state" (position in the log file etc.). The member order is
// kept as-is, since the layout may matter to whatever stores this record.
struct LastState {
  off_t offset; ///< Position in the log file.
  ino_t st_ino; ///< Presumably the inode of that log file -- confirm against the code that fills it in.
};

// The one state record used by this program.
static LastState last_state;
107 
// A pair of accumulators (number of entries, number of bytes). The collected
// stats are stored in these, per Origin Server, per URL, or in total.
struct StatsCounter {
  int64_t count; ///< Number of entries accounted for.
  int64_t bytes; ///< Sum of the sizes of those entries.
};
113 
// Running summary of elapsed times, maintained by update_elapsed().
struct ElapsedStats {
  int min;      ///< Smallest value seen; -1 means "nothing recorded yet".
  int max;      ///< Largest value seen.
  float avg;    ///< Running average.
  float stddev; ///< Running standard deviation.
};
120 
121 struct OriginStats {
122   const char *server;
123   StatsCounter total;
124 
125   struct {
126     struct {
127       ElapsedStats hit;
128       ElapsedStats hit_ram;
129       ElapsedStats ims;
130       ElapsedStats refresh;
131       ElapsedStats other;
132       ElapsedStats total;
133     } hits;
134     struct {
135       ElapsedStats miss;
136       ElapsedStats ims;
137       ElapsedStats refresh;
138       ElapsedStats other;
139       ElapsedStats total;
140     } misses;
141   } elapsed;
142 
143   struct {
144     struct {
145       StatsCounter hit;
146       StatsCounter hit_ram;
147       StatsCounter ims;
148       StatsCounter refresh;
149       StatsCounter other;
150       StatsCounter total;
151     } hits;
152     struct {
153       StatsCounter miss;
154       StatsCounter ims;
155       StatsCounter refresh;
156       StatsCounter other;
157       StatsCounter total;
158     } misses;
159     struct {
160       StatsCounter client_abort;
161       StatsCounter client_read_error;
162       StatsCounter connect_fail;
163       StatsCounter invalid_req;
164       StatsCounter unknown;
165       StatsCounter other;
166       StatsCounter total;
167     } errors;
168     StatsCounter other;
169   } results;
170 
171   struct {
172     StatsCounter c_000; // Bad
173     StatsCounter c_100;
174     StatsCounter c_200;
175     StatsCounter c_201;
176     StatsCounter c_202;
177     StatsCounter c_203;
178     StatsCounter c_204;
179     StatsCounter c_205;
180     StatsCounter c_206;
181     StatsCounter c_2xx;
182     StatsCounter c_300;
183     StatsCounter c_301;
184     StatsCounter c_302;
185     StatsCounter c_303;
186     StatsCounter c_304;
187     StatsCounter c_305;
188     StatsCounter c_307;
189     StatsCounter c_3xx;
190     StatsCounter c_400;
191     StatsCounter c_401;
192     StatsCounter c_402;
193     StatsCounter c_403;
194     StatsCounter c_404;
195     StatsCounter c_405;
196     StatsCounter c_406;
197     StatsCounter c_407;
198     StatsCounter c_408;
199     StatsCounter c_409;
200     StatsCounter c_410;
201     StatsCounter c_411;
202     StatsCounter c_412;
203     StatsCounter c_413;
204     StatsCounter c_414;
205     StatsCounter c_415;
206     StatsCounter c_416;
207     StatsCounter c_417;
208     StatsCounter c_4xx;
209     StatsCounter c_500;
210     StatsCounter c_501;
211     StatsCounter c_502;
212     StatsCounter c_503;
213     StatsCounter c_504;
214     StatsCounter c_505;
215     StatsCounter c_5xx;
216   } codes;
217 
218   struct {
219     StatsCounter direct;
220     StatsCounter none;
221     StatsCounter sibling;
222     StatsCounter parent;
223     StatsCounter empty;
224     StatsCounter invalid;
225     StatsCounter other;
226   } hierarchies;
227 
228   struct {
229     StatsCounter http;
230     StatsCounter https;
231     StatsCounter none;
232     StatsCounter other;
233   } schemes;
234 
235   struct {
236     StatsCounter ipv4;
237     StatsCounter ipv6;
238   } protocols;
239 
240   struct {
241     StatsCounter options;
242     StatsCounter get;
243     StatsCounter head;
244     StatsCounter post;
245     StatsCounter put;
246     StatsCounter del;
247     StatsCounter trace;
248     StatsCounter connect;
249     StatsCounter purge;
250     StatsCounter none;
251     StatsCounter other;
252   } methods;
253 
254   struct {
255     struct {
256       StatsCounter plain;
257       StatsCounter xml;
258       StatsCounter html;
259       StatsCounter css;
260       StatsCounter javascript;
261       StatsCounter other;
262       StatsCounter total;
263     } text;
264     struct {
265       StatsCounter jpeg;
266       StatsCounter gif;
267       StatsCounter png;
268       StatsCounter bmp;
269       StatsCounter other;
270       StatsCounter total;
271     } image;
272     struct {
273       StatsCounter shockwave_flash;
274       StatsCounter quicktime;
275       StatsCounter javascript;
276       StatsCounter zip;
277       StatsCounter other;
278       StatsCounter rss_xml;
279       StatsCounter rss_atom;
280       StatsCounter rss_other;
281       StatsCounter total;
282     } application;
283     struct {
284       StatsCounter wav;
285       StatsCounter mpeg;
286       StatsCounter other;
287       StatsCounter total;
288     } audio;
289     StatsCounter none;
290     StatsCounter other;
291   } content;
292 };
293 
294 struct UrlStats {
295   bool
operator <UrlStats296   operator<(const UrlStats &rhs) const
297   {
298     return req.count > rhs.req.count;
299   } // Reverse order
300 
301   const char *url;
302   StatsCounter req;
303   ElapsedStats time;
304   int64_t c_000;
305   int64_t c_2xx;
306   int64_t c_3xx;
307   int64_t c_4xx;
308   int64_t c_5xx;
309   int64_t hits;
310   int64_t misses;
311   int64_t errors;
312 };
313 
///////////////////////////////////////////////////////////////////////////////
// Equality predicate for C strings (used by the unordered containers below):
// two keys are equal when their characters are, not when the pointers are.
struct eqstr {
  inline bool
  operator()(const char *s1, const char *s2) const
  {
    return !strcmp(s1, s2);
  }
};
323 
324 struct hash_fnv32 {
325   inline uint32_t
operator ()hash_fnv32326   operator()(const char *s) const
327   {
328     ATSHash32FNV1a fnv;
329 
330     if (s) {
331       fnv.update(s, strlen(s));
332     }
333 
334     fnv.final();
335     return fnv.get();
336   }
337 };
338 
339 using LruStack = std::list<UrlStats>;
340 typedef std::unordered_map<const char *, OriginStats *, hash_fnv32, eqstr> OriginStorage;
341 typedef std::unordered_set<const char *, hash_fnv32, eqstr> OriginSet;
342 typedef std::unordered_map<const char *, LruStack::iterator, hash_fnv32, eqstr> LruHash;
343 
// Resize a hash-based container: forwards to the container's own rehash(),
// asking for (at least) @a size buckets.
template <typename Container, typename Count>
void
rehash(Container &container, Count size)
{
  container.rehash(size);
}
351 
352 // LRU class for the URL data
353 void update_elapsed(ElapsedStats &stat, const int elapsed, const StatsCounter &counter);
354 
355 class UrlLru
356 {
357 public:
UrlLru(int size=1000000,int show_urls=0)358   UrlLru(int size = 1000000, int show_urls = 0) : _size(size)
359   {
360     _show_urls = size > 0 ? (show_urls >= size ? size - 1 : show_urls) : show_urls;
361     _init();
362     _reset(false);
363     _cur = _stack.begin();
364   }
365 
366   void
resize(int size=0)367   resize(int size = 0)
368   {
369     if (0 != size) {
370       _size = size;
371     }
372 
373     _init();
374     _reset(true);
375     _cur = _stack.begin();
376   }
377 
378   void
dump(int as_object=0)379   dump(int as_object = 0)
380   {
381     int show = _stack.size();
382 
383     if (_show_urls > 0 && _show_urls < show) {
384       show = _show_urls;
385     }
386 
387     _stack.sort();
388     for (LruStack::iterator u = _stack.begin(); nullptr != u->url && --show >= 0; ++u) {
389       _dump_url(u, as_object);
390     }
391     if (as_object) {
392       std::cout << "  \"_timestamp\" : \"" << static_cast<int>(ink_time_wall_seconds()) << "\"" << std::endl;
393     } else {
394       std::cout << "  { \"_timestamp\" : \"" << static_cast<int>(ink_time_wall_seconds()) << "\" }" << std::endl;
395     }
396   }
397 
398   void
add_stat(const char * url,int64_t bytes,int time,int result,int http_code,int as_object=0)399   add_stat(const char *url, int64_t bytes, int time, int result, int http_code, int as_object = 0)
400   {
401     LruHash::iterator h = _hash.find(url);
402 
403     if (h != _hash.end()) {
404       LruStack::iterator &l = h->second;
405 
406       ++(l->req.count);
407       l->req.bytes += bytes;
408 
409       if ((http_code >= 600) || (http_code < 200)) {
410         ++(l->c_000);
411       } else if (http_code >= 500) {
412         ++(l->c_5xx);
413       } else if (http_code >= 400) {
414         ++(l->c_4xx);
415       } else if (http_code >= 300) {
416         ++(l->c_3xx);
417       } else { // http_code >= 200
418         ++(l->c_2xx);
419       }
420 
421       switch (result) {
422       case SQUID_LOG_TCP_HIT:
423       case SQUID_LOG_TCP_IMS_HIT:
424       case SQUID_LOG_TCP_REFRESH_HIT:
425       case SQUID_LOG_TCP_DISK_HIT:
426       case SQUID_LOG_TCP_MEM_HIT:
427       case SQUID_LOG_TCP_REF_FAIL_HIT:
428       case SQUID_LOG_UDP_HIT:
429       case SQUID_LOG_UDP_WEAK_HIT:
430       case SQUID_LOG_UDP_HIT_OBJ:
431         ++(l->hits);
432         break;
433       case SQUID_LOG_TCP_MISS:
434       case SQUID_LOG_TCP_IMS_MISS:
435       case SQUID_LOG_TCP_REFRESH_MISS:
436       case SQUID_LOG_TCP_EXPIRED_MISS:
437       case SQUID_LOG_TCP_WEBFETCH_MISS:
438       case SQUID_LOG_UDP_MISS:
439         ++(l->misses);
440         break;
441       case SQUID_LOG_ERR_CLIENT_ABORT:
442       case SQUID_LOG_ERR_CLIENT_READ_ERROR:
443       case SQUID_LOG_ERR_CONNECT_FAIL:
444       case SQUID_LOG_ERR_INVALID_REQ:
445       case SQUID_LOG_ERR_UNKNOWN:
446       case SQUID_LOG_ERR_READ_TIMEOUT:
447         ++(l->errors);
448         break;
449       }
450 
451       update_elapsed(l->time, time, l->req);
452       // Move this entry to the top of the stack (hence, LRU)
453       if (_size > 0) {
454         _stack.splice(_stack.begin(), _stack, l);
455       }
456     } else {                                  // "new" URL
457       const char *u        = ats_strdup(url); // We own it.
458       LruStack::iterator l = _stack.end();
459 
460       if (_size > 0) {
461         if (_cur == l) { // LRU is full, take the last one
462           --l;
463           h = _hash.find(l->url);
464           if (h != _hash.end()) {
465             _hash.erase(h);
466           }
467           if (0 == _show_urls) {
468             _dump_url(l, as_object);
469           }
470         } else {
471           l = _cur++;
472         }
473         ats_free(const_cast<char *>(l->url)); // We no longer own this string.
474       } else {
475         l = _stack.insert(l, UrlStats()); // This seems faster than having a static "template" ...
476       }
477 
478       // Setup this URL stat
479       l->url       = u;
480       l->req.bytes = bytes;
481       l->req.count = 1;
482 
483       if ((http_code >= 600) || (http_code < 200)) {
484         l->c_000 = 1;
485       } else if (http_code >= 500) {
486         l->c_5xx = 1;
487       } else if (http_code >= 400) {
488         l->c_4xx = 1;
489       } else if (http_code >= 300) {
490         l->c_3xx = 1;
491       } else { // http_code >= 200
492         l->c_2xx = 1;
493       }
494 
495       switch (result) {
496       case SQUID_LOG_TCP_HIT:
497       case SQUID_LOG_TCP_IMS_HIT:
498       case SQUID_LOG_TCP_REFRESH_HIT:
499       case SQUID_LOG_TCP_DISK_HIT:
500       case SQUID_LOG_TCP_MEM_HIT:
501       case SQUID_LOG_TCP_REF_FAIL_HIT:
502       case SQUID_LOG_UDP_HIT:
503       case SQUID_LOG_UDP_WEAK_HIT:
504       case SQUID_LOG_UDP_HIT_OBJ:
505         l->hits = 1;
506         break;
507       case SQUID_LOG_TCP_MISS:
508       case SQUID_LOG_TCP_IMS_MISS:
509       case SQUID_LOG_TCP_REFRESH_MISS:
510       case SQUID_LOG_TCP_EXPIRED_MISS:
511       case SQUID_LOG_TCP_WEBFETCH_MISS:
512       case SQUID_LOG_UDP_MISS:
513         l->misses = 1;
514         break;
515       case SQUID_LOG_ERR_CLIENT_ABORT:
516       case SQUID_LOG_ERR_CLIENT_READ_ERROR:
517       case SQUID_LOG_ERR_CONNECT_FAIL:
518       case SQUID_LOG_ERR_INVALID_REQ:
519       case SQUID_LOG_ERR_UNKNOWN:
520       case SQUID_LOG_ERR_READ_TIMEOUT:
521         l->errors = 1;
522         break;
523       }
524 
525       l->time.min = -1;
526       l->time.max = -1;
527       update_elapsed(l->time, time, l->req);
528       _hash[u] = l;
529 
530       // We running a real LRU or not?
531       if (_size > 0) {
532         _stack.splice(_stack.begin(), _stack, l); // Move this to the top of the stack
533       }
534     }
535   }
536 
537 private:
538   void
_init()539   _init()
540   {
541     if (_size > 0) {
542       _stack.resize(_size);
543       rehash(_hash, _size);
544     }
545   }
546 
547   void
_reset(bool free=false)548   _reset(bool free = false)
549   {
550     for (LruStack::iterator l = _stack.begin(); l != _stack.end(); ++l) {
551       if (free && l->url) {
552         ats_free(const_cast<char *>(l->url));
553       }
554       memset(&(*l), 0, sizeof(UrlStats));
555     }
556   }
557 
558   void
_dump_url(LruStack::iterator & u,int as_object)559   _dump_url(LruStack::iterator &u, int as_object)
560   {
561     if (as_object) {
562       std::cout << "  \"" << u->url << "\" : { ";
563     } else {
564       std::cout << "  { \"" << u->url << "\" : { ";
565       // Requests
566     }
567     std::cout << "\"req\" : { \"total\" : \"" << u->req.count << "\", \"hits\" : \"" << u->hits << "\", \"misses\" : \""
568               << u->misses << "\", \"errors\" : \"" << u->errors << "\", \"000\" : \"" << u->c_000 << "\", \"2xx\" : \"" << u->c_2xx
569               << "\", \"3xx\" : \"" << u->c_3xx << "\", \"4xx\" : \"" << u->c_4xx << "\", \"5xx\" : \"" << u->c_5xx << "\" }, ";
570     std::cout << "\"bytes\" : \"" << u->req.bytes << "\", ";
571     // Service times
572     std::cout << "\"svc_t\" : { \"min\" : \"" << u->time.min << "\", \"max\" : \"" << u->time.max << "\", \"avg\" : \""
573               << std::setiosflags(ios::fixed) << std::setprecision(2) << u->time.avg << "\", \"dev\" : \""
574               << std::setiosflags(ios::fixed) << std::setprecision(2) << u->time.stddev;
575 
576     if (as_object) {
577       std::cout << "\" } }," << std::endl;
578     } else {
579       std::cout << "\" } } }," << std::endl;
580     }
581   }
582 
583   LruHash _hash;
584   LruStack _stack;
585   int _size, _show_urls;
586   LruStack::iterator _cur;
587 };
588 
589 ///////////////////////////////////////////////////////////////////////////////
590 // Globals, holding the accumulated stats (ok, I'm lazy ...)
591 static OriginStats totals;
592 static OriginStorage origins;
593 static OriginSet *origin_set;
594 static UrlLru *urls;
595 static int parse_errors;
596 
597 // Command line arguments (parsing)
598 struct CommandLineArgs {
599   char log_file[1024];
600   char origin_file[1024];
601   char origin_list[MAX_ORIG_STRING];
602   int max_origins = 0;
603   char state_tag[1024];
604   int64_t min_hits = 0;
605   int max_age      = 0;
606   int line_len;
607   int incremental     = 0; // Do an incremental run
608   int tail            = 0; // Tail the log file
609   int summary         = 0; // Summary only
610   int json            = 0; // JSON output
611   int cgi             = 0; // CGI output (typically with json)
612   int urls            = 0; // Produce JSON output of URL stats, arg is LRU size
613   int show_urls       = 0; // Max URLs to show
614   int as_object       = 0; // Show the URL stats as a single JSON object (not array)
615   int concise         = 0; // Eliminate metrics that can be inferred by other values
616   int report_per_user = 0; // A flag to aggregate and report stats per user instead of per host if 'true' (default 'false')
617   int no_format_check = 0; // A flag to skip the log format check if any of the fields is not a standard squid log format field.
618 
CommandLineArgsCommandLineArgs619   CommandLineArgs() : line_len(DEFAULT_LINE_LEN)
620 
621   {
622     log_file[0]    = '\0';
623     origin_file[0] = '\0';
624     origin_list[0] = '\0';
625     state_tag[0]   = '\0';
626   }
627 
628   void parse_arguments(const char **argv);
629 };
630 
631 static CommandLineArgs cl;
632 
633 static ArgumentDescription argument_descriptions[] = {
634   {"log_file", 'f', "Specific logfile to parse", "S1023", cl.log_file, nullptr, nullptr},
635   {"origin_list", 'o', "Only show stats for listed Origins", "S4095", cl.origin_list, nullptr, nullptr},
636   {"origin_file", 'O', "File listing Origins to show", "S1023", cl.origin_file, nullptr, nullptr},
637   {"max_origins", 'M', "Max number of Origins to show", "I", &cl.max_origins, nullptr, nullptr},
638   {"urls", 'u', "Produce JSON stats for URLs, argument is LRU size", "I", &cl.urls, nullptr, nullptr},
639   {"show_urls", 'U', "Only show max this number of URLs", "I", &cl.show_urls, nullptr, nullptr},
640   {"as_object", 'A', "Produce URL stats as a JSON object instead of array", "T", &cl.as_object, nullptr, nullptr},
641   {"concise", 'C', "Eliminate metrics that can be inferred from other values", "T", &cl.concise, nullptr, nullptr},
642   {"incremental", 'i', "Incremental log parsing", "T", &cl.incremental, nullptr, nullptr},
643   {"statetag", 'S', "Name of the state file to use", "S1023", cl.state_tag, nullptr, nullptr},
644   {"tail", 't', "Parse the last <sec> seconds of log", "I", &cl.tail, nullptr, nullptr},
645   {"summary", 's', "Only produce the summary", "T", &cl.summary, nullptr, nullptr},
646   {"json", 'j', "Produce JSON formatted output", "T", &cl.json, nullptr, nullptr},
647   {"cgi", 'c', "Produce HTTP headers suitable as a CGI", "T", &cl.cgi, nullptr, nullptr},
648   {"min_hits", 'm', "Minimum total hits for an Origin", "L", &cl.min_hits, nullptr, nullptr},
649   {"max_age", 'a', "Max age for log entries to be considered", "I", &cl.max_age, nullptr, nullptr},
650   {"line_len", 'l', "Output line length", "I", &cl.line_len, nullptr, nullptr},
651   {"debug_tags", 'T', "Colon-Separated Debug Tags", "S1023", &error_tags, nullptr, nullptr},
652   {"report_per_user", 'r', "Report stats per user instead of host", "T", &cl.report_per_user, nullptr, nullptr},
653   {"no_format_check", 'n', "Don't validate the log format field names", "T", &cl.no_format_check, nullptr, nullptr},
654   HELP_ARGUMENT_DESCRIPTION(),
655   VERSION_ARGUMENT_DESCRIPTION(),
656   RUNROOT_ARGUMENT_DESCRIPTION()};
657 
658 static const char *USAGE_LINE = "Usage: " PROGRAM_NAME " [-f logfile] [-o origin[,...]] [-O originfile] [-m minhits] [-binshv]";
659 
660 void
parse_arguments(const char ** argv)661 CommandLineArgs::parse_arguments(const char **argv)
662 {
663   // process command-line arguments
664   process_args(&appVersionInfo, argument_descriptions, countof(argument_descriptions), argv, USAGE_LINE);
665 
666   // Process as "CGI" ?
667   if (strstr(argv[0], ".cgi") || cgi) {
668     char *query;
669 
670     json = 1;
671     cgi  = 1;
672 
673     if (nullptr != (query = getenv("QUERY_STRING"))) {
674       char buffer[MAX_ORIG_STRING];
675       char *tok, *sep_ptr, *val;
676 
677       ink_strlcpy(buffer, query, sizeof(buffer));
678       unescapifyStr(buffer);
679 
680       for (tok = strtok_r(buffer, "&", &sep_ptr); tok != nullptr;) {
681         val = strchr(tok, '=');
682         if (val) {
683           *(val++) = '\0';
684           if (0 == strncmp(tok, "origin_list", 11)) {
685             ink_strlcpy(origin_list, val, sizeof(origin_list));
686           } else if (0 == strncmp(tok, "state_tag", 9)) {
687             ink_strlcpy(state_tag, val, sizeof(state_tag));
688           } else if (0 == strncmp(tok, "max_origins", 11)) {
689             max_origins = strtol(val, nullptr, 10);
690           } else if (0 == strncmp(tok, "urls", 4)) {
691             urls = strtol(val, nullptr, 10);
692           } else if (0 == strncmp(tok, "show_urls", 9)) {
693             show_urls = strtol(val, nullptr, 10);
694           } else if (0 == strncmp(tok, "as_object", 9)) {
695             as_object = strtol(val, nullptr, 10);
696           } else if (0 == strncmp(tok, "min_hits", 8)) {
697             min_hits = strtol(val, nullptr, 10);
698           } else if (0 == strncmp(tok, "incremental", 11)) {
699             incremental = strtol(val, nullptr, 10);
700           } else {
701             // Unknown query arg.
702           }
703         }
704 
705         tok = strtok_r(nullptr, "&", &sep_ptr);
706       }
707     }
708   }
709 }
710 
// Return code levels. The numeric values increase with severity, which is
// what ExitStatus::set() relies on when it keeps the highest level seen.
enum ExitLevel {
  EXIT_OK = 0,
  EXIT_WARNING,  // 1
  EXIT_CRITICAL, // 2
  EXIT_UNKNOWN,  // 3
};
718 
719 struct ExitStatus {
720   ExitLevel level = EXIT_OK;
721   char notice[1024];
722 
ExitStatusExitStatus723   ExitStatus() { memset(notice, 0, sizeof(notice)); }
724   void
setExitStatus725   set(ExitLevel l, const char *n = nullptr)
726   {
727     if (l > level) {
728       level = l;
729     }
730     if (n) {
731       ink_strlcat(notice, n, sizeof(notice));
732     }
733   }
734 
735   void
appendExitStatus736   append(const char *n)
737   {
738     ink_strlcat(notice, n, sizeof(notice));
739   }
740 
741   void
appendExitStatus742   append(const std::string &s)
743   {
744     ink_strlcat(notice, s.c_str(), sizeof(notice));
745   }
746 };
747 
// States used while parsing a log line, ending with a terminal state.
enum ParseStates {
  P_STATE_ELAPSED = 0,
  P_STATE_IP,
  P_STATE_RESULT,
  P_STATE_CODE,
  P_STATE_SIZE,
  P_STATE_METHOD,
  P_STATE_URL,
  P_STATE_RFC931,
  P_STATE_HIERARCHY,
  P_STATE_PEER,
  P_STATE_TYPE,
  P_STATE_END,
};
763 
// HTTP methods we tell apart; there is one counter in OriginStats::methods
// for each of them.
enum HTTPMethod {
  METHOD_OPTIONS = 0,
  METHOD_GET,
  METHOD_HEAD,
  METHOD_POST,
  METHOD_PUT,
  METHOD_DELETE,
  METHOD_TRACE,
  METHOD_CONNECT,
  METHOD_PURGE,
  METHOD_NONE,
  METHOD_OTHER,
};
778 
// URL schemes we tell apart; there is one counter in OriginStats::schemes
// for each of them.
enum URLScheme {
  SCHEME_HTTP = 0,
  SCHEME_HTTPS,
  SCHEME_NONE,
  SCHEME_OTHER,
};
786 
787 ///////////////////////////////////////////////////////////////////////////////
788 // Initialize the elapsed field
789 inline void
init_elapsed(OriginStats * stats)790 init_elapsed(OriginStats *stats)
791 {
792   stats->elapsed.hits.hit.min       = -1;
793   stats->elapsed.hits.hit_ram.min   = -1;
794   stats->elapsed.hits.ims.min       = -1;
795   stats->elapsed.hits.refresh.min   = -1;
796   stats->elapsed.hits.other.min     = -1;
797   stats->elapsed.hits.total.min     = -1;
798   stats->elapsed.misses.miss.min    = -1;
799   stats->elapsed.misses.ims.min     = -1;
800   stats->elapsed.misses.refresh.min = -1;
801   stats->elapsed.misses.other.min   = -1;
802   stats->elapsed.misses.total.min   = -1;
803 }
804 
805 // Update the counters for one StatsCounter
806 inline void
update_counter(StatsCounter & counter,int size)807 update_counter(StatsCounter &counter, int size)
808 {
809   counter.count++;
810   counter.bytes += size;
811 }
812 
813 inline void
update_elapsed(ElapsedStats & stat,const int elapsed,const StatsCounter & counter)814 update_elapsed(ElapsedStats &stat, const int elapsed, const StatsCounter &counter)
815 {
816   int newcount, oldcount;
817   float oldavg, newavg, sum_of_squares;
818 
819   // Skip all the "0" values.
820   if (0 == elapsed) {
821     return;
822   }
823   if (-1 == stat.min) {
824     stat.min = elapsed;
825   } else if (stat.min > elapsed) {
826     stat.min = elapsed;
827   }
828 
829   if (stat.max < elapsed) {
830     stat.max = elapsed;
831   }
832 
833   // update_counter should have been called on counter.count before calling
834   // update_elapsed.
835   newcount = counter.count;
836   // New count should never be zero, else there was a programming error.
837   ink_release_assert(newcount);
838   oldcount = counter.count - 1;
839   oldavg   = stat.avg;
840   newavg   = (oldavg * oldcount + elapsed) / newcount;
841   // Now find the new standard deviation from the old one
842 
843   if (oldcount != 0) {
844     sum_of_squares = (stat.stddev * stat.stddev * oldcount);
845   } else {
846     sum_of_squares = 0;
847   }
848 
849   // Find the old sum of squares.
850   sum_of_squares = sum_of_squares + 2 * oldavg * oldcount * (oldavg - newavg) + oldcount * (newavg * newavg - oldavg * oldavg);
851 
852   // Now, find the new sum of squares.
853   sum_of_squares = sum_of_squares + (elapsed - newavg) * (elapsed - newavg);
854 
855   stat.stddev = sqrt(sum_of_squares / newcount);
856   stat.avg    = newavg;
857 }
858 
859 ///////////////////////////////////////////////////////////////////////////////
860 // Update the "result" and "elapsed" stats for a particular record
861 inline void
update_results_elapsed(OriginStats * stat,int result,int elapsed,int size)862 update_results_elapsed(OriginStats *stat, int result, int elapsed, int size)
863 {
864   switch (result) {
865   case SQUID_LOG_TCP_HIT:
866     update_counter(stat->results.hits.hit, size);
867     update_counter(stat->results.hits.total, size);
868     update_elapsed(stat->elapsed.hits.hit, elapsed, stat->results.hits.hit);
869     update_elapsed(stat->elapsed.hits.total, elapsed, stat->results.hits.total);
870     break;
871   case SQUID_LOG_TCP_MEM_HIT:
872     update_counter(stat->results.hits.hit_ram, size);
873     update_counter(stat->results.hits.total, size);
874     update_elapsed(stat->elapsed.hits.hit_ram, elapsed, stat->results.hits.hit_ram);
875     update_elapsed(stat->elapsed.hits.total, elapsed, stat->results.hits.total);
876     break;
877   case SQUID_LOG_TCP_MISS:
878     update_counter(stat->results.misses.miss, size);
879     update_counter(stat->results.misses.total, size);
880     update_elapsed(stat->elapsed.misses.miss, elapsed, stat->results.misses.miss);
881     update_elapsed(stat->elapsed.misses.total, elapsed, stat->results.misses.total);
882     break;
883   case SQUID_LOG_TCP_IMS_HIT:
884     update_counter(stat->results.hits.ims, size);
885     update_counter(stat->results.hits.total, size);
886     update_elapsed(stat->elapsed.hits.ims, elapsed, stat->results.hits.ims);
887     update_elapsed(stat->elapsed.hits.total, elapsed, stat->results.hits.total);
888     break;
889   case SQUID_LOG_TCP_IMS_MISS:
890     update_counter(stat->results.misses.ims, size);
891     update_counter(stat->results.misses.total, size);
892     update_elapsed(stat->elapsed.misses.ims, elapsed, stat->results.misses.ims);
893     update_elapsed(stat->elapsed.misses.total, elapsed, stat->results.misses.total);
894     break;
895   case SQUID_LOG_TCP_REFRESH_HIT:
896     update_counter(stat->results.hits.refresh, size);
897     update_counter(stat->results.hits.total, size);
898     update_elapsed(stat->elapsed.hits.refresh, elapsed, stat->results.hits.refresh);
899     update_elapsed(stat->elapsed.hits.total, elapsed, stat->results.hits.total);
900     break;
901   case SQUID_LOG_TCP_REFRESH_MISS:
902     update_counter(stat->results.misses.refresh, size);
903     update_counter(stat->results.misses.total, size);
904     update_elapsed(stat->elapsed.misses.refresh, elapsed, stat->results.misses.refresh);
905     update_elapsed(stat->elapsed.misses.total, elapsed, stat->results.misses.total);
906     break;
907   case SQUID_LOG_TCP_DISK_HIT:
908   case SQUID_LOG_TCP_REF_FAIL_HIT:
909   case SQUID_LOG_UDP_HIT:
910   case SQUID_LOG_UDP_WEAK_HIT:
911   case SQUID_LOG_UDP_HIT_OBJ:
912     update_counter(stat->results.hits.other, size);
913     update_counter(stat->results.hits.total, size);
914     update_elapsed(stat->elapsed.hits.other, elapsed, stat->results.hits.other);
915     update_elapsed(stat->elapsed.hits.total, elapsed, stat->results.hits.total);
916     break;
917   case SQUID_LOG_TCP_EXPIRED_MISS:
918   case SQUID_LOG_TCP_WEBFETCH_MISS:
919   case SQUID_LOG_UDP_MISS:
920     update_counter(stat->results.misses.other, size);
921     update_counter(stat->results.misses.total, size);
922     update_elapsed(stat->elapsed.misses.other, elapsed, stat->results.misses.other);
923     update_elapsed(stat->elapsed.misses.total, elapsed, stat->results.misses.total);
924     break;
925   case SQUID_LOG_ERR_CLIENT_ABORT:
926     update_counter(stat->results.errors.client_abort, size);
927     update_counter(stat->results.errors.total, size);
928     break;
929   case SQUID_LOG_ERR_CLIENT_READ_ERROR:
930     update_counter(stat->results.errors.client_read_error, size);
931     update_counter(stat->results.errors.total, size);
932     break;
933   case SQUID_LOG_ERR_CONNECT_FAIL:
934     update_counter(stat->results.errors.connect_fail, size);
935     update_counter(stat->results.errors.total, size);
936     break;
937   case SQUID_LOG_ERR_INVALID_REQ:
938     update_counter(stat->results.errors.invalid_req, size);
939     update_counter(stat->results.errors.total, size);
940     break;
941   case SQUID_LOG_ERR_UNKNOWN:
942     update_counter(stat->results.errors.unknown, size);
943     update_counter(stat->results.errors.total, size);
944     break;
945   default:
946     // This depends on all errors being at the end of the enum ... Which is the case right now.
947     if (result < SQUID_LOG_ERR_READ_TIMEOUT) {
948       update_counter(stat->results.other, size);
949     } else {
950       update_counter(stat->results.errors.other, size);
951       update_counter(stat->results.errors.total, size);
952     }
953     break;
954   }
955 }
956 
957 ///////////////////////////////////////////////////////////////////////////////
958 // Update the "codes" stats for a particular record
959 inline void
update_codes(OriginStats * stat,int code,int size)960 update_codes(OriginStats *stat, int code, int size)
961 {
962   switch (code) {
963   case 100:
964     update_counter(stat->codes.c_100, size);
965     break;
966 
967   // 200's
968   case 200:
969     update_counter(stat->codes.c_200, size);
970     break;
971   case 201:
972     update_counter(stat->codes.c_201, size);
973     break;
974   case 202:
975     update_counter(stat->codes.c_202, size);
976     break;
977   case 203:
978     update_counter(stat->codes.c_203, size);
979     break;
980   case 204:
981     update_counter(stat->codes.c_204, size);
982     break;
983   case 205:
984     update_counter(stat->codes.c_205, size);
985     break;
986   case 206:
987     update_counter(stat->codes.c_206, size);
988     break;
989 
990   // 300's
991   case 300:
992     update_counter(stat->codes.c_300, size);
993     break;
994   case 301:
995     update_counter(stat->codes.c_301, size);
996     break;
997   case 302:
998     update_counter(stat->codes.c_302, size);
999     break;
1000   case 303:
1001     update_counter(stat->codes.c_303, size);
1002     break;
1003   case 304:
1004     update_counter(stat->codes.c_304, size);
1005     break;
1006   case 305:
1007     update_counter(stat->codes.c_305, size);
1008     break;
1009   case 307:
1010     update_counter(stat->codes.c_307, size);
1011     break;
1012 
1013   // 400's
1014   case 400:
1015     update_counter(stat->codes.c_400, size);
1016     break;
1017   case 401:
1018     update_counter(stat->codes.c_401, size);
1019     break;
1020   case 402:
1021     update_counter(stat->codes.c_402, size);
1022     break;
1023   case 403:
1024     update_counter(stat->codes.c_403, size);
1025     break;
1026   case 404:
1027     update_counter(stat->codes.c_404, size);
1028     break;
1029   case 405:
1030     update_counter(stat->codes.c_405, size);
1031     break;
1032   case 406:
1033     update_counter(stat->codes.c_406, size);
1034     break;
1035   case 407:
1036     update_counter(stat->codes.c_407, size);
1037     break;
1038   case 408:
1039     update_counter(stat->codes.c_408, size);
1040     break;
1041   case 409:
1042     update_counter(stat->codes.c_409, size);
1043     break;
1044   case 410:
1045     update_counter(stat->codes.c_410, size);
1046     break;
1047   case 411:
1048     update_counter(stat->codes.c_411, size);
1049     break;
1050   case 412:
1051     update_counter(stat->codes.c_412, size);
1052     break;
1053   case 413:
1054     update_counter(stat->codes.c_413, size);
1055     break;
1056   case 414:
1057     update_counter(stat->codes.c_414, size);
1058     break;
1059   case 415:
1060     update_counter(stat->codes.c_415, size);
1061     break;
1062   case 416:
1063     update_counter(stat->codes.c_416, size);
1064     break;
1065   case 417:
1066     update_counter(stat->codes.c_417, size);
1067     break;
1068 
1069   // 500's
1070   case 500:
1071     update_counter(stat->codes.c_500, size);
1072     break;
1073   case 501:
1074     update_counter(stat->codes.c_501, size);
1075     break;
1076   case 502:
1077     update_counter(stat->codes.c_502, size);
1078     break;
1079   case 503:
1080     update_counter(stat->codes.c_503, size);
1081     break;
1082   case 504:
1083     update_counter(stat->codes.c_504, size);
1084     break;
1085   case 505:
1086     update_counter(stat->codes.c_505, size);
1087     break;
1088   default:
1089     break;
1090   }
1091 
1092   if ((code >= 600) || (code < 200)) {
1093     update_counter(stat->codes.c_000, size);
1094   } else if (code >= 500) {
1095     update_counter(stat->codes.c_5xx, size);
1096   } else if (code >= 400) {
1097     update_counter(stat->codes.c_4xx, size);
1098   } else if (code >= 300) {
1099     update_counter(stat->codes.c_3xx, size);
1100   } else if (code >= 200) {
1101     update_counter(stat->codes.c_2xx, size);
1102   }
1103 }
1104 
1105 ///////////////////////////////////////////////////////////////////////////////
1106 // Update the "methods" stats for a particular record
1107 inline void
update_methods(OriginStats * stat,int method,int size)1108 update_methods(OriginStats *stat, int method, int size)
1109 {
1110   // We're so lopsided on GETs, so makes most sense to test 'out of order'.
1111   switch (method) {
1112   case METHOD_GET:
1113     update_counter(stat->methods.get, size);
1114     break;
1115 
1116   case METHOD_OPTIONS:
1117     update_counter(stat->methods.options, size);
1118     break;
1119 
1120   case METHOD_HEAD:
1121     update_counter(stat->methods.head, size);
1122     break;
1123 
1124   case METHOD_POST:
1125     update_counter(stat->methods.post, size);
1126     break;
1127 
1128   case METHOD_PUT:
1129     update_counter(stat->methods.put, size);
1130     break;
1131 
1132   case METHOD_DELETE:
1133     update_counter(stat->methods.del, size);
1134     break;
1135 
1136   case METHOD_TRACE:
1137     update_counter(stat->methods.trace, size);
1138     break;
1139 
1140   case METHOD_CONNECT:
1141     update_counter(stat->methods.connect, size);
1142     break;
1143 
1144   case METHOD_PURGE:
1145     update_counter(stat->methods.purge, size);
1146     break;
1147 
1148   case METHOD_NONE:
1149     update_counter(stat->methods.none, size);
1150     break;
1151 
1152   default:
1153     update_counter(stat->methods.other, size);
1154     break;
1155   }
1156 }
1157 
1158 ///////////////////////////////////////////////////////////////////////////////
1159 // Update the "schemes" stats for a particular record
1160 inline void
update_schemes(OriginStats * stat,int scheme,int size)1161 update_schemes(OriginStats *stat, int scheme, int size)
1162 {
1163   if (SCHEME_HTTP == scheme) {
1164     update_counter(stat->schemes.http, size);
1165   } else if (SCHEME_HTTPS == scheme) {
1166     update_counter(stat->schemes.https, size);
1167   } else if (SCHEME_NONE == scheme) {
1168     update_counter(stat->schemes.none, size);
1169   } else {
1170     update_counter(stat->schemes.other, size);
1171   }
1172 }
1173 
1174 ///////////////////////////////////////////////////////////////////////////////
1175 // Update the "protocols" stats for a particular record
1176 inline void
update_protocols(OriginStats * stat,bool ipv6,int size)1177 update_protocols(OriginStats *stat, bool ipv6, int size)
1178 {
1179   if (ipv6) {
1180     update_counter(stat->protocols.ipv6, size);
1181   } else {
1182     update_counter(stat->protocols.ipv4, size);
1183   }
1184 }
1185 
1186 ///////////////////////////////////////////////////////////////////////////////
1187 // Finds or creates a stats structures if missing
1188 OriginStats *
find_or_create_stats(const char * key)1189 find_or_create_stats(const char *key)
1190 {
1191   OriginStats *o_stats = nullptr;
1192   OriginStorage::iterator o_iter;
1193   char *o_server = nullptr;
1194 
1195   // TODO: If we save state (struct) for a run, we probably need to always
1196   // update the origin data, no matter what the origin_set is.
1197   if (origin_set->empty() || (origin_set->find(key) != origin_set->end())) {
1198     o_iter = origins.find(key);
1199     if (origins.end() == o_iter) {
1200       o_stats = static_cast<OriginStats *>(ats_malloc(sizeof(OriginStats)));
1201       memset(o_stats, 0, sizeof(OriginStats));
1202       init_elapsed(o_stats);
1203       o_server = ats_strdup(key);
1204       if (o_server) {
1205         o_stats->server   = o_server;
1206         origins[o_server] = o_stats;
1207       }
1208     } else {
1209       o_stats = o_iter->second;
1210     }
1211   }
1212   return o_stats;
1213 }
1214 
1215 ///////////////////////////////////////////////////////////////////////////////
1216 // Update the stats
1217 void
update_stats(OriginStats * o_stats,const HTTPMethod method,URLScheme scheme,int http_code,int size,int result,int hier,int elapsed,bool ipv6)1218 update_stats(OriginStats *o_stats, const HTTPMethod method, URLScheme scheme, int http_code, int size, int result, int hier,
1219              int elapsed, bool ipv6)
1220 {
1221   update_results_elapsed(&totals, result, elapsed, size);
1222   update_codes(&totals, http_code, size);
1223   update_methods(&totals, method, size);
1224   update_schemes(&totals, scheme, size);
1225   update_protocols(&totals, ipv6, size);
1226   update_counter(totals.total, size);
1227   if (nullptr != o_stats) {
1228     update_results_elapsed(o_stats, result, elapsed, size);
1229     update_codes(o_stats, http_code, size);
1230     update_methods(o_stats, method, size);
1231     update_schemes(o_stats, scheme, size);
1232     update_protocols(o_stats, ipv6, size);
1233     update_counter(o_stats->total, size);
1234   }
1235 }
1236 
1237 ///////////////////////////////////////////////////////////////////////////////
1238 // Parse a log buffer
1239 int
parse_log_buff(LogBufferHeader * buf_header,bool summary=false,bool aggregate_per_userid=false)1240 parse_log_buff(LogBufferHeader *buf_header, bool summary = false, bool aggregate_per_userid = false)
1241 {
1242   static LogFieldList *fieldlist = nullptr;
1243 
1244   LogEntryHeader *entry;
1245   LogBufferIterator buf_iter(buf_header);
1246   LogField *field = nullptr;
1247   ParseStates state;
1248 
1249   char *read_from;
1250   char *tok;
1251   char *ptr;
1252   int tok_len;
1253   int flag = 0; // Flag used in state machine to carry "state" forward
1254 
1255   // Parsed results
1256   int http_code = 0, size = 0, result = 0, hier = 0, elapsed = 0;
1257   bool ipv6 = false;
1258   OriginStats *o_stats;
1259   HTTPMethod method;
1260   URLScheme scheme;
1261 
1262   if (!fieldlist) {
1263     fieldlist = new LogFieldList;
1264     ink_assert(fieldlist != nullptr);
1265     bool agg = false;
1266     LogFormat::parse_symbol_string(buf_header->fmt_fieldlist(), fieldlist, &agg);
1267   }
1268 
1269   if (!cl.no_format_check) {
1270     // Validate the fieldlist
1271     field                                = fieldlist->first();
1272     const std::string_view test_fields[] = {"cqtq", "ttms", "chi", "crc", "pssc", "psql", "cqhm", "cquc", "caun", "phr", "shn"};
1273     for (auto i : test_fields) {
1274       if (i != field->symbol()) {
1275         cerr << "Error parsing log file - expected field: " << i << ", but read field: " << field->symbol() << endl;
1276         return 1;
1277       }
1278       field = fieldlist->next(field);
1279     }
1280   }
1281 
1282   // Loop over all entries
1283   while ((entry = buf_iter.next())) {
1284     read_from = (char *)entry + sizeof(LogEntryHeader);
1285     // We read and skip over the first field, which is the timestamp.
1286     if ((field = fieldlist->first())) {
1287       read_from += INK_MIN_ALIGN;
1288     } else { // This shouldn't happen, buffer must be messed up.
1289       break;
1290     }
1291 
1292     state   = P_STATE_ELAPSED;
1293     o_stats = nullptr;
1294     method  = METHOD_OTHER;
1295     scheme  = SCHEME_OTHER;
1296 
1297     while ((field = fieldlist->next(field))) {
1298       switch (state) {
1299       case P_STATE_ELAPSED:
1300         state   = P_STATE_IP;
1301         elapsed = *((int64_t *)(read_from));
1302         read_from += INK_MIN_ALIGN;
1303         break;
1304 
1305       case P_STATE_IP:
1306         state = P_STATE_RESULT;
1307         // Just skip the IP, we no longer assume it's always the same.
1308         {
1309           LogFieldIp *ip = reinterpret_cast<LogFieldIp *>(read_from);
1310           int len        = sizeof(LogFieldIp);
1311           if (AF_INET == ip->_family) {
1312             ipv6 = false;
1313             len  = sizeof(LogFieldIp4);
1314           } else if (AF_INET6 == ip->_family) {
1315             ipv6 = true;
1316             len  = sizeof(LogFieldIp6);
1317           }
1318           read_from += INK_ALIGN_DEFAULT(len);
1319         }
1320         break;
1321 
1322       case P_STATE_RESULT:
1323         state  = P_STATE_CODE;
1324         result = *((int64_t *)(read_from));
1325         read_from += INK_MIN_ALIGN;
1326         if ((result < 32) || (result > 255)) {
1327           flag  = 1;
1328           state = P_STATE_END;
1329         }
1330         break;
1331 
1332       case P_STATE_CODE:
1333         state     = P_STATE_SIZE;
1334         http_code = *((int64_t *)(read_from));
1335         read_from += INK_MIN_ALIGN;
1336         if ((http_code < 0) || (http_code > 999)) {
1337           flag  = 1;
1338           state = P_STATE_END;
1339         }
1340         break;
1341 
1342       case P_STATE_SIZE:
1343         // Warning: This is not 64-bit safe, when converting the log format,
1344         // this needs to be fixed as well.
1345         state = P_STATE_METHOD;
1346         size  = *((int64_t *)(read_from));
1347         read_from += INK_MIN_ALIGN;
1348         break;
1349 
1350       case P_STATE_METHOD:
1351         state = P_STATE_URL;
1352         flag  = 0;
1353 
1354         // Small optimization for common (3-4 char) cases
1355         switch (*reinterpret_cast<int *>(read_from)) {
1356         case GET_AS_INT:
1357           method = METHOD_GET;
1358           read_from += LogAccess::round_strlen(3 + 1);
1359           break;
1360         case PUT_AS_INT:
1361           method = METHOD_PUT;
1362           read_from += LogAccess::round_strlen(3 + 1);
1363           break;
1364         case HEAD_AS_INT:
1365           method = METHOD_HEAD;
1366           read_from += LogAccess::round_strlen(4 + 1);
1367           break;
1368         case POST_AS_INT:
1369           method = METHOD_POST;
1370           read_from += LogAccess::round_strlen(4 + 1);
1371           break;
1372         default:
1373           tok_len = strlen(read_from);
1374           if ((5 == tok_len) && (0 == strncmp(read_from, "PURGE", 5))) {
1375             method = METHOD_PURGE;
1376           } else if ((6 == tok_len) && (0 == strncmp(read_from, "DELETE", 6))) {
1377             method = METHOD_DELETE;
1378           } else if ((7 == tok_len) && (0 == strncmp(read_from, "OPTIONS", 7))) {
1379             method = METHOD_OPTIONS;
1380           } else if ((1 == tok_len) && ('-' == *read_from)) {
1381             method = METHOD_NONE;
1382             flag   = 1; // No method, so no need to parse the URL
1383           } else {
1384             ptr = read_from;
1385             while (*ptr && isupper(*ptr)) {
1386               ++ptr;
1387             }
1388             // Skip URL if it doesn't look like an HTTP method
1389             if (*ptr != '\0') {
1390               flag = 1;
1391             }
1392           }
1393           read_from += LogAccess::round_strlen(tok_len + 1);
1394           break;
1395         }
1396         break;
1397 
1398       case P_STATE_URL:
1399         state = P_STATE_RFC931;
1400         if (urls) {
1401           urls->add_stat(read_from, size, elapsed, result, http_code, cl.as_object);
1402         }
1403 
1404         // TODO check for read_from being empty string
1405         if (0 == flag) {
1406           tok = read_from;
1407           if (HTTP_AS_INT == *reinterpret_cast<int *>(tok)) {
1408             tok += 4;
1409             if (':' == *tok) {
1410               scheme = SCHEME_HTTP;
1411               tok += 3;
1412               tok_len = strlen(tok) + 7;
1413             } else if ('s' == *tok) {
1414               scheme = SCHEME_HTTPS;
1415               tok += 4;
1416               tok_len = strlen(tok) + 8;
1417             } else {
1418               tok_len = strlen(tok) + 4;
1419             }
1420           } else {
1421             if ('/' == *tok) {
1422               scheme = SCHEME_NONE;
1423             }
1424             tok_len = strlen(tok);
1425           }
1426           if ('/' == *tok) { // This is to handle crazy stuff like http:///origin.com
1427             tok++;
1428           }
1429           ptr = strchr(tok, '/');
1430           if (ptr) {
1431             *ptr = '\0';
1432           }
1433           if (!aggregate_per_userid && !summary) {
1434             o_stats = find_or_create_stats(tok);
1435           }
1436         } else {
1437           // No method given
1438           if ('/' == *read_from) {
1439             scheme = SCHEME_NONE;
1440           }
1441           tok_len = strlen(read_from);
1442         }
1443         read_from += LogAccess::round_strlen(tok_len + 1);
1444         if (!aggregate_per_userid) {
1445           update_stats(o_stats, method, scheme, http_code, size, result, hier, elapsed, ipv6);
1446         }
1447         break;
1448 
1449       case P_STATE_RFC931:
1450         state = P_STATE_HIERARCHY;
1451 
1452         if (aggregate_per_userid) {
1453           if (!summary) {
1454             o_stats = find_or_create_stats(read_from);
1455           }
1456           update_stats(o_stats, method, scheme, http_code, size, result, hier, elapsed, ipv6);
1457         }
1458 
1459         if ('-' == *read_from) {
1460           read_from += LogAccess::round_strlen(1 + 1);
1461         } else {
1462           read_from += LogAccess::strlen(read_from);
1463         }
1464         break;
1465 
1466       case P_STATE_HIERARCHY:
1467         state = P_STATE_PEER;
1468         hier  = *((int64_t *)(read_from));
1469         switch (hier) {
1470         case SQUID_HIER_NONE:
1471           update_counter(totals.hierarchies.none, size);
1472           if (o_stats != nullptr) {
1473             update_counter(o_stats->hierarchies.none, size);
1474           }
1475           break;
1476         case SQUID_HIER_DIRECT:
1477           update_counter(totals.hierarchies.direct, size);
1478           if (o_stats != nullptr) {
1479             update_counter(o_stats->hierarchies.direct, size);
1480           }
1481           break;
1482         case SQUID_HIER_SIBLING_HIT:
1483           update_counter(totals.hierarchies.sibling, size);
1484           if (o_stats != nullptr) {
1485             update_counter(o_stats->hierarchies.sibling, size);
1486           }
1487           break;
1488         case SQUID_HIER_PARENT_HIT:
1489           update_counter(totals.hierarchies.parent, size);
1490           if (o_stats != nullptr) {
1491             update_counter(o_stats->hierarchies.direct, size);
1492           }
1493           break;
1494         case SQUID_HIER_EMPTY:
1495           update_counter(totals.hierarchies.empty, size);
1496           if (o_stats != nullptr) {
1497             update_counter(o_stats->hierarchies.empty, size);
1498           }
1499           break;
1500         default:
1501           if ((hier >= SQUID_HIER_EMPTY) && (hier < SQUID_HIER_INVALID_ASSIGNED_CODE)) {
1502             update_counter(totals.hierarchies.other, size);
1503             if (o_stats != nullptr) {
1504               update_counter(o_stats->hierarchies.other, size);
1505             }
1506           } else {
1507             update_counter(totals.hierarchies.invalid, size);
1508             if (o_stats != nullptr) {
1509               update_counter(o_stats->hierarchies.invalid, size);
1510             }
1511           }
1512           break;
1513         }
1514         read_from += INK_MIN_ALIGN;
1515         break;
1516 
1517       case P_STATE_PEER:
1518         state = P_STATE_TYPE;
1519         if ('-' == *read_from) {
1520           read_from += LogAccess::round_strlen(1 + 1);
1521         } else {
1522           read_from += LogAccess::strlen(read_from);
1523         }
1524         break;
1525 
1526       case P_STATE_TYPE:
1527         state = P_STATE_END;
1528         if (IMAG_AS_INT == *reinterpret_cast<int *>(read_from)) {
1529           update_counter(totals.content.image.total, size);
1530           if (o_stats != nullptr) {
1531             update_counter(o_stats->content.image.total, size);
1532           }
1533           tok = read_from + 6;
1534           switch (*reinterpret_cast<int *>(tok)) {
1535           case JPEG_AS_INT:
1536             tok_len = 10;
1537             update_counter(totals.content.image.jpeg, size);
1538             if (o_stats != nullptr) {
1539               update_counter(o_stats->content.image.jpeg, size);
1540             }
1541             break;
1542           case JPG_AS_INT:
1543             tok_len = 9;
1544             update_counter(totals.content.image.jpeg, size);
1545             if (o_stats != nullptr) {
1546               update_counter(o_stats->content.image.jpeg, size);
1547             }
1548             break;
1549           case GIF_AS_INT:
1550             tok_len = 9;
1551             update_counter(totals.content.image.gif, size);
1552             if (o_stats != nullptr) {
1553               update_counter(o_stats->content.image.gif, size);
1554             }
1555             break;
1556           case PNG_AS_INT:
1557             tok_len = 9;
1558             update_counter(totals.content.image.png, size);
1559             if (o_stats != nullptr) {
1560               update_counter(o_stats->content.image.png, size);
1561             }
1562             break;
1563           case BMP_AS_INT:
1564             tok_len = 9;
1565             update_counter(totals.content.image.bmp, size);
1566             if (o_stats != nullptr) {
1567               update_counter(o_stats->content.image.bmp, size);
1568             }
1569             break;
1570           default:
1571             tok_len = 6 + strlen(tok);
1572             update_counter(totals.content.image.other, size);
1573             if (o_stats != nullptr) {
1574               update_counter(o_stats->content.image.other, size);
1575             }
1576             break;
1577           }
1578         } else if (TEXT_AS_INT == *reinterpret_cast<int *>(read_from)) {
1579           tok = read_from + 5;
1580           update_counter(totals.content.text.total, size);
1581           if (o_stats != nullptr) {
1582             update_counter(o_stats->content.text.total, size);
1583           }
1584           switch (*reinterpret_cast<int *>(tok)) {
1585           case JAVA_AS_INT:
1586             // TODO verify if really "javascript"
1587             tok_len = 15;
1588             update_counter(totals.content.text.javascript, size);
1589             if (o_stats != nullptr) {
1590               update_counter(o_stats->content.text.javascript, size);
1591             }
1592             break;
1593           case CSS_AS_INT:
1594             tok_len = 8;
1595             update_counter(totals.content.text.css, size);
1596             if (o_stats != nullptr) {
1597               update_counter(o_stats->content.text.css, size);
1598             }
1599             break;
1600           case XML_AS_INT:
1601             tok_len = 8;
1602             update_counter(totals.content.text.xml, size);
1603             if (o_stats != nullptr) {
1604               update_counter(o_stats->content.text.xml, size);
1605             }
1606             break;
1607           case HTML_AS_INT:
1608             tok_len = 9;
1609             update_counter(totals.content.text.html, size);
1610             if (o_stats != nullptr) {
1611               update_counter(o_stats->content.text.html, size);
1612             }
1613             break;
1614           case PLAI_AS_INT:
1615             tok_len = 10;
1616             update_counter(totals.content.text.plain, size);
1617             if (o_stats != nullptr) {
1618               update_counter(o_stats->content.text.plain, size);
1619             }
1620             break;
1621           default:
1622             tok_len = 5 + strlen(tok);
1623             update_counter(totals.content.text.other, size);
1624             if (o_stats != nullptr) {
1625               update_counter(o_stats->content.text.other, size);
1626             }
1627             break;
1628           }
1629         } else if (0 == strncmp(read_from, "application", 11)) {
1630           tok = read_from + 12;
1631           update_counter(totals.content.application.total, size);
1632           if (o_stats != nullptr) {
1633             update_counter(o_stats->content.application.total, size);
1634           }
1635           switch (*reinterpret_cast<int *>(tok)) {
1636           case ZIP_AS_INT:
1637             tok_len = 15;
1638             update_counter(totals.content.application.zip, size);
1639             if (o_stats != nullptr) {
1640               update_counter(o_stats->content.application.zip, size);
1641             }
1642             break;
1643           case JAVA_AS_INT:
1644             tok_len = 22;
1645             update_counter(totals.content.application.javascript, size);
1646             if (o_stats != nullptr) {
1647               update_counter(o_stats->content.application.javascript, size);
1648             }
1649             break;
1650           case X_JA_AS_INT:
1651             tok_len = 24;
1652             update_counter(totals.content.application.javascript, size);
1653             if (o_stats != nullptr) {
1654               update_counter(o_stats->content.application.javascript, size);
1655             }
1656             break;
1657           case RSSp_AS_INT:
1658             if (0 == strcmp(tok + 4, "xml")) {
1659               tok_len = 19;
1660               update_counter(totals.content.application.rss_xml, size);
1661               if (o_stats != nullptr) {
1662                 update_counter(o_stats->content.application.rss_xml, size);
1663               }
1664             } else if (0 == strcmp(tok + 4, "atom")) {
1665               tok_len = 20;
1666               update_counter(totals.content.application.rss_atom, size);
1667               if (o_stats != nullptr) {
1668                 update_counter(o_stats->content.application.rss_atom, size);
1669               }
1670             } else {
1671               tok_len = 12 + strlen(tok);
1672               update_counter(totals.content.application.rss_other, size);
1673               if (o_stats != nullptr) {
1674                 update_counter(o_stats->content.application.rss_other, size);
1675               }
1676             }
1677             break;
1678           default:
1679             if (0 == strcmp(tok, "x-shockwave-flash")) {
1680               tok_len = 29;
1681               update_counter(totals.content.application.shockwave_flash, size);
1682               if (o_stats != nullptr) {
1683                 update_counter(o_stats->content.application.shockwave_flash, size);
1684               }
1685             } else if (0 == strcmp(tok, "x-quicktimeplayer")) {
1686               tok_len = 29;
1687               update_counter(totals.content.application.quicktime, size);
1688               if (o_stats != nullptr) {
1689                 update_counter(o_stats->content.application.quicktime, size);
1690               }
1691             } else {
1692               tok_len = 12 + strlen(tok);
1693               update_counter(totals.content.application.other, size);
1694               if (o_stats != nullptr) {
1695                 update_counter(o_stats->content.application.other, size);
1696               }
1697             }
1698           }
1699         } else if (0 == strncmp(read_from, "audio", 5)) {
1700           tok     = read_from + 6;
1701           tok_len = 6 + strlen(tok);
1702           update_counter(totals.content.audio.total, size);
1703           if (o_stats != nullptr) {
1704             update_counter(o_stats->content.audio.total, size);
1705           }
1706           if ((0 == strcmp(tok, "x-wav")) || (0 == strcmp(tok, "wav"))) {
1707             update_counter(totals.content.audio.wav, size);
1708             if (o_stats != nullptr) {
1709               update_counter(o_stats->content.audio.wav, size);
1710             }
1711           } else if ((0 == strcmp(tok, "x-mpeg")) || (0 == strcmp(tok, "mpeg"))) {
1712             update_counter(totals.content.audio.mpeg, size);
1713             if (o_stats != nullptr) {
1714               update_counter(o_stats->content.audio.mpeg, size);
1715             }
1716           } else {
1717             update_counter(totals.content.audio.other, size);
1718             if (o_stats != nullptr) {
1719               update_counter(o_stats->content.audio.other, size);
1720             }
1721           }
1722         } else if ('-' == *read_from) {
1723           tok_len = 1;
1724           update_counter(totals.content.none, size);
1725           if (o_stats != nullptr) {
1726             update_counter(o_stats->content.none, size);
1727           }
1728         } else {
1729           tok_len = strlen(read_from);
1730           update_counter(totals.content.other, size);
1731           if (o_stats != nullptr) {
1732             update_counter(o_stats->content.other, size);
1733           }
1734         }
1735         read_from += LogAccess::round_strlen(tok_len + 1);
1736         flag = 0; // We exited this state without errors
1737         break;
1738 
1739       case P_STATE_END:
1740         // Nothing to do really
1741         if (flag) {
1742           parse_errors++;
1743         }
1744         break;
1745       }
1746     }
1747   }
1748 
1749   return 0;
1750 }
1751 
1752 ///////////////////////////////////////////////////////////////////////////////
1753 // Process a file (FD)
1754 int
process_file(int in_fd,off_t offset,unsigned max_age)1755 process_file(int in_fd, off_t offset, unsigned max_age)
1756 {
1757   char buffer[MAX_LOGBUFFER_SIZE];
1758   int nread, buffer_bytes;
1759 
1760   Debug("logstats", "Processing file [offset=%" PRId64 "].", (int64_t)offset);
1761   while (true) {
1762     Debug("logstats", "Reading initial header.");
1763     buffer[0] = '\0';
1764 
1765     unsigned first_read_size = sizeof(uint32_t) + sizeof(uint32_t);
1766     LogBufferHeader *header  = (LogBufferHeader *)&buffer[0];
1767 
1768     // Find the next log header, aligning us properly. This is not
1769     // particularly optimal, but we should only have to do this
1770     // once, and hopefully we'll be aligned immediately.
1771     if (offset > 0) {
1772       Debug("logstats", "Re-aligning file read.");
1773       while (true) {
1774         if (lseek(in_fd, offset, SEEK_SET) < 0) {
1775           Debug("logstats", "Internal seek failed (offset=%" PRId64 ").", (int64_t)offset);
1776           return 1;
1777         }
1778 
1779         // read the first 8 bytes of the header, which will give us the
1780         // cookie and the version number.
1781         nread = read(in_fd, buffer, first_read_size);
1782         if (!nread || EOF == nread) {
1783           return 0;
1784         }
1785         // ensure that this is a valid logbuffer header
1786         if (header->cookie && (LOG_SEGMENT_COOKIE == header->cookie)) {
1787           offset = 0;
1788           break;
1789         }
1790         offset++;
1791       }
1792       if (!header->cookie) {
1793         return 0;
1794       }
1795     } else {
1796       nread = read(in_fd, buffer, first_read_size);
1797       if (!nread || EOF == nread || !header->cookie) {
1798         return 0;
1799       }
1800 
1801       // ensure that this is a valid logbuffer header
1802       if (header->cookie != LOG_SEGMENT_COOKIE) {
1803         Debug("logstats", "Invalid segment cookie (expected %d, got %d)", LOG_SEGMENT_COOKIE, header->cookie);
1804         return 1;
1805       }
1806     }
1807 
1808     Debug("logstats", "LogBuffer version %d, current = %d", header->version, LOG_SEGMENT_VERSION);
1809     if (header->version != LOG_SEGMENT_VERSION) {
1810       return 1;
1811     }
1812 
1813     // read the rest of the header
1814     unsigned second_read_size = sizeof(LogBufferHeader) - first_read_size;
1815     nread                     = read(in_fd, &buffer[first_read_size], second_read_size);
1816     if (!nread || EOF == nread) {
1817       Debug("logstats", "Second read of header failed (attempted %d bytes at offset %d, got nothing), errno=%d.", second_read_size,
1818             first_read_size, errno);
1819       return 1;
1820     }
1821 
1822     // read the rest of the buffer
1823     if (header->byte_count > sizeof(buffer)) {
1824       Debug("logstats", "Header byte count [%d] > expected [%zu]", header->byte_count, sizeof(buffer));
1825       return 1;
1826     }
1827 
1828     buffer_bytes = header->byte_count - sizeof(LogBufferHeader);
1829     if (buffer_bytes <= 0 || (unsigned int)buffer_bytes > (sizeof(buffer) - sizeof(LogBufferHeader))) {
1830       Debug("logstats", "Buffer payload [%d] is wrong.", buffer_bytes);
1831       return 1;
1832     }
1833 
1834     const int MAX_READ_TRIES = 5;
1835     int total_read           = 0;
1836     int read_tries_remaining = MAX_READ_TRIES; // since the data will be old anyway, let's only try a few times.
1837     do {
1838       nread = read(in_fd, &buffer[sizeof(LogBufferHeader) + total_read], buffer_bytes - total_read);
1839       if (EOF == nread || !nread) { // just bail on error
1840         Debug("logstats", "Read failed while reading log buffer, wanted %d bytes, nread=%d, errno=%d", buffer_bytes - total_read,
1841               nread, errno);
1842         return 1;
1843       } else {
1844         total_read += nread;
1845       }
1846 
1847       if (total_read < buffer_bytes) {
1848         if (--read_tries_remaining <= 0) {
1849           Debug("logstats_failed_retries", "Unable to read after %d tries, total_read=%d, buffer_bytes=%d", MAX_READ_TRIES,
1850                 total_read, buffer_bytes);
1851           return 1;
1852         }
1853         // let's wait until we get more data on this file descriptor
1854         Debug("logstats_partial_read",
1855               "Failed to read buffer payload [%d bytes], total_read=%d, buffer_bytes=%d, tries_remaining=%d",
1856               buffer_bytes - total_read, total_read, buffer_bytes, read_tries_remaining);
1857         usleep(50 * 1000); // wait 50ms
1858       }
1859     } while (total_read < buffer_bytes);
1860 
1861     // Possibly skip too old entries (the entire buffer is skipped)
1862     if (header->high_timestamp >= max_age) {
1863       if (parse_log_buff(header, cl.summary != 0, cl.report_per_user != 0) != 0) {
1864         Debug("logstats", "Failed to parse log buffer.");
1865         return 1;
1866       }
1867     } else {
1868       Debug("logstats", "Skipping old buffer (age=%d, max=%d)", header->high_timestamp, max_age);
1869     }
1870   }
1871 
1872   return 0;
1873 }
1874 
1875 ///////////////////////////////////////////////////////////////////////////////
1876 // Determine if this "stat" (Origin Server) is worthwhile to produce a
1877 // report for.
1878 inline int
use_origin(const OriginStats * stat)1879 use_origin(const OriginStats *stat)
1880 {
1881   return cl.report_per_user != 0 ?
1882            (stat->total.count > cl.min_hits) :
1883            ((stat->total.count > cl.min_hits) && (nullptr != strchr(stat->server, '.')) && (nullptr == strchr(stat->server, '%')));
1884 }
1885 
1886 ///////////////////////////////////////////////////////////////////////////////
1887 // Produce a nicely formatted output for a stats collection on a stream
1888 inline void
format_center(const char * str)1889 format_center(const char *str)
1890 {
1891   std::cout << std::setfill(' ') << std::setw((cl.line_len - strlen(str)) / 2 + strlen(str)) << str << std::endl << std::endl;
1892 }
1893 
// Print "num" on stdout with ',' as thousands separator, e.g. 1234567 is
// printed as "1,234,567". Zero and negative values are printed as "0".
//
// The formatted number is written to std::cout as one single item, so a
// std::setw() set on std::cout by the caller pads the whole number.
//
// The grouping is done on the exact decimal digits of the value. Deriving the
// group count from floating point log10()/pow() is not exact for large values
// (e.g. 999999999999999999 rounds to 1e18 as a double), which produced a
// bogus leading "0," group.
inline void
format_int(int64_t num)
{
  if (num <= 0) {
    std::cout << '0';
    return;
  }

  const std::string digits = std::to_string(num);
  std::string grouped;

  grouped.reserve(digits.size() + (digits.size() - 1) / 3);
  for (std::string::size_type i = 0; i < digits.size(); ++i) {
    // A separator goes in front of every digit that starts a full group of three.
    if (i > 0 && (digits.size() - i) % 3 == 0) {
      grouped += ',';
    }
    grouped += digits[i];
  }

  std::cout << grouped;
}
1916 
1917 void
format_elapsed_header()1918 format_elapsed_header()
1919 {
1920   std::cout << std::left << std::setw(24) << "Elapsed time stats";
1921   std::cout << std::right << std::setw(7) << "Min" << std::setw(13) << "Max";
1922   std::cout << std::right << std::setw(17) << "Avg" << std::setw(17) << "Std Deviation" << std::endl;
1923   std::cout << std::setw(cl.line_len) << std::setfill('-') << '-' << std::setfill(' ') << std::endl;
1924 }
1925 
1926 inline void
format_elapsed_line(const char * desc,const ElapsedStats & stat,bool json,bool concise)1927 format_elapsed_line(const char *desc, const ElapsedStats &stat, bool json, bool concise)
1928 {
1929   if (json) {
1930     std::cout << "    " << '"' << desc << "\" : "
1931               << "{ ";
1932     std::cout << "\"min\": \"" << stat.min << "\", ";
1933     std::cout << "\"max\": \"" << stat.max << "\"";
1934     if (!concise) {
1935       std::cout << ", \"avg\": \"" << std::setiosflags(ios::fixed) << std::setprecision(2) << stat.avg << "\", ";
1936       std::cout << "\"dev\": \"" << std::setiosflags(ios::fixed) << std::setprecision(2) << stat.stddev << "\"";
1937     }
1938     std::cout << " }," << std::endl;
1939   } else {
1940     std::cout << std::left << std::setw(24) << desc;
1941     std::cout << std::right << std::setw(7);
1942     format_int(stat.min);
1943     std::cout << std::right << std::setw(13);
1944     format_int(stat.max);
1945 
1946     std::cout << std::right << std::setw(17) << std::setiosflags(ios::fixed) << std::setprecision(2) << stat.avg;
1947     std::cout << std::right << std::setw(17) << std::setiosflags(ios::fixed) << std::setprecision(2) << stat.stddev;
1948     std::cout << std::endl;
1949   }
1950 }
1951 
1952 void
format_detail_header(const char * desc,bool concise=false)1953 format_detail_header(const char *desc, bool concise = false)
1954 {
1955   std::cout << std::left << std::setw(29) << desc;
1956   std::cout << std::right << std::setw(15) << "Count" << std::setw(11) << "Percent";
1957   std::cout << std::right << std::setw(12) << "Bytes" << std::setw(11) << "Percent" << std::endl;
1958   std::cout << std::setw(cl.line_len) << std::setfill('-') << '-' << std::setfill(' ') << std::endl;
1959 }
1960 
1961 inline void
format_line(const char * desc,const StatsCounter & stat,const StatsCounter & total,bool json,bool concise)1962 format_line(const char *desc, const StatsCounter &stat, const StatsCounter &total, bool json, bool concise)
1963 {
1964   static char metrics[] = "KKMGTP";
1965   static char buf[64];
1966   int ix = (stat.bytes > 1024 ? static_cast<int>(log10(static_cast<double>(stat.bytes)) / LOG10_1024) : 1);
1967 
1968   if (json) {
1969     std::cout << "    " << '"' << desc << "\" : "
1970               << "{ ";
1971     std::cout << "\"req\": \"" << stat.count << "\", ";
1972     if (!concise) {
1973       std::cout << "\"req_pct\": \"" << std::setiosflags(ios::fixed) << std::setprecision(2)
1974                 << (double)stat.count / total.count * 100 << "\", ";
1975     }
1976     std::cout << "\"bytes\": \"" << stat.bytes << "\"";
1977 
1978     if (!concise) {
1979       std::cout << ", \"bytes_pct\": \"" << std::setiosflags(ios::fixed) << std::setprecision(2)
1980                 << (double)stat.bytes / total.bytes * 100 << "\"";
1981     }
1982     std::cout << " }," << std::endl;
1983   } else {
1984     std::cout << std::left << std::setw(29) << desc;
1985 
1986     std::cout << std::right << std::setw(15);
1987     format_int(stat.count);
1988 
1989     snprintf(buf, sizeof(buf), "%10.2f%%", (static_cast<double>(stat.count) / total.count * 100));
1990     std::cout << std::right << buf;
1991 
1992     snprintf(buf, sizeof(buf), "%10.2f%cB", stat.bytes / pow(static_cast<double>(1024), ix), metrics[ix]);
1993     std::cout << std::right << buf;
1994 
1995     snprintf(buf, sizeof(buf), "%10.2f%%", (static_cast<double>(stat.bytes) / total.bytes * 100));
1996     std::cout << std::right << buf << std::endl;
1997   }
1998 }
1999 
2000 // Little "helpers" for the vector we use to sort the Origins.
2001 typedef pair<const char *, OriginStats *> OriginPair;
2002 inline bool
operator <(const OriginPair & a,const OriginPair & b)2003 operator<(const OriginPair &a, const OriginPair &b)
2004 {
2005   return a.second->total.count > b.second->total.count;
2006 }
2007 
2008 void
print_detail_stats(const OriginStats * stat,bool json,bool concise)2009 print_detail_stats(const OriginStats *stat, bool json, bool concise)
2010 {
2011   // Cache hit/misses etc.
2012   if (!json) {
2013     format_detail_header("Request Result");
2014   }
2015 
2016   format_line(json ? "hit.direct" : "Cache hit", stat->results.hits.hit, stat->total, json, concise);
2017   format_line(json ? "hit.ram" : "Cache hit RAM", stat->results.hits.hit_ram, stat->total, json, concise);
2018   format_line(json ? "hit.ims" : "Cache hit IMS", stat->results.hits.ims, stat->total, json, concise);
2019   format_line(json ? "hit.refresh" : "Cache hit refresh", stat->results.hits.refresh, stat->total, json, concise);
2020   format_line(json ? "hit.other" : "Cache hit other", stat->results.hits.other, stat->total, json, concise);
2021   format_line(json ? "hit.total" : "Cache hit total", stat->results.hits.total, stat->total, json, concise);
2022 
2023   if (!json) {
2024     std::cout << std::endl;
2025   }
2026 
2027   format_line(json ? "miss.direct" : "Cache miss", stat->results.misses.miss, stat->total, json, concise);
2028   format_line(json ? "miss.ims" : "Cache miss IMS", stat->results.misses.ims, stat->total, json, concise);
2029   format_line(json ? "miss.refresh" : "Cache miss refresh", stat->results.misses.refresh, stat->total, json, concise);
2030   format_line(json ? "miss.other" : "Cache miss other", stat->results.misses.other, stat->total, json, concise);
2031   format_line(json ? "miss.total" : "Cache miss total", stat->results.misses.total, stat->total, json, concise);
2032 
2033   if (!json) {
2034     std::cout << std::endl;
2035   }
2036 
2037   format_line(json ? "error.client_abort" : "Client aborted", stat->results.errors.client_abort, stat->total, json, concise);
2038   format_line(json ? "error.client_read_error" : "Client read error", stat->results.errors.client_read_error, stat->total, json,
2039               concise);
2040   format_line(json ? "error.connect_failed" : "Connect failed", stat->results.errors.connect_fail, stat->total, json, concise);
2041   format_line(json ? "error.invalid_request" : "Invalid request", stat->results.errors.invalid_req, stat->total, json, concise);
2042   format_line(json ? "error.unknown" : "Unknown error(99)", stat->results.errors.unknown, stat->total, json, concise);
2043   format_line(json ? "error.other" : "Other errors", stat->results.errors.other, stat->total, json, concise);
2044   format_line(json ? "error.total" : "Errors total", stat->results.errors.total, stat->total, json, concise);
2045 
2046   if (!json) {
2047     std::cout << std::setw(cl.line_len) << std::setfill('.') << '.' << std::setfill(' ') << std::endl;
2048     format_line("Total requests", stat->total, stat->total, json, concise);
2049     std::cout << std::endl << std::endl;
2050 
2051     // HTTP codes
2052     format_detail_header("HTTP return codes");
2053   }
2054 
2055   format_line(json ? "status.100" : "100 Continue", stat->codes.c_100, stat->total, json, concise);
2056 
2057   format_line(json ? "status.200" : "200 OK", stat->codes.c_200, stat->total, json, concise);
2058   format_line(json ? "status.201" : "201 Created", stat->codes.c_201, stat->total, json, concise);
2059   format_line(json ? "status.202" : "202 Accepted", stat->codes.c_202, stat->total, json, concise);
2060   format_line(json ? "status.203" : "203 Non-Authoritative Info", stat->codes.c_203, stat->total, json, concise);
2061   format_line(json ? "status.204" : "204 No content", stat->codes.c_204, stat->total, json, concise);
2062   format_line(json ? "status.205" : "205 Reset Content", stat->codes.c_205, stat->total, json, concise);
2063   format_line(json ? "status.206" : "206 Partial content", stat->codes.c_206, stat->total, json, concise);
2064   format_line(json ? "status.2xx" : "2xx Total", stat->codes.c_2xx, stat->total, json, concise);
2065 
2066   if (!json) {
2067     std::cout << std::endl;
2068   }
2069 
2070   format_line(json ? "status.300" : "300 Multiple Choices", stat->codes.c_300, stat->total, json, concise);
2071   format_line(json ? "status.301" : "301 Moved permanently", stat->codes.c_301, stat->total, json, concise);
2072   format_line(json ? "status.302" : "302 Found", stat->codes.c_302, stat->total, json, concise);
2073   format_line(json ? "status.303" : "303 See Other", stat->codes.c_303, stat->total, json, concise);
2074   format_line(json ? "status.304" : "304 Not modified", stat->codes.c_304, stat->total, json, concise);
2075   format_line(json ? "status.305" : "305 Use Proxy", stat->codes.c_305, stat->total, json, concise);
2076   format_line(json ? "status.307" : "307 Temporary Redirect", stat->codes.c_307, stat->total, json, concise);
2077   format_line(json ? "status.3xx" : "3xx Total", stat->codes.c_3xx, stat->total, json, concise);
2078 
2079   if (!json) {
2080     std::cout << std::endl;
2081   }
2082 
2083   format_line(json ? "status.400" : "400 Bad request", stat->codes.c_400, stat->total, json, concise);
2084   format_line(json ? "status.401" : "401 Unauthorized", stat->codes.c_401, stat->total, json, concise);
2085   format_line(json ? "status.402" : "402 Payment Required", stat->codes.c_402, stat->total, json, concise);
2086   format_line(json ? "status.403" : "403 Forbidden", stat->codes.c_403, stat->total, json, concise);
2087   format_line(json ? "status.404" : "404 Not found", stat->codes.c_404, stat->total, json, concise);
2088   format_line(json ? "status.405" : "405 Method Not Allowed", stat->codes.c_405, stat->total, json, concise);
2089   format_line(json ? "status.406" : "406 Not Acceptable", stat->codes.c_406, stat->total, json, concise);
2090   format_line(json ? "status.407" : "407 Proxy Auth Required", stat->codes.c_407, stat->total, json, concise);
2091   format_line(json ? "status.408" : "408 Request Timeout", stat->codes.c_408, stat->total, json, concise);
2092   format_line(json ? "status.409" : "409 Conflict", stat->codes.c_409, stat->total, json, concise);
2093   format_line(json ? "status.410" : "410 Gone", stat->codes.c_410, stat->total, json, concise);
2094   format_line(json ? "status.411" : "411 Length Required", stat->codes.c_411, stat->total, json, concise);
2095   format_line(json ? "status.412" : "412 Precondition Failed", stat->codes.c_412, stat->total, json, concise);
2096   format_line(json ? "status.413" : "413 Request Entity Too Large", stat->codes.c_413, stat->total, json, concise);
2097   format_line(json ? "status.414" : "414 Request-URI Too Long", stat->codes.c_414, stat->total, json, concise);
2098   format_line(json ? "status.415" : "415 Unsupported Media Type", stat->codes.c_415, stat->total, json, concise);
2099   format_line(json ? "status.416" : "416 Req Range Not Satisfiable", stat->codes.c_416, stat->total, json, concise);
2100   format_line(json ? "status.417" : "417 Expectation Failed", stat->codes.c_417, stat->total, json, concise);
2101   format_line(json ? "status.4xx" : "4xx Total", stat->codes.c_4xx, stat->total, json, concise);
2102 
2103   if (!json) {
2104     std::cout << std::endl;
2105   }
2106 
2107   format_line(json ? "status.500" : "500 Internal Server Error", stat->codes.c_500, stat->total, json, concise);
2108   format_line(json ? "status.501" : "501 Not implemented", stat->codes.c_501, stat->total, json, concise);
2109   format_line(json ? "status.502" : "502 Bad gateway", stat->codes.c_502, stat->total, json, concise);
2110   format_line(json ? "status.503" : "503 Service unavailable", stat->codes.c_503, stat->total, json, concise);
2111   format_line(json ? "status.504" : "504 Gateway Timeout", stat->codes.c_504, stat->total, json, concise);
2112   format_line(json ? "status.505" : "505 HTTP Ver. Not Supported", stat->codes.c_505, stat->total, json, concise);
2113   format_line(json ? "status.5xx" : "5xx Total", stat->codes.c_5xx, stat->total, json, concise);
2114 
2115   if (!json) {
2116     std::cout << std::endl;
2117   }
2118 
2119   format_line(json ? "status.000" : "000 Unknown", stat->codes.c_000, stat->total, json, concise);
2120 
2121   if (!json) {
2122     std::cout << std::endl << std::endl;
2123 
2124     // Origin hierarchies
2125     format_detail_header("Origin hierarchies");
2126   }
2127 
2128   format_line(json ? "hier.none" : "NONE", stat->hierarchies.none, stat->total, json, concise);
2129   format_line(json ? "hier.direct" : "DIRECT", stat->hierarchies.direct, stat->total, json, concise);
2130   format_line(json ? "hier.sibling" : "SIBLING", stat->hierarchies.sibling, stat->total, json, concise);
2131   format_line(json ? "hier.parent" : "PARENT", stat->hierarchies.parent, stat->total, json, concise);
2132   format_line(json ? "hier.empty" : "EMPTY", stat->hierarchies.empty, stat->total, json, concise);
2133   format_line(json ? "hier.invalid" : "invalid", stat->hierarchies.invalid, stat->total, json, concise);
2134   format_line(json ? "hier.other" : "other", stat->hierarchies.other, stat->total, json, concise);
2135 
2136   if (!json) {
2137     std::cout << std::endl << std::endl;
2138 
2139     // HTTP methods
2140     format_detail_header("HTTP Methods");
2141   }
2142 
2143   format_line(json ? "method.options" : "OPTIONS", stat->methods.options, stat->total, json, concise);
2144   format_line(json ? "method.get" : "GET", stat->methods.get, stat->total, json, concise);
2145   format_line(json ? "method.head" : "HEAD", stat->methods.head, stat->total, json, concise);
2146   format_line(json ? "method.post" : "POST", stat->methods.post, stat->total, json, concise);
2147   format_line(json ? "method.put" : "PUT", stat->methods.put, stat->total, json, concise);
2148   format_line(json ? "method.delete" : "DELETE", stat->methods.del, stat->total, json, concise);
2149   format_line(json ? "method.trace" : "TRACE", stat->methods.trace, stat->total, json, concise);
2150   format_line(json ? "method.connect" : "CONNECT", stat->methods.connect, stat->total, json, concise);
2151   format_line(json ? "method.purge" : "PURGE", stat->methods.purge, stat->total, json, concise);
2152   format_line(json ? "method.none" : "none (-)", stat->methods.none, stat->total, json, concise);
2153   format_line(json ? "method.other" : "other", stat->methods.other, stat->total, json, concise);
2154 
2155   if (!json) {
2156     std::cout << std::endl << std::endl;
2157 
2158     // URL schemes (HTTP/HTTPs)
2159     format_detail_header("URL Schemes");
2160   }
2161 
2162   format_line(json ? "scheme.http" : "HTTP (port 80)", stat->schemes.http, stat->total, json, concise);
2163   format_line(json ? "scheme.https" : "HTTPS (port 443)", stat->schemes.https, stat->total, json, concise);
2164   format_line(json ? "scheme.none" : "none", stat->schemes.none, stat->total, json, concise);
2165   format_line(json ? "scheme.other" : "other", stat->schemes.other, stat->total, json, concise);
2166 
2167   if (!json) {
2168     std::cout << std::endl << std::endl;
2169 
2170     // Protocol families
2171     format_detail_header("Protocols");
2172   }
2173 
2174   format_line(json ? "proto.ipv4" : "IPv4", stat->protocols.ipv4, stat->total, json, concise);
2175   format_line(json ? "proto.ipv6" : "IPv6", stat->protocols.ipv6, stat->total, json, concise);
2176 
2177   if (!json) {
2178     std::cout << std::endl << std::endl;
2179 
2180     // Content types
2181     format_detail_header("Content Types");
2182   }
2183 
2184   format_line(json ? "content.text.javascript" : "text/javascript", stat->content.text.javascript, stat->total, json, concise);
2185   format_line(json ? "content.text.css" : "text/css", stat->content.text.css, stat->total, json, concise);
2186   format_line(json ? "content.text.html" : "text/html", stat->content.text.html, stat->total, json, concise);
2187   format_line(json ? "content.text.xml" : "text/xml", stat->content.text.xml, stat->total, json, concise);
2188   format_line(json ? "content.text.plain" : "text/plain", stat->content.text.plain, stat->total, json, concise);
2189   format_line(json ? "content.text.other" : "text/ other", stat->content.text.other, stat->total, json, concise);
2190   format_line(json ? "content.text.total" : "text/ total", stat->content.text.total, stat->total, json, concise);
2191 
2192   if (!json) {
2193     std::cout << std::endl;
2194   }
2195 
2196   format_line(json ? "content.image.jpeg" : "image/jpeg", stat->content.image.jpeg, stat->total, json, concise);
2197   format_line(json ? "content.image.gif" : "image/gif", stat->content.image.gif, stat->total, json, concise);
2198   format_line(json ? "content.image.png" : "image/png", stat->content.image.png, stat->total, json, concise);
2199   format_line(json ? "content.image.bmp" : "image/bmp", stat->content.image.bmp, stat->total, json, concise);
2200   format_line(json ? "content.image.other" : "image/ other", stat->content.image.other, stat->total, json, concise);
2201   format_line(json ? "content.image.total" : "image/ total", stat->content.image.total, stat->total, json, concise);
2202 
2203   if (!json) {
2204     std::cout << std::endl;
2205   }
2206 
2207   format_line(json ? "content.audio.x-wav" : "audio/x-wav", stat->content.audio.wav, stat->total, json, concise);
2208   format_line(json ? "content.audio.x-mpeg" : "audio/x-mpeg", stat->content.audio.mpeg, stat->total, json, concise);
2209   format_line(json ? "content.audio.other" : "audio/ other", stat->content.audio.other, stat->total, json, concise);
2210   format_line(json ? "content.audio.total" : "audio/ total", stat->content.audio.total, stat->total, json, concise);
2211 
2212   if (!json) {
2213     std::cout << std::endl;
2214   }
2215 
2216   format_line(json ? "content.application.shockwave" : "application/x-shockwave", stat->content.application.shockwave_flash,
2217               stat->total, json, concise);
2218   format_line(json ? "content.application.javascript" : "application/[x-]javascript", stat->content.application.javascript,
2219               stat->total, json, concise);
2220   format_line(json ? "content.application.quicktime" : "application/x-quicktime", stat->content.application.quicktime, stat->total,
2221               json, concise);
2222   format_line(json ? "content.application.zip" : "application/zip", stat->content.application.zip, stat->total, json, concise);
2223   format_line(json ? "content.application.rss_xml" : "application/rss+xml", stat->content.application.rss_xml, stat->total, json,
2224               concise);
2225   format_line(json ? "content.application.rss_atom" : "application/rss+atom", stat->content.application.rss_atom, stat->total, json,
2226               concise);
2227   format_line(json ? "content.application.other" : "application/ other", stat->content.application.other, stat->total, json,
2228               concise);
2229   format_line(json ? "content.application.total" : "application/ total", stat->content.application.total, stat->total, json,
2230               concise);
2231 
2232   if (!json) {
2233     std::cout << std::endl;
2234   }
2235 
2236   format_line(json ? "content.none" : "none", stat->content.none, stat->total, json, concise);
2237   format_line(json ? "content.other" : "other", stat->content.other, stat->total, json, concise);
2238 
2239   if (!json) {
2240     std::cout << std::endl << std::endl;
2241 
2242     // Elapsed time
2243     format_elapsed_header();
2244   }
2245 
2246   format_elapsed_line(json ? "hit.direct.latency" : "Cache hit", stat->elapsed.hits.hit, json, concise);
2247   format_elapsed_line(json ? "hit.ram.latency" : "Cache hit RAM", stat->elapsed.hits.hit_ram, json, concise);
2248   format_elapsed_line(json ? "hit.ims.latency" : "Cache hit IMS", stat->elapsed.hits.ims, json, concise);
2249   format_elapsed_line(json ? "hit.refresh.latency" : "Cache hit refresh", stat->elapsed.hits.refresh, json, concise);
2250   format_elapsed_line(json ? "hit.other.latency" : "Cache hit other", stat->elapsed.hits.other, json, concise);
2251   format_elapsed_line(json ? "hit.total.latency" : "Cache hit total", stat->elapsed.hits.total, json, concise);
2252 
2253   format_elapsed_line(json ? "miss.direct.latency" : "Cache miss", stat->elapsed.misses.miss, json, concise);
2254   format_elapsed_line(json ? "miss.ims.latency" : "Cache miss IMS", stat->elapsed.misses.ims, json, concise);
2255   format_elapsed_line(json ? "miss.refresh.latency" : "Cache miss refresh", stat->elapsed.misses.refresh, json, concise);
2256   format_elapsed_line(json ? "miss.other.latency" : "Cache miss other", stat->elapsed.misses.other, json, concise);
2257   format_elapsed_line(json ? "miss.total.latency" : "Cache miss total", stat->elapsed.misses.total, json, concise);
2258 
2259   if (!json) {
2260     std::cout << std::endl;
2261     std::cout << std::setw(cl.line_len) << std::setfill('_') << '_' << std::setfill(' ') << std::endl;
2262   } else {
2263     std::cout << "    \"_timestamp\" : \"" << static_cast<int>(ink_time_wall_seconds()) << '"' << std::endl;
2264   }
2265 }
2266 
2267 ///////////////////////////////////////////////////////////////////////////////
2268 // Little wrapper around exit, to allow us to exit gracefully
2269 void
my_exit(const ExitStatus & status)2270 my_exit(const ExitStatus &status)
2271 {
2272   vector<OriginPair> vec;
2273   bool first = true;
2274   int max_origins;
2275 
2276   // Special case for URLs output.
2277   if (urls) {
2278     urls->dump(cl.as_object);
2279     if (cl.as_object) {
2280       std::cout << "}" << std::endl;
2281     } else {
2282       std::cout << "]" << std::endl;
2283     }
2284     ::exit(status.level);
2285   }
2286 
2287   if (cl.json) {
2288     // TODO: produce output
2289   } else {
2290     switch (status.level) {
2291     case EXIT_OK:
2292       break;
2293     case EXIT_WARNING:
2294       std::cout << "warning: " << status.notice << std::endl;
2295       break;
2296     case EXIT_CRITICAL:
2297       std::cout << "critical: " << status.notice << std::endl;
2298       ::exit(status.level);
2299       break;
2300     case EXIT_UNKNOWN:
2301       std::cout << "unknown: " << status.notice << std::endl;
2302       ::exit(status.level);
2303       break;
2304     }
2305   }
2306 
2307   if (!origins.empty()) {
2308     // Sort the Origins by 'traffic'
2309     for (OriginStorage::iterator i = origins.begin(); i != origins.end(); i++) {
2310       if (use_origin(i->second)) {
2311         vec.push_back(*i);
2312       }
2313     }
2314     sort(vec.begin(), vec.end());
2315 
2316     if (!cl.json) {
2317       // Produce a nice summary first
2318       format_center("Traffic summary");
2319       std::cout << std::left << std::setw(33) << "Origin Server";
2320       std::cout << std::right << std::setw(15) << "Hits";
2321       std::cout << std::right << std::setw(15) << "Misses";
2322       std::cout << std::right << std::setw(15) << "Errors" << std::endl;
2323       std::cout << std::setw(cl.line_len) << std::setfill('-') << '-' << std::setfill(' ') << std::endl;
2324 
2325       max_origins = cl.max_origins > 0 ? cl.max_origins : INT_MAX;
2326       for (vector<OriginPair>::iterator i = vec.begin(); (i != vec.end()) && (max_origins > 0); ++i, --max_origins) {
2327         std::cout << std::left << std::setw(33) << i->first;
2328         std::cout << std::right << std::setw(15);
2329         format_int(i->second->results.hits.total.count);
2330         std::cout << std::right << std::setw(15);
2331         format_int(i->second->results.misses.total.count);
2332         std::cout << std::right << std::setw(15);
2333         format_int(i->second->results.errors.total.count);
2334         std::cout << std::endl;
2335       }
2336       std::cout << std::setw(cl.line_len) << std::setfill('=') << '=' << std::setfill(' ') << std::endl;
2337       std::cout << std::endl << std::endl << std::endl;
2338     }
2339   }
2340 
2341   // Next the totals for all Origins, unless we specified a list of origins to filter.
2342   if (origin_set->empty()) {
2343     first = false;
2344     if (cl.json) {
2345       std::cout << "{ \"total\": {" << std::endl;
2346       print_detail_stats(&totals, cl.json, cl.concise);
2347       std::cout << "  }";
2348     } else {
2349       format_center("Totals (all Origins combined)");
2350       print_detail_stats(&totals, cl.json, cl.concise);
2351       std::cout << std::endl << std::endl << std::endl;
2352     }
2353   }
2354 
2355   // And finally the individual Origin Servers.
2356   max_origins = cl.max_origins > 0 ? cl.max_origins : INT_MAX;
2357   for (vector<OriginPair>::iterator i = vec.begin(); (i != vec.end()) && (max_origins > 0); ++i, --max_origins) {
2358     if (cl.json) {
2359       if (first) {
2360         std::cout << "{ ";
2361         first = false;
2362       } else {
2363         std::cout << "," << std::endl << "  ";
2364       }
2365       std::cout << '"' << i->first << "\": {" << std::endl;
2366       print_detail_stats(i->second, cl.json, cl.concise);
2367       std::cout << "  }";
2368     } else {
2369       format_center(i->first);
2370       print_detail_stats(i->second, cl.json, cl.concise);
2371       std::cout << std::endl << std::endl << std::endl;
2372     }
2373   }
2374 
2375   if (cl.json) {
2376     std::cout << std::endl << "}" << std::endl;
2377   }
2378 
2379   ::exit(status.level);
2380 }
2381 
2382 ///////////////////////////////////////////////////////////////////////////////
2383 // Open the "default" log file (squid.blog), allow for it to be rotated.
2384 int
open_main_log(ExitStatus & status)2385 open_main_log(ExitStatus &status)
2386 {
2387   std::string logfile(Layout::get()->logdir);
2388   int cnt = 3;
2389   int main_fd;
2390 
2391   logfile.append("/squid.blog");
2392   while (((main_fd = open(logfile.c_str(), O_RDONLY)) < 0) && --cnt) {
2393     switch (errno) {
2394     case ENOENT:
2395     case EACCES:
2396       sleep(5);
2397       break;
2398     default:
2399       status.append(" can't open squid.blog");
2400       return -1;
2401     }
2402   }
2403 
2404   if (main_fd < 0) {
2405     status.append(" squid.blog not enabled");
2406     return -1;
2407   }
2408 #if HAVE_POSIX_FADVISE
2409   if (0 != posix_fadvise(main_fd, 0, 0, POSIX_FADV_DONTNEED)) {
2410     status.append(" posix_fadvise() failed");
2411   }
2412 #endif
2413   return main_fd;
2414 }
2415 
2416 ///////////////////////////////////////////////////////////////////////////////
2417 // main
2418 int
main(int,const char * argv[])2419 main(int /* argc ATS_UNUSED */, const char *argv[])
2420 {
2421   ExitStatus exit_status;
2422   int res, cnt;
2423   int main_fd;
2424   unsigned max_age;
2425   struct flock lck;
2426 
2427   // build the application information structure
2428   appVersionInfo.setup(PACKAGE_NAME, PROGRAM_NAME, PACKAGE_VERSION, __DATE__, __TIME__, BUILD_MACHINE, BUILD_PERSON, "");
2429 
2430   runroot_handler(argv);
2431   // Before accessing file system initialize Layout engine
2432   Layout::create();
2433 
2434   memset(&totals, 0, sizeof(totals));
2435   init_elapsed(&totals);
2436 
2437   origin_set   = new OriginSet;
2438   parse_errors = 0;
2439 
2440   // Command line parsing
2441   cl.parse_arguments(argv);
2442 
2443   // Calculate the max age of acceptable log entries, if necessary
2444   if (cl.max_age > 0) {
2445     struct timeval tv;
2446 
2447     gettimeofday(&tv, nullptr);
2448     max_age = tv.tv_sec - cl.max_age;
2449   } else {
2450     max_age = 0;
2451   }
2452 
2453   // initialize this application for standalone logging operation
2454   init_log_standalone_basic(PROGRAM_NAME);
2455   Log::init(Log::NO_REMOTE_MANAGEMENT | Log::LOGCAT);
2456 
2457   // Do we have a list of Origins on the command line?
2458   if (cl.origin_list[0] != '\0') {
2459     char *tok;
2460     char *sep_ptr;
2461 
2462     for (tok = strtok_r(cl.origin_list, ",", &sep_ptr); tok != nullptr;) {
2463       origin_set->insert(tok);
2464       tok = strtok_r(nullptr, ",", &sep_ptr);
2465     }
2466   }
2467   // Load origins from an "external" file (\n separated)
2468   if (cl.origin_file[0] != '\0') {
2469     std::ifstream fs;
2470 
2471     fs.open(cl.origin_file, std::ios::in);
2472     if (!fs.is_open()) {
2473       std::cerr << "can't read " << cl.origin_file << std::endl;
2474       usage(argument_descriptions, countof(argument_descriptions), USAGE_LINE);
2475       ::exit(0);
2476     }
2477 
2478     while (!fs.eof()) {
2479       std::string line;
2480       std::string::size_type start, end;
2481 
2482       getline(fs, line);
2483       start = line.find_first_not_of(" \t");
2484       if (start != std::string::npos) {
2485         end = line.find_first_of(" \t#/");
2486         if (std::string::npos == end) {
2487           end = line.length();
2488         }
2489 
2490         if (end > start) {
2491           char *buf;
2492 
2493           buf = ats_stringdup(line.substr(start, end));
2494           if (buf) {
2495             origin_set->insert(buf);
2496           }
2497         }
2498       }
2499     }
2500   }
2501 
2502   // Produce the CGI header first (if applicable)
2503   if (cl.cgi) {
2504     std::cout << "Content-Type: application/javascript\r\n";
2505     std::cout << "Cache-Control: no-cache\r\n\r\n";
2506   }
2507 
2508   // Should we calculate per URL data;
2509   if (cl.urls != 0) {
2510     urls = new UrlLru(cl.urls, cl.show_urls);
2511     if (cl.as_object) {
2512       std::cout << "{" << std::endl;
2513     } else {
2514       std::cout << "[" << std::endl;
2515     }
2516   }
2517 
2518   // Do the incremental parse of the default squid log.
2519   if (cl.incremental) {
2520     // Change directory to the log dir
2521     if (chdir(Layout::get()->logdir.c_str()) < 0) {
2522       exit_status.set(EXIT_CRITICAL, " can't chdir to ");
2523       exit_status.append(Layout::get()->logdir);
2524       my_exit(exit_status);
2525     }
2526 
2527     std::string sf_name(Layout::get()->logdir);
2528     struct stat stat_buf;
2529     int state_fd;
2530     sf_name.append("/logstats.state");
2531 
2532     if (cl.state_tag[0] != '\0') {
2533       sf_name.append(".");
2534       sf_name.append(cl.state_tag);
2535     } else {
2536       // Default to the username
2537       struct passwd *pwd = getpwuid(geteuid());
2538 
2539       if (pwd) {
2540         sf_name.append(".");
2541         sf_name.append(pwd->pw_name);
2542       } else {
2543         exit_status.set(EXIT_CRITICAL, " can't get current UID");
2544         my_exit(exit_status);
2545       }
2546     }
2547 
2548     if ((state_fd = open(sf_name.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) < 0) {
2549       exit_status.set(EXIT_CRITICAL, " can't open state file ");
2550       exit_status.append(sf_name);
2551       my_exit(exit_status);
2552     }
2553     // Get an exclusive lock, if possible. Try for up to 20 seconds.
2554     // Use more portable & standard fcntl() over flock()
2555     lck.l_type   = F_WRLCK;
2556     lck.l_whence = 0; /* offset l_start from beginning of file*/
2557     lck.l_start  = static_cast<off_t>(0);
2558     lck.l_len    = static_cast<off_t>(0); /* till end of file*/
2559     cnt          = 10;
2560     while (((res = fcntl(state_fd, F_SETLK, &lck)) < 0) && --cnt) {
2561       switch (errno) {
2562       case