1 /** @file
2 
3   A brief file description
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #pragma once
25 
26 #include <netinet/in.h>
27 #include <iostream>
28 #include <list>
29 
30 #include "tscore/I_Version.h"
31 #include "tscore/Scalar.h"
32 #include "tscore/Regex.h"
33 #include "tscore/Errata.h"
34 #include "tscpp/util/TextView.h"
35 #include "tscore/ink_file.h"
36 #include "tscore/CryptoHash.h"
37 #include "tscore/ts_file.h"
38 
39 namespace tag
40 {
41 struct bytes {
42   static constexpr char const *const label = " bytes";
43 };
44 } // namespace tag
45 
46 using ts::round_down;
47 using ts::round_up;
48 
49 namespace ts
50 {
51 /* INK_ALIGN() is only to be used to align on a power of 2 boundary */
52 #define INK_ALIGN(size, boundary) (((size) + ((boundary)-1)) & ~((boundary)-1))
53 #define ROUND_TO_STORE_BLOCK(_x) INK_ALIGN((_x), 8192)
54 #define dir_clear(_e) \
55   do {                \
56     (_e)->w[0] = 0;   \
57     (_e)->w[1] = 0;   \
58     (_e)->w[2] = 0;   \
59     (_e)->w[3] = 0;   \
60     (_e)->w[4] = 0;   \
61   } while (0)
62 
63 #define dir_assign(_e, _x)   \
64   do {                       \
65     (_e)->w[0] = (_x)->w[0]; \
66     (_e)->w[1] = (_x)->w[1]; \
67     (_e)->w[2] = (_x)->w[2]; \
68     (_e)->w[3] = (_x)->w[3]; \
69     (_e)->w[4] = (_x)->w[4]; \
70   } while (0)
71 
72 constexpr static uint8_t CACHE_DB_MAJOR_VERSION = 24;
73 constexpr static uint8_t CACHE_DB_MINOR_VERSION = 1;
74 /// Maximum allowed volume index.
75 constexpr static int MAX_VOLUME_IDX          = 255;
76 constexpr static int ENTRIES_PER_BUCKET      = 4;
77 constexpr static int MAX_BUCKETS_PER_SEGMENT = (1 << 16) / ENTRIES_PER_BUCKET;
78 
79 typedef Scalar<1, off_t, tag::bytes> Bytes;
80 typedef Scalar<1024, off_t, tag::bytes> Kilobytes;
81 typedef Scalar<1024 * Kilobytes::SCALE, off_t, tag::bytes> Megabytes;
82 typedef Scalar<1024 * Megabytes::SCALE, off_t, tag::bytes> Gigabytes;
83 typedef Scalar<1024 * Gigabytes::SCALE, off_t, tag::bytes> Terabytes;
84 
85 // Units of allocation for stripes.
86 typedef Scalar<128 * Megabytes::SCALE, int64_t, tag::bytes> CacheStripeBlocks;
87 // Size measurement of cache storage.
88 // Also size of meta data storage units.
89 typedef Scalar<8 * Kilobytes::SCALE, int64_t, tag::bytes> CacheStoreBlocks;
90 // Size unit for content stored in cache.
91 typedef Scalar<512, int64_t, tag::bytes> CacheDataBlocks;
92 
93 /** A cache span is a representation of raw storage.
94     It corresponds to a raw disk, disk partition, file, or directory.
95  */
96 class CacheSpan
97 {
98 public:
99   /// Default offset of start of data in a span.
100   /// @internal I think this is done to avoid collisions with partition tracking mechanisms.
101   static const Bytes OFFSET;
102 };
103 
104 /** A section of storage in a span, used to contain a stripe.
105 
106     This is stored in the span header to describe the stripes in the span.
107 
108     @note Serializable.
109 
110     @internal nee @c DiskVolBlock
111  */
112 struct CacheStripeDescriptor {
113   Bytes offset;         // offset of start of stripe from start of span.
114   CacheStoreBlocks len; // length of block.
115   uint32_t vol_idx;     ///< If in use, the volume index.
116   unsigned int type : 3;
117   unsigned int free : 1;
118 };
119 
120 /** Header data for a span.
121 
122     This is the serializable descriptor stored in a span.
123 
124     @internal nee DiskHeader
125  */
126 struct SpanHeader {
127   static constexpr uint32_t MAGIC = 0xABCD1237;
128   uint32_t magic;
129   uint32_t num_volumes;      /* number of discrete volumes (DiskVol) */
130   uint32_t num_free;         /* number of disk volume blocks free */
131   uint32_t num_used;         /* number of disk volume blocks in use */
132   uint32_t num_diskvol_blks; /* number of disk volume blocks */
133   CacheStoreBlocks num_blocks;
134   /// Serialized stripe descriptors. This is treated as a variable sized array.
135   CacheStripeDescriptor stripes[1];
136 };
137 
138 /** Stripe data, serialized format.
139 
140     @internal nee VolHeadFooter
141  */
142 // the counterpart of this structure in ATS is called VolHeaderFooter
143 class StripeMeta
144 {
145 public:
146   static constexpr uint32_t MAGIC = 0xF1D0F00D;
147 
148   uint32_t magic;
149   VersionNumber version;
150   time_t create_time;
151   off_t write_pos;
152   off_t last_write_pos;
153   off_t agg_pos;
154   uint32_t generation; // token generation (vary), this cannot be 0
155   uint32_t phase;
156   uint32_t cycle;
157   uint32_t sync_serial;
158   uint32_t write_serial;
159   uint32_t dirty;
160   uint32_t sector_size;
161   uint32_t unused; // pad out to 8 byte boundary
162   uint16_t freelist[1];
163 };
164 
165 struct Doc {
166   uint32_t magic;     // DOC_MAGIC
167   uint32_t len;       // length of this fragment (including hlen & sizeof(Doc), unrounded)
168   uint64_t total_len; // total length of document
169 #if TS_ENABLE_FIPS == 1
170   // For FIPS CryptoHash is 256 bits vs. 128, and the 'first_key' must be checked first, so
171   // ensure that the new 'first_key' overlaps the old 'first_key' and that the rest of the data layout
172   // is the same by putting 'key' at the ned.
173   CryptoHash first_key; ///< first key in object.
174 #else
175   CryptoHash first_key; ///< first key in object.
176   CryptoHash key;       ///< Key for this doc.
177 #endif
178   uint32_t hlen;         ///< Length of this header.
179   uint32_t doc_type : 8; ///< Doc type - indicates the format of this structure and its content.
180   uint32_t v_major : 8;  ///< Major version number.
181   uint32_t v_minor : 8;  ///< Minor version number.
182   uint32_t unused : 8;   ///< Unused, forced to zero.
183   uint32_t sync_serial;
184   uint32_t write_serial;
185   uint32_t pinned; // pinned until
186   uint32_t checksum;
187 #if TS_ENABLE_FIPS == 1
188   CryptoHash key; ///< Key for this doc.
189 #endif
190   uint32_t data_len();
191   uint32_t prefix_len();
192   int single_fragment();
193   int no_data_in_fragment();
194   char *hdr();
195   char *data();
196 };
197 
198 /*
199  @internal struct Dir in P_CacheDir.h
200  * size: 10bytes
201  */
202 
203 class CacheDirEntry
204 {
205 public:
206 #if 0
207   unsigned int offset : 24;
208   unsigned int big : 2;
209   unsigned int size : 6;
210   unsigned int tag : 12;
211   unsigned int phase : 1;
212   unsigned int head : 1;
213   unsigned int pinnned : 1;
214   unsigned int token : 1;
215   unsigned int next : 16;
216   uint16_t offset_high;
217 #else
218   uint16_t w[5];
219 #endif
220 };
221 
222 class CacheVolume
223 {
224 };
225 
226 class URLparser
227 {
228 public:
229   bool verifyURL(std::string &url1);
230   Errata parseURL(TextView URI);
231   int getPort(std::string &fullURL, int &port_ptr, int &port_len);
232 
233 private:
234   //   DFA regex;
235 };
236 
237 class CacheURL
238 {
239 public:
240   in_port_t port;
241   std::string scheme;
242   std::string url;
243   std::string hostname;
244   std::string path;
245   std::string query;
246   std::string params;
247   std::string fragments;
248   std::string user;
249   std::string password;
CacheURL(int port_,ts::TextView b_hostname,ts::TextView b_path,ts::TextView b_params,ts::TextView b_query,ts::TextView b_fragments)250   CacheURL(int port_, ts::TextView b_hostname, ts::TextView b_path, ts::TextView b_params, ts::TextView b_query,
251            ts::TextView b_fragments)
252   {
253     hostname.assign(b_hostname.data(), b_hostname.size());
254     port = port_;
255     path.assign(b_path.data(), b_path.size());
256     params.assign(b_params.data(), b_params.size());
257     query.assign(b_query.data(), b_query.size());
258     fragments.assign(b_fragments.data(), b_fragments.size());
259   }
260 
CacheURL(ts::TextView blob,int port_)261   CacheURL(ts::TextView blob, int port_)
262   {
263     url.assign(blob.data(), blob.size());
264     port = port_;
265   }
266 
267   void
setCredential(char * p_user,int user_len,char * p_pass,int pass_len)268   setCredential(char *p_user, int user_len, char *p_pass, int pass_len)
269   {
270     user.assign(p_user, user_len);
271     password.assign(p_pass, pass_len);
272   }
273 };
274 } // namespace ts
275 
276 class DFA;
277 // this class matches url of the format : scheme://hostname:port/path;params?query
278 
279 struct url_matcher {
url_matcherurl_matcher280   url_matcher(ts::file::path const &path) // file contains a list of regex
281   {
282     std::error_code ec;
283     std::string load_content = ts::file::load(path, ec);
284     ts::TextView fileContent(load_content);
285     if (ec.value() == 0) {
286       const char **patterns;
287       std::vector<std::string> str_vec;
288       int count = 0;
289       while (fileContent) {
290         ts::TextView line = fileContent.take_prefix_at('\n');
291         std::string reg_str(line.data(), line.size());
292         str_vec.push_back(reg_str);
293         count++;
294       }
295       patterns = (const char **)ats_malloc(count * sizeof(char *));
296       int i    = 0;
297       for (const auto &str : str_vec) {
298         patterns[i++] = ats_strdup(str.data());
299 
300         std::cout << "regex input\n" << patterns[i - 1] << std::endl;
301       }
302       for (i = 0; i < count; i++) {
303         std::cout << "regex " << patterns[i] << std::endl;
304       }
305       if (regex.compile(patterns, count) != count) {
306         std::cout << "Check your regular expression" << std::endl;
307       }
308 
309       if (!port.compile(R"([0-9]+$)")) {
310         std::cout << "Check your regular expression" << std::endl;
311         return;
312       }
313     }
314   }
315 
url_matcherurl_matcher316   url_matcher()
317   {
318     if (!regex.compile(R"(^(https?\:\/\/)")) {
319       std::cout << "Check your regular expression" << std::endl;
320       return;
321     }
322     if (!port.compile(R"([0-9]+$)")) {
323       std::cout << "Check your regular expression" << std::endl;
324       return;
325     }
326   }
327 
~url_matcherurl_matcher328   ~url_matcher() {}
329 
330   uint8_t
matchurl_matcher331   match(const char *hostname) const
332   {
333     return regex.match(hostname) ? 1 : 0;
334   }
335   uint8_t
portmatchurl_matcher336   portmatch(const char *hostname, int length) const
337   {
338     return port.match({hostname, size_t(length)}) ? 1 : 0;
339   }
340 
341 private:
342   DFA port;
343   DFA regex;
344 };
345 
346 using ts::Bytes;
347 using ts::Megabytes;
348 using ts::CacheStoreBlocks;
349 using ts::CacheStripeBlocks;
350 using ts::StripeMeta;
351 using ts::CacheStripeDescriptor;
352 using ts::Errata;
353 using ts::CacheDirEntry;
354 using ts::MemSpan;
355 using ts::Doc;
356 
357 constexpr int ESTIMATED_OBJECT_SIZE     = 8000;
358 constexpr int DEFAULT_HW_SECTOR_SIZE    = 512;
359 constexpr int VOL_HASH_TABLE_SIZE       = 32707;
360 constexpr unsigned short VOL_HASH_EMPTY = 65535;
361 constexpr int DIR_TAG_WIDTH             = 12;
362 constexpr int DIR_DEPTH                 = 4;
363 constexpr int SIZEOF_DIR                = 10;
364 constexpr int MAX_ENTRIES_PER_SEGMENT   = (1 << 16);
365 constexpr int DIR_SIZE_WIDTH            = 6;
366 constexpr int DIR_BLOCK_SIZES           = 4;
367 constexpr int CACHE_BLOCK_SHIFT         = 9;
368 constexpr int CACHE_BLOCK_SIZE          = (1 << CACHE_BLOCK_SHIFT); // 512, smallest sector size
369 
370 namespace ct
371 {
372 #define dir_big(_e) ((uint32_t)((((_e)->w[1]) >> 8) & 0x3))
373 #define dir_bit(_e, _w, _b) ((uint32_t)(((_e)->w[_w] >> (_b)) & 1))
374 #define dir_size(_e) ((uint32_t)(((_e)->w[1]) >> 10))
375 #define dir_approx_size(_e) ((dir_size(_e) + 1) * DIR_BLOCK_SIZE(dir_big(_e)))
376 #define dir_head(_e) dir_bit(_e, 2, 13)
377 #define DIR_MASK_TAG(_t) ((_t) & ((1 << DIR_TAG_WIDTH) - 1))
378 #define dir_tag(_e) ((uint32_t)((_e)->w[2] & ((1 << DIR_TAG_WIDTH) - 1)))
379 #define dir_offset(_e) \
380   ((int64_t)(((uint64_t)(_e)->w[0]) | (((uint64_t)((_e)->w[1] & 0xFF)) << 16) | (((uint64_t)(_e)->w[4]) << 24)))
381 
382 #define dir_set_offset(_e, _o)                                              \
383   do {                                                                      \
384     (_e)->w[0] = (uint16_t)_o;                                              \
385     (_e)->w[1] = (uint16_t)((((_o) >> 16) & 0xFF) | ((_e)->w[1] & 0xFF00)); \
386     (_e)->w[4] = (uint16_t)((_o) >> 24);                                    \
387   } while (0)
388 
389 #define dir_next(_e) (_e)->w[3]
390 #define dir_phase(_e) dir_bit(_e, 2, 12)
391 #define DIR_BLOCK_SHIFT(_i) (3 * (_i))
392 #define DIR_BLOCK_SIZE(_i) (CACHE_BLOCK_SIZE << DIR_BLOCK_SHIFT(_i))
393 #define dir_set_prev(_e, _o) (_e)->w[2] = (uint16_t)(_o)
394 #define dir_set_next(_e, _o) (_e)->w[3] = (uint16_t)(_o)
395 
396 #define dir_in_seg(_s, _i) ((CacheDirEntry *)(((char *)(_s)) + (SIZEOF_DIR * (_i))))
397 
398 inline CacheDirEntry *
dir_from_offset(int64_t i,CacheDirEntry * seg)399 dir_from_offset(int64_t i, CacheDirEntry *seg)
400 {
401 #if DIR_DEPTH < 5
402   if (!i)
403     return nullptr;
404   return dir_in_seg(seg, i);
405 #else
406   i = i + ((i - 1) / (DIR_DEPTH - 1));
407   return dir_in_seg(seg, i);
408 #endif
409 }
410 
411 inline CacheDirEntry *
dir_bucket(int64_t b,CacheDirEntry * seg)412 dir_bucket(int64_t b, CacheDirEntry *seg)
413 {
414   return dir_in_seg(seg, b * DIR_DEPTH);
415 }
416 
417 inline CacheDirEntry *
next_dir(CacheDirEntry * d,CacheDirEntry * seg)418 next_dir(CacheDirEntry *d, CacheDirEntry *seg)
419 {
420   int i = dir_next(d);
421   return dir_from_offset(i, seg);
422 }
423 
424 inline CacheDirEntry *
dir_bucket_row(CacheDirEntry * b,int64_t i)425 dir_bucket_row(CacheDirEntry *b, int64_t i)
426 {
427   return dir_in_seg(b, i);
428 }
429 
430 inline int64_t
dir_to_offset(const CacheDirEntry * d,const CacheDirEntry * seg)431 dir_to_offset(const CacheDirEntry *d, const CacheDirEntry *seg)
432 {
433 #if DIR_DEPTH < 5
434   return (((char *)d) - ((char *)seg)) / SIZEOF_DIR;
435 #else
436   int64_t i = (int64_t)((((char *)d) - ((char *)seg)) / SIZEOF_DIR);
437   i         = i - (i / DIR_DEPTH);
438   return i;
439 #endif
440 }
441 
442 struct Stripe;
443 struct Span {
Spanct::Span444   Span(ts::file::path const &path) : _path(path) {}
445   Errata load();
446   Errata loadDevice();
447   bool isEmpty() const;
448   int header_len = 0;
449 
450   /// Replace all existing stripes with a single unallocated stripe covering the span.
451   Errata clear();
452 
453   /// This is broken and needs to be cleaned up.
454   void clearPermanently();
455 
456   ts::Rv<Stripe *> allocStripe(int vol_idx, const CacheStripeBlocks &len);
457   Errata updateHeader(); ///< Update serialized header and write to disk.
458 
459   ts::file::path _path;     ///< File system location of span.
460   ats_scoped_fd _fd;        ///< Open file descriptor for span.
461   int _vol_idx = 0;         ///< Forced volume.
462   CacheStoreBlocks _base;   ///< Offset to first usable byte.
463   CacheStoreBlocks _offset; ///< Offset to first content byte.
464   // The space between _base and _offset is where the span information is stored.
465   CacheStoreBlocks _len;                                 ///< Total length of span.
466   CacheStoreBlocks _free_space;                          ///< Total size of free stripes.
467   ink_device_geometry _geometry = ink_device_geometry(); ///< Geometry of span.
468   uint64_t num_usable_blocks    = 0; // number of usable blocks for stripes i.e., after subtracting the skip and the disk header.
469   /// Local copy of serialized header data stored on in the span.
470   std::unique_ptr<ts::SpanHeader> _header;
471   /// Live information about stripes.
472   /// Seeded from @a _header and potentially augmented with direct probing.
473   std::list<Stripe *> _stripes;
474 };
475 /* --------------------------------------------------------------------------------------- */
476 struct Stripe {
477   /// Meta data is stored in 4 copies A/B and Header/Footer.
478   enum Copy { A = 0, B = 1 };
479   enum { HEAD = 0, FOOT = 1 };
480 
481   /// Piece wise memory storage for the directory.
482   struct Chunk {
483     Bytes _start; ///< Starting offset relative to physical device of span.
484     Bytes _skip;  ///< # of bytes not valid at the start of the first block.
485     Bytes _clip;  ///< # of bytes not valid at the end of the last block.
486 
487     typedef std::vector<MemSpan<void>> Chain;
488     Chain _chain; ///< Chain of blocks.
489 
490     ~Chunk();
491 
492     void append(MemSpan<void> m);
493     void clear();
494   };
495 
496   /// Construct from span header data.
497   Stripe(Span *span, const Bytes &start, const CacheStoreBlocks &len);
498 
499   /// Is stripe unallocated?
500   bool isFree() const;
501 
502   /** Probe a chunk of memory @a mem for stripe metadata.
503 
504       @a mem is updated to remove memory that has been probed. If @a
505       meta is not @c nullptr then it is used for additional cross
506       checking.
507 
508       @return @c true if @a mem has valid data, @c false otherwise.
509   */
510   bool probeMeta(MemSpan<void> &mem, StripeMeta const *meta = nullptr);
511 
512   /// Check a buffer for being valid stripe metadata.
513   /// @return @c true if valid, @c false otherwise.
514   static bool validateMeta(StripeMeta const *meta);
515 
516   /// Load metadata for this stripe.
517   Errata loadMeta();
518   Errata loadDir();
519   int check_loop(int s);
520   void dir_check();
521   bool walk_bucket_chain(int s); // returns true if there is a loop
522   void walk_all_buckets();
523 
524   /// Initialize the live data from the loaded serialized data.
525   void updateLiveData(enum Copy c);
526 
527   Span *_span;           ///< Hosting span.
528   CryptoHash hash_id;    /// hash_id
529   Bytes _start;          ///< Offset of first byte of stripe metadata.
530   Bytes _content;        ///< Start of content.
531   CacheStoreBlocks _len; ///< Length of stripe.
532   uint8_t _vol_idx = 0;  ///< Volume index.
533   uint8_t _type    = 0;  ///< Stripe type.
534   int8_t _idx      = -1; ///< Stripe index in span.
535   int agg_buf_pos  = 0;
536 
537   int64_t _buckets  = 0; ///< Number of buckets per segment.
538   int64_t _segments = 0; ///< Number of segments.
539 
540   std::string hashText;
541 
542   /// Meta copies, indexed by A/B then HEAD/FOOT.
543   StripeMeta _meta[2][2];
544   /// Locations for the meta data.
545   CacheStoreBlocks _meta_pos[2][2];
546   /// Directory.
547   Chunk _directory;
548   CacheDirEntry const *dir = nullptr; // the big buffer that will hold the whole directory of stripe header.
549   uint16_t *freelist       = nullptr; // using this freelist instead of the one in StripeMeta.
550                                       // This is because the freelist is not being copied to _metap[2][2] correctly.
551   // need to do something about it .. hmmm :-?
552   int dir_freelist_length(int s);
553   inline CacheDirEntry *
vol_dir_segmentct::Stripe554   vol_dir_segment(int s)
555   {
556     return (CacheDirEntry *)(((char *)this->dir) + (s * this->_buckets) * DIR_DEPTH * SIZEOF_DIR);
557   }
558   inline CacheDirEntry *
dir_segmentct::Stripe559   dir_segment(int s)
560   {
561     return vol_dir_segment(s);
562   }
563 
564   Bytes stripe_offset(CacheDirEntry *e); // offset w.r.t the stripe content
565   size_t vol_dirlen();
566   inline int
vol_headerlenct::Stripe567   vol_headerlen()
568   {
569     return ROUND_TO_STORE_BLOCK(sizeof(StripeMeta) + sizeof(uint16_t) * (this->_segments - 1));
570   }
571   void vol_init_data_internal();
572   void vol_init_data();
573   void dir_init_segment(int s);
574   void dir_free_entry(CacheDirEntry *e, int s);
575   CacheDirEntry *dir_delete_entry(CacheDirEntry *e, CacheDirEntry *p, int s);
576   //  int dir_bucket_length(CacheDirEntry *b, int s);
577   int dir_probe(CryptoHash *key, CacheDirEntry *result, CacheDirEntry **last_collision);
578   bool dir_valid(CacheDirEntry *e);
579   bool validate_sync_serial();
580   Errata updateHeaderFooter();
581   Errata InitializeMeta();
582   void init_dir();
583   Errata clear(); // clears striped headers and footers
584 };
585 } // namespace ct
586