xref: /trafficserver/src/tscore/Regex.cc (revision 4cfd5a73)
1 /** @file
2 
  Thin C++ wrappers around PCRE: Regex (a single compiled pattern) and DFA (an ordered list of patterns).
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #include <array>
25 
26 #include "tscore/ink_platform.h"
27 #include "tscore/ink_thread.h"
28 #include "tscore/ink_memory.h"
29 #include "tscore/Regex.h"
30 
31 #ifdef PCRE_CONFIG_JIT
32 struct RegexThreadKey {
33   RegexThreadKey() { ink_thread_key_create(&this->key, reinterpret_cast<void (*)(void *)>(&pcre_jit_stack_free)); }
34   ink_thread_key key;
35 };
36 
37 static RegexThreadKey k;
38 
39 static pcre_jit_stack *
40 get_jit_stack(void *data ATS_UNUSED)
41 {
42   pcre_jit_stack *jit_stack;
43 
44   if ((jit_stack = static_cast<pcre_jit_stack *>(ink_thread_getspecific(k.key))) == nullptr) {
45     jit_stack = pcre_jit_stack_alloc(ats_pagesize(), 1024 * 1024); // 1 page min and 1MB max
46     ink_thread_setspecific(k.key, (void *)jit_stack);
47   }
48 
49   return jit_stack;
50 }
51 #endif
52 
53 Regex::Regex(Regex &&that) noexcept : regex(that.regex), regex_extra(that.regex_extra)
54 {
55   that.regex       = nullptr;
56   that.regex_extra = nullptr;
57 }
58 
59 bool
60 Regex::compile(const char *pattern, const unsigned flags)
61 {
62   const char *error;
63   int erroffset;
64   int options    = 0;
65   int study_opts = 0;
66 
67   if (regex) {
68     return false;
69   }
70 
71   if (flags & RE_CASE_INSENSITIVE) {
72     options |= PCRE_CASELESS;
73   }
74 
75   if (flags & RE_ANCHORED) {
76     options |= PCRE_ANCHORED;
77   }
78 
79   regex = pcre_compile(pattern, options, &error, &erroffset, nullptr);
80   if (error) {
81     regex = nullptr;
82     return false;
83   }
84 
85 #ifdef PCRE_CONFIG_JIT
86   study_opts |= PCRE_STUDY_JIT_COMPILE;
87 #endif
88 
89   regex_extra = pcre_study(regex, study_opts, &error);
90 
91 #ifdef PCRE_CONFIG_JIT
92   if (regex_extra) {
93     pcre_assign_jit_stack(regex_extra, &get_jit_stack, nullptr);
94   }
95 #endif
96 
97   return true;
98 }
99 
100 int
101 Regex::get_capture_count()
102 {
103   int captures = -1;
104   if (pcre_fullinfo(regex, regex_extra, PCRE_INFO_CAPTURECOUNT, &captures) != 0) {
105     return -1;
106   }
107 
108   return captures;
109 }
110 
111 bool
112 Regex::exec(std::string_view const &str)
113 {
114   std::array<int, DEFAULT_GROUP_COUNT * 3> ovector;
115   return this->exec(str, ovector.data(), ovector.size());
116 }
117 
118 bool
119 Regex::exec(std::string_view const &str, int *ovector, int ovecsize)
120 {
121   int rv;
122 
123   rv = pcre_exec(regex, regex_extra, str.data(), int(str.size()), 0, 0, ovector, ovecsize);
124   return rv > 0;
125 }
126 
127 Regex::~Regex()
128 {
129   if (regex_extra) {
130 #ifdef PCRE_CONFIG_JIT
131     pcre_free_study(regex_extra);
132 #else
133     pcre_free(regex_extra);
134 #endif
135   }
136   if (regex) {
137     pcre_free(regex);
138   }
139 }
140 
141 DFA::~DFA() {}
142 
143 bool
144 DFA::build(std::string_view const &pattern, unsigned flags)
145 {
146   Regex rxp;
147   std::string string{pattern};
148 
149   if (!(flags & RE_UNANCHORED)) {
150     flags |= RE_ANCHORED;
151   }
152 
153   if (!rxp.compile(string.c_str(), flags)) {
154     return false;
155   }
156   _patterns.emplace_back(std::move(rxp), std::move(string));
157   return true;
158 }
159 
160 int
161 DFA::compile(std::string_view const &pattern, unsigned flags)
162 {
163   ink_assert(_patterns.empty());
164   this->build(pattern, flags);
165   return _patterns.size();
166 }
167 
168 int
169 DFA::compile(std::string_view *patterns, int npatterns, unsigned flags)
170 {
171   _patterns.reserve(npatterns); // try to pre-allocate.
172   for (int i = 0; i < npatterns; ++i) {
173     this->build(patterns[i], flags);
174   }
175   return _patterns.size();
176 }
177 
178 int
179 DFA::compile(const char **patterns, int npatterns, unsigned flags)
180 {
181   _patterns.reserve(npatterns); // try to pre-allocate.
182   for (int i = 0; i < npatterns; ++i) {
183     this->build(patterns[i], flags);
184   }
185   return _patterns.size();
186 }
187 
188 int
189 DFA::match(std::string_view const &str) const
190 {
191   // This is ugly, but the external interface needs to be @c const even though it's not really.
192   // This handles making the iterator non-const.
193   auto &pv{const_cast<decltype(_patterns) &>(_patterns)};
194   for (auto spot = pv.begin(), limit = pv.end(); spot != limit; ++spot) {
195     if (spot->_re.exec(str)) {
196       return spot - _patterns.begin();
197     }
198   }
199 
200   return -1;
201 }
202