xref: /trafficserver/src/tscore/lockfile.cc (revision 4cfd5a73)
1 /** @file
2 
3   A brief file description
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #include "tscore/ink_platform.h"
25 #include "tscore/ink_lockfile.h"
26 
27 #define LOCKFILE_BUF_LEN 16 // 16 bytes should be enough for a pid
28 
29 int
Open(pid_t * holding_pid)30 Lockfile::Open(pid_t *holding_pid)
31 {
32   char buf[LOCKFILE_BUF_LEN];
33   pid_t val;
34   int err;
35   *holding_pid = 0;
36 
37 #define FAIL(x)  \
38   {              \
39     if (fd > 0)  \
40       close(fd); \
41     return (x);  \
42   }
43 
44   struct flock lock;
45   char *t;
46   int size;
47 
48   fd = -1;
49 
50   // Try and open the Lockfile. Create it if it does not already
51   // exist.
52   do {
53     fd = open(fname, O_RDWR | O_CREAT, 0644);
54   } while ((fd < 0) && (errno == EINTR));
55 
56   if (fd < 0) {
57     return (-errno);
58   }
59 
60   // Lock it. Note that if we can't get the lock EAGAIN will be the
61   // error we receive.
62   lock.l_type   = F_WRLCK;
63   lock.l_start  = 0;
64   lock.l_whence = SEEK_SET;
65   lock.l_len    = 0;
66 
67   do {
68     err = fcntl(fd, F_SETLK, &lock);
69   } while ((err < 0) && (errno == EINTR));
70 
71   if (err < 0) {
72     // We couldn't get the lock. Try and read the process id of the
73     // process holding the lock from the lockfile.
74     t = buf;
75 
76     for (size = 15; size > 0;) {
77       do {
78         err = read(fd, t, size);
79       } while ((err < 0) && (errno == EINTR));
80 
81       if (err < 0)
82         FAIL(-errno);
83       if (err == 0) {
84         break;
85       }
86 
87       size -= err;
88       t += err;
89     }
90 
91     *t = '\0';
92 
93     // coverity[secure_coding]
94     if (sscanf(buf, "%d\n", static_cast<int *>(&val)) != 1) {
95       *holding_pid = 0;
96     } else {
97       *holding_pid = val;
98     }
99     FAIL(0);
100   }
101   // If we did get the lock, then set the close on exec flag so that
102   // we don't accidentally pass the file descriptor to a child process
103   // when we do a fork/exec.
104   do {
105     err = fcntl(fd, F_GETFD, 0);
106   } while ((err < 0) && (errno == EINTR));
107 
108   if (err < 0)
109     FAIL(-errno);
110 
111   val = err | FD_CLOEXEC;
112 
113   do {
114     err = fcntl(fd, F_SETFD, val);
115   } while ((err < 0) && (errno == EINTR));
116 
117   if (err < 0)
118     FAIL(-errno);
119 
120   // Return the file descriptor of the opened lockfile. When this file
121   // descriptor is closed the lock will be released.
122 
123   return (1); // success
124 
125 #undef FAIL
126 }
127 
128 int
Get(pid_t * holding_pid)129 Lockfile::Get(pid_t *holding_pid)
130 {
131   char buf[LOCKFILE_BUF_LEN];
132   int err;
133   *holding_pid = 0;
134 
135   fd = -1;
136 
137   // Open the Lockfile and get the lock. If we are successful, the
138   // return value will be the file descriptor of the opened Lockfile.
139   err = Open(holding_pid);
140   if (err != 1) {
141     return err;
142   }
143 
144   if (fd < 0) {
145     return -1;
146   }
147   // Truncate the Lockfile effectively erasing it.
148   do {
149     err = ftruncate(fd, 0);
150   } while ((err < 0) && (errno == EINTR));
151 
152   if (err < 0) {
153     close(fd);
154     return (-errno);
155   }
156   // Write our process id to the Lockfile.
157   snprintf(buf, sizeof(buf), "%d\n", static_cast<int>(getpid()));
158 
159   do {
160     err = write(fd, buf, strlen(buf));
161   } while ((err < 0) && (errno == EINTR));
162 
163   if (err != static_cast<int>(strlen(buf))) {
164     close(fd);
165     return (-errno);
166   }
167 
168   return (1); // success
169 }
170 
171 void
Close()172 Lockfile::Close()
173 {
174   if (fd != -1) {
175     close(fd);
176   }
177 }
178 
//-------------------------------------------------------------------------
// Lockfile::Kill() and Lockfile::KillGroup()
//
// Open the lockfile. If we succeed it means there was no process
// holding the lock. We'll just close the file and release the lock
// in that case. If we don't succeed in getting the lock, the
// process id of the process holding the lock is returned. We
// repeatedly send the requested signal to that process (or, for
// KillGroup(), to its process group) until doing so fails. That is,
// until kill says that the process id is no longer valid (we killed
// the process), or that we don't have permission to send a signal to
// that process id (the process holding the lock is dead and a new
// process has replaced it).
//
// INKqa11325 (Kevlar: linux machine hosed up if specific threads
// killed): Unfortunately, it's possible on Linux that the main PID of
// the process has been successfully killed (and is waiting to be
// reaped while in a defunct state), while some of the other threads
// of the process just don't want to go away.
//-------------------------------------------------------------------------
198 
// Signal a lock holder until it can no longer be signalled.
//
// @param init_pid  process that receives `init_sig` first.
// @param init_sig  optional preliminary signal; skipped when <= 0.
// @param pid       target passed to kill() for `sig` (a negative value
//                  addresses a process group, as kill() defines).
// @param sig       signal sent repeatedly to `pid`.
//
// The unnamed `const char *` parameter is unused.
static void
lockfile_kill_internal(pid_t init_pid, int init_sig, pid_t pid, const char * /* pname ATS_UNUSED */, int sig)
{
  int err;

  if (init_sig > 0) {
    kill(init_pid, init_sig);

    // Wait for children to exit. Poll with WNOHANG until a child is reported
    // as exited or killed by a signal, or until waitpid() fails (for
    // example because there are no children to wait for).
    for (;;) {
      int status         = 0;
      const pid_t reaped = waitpid(-1, &status, WNOHANG);

      if (reaped == -1) {
        break;
      }
      // A return of 0 means no child has changed state yet; `status` is not
      // meaningful in that case and must not be inspected.
      if (reaped > 0 && (WIFEXITED(status) || WIFSIGNALED(status))) {
        break;
      }
    }
  }

  // Keep signalling until kill() fails for a reason other than EINTR,
  // i.e. the target no longer exists or may no longer be signalled.
  do {
    err = kill(pid, sig);
  } while ((err == 0) || ((err < 0) && (errno == EINTR)));
}
220 
221 void
Kill(int sig,int initial_sig,const char * pname)222 Lockfile::Kill(int sig, int initial_sig, const char *pname)
223 {
224   int err;
225   int pid;
226   pid_t holding_pid;
227 
228   err = Open(&holding_pid);
229   if (err == 1) // success getting the lock file
230   {
231     Close();
232   } else if (err == 0) // someone else has the lock
233   {
234     pid = holding_pid;
235     if (pid != 0) {
236       lockfile_kill_internal(pid, initial_sig, pid, pname, sig);
237     }
238   }
239 }
240 
241 void
KillGroup(int sig,int initial_sig,const char * pname)242 Lockfile::KillGroup(int sig, int initial_sig, const char *pname)
243 {
244   int err;
245   pid_t pid;
246   pid_t holding_pid;
247   pid_t self = getpid();
248 
249   err = Open(&holding_pid);
250   if (err == 1) // success getting the lock file
251   {
252     Close();
253   } else if (err == 0) // someone else has the lock
254   {
255     do {
256       pid = getpgid(holding_pid);
257     } while ((pid < 0) && (errno == EINTR));
258 
259     if ((pid < 0) || (pid == self)) {
260       // Error getting process group,
261       // or we are the group's owner.
262       // Let's kill just holding_pid
263       pid = holding_pid;
264     } else if (pid != self) {
265       // We managed to get holding_pid's process group
266       // and this group is not ours.
267       // This way, we kill the process_group:
268       pid = -pid;
269     }
270 
271     if (pid != 0) {
272       // In order to get core files from each process, please
273       // set your core_pattern appropriately.
274       lockfile_kill_internal(holding_pid, initial_sig, pid, pname, sig);
275     }
276   }
277 }
278