1 /*
2  * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /*
18  * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
19  * filter breakage during development. *Do not* use this in production,
20  * as it relies on making library calls that are unsafe in signal context.
21  *
 * Instead, on live systems auditctl(8) may be used to monitor failures.
23  * E.g.
24  *   auditctl -a task,always -F uid=<privsep uid>
25  */
26 /* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
27 
28 /* XXX it should be possible to do logging via the log socket safely */
29 
30 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
31 /* Use the kernel headers in case of an older toolchain. */
32 # include <asm/siginfo.h>
33 # define __have_siginfo_t 1
34 # define __have_sigval_t 1
35 # define __have_sigevent_t 1
36 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
37 
38 #include "includes.h"
39 
40 #ifdef SANDBOX_SECCOMP_FILTER
41 
42 #include <sys/types.h>
43 #include <sys/resource.h>
44 #include <sys/prctl.h>
45 #include <sys/mman.h>
46 #include <sys/syscall.h>
47 
48 #include <linux/net.h>
49 #include <linux/audit.h>
50 #include <linux/filter.h>
51 #include <linux/seccomp.h>
52 #include <elf.h>
53 
54 #include <asm/unistd.h>
55 #ifdef __s390__
56 #include <asm/zcrypt.h>
57 #endif
58 
59 #include <errno.h>
60 #include <signal.h>
61 #include <stdarg.h>
62 #include <stddef.h>  /* for offsetof */
63 #include <stdio.h>
64 #include <stdlib.h>
65 #include <string.h>
66 #include <unistd.h>
67 
68 #include "log.h"
69 #include "ssh-sandbox.h"
70 #include "xmalloc.h"
71 
/*
 * Linux seccomp_filter sandbox.
 *
 * SECCOMP_FILTER_FAIL is the filter return action used when a syscall
 * is rejected outright. Regular builds kill the process; debug builds
 * trap instead so that a signal handler can emit the violation.
 */
#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
#else
# define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL
#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
80 
/*
 * Byte offsets of the low and high 32-bit words within a 64-bit
 * seccomp_data argument slot. The filter helpers load and test syscall
 * arguments one 32-bit word at a time, so they need to know which half
 * comes first in memory.
 *
 * The byte order is taken from the libc <endian.h> macros when all of
 * them are defined, otherwise from the compiler's predefined
 * __BYTE_ORDER__. Every macro is checked with defined() first: an
 * undefined identifier evaluates to 0 inside #if, so an unguarded
 * "__BYTE_ORDER == __LITTLE_ENDIAN" would compare 0 == 0 and silently
 * select the little-endian layout when neither macro exists.
 */
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
    defined(__BIG_ENDIAN)
# if __BYTE_ORDER == __LITTLE_ENDIAN
#  define ARG_LO_OFFSET  0
#  define ARG_HI_OFFSET  sizeof(uint32_t)
# elif __BYTE_ORDER == __BIG_ENDIAN
#  define ARG_LO_OFFSET  sizeof(uint32_t)
#  define ARG_HI_OFFSET  0
# endif
#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    defined(__ORDER_BIG_ENDIAN__)
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#  define ARG_LO_OFFSET  0
#  define ARG_HI_OFFSET  sizeof(uint32_t)
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#  define ARG_LO_OFFSET  sizeof(uint32_t)
#  define ARG_HI_OFFSET  0
# endif
#endif

/* Refuse to build rather than guess the argument layout. */
#if !defined(ARG_LO_OFFSET) || !defined(ARG_HI_OFFSET)
#error "Unknown endianness"
#endif
90 
/*
 * Rule-building helpers: they trade larger BPF programs for fewer
 * hand-counted jump offsets. Every rule expects the syscall number to
 * be in the accumulator on entry and leaves it there when the rule does
 * not return.
 */

/* (Re)load the syscall number into the accumulator. */
#define SC_LOAD_NR \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
		offsetof(struct seccomp_data, nr))
/* Load one 32-bit word of syscall argument 'argidx'. */
#define SC_LOAD_ARG_WORD(argidx, word_off) \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(argidx)]) + word_off)
/* Low and high 32-bit halves of a (possibly 64-bit) constant. */
#define SC_LO32(v) \
	((v) & 0xFFFFFFFF)
#define SC_HI32(v) \
	(((uint32_t)((uint64_t)(v) >> 32)) & 0xFFFFFFFF)

/* Fail syscall 'sysno' with errno 'err' without killing the process. */
#define SC_DENY(sysno, err) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (sysno), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(err))

/* Allow syscall 'sysno' unconditionally. */
#define SC_ALLOW(sysno) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (sysno), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)

/* Allow syscall 'sysno' only when argument 'argidx' equals 'want'. */
#define SC_ALLOW_ARG(sysno, argidx, want) \
	/* other syscall: skip the whole rule (6 instructions) */ \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (sysno), 0, 6), \
	/* low word must match, else go reload the syscall number */ \
	SC_LOAD_ARG_WORD(argidx, ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SC_LO32(want), 0, 3), \
	/* high word must match, else go reload the syscall number */ \
	SC_LOAD_ARG_WORD(argidx, ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SC_HI32(want), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
	/* no match: put the syscall number back for the next rule */ \
	SC_LOAD_NR

/*
 * Allow syscall 'sysno' only when argument 'argidx' has no bits set
 * outside 'allowed'.
 */
#define SC_ALLOW_ARG_MASK(sysno, argidx, allowed) \
	/* other syscall: skip the whole rule (8 instructions) */ \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (sysno), 0, 8), \
	/* low word: keep only the disallowed bits, they must be zero */ \
	SC_LOAD_ARG_WORD(argidx, ARG_LO_OFFSET), \
	BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~SC_LO32(allowed)), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 4), \
	/* high word: same test */ \
	SC_LOAD_ARG_WORD(argidx, ARG_HI_OFFSET), \
	BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~SC_HI32(allowed)), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
	/* no match: put the syscall number back for the next rule */ \
	SC_LOAD_NR
132 
133 /* Syscall filtering set for preauth. */
134 static const struct sock_filter preauth_insns[] = {
135 	/* Ensure the syscall arch convention is as expected. */
136 	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
137 		offsetof(struct seccomp_data, arch)),
138 	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
139 	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
140 	/* Load the syscall number for checking. */
141 	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
142 		offsetof(struct seccomp_data, nr)),
143 
144 	/* Syscalls to non-fatally deny */
145 #ifdef __NR_lstat
146 	SC_DENY(__NR_lstat, EACCES),
147 #endif
148 #ifdef __NR_lstat64
149 	SC_DENY(__NR_lstat64, EACCES),
150 #endif
151 #ifdef __NR_fstat
152 	SC_DENY(__NR_fstat, EACCES),
153 #endif
154 #ifdef __NR_fstat64
155 	SC_DENY(__NR_fstat64, EACCES),
156 #endif
157 #ifdef __NR_open
158 	SC_DENY(__NR_open, EACCES),
159 #endif
160 #ifdef __NR_openat
161 	SC_DENY(__NR_openat, EACCES),
162 #endif
163 #ifdef __NR_newfstatat
164 	SC_DENY(__NR_newfstatat, EACCES),
165 #endif
166 #ifdef __NR_stat
167 	SC_DENY(__NR_stat, EACCES),
168 #endif
169 #ifdef __NR_stat64
170 	SC_DENY(__NR_stat64, EACCES),
171 #endif
172 #ifdef __NR_shmget
173 	SC_DENY(__NR_shmget, EACCES),
174 #endif
175 #ifdef __NR_shmat
176 	SC_DENY(__NR_shmat, EACCES),
177 #endif
178 #ifdef __NR_shmdt
179 	SC_DENY(__NR_shmdt, EACCES),
180 #endif
181 #ifdef __NR_ipc
182 	SC_DENY(__NR_ipc, EACCES),
183 #endif
184 
185 	/* Syscalls to permit */
186 #ifdef __NR_brk
187 	SC_ALLOW(__NR_brk),
188 #endif
189 #ifdef __NR_clock_gettime
190 	SC_ALLOW(__NR_clock_gettime),
191 #endif
192 #ifdef __NR_clock_gettime64
193 	SC_ALLOW(__NR_clock_gettime64),
194 #endif
195 #ifdef __NR_close
196 	SC_ALLOW(__NR_close),
197 #endif
198 #ifdef __NR_exit
199 	SC_ALLOW(__NR_exit),
200 #endif
201 #ifdef __NR_exit_group
202 	SC_ALLOW(__NR_exit_group),
203 #endif
204 #ifdef __NR_futex
205 	SC_ALLOW(__NR_futex),
206 #endif
207 #ifdef __NR_geteuid
208 	SC_ALLOW(__NR_geteuid),
209 #endif
210 #ifdef __NR_geteuid32
211 	SC_ALLOW(__NR_geteuid32),
212 #endif
213 #ifdef __NR_getpgid
214 	SC_ALLOW(__NR_getpgid),
215 #endif
216 #ifdef __NR_getpid
217 	SC_ALLOW(__NR_getpid),
218 #endif
219 #ifdef __NR_getrandom
220 	SC_ALLOW(__NR_getrandom),
221 #endif
222 #ifdef __NR_gettimeofday
223 	SC_ALLOW(__NR_gettimeofday),
224 #endif
225 #ifdef __NR_getuid
226 	SC_ALLOW(__NR_getuid),
227 #endif
228 #ifdef __NR_getuid32
229 	SC_ALLOW(__NR_getuid32),
230 #endif
231 #ifdef __NR_madvise
232 	SC_ALLOW(__NR_madvise),
233 #endif
234 #ifdef __NR_mmap
235 	SC_ALLOW_ARG_MASK(__NR_mmap, 2, PROT_READ|PROT_WRITE|PROT_NONE),
236 #endif
237 #ifdef __NR_mmap2
238 	SC_ALLOW_ARG_MASK(__NR_mmap2, 2, PROT_READ|PROT_WRITE|PROT_NONE),
239 #endif
240 #ifdef __NR_mprotect
241 	SC_ALLOW_ARG_MASK(__NR_mprotect, 2, PROT_READ|PROT_WRITE|PROT_NONE),
242 #endif
243 #ifdef __NR_mremap
244 	SC_ALLOW(__NR_mremap),
245 #endif
246 #ifdef __NR_munmap
247 	SC_ALLOW(__NR_munmap),
248 #endif
249 #ifdef __NR_nanosleep
250 	SC_ALLOW(__NR_nanosleep),
251 #endif
252 #ifdef __NR_clock_nanosleep
253 	SC_ALLOW(__NR_clock_nanosleep),
254 #endif
255 #ifdef __NR_clock_nanosleep_time64
256 	SC_ALLOW(__NR_clock_nanosleep_time64),
257 #endif
258 #ifdef __NR_clock_gettime64
259 	SC_ALLOW(__NR_clock_gettime64),
260 #endif
261 #ifdef __NR__newselect
262 	SC_ALLOW(__NR__newselect),
263 #endif
264 #ifdef __NR_poll
265 	SC_ALLOW(__NR_poll),
266 #endif
267 #ifdef __NR_pselect6
268 	SC_ALLOW(__NR_pselect6),
269 #endif
270 #ifdef __NR_read
271 	SC_ALLOW(__NR_read),
272 #endif
273 #ifdef __NR_rt_sigprocmask
274 	SC_ALLOW(__NR_rt_sigprocmask),
275 #endif
276 #ifdef __NR_select
277 	SC_ALLOW(__NR_select),
278 #endif
279 #ifdef __NR_shutdown
280 	SC_ALLOW(__NR_shutdown),
281 #endif
282 #ifdef __NR_sigprocmask
283 	SC_ALLOW(__NR_sigprocmask),
284 #endif
285 #ifdef __NR_time
286 	SC_ALLOW(__NR_time),
287 #endif
288 #ifdef __NR_write
289 	SC_ALLOW(__NR_write),
290 #endif
291 #ifdef __NR_socketcall
292 	SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN),
293 	SC_DENY(__NR_socketcall, EACCES),
294 #endif
295 #if defined(__NR_ioctl) && defined(__s390__)
296 	/* Allow ioctls for ICA crypto card on s390 */
297 	SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK),
298 	SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO),
299 	SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT),
300 	SC_ALLOW_ARG(__NR_ioctl, 1, ZSECSENDCPRB),
301 	/* Allow ioctls for EP11 crypto card on s390 */
302 	SC_ALLOW_ARG(__NR_ioctl, 1, ZSENDEP11CPRB),
303 #endif
304 #if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT)
305 	/*
306 	 * On Linux x32, the clock_gettime VDSO falls back to the
307 	 * x86-64 syscall under some circumstances, e.g.
308 	 * https://bugs.debian.org/849923
309 	 */
310 	SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT),
311 #endif
312 
313 	/* Default deny */
314 	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
315 };
316 
317 static const struct sock_fprog preauth_program = {
318 	.len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
319 	.filter = (struct sock_filter *)preauth_insns,
320 };
321 
/*
 * Sandbox handle returned by ssh_sandbox_init(). The only state kept is
 * the pid recorded by ssh_sandbox_parent_preauth().
 */
struct ssh_sandbox {
	pid_t child_pid;	/* 0 until ssh_sandbox_parent_preauth() */
};
325 
326 struct ssh_sandbox *
ssh_sandbox_init(struct monitor * monitor)327 ssh_sandbox_init(struct monitor *monitor)
328 {
329 	struct ssh_sandbox *box;
330 
331 	/*
332 	 * Strictly, we don't need to maintain any state here but we need
333 	 * to return non-NULL to satisfy the API.
334 	 */
335 	debug3("%s: preparing seccomp filter sandbox", __func__);
336 	box = xcalloc(1, sizeof(*box));
337 	box->child_pid = 0;
338 
339 	return box;
340 }
341 
342 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
343 extern struct monitor *pmonitor;
344 void mm_log_handler(LogLevel level, const char *msg, void *ctx);
345 
346 static void
ssh_sandbox_violation(int signum,siginfo_t * info,void * void_context)347 ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
348 {
349 	char msg[256];
350 
351 	snprintf(msg, sizeof(msg),
352 	    "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
353 	    __func__, info->si_arch, info->si_syscall, info->si_call_addr);
354 	mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor);
355 	_exit(1);
356 }
357 
358 static void
ssh_sandbox_child_debugging(void)359 ssh_sandbox_child_debugging(void)
360 {
361 	struct sigaction act;
362 	sigset_t mask;
363 
364 	debug3("%s: installing SIGSYS handler", __func__);
365 	memset(&act, 0, sizeof(act));
366 	sigemptyset(&mask);
367 	sigaddset(&mask, SIGSYS);
368 
369 	act.sa_sigaction = &ssh_sandbox_violation;
370 	act.sa_flags = SA_SIGINFO;
371 	if (sigaction(SIGSYS, &act, NULL) == -1)
372 		fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno));
373 	if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
374 		fatal("%s: sigprocmask(SIGSYS): %s",
375 		      __func__, strerror(errno));
376 }
377 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
378 
379 void
ssh_sandbox_child(struct ssh_sandbox * box)380 ssh_sandbox_child(struct ssh_sandbox *box)
381 {
382 	struct rlimit rl_zero;
383 	int nnp_failed = 0;
384 
385 	/* Set rlimits for completeness if possible. */
386 	rl_zero.rlim_cur = rl_zero.rlim_max = 0;
387 	if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
388 		fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
389 			__func__, strerror(errno));
390 	if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1)
391 		fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
392 			__func__, strerror(errno));
393 	if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
394 		fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
395 			__func__, strerror(errno));
396 
397 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
398 	ssh_sandbox_child_debugging();
399 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
400 
401 	debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__);
402 	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
403 		debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
404 		      __func__, strerror(errno));
405 		nnp_failed = 1;
406 	}
407 	debug3("%s: attaching seccomp filter program", __func__);
408 	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
409 		debug("%s: prctl(PR_SET_SECCOMP): %s",
410 		      __func__, strerror(errno));
411 	else if (nnp_failed)
412 		fatal("%s: SECCOMP_MODE_FILTER activated but "
413 		    "PR_SET_NO_NEW_PRIVS failed", __func__);
414 }
415 
/*
 * Release the handle obtained from ssh_sandbox_init() and log that the
 * sandbox is finished. box must not be used after this call.
 */
void
ssh_sandbox_parent_finish(struct ssh_sandbox *box)
{
	free(box);

	debug3("%s: finished", __func__);
}
422 
423 void
ssh_sandbox_parent_preauth(struct ssh_sandbox * box,pid_t child_pid)424 ssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid)
425 {
426 	box->child_pid = child_pid;
427 }
428 
429 #endif /* SANDBOX_SECCOMP_FILTER */
430