1 /*
2  * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /*
18  * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
19  * filter breakage during development. *Do not* use this in production,
20  * as it relies on making library calls that are unsafe in signal context.
21  *
 * Instead, on live systems, auditctl(8) may be used to monitor failures.
23  * E.g.
24  *   auditctl -a task,always -F uid=<privsep uid>
25  */
26 /* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
27 
28 /* XXX it should be possible to do logging via the log socket safely */
29 
30 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
31 /* Use the kernel headers in case of an older toolchain. */
32 # include <asm/siginfo.h>
33 # define __have_siginfo_t 1
34 # define __have_sigval_t 1
35 # define __have_sigevent_t 1
36 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
37 
38 #include "includes.h"
39 
40 #ifdef SANDBOX_SECCOMP_FILTER
41 
42 #include <sys/types.h>
43 #include <sys/resource.h>
44 #include <sys/prctl.h>
45 
46 #include <linux/net.h>
47 #include <linux/audit.h>
48 #include <linux/filter.h>
49 #include <linux/seccomp.h>
50 #include <elf.h>
51 
52 #include <asm/unistd.h>
53 #ifdef __s390__
54 #include <asm/zcrypt.h>
55 #endif
56 
57 #include <errno.h>
58 #include <signal.h>
59 #include <stdarg.h>
60 #include <stddef.h>  /* for offsetof */
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <unistd.h>
65 
66 #include "log.h"
67 #include "ssh-sandbox.h"
68 #include "xmalloc.h"
69 
70 /* Linux seccomp_filter sandbox */
71 #define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL
72 
73 /* Use a signal handler to emit violations when debugging */
74 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
75 # undef SECCOMP_FILTER_FAIL
76 # define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
77 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
78 
/*
 * Byte offsets of the low and high 32-bit words inside one syscall
 * argument slot, chosen according to the host byte order. They are added
 * to offsetof(struct seccomp_data, args[n]) by SC_ALLOW_ARG so that each
 * half of an argument can be fetched with a 32-bit BPF load.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define ARG_LO_OFFSET  0
# define ARG_HI_OFFSET  sizeof(uint32_t)
#elif __BYTE_ORDER == __BIG_ENDIAN
# define ARG_LO_OFFSET  sizeof(uint32_t)
# define ARG_HI_OFFSET  0
#else
#error "Unknown endianness"
#endif
88 
/*
 * Simple helpers to avoid manual errors (but larger BPF programs).
 *
 * Each helper expands to a short run of BPF instructions that expects the
 * syscall number to be in the accumulator on entry. When the rule does not
 * match, control falls through to the instruction following the expansion
 * with the syscall number still (or again) in the accumulator.
 */

/* If the syscall is _nr, fail it with errno _errno instead of running it. */
#define SC_DENY(_nr, _errno) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno))
/* If the syscall is _nr, permit it unconditionally. */
#define SC_ALLOW(_nr) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
/*
 * If the syscall is _nr and its argument number _arg_nr (zero-based) is
 * exactly _arg_val, permit it. The 64-bit argument is compared as two
 * 32-bit words. The jump distances below count the instructions of this
 * expansion, so they must be kept in sync with any change to it.
 */
#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 6), \
	/* load and test the selected syscall argument, low word */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    ((_arg_val) & 0xFFFFFFFF), 0, 3), \
	/* load and test the selected syscall argument, high word */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (((uint32_t)((uint64_t)(_arg_val) >> 32)) & 0xFFFFFFFF), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
		offsetof(struct seccomp_data, nr))
112 
/*
 * Syscall filtering set for preauth.
 *
 * The program is evaluated top to bottom for every syscall: it first
 * checks the reported architecture, then loads the syscall number and
 * walks the deny list followed by the allow list. Anything that matches
 * no rule reaches the final SECCOMP_FILTER_FAIL return. Each rule is
 * guarded by #ifdef because not every syscall number exists on every
 * architecture.
 */
static const struct sock_filter preauth_insns[] = {
	/* Ensure the syscall arch convention is as expected. */
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
		offsetof(struct seccomp_data, arch)),
	/* On a match, skip over the failure return that follows. */
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
	/* Load the syscall number for checking. */
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
		offsetof(struct seccomp_data, nr)),

	/* Syscalls to non-fatally deny */
#ifdef __NR_lstat
	SC_DENY(__NR_lstat, EACCES),
#endif
#ifdef __NR_lstat64
	SC_DENY(__NR_lstat64, EACCES),
#endif
#ifdef __NR_fstat
	SC_DENY(__NR_fstat, EACCES),
#endif
#ifdef __NR_fstat64
	SC_DENY(__NR_fstat64, EACCES),
#endif
#ifdef __NR_open
	SC_DENY(__NR_open, EACCES),
#endif
#ifdef __NR_openat
	SC_DENY(__NR_openat, EACCES),
#endif
#ifdef __NR_newfstatat
	SC_DENY(__NR_newfstatat, EACCES),
#endif
#ifdef __NR_stat
	SC_DENY(__NR_stat, EACCES),
#endif
#ifdef __NR_stat64
	SC_DENY(__NR_stat64, EACCES),
#endif

	/* Syscalls to permit */
#ifdef __NR_brk
	SC_ALLOW(__NR_brk),
#endif
#ifdef __NR_clock_gettime
	SC_ALLOW(__NR_clock_gettime),
#endif
#ifdef __NR_close
	SC_ALLOW(__NR_close),
#endif
#ifdef __NR_exit
	SC_ALLOW(__NR_exit),
#endif
#ifdef __NR_exit_group
	SC_ALLOW(__NR_exit_group),
#endif
#ifdef __NR_futex
	SC_ALLOW(__NR_futex),
#endif
#ifdef __NR_geteuid
	SC_ALLOW(__NR_geteuid),
#endif
#ifdef __NR_geteuid32
	SC_ALLOW(__NR_geteuid32),
#endif
#ifdef __NR_getpgid
	SC_ALLOW(__NR_getpgid),
#endif
#ifdef __NR_getpid
	SC_ALLOW(__NR_getpid),
#endif
#ifdef __NR_getrandom
	SC_ALLOW(__NR_getrandom),
#endif
#ifdef __NR_gettimeofday
	SC_ALLOW(__NR_gettimeofday),
#endif
#ifdef __NR_getuid
	SC_ALLOW(__NR_getuid),
#endif
#ifdef __NR_getuid32
	SC_ALLOW(__NR_getuid32),
#endif
#ifdef __NR_madvise
	SC_ALLOW(__NR_madvise),
#endif
#ifdef __NR_mmap
	SC_ALLOW(__NR_mmap),
#endif
#ifdef __NR_mmap2
	SC_ALLOW(__NR_mmap2),
#endif
#ifdef __NR_mremap
	SC_ALLOW(__NR_mremap),
#endif
#ifdef __NR_munmap
	SC_ALLOW(__NR_munmap),
#endif
#ifdef __NR_nanosleep
	SC_ALLOW(__NR_nanosleep),
#endif
#ifdef __NR__newselect
	SC_ALLOW(__NR__newselect),
#endif
#ifdef __NR_poll
	SC_ALLOW(__NR_poll),
#endif
#ifdef __NR_pselect6
	SC_ALLOW(__NR_pselect6),
#endif
#ifdef __NR_read
	SC_ALLOW(__NR_read),
#endif
#ifdef __NR_rt_sigprocmask
	SC_ALLOW(__NR_rt_sigprocmask),
#endif
#ifdef __NR_select
	SC_ALLOW(__NR_select),
#endif
#ifdef __NR_shutdown
	SC_ALLOW(__NR_shutdown),
#endif
#ifdef __NR_sigprocmask
	SC_ALLOW(__NR_sigprocmask),
#endif
#ifdef __NR_time
	SC_ALLOW(__NR_time),
#endif
#ifdef __NR_write
	SC_ALLOW(__NR_write),
#endif
#ifdef __NR_socketcall
	/*
	 * Permit socketcall only when its first argument is SYS_SHUTDOWN;
	 * every other socketcall request fails non-fatally with EACCES.
	 */
	SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN),
	SC_DENY(__NR_socketcall, EACCES),
#endif
#if defined(__NR_ioctl) && defined(__s390__)
	/*
	 * Allow ioctls for ICA crypto card on s390. Only the listed values
	 * of the second ioctl argument (args[1]) are permitted.
	 */
	SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK),
	SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO),
	SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT),
	SC_ALLOW_ARG(__NR_ioctl, 1, ZSECSENDCPRB),
#endif
#if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT)
	/*
	 * On Linux x32, the clock_gettime VDSO falls back to the
	 * x86-64 syscall under some circumstances, e.g.
	 * https://bugs.debian.org/849923
	 */
	SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT),
#endif

	/* Default deny */
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
};
267 
/*
 * Descriptor handed to prctl(PR_SET_SECCOMP) in ssh_sandbox_child(): the
 * instruction count and a pointer to the preauth_insns table. The cast
 * drops the const qualifier because the filter field is a non-const
 * pointer.
 */
static const struct sock_fprog preauth_program = {
	.len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
	.filter = (struct sock_filter *)preauth_insns,
};
272 
/* Per-sandbox state returned by ssh_sandbox_init(). */
struct ssh_sandbox {
	/* Zero after init; set by ssh_sandbox_parent_preauth(). */
	pid_t child_pid;
};
276 
277 struct ssh_sandbox *
ssh_sandbox_init(struct monitor * monitor)278 ssh_sandbox_init(struct monitor *monitor)
279 {
280 	struct ssh_sandbox *box;
281 
282 	/*
283 	 * Strictly, we don't need to maintain any state here but we need
284 	 * to return non-NULL to satisfy the API.
285 	 */
286 	debug3("%s: preparing seccomp filter sandbox", __func__);
287 	box = xcalloc(1, sizeof(*box));
288 	box->child_pid = 0;
289 
290 	return box;
291 }
292 
293 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
294 extern struct monitor *pmonitor;
295 void mm_log_handler(LogLevel level, const char *msg, void *ctx);
296 
297 static void
ssh_sandbox_violation(int signum,siginfo_t * info,void * void_context)298 ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
299 {
300 	char msg[256];
301 
302 	snprintf(msg, sizeof(msg),
303 	    "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
304 	    __func__, info->si_arch, info->si_syscall, info->si_call_addr);
305 	mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor);
306 	_exit(1);
307 }
308 
309 static void
ssh_sandbox_child_debugging(void)310 ssh_sandbox_child_debugging(void)
311 {
312 	struct sigaction act;
313 	sigset_t mask;
314 
315 	debug3("%s: installing SIGSYS handler", __func__);
316 	memset(&act, 0, sizeof(act));
317 	sigemptyset(&mask);
318 	sigaddset(&mask, SIGSYS);
319 
320 	act.sa_sigaction = &ssh_sandbox_violation;
321 	act.sa_flags = SA_SIGINFO;
322 	if (sigaction(SIGSYS, &act, NULL) == -1)
323 		fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno));
324 	if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
325 		fatal("%s: sigprocmask(SIGSYS): %s",
326 		      __func__, strerror(errno));
327 }
328 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
329 
330 void
ssh_sandbox_child(struct ssh_sandbox * box)331 ssh_sandbox_child(struct ssh_sandbox *box)
332 {
333 	struct rlimit rl_zero;
334 	int nnp_failed = 0;
335 
336 	/* Set rlimits for completeness if possible. */
337 	rl_zero.rlim_cur = rl_zero.rlim_max = 0;
338 	if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
339 		fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
340 			__func__, strerror(errno));
341 	if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1)
342 		fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
343 			__func__, strerror(errno));
344 	if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
345 		fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
346 			__func__, strerror(errno));
347 
348 #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
349 	ssh_sandbox_child_debugging();
350 #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
351 
352 	debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__);
353 	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
354 		debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
355 		      __func__, strerror(errno));
356 		nnp_failed = 1;
357 	}
358 	debug3("%s: attaching seccomp filter program", __func__);
359 	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
360 		debug("%s: prctl(PR_SET_SECCOMP): %s",
361 		      __func__, strerror(errno));
362 	else if (nnp_failed)
363 		fatal("%s: SECCOMP_MODE_FILTER activated but "
364 		    "PR_SET_NO_NEW_PRIVS failed", __func__);
365 }
366 
/*
 * Parent-side teardown: release the sandbox state object and log
 * completion. box must not be used after this call.
 */
void
ssh_sandbox_parent_finish(struct ssh_sandbox *box)
{
	free(box);
	debug3("%s: finished", __func__);
}
373 
/*
 * Parent-side hook: record the pid of the sandboxed child in the sandbox
 * state. box is dereferenced unconditionally, so it must be non-NULL.
 */
void
ssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid)
{
	box->child_pid = child_pid;
}
379 
380 #endif /* SANDBOX_SECCOMP_FILTER */
381