[OpenWrt-Devel] [PATCH/RFC procd 2/3] jail: add support for userns and cgroupsns
Daniel Golle
daniel at makrotopia.org
Fri Mar 20 14:20:51 EDT 2020
Add options to run the jailed process inside a new user namespace and
a new cgroup namespace.
Currently only the root user inside the container is mapped.
Also, mounting /proc inside the new user namespace currently still
fails with "permission denied" for unknown reasons.
Signed-off-by: Daniel Golle <daniel at makrotopia.org>
---
jail/jail.c | 291 ++++++++++++++++++++++++++++++++++-----------
service/instance.c | 43 +++++--
service/instance.h | 4 +-
3 files changed, 255 insertions(+), 83 deletions(-)
diff --git a/jail/jail.c b/jail/jail.c
index df1bda7..78f0c6b 100644
--- a/jail/jail.c
+++ b/jail/jail.c
@@ -40,15 +40,7 @@
#include <libubus.h>
#define STACK_SIZE (1024 * 1024)
-#define OPT_ARGS "S:C:n:h:r:w:d:psulocU:G:NR:"
-
-#define NAMESPACE_MOUNT (1U << 0)
-#define NAMESPACE_IPC (1U << 1)
-#define NAMESPACE_NET (1U << 2)
-#define NAMESPACE_PID (1U << 3)
-#define NAMESPACE_USER (1U << 4)
-#define NAMESPACE_UTS (1U << 5)
-#define NAMESPACE_CGROUP (1U << 6)
+#define OPT_ARGS "S:C:n:h:r:w:d:psulocU:G:NR:fF"
static struct {
char *name;
@@ -64,8 +56,12 @@ static struct {
int procfs;
int ronly;
int sysfs;
+ int pw_uid;
+ int pw_gid;
+ int gr_gid;
} opts;
+
extern int pivot_root(const char *new_root, const char *put_old);
int debug = 0;
@@ -166,7 +162,7 @@ static int build_jail_fs(void)
}
if (opts.extroot) {
- if (mount(opts.extroot, jail_root, NULL, MS_BIND | MS_REC, NULL)) {
+ if (mount(opts.extroot, jail_root, NULL, MS_BIND, NULL)) {
ERROR("extroot mount failed %m\n");
return -1;
}
@@ -187,7 +183,7 @@ static int build_jail_fs(void)
return -1;
}
- if (opts.namespace & NAMESPACE_NET) {
+ if (opts.namespace & CLONE_NEWNET) {
char hostdir[PATH_MAX], jailetc[PATH_MAX], jaillink[PATH_MAX];
snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
@@ -231,6 +227,99 @@ static int build_jail_fs(void)
return 0;
}
+/*
+ * Write a one-line ID map for process child_pid.
+ *
+ * The line written is "0 <id> 1": per user_namespaces(7) this maps ID 0
+ * inside the namespace to <id> outside of it, for a range of a single ID.
+ *
+ * child_pid: process whose map file under /proc is written
+ * gidmap:    true selects "gid_map", false selects "uid_map"
+ * id:        the ID placed in the second column of the map line
+ *
+ * Returns 0 on success, -1 if the path does not fit the buffer, the map
+ * file cannot be opened, or the write fails.
+ */
+static int write_uid_gid_map(pid_t child_pid, bool gidmap, int id)
+{
+	int map_file;
+	int len;
+	char map_path[64];
+	const char *map_format = "%d %d %d\n";
+
+	len = snprintf(map_path, sizeof(map_path), "/proc/%d/%s",
+		       (int)child_pid, gidmap ? "gid_map" : "uid_map");
+	/* snprintf() signals truncation by returning the untruncated length */
+	if (len < 0 || (size_t)len >= sizeof(map_path))
+		return -1;
+
+	if ((map_file = open(map_path, O_WRONLY)) == -1)
+		return -1;
+
+	/* dprintf() is only specified to return a negative value on failure */
+	if (dprintf(map_file, map_format, 0, id, 1) < 0) {
+		close(map_file);
+		return -1;
+	}
+
+	close(map_file);
+	return 0;
+}
+
+/*
+ * Write "allow" or "deny" to /proc/<child_pid>/setgroups.
+ *
+ * child_pid: process whose setgroups file is written
+ * allow:     true writes "allow", false writes "deny"
+ *
+ * Returns 0 on success, -1 if the path does not fit the buffer, the file
+ * cannot be opened, or the write fails.
+ */
+static int write_setgroups(pid_t child_pid, bool allow)
+{
+	int setgroups_file;
+	int len;
+	char setgroups_path[64];
+
+	len = snprintf(setgroups_path, sizeof(setgroups_path), "/proc/%d/setgroups",
+		       (int)child_pid);
+	/* snprintf() signals truncation by returning the untruncated length */
+	if (len < 0 || (size_t)len >= sizeof(setgroups_path))
+		return -1;
+
+	if ((setgroups_file = open(setgroups_path, O_WRONLY)) == -1)
+		return -1;
+
+	/* pass the payload as an argument, never as the format string itself */
+	if (dprintf(setgroups_file, "%s", allow ? "allow" : "deny") < 0) {
+		close(setgroups_file);
+		return -1;
+	}
+
+	close(setgroups_file);
+	return 0;
+}
+
+/*
+ * Look up the numeric IDs for opts.user and opts.group.
+ *
+ * user:     receives the pw_uid of opts.user, or -1 if opts.user is unset
+ * user_gid: receives the pw_gid of opts.user, or -1 if opts.user is unset
+ * gr_gid:   receives the gr_gid of opts.group, or -1 if opts.group is unset
+ *
+ * Does not return on lookup failure: logs an error and exits the process
+ * with EXIT_FAILURE.
+ */
+static void get_jail_user(int *user, int *user_gid, int *gr_gid)
+{
+	struct passwd *p = NULL;
+	struct group *g = NULL;
+
+	if (opts.user) {
+		p = getpwnam(opts.user);
+		if (!p) {
+			ERROR("failed to get uid/gid for user %s: %d (%s)\n",
+			      opts.user, errno, strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+		*user = p->pw_uid;
+		*user_gid = p->pw_gid;
+	} else {
+		*user = -1;
+		*user_gid = -1;
+	}
+
+	if (opts.group) {
+		g = getgrnam(opts.group);
+		if (!g) {
+			ERROR("failed to get gid for group %s: %m\n", opts.group);
+			exit(EXIT_FAILURE);
+		}
+		*gr_gid = g->gr_gid;
+	} else {
+		*gr_gid = -1;
+	}
+}
+
+/*
+ * Apply the requested credentials to the calling process, in this order:
+ * supplementary groups (initgroups), then real/effective GID, then
+ * real/effective UID. Each step is skipped when its ID is -1.
+ *
+ * pw_uid:   UID passed to setreuid() for both real and effective ID
+ * user_gid: GID passed to initgroups() together with opts.user
+ * gr_gid:   GID passed to setregid() for both real and effective ID
+ *
+ * Any failing step logs an error and exits the process with EXIT_FAILURE.
+ */
+static void set_jail_user(int pw_uid, int user_gid, int gr_gid)
+{
+	if (user_gid != -1) {
+		if (initgroups(opts.user, user_gid) != 0) {
+			ERROR("failed to initgroups() for user %s: %m\n", opts.user);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	if (gr_gid != -1) {
+		if (setregid(gr_gid, gr_gid) != 0) {
+			ERROR("failed to set group id %d: %m\n", gr_gid);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	if (pw_uid != -1) {
+		if (setreuid(pw_uid, pw_uid) != 0) {
+			ERROR("failed to set user id %d: %m\n", pw_uid);
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
#define MAX_ENVP 8
static char** build_envp(const char *seccomp)
{
@@ -253,8 +342,7 @@ static char** build_envp(const char *seccomp)
envp[count++] = preload_var;
}
- if (is_extroot)
- envp[count++] = container_var;
+ envp[count++] = container_var;
if (debug > 1)
envp[count++] = debug_var;
@@ -292,62 +380,71 @@ ujail will not use namespace/build a jail,\n\
and will only drop capabilities/apply seccomp filter.\n\n");
}
-static int exec_jail(void *_notused)
+static int exec_jail(void *pipes_ptr)
{
- struct passwd *p = NULL;
- struct group *g = NULL;
+ int *pipes = (int*)pipes_ptr;
+ char buf[1];
+ int pw_uid, pw_gid, gr_gid;
- if (opts.capabilities && drop_capabilities(opts.capabilities))
- exit(EXIT_FAILURE);
+ close(pipes[0]);
+ close(pipes[3]);
- if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
- ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
+
+ buf[0] = 'i';
+ if (write(pipes[1], buf, 1) < 1) {
+ ERROR("can't write to parent\n");
exit(EXIT_FAILURE);
}
-
- if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0
- && sethostname(opts.hostname, strlen(opts.hostname))) {
- ERROR("sethostname(%s) failed: %m\n", opts.hostname);
+ if (read(pipes[2], buf, 1) < 1) {
+ ERROR("can't read from parent\n");
exit(EXIT_FAILURE);
}
-
- if (opts.namespace && build_jail_fs()) {
- ERROR("failed to build jail fs\n");
+ if (buf[0] != 'O') {
+ ERROR("parent had an error, child exiting\n");
exit(EXIT_FAILURE);
}
- if (opts.user) {
- p = getpwnam(opts.user);
- if (!p) {
- ERROR("failed to get uid/gid for user %s: %d (%s)\n",
- opts.user, errno, strerror(errno));
+ close(pipes[1]);
+ close(pipes[2]);
+
+ if (opts.namespace & CLONE_NEWUSER) {
+ if (setgid(0) < 0) {
+ ERROR("setgid\n");
exit(EXIT_FAILURE);
}
- }
-
- if (opts.group) {
- g = getgrnam(opts.group);
- if (!g) {
- ERROR("failed to get gid for group %s: %m\n", opts.group);
+ if (setuid(0) < 0) {
+ ERROR("setuid\n");
exit(EXIT_FAILURE);
}
+// if (setgroups(0, NULL) < 0) {
+// ERROR("setgroups\n");
+// exit(EXIT_FAILURE);
+// }
}
- if (p && p->pw_gid && initgroups(opts.user, p->pw_gid)) {
- ERROR("failed to initgroups() for user %s: %m\n", opts.user);
+ if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0
+ && sethostname(opts.hostname, strlen(opts.hostname))) {
+ ERROR("sethostname(%s) failed: %m\n", opts.hostname);
exit(EXIT_FAILURE);
}
- if (g && g->gr_gid && setgid(g->gr_gid)) {
- ERROR("failed to set group id %d: %m\n", g?g->gr_gid:p->pw_gid);
+ if (opts.namespace && build_jail_fs()) {
+ ERROR("failed to build jail fs\n");
exit(EXIT_FAILURE);
}
- if (p && p->pw_uid && setuid(p->pw_uid)) {
- ERROR("failed to set user id %d: %m\n", p->pw_uid);
+ if (opts.capabilities && drop_capabilities(opts.capabilities))
+ exit(EXIT_FAILURE);
+
+ if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
exit(EXIT_FAILURE);
}
+ if (!(opts.namespace & CLONE_NEWUSER)) {
+ get_jail_user(&pw_uid, &pw_gid, &gr_gid);
+ set_jail_user(pw_uid, pw_gid, gr_gid);
+ }
char **envp = build_envp(opts.seccomp);
if (!envp)
@@ -398,12 +495,20 @@ static void jail_handle_signal(int signo)
kill(jail_process.pid, signo);
}
-static void netns_updown(bool start)
+/*
+ * Open /proc/<target_ns>/ns/net read-only.
+ *
+ * target_ns: PID of the process whose network namespace file is opened
+ *
+ * Returns the result of open(): a file descriptor, or -1 on failure.
+ */
+static int netns_open_pid(const pid_t target_ns)
+{
+	char pid_net_path[PATH_MAX];
+
+	/* pid_t is a signed type: print it with %d, as the other /proc paths do */
+	snprintf(pid_net_path, sizeof(pid_net_path), "/proc/%d/ns/net", (int)target_ns);
+
+	return open(pid_net_path, O_RDONLY);
+}
+
+static void netns_updown(pid_t pid, bool start)
{
struct ubus_context *ctx = ubus_connect(NULL);
static struct blob_buf req;
uint32_t id;
- pid_t pid = getpid();
if (!ctx)
return;
@@ -428,6 +533,9 @@ int main(int argc, char **argv)
char log[] = "/dev/log";
char ubus[] = "/var/run/ubus.sock";
int ch, i;
+ int pipes[4];
+ char sig_buf[1];
+ int netns_fd;
if (uid) {
ERROR("not root, aborting: %m\n");
@@ -444,19 +552,24 @@ int main(int argc, char **argv)
debug = atoi(optarg);
break;
case 'p':
- opts.namespace |= NAMESPACE_MOUNT;
+ opts.namespace |= CLONE_NEWNS;
opts.procfs = 1;
break;
case 'o':
- opts.namespace |= NAMESPACE_MOUNT;
+ opts.namespace |= CLONE_NEWNS;
opts.ronly = 1;
break;
+ case 'f':
+ opts.namespace |= CLONE_NEWUSER;
+ break;
+ case 'F':
+ opts.namespace |= CLONE_NEWCGROUP;
+ break;
case 'R':
- opts.namespace |= NAMESPACE_MOUNT | NAMESPACE_UTS;
opts.extroot = optarg;
break;
case 's':
- opts.namespace |= NAMESPACE_MOUNT;
+ opts.namespace |= CLONE_NEWNS;
opts.sysfs = 1;
break;
case 'S':
@@ -473,25 +586,26 @@ int main(int argc, char **argv)
opts.name = optarg;
break;
case 'N':
- opts.namespace |= NAMESPACE_NET;
+ opts.namespace |= CLONE_NEWNET;
break;
case 'h':
+ opts.namespace |= CLONE_NEWUTS;
opts.hostname = optarg;
break;
case 'r':
- opts.namespace |= NAMESPACE_MOUNT;
+ opts.namespace |= CLONE_NEWNS;
add_path_and_deps(optarg, 1, 0, 0);
break;
case 'w':
- opts.namespace |= NAMESPACE_MOUNT;
+ opts.namespace |= CLONE_NEWNS;
add_path_and_deps(optarg, 0, 0, 0);
break;
case 'u':
- opts.namespace |= NAMESPACE_MOUNT;
+ opts.namespace |= CLONE_NEWNS;
add_mount(ubus, 0, -1);
break;
case 'l':
- opts.namespace |= NAMESPACE_MOUNT;
+ opts.namespace |= CLONE_NEWNS;
add_mount(log, 0, -1);
break;
case 'U':
@@ -503,6 +617,9 @@ int main(int argc, char **argv)
}
}
+ if (opts.namespace)
+ opts.namespace |= CLONE_NEWIPC | CLONE_NEWPID;
+
/* no <binary> param found */
if (argc - optind < 1) {
usage();
@@ -513,13 +630,15 @@ int main(int argc, char **argv)
usage();
return EXIT_FAILURE;
}
- DEBUG("Using namespaces(%d), capabilities(%d), seccomp(%d)\n",
+ DEBUG("Using namespaces(0x%08x), capabilities(%d), seccomp(%d)\n",
opts.namespace,
opts.capabilities != 0,
opts.seccomp != 0);
opts.jail_argv = &argv[optind];
+ get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid);
+
if (!opts.extroot) {
if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) {
ERROR("failed to load dependencies\n");
@@ -551,10 +670,7 @@ int main(int argc, char **argv)
}
if (opts.namespace) {
- int flags = SIGCHLD | CLONE_NEWPID | CLONE_NEWIPC;
-
- if (opts.namespace & NAMESPACE_MOUNT) {
- flags |= CLONE_NEWNS;
+ if (opts.namespace & CLONE_NEWNS) {
add_mount("/dev/full", 0, -1);
add_mount("/dev/null", 0, -1);
add_mount("/dev/random", 0, -1);
@@ -563,7 +679,7 @@ int main(int argc, char **argv)
add_mount("/dev/zero", 0, -1);
add_mount("/dev/console", 0, -1);
- if (opts.user || opts.group) {
+ if (!opts.extroot && (opts.user || opts.group)) {
add_mount("/etc/passwd", 0, -1);
add_mount("/etc/group", 0, -1);
}
@@ -573,21 +689,50 @@ int main(int argc, char **argv)
}
}
- if (opts.hostname)
- flags |= CLONE_NEWUTS;
-
- if (opts.namespace & NAMESPACE_NET) {
- unshare(CLONE_NEWNET);
- netns_updown(true);
- };
+ if (pipe(&pipes[0]) < 0 || pipe(&pipes[2]) < 0)
+ return -1;
- jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, flags, NULL);
+ jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, SIGCHLD | opts.namespace, &pipes);
} else {
jail_process.pid = fork();
}
if (jail_process.pid > 0) {
+ seteuid(0);
/* parent process */
+ close(pipes[1]);
+ close(pipes[2]);
+ if (read(pipes[0], sig_buf, 1) < 1) {
+ ERROR("can't read from child\n");
+ return -1;
+ }
+ close(pipes[0]);
+ if (opts.namespace & CLONE_NEWUSER) {
+ bool has_gr = (opts.gr_gid != -1);
+ if (write_setgroups(jail_process.pid, false)) {
+ ERROR("can't write setgroups\n");
+ return -1;
+ }
+ if (opts.pw_uid != -1) {
+ write_uid_gid_map(jail_process.pid, 0, opts.pw_uid);
+ write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid);
+ } else {
+ write_uid_gid_map(jail_process.pid, 0, 65534);
+ write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534);
+ }
+ }
+
+ if (opts.namespace & CLONE_NEWNET) {
+ netns_fd = netns_open_pid(jail_process.pid);
+ netns_updown(jail_process.pid, true);
+ }
+
+ sig_buf[0] = 'O';
+ if (write(pipes[3], sig_buf, 1) < 0) {
+ ERROR("can't write to child\n");
+ return -1;
+ }
+ close(pipes[3]);
uloop_process_add(&jail_process);
uloop_run();
if (jail_running) {
@@ -597,9 +742,11 @@ int main(int argc, char **argv)
uloop_run();
}
uloop_done();
- if (opts.namespace & NAMESPACE_NET)
- netns_updown(false);
-
+ if (opts.namespace & CLONE_NEWNET) {
+ setns(netns_fd, CLONE_NEWNET);
+ netns_updown(getpid(), false);
+ close(netns_fd);
+ }
return jail_return_code;
} else if (jail_process.pid == 0) {
/* fork child process */
diff --git a/service/instance.c b/service/instance.c
index 47b7d09..d29aa0d 100644
--- a/service/instance.c
+++ b/service/instance.c
@@ -62,6 +62,7 @@ enum {
INSTANCE_ATTR_RELOADSIG,
INSTANCE_ATTR_TERMTIMEOUT,
INSTANCE_ATTR_FACILITY,
+ INSTANCE_ATTR_EXTROOT,
__INSTANCE_ATTR_MAX
};
@@ -89,6 +90,7 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = {
[INSTANCE_ATTR_RELOADSIG] = { "reload_signal", BLOBMSG_TYPE_INT32 },
[INSTANCE_ATTR_TERMTIMEOUT] = { "term_timeout", BLOBMSG_TYPE_INT32 },
[INSTANCE_ATTR_FACILITY] = { "facility", BLOBMSG_TYPE_STRING },
+ [INSTANCE_ATTR_EXTROOT] = { "extroot", BLOBMSG_TYPE_STRING },
};
enum {
@@ -101,8 +103,9 @@ enum {
JAIL_ATTR_RONLY,
JAIL_ATTR_MOUNT,
JAIL_ATTR_NETNS,
+ JAIL_ATTR_USERNS,
+ JAIL_ATTR_CGROUPSNS,
JAIL_ATTR_REQUIREJAIL,
- JAIL_ATTR_EXTROOT,
__JAIL_ATTR_MAX,
};
@@ -116,8 +119,9 @@ static const struct blobmsg_policy jail_attr[__JAIL_ATTR_MAX] = {
[JAIL_ATTR_RONLY] = { "ronly", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_MOUNT] = { "mount", BLOBMSG_TYPE_TABLE },
[JAIL_ATTR_NETNS] = { "netns", BLOBMSG_TYPE_BOOL },
+ [JAIL_ATTR_USERNS] = { "userns", BLOBMSG_TYPE_BOOL },
+ [JAIL_ATTR_CGROUPSNS] = { "cgroupsns", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_REQUIREJAIL] = { "requirejail", BLOBMSG_TYPE_BOOL },
- [JAIL_ATTR_EXTROOT] = { "extroot", BLOBMSG_TYPE_STRING },
};
struct instance_netdev {
@@ -260,9 +264,15 @@ jail_run(struct service_instance *in, char **argv)
if (jail->netns)
argv[argc++] = "-N";
- if (jail->extroot) {
+ if (jail->userns)
+ argv[argc++] = "-f";
+
+ if (jail->cgroupsns)
+ argv[argc++] = "-F";
+
+ if (in->extroot) {
argv[argc++] = "-R";
- argv[argc++] = jail->extroot;
+ argv[argc++] = in->extroot;
}
blobmsg_list_for_each(&jail->mount, var) {
@@ -870,9 +880,13 @@ instance_jail_parse(struct service_instance *in, struct blob_attr *attr)
jail->netns = blobmsg_get_bool(tb[JAIL_ATTR_NETNS]);
jail->argc++;
}
- if (tb[JAIL_ATTR_EXTROOT]) {
- jail->extroot = strdup(blobmsg_get_string(tb[JAIL_ATTR_EXTROOT]));
- jail->argc += 2;
+ if (tb[JAIL_ATTR_USERNS]) {
+ jail->userns = blobmsg_get_bool(tb[JAIL_ATTR_USERNS]);
+ jail->argc++;
+ }
+ if (tb[JAIL_ATTR_CGROUPSNS]) {
+ jail->cgroupsns = blobmsg_get_bool(tb[JAIL_ATTR_CGROUPSNS]);
+ jail->argc++;
}
if (tb[JAIL_ATTR_MOUNT]) {
@@ -892,6 +906,10 @@ instance_jail_parse(struct service_instance *in, struct blob_attr *attr)
if (in->group)
jail->argc += 2;
+ if (in->extroot) {
+ jail->argc += 2;
+ }
+
if (in->no_new_privs)
jail->argc++;
@@ -1003,6 +1021,9 @@ instance_config_parse(struct service_instance *in)
if (!in->trace && tb[INSTANCE_ATTR_SECCOMP])
in->seccomp = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_SECCOMP]));
+ if (tb[INSTANCE_ATTR_EXTROOT])
+ in->extroot = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_EXTROOT]));
+
if (tb[INSTANCE_ATTR_PIDFILE]) {
char *pidfile = blobmsg_get_string(tb[INSTANCE_ATTR_PIDFILE]);
if (pidfile)
@@ -1151,7 +1172,7 @@ instance_free(struct service_instance *in)
free(in->config);
free(in->user);
free(in->group);
- free(in->jail.extroot);
+ free(in->extroot);
free(in->jail.name);
free(in->jail.hostname);
free(in->seccomp);
@@ -1275,14 +1296,16 @@ void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose)
blobmsg_add_string(b, "name", in->jail.name);
if (in->jail.hostname)
blobmsg_add_string(b, "hostname", in->jail.hostname);
- if (in->jail.extroot)
- blobmsg_add_string(b, "extroot", in->jail.extroot);
+ if (in->extroot)
+ blobmsg_add_string(b, "extroot", in->extroot);
blobmsg_add_u8(b, "procfs", in->jail.procfs);
blobmsg_add_u8(b, "sysfs", in->jail.sysfs);
blobmsg_add_u8(b, "ubus", in->jail.ubus);
blobmsg_add_u8(b, "log", in->jail.log);
blobmsg_add_u8(b, "ronly", in->jail.ronly);
blobmsg_add_u8(b, "netns", in->jail.netns);
+ blobmsg_add_u8(b, "userns", in->jail.userns);
+ blobmsg_add_u8(b, "cgroupsns", in->jail.cgroupsns);
blobmsg_close_table(b, r);
if (!avl_is_empty(&in->jail.mount.avl)) {
struct blobmsg_list_node *var;
diff --git a/service/instance.h b/service/instance.h
index 29406c9..7e80c61 100644
--- a/service/instance.h
+++ b/service/instance.h
@@ -30,9 +30,10 @@ struct jail {
bool log;
bool ronly;
bool netns;
+ bool userns;
+ bool cgroupsns;
char *name;
char *hostname;
- char *extroot;
struct blobmsg_list mount;
int argc;
};
@@ -65,6 +66,7 @@ struct service_instance {
struct jail jail;
char *seccomp;
char *pidfile;
+ char *extroot;
int syslog_facility;
int exit_code;
--
2.25.1
_______________________________________________
openwrt-devel mailing list
openwrt-devel at lists.openwrt.org
https://lists.openwrt.org/mailman/listinfo/openwrt-devel
More information about the openwrt-devel
mailing list