[PATCH 2/4] unshare: new applet
Bartosz Golaszewski
bartekgola at gmail.com
Fri Mar 11 13:07:53 UTC 2016
Add a fully featured unshare implementation implementing all arguments
supported in the upstream version.
Signed-off-by: Bartosz Golaszewski <bartekgola at gmail.com>
---
util-linux/unshare.c | 450 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 450 insertions(+)
create mode 100644 util-linux/unshare.c
diff --git a/util-linux/unshare.c b/util-linux/unshare.c
new file mode 100644
index 0000000..41260d8
--- /dev/null
+++ b/util-linux/unshare.c
@@ -0,0 +1,450 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Mini unshare implementation for busybox.
+ *
+ * Copyright (C) 2016 by Bartosz Golaszewski <bartekgola at gmail.com>
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
+ */
+
+//config:config UNSHARE
+//config: bool "unshare"
+//config: default y
+//config: select PLATFORM_LINUX
+//config: help
+//config: Run program with some namespaces unshared from parent.
+//config:
+//config:config FEATURE_UNSHARE_LONG_OPTS
+//config: bool "enable long options"
+//config: default y
+//config: depends on UNSHARE && LONG_OPTS
+//config: help
+//config: Support long options for the unshare applet. This makes
+//config: the busybox implementation more compatible with upstream.
+
+//applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP))
+
+//kbuild:lib-$(CONFIG_UNSHARE) += unshare.o
+
+//usage:#define unshare_trivial_usage
+//usage: "[options] <program> [args...]"
+//usage:#if ENABLE_FEATURE_UNSHARE_LONG_OPTS
+//usage:#define unshare_full_usage "\n\n"
+//usage: "Options:"
+//usage: "\n -m, --mount[=<file>] unshare mounts namespace"
+//usage: "\n -u, --uts[=<file>] unshare UTS namespace (hostname etc.)"
+//usage: "\n -i, --ipc[=<file>] unshare System V IPC namespace"
+//usage: "\n -n, --network[=<file>] unshare network namespace"
+//usage: "\n -p, --pid[=<file>] unshare pid namespace"
+//usage: "\n -U, --user[=<file>] unshare user namespace"
+//usage: "\n -f, --fork fork before launching <program>"
+//usage: "\n -M, --mount-proc[=<dir>] mount proc filesystem first (implies --mount)"
+//usage: "\n -r, --map-root-user map current user to root (implies --user)"
+//usage: "\n -P, --propagation slave|shared|private|unchanged"
+//usage: "\n modify mount propagation in mount namespace"
+//usage: "\n -s, --setgroups allow|deny control the setgroups syscall in user namespaces"
+//usage:#else
+//usage:#define unshare_full_usage "\n\n"
+//usage: "Options:"
+//usage: "\n -m [<file>] unshare mounts namespace"
+//usage: "\n -u [<file>] unshare UTS namespace (hostname etc.)"
+//usage: "\n -i [<file>] unshare System V IPC namespace"
+//usage: "\n -n [<file>] unshare network namespace"
+//usage: "\n -p [<file>] unshare pid namespace"
+//usage: "\n -U [<file>] unshare user namespace"
+//usage: "\n -f fork before launching <program>"
+//usage: "\n -M [<dir>] mount proc filesystem first (implies -m)"
+//usage: "\n -r map current user to root (implies -U)"
+//usage: "\n -P slave|shared|private|unchanged"
+//usage: "\n modify mount propagation in mount namespace"
+//usage: "\n -s allow|deny control the setgroups syscall in user namespaces"
+//usage:#endif
+
+#include "libbb.h"
+
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+
+/*
+ * Longest possible path to a procfs file used in unshare. Must be able to
+ * contain the '/proc/' string, the '/ns/user' string which is the longest
+ * namespace name and a 32-bit integer representing the process ID.
+ */
+#define PROC_PATH_MAX (sizeof("/proc//ns/user") + sizeof(pid_t) * 3)
+
+#define PATH_PROC_SETGROUPS "/proc/self/setgroups"
+#define PATH_PROC_UIDMAP "/proc/self/uid_map"
+#define PATH_PROC_GIDMAP "/proc/self/gid_map"
+
+enum { /* Option bits tested against the getopt32() result; bit order follows opt_str */
+ OPT_mount = BIT( 0), /* -m */
+ OPT_uts = BIT( 1), /* -u */
+ OPT_ipc = BIT( 2), /* -i */
+ OPT_network = BIT( 3), /* -n */
+ OPT_pid = BIT( 4), /* -p */
+ OPT_user = BIT( 5), /* -U */
+ OPT_fork = BIT( 6), /* -f */
+ OPT_mount_proc = BIT( 7), /* -M */
+ OPT_map_root = BIT( 8), /* -r */
+ OPT_propagation = BIT( 9), /* -P */
+ OPT_setgroups = BIT(10), /* -s */
+};
+
+struct namespace { /* One namespace kind that can be unshared and optionally bind-mounted */
+ const int opt; /* OPT_* bit that selects this namespace */
+ const int flag; /* CLONE_NEW* flag OR-ed into the unshare() argument */
+ const char *nsfile; /* file name under /proc/<pid>/ns/ */
+ char *path; /* bind-mount target filled in by getopt32(); NULL entries are skipped */
+};
+
+struct propagation_mode { /* Maps a propagation keyword to mount flags */
+ const char *name; /* keyword compared against the user's argument */
+ unsigned long flags; /* value returned by parse_propagation() on a match */
+};
+
+/*
+ * Upstream unshare doesn't support short options for --mount-proc and
+ * --propagation, but let's add them here to let the user use them even with
+ * long options disabled in busybox config.
+ */
+static const char opt_str[] = "+m::u::i::n::p::U::fM::rP:s:"; /* short-option spec passed to getopt32() in unshare_main() */
+
+/*
+ * Upstream unshare only accepts optional arguments (namespace mountpoints)
+ * for long options. We support them for both short (for size reduction
+ * with LONG_OPTS disabled) and long opts (for upstream compatibility).
+ */
+#if ENABLE_FEATURE_UNSHARE_LONG_OPTS
+static const char unshare_longopts[] ALIGN1 = /* each entry: "name\0" <argument kind> "<short option letter>" */
+ "mount\0" Optional_argument "m"
+ "uts\0" Optional_argument "u"
+ "ipc\0" Optional_argument "i"
+ "network\0" Optional_argument "n"
+ "pid\0" Optional_argument "p"
+ "user\0" Optional_argument "U"
+ "fork\0" No_argument "f"
+ "mount-proc\0" Optional_argument "M"
+ "map-root-user\0" No_argument "r"
+ "propagation\0" Required_argument "P"
+ "setgroups\0" Required_argument "s";
+#endif
+
+/*
+ * Translate the argument of -P/--propagation into mount(2) flags.
+ *
+ * prop_str is compared verbatim against the names in the table below;
+ * every mode except "unchanged" is combined with MS_REC.
+ *
+ * Returns the flags of the matching entry ("unchanged" yields 0).
+ * An unknown name is reported through bb_error_msg_and_die() together
+ * with the offending string.
+ */
+static unsigned long parse_propagation(const char *prop_str)
+{
+	static const struct propagation_mode known_modes[] = {
+		{ .name = "slave",     .flags = MS_REC | MS_SLAVE },
+		{ .name = "private",   .flags = MS_REC | MS_PRIVATE },
+		{ .name = "shared",    .flags = MS_REC | MS_SHARED },
+		{ .name = "unchanged", .flags = 0 },
+	};
+	const struct propagation_mode *mode = known_modes;
+	const struct propagation_mode *end = known_modes + ARRAY_SIZE(known_modes);
+
+	/* Linear scan in table order; the first exact match wins. */
+	while (mode != end) {
+		if (strcmp(mode->name, prop_str) == 0)
+			return mode->flags;
+		mode++;
+	}
+
+	bb_error_msg_and_die("unsupported propagation mode: %s", prop_str);
+}
+
+/* Return the inode number xstat() reports for /proc/<pid>/ns/mnt. */
+static ino_t get_mnt_ns_inode_by_pid(pid_t pid)
+{
+	struct stat ns_stat;
+	char ns_link[PROC_PATH_MAX];
+
+	snprintf(ns_link, sizeof(ns_link), "/proc/%d/ns/mnt", pid);
+	xstat(ns_link, &ns_stat);
+	return ns_stat.st_ino;
+}
+
+/*
+ * Bind-mount /proc/<pid>/ns/<nsfile> onto ns->path for every entry of
+ * ns_list (num_ns elements) that has a path set; entries without a path
+ * are skipped. The first failing mount(2) ends the applet with an error.
+ */
+static void mount_namespaces(pid_t pid,
+		struct namespace *ns_list, size_t num_ns)
+{
+	char src[PROC_PATH_MAX];
+	int idx;
+
+	for (idx = 0; idx < num_ns; idx++) {
+		const char *dst = ns_list[idx].path;
+
+		if (dst) {
+			snprintf(src, sizeof(src), "/proc/%d/ns/%s",
+					pid, ns_list[idx].nsfile);
+			if (mount(src, dst, NULL, MS_BIND, NULL) < 0)
+				bb_perror_msg_and_die("mount %s on %s failed", src, dst);
+		}
+	}
+}
+
+/*
+ * Mount a new procfs instance on target.
+ *
+ * The mount point is first marked private (recursively), then "proc" is
+ * mounted on it with nosuid, noexec and nodev. If either mount(2) call
+ * fails, bb_perror_msg_and_die() reports "mount <target> failed".
+ */
+static void mount_procfs(const char *target)
+{
+	const unsigned long proc_flags = MS_NOSUID | MS_NOEXEC | MS_NODEV;
+
+	/* Short-circuit: procfs is only mounted if the first call succeeded. */
+	if (mount("none", target, NULL, MS_PRIVATE | MS_REC, NULL) < 0
+	 || mount("proc", target, "proc", proc_flags, NULL) < 0
+	) {
+		bb_perror_msg_and_die("mount %s failed", target);
+	}
+}
+
+/*
+ * Applet entry point: parse options, unshare() the requested namespaces,
+ * optionally bind-mount them onto the given paths, fork, map the current
+ * user to root, set mount propagation and mount procfs, then exec
+ * <program> (or run $SHELL when no program is given).
+ */
+int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int unshare_main(int argc UNUSED_PARAM, char **argv)
+{
+	static struct namespace ns_list[] = {
+		{
+#define NS_MNT_POS 0
+			.opt = OPT_mount,
+			.flag = CLONE_NEWNS,
+			.nsfile = "mnt",
+		},
+		{
+#define NS_UTS_POS 1
+			.opt = OPT_uts,
+			.flag = CLONE_NEWUTS,
+			.nsfile = "uts",
+		},
+		{
+#define NS_IPC_POS 2
+			.opt = OPT_ipc,
+			.flag = CLONE_NEWIPC,
+			.nsfile = "ipc",
+		},
+		{
+#define NS_NET_POS 3
+			.opt = OPT_network,
+			.flag = CLONE_NEWNET,
+			.nsfile = "net",
+		},
+		{
+#define NS_PID_POS 4
+			.opt = OPT_pid,
+			.flag = CLONE_NEWPID,
+			.nsfile = "pid",
+		},
+		{
+#define NS_USR_POS 5
+			.opt = OPT_user,
+			.flag = CLONE_NEWUSER,
+			.nsfile = "user",
+		},
+	};
+
+	int unsflags = 0, i, need_mount = 0, status, setgrp_allow = 0;
+	const char *proc_mnt_target = "/proc", *prop_str, *setgrp_str;
+	unsigned long prop_flags = MS_REC | MS_PRIVATE;
+	uid_t reuid = geteuid();
+	gid_t regid = getegid();
+	unsigned int opts;
+	pid_t pid = -1;
+
+	IF_FEATURE_UNSHARE_LONG_OPTS(applet_long_options = unshare_longopts);
+
+	opts = getopt32(argv, opt_str,
+			&ns_list[NS_MNT_POS].path, &ns_list[NS_UTS_POS].path,
+			&ns_list[NS_IPC_POS].path, &ns_list[NS_NET_POS].path,
+			&ns_list[NS_PID_POS].path, &ns_list[NS_USR_POS].path,
+			&proc_mnt_target, &prop_str, &setgrp_str);
+	argv += optind;
+
+	/*
+	 * Mounting the proc filesystem before running the program implies
+	 * creating a new mount namespace since the /proc mount would
+	 * otherwise mess up existing programs on the system.
+	 */
+	if (opts & OPT_mount_proc)
+		opts |= OPT_mount;
+
+	/* Mapping user and group IDs to root implies --user. */
+	if (opts & OPT_map_root)
+		opts |= OPT_user;
+
+	if (opts & OPT_setgroups) {
+		if (strcmp(setgrp_str, "allow") == 0) {
+			setgrp_allow = 1;
+		} else if (strcmp(setgrp_str, "deny") == 0) {
+			setgrp_allow = 0;
+		} else {
+			bb_error_msg_and_die("unsupported --setgroups argument '%s'",
+					setgrp_str);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ns_list); i++) {
+		struct namespace *ns = &ns_list[i];
+
+		if (opts & ns->opt)
+			unsflags |= ns->flag;
+
+		if (ns->path)
+			need_mount = 1;
+	}
+
+	/* Silently ignore --propagation if --mount is not requested. */
+	if ((opts & OPT_propagation) && (opts & OPT_mount))
+		prop_flags = parse_propagation(prop_str);
+
+	/*
+	 * Special case: if we were requested to unshare the mount namespace
+	 * AND to make any namespace persistent (by bind mounting it) we need
+	 * to spawn a child process which will wait for the parent to call
+	 * unshare(), then mount parent's namespaces while still in the
+	 * previous namespace.
+	 */
+	if (need_mount && (opts & OPT_mount)) {
+		ino_t inop, inoc;
+		pid_t ppid;
+
+		/*
+		 * Can't use getppid() in child, as we can be unsharing the
+		 * pid namespace.
+		 */
+		ppid = getpid();
+
+		/*
+		 * Save current process' mount namespace file inode number. We
+		 * will later use it in child process to check if it already
+		 * changed meaning that this process already called unshare().
+		 */
+		inop = get_mnt_ns_inode_by_pid(ppid);
+
+		pid = xfork();
+		if (pid == 0) {
+			/*
+			 * Child - wait until parent calls unshare(). No issue
+			 * in busy-waiting - by the time we get here from
+			 * fork(), the parent has usually already unshared the
+			 * mount namespace. We should spin a few times at most.
+			 *
+			 * XXX Should probably use a pipe to notify the child
+			 * about completing unshare().
+			 */
+			do {
+				inoc = get_mnt_ns_inode_by_pid(ppid);
+			} while (inoc == inop);
+
+			/* Mount parent's unshared namespaces. */
+			mount_namespaces(ppid, ns_list, ARRAY_SIZE(ns_list));
+
+			return EXIT_SUCCESS;
+		} /* Parent continues. */
+	}
+
+	if (unshare(unsflags) < 0)
+		bb_perror_msg_and_die("unshare failed");
+
+	if (need_mount) {
+		/* Wait for the child to finish mounting the namespaces. */
+		if (opts & OPT_mount) {
+			int exit_status;
+
+			status = safe_waitpid(pid, &exit_status, 0);
+			if (status < 0)
+				bb_perror_msg_and_die("waitpid");
+			/* Propagate the helper's exit code; status only holds waitpid()'s result. */
+			if (WIFEXITED(exit_status) &&
+			    WEXITSTATUS(exit_status) != EXIT_SUCCESS)
+				return WEXITSTATUS(exit_status);
+		} else {
+			/*
+			 * Regular way - we were requested to mount some other
+			 * namespaces: mount them after the call to unshare().
+			 */
+			mount_namespaces(getpid(), ns_list, ARRAY_SIZE(ns_list));
+		}
+	}
+
+	/*
+	 * When we're unsharing the pid namespace, it's not the process that
+	 * calls unshare() that is put into the new namespace, but its first
+	 * child. The user may want to use this option to spawn a new process
+	 * that'll become PID 1 in this new namespace.
+	 */
+	if (opts & OPT_fork) {
+		int exit_status;
+
+		pid = xfork();
+		if (pid > 0) {
+			status = safe_waitpid(pid, &exit_status, 0);
+			if (status < 0)
+				bb_perror_msg_and_die("waitpid");
+
+			if (WIFEXITED(exit_status))
+				return WEXITSTATUS(exit_status);
+			else if (WIFSIGNALED(exit_status))
+				kill(getpid(), WTERMSIG(exit_status));
+
+			bb_error_msg_and_die("child exit failed");
+		} /* Child continues. */
+	}
+
+	if (opts & OPT_map_root) {
+		char uidmap_buf[sizeof(unsigned int) * 3 + sizeof(" 0 1")];
+
+		if ((opts & OPT_setgroups) && setgrp_allow) {
+			bb_error_msg_and_die(
+				"options --setgroups=allow and --map-root-user are mutually exclusive");
+		}
+
+		/*
+		 * Since Linux 3.19 an unprivileged process may write gid_map only
+		 * after permanently denying setgroups in that user namespace.
+		 * Each map line is "<ID inside ns> <ID outside ns> <count>":
+		 * ID 0 inside the namespace maps to the caller's effective ID.
+		 */
+		xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny");
+		snprintf(uidmap_buf, sizeof(uidmap_buf), "0 %u 1", (unsigned)reuid);
+		xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf);
+		snprintf(uidmap_buf, sizeof(uidmap_buf), "0 %u 1", (unsigned)regid);
+		xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf);
+	} else if (opts & OPT_setgroups) {
+		xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str);
+	}
+
+	if (opts & OPT_mount) {
+		if (mount("none", "/", NULL, prop_flags, NULL) < 0)
+			bb_perror_msg_and_die("cannot change root filesystem propagation");
+	}
+
+	if (opts & OPT_mount_proc)
+		mount_procfs(proc_mnt_target);
+
+	if (*argv) {
+		execvp(*argv, argv);
+		bb_perror_msg_and_die("failed to execute %s", *argv);
+	}
+
+	run_shell(getenv("SHELL"), 0, NULL, NULL);
+}
--
2.1.4
More information about the busybox
mailing list