713 lines
49 KiB
HTML
713 lines
49 KiB
HTML
|
<!DOCTYPE html>
|
||
|
<html lang="en"><head>
|
||
|
<meta http-equiv="content-type" content="text/html; charset=UTF-8"><script type="text/javascript" async="" src="Creating%20containers%20-%20Part%201_files/ga.js"></script><script src="Creating%20containers%20-%20Part%201_files/analytics.js" type="text/javascript"></script>
|
||
|
<script type="text/javascript">window.addEventListener('DOMContentLoaded',function(){var v=archive_analytics.values;v.service='wb';v.server_name='wwwb-app102.us.archive.org';v.server_ms=1019;archive_analytics.send_pageview({});});</script><script type="text/javascript" src="Creating%20containers%20-%20Part%201_files/playback.js" charset="utf-8"></script>
|
||
|
<script type="text/javascript" src="Creating%20containers%20-%20Part%201_files/wombat.js" charset="utf-8"></script>
|
||
|
<script type="text/javascript">
|
||
|
if (window._WBWombatInit) {
|
||
|
wbinfo = {}
|
||
|
wbinfo.url = "http://crosbymichael.com:80/creating-containers-part-1.html";
|
||
|
wbinfo.timestamp = "20191223021405";
|
||
|
wbinfo.request_ts = "20191223021405";
|
||
|
wbinfo.prefix = "http://web.archive.org/web/";
|
||
|
wbinfo.mod = "if_";
|
||
|
wbinfo.is_framed = false;
|
||
|
wbinfo.is_live = false;
|
||
|
wbinfo.coll = "web";
|
||
|
wbinfo.proxy_magic = "";
|
||
|
wbinfo.static_prefix = "/_static/";
|
||
|
wbinfo.enable_auto_fetch = true;
|
||
|
wbinfo.auto_fetch_worker_prefix = "http://web.archive.org/web/";
|
||
|
wbinfo.wombat_ts = "20191223021405";
|
||
|
wbinfo.wombat_sec = "1577067245";
|
||
|
wbinfo.wombat_scheme = "https";
|
||
|
wbinfo.wombat_host = "crosbymichael.com:80";
|
||
|
wbinfo.ignore_prefixes = ["/__wb/",
|
||
|
"/_static/",
|
||
|
"/web/",
|
||
|
"http://analytics.archive.org/",
|
||
|
"https://analytics.archive.org/",
|
||
|
"//analytics.archive.org/",
|
||
|
"http://archive.org/",
|
||
|
"https://archive.org/",
|
||
|
"//archive.org/",
|
||
|
"http://faq.web.archive.org/",
|
||
|
"http://web.archive.org/",
|
||
|
"https://web.archive.org/"
|
||
|
];
|
||
|
wbinfo.wombat_opts = {};
|
||
|
window._WBWombatInit(wbinfo);
|
||
|
}
|
||
|
__wm.init("http://web.archive.org/web");
|
||
|
</script>
|
||
|
<link rel="stylesheet" type="text/css" href="Creating%20containers%20-%20Part%201_files/banner-styles.css">
|
||
|
<link rel="stylesheet" type="text/css" href="Creating%20containers%20-%20Part%201_files/iconochive.css">
|
||
|
<!-- End Wayback Rewrite JS Include -->
|
||
|
|
||
|
<meta charset="utf-8">
|
||
|
<title>Creating containers - Part 1</title>
|
||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
|
<meta name="description" content="">
|
||
|
<meta name="author" content="">
|
||
|
<link rel="stylesheet" href="Creating%20containers%20-%20Part%201_files/main.css" type="text/css">
|
||
|
<link href="Creating%20containers%20-%20Part%201_files/css.css" rel="stylesheet" type="text/css">
|
||
|
<link id="elemento-theme" href="Creating%20containers%20-%20Part%201_files/bootstrap.css" rel="stylesheet">
|
||
|
|
||
|
<link href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Michael Crosby ATOM Feed">
|
||
|
|
||
|
|
||
|
<script src="Creating%20containers%20-%20Part%201_files/jquery_002.js"></script>
|
||
|
|
||
|
<script>
|
||
|
$(document).ready(function() {
|
||
|
$('#walter').on('click', function(e) {
|
||
|
e.stopPropagation();
|
||
|
window.location = 'http://web.archive.org/web/20191223021405/http://docker.io';
|
||
|
});
|
||
|
$('#scene').parallax();
|
||
|
});
|
||
|
</script>
|
||
|
|
||
|
<style type="text/css">
|
||
|
body {
|
||
|
padding-top: 20px;
|
||
|
padding-bottom: 40px;
|
||
|
}
|
||
|
|
||
|
/* Custom container */
|
||
|
.container-narrow {
|
||
|
margin: 0 auto;
|
||
|
max-width: 700px;
|
||
|
}
|
||
|
.container-narrow > hr {
|
||
|
margin: 30px 0;
|
||
|
}
|
||
|
|
||
|
/* Supporting marketing content */
|
||
|
.marketing {
|
||
|
margin: 60px 0;
|
||
|
}
|
||
|
.marketing p + h4 {
|
||
|
margin-top: 28px;
|
||
|
}
|
||
|
#scene {
|
||
|
position:absolute;
|
||
|
top: 0;
|
||
|
right: 0;
|
||
|
}
|
||
|
</style>
|
||
|
|
||
|
<script type="text/javascript">
|
||
|
var _gaq = _gaq || [];
|
||
|
_gaq.push(['_setAccount', 'UA-21167181-1']);
|
||
|
_gaq.push(['_trackPageview']);
|
||
|
|
||
|
(function() {
|
||
|
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||
|
ga.src = ('https:' == document.location.protocol ? 'http://web.archive.org/web/20191223021405/https://ssl' : 'http://web.archive.org/web/20191223021405/http://www') + '.google-analytics.com/ga.js';
|
||
|
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||
|
})();
|
||
|
</script>
|
||
|
<link href="Creating%20containers%20-%20Part%201_files/bootstrap-responsive.css" rel="stylesheet">
|
||
|
|
||
|
<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
|
||
|
<!--[if lt IE 9]>
|
||
|
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
|
||
|
<![endif]-->
|
||
|
|
||
|
<!--[if IE]>
|
||
|
<script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script>
|
||
|
<script src="http://code.onion.com/fartscroll.js"></script>
|
||
|
<script type="text/javascript">
|
||
|
fartscroll(50);
|
||
|
</script>
|
||
|
<![endif]-->
|
||
|
|
||
|
<script type="text/javascript" async="" src="Creating%20containers%20-%20Part%201_files/embed.html"></script></head>
|
||
|
<body><!-- BEGIN WAYBACK TOOLBAR INSERT -->
|
||
|
<style type="text/css">
|
||
|
body {
|
||
|
margin-top:0 !important;
|
||
|
padding-top:0 !important;
|
||
|
/*min-width:800px !important;*/
|
||
|
}
|
||
|
</style>
|
||
|
<div id="wm-ipp-base" style="display: block; direction: ltr;" lang="en">
|
||
|
</div><div id="donato" style="position:relative;width:100%;">
|
||
|
<div id="donato-base">
|
||
|
<iframe id="donato-if" src="Creating%20containers%20-%20Part%201_files/donate.html" scrolling="no" style="width:100%; height:100%" frameborder="0">
|
||
|
</iframe>
|
||
|
</div>
|
||
|
</div><script type="text/javascript">
|
||
|
__wm.bt(625,27,25,2,"web","http://crosbymichael.com/creating-containers-part-1.html","20191223021405",1996,"/_static/",["/_static/css/banner-styles.css?v=HyR5oymJ","/_static/css/iconochive.css?v=qtvMKcIJ"]);
|
||
|
</script>
|
||
|
<!-- END WAYBACK TOOLBAR INSERT -->
|
||
|
<div class="container-narrow">
|
||
|
|
||
|
<div class="masthead">
|
||
|
<ul class="nav nav-pills pull-right">
|
||
|
|
||
|
<li><a href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/feeds/all.atom.xml" rel="alternate">atom feed</a></li>
|
||
|
|
||
|
|
||
|
<li><a href="http://web.archive.org/web/20191223021405/http://github.com/crosbymichael">github</a></li>
|
||
|
</ul>
|
||
|
<h3 class="muted"><a href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/index.html">Michael Crosby</a></h3>
|
||
|
</div> <ul id="scene" style="transform: translate3d(0px, 0px, 0px); transform-style: preserve-3d; backface-visibility: hidden;">
|
||
|
<li class="layer" data-depth="1.0" style="position: relative; display: block; height: 100%; width: 100%; left: 0px; top: 0px; transform: translate3d(-4.55115%, 2.10234%, 0px); transform-style: preserve-3d; backface-visibility: hidden;">
|
||
|
<img src="Creating%20containers%20-%20Part%201_files/docker-logo.png" id="walter" alt="Docker logo">
|
||
|
</li>
|
||
|
</ul>
|
||
|
|
||
|
<hr>
|
||
|
|
||
|
<ul class="nav nav-pills pull-left">
|
||
|
<li><a href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/category/dev.html">dev</a></li>
|
||
|
<li class="active"><a href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/category/docker.html">docker</a></li>
|
||
|
<li><a href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/category/go.html">go</a></li>
|
||
|
<li><a href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/category/productivity.html">productivity</a></li>
|
||
|
|
||
|
<hr>
|
||
|
</ul>
|
||
|
|
||
|
<div class="row-fluid marketing">
|
||
|
<div class="span12">
|
||
|
<section id="content" class="body">
|
||
|
<article>
|
||
|
<header> <h1 class="entry-title"><a href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/creating-containers-part-1.html" rel="bookmark" title="Permalink to Creating containers - Part 1">Creating containers - Part 1</a></h1> </header>
|
||
|
<div class="entry-content">
|
||
|
<footer class="post-info">
|
||
|
<abbr class="published" title="2014-11-16T00:00:00+00:00">
|
||
|
Sun 16 November 2014
|
||
|
</abbr>
|
||
|
|
||
|
<address class="vcard author">
|
||
|
By <a class="url fn" href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/author/Michael%20Crosby.html">Michael Crosby</a>
|
||
|
</address>
|
||
|
<p>In <a href="http://web.archive.org/web/20191223021405/http://crosbymichael.com/category/docker.html">docker</a>. </p>
|
||
|
<p></p></footer><!-- /.post-info --><!-- /.post-info -->
|
||
|
<p>This is part one of a series of blog posts detailing how docker creates containers.
|
||
|
We will dig deep into the various pieces that are stitched together to see what
|
||
|
it takes to make <code>docker run ...</code> awesome.</p>
|
||
|
<h2>First, what is a container?</h2>
|
||
|
<p>I think the various pieces of technology that go into creating a container are fairly
|
||
|
commonplace. You should have seen a few cool looking charts in various presentations
|
||
|
about Docker where you get a quick "Docker uses namespaces, cgroups, chroot, etc."
|
||
|
to create containers. But why does it take all these pieces to create a container?<br>
|
||
|
Why is it not a simple syscall and it's all done for me?
|
||
|
The fact is that containers don't exist; they are made up. There is no such
|
||
|
thing as a "linux container" in the kernel. A container is a userland concept.</p>
|
||
|
<h2>Namespaces</h2>
|
||
|
<p>In part one I'll talk about how to create Linux namespaces in the context of how they
|
||
|
are used within docker. In later posts we will look into how namespaces are combined
|
||
|
with other features like cgroups and an isolated filesystem to create something useful.</p>
|
||
|
<p>First off we need a high level explanation of what a namespace does and why it's useful.
|
||
|
Basically, a namespace is a scoped view of your underlying Linux system. There are a
|
||
|
few different types of namespaces implemented inside the kernel. As we dig into
|
||
|
each of the different namespaces below you can follow along by running
|
||
|
<code>docker run -it --privileged --net host crosbymichael/make-containers</code>.
|
||
|
This has a few preloaded files and configuration to get you started. Even though we
|
||
|
will be creating namespaces inside a container that docker runs for us, don't let that
|
||
|
trip you up. I opted for this approach as providing a container preloaded with all
|
||
|
the dependencies that you need to run the examples is why we are doing this in the
|
||
|
first place. To make things a little easier, I'm using the <code>--net host</code> flag so that
|
||
|
we are able to see your host's network interfaces within our demo container. This will
|
||
|
be useful in the network examples. We also need to provide the <code>--privileged</code> flag so that
|
||
|
we have the correct permissions to create new namespaces within our container.</p>
|
||
|
<p>If you are interested in what the Dockerfile looks like then here it is:</p>
|
||
|
<div class="highlight"><pre>FROM debian:jessie
|
||
|
|
||
|
RUN apt-get update <span class="o">&&</span> apt-get install -y <span class="se">\</span>
|
||
|
gcc <span class="se">\</span>
|
||
|
vim <span class="se">\</span>
|
||
|
emacs
|
||
|
|
||
|
COPY containers/ /containers/
|
||
|
WORKDIR /containers
|
||
|
CMD <span class="o">[</span><span class="s2">"bash"</span><span class="o">]</span>
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>I'll be doing the examples in C, as it's sometimes easier
|
||
|
to explain the lower level details better than the abstractions that Go provides.
|
||
|
So let's start...</p>
|
||
|
<h3>NET Namespace</h3>
|
||
|
<p>The network namespace provides your own view of the network stack of your system. This
|
||
|
can include your very own <code>localhost</code>. Make sure you are in the <code>crosbymichael/make-containers</code> container
|
||
|
and run the command <code>ip a</code> to view all the network interfaces of your host machine.</p>
|
||
|
<div class="highlight"><pre>> ip a
|
||
|
root@development:/containers# ip a
|
||
|
1: lo: &lt;LOOPBACK,UP,LOWER_UP&gt; mtu <span class="m">65536</span> qdisc noqueue state UNKNOWN group default
|
||
|
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
|
||
|
inet 127.0.0.1/8 scope host lo
|
||
|
valid_lft forever preferred_lft forever
|
||
|
inet6 ::1/128 scope host
|
||
|
valid_lft forever preferred_lft forever
|
||
|
2: eth0: &lt;BROADCAST,MULTICAST,UP,LOWER_UP&gt; mtu <span class="m">1500</span> qdisc pfifo_fast state UP group default qlen 1000
|
||
|
link/ether 08:00:27:19:ca:f2 brd ff:ff:ff:ff:ff:ff
|
||
|
inet 10.0.2.15/24 brd 10.0.2.255 scope global eth0
|
||
|
valid_lft forever preferred_lft forever
|
||
|
inet6 fe80::a00:27ff:fe19:caf2/64 scope link
|
||
|
valid_lft forever preferred_lft forever
|
||
|
3: eth1: &lt;BROADCAST,MULTICAST,UP,LOWER_UP&gt; mtu <span class="m">1500</span> qdisc pfifo_fast state UP group default qlen 1000
|
||
|
link/ether 08:00:27:20:84:47 brd ff:ff:ff:ff:ff:ff
|
||
|
inet 192.168.56.103/24 brd 192.168.56.255 scope global eth1
|
||
|
valid_lft forever preferred_lft forever
|
||
|
inet6 fe80::a00:27ff:fe20:8447/64 scope link
|
||
|
valid_lft forever preferred_lft forever
|
||
|
4: docker0: &lt;NO-CARRIER,BROADCAST,MULTICAST,UP&gt; mtu <span class="m">1500</span> qdisc noqueue state DOWN group default
|
||
|
link/ether 56:84:7a:fe:97:99 brd ff:ff:ff:ff:ff:ff
|
||
|
inet 172.17.42.1/16 scope global docker0
|
||
|
valid_lft forever preferred_lft forever
|
||
|
inet6 fe80::5484:7aff:fefe:9799/64 scope link
|
||
|
valid_lft forever preferred_lft forever
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Ok cool, so this is all the network interfaces currently on <strong>my</strong> host system. Yours may look
|
||
|
a little different but you get the idea. Now let's write some code to create a new network
|
||
|
namespace. For this we will write a skeleton of a small C binary that uses the <code>clone</code> syscall. We will
|
||
|
start by using clone to run binaries that are already installed inside our demo container.
|
||
|
The file <code>skeleton.c</code> should be in the working directory of the demo container.
|
||
|
We will use this file as the basis of all our examples. Here is the code in case you don't feel
|
||
|
like running the container right now.</p>
|
||
|
<div class="highlight"><pre><span class="cp">#define _GNU_SOURCE</span>
|
||
|
<span class="cp">#include &lt;stdio.h&gt;</span>
|
||
|
<span class="cp">#include &lt;stdlib.h&gt;</span>
|
||
|
<span class="cp">#include &lt;sched.h&gt;</span>
|
||
|
<span class="cp">#include &lt;sys/wait.h&gt;</span>
|
||
|
<span class="cp">#include &lt;errno.h&gt;</span>
|
||
|
|
||
|
<span class="cp">#define STACKSIZE (1024*1024)</span>
|
||
|
<span class="k">static</span> <span class="kt">char</span> <span class="n">child_stack</span><span class="p">[</span><span class="n">STACKSIZE</span><span class="p">];</span>
|
||
|
|
||
|
<span class="k">struct</span> <span class="n">clone_args</span> <span class="p">{</span>
|
||
|
<span class="kt">char</span> <span class="o">**</span><span class="n">argv</span><span class="p">;</span>
|
||
|
<span class="p">};</span>
|
||
|
|
||
|
<span class="c1">// child_exec is the func that will be executed as the result of clone</span>
|
||
|
<span class="k">static</span> <span class="kt">int</span> <span class="nf">child_exec</span><span class="p">(</span><span class="kt">void</span> <span class="o">*</span><span class="n">stuff</span><span class="p">)</span>
|
||
|
<span class="p">{</span>
|
||
|
<span class="k">struct</span> <span class="n">clone_args</span> <span class="o">*</span><span class="n">args</span> <span class="o">=</span> <span class="p">(</span><span class="k">struct</span> <span class="n">clone_args</span> <span class="o">*</span><span class="p">)</span><span class="n">stuff</span><span class="p">;</span>
|
||
|
<span class="k">if</span> <span class="p">(</span><span class="n">execvp</span><span class="p">(</span><span class="n">args</span><span class="o">-></span><span class="n">argv</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">args</span><span class="o">-></span><span class="n">argv</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">)</span> <span class="p">{</span>
|
||
|
<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">"failed to execvp argments %s</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span>
|
||
|
<span class="n">strerror</span><span class="p">(</span><span class="n">errno</span><span class="p">));</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
<span class="c1">// we should never reach here!</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="n">EXIT_FAILURE</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
|
||
|
<span class="kt">int</span> <span class="nf">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span> <span class="o">**</span><span class="n">argv</span><span class="p">)</span>
|
||
|
<span class="p">{</span>
|
||
|
<span class="k">struct</span> <span class="n">clone_args</span> <span class="n">args</span><span class="p">;</span>
|
||
|
<span class="n">args</span><span class="p">.</span><span class="n">argv</span> <span class="o">=</span> <span class="o">&</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">];</span>
|
||
|
|
||
|
<span class="kt">int</span> <span class="n">clone_flags</span> <span class="o">=</span> <span class="n">SIGCHLD</span><span class="p">;</span>
|
||
|
|
||
|
<span class="c1">// the result of this call is that our child_exec will be run in another</span>
|
||
|
<span class="c1">// process returning it's pid</span>
|
||
|
<span class="kt">pid_t</span> <span class="n">pid</span> <span class="o">=</span>
|
||
|
<span class="n">clone</span><span class="p">(</span><span class="n">child_exec</span><span class="p">,</span> <span class="n">child_stack</span> <span class="o">+</span> <span class="n">STACKSIZE</span><span class="p">,</span> <span class="n">clone_flags</span><span class="p">,</span> <span class="o">&</span><span class="n">args</span><span class="p">);</span>
|
||
|
<span class="k">if</span> <span class="p">(</span><span class="n">pid</span> <span class="o"><</span> <span class="mi">0</span><span class="p">)</span> <span class="p">{</span>
|
||
|
<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">"clone failed WTF!!!! %s</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span> <span class="n">strerror</span><span class="p">(</span><span class="n">errno</span><span class="p">));</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="n">EXIT_FAILURE</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
<span class="c1">// lets wait on our child process here before we, the parent, exits</span>
|
||
|
<span class="k">if</span> <span class="p">(</span><span class="n">waitpid</span><span class="p">(</span><span class="n">pid</span><span class="p">,</span> <span class="nb">NULL</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="p">{</span>
|
||
|
<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">"failed to wait pid %d</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span> <span class="n">pid</span><span class="p">);</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="n">EXIT_FAILURE</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="n">EXIT_SUCCESS</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>This is a small C binary that will allow you to run processes
|
||
|
like <code>./a.out ip a</code>. It uses the arguments that you pass on the cli as the arguments to whatever
|
||
|
process you want to use. Don't worry about the specific implementation too much as it's the
|
||
|
changes we will be making that are the interesting aspects. Remember, this will execute the
|
||
|
binary and arguments of whatever program you want, this means if you want to run one of these demos
|
||
|
below and have it spawn a shell session so that you can poke around in your new namespace then go
|
||
|
ahead. It is a great way to explore and inspect these different namespaces at your own pace.
|
||
|
So to get started let's make a copy of this file to start working with the network namespace.</p>
|
||
|
<div class="highlight"><pre>> cp skeleton.c network.c
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Ok, within this file there is a very
|
||
|
special var called <code>clone_flags</code>. This is where most of our changes will happen throughout this
|
||
|
post. Namespaces are primarily controlled via the clone flags. The clone flag for the network
|
||
|
namespace is <code>CLONE_NEWNET</code>. We need to change the line in the file <code>int clone_flags = SIGCHLD;</code> to
|
||
|
<code>int clone_flags = CLONE_NEWNET | SIGCHLD;</code> so that the call to <code>clone</code> creates a new network namespace
|
||
|
for our process. Make this change in <code>network.c</code> then compile and run.</p>
|
||
|
<div class="highlight"><pre>> gcc -o net network.c
|
||
|
> ./net ip a
|
||
|
1: lo: &lt;LOOPBACK&gt; mtu <span class="m">65536</span> qdisc noop state DOWN group default
|
||
|
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>The result of this run now looks very different from the first time we ran the <code>ip a</code>
|
||
|
command. We only see a <code>loopback</code> interface in the output. This is because our process
|
||
|
that was created only has a view of its network namespace and not of the host.
|
||
|
And that's it. That is how you create a new network namespace. </p>
|
||
|
<p>Right now this is pretty useless as you don't have any usable
|
||
|
interfaces. Docker uses this new network namespace to setup a <code>veth</code>
|
||
|
interface so that your container has its own IP address
|
||
|
allocated on a bridge, usually <code>docker0</code>.
|
||
|
We won't go down the path of how to setup interfaces in namespaces at this time. We can save
|
||
|
that for another post.</p>
|
||
|
<p>So now that we know how a network namespace is created, let's look at the mount namespace.</p>
|
||
|
<h3>MNT Namespace</h3>
|
||
|
<p>The mount namespace gives you a scoped view of the mounts on your system. It's often
|
||
|
confused with jailing a process inside a <code>chroot</code> or similar. This is not true!
|
||
|
The mount namespace does not equal a filesystem jail. So the next time you hear someone
|
||
|
say that a container uses the mount namespace to "jail" the process inside its own root
|
||
|
filesystem you can call bullshit because they don't know what they are talking about. Do it, it's fun :)</p>
|
||
|
<p>Let's start by making a copy of <code>skeleton.c</code> again for our mount related changes.
|
||
|
We can do a quick build
|
||
|
and run to see what our current mount points look like with the <code>mount</code> command.</p>
|
||
|
<div class="highlight"><pre>> cp skeleton.c mount.c
|
||
|
> gcc -o mount mount.c
|
||
|
> ./mount mount
|
||
|
proc on /proc <span class="nb">type </span>proc <span class="o">(</span>rw,nosuid,nodev,noexec,relatime<span class="o">)</span>
|
||
|
tmpfs on /dev <span class="nb">type </span>tmpfs <span class="o">(</span>rw,nosuid,mode<span class="o">=</span>755<span class="o">)</span>
|
||
|
shm on /dev/shm <span class="nb">type </span>tmpfs <span class="o">(</span>rw,nosuid,nodev,noexec,relatime,size<span class="o">=</span>65536k<span class="o">)</span>
|
||
|
mqueue on /dev/mqueue <span class="nb">type </span>mqueue <span class="o">(</span>rw,nosuid,nodev,noexec,relatime<span class="o">)</span>
|
||
|
devpts on /dev/pts <span class="nb">type </span>devpts <span class="o">(</span>rw,nosuid,noexec,relatime,gid<span class="o">=</span>5,mode<span class="o">=</span>620,ptmxmode<span class="o">=</span>666<span class="o">)</span>
|
||
|
sysfs on /sys <span class="nb">type </span>sysfs <span class="o">(</span>rw,nosuid,nodev,noexec,relatime<span class="o">)</span>
|
||
|
/dev/disk/by-uuid/d3aa2880-c290-4586-9da6-2f526e381f41 on /etc/resolv.conf <span class="nb">type </span>ext4 <span class="o">(</span>rw,relatime,errors<span class="o">=</span>remount-ro,data<span class="o">=</span>ordered<span class="o">)</span>
|
||
|
/dev/disk/by-uuid/d3aa2880-c290-4586-9da6-2f526e381f41 on /etc/hostname <span class="nb">type </span>ext4 <span class="o">(</span>rw,relatime,errors<span class="o">=</span>remount-ro,data<span class="o">=</span>ordered<span class="o">)</span>
|
||
|
/dev/disk/by-uuid/d3aa2880-c290-4586-9da6-2f526e381f41 on /etc/hosts <span class="nb">type </span>ext4 <span class="o">(</span>rw,relatime,errors<span class="o">=</span>remount-ro,data<span class="o">=</span>ordered<span class="o">)</span>
|
||
|
devpts on /dev/console <span class="nb">type </span>devpts <span class="o">(</span>rw,nosuid,noexec,relatime,gid<span class="o">=</span>5,mode<span class="o">=</span>620,ptmxmode<span class="o">=</span>000<span class="o">)</span>
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>This is what the mount points look like from within my demo container, yours may look different.
|
||
|
In order to create a new mount namespace we use the flag <code>CLONE_NEWNS</code>. You may notice something may look
|
||
|
weird here. Why is this flag not <code>CLONE_NEWMOUNT</code> or <code>CLONE_NEWMNT</code>? This is because the mount
|
||
|
namespace was the first Linux namespace introduced and the name was just an oversight.
|
||
|
If you write code you will understand that as you start building a feature or an application,
|
||
|
you don't often have a full picture of the end result. Anyway, let's add <code>CLONE_NEWNS</code> to our
|
||
|
<code>clone_flags</code> variable. It should look something like <code>int clone_flags = CLONE_NEWNS | SIGCHLD;</code>.</p>
|
||
|
<p>Let's go ahead and build <code>mount.c</code> again and run the same command.</p>
|
||
|
<div class="highlight"><pre>> gcc -o mount mount.c
|
||
|
> ./mount mount
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Nothing changed. Whaaat??? This is because the process that we run inside the new mount
|
||
|
namespace still has a view on <code>/proc</code> of the underlying system. The result is that the
|
||
|
new process sort of <em>inherits</em> a view on the underlying mounts.
|
||
|
There are a few ways that we can prevent this, like using <code>pivot_root</code>, but we will leave
|
||
|
that for an additional post on filesystem jails and how <code>chroot</code> / <code>pivot_root</code> interact with
|
||
|
the container's mount namespace.</p>
|
||
|
<p>However, one way that we can try out our new mount namespace is to, well, mount something.
|
||
|
Let's create a new <code>tmpfs</code> mount in <code>/mytmp</code> for this demo. We will do this mount
|
||
|
in C and continue to run our same <code>mount</code> command as the args to our mount binary. In order to do a mount
|
||
|
<strong>inside</strong> of our mount namespace we need to add the code to the <code>child_exec</code> function,
|
||
|
before the call to <code>execvp</code>. The code within the <code>child_exec</code> function is run inside the newly
|
||
|
created process, i.e., inside our new namespace. The code in <code>child_exec</code> should look like this:</p>
|
||
|
<div class="highlight"><pre><span class="c1">// child_exec is the func that will be executed as the result of clone</span>
|
||
|
<span class="k">static</span> <span class="kt">int</span> <span class="nf">child_exec</span><span class="p">(</span><span class="kt">void</span> <span class="o">*</span><span class="n">stuff</span><span class="p">)</span>
|
||
|
<span class="p">{</span>
|
||
|
<span class="k">struct</span> <span class="n">clone_args</span> <span class="o">*</span><span class="n">args</span> <span class="o">=</span> <span class="p">(</span><span class="k">struct</span> <span class="n">clone_args</span> <span class="o">*</span><span class="p">)</span><span class="n">stuff</span><span class="p">;</span>
|
||
|
<span class="k">if</span> <span class="p">(</span><span class="n">mount</span><span class="p">(</span><span class="s">"none"</span><span class="p">,</span> <span class="s">"/mytmp"</span><span class="p">,</span> <span class="s">"tmpfs"</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="s">""</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">)</span> <span class="p">{</span>
|
||
|
<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">"failed to mount tmpfs %s</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span>
|
||
|
<span class="n">strerror</span><span class="p">(</span><span class="n">errno</span><span class="p">));</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
<span class="k">if</span> <span class="p">(</span><span class="n">execvp</span><span class="p">(</span><span class="n">args</span><span class="o">-></span><span class="n">argv</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">args</span><span class="o">-></span><span class="n">argv</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">)</span> <span class="p">{</span>
|
||
|
<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">"failed to execvp argments %s</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span>
|
||
|
<span class="n">strerror</span><span class="p">(</span><span class="n">errno</span><span class="p">));</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
<span class="c1">// we should never reach here!</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="n">EXIT_FAILURE</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>We need to first create a directory <code>/mytmp</code> before we compile and run with the changes above.</p>
|
||
|
<div class="highlight"><pre>> mkdir /mytmp
|
||
|
> gcc -o mount mount.c
|
||
|
> ./mount mount
|
||
|
<span class="c"># cutting out the common output...</span>
|
||
|
none on /mytmp <span class="nb">type </span>tmpfs <span class="o">(</span>rw,relatime<span class="o">)</span>
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>I cut out the common output above from the first time I ran <code>mount</code>.<br>
|
||
|
The result is that you should see a new mount point for our <code>tmpfs</code> mount.
|
||
|
Nice! Go ahead and run <code>mount</code> in the current shell just for comparison.<br>
|
||
|
Notice how the <code>tmpfs</code> mount is not displayed? That is because we created the mount inside
|
||
|
our own mount namespace, not in the parent's namespace. </p>
|
||
|
<p>Remember how I said that the mount namespace does not equal a filesystem jail? Go ahead and run our
|
||
|
<code>./mount</code> binary with the <code>ls</code> command. Everything is there. Now you have proof!</p>
|
||
|
<h3>UTS Namespace</h3>
|
||
|
<p>The next namespace is the UTS namespace that is responsible for system identification. This includes the
|
||
|
<code>hostname</code> and <code>domainname</code>. It allows a container to have its own hostname independently
|
||
|
from the host system along with other containers. Let's start by making a copy of <code>skeleton.c</code> and running
|
||
|
the <code>hostname</code> command with it.</p>
|
||
|
<div class="highlight"><pre>> cp skeleton.c uts.c
|
||
|
> gcc -o uts uts.c
|
||
|
> ./uts hostname
|
||
|
development
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>This should display your system's hostname (<code>development</code> in my case). Like earlier, let's add
|
||
|
the clone flag for the UTS namespace to the <code>clone_flags</code> variable. The flag should be <code>CLONE_NEWUTS</code>.
|
||
|
If you compile and run then you should see the exact same output. This is totally fine.
|
||
|
The values in the UTS namespace are inherited from the <em>parent</em>.
|
||
|
However, within this new namespace, we can change the hostname without it affecting the <em>parent</em> or other
|
||
|
containers that have a separate UTS namespace.</p>
|
||
|
<p>Let's modify the hostname in the <code>child_exec</code> function. To do that, you will need to add
|
||
|
the <code>#include &lt;unistd.h&gt;</code> header to gain access to the <code>sethostname</code> function, as well as
|
||
|
the <code>#include &lt;string.h&gt;</code> header to use <code>strlen</code> needed by <code>sethostname</code>.
|
||
|
The new body of the <code>child_exec</code> function should look like the following:</p>
|
||
|
<div class="highlight"><pre><span class="c1">// child_exec is the func that will be executed as the result of clone</span>
|
||
|
<span class="k">static</span> <span class="kt">int</span> <span class="nf">child_exec</span><span class="p">(</span><span class="kt">void</span> <span class="o">*</span><span class="n">stuff</span><span class="p">)</span>
|
||
|
<span class="p">{</span>
|
||
|
<span class="k">struct</span> <span class="n">clone_args</span> <span class="o">*</span><span class="n">args</span> <span class="o">=</span> <span class="p">(</span><span class="k">struct</span> <span class="n">clone_args</span> <span class="o">*</span><span class="p">)</span><span class="n">stuff</span><span class="p">;</span>
|
||
|
<span class="k">const</span> <span class="kt">char</span> <span class="o">*</span> <span class="n">new_hostname</span> <span class="o">=</span> <span class="s">"myhostname"</span><span class="p">;</span>
|
||
|
<span class="k">if</span> <span class="p">(</span><span class="n">sethostname</span><span class="p">(</span><span class="n">new_hostname</span><span class="p">,</span> <span class="n">strlen</span><span class="p">(</span><span class="n">new_hostname</span><span class="p">))</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">)</span> <span class="p">{</span>
|
||
|
<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">"failed to set hostname %s</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span>
|
||
|
<span class="n">strerror</span><span class="p">(</span><span class="n">errno</span><span class="p">));</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
<span class="k">if</span> <span class="p">(</span><span class="n">execvp</span><span class="p">(</span><span class="n">args</span><span class="o">-></span><span class="n">argv</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">args</span><span class="o">-></span><span class="n">argv</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">)</span> <span class="p">{</span>
|
||
|
<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">"failed to execvp argments %s</span><span class="se">\n</span><span class="s">"</span><span class="p">,</span>
|
||
|
<span class="n">strerror</span><span class="p">(</span><span class="n">errno</span><span class="p">));</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
<span class="c1">// we should never reach here!</span>
|
||
|
<span class="n">exit</span><span class="p">(</span><span class="n">EXIT_FAILURE</span><span class="p">);</span>
|
||
|
<span class="p">}</span>
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Ensure that <code>clone_flags</code> within your main looks like <code>int clone_flags = CLONE_NEWUTS | SIGCHLD;</code> then
|
||
|
compile and run the binary with the same args. You should now see the value that we set returned
|
||
|
from the <code>hostname</code> command.
|
||
|
To verify that this change did not affect our current shell go ahead and run <code>hostname</code> and make
|
||
|
sure that you have your original value back.</p>
|
||
|
<div class="highlight"><pre>> gcc -o uts uts.c
|
||
|
> ./uts hostname
|
||
|
myhostname
|
||
|
> hostname
|
||
|
development
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Awesome! We are doing good.</p>
|
||
|
<h3>IPC Namespace</h3>
|
||
|
<p>The IPC namespace is used for isolating interprocess communication, things like SysV message queues.
|
||
|
Let's make a copy of <code>skeleton.c</code> for this namespace.</p>
|
||
|
<div class="highlight"><pre>> cp skeleton.c ipc.c
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>The way we are going to test the IPC namespace is by creating a message queue on the host, and
|
||
|
ensuring that we cannot see it when we spawn a new process inside its own IPC namespace.
|
||
|
Let's first create a message queue in our current shell then compile and run our copy of the
|
||
|
skeleton code to view the queue.</p>
|
||
|
<div class="highlight"><pre>> ipcmk -Q
|
||
|
Message queue id: 65536
|
||
|
> gcc -o ipc ipc.c
|
||
|
> ./ipc ipcs -q
|
||
|
------ Message Queues --------
|
||
|
key msqid owner perms used-bytes message
|
||
|
0xfe7f09d1 <span class="m">65536</span> root <span class="m">644</span> <span class="m">0</span> 0
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Without a new IPC namespace you can see the same message queue that was created.
|
||
|
Now let's add the <code>CLONE_NEWIPC</code> flag to our <code>clone_flags</code> var to create a new IPC namespace for
|
||
|
our process.
|
||
|
The <code>clone_flags</code> var should look like <code>int clone_flags = CLONE_NEWIPC | SIGCHLD;</code>.
|
||
|
Recompile and run the same command again:</p>
|
||
|
<div class="highlight"><pre>> gcc -o ipc ipc.c
|
||
|
> ./ipc ipcs -q
|
||
|
------ Message Queues --------
|
||
|
key msqid owner perms used-bytes message
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Done! The child process is now in a new IPC namespace and has a completely separate view
|
||
|
and access to message queues.</p>
|
||
|
<h3>PID Namespace</h3>
|
||
|
<p>This one is fun. The PID namespace is a way to carve up the PIDs that one process can view and
|
||
|
interact with. When we create a new PID namespace the first process will get to be the loved PID 1.
|
||
|
If this process exits the kernel kills everyone else within the namespace.
|
||
|
Let's start by making a copy of <code>skeleton.c</code> for our changes.</p>
|
||
|
<div class="highlight"><pre>> cp skeleton.c pid.c
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>To create a new PID namespace, we will have to set the <code>clone_flags</code> with <code>CLONE_NEWPID</code>.
|
||
|
The variable should look like <code>int clone_flags = CLONE_NEWPID | SIGCHLD;</code>. Let's test
|
||
|
by running <code>ps aux</code> in our shell and then compile and run our <code>pid.c</code> binary with
|
||
|
the same arguments.</p>
|
||
|
<div class="highlight"><pre>> ps aux
|
||
|
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
|
||
|
root <span class="m">1</span> 0.0 0.1 <span class="m">20332</span> <span class="m">3388</span> ? Ss 21:50 0:00 bash
|
||
|
root <span class="m">147</span> 0.0 0.1 <span class="m">17492</span> <span class="m">2088</span> ? R+ 22:49 0:00 ps aux
|
||
|
> gcc -o pid pid.c
|
||
|
> ./pid ps aux
|
||
|
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
|
||
|
root <span class="m">1</span> 0.0 0.1 <span class="m">20332</span> <span class="m">3388</span> ? Ss 21:50 0:00 bash
|
||
|
root <span class="m">153</span> 0.0 0.0 <span class="m">5092</span> <span class="m">728</span> ? S+ 22:50 0:00 ./pid ps aux
|
||
|
root <span class="m">154</span> 0.0 0.1 <span class="m">17492</span> <span class="m">2064</span> ? R+ 22:50 0:00 ps aux
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>WTF??? We expected <code>ps aux</code> to be PID 1 or at least not see any other pids from the parent.
|
||
|
Why is that? <strong>proc</strong>. The process that we spawned still has a view of <code>/proc</code> from the parent, i.e.
|
||
|
<code>/proc</code> mounted on the host system. So how do we fix this? How do we ensure that our new process
|
||
|
can only view pids within its namespace? We can start by remounting <code>/proc</code>.<br>
|
||
|
Because we will be dealing with mounts, we can take the opportunity to take what we learned from the
|
||
|
MNT namespace and combine it with our PID namespace so that we don't mess with the <code>/proc</code> of our
|
||
|
host system.</p>
|
||
|
<p>We can start by including the clone flag for the mount namespace alongside the
|
||
|
clone flag for pid. It should look something like
|
||
|
<code>int clone_flags = CLONE_NEWPID | CLONE_NEWNS | SIGCHLD;</code>. We need to edit the
|
||
|
<code>child_exec</code> function and remount proc. This will be a simple <code>umount</code> and <code>mount</code> syscall
|
||
|
for the proc filesystem. Because we are creating a new mount namespace we know that this will
|
||
|
not mess up our host system. The result should look like this:</p>
|
||
|
<div class="highlight"><pre>// child_exec is the func that will be executed as the result of clone
|
||
|
static int child_exec<span class="o">(</span>void *stuff<span class="o">)</span>
|
||
|
<span class="o">{</span>
|
||
|
struct clone_args *args <span class="o">=</span> <span class="o">(</span>struct clone_args *<span class="o">)</span>stuff<span class="p">;</span>
|
||
|
<span class="k">if</span> <span class="o">(</span>umount<span class="o">(</span><span class="s2">"/proc"</span>, 0<span class="o">)</span> !<span class="o">=</span> 0<span class="o">)</span> <span class="o">{</span>
|
||
|
fprintf<span class="o">(</span>stderr, <span class="s2">"failed unmount /proc %s\n"</span>,
|
||
|
strerror<span class="o">(</span>errno<span class="o">))</span><span class="p">;</span>
|
||
|
<span class="nb">exit</span><span class="o">(</span>-1<span class="o">)</span><span class="p">;</span>
|
||
|
<span class="o">}</span>
|
||
|
<span class="k">if</span> <span class="o">(</span>mount<span class="o">(</span><span class="s2">"proc"</span>, <span class="s2">"/proc"</span>, <span class="s2">"proc"</span>, 0, <span class="s2">""</span><span class="o">)</span> !<span class="o">=</span> 0<span class="o">)</span> <span class="o">{</span>
|
||
|
fprintf<span class="o">(</span>stderr, <span class="s2">"failed mount /proc %s\n"</span>,
|
||
|
strerror<span class="o">(</span>errno<span class="o">))</span><span class="p">;</span>
|
||
|
<span class="nb">exit</span><span class="o">(</span>-1<span class="o">)</span><span class="p">;</span>
|
||
|
<span class="o">}</span>
|
||
|
<span class="k">if</span> <span class="o">(</span>execvp<span class="o">(</span>args->argv<span class="o">[</span>0<span class="o">]</span>, args->argv<span class="o">)</span> !<span class="o">=</span> 0<span class="o">)</span> <span class="o">{</span>
|
||
|
fprintf<span class="o">(</span>stderr, <span class="s2">"failed to execvp argments %s\n"</span>,
|
||
|
strerror<span class="o">(</span>errno<span class="o">))</span><span class="p">;</span>
|
||
|
<span class="nb">exit</span><span class="o">(</span>-1<span class="o">)</span><span class="p">;</span>
|
||
|
<span class="o">}</span>
|
||
|
// we should never reach here!
|
||
|
<span class="nb">exit</span><span class="o">(</span>EXIT_FAILURE<span class="o">)</span><span class="p">;</span>
|
||
|
<span class="o">}</span>
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Build and run this again to see what happens.</p>
|
||
|
<div class="highlight"><pre>> gcc -o pid pid.c
|
||
|
> ./pid ps aux
|
||
|
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
|
||
|
root <span class="m">1</span> 0.0 0.0 <span class="m">9076</span> <span class="m">784</span> ? R+ 23:05 0:00 ps aux
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>Perfect! Our new PID namespace is now fully operational with the help of the mount namespace!</p>
|
||
|
<h3>USER Namespace</h3>
|
||
|
<p>The last namespace is the user namespace. This namespace is the new kid on the block and allows you to have
|
||
|
users within this namespace that are not equal to users outside of the namespace. This is accomplished
|
||
|
via GID and UID mappings. </p>
|
||
|
<p>This one has a simple demo application without specifying a mapping, even though it's a totally
|
||
|
useless demo. If we add the flag <code>CLONE_NEWUSER</code> to our <code>clone_flags</code> then run something like <code>id</code> or <code>ls -la</code> you will notice that we get <code>nobody</code> within the user namespace. This is because the current user is undefined
|
||
|
right now.</p>
|
||
|
<div class="highlight"><pre>> cp skeleton.c user.c
|
||
|
<span class="c"># add the clone flag</span>
|
||
|
> gcc -o user user.c
|
||
|
> ./user ls -la
|
||
|
total 84
|
||
|
drwxr-xr-x <span class="m">1</span> nobody nogroup <span class="m">4096</span> Nov <span class="m">16</span> 23:10 .
|
||
|
drwxr-xr-x <span class="m">1</span> nobody nogroup <span class="m">4096</span> Nov <span class="m">16</span> 22:17 ..
|
||
|
-rwxr-xr-x <span class="m">1</span> nobody nogroup <span class="m">8336</span> Nov <span class="m">16</span> 22:15 mount
|
||
|
-rw-r--r-- <span class="m">1</span> nobody nogroup <span class="m">1577</span> Nov <span class="m">16</span> 22:15 mount.c
|
||
|
-rwxr-xr-x <span class="m">1</span> nobody nogroup <span class="m">8064</span> Nov <span class="m">16</span> 21:52 net
|
||
|
-rw-r--r-- <span class="m">1</span> nobody nogroup <span class="m">1441</span> Nov <span class="m">16</span> 21:52 network.c
|
||
|
-rwxr-xr-x <span class="m">1</span> nobody nogroup <span class="m">8544</span> Nov <span class="m">16</span> 23:05 pid
|
||
|
-rw-r--r-- <span class="m">1</span> nobody nogroup <span class="m">1772</span> Nov <span class="m">16</span> 23:02 pid.c
|
||
|
-rw-r--r-- <span class="m">1</span> nobody nogroup <span class="m">1426</span> Nov <span class="m">16</span> 21:59 skeleton.c
|
||
|
-rwxr-xr-x <span class="m">1</span> nobody nogroup <span class="m">8056</span> Nov <span class="m">16</span> 23:10 user
|
||
|
-rw-r--r-- <span class="m">1</span> nobody nogroup <span class="m">1442</span> Nov <span class="m">16</span> 23:10 user.c
|
||
|
-rwxr-xr-x <span class="m">1</span> nobody nogroup <span class="m">8408</span> Nov <span class="m">16</span> 22:40 uts
|
||
|
-rw-r--r-- <span class="m">1</span> nobody nogroup <span class="m">1694</span> Nov <span class="m">16</span> 22:36 uts.c
|
||
|
</pre></div>
|
||
|
|
||
|
|
||
|
<p>This is a very simple example of the user namespace but you can go much deeper with it. We will
|
||
|
save this for another post but the idea and hopes for the user namespace is that this will allow
|
||
|
us to run as "root" within the container but not as "root" on the host system. Don't forget you
|
||
|
can always change <code>ls -la</code> to <code>bash</code> and have a shell inside the namespace to poke around and learn
|
||
|
more.</p>
|
||
|
<h3>In the end...</h3>
|
||
|
<p>So to recap we went over the mount, network, user, PID, UTS, and IPC Linux namespaces.
|
||
|
The majority of the code that we changed was not much, just adding a flag most of the time.<br>
|
||
|
The "hard work" is mostly managing the interactions between the various kernel subsystems
|
||
|
in order to meet our requirements. Like most
|
||
|
of the descriptions before this, namespaces are just one of the tools that we use to make a
|
||
|
container. I hope the PID example is a glimpse of how we use multiple namespaces together
|
||
|
in order to isolate and begin the creation of a container.</p>
|
||
|
<p>In future posts we will go into detail on how we jail the container's processes inside a root filesystem,
|
||
|
aka a docker image, as well as using cgroups and Linux capabilities. By the end we should be able
|
||
|
to pull all these things together to create a container.</p>
|
||
|
<p>Also a thanks to tibor and everyone that helps review my brain dump of a first draft ;)</p>
|
||
|
</div><!-- /.entry-content -->
|
||
|
|
||
|
</article>
|
||
|
<div id="disqus_thread"></div>
|
||
|
<script type="text/javascript">
|
||
|
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
|
||
|
var disqus_shortname = 'crosbymichael'; // required: replace example with your forum shortname
|
||
|
|
||
|
/* * * DON'T EDIT BELOW THIS LINE * * */
|
||
|
(function() {
|
||
|
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||
|
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
||
|
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||
|
})();
|
||
|
</script>
|
||
|
<noscript>Please enable JavaScript to view the <a href="http://web.archive.org/web/20191223021405/http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
|
||
|
<a href="http://web.archive.org/web/20191223021405/http://disqus.com/" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
|
||
|
|
||
|
</section>
|
||
|
</div>
|
||
|
</div>
|
||
|
|
||
|
<hr>
|
||
|
|
||
|
<div class="footer">
|
||
|
<p>© 2014 Michael Crosby</p>
|
||
|
</div>
|
||
|
</div>
|
||
|
|
||
|
<script src="Creating%20containers%20-%20Part%201_files/bootstrap.js"></script>
|
||
|
<script src="Creating%20containers%20-%20Part%201_files/jquery.js"></script>
|
||
|
|
||
|
|
||
|
</body></html>
|
||
|
<!--
|
||
|
FILE ARCHIVED ON 02:14:05 Dec 23, 2019 AND RETRIEVED FROM THE
|
||
|
INTERNET ARCHIVE ON 21:09:12 Jun 24, 2020.
|
||
|
JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE.
|
||
|
|
||
|
ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C.
|
||
|
SECTION 108(a)(3)).
|
||
|
-->
|
||
|
<!--
|
||
|
playback timings (ms):
|
||
|
PetaboxLoader3.resolve: 260.51 (4)
|
||
|
exclusion.robots.policy: 0.342
|
||
|
load_resource: 714.677
|
||
|
esindex: 0.007
|
||
|
exclusion.robots: 0.353
|
||
|
captures_list: 233.362
|
||
|
LoadShardBlock: 205.234 (3)
|
||
|
CDXLines.iter: 14.759 (3)
|
||
|
PetaboxLoader3.datanode: 585.283 (4)
|
||
|
RedisCDXSource: 7.714
|
||
|
-->
|