haccfiles/pkgs/pluto/common.nix
stuebinm a00e28d85a
add pluto notebook server
Pluto [1] is one of these interactive notebook thingies that have become
so unreasonably popular with people doing machine learning or data
analysis, but – somewhat surprisingly – it's actually not shit (e.g. no
global mutable state in the notebook, no weird unreadable fileformat
that doesn't play well with version control, etc.)

In particular, it can be used collaboratively (while it doesn't do
real-time collaborative editing like a pad, it /does/ push out global
updates each time someone executes a cell, so it's reasonably close),
and I think it may be useful to have for julia-hacking sessions.

It may also be useful for people running low-end laptops, since code is
executed on the host — and I guess hainich has enough unused resources
lying around that we can spare a few.

After deploying this, the notebook server should be reachable via:
  ssh hainich -L 9999:localhost:9999
and then visiting http://localhost:9999

Caveats: by design, pluto allows a user to execute arbitrary code on the
host. That is its main function, and not something we can prevent. I've
tried to mitigate this as far as possible by:
 - only allowing access via ssh port forwarding. In theory pluto does
   have basic access control, but that works via a secret link that
   it'll spit to stdout on startup (i.e. the journal), which cannot be
   set in advance, nor regenerated without restarting the entire process.
   Unfortunately, this means we won't be able to use it at e.g.
   conference sessions with people who don't have access to our infra
 - running it in a nixos-container as its own user, so it should never
   get any kind of access to the "main" directory tree apart from a
   single directory that we can keep notebooks in (which is currently a
   bind mount set to /data/pluto)
 - limiting memory and cpu for that container via systemd (less out of
   worry for exploits, and more so that a few accidental while-true
   loops will never consume enough cpu time to noticeably slow down
   anything else). The current limits for both are chosen relatively low;
   we'll have to see if they become too limiting should anyone run an
   actual weather model on this.

Things we could also do:
 - currently, the container does not have its own network (mostly since
   that would make it slightly less convenient to use with port
   forwarding); in theory, pluto should even be able to run entirely
   without internet access of its own, but I'm not sure if this would
   break things like loading images / raw data into a notebook
 - make the container ephemeral, and only keep the directory containing
   the notebooks. I haven't done this since it would require
   recompilation of pluto each time the container is wiped, which makes
   for a potentially inconvenient startup time (though still < 3-5 mins)

Questions:
 - have I missed anything important that should definitely be also
   sandboxed / limited in some way?
 - in general, are we comfortable running something like this?
 - would we (in principle) be comfortable opening this up to other
   people for congress sessions (assuming we figure out a reasonable
   access control)?

Notes to deployer:
 - while I have not tested this on hainich, it works on my own server
 - you will probably have to create the /data/pluto directory for the
   bind mount, and make it world-writable (or chown it to the pluto user
   inside the container)

[1] https://github.com/fonsp/Pluto.jl/
2021-08-26 21:27:49 +02:00

160 lines
4.7 KiB
Nix

# Inputs of this expression. Everything except the three trailing options is a
# regular package-set attribute.
{
  # Evaluation helpers
  lib,
  callPackage,

  # Builders and fetchers
  stdenv,
  runCommand,
  writeText,
  makeWrapper,
  fetchurl,

  # Tools used by the individual build steps
  julia,
  git,
  curl,
  jq,
  python3,

  # Accepted, but not referenced by the code visible in this file
  stdenvNoCC,
  cacert,

  # Arguments
  # Extra words appended to the makeWrapper invocation that creates bin/julia.
  makeWrapperArgs ? "",
  # Whether Pkg.precompile() is run while the depot is built.
  precompile ? true,
  # Additional buildInputs for the depot build.
  extraBuildInputs ? []
}:
let
# A patched fetchgit that understands tree hashes is vendored in ./fetchgit;
# it is needed until https://github.com/NixOS/nixpkgs/pull/104714 lands.
fetchgit = callPackage ./fetchgit { };

# Package description loaded from ./packages.nix. The attributes read from it
# in this file are: closure, registryUrl, registryRev and registrySha256.
packages = callPackage ./packages.nix { };
### Repoify packages
# Sources are turned back into git repositories by hand because fetching with
# leaveDotGit is not reproducible (https://github.com/NixOS/nixpkgs/issues/8567).
# Entries of packages.closure without a source (src == null) pass through
# untouched; all others get their src replaced by the repoified copy.
repoified =
  let
    withRepoSrc = pkg:
      if pkg.src != null
      then pkg // { src = repoify pkg.name pkg.treehash pkg.src; }
      else pkg;
  in
  map withRepoSrc packages.closure;
# repoify name treehash src
#
# Copies the contents of `src` into $out and turns $out into a git repository
# holding a single commit ("Dummy commit") of everything that was copied.
# `git add . -f` is used so that files matched by ignore rules are committed too.
#
#   name     - used for the derivation name ("<name>-repoified") and in the
#              error message below
#   treehash - when non-empty, the build fails unless the new repository
#              contains an object with this id whose type is "tree";
#              pass "" to skip the check
#   src      - directory whose contents are committed
repoify = name: treehash: src:
# git is the only tool added on top of what runCommand already provides.
runCommand ''${name}-repoified'' {buildInputs = [git];} ''
mkdir -p $out
cp -r ${src}/. $out
cd $out
git init
git add . -f
git config user.email "julia2nix@localhost"
git config user.name "julia2nix"
git commit -m "Dummy commit"
if [[ -n "${treehash}" ]]; then
if [[ $(git cat-file -t ${treehash}) != "tree" ]]; then
echo "Couldn't find desired tree object for ${name} in repoify (${treehash})"
exit 1
fi
fi
'';
# The repoified packages that name a URL to be substituted in Manifest.toml.
repoifiedReplaceInManifest =
  lib.filter (pkg: pkg.replaceUrlInManifest != null) repoified;
### Manifest.toml (processed)
# Copy of ./Manifest.toml in which every occurrence of a package's
# replaceUrlInManifest is replaced by the file:// URL of its repoified source.
manifestToml =
  let
    # JSON dump of the affected packages; the build script reads the
    # name / replaceUrlInManifest / src fields from it. Bound once instead of
    # repeating the writeText expression.
    packagesJson = writeText "packages.json" (lib.generators.toJSON {} repoifiedReplaceInManifest);
  in
  runCommand "Manifest.toml" { buildInputs = [jq]; } ''
    cp ${./Manifest.toml} ./Manifest.toml
    jq -r '.[]|[.name, .replaceUrlInManifest, .src] | @tsv' ${packagesJson} |
    while IFS=$'\t' read -r name replaceUrlInManifest src; do
      sed -i "s|$replaceUrlInManifest|file://$src|g" ./Manifest.toml
    done
    cp ./Manifest.toml $out
  '';
### Overrides.toml
# fetchArtifact x: fetches x.url (verified against x.sha256) and produces a
# derivation named x.name. Configure, build and fixup are all disabled; the
# install step copies the contents of the working directory to $out.
fetchArtifact = x:
  let
    download = fetchurl { inherit (x) url sha256; };
  in
  stdenv.mkDerivation {
    inherit (x) name;
    src = download;
    # Presumably keeps stdenv in the directory the archive was unpacked into
    # instead of descending into a single top-level folder -- TODO confirm.
    sourceRoot = ".";
    dontConfigure = true;
    dontBuild = true;
    dontFixup = true;
    installPhase = "cp -r . $out";
  };
# Attribute set mapping each artifact key found in any package of the closure
# to a fetched artifact. zipAttrsWith hands the helper the list of all values
# recorded under one key; only the first element of the first value is used.
# NOTE(review): this assumes each `artifacts` value is itself a non-empty list
# of { name, url, sha256 } records -- confirm against packages.nix.
artifactOverrides =
  let
    artifactSets = map (pkg: pkg.artifacts) packages.closure;
    fetchFirst = _key: candidates: fetchArtifact (lib.head (lib.head candidates));
  in
  lib.zipAttrsWith fetchFirst artifactSets;
# Overrides.toml: one `<key> = "<store path>"` line per entry of
# artifactOverrides. The file is rendered in Nix itself, so the data is never
# spliced into a shell script inside single quotes (which would break on any
# `'` in a key or path) and no jq run is needed for a plain string join.
# Interpolating a derivation yields its output path, which is the same value
# the previous JSON round-trip produced.
overridesToml = writeText "Overrides.toml" (
  lib.concatStrings (
    lib.mapAttrsToList (key: artifact: "${key} = \"${artifact}\"\n") artifactOverrides
  )
);
### Processed registry
# The registry pinned in packages.nix (registryUrl / registryRev /
# registrySha256), fetched from its "master" branch and repoified. The empty
# tree hash means repoify performs no tree-object check for it.
generalRegistrySrc =
  let
    registryCheckout = fetchgit {
      branchName = "master";
      url = packages.registryUrl;
      rev = packages.registryRev;
      sha256 = packages.registrySha256;
    };
  in
  repoify "julia-general" "" registryCheckout;
# Clone of generalRegistrySrc in which, for every repoified package that has a
# registry `path`, the `repo` key of <path>/Package.toml is pointed at
# file://<src>, i.e. at the package's repoified store copy. The edit is then
# committed ("Switch to local package repos").
#
# The commit identity is set with repository-local `git config`, exactly as
# repoify does. The earlier form exported HOME=$(pwd) after `cd $out` and used
# `git config --global`, which wrote $out/.gitconfig and had `git add .` commit
# that file into the registry.
#
# NOTE(review): a missing path is detected by comparing against the literal
# string "null"; confirm that this matches how jq's @tsv renders a null field.
registry =
  let
    # name / path / src of every repoified package, read by jq below.
    packagesJson = writeText "packages.json" (lib.generators.toJSON {} repoified);
  in
  runCommand "julia-registry" { buildInputs = [(python3.withPackages (ps: [ps.toml])) jq git]; } ''
    git clone ${generalRegistrySrc}/. $out
    cd $out
    jq -r '.[]|[.name, .path, .src] | @tsv' ${packagesJson} |
    while IFS=$'\t' read -r name path src; do
    # echo "Processing: $name, $path, $src"
    if [[ "$path" != "null" ]]; then
    python -c "import toml; \
    packageTomlPath = '$path/Package.toml'; \
    contents = toml.load(packageTomlPath); \
    contents['repo'] = 'file://$src'; \
    f = open(packageTomlPath, 'w'); \
    f.write(toml.dumps(contents)); \
    "
    fi
    done
    git config user.email "julia-to-nix-depot@email.com"
    git config user.name "julia-to-nix-depot script"
    git add .
    git commit -m "Switch to local package repos"
  '';
# Julia depot populated with everything Project.toml / Manifest.toml ask for.
#
# Steps performed by the build script:
#   1. HOME is pointed at the build directory.
#   2. The processed Manifest.toml and ./Project.toml are copied into the
#      build directory, which is later activated as the Julia project.
#   3. Overrides.toml is placed at $out/artifacts/Overrides.toml.
#   4. JULIA_DEPOT_PATH is set to $out, so Julia uses the output itself as
#      its depot.
#   5. The processed registry is added, the project is instantiated, and the
#      "General" registry is removed again to save space.
#   6. If `precompile` is set, the project is precompiled as well.
depot = runCommand "julia-depot" {
# Tools for the build; callers can add more through extraBuildInputs.
buildInputs = [git curl julia] ++ extraBuildInputs;
# Both are exposed to the script as environment variables ($registry,
# $precompile). Nix passes `true` as "1" and `false` as the empty string,
# which is what the `-n "$precompile"` test below relies on.
inherit registry precompile;
} ''
export HOME=$(pwd)
echo "Using registry $registry"
echo "Using Julia ${julia}/bin/julia"
cp ${manifestToml} ./Manifest.toml
cp ${./Project.toml} ./Project.toml
mkdir -p $out/artifacts
cp ${overridesToml} $out/artifacts/Overrides.toml
export JULIA_DEPOT_PATH=$out
julia -e ' \
using Pkg
Pkg.Registry.add(RegistrySpec(path="${registry}"))
Pkg.activate(".")
Pkg.instantiate()
# Remove the registry to save space
Pkg.Registry.rm("General")
'
if [[ -n "$precompile" ]]; then
julia -e ' \
using Pkg
Pkg.activate(".")
Pkg.precompile()
'
fi
'';
in
# Result of the whole expression: a directory containing bin/julia, a wrapper
# around the real interpreter that appends the prebuilt depot to
# JULIA_DEPOT_PATH (":"-separated) and receives any extra makeWrapperArgs.
runCommand "julia-env"
  {
    buildInputs = [ makeWrapper ];
    # Exported to the script below as $depot, $julia and $makeWrapperArgs.
    inherit depot julia makeWrapperArgs;
  }
  ''
    mkdir -p $out/bin
    makeWrapper $julia/bin/julia $out/bin/julia --suffix JULIA_DEPOT_PATH : "$depot" $makeWrapperArgs
  ''