OPENBIS UGM 2019

Reproducible Data Analysis Workflows

Containers and Conda Hands-On


Connect to the VM

## ssh to the VM
ssh guest@IP
## ask for the password

Which OS?

cat /etc/centos-release
## CentOS Linux release 7.6.1810 (Core)

Containers

Docker - Version

docker --version
## Docker version 18.09.6, build 481bc77156

docker info
## Cannot connect to the Docker daemon at ... Is the docker daemon running?

Start docker daemon

sudo systemctl restart docker

docker info
## Got permission denied while trying to connect to the Docker daemon socket at ...

Docker: you need root access

sudo docker info

Docker Images and Containers

sudo docker run hello-world

## list all the files in the current directory
ls -lt
## list all docker images
sudo docker image ls            ## docker images
sudo docker image ls --help
sudo docker image ls --all
## list all containers (pay attention to the ID, ...)
sudo docker container ls --all  ## docker ps

Docker Pull

sudo docker pull godlovedc/lolcow
sudo docker images -a
sudo docker ps -a

Docker Run and Remove

sudo docker tag godlovedc/lolcow lolcow # tag
sudo docker images  # new image
sudo docker run lolcow 
sudo docker ps -a          ## a new container
sudo docker run -it lolcow ## run interactively

Remove docker images and containers

sudo docker rmi godlovedc/lolcow ## remove image
sudo docker rm ID ## copy the ID of hello-world
sudo docker images
sudo docker ps -a

Docker Entrypoint

## run bash in the container (different entrypoint)
sudo docker run -it --entrypoint bash lolcow

## In the container
ls
env
cat /etc/lsb-release  # Ubuntu
exit

Remove all containers

sudo docker ps -a -q             ## list all IDs
sudo docker rm $(sudo docker ps -a -q)
sudo docker ps -a
sudo docker run -it --rm lolcow  ## remove container at exit

Start and Push to the Docker Local Registry

sudo docker run -d -p 5000:5000 \
  --restart=always --name registry registry:2
sudo docker tag lolcow localhost:5000/lolcow
sudo docker images

## push the image to the local registry
sudo docker push localhost:5000/lolcow

Pull from the Docker Local Registry

sudo docker rmi lolcow ## remove lolcow
sudo docker rmi localhost:5000/lolcow 
sudo docker images

## pull the image from the local registry
sudo docker pull localhost:5000/lolcow
sudo docker run -it --rm localhost:5000/lolcow
sudo docker images
sudo docker ps -a
sudo docker rmi localhost:5000/lolcow

Singularity Version and Build Singularity Containers

Quick Start

singularity --version
## 2.6.1-HEAD.9103f01
## build from shub, dockerhub and local registry
singularity build shub-lolcow.simg \
  shub://GodloveD/lolcow
singularity build docker-lolcow.simg \
  docker://godlovedc/lolcow
sudo SINGULARITY_NOHTTPS=1 singularity build \
  local-lolcow.simg docker://localhost:5000/lolcow:latest

Run Singularity Containers

ls -lt 
singularity run shub-lolcow.simg
singularity run docker-lolcow.simg 
singularity run local-lolcow.simg 
singularity exec shub-lolcow.simg bash 
## In the container
ls   ## $HOME, /tmp, ... are already bound into the container
env  ## the environment variables are already available
cat /etc/lsb-release ## Ubuntu
exit

Conda

Conda Version

conda --version
## conda 4.6.14

Conda Update a Package, Create and List Environment

## update conda package 
conda update conda

## create a new environment with python 3.5
conda create --name py35 python=3.5

## list available environments
conda info --envs # conda env list

Conda Activate

## check base python
python --version
## Python 3.7.3

## activate the new environment
conda activate py35
python --version
## Python 3.5.6 :: Anaconda, Inc.

Conda List Packages and Export Environment

## list the installed packages in the env
conda list
conda list python

## install numpy in the current env
conda install numpy

## create an environment file
conda env export > environment.yml
cat environment.yml

Conda Deactivate, Remove and Create Environment

## deactivate the environment
conda deactivate

## remove the new environment
conda remove --name py35 --all
conda info --envs

## create the environment from environment.yml
conda env create -f environment.yml
conda activate py35

Conda Identical Environments: Export and Create

## identical conda environment
conda list --explicit > spec-file.txt
cat spec-file.txt 

## it is platform dependent
conda create --name py35_explicit --file spec-file.txt
conda info --envs

Conda - Manually Created Environment File

vim numpy_py35.yml ## emacs and nano are also available
  • Type i (enter insert mode)
  • Copy/paste the following YAML:
    name: numpy_py35
    dependencies:
    - python=3.5
    - numpy
  • Type Esc, then :wq, then Enter (save and quit)
    cat numpy_py35.yml
    conda env create -f numpy_py35.yml
    conda info --envs
    

Conda - Environment File Review

What you want:

manually created env.yaml

What you get:

conda env export

Precisely what you get

conda list --explicit

Reproducible Workflows

get data from openBIS via SFTP

mkdir workflow_example && cd workflow_example
lftp -p 2222 sftp://user@openbis-tst.ethz.ch ## change user
cd /UGM/WORKSHOP_3_REPRODUCIBLE_DATA_ANALYSIS_WORKFLOWS/WORKSHOP_3_REPRODUCIBLE_DATA_ANALYSIS_WORKFLOWS_EXP_1
get ./20190617170232225-2577/original/Dockerfile -o Dockerfile
get ./20190617171838273-2578/original/snakemake.simg -o snakemake.simg

cd ../WORKSHOP_3_REPRODUCIBLE_DATA_ANALYSIS_WORKFLOWS_EXP_2
get ./20190617182556444-2583/original/Snakefile -o Snakefile
exit

Build a Snakemake Docker Image

sudo docker build -f Dockerfile -t snakemake:latest .
sudo docker run -it --rm --entrypoint bash snakemake:latest 
exit
sudo docker run -it --rm snakemake:latest bash ## no entrypoint
exit

Convert it to a Singularity Image

## push to the local registry
sudo docker tag snakemake localhost:5000/snakemake
sudo docker push localhost:5000/snakemake

## Build Singularity Image
sudo SINGULARITY_NOHTTPS=1 singularity build \
  snakemake_local.simg docker://localhost:5000/snakemake:latest

Check Docker Container

sudo docker run -it --rm snakemake:latest bash

## In the container
cat /etc/centos-release
conda info --env
python --version
snakemake --version
ls   ## try to locate the workflow_example folder
env  ## environment variable
exit

Check Singularity Container

singularity exec snakemake.simg bash

## In the container
cat /etc/centos-release
conda info --env
python --version
snakemake --version
ls
env  ## environment variable
exit

Run the Singularity Image

singularity exec snakemake.simg snakemake -np
## KeyError in line 11 of /home/guest/workflow_example/Snakefile:
## 'OPENBIS_USER'

export OPENBIS_USER=
export OPENBIS_PASSWORD= 
singularity exec snakemake.simg snakemake -np
singularity exec snakemake.simg snakemake -p
ls reports
rm -rf data/ data_processed/ reports/

Run the Docker Container:

## -v: mount volume, -e: pass environment variable, -w: change working directory
## (comments must not follow a trailing "\": they break the line continuation)
sudo docker run -it --rm \
  -v ~/workflow_example:/workflow_example \
  -e OPENBIS_USER=$OPENBIS_USER \
  -e OPENBIS_PASSWORD=$OPENBIS_PASSWORD \
  -w /workflow_example \
    snakemake:latest snakemake -p
ls reports

Practice After the Workshop

  • Use a CentOS 7.6 Virtual Machine (vagrant)

Install Some Packages

sudo yum -y update 
sudo yum install -y vim emacs nano
sudo yum install -y graphviz
sudo yum install -y lftp

Install Docker CE

sudo yum install -y yum-utils \
  device-mapper-persistent-data lvm2

sudo yum-config-manager \
  --add-repo \
  https://download.docker.com/linux/centos/docker-ce.repo

sudo yum install -y docker-ce docker-ce-cli containerd.io
docker --version

Install Singularity

sudo yum groupinstall -y 'Development Tools'
sudo yum install -y libarchive-devel wget squashfs-tools

git clone --branch 2.6.1 https://github.com/sylabs/singularity.git
cd singularity/
./autogen.sh
./configure --prefix=/usr/local --sysconfdir=/etc
make 
sudo make install
cd ~/
rm -rf ~/singularity
singularity --version

Install Miniconda

wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh  
## Follow the instructions
rm ~/miniconda.sh
source .bashrc 

conda --version
python --version