Create a virtual environment.

This step assumes you have installed miniconda following the instructions here:
https://github.com/kaust-rccl/ibex-miniconda-install

The first step is to clone the git repo and create a corresponding conda environment for SEML; let's create this environment in the user's scratch partition and add some useful libraries.

# Clone SEML into your scratch space and record where the conda environment
# will be created (an "env" folder inside the cloned repo).
# Expansions are quoted so paths with unusual characters cannot word-split.
cd "/ibex/scratch/$USER"
git clone https://github.com/TUM-DAML/seml.git
cd seml
export ENV_PREFIX="$PWD/env"

Option 1: pure conda environment

This option will create an environment that runs SEML 0.3.5 with Python 3.9. The SEML package on the conda channel is not kept up to date, so it specifically requires Python 3.9.

mamba env create -f environment.yaml -p $ENV_PREFIX --force
conda activate $ENV_PREFIX

Here is the environment.yaml file :

name: null
channels:
  - conda-forge
  - defaults
dependencies:
  - _libgcc_mutex=0.1
  - _openmp_mutex=4.5
  - aiohttp=3.8.3
  - aiosignal=1.3.1
  - alsa-lib=1.2.8
  - anyio=3.6.2
  - argon2-cffi=21.3.0
  - argon2-cffi-bindings=21.2.0
  - asttokens=2.1.0
  - async-timeout=4.0.2
  - attr=2.5.1
  - attrs=22.1.0
  - babel=2.11.0
  - backcall=0.2.0
  - backports=1.0
  - backports.functools_lru_cache=1.6.4
  - beautifulsoup4=4.11.1
  - bleach=5.0.1
  - bokeh=3.0.1
  - boost-cpp=1.74.0
  - brotli=1.0.9
  - brotli-bin=1.0.9
  - brotlipy=0.7.0
  - bzip2=1.0.8
  - c-ares=1.18.1
  - ca-certificates=2022.9.24
  - certifi=2022.9.24
  - cffi=1.15.1
  - charset-normalizer=2.1.1
  - colorama=0.4.6
  - contourpy=1.0.6
  - cryptography=38.0.3
  - cycler=0.11.0
  - dbus=1.13.6
  - debugpy=1.6.3
  - decorator=5.1.1
  - defusedxml=0.7.1
  - docopt=0.6.2
  - entrypoints=0.4
  - executing=1.2.0
  - expat=2.5.0
  - fftw=3.3.10
  - flit-core=3.8.0
  - font-ttf-dejavu-sans-mono=2.37
  - font-ttf-inconsolata=3.000
  - font-ttf-source-code-pro=2.038
  - font-ttf-ubuntu=0.83
  - fontconfig=2.14.1
  - fonts-conda-ecosystem=1
  - fonts-conda-forge=1
  - fonttools=4.38.0
  - freetype=2.12.1
  - frozenlist=1.3.3
  - gettext=0.21.1
  - gitdb=4.0.9
  - gitpython=3.1.29
  - glib=2.74.1
  - glib-tools=2.74.1
  - gst-plugins-base=1.21.2
  - gstreamer=1.21.2
  - gstreamer-orc=0.4.33
  - icu=70.1
  - idna=3.4
  - importlib-metadata=5.0.0
  - importlib_metadata=5.0.0
  - importlib_resources=5.10.0
  - ipykernel=6.17.1
  - ipython=8.6.0
  - ipython_genutils=0.2.0
  - jack=1.9.21
  - jedi=0.18.1
  - jinja2=3.1.2
  - jpeg=9e
  - json5=0.9.5
  - jsonpickle=1.5.1
  - jsonschema=4.17.0
  - jupyter-server-proxy=3.2.2
  - jupyter_client=7.4.5
  - jupyter_core=5.0.0
  - jupyter_server=1.23.1
  - jupyterlab=3.5.0
  - jupyterlab-nvdashboard=0.7.0
  - jupyterlab_pygments=0.2.2
  - jupyterlab_server=2.16.3
  - keyutils=1.6.1
  - kiwisolver=1.4.4
  - krb5=1.19.3
  - lame=3.100
  - lcms2=2.14
  - ld_impl_linux-64=2.39
  - lerc=4.0.0
  - libabseil=20220623.0
  - libblas=3.9.0
  - libbrotlicommon=1.0.9
  - libbrotlidec=1.0.9
  - libbrotlienc=1.0.9
  - libcap=2.66
  - libcblas=3.9.0
  - libclang=15.0.6
  - libclang13=15.0.6
  - libcups=2.3.3
  - libcurl=7.86.0
  - libdb=6.2.32
  - libdeflate=1.14
  - libedit=3.1.20191231
  - libev=4.33
  - libevent=2.1.10
  - libffi=3.4.2
  - libflac=1.4.2
  - libgcc-ng=12.2.0
  - libgcrypt=1.10.1
  - libgfortran-ng=12.2.0
  - libgfortran5=12.2.0
  - libglib=2.74.1
  - libgomp=12.2.0
  - libgpg-error=1.45
  - libiconv=1.17
  - liblapack=3.9.0
  - libllvm15=15.0.6
  - libnghttp2=1.47.0
  - libnsl=2.0.0
  - libogg=1.3.4
  - libopenblas=0.3.21
  - libopus=1.3.1
  - libpng=1.6.39
  - libpq=15.1
  - libsndfile=1.1.0
  - libsodium=1.0.18
  - libsqlite=3.40.0
  - libssh2=1.10.0
  - libstdcxx-ng=12.2.0
  - libsystemd0=252
  - libtiff=4.4.0
  - libtool=2.4.6
  - libudev1=252
  - libuuid=2.32.1
  - libvorbis=1.3.7
  - libwebp-base=1.2.4
  - libxcb=1.13
  - libxkbcommon=1.0.3
  - libxml2=2.10.3
  - libzlib=1.2.13
  - lz4-c=1.9.3
  - markupsafe=2.1.1
  - matplotlib=3.6.2
  - matplotlib-base=3.6.2
  - matplotlib-inline=0.1.6
  - mistune=2.0.4
  - mongodb=6.0.2
  - mpg123=1.31.1
  - multidict=6.0.2
  - munch=2.5.0
  - munkres=1.1.4
  - mysql-common=8.0.31
  - mysql-libs=8.0.31
  - nbclassic=0.4.8
  - nbclient=0.7.0
  - nbconvert=7.2.4
  - nbconvert-core=7.2.4
  - nbconvert-pandoc=7.2.4
  - nbformat=5.7.0
  - ncurses=6.3
  - nest-asyncio=1.5.6
  - notebook=6.5.2
  - notebook-shim=0.2.2
  - nspr=4.35
  - nss=3.82
  - numpy=1.23.4
  - openjpeg=2.5.0
  - openssl=3.0.7
  - packaging=21.3
  - pandas=1.5.1
  - pandoc=2.19.2
  - pandocfilters=1.5.0
  - parso=0.8.3
  - pcre=8.45
  - pcre2=10.40
  - pexpect=4.8.0
  - pickleshare=0.7.5
  - pillow=9.2.0
  - pip=22.3.1
  - pkgutil-resolve-name=1.3.10
  - platformdirs=2.5.2
  - ply=3.11
  - prometheus_client=0.15.0
  - prompt-toolkit=3.0.32
  - psutil=5.9.4
  - pthread-stubs=0.4
  - ptyprocess=0.7.0
  - pulseaudio=16.1
  - pure_eval=0.2.2
  - py-cpuinfo=9.0.0
  - pycparser=2.21
  - pygments=2.13.0
  - pymongo=3.13.0
  - pynvml=11.4.1
  - pyopenssl=22.1.0
  - pyparsing=3.0.9
  - pyqt=5.15.7
  - pyqt5-sip=12.11.0
  - pyrsistent=0.19.2
  - pysocks=1.7.1
  - python=3.9.13
  - python-dateutil=2.8.2
  - python-fastjsonschema=2.16.2
  - python_abi=3.9
  - pytz=2022.6
  - pyyaml=6.0
  - pyzmq=24.0.1
  - qt-main=5.15.6
  - readline=8.1.2
  - requests=2.28.1
  - sacred=0.8.2
  - seml=0.3.5
  - send2trash=1.8.0
  - setuptools=65.5.1
  - simpervisor=0.4
  - sip=6.7.5
  - six=1.16.0
  - smmap=3.0.5
  - snappy=1.1.9
  - sniffio=1.3.0
  - soupsieve=2.3.2.post1
  - sqlite=3.40.0
  - stack_data=0.6.1
  - terminado=0.17.0
  - tinycss2=1.2.1
  - tk=8.6.12
  - toml=0.10.2
  - tomli=2.0.1
  - tornado=6.2
  - tqdm=4.64.1
  - traitlets=5.5.0
  - typing-extensions=4.4.0
  - typing_extensions=4.4.0
  - tzdata=2022f
  - unicodedata2=15.0.0
  - urllib3=1.26.11
  - wcwidth=0.2.5
  - webencodings=0.5.1
  - websocket-client=1.4.2
  - wheel=0.38.4
  - wrapt=1.14.1
  - xcb-util=0.4.0
  - xcb-util-image=0.4.0
  - xcb-util-keysyms=0.4.0
  - xcb-util-renderutil=0.3.9
  - xcb-util-wm=0.4.1
  - xorg-libxau=1.0.9
  - xorg-libxdmcp=1.1.3
  - xyzservices=2022.9.0
  - xz=5.2.6
  - yaml=0.2.5
  - yaml-cpp=0.7.0
  - yarl=1.8.1
  - zeromq=4.3.4
  - zipp=3.10.0
  - zstd=1.5.2

Option 2: Use conda and pip to install the latest SEML version

This option will create a conda environment with the latest version of SEML that comes from pip installation and python 3.11. However, it will also need an extra step to fix a numpy bug inside of a library. Brace yourself; we start building the conda environment.

mamba env create -f environment.yaml -p $ENV_PREFIX --force
conda activate $ENV_PREFIX

The environment.yaml file for this option looks like this:

name: null
channels:
  - conda-forge
  - defaults
dependencies:
  - _libgcc_mutex=0.1
  - _openmp_mutex=4.5
  - aiofiles=22.1.0
  - aiohttp=3.8.4
  - aiosignal=1.3.1
  - aiosqlite=0.18.0
  - alsa-lib=1.2.8
  - anyio=3.6.2
  - argon2-cffi=21.3.0
  - argon2-cffi-bindings=21.2.0
  - asttokens=2.2.1
  - async-timeout=4.0.2
  - attr=2.5.1
  - attrs=22.2.0
  - babel=2.11.0
  - backcall=0.2.0
  - backports=1.0
  - backports.functools_lru_cache=1.6.4
  - beautifulsoup4=4.11.2
  - bleach=6.0.0
  - bokeh=3.0.3
  - brotli=1.0.9
  - brotli-bin=1.0.9
  - brotlipy=0.7.0
  - bzip2=1.0.8
  - ca-certificates=2022.12.7
  - cairo=1.16.0
  - cffi=1.15.1
  - comm=0.1.2
  - contourpy=1.0.7
  - cryptography=39.0.1
  - cycler=0.11.0
  - dbus=1.13.6
  - debugpy=1.6.6
  - decorator=5.1.1
  - defusedxml=0.7.1
  - entrypoints=0.4
  - executing=1.2.0
  - expat=2.5.0
  - fftw=3.3.10
  - flit-core=3.8.0
  - font-ttf-dejavu-sans-mono=2.37
  - font-ttf-inconsolata=3.000
  - font-ttf-source-code-pro=2.038
  - font-ttf-ubuntu=0.83
  - fontconfig=2.14.2
  - fonts-conda-ecosystem=1
  - fonts-conda-forge=1
  - fonttools=4.38.0
  - freetype=2.12.1
  - frozenlist=1.3.3
  - gettext=0.21.1
  - glib=2.74.1
  - glib-tools=2.74.1
  - graphite2=1.3.13
  - gst-plugins-base=1.22.0
  - gstreamer=1.22.0
  - gstreamer-orc=0.4.33
  - harfbuzz=6.0.0
  - icu=70.1
  - idna=3.4
  - importlib-metadata=6.0.0
  - importlib_metadata=6.0.0
  - importlib_resources=5.10.2
  - ipykernel=6.21.2
  - ipython=8.10.0
  - ipython_genutils=0.2.0
  - jack=1.9.22
  - jedi=0.18.2
  - jinja2=3.1.2
  - jpeg=9e
  - json5=0.9.5
  - jsonschema=4.17.3
  - jupyter-server-proxy=3.2.2
  - jupyter_client=8.0.2
  - jupyter_core=5.2.0
  - jupyter_events=0.5.0
  - jupyter_server=2.2.1
  - jupyter_server_fileid=0.6.0
  - jupyter_server_terminals=0.4.4
  - jupyter_server_ydoc=0.6.1
  - jupyter_ydoc=0.2.2
  - jupyterlab=3.6.1
  - jupyterlab-nvdashboard=0.7.0
  - jupyterlab_pygments=0.2.2
  - jupyterlab_server=2.19.0
  - keyutils=1.6.1
  - kiwisolver=1.4.4
  - krb5=1.20.1
  - lame=3.100
  - lcms2=2.14
  - ld_impl_linux-64=2.40
  - lerc=4.0.0
  - libblas=3.9.0
  - libbrotlicommon=1.0.9
  - libbrotlidec=1.0.9
  - libbrotlienc=1.0.9
  - libcap=2.66
  - libcblas=3.9.0
  - libclang=15.0.7
  - libclang13=15.0.7
  - libcups=2.3.3
  - libdb=6.2.32
  - libdeflate=1.17
  - libedit=3.1.20191231
  - libevent=2.1.10
  - libffi=3.4.2
  - libflac=1.4.2
  - libgcc-ng=12.2.0
  - libgcrypt=1.10.1
  - libgfortran-ng=12.2.0
  - libgfortran5=12.2.0
  - libglib=2.74.1
  - libgomp=12.2.0
  - libgpg-error=1.46
  - libiconv=1.17
  - liblapack=3.9.0
  - libllvm15=15.0.7
  - libnsl=2.0.0
  - libogg=1.3.4
  - libopenblas=0.3.21
  - libopus=1.3.1
  - libpng=1.6.39
  - libpq=15.2
  - libsndfile=1.2.0
  - libsodium=1.0.18
  - libsqlite=3.40.0
  - libstdcxx-ng=12.2.0
  - libsystemd0=252
  - libtiff=4.5.0
  - libtool=2.4.7
  - libudev1=252
  - libuuid=2.32.1
  - libvorbis=1.3.7
  - libwebp-base=1.2.4
  - libxcb=1.13
  - libxkbcommon=1.0.3
  - libxml2=2.10.3
  - libzlib=1.2.13
  - lz4-c=1.9.4
  - markupsafe=2.1.2
  - matplotlib=3.6.3
  - matplotlib-base=3.6.3
  - matplotlib-inline=0.1.6
  - mistune=2.0.5
  - mpg123=1.31.2
  - multidict=6.0.4
  - munkres=1.1.4
  - mysql-common=8.0.32
  - mysql-libs=8.0.32
  - nbclassic=0.5.1
  - nbclient=0.7.2
  - nbconvert=7.2.9
  - nbconvert-core=7.2.9
  - nbconvert-pandoc=7.2.9
  - nbformat=5.7.3
  - ncurses=6.3
  - nest-asyncio=1.5.6
  - notebook=6.5.2
  - notebook-shim=0.2.2
  - nspr=4.35
  - nss=3.88
  - numpy=1.24.2
  - openjpeg=2.5.0
  - openssl=3.0.8
  - packaging=23.0
  - pandoc=2.19.2
  - pandocfilters=1.5.0
  - parso=0.8.3
  - pcre2=10.40
  - pexpect=4.8.0
  - pickleshare=0.7.5
  - pillow=9.4.0
  - pip=23.0
  - pixman=0.40.0
  - pkgutil-resolve-name=1.3.10
  - platformdirs=3.0.0
  - ply=3.11
  - prometheus_client=0.16.0
  - prompt-toolkit=3.0.36
  - psutil=5.9.4
  - pthread-stubs=0.4
  - ptyprocess=0.7.0
  - pulseaudio=16.1
  - pure_eval=0.2.2
  - pycparser=2.21
  - pygments=2.14.0
  - pynvml=11.4.1
  - pyopenssl=23.0.0
  - pyparsing=3.0.9
  - pyqt=5.15.7
  - pyqt5-sip=12.11.0
  - pyrsistent=0.19.3
  - pysocks=1.7.1
  - python=3.11.0
  - python-dateutil=2.8.2
  - python-fastjsonschema=2.16.2
  - python-json-logger=2.0.4
  - python_abi=3.11
  - pytz=2022.7.1
  - pyyaml=6.0
  - pyzmq=25.0.0
  - qt-main=5.15.8
  - readline=8.1.2
  - requests=2.28.2
  - send2trash=1.8.0
  - setuptools=67.1.0
  - simpervisor=0.4
  - sip=6.7.7
  - six=1.16.0
  - sniffio=1.3.0
  - soupsieve=2.3.2.post1
  - stack_data=0.6.2
  - terminado=0.17.1
  - tinycss2=1.2.1
  - tk=8.6.12
  - toml=0.10.2
  - tomli=2.0.1
  - tornado=6.2
  - traitlets=5.9.0
  - typing-extensions=4.4.0
  - typing_extensions=4.4.0
  - tzdata=2022g
  - urllib3=1.26.14
  - wcwidth=0.2.6
  - webencodings=0.5.1
  - websocket-client=1.5.1
  - wheel=0.38.4
  - xcb-util=0.4.0
  - xcb-util-image=0.4.0
  - xcb-util-keysyms=0.4.0
  - xcb-util-renderutil=0.3.9
  - xcb-util-wm=0.4.1
  - xorg-kbproto=1.0.7
  - xorg-libice=1.0.10
  - xorg-libsm=1.2.3
  - xorg-libx11=1.7.2
  - xorg-libxau=1.0.9
  - xorg-libxdmcp=1.1.3
  - xorg-libxext=1.3.4
  - xorg-libxrender=0.9.10
  - xorg-renderproto=0.11.1
  - xorg-xextproto=7.3.0
  - xorg-xproto=7.0.31
  - xyzservices=2022.9.0
  - xz=5.2.6
  - y-py=0.5.5
  - yaml=0.2.5
  - yarl=1.8.2
  - ypy-websocket=0.8.2
  - zeromq=4.3.4
  - zipp=3.13.0
  - zlib=1.2.13
  - zstd=1.5.2
  - pip:
    - certifi==2022.12.7
    - charset-normalizer==3.0.1
    - colorama==0.4.6
    - dnspython==2.3.0
    - docopt==0.6.2
    - gitdb==4.0.10
    - gitpython==3.1.30
    - jsonpickle==1.5.2
    - munch==2.5.0
    - pandas==1.5.3
    - py-cpuinfo==9.0.0
    - pymongo==4.3.3
    - sacred==0.8.4
    - seml==0.3.7
    - smmap==5.0.0
    - tqdm==4.64.1
    - wrapt==1.14.1

Fix the bug for numpy:

If you try to run SEML using this environment, you will get all your experiments killed and the following error related to numpy:

 /ibex/scratch/barradd/seml_2/seml/env2/lib/python3.11/site-packages/jsonpickle/ext/numpy.py:139: FutureWarning: In the future `np.object` will be defined as the corresponding NumPy scalar.
   if obj.dtype == np.object:
 2023-02-13 14:40:07 (WARNING): An error ocurred in the '<sacred.observers.mongo.MongoObserver object at 0x2ac9012ca5d0>' observer: module 'numpy' has no attribute 'object'.
 `np.object` was a deprecated alias for the builtin `object`. To avoid this error in existing code, use `object` by itself. Doing this will not modify any behavior and is safe.
 The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
     https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
   File "/ibex/scratch/barradd/seml_2/seml/env2/lib/python3.11/site-packages/jsonpickle/ext/numpy.py", line 139, in flatten
     if obj.dtype == np.object:
                     ^^^^^^^^^

To fix it, you will have to edit the numpy extension file inside that site-packages directory using your favorite text editor:

vi $ENV_PREFIX/lib/python3.11/site-packages/jsonpickle/ext/numpy.py

Then change line 139 so that it reads if obj.dtype == "object": (you will also see a warning in the surrounding code about this same bug):

        else:
             # encode as binary
             if obj.dtype == "object": ## this is correct way
             #if obj.dtype == np.object: ## this is not supported anymore
             
                 # There's a bug deep in the bowels of numpy that causes a
                 # segfault when round-tripping an ndarray of dtype object.
                 # E.g., the following will result in a segfault:
                 #     import numpy as np
                 #     arr = np.array([str(i) for i in range(3)],
                 #                    dtype=np.object)
                 #     dtype = arr.dtype
                 #     shape = arr.shape
                 #     buf = arr.tobytes()
                 #     del arr
                 #     arr = np.ndarray(buffer=buf, dtype=dtype,
                 #                      shape=shape).copy()
                 # So, save as a binary-encoded list in this case

Setup MongoDB using a container

You can do this step on any folder, even your home directory

Clone the git repository containing the Singularity definition file to create the image:

git clone https://github.com/singularityhub/mongo.git

Submit a job to make the singularity image, and the following is an example of the job script

#!/bin/bash
#SBATCH --time=01:00:00
#SBATCH --ntasks=1

# Build the MongoDB Singularity image from the cloned definition file.
module load singularity
# Abort if the repo folder is missing; otherwise the build would run
# in the submission directory and fail confusingly.
cd "$HOME/mongo" || exit 1
# Singularity needs a writable runtime dir; point it at $HOME.
export XDG_RUNTIME_DIR="$HOME"
singularity build --fakeroot mongo.sif Singularity

Successful completion should result in creating a singularity image file mongo.sif.

Launch the MongoDB

You can now copy the image file anywhere you want. The jobs are managed by SEML using MongoDB. The mongo database must run on a CPU partition, and you need a data folder to bind the instance's storage to.

If the mongo instance is killed due to time limits, you will have to launch another, but you may lose the files stored in them.

#!/bin/bash
#SBATCH --time=04:00:00
#SBATCH --nodes=1
#SBATCH --partition=batch
#SBATCH --job-name=mongoDB
#SBATCH --mail-type=ALL
#SBATCH --output=%x-%j-slurm.out
#SBATCH --error=%x-%j-slurm.err

module load singularity

# Extract the IPv4 address of the InfiniBand interface (ib0).
# Matching the "inet" line with awk and printing field 2 is more robust
# than cut -d " " -f 10, which depends on the exact column spacing of
# ifconfig's output.
IP_ADDR=$(ifconfig ib0 | awk '/inet /{print $2; exit}')
export IP_ADDR
# Keep the exact IP_ADDRESS=... output format: the next step greps the
# .out file for this line to find where the database is running.
echo "IP_ADDRESS=$IP_ADDR"
# Create the data directory the database binds its storage to.
mkdir -p "$PWD/data"
singularity run "$PWD/mongo.sif" mongod --noauth --bind_ip "localhost,$IP_ADDR" --dbpath="$PWD/data"

Create a user on the database.

Inside the out file generated by the script, you will find the IP address you need to connect to the mongo instance; in this example, use head to see the IP_ADDRESS where the database is running.

head mongoDB-22912150-slurm.out
IP_ADDRESS=10.109.201.49

Use an interactive session to try the connection and access the mongosh interface

srun --time=02:00:00 --nodes=1 --pty singularity exec -B $PWD/data:/data/db $PWD/container/mongo.sif mongosh --host 10.109.201.49

After the resources are allocated, you will see the output like this below:

srun: job 22878644 queued and waiting for resources
srun: job 22878644 has been allocated resources
Current Mongosh Log ID:	6374e621ec85174afd042398
Connecting to:		mongodb://10.109.197.13:27017/?directConnection=true&appName=mongosh+1.6.0
Using MongoDB:		6.0.2
Using Mongosh:		1.6.0

For mongosh info see: https://docs.mongodb.com/mongodb-shell/


To help improve our products, anonymous usage data is collected and sent to MongoDB periodically (https://www.mongodb.com/legal/privacy-policy).
You can opt-out by running the disableTelemetry() command.

------
   The server generated these startup warnings when booting
   2022-11-16T16:16:36.057+03:00: /sys/kernel/mm/transparent_hugepage/enabled is 'always'. We suggest setting it to 'never'
   2022-11-16T16:16:36.058+03:00: /sys/kernel/mm/transparent_hugepage/defrag is 'always'. We suggest setting it to 'never'
   2022-11-16T16:16:36.058+03:00: vm.max_map_count is too low
------

------
   Enable MongoDB's free cloud-based monitoring service, which will then receive and display
   metrics about your deployment (disk utilization, CPU, operation statistics, etc).
   
   The monitoring data will be available on a MongoDB website with a unique URL accessible to you
   and anyone you share the URL with. MongoDB may use this information to make product
   improvements and to suggest MongoDB products and deployment options to you.
   
   To enable free monitoring, run the following command: db.enableFreeMonitoring()
   To permanently disable this reminder, run the following command: db.disableFreeMonitoring()
------

test> 

Here you see the assigned port 27017, the database name test, and your shell command prompt from mongo test>. Next, you have to create a user and a password in the database using the following line

test> db.createUser ( { user:"barradd" , pwd:"test1" , roles:[ { role:"readWrite" , db:"test" }, {role:"readWrite" ,db:"admin"} ] } )

This command will create the user barradd with the password test1 and give it roles in the admin and test databases. You can modify this parameter as you like or create a new database for your project.

After this step, you can stop this interactive session

If you wish to see more commands available for the mongosh interface, you can type

singularity run $PWD/container/mongo.sif mongosh --help

Running the SEML configuration and setup

The next step is to create the corresponding setup for SEML. You need to activate the conda environment,

conda activate $ENV_PREFIX

With the SEML environment active, the library provides a CLI. So first you type seml configure

Here you have to provide the corresponding data; the host is the IP address; you can leave the default port (unless you change it in previous steps), the user, passwords, and the database name you created during the last step.

seml configure
Configuring SEML. Warning: Password will be stored in plain text.
Please input the MongoDB host: 10.109.201.49
Port (default: 27017):
Please input the database name: test
Please input the user name: barradd
Please input the password: 
Saving the following configuration to /home/barradd/.config/seml/mongodb.config:
username: barradd
password: ********
port: 27017
database: test
host: 10.109.201.49

Changing the setup for ibex

As indicated by the authors, you can explore several options by looking at the file in the GitHub repo examples/example_config.yaml.

# Experiment configuration file.
#
# There are two special blocks. The 'seml' block is required for every experiment.
# It has to contain the following values:
# executable:        Name of the Python script containing the experiment. The path should be relative to the `project_root_dir`.
#                    For backward compatibility SEML also supports paths relative to the location of the config file.
#                    In case there are files present both relative to the project root and the config file,
#                    the former takes precedence.
# It can optionally also contain the following values:
# name:              Prefix for output file and Slurm job name. Default: Collection name
# output_dir:        Directory to store log files in. Default: Current directory
# conda_environment: Specifies which Anaconda virtual environment will be activated before the experiment is executed.
#                    Default: The environment used when queuing.
# project_root_dir:  (Relative or absolute) path to the root of the project. seml will then upload all the source
#                    files imported by the experiment to the MongoDB. Moreover, the uploaded source files will be
#                    downloaded before starting an experiment, so any changes to the source files in the project
#                    between queueing and starting the experiment will have no effect.
#
# The special 'slurm' block contains the slurm parameters. This block and all values are optional. Possible values are:
# experiments_per_job:     Number of parallel experiments to run in each Slurm job.
#                          Note that only experiments from the same batch share a job. Default: 1
# max_simultaneous_jobs:   Maximum number of simultaneously running Slurm jobs per job array. Default: No restriction
# sbatch_options_template: Name of a custom template of `SBATCH` options. Define your own templates in `settings.py`
#                          under `SBATCH_OPTIONS_TEMPLATES`, e.g. for long-running jobs, CPU-only jobs, etc.
# sbatch_options:          dictionary that contains custom values that will be passed to `sbatch`, specifying e.g.
#                          the memory and number of GPUs to be allocated (prepended dashes are not required). See
#                          https://slurm.schedmd.com/sbatch.html for all possible options.
#
# Parameters under 'fixed' will be used for all the experiments.
#
# Under 'grid' you can define parameters that should be sampled from a regular grid. Options are:
#   - choice:     List the different values you want to evaluate under 'choices' as in the example below.
#   - range:      Specify the min, max, and step. Parameter values will be generated using np.arange(min, max, step).
#   - uniform:    Specify the min, max, and num. Parameter values will be generated using
#                 np.linspace(min, max, num, endpoint=True)
#   - loguniform: Specify min, max, and num. Parameter values will be uniformly generated in log space (base 10).
#
# Under 'random' you can specify parameters for which you want to try several random values. Specify the number
# of samples per parameter with the 'samples' value as in the examples below.
# Specify the the seed under the 'random' dict or directly for the desired parameter(s).
# Supported parameter types are:
#   - choice:      Randomly samples <samples> entries (with replacement) from the list in parameter['options']
#   - uniform:     Uniformly samples between 'min' and 'max' as specified in the parameter dict.
#   - loguniform:  Uniformly samples in log space between 'min' and 'max' as specified in the parameter dict.
#   - randint:     Randomly samples integers between 'min' (included) and 'max' (excluded).
#
# The configuration file can be nested (as the example below) so that we can run different parameter sets
# e.g. for different datasets or models.
# We take the cartesian product of all `grid` parameters on a path and sample all random parameters on the path.
# The number of random parameters sampled will be max{n_samples} of all n_samples on the path. This is done because
# we need the same number of samples from all random parameters in a configuration.
#
# More specific settings (i.e., further down the hierarchy) always overwrite more general ones.

Change the example_config.yaml file

Here, we work with the examples/tutorial/example_config.yaml file, which does not contain the whole explanation of the configuration settings. To be very explicit with the interface, you can put the complete path stored in the $ENV_PREFIX variable to specify the conda environment that SEML will use.

seml:
  executable: examples/example_experiment.py
  name: example_experiment
  output_dir: examples/logs
  project_root_dir: ..
  conda_environment: /ibex/scratch/barradd/seml/env

Change the slurm_template.sh

A necessary step is to edit the Slurm template that the library uses to launch the job arrays. This file lives deep inside the environment's libraries:

vi $ENV_PREFIX/lib/python3.9/site-packages/seml/slurm_template.sh

Then we modify line 1 to #!/bin/bash --login, and change the conda activation line (line 14) to source $CONDA_BASE/bin/activate

#!/bin/bash --login
# SEML Slurm job-array template. This file is a Python str.format template:
# single-brace tokens are substituted by SEML before submission, while
# doubled-brace tokens survive formatting as ordinary shell expansions.
# Line 1 and the activation line below were adapted for Ibex (login shell
# plus activating the conda base via bin/activate).
{sbatch_options}

# Move either to project root dir or the config file path.
cd {working_dir}

# Print job information
echo "Starting job ${{SLURM_JOBID}}"
echo "SLURM assigned me the node(s): $(squeue -j ${{SLURM_JOBID}} -O nodelist:1000 | tail -n +2 | sed -e 's/[[:space:]]*$//')"

# Activate Anaconda environment
if {use_conda_env}; then
    CONDA_BASE=$(conda info --base)
    #source $CONDA_BASE/etc/profile.d/conda.sh
    source $CONDA_BASE/bin/activate ## this activates base in Ibex
    conda activate {conda_env}
fi

echo "Using Python executable: $(which python)"

# Chunked list with all experiment IDs
all_exp_ids=({exp_ids})

# Get experiment IDs for this Slurm task
# (each array task receives one semicolon-separated chunk of IDs)
exp_ids_str="${{all_exp_ids[$SLURM_ARRAY_TASK_ID]}}"
IFS=";" read -r -a exp_ids <<< "$exp_ids_str"

# Create directory for the source files in MongoDB
if {with_sources}; then
    tmpdir="/tmp/$(uuidgen)"  # unique temp dir based on UUID
    mkdir $tmpdir
    # Prepend the temp dir to $PYTHONPATH so it will be used by python.
    export PYTHONPATH="$tmpdir:$PYTHONPATH"
fi

# Start experiments in separate processes
process_ids=()
for exp_id in "${{exp_ids[@]}}"; do
    # The prepare script prints the command to run for this experiment;
    # its exit code reports the experiment state, handled below.
    cmd=$(python -c '{prepare_experiment_script}' --experiment_id ${{exp_id}} --db_collection_name {db_collection_name} {sources_argument} --verbose {verbose} --unobserved {unobserved} --debug-server {debug_server})

    ret=$?
    if [ $ret -eq 0 ]; then
        # Launch the experiment in the background and remember its PID.
        eval $cmd &
        process_ids+=($!)
    elif [ $ret -eq 1 ]; then
        echo "WARNING: Experiment with ID ${{exp_id}} does not have status PENDING and will not be run."
    elif [ $ret -eq 2 ]; then
        (>&2 echo "ERROR: Experiment with id ${{exp_id}} not found in the database.")
    fi
done

# Print process information
echo "Experiments are running under the following process IDs:"
num_it=${{#process_ids[@]}}
for ((i=0; i<$num_it; i++)); do
    echo "Experiment ID: ${{exp_ids[$i]}}	Process ID: ${{process_ids[$i]}}"
done
echo

# Wait for all experiments to finish
wait

# Delete temporary source files
if {with_sources}; then
    rm -rf $tmpdir
fi

Run the experiments

Then you should be able to start your experiments as indicated on the tutorial demo with the corresponding bash commands.

seml seml_example add example_config.yaml
seml seml_example status
seml seml_example start

It should look like this:

(/ibex/scratch/barradd/seml/env) seml seml_tutorial add example_config.yaml
Adding 6 configs to the database (batch-ID 1).
(/ibex/scratch/barradd/seml/env) seml seml_tutorial start
Starting 6 experiments in 6 Slurm jobs in 1 Slurm job array.
(/ibex/scratch/barradd/seml/env) seml seml_tutorial status
********** Report for database collection 'seml_tutorial' **********
*     -   0 staged experiments
*     -   0 pending experiments
*     -   0 running experiments
*     -   6 completed experiments
*     -   0 interrupted experiments
*     -   0 failed experiments
*     -   0 killed experiments
********************************************************************

All the output (successful or not) will be in the logs folder seml/examples/logs.