SEML on ibex
Create a virtual environment.
This step assumes you have installed miniconda following the instructions here:
https://github.com/kaust-rccl/ibex-miniconda-install
The first step is to clone the git repo and create a corresponding conda environment for SEML; let's create this environment in the user's scratch partition and add some useful libraries.
# Clone SEML into the user's scratch space and record where the conda
# environment will live; $ENV_PREFIX is reused by every later step.
cd "/ibex/scratch/$USER" || exit 1
git clone https://github.com/TUM-DAML/seml.git
# Stop here rather than creating the env in the wrong place if the clone failed.
cd seml || exit 1
export ENV_PREFIX="$PWD/env"
Option 1: pure conda environment
This option will create an environment that runs SEML 0.3.5 with Python 3.9. The SEML package is not kept up to date on the conda channel, so it specifically requires Python 3.9.
# Create (or forcibly recreate) the environment at $ENV_PREFIX from the pinned spec.
mamba env create -f environment.yaml -p $ENV_PREFIX --force
# The spec has "name: null", so the env must be activated by its full path.
conda activate $ENV_PREFIX
Here is the environment.yaml file :
name: null
channels:
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1
- _openmp_mutex=4.5
- aiohttp=3.8.3
- aiosignal=1.3.1
- alsa-lib=1.2.8
- anyio=3.6.2
- argon2-cffi=21.3.0
- argon2-cffi-bindings=21.2.0
- asttokens=2.1.0
- async-timeout=4.0.2
- attr=2.5.1
- attrs=22.1.0
- babel=2.11.0
- backcall=0.2.0
- backports=1.0
- backports.functools_lru_cache=1.6.4
- beautifulsoup4=4.11.1
- bleach=5.0.1
- bokeh=3.0.1
- boost-cpp=1.74.0
- brotli=1.0.9
- brotli-bin=1.0.9
- brotlipy=0.7.0
- bzip2=1.0.8
- c-ares=1.18.1
- ca-certificates=2022.9.24
- certifi=2022.9.24
- cffi=1.15.1
- charset-normalizer=2.1.1
- colorama=0.4.6
- contourpy=1.0.6
- cryptography=38.0.3
- cycler=0.11.0
- dbus=1.13.6
- debugpy=1.6.3
- decorator=5.1.1
- defusedxml=0.7.1
- docopt=0.6.2
- entrypoints=0.4
- executing=1.2.0
- expat=2.5.0
- fftw=3.3.10
- flit-core=3.8.0
- font-ttf-dejavu-sans-mono=2.37
- font-ttf-inconsolata=3.000
- font-ttf-source-code-pro=2.038
- font-ttf-ubuntu=0.83
- fontconfig=2.14.1
- fonts-conda-ecosystem=1
- fonts-conda-forge=1
- fonttools=4.38.0
- freetype=2.12.1
- frozenlist=1.3.3
- gettext=0.21.1
- gitdb=4.0.9
- gitpython=3.1.29
- glib=2.74.1
- glib-tools=2.74.1
- gst-plugins-base=1.21.2
- gstreamer=1.21.2
- gstreamer-orc=0.4.33
- icu=70.1
- idna=3.4
- importlib-metadata=5.0.0
- importlib_metadata=5.0.0
- importlib_resources=5.10.0
- ipykernel=6.17.1
- ipython=8.6.0
- ipython_genutils=0.2.0
- jack=1.9.21
- jedi=0.18.1
- jinja2=3.1.2
- jpeg=9e
- json5=0.9.5
- jsonpickle=1.5.1
- jsonschema=4.17.0
- jupyter-server-proxy=3.2.2
- jupyter_client=7.4.5
- jupyter_core=5.0.0
- jupyter_server=1.23.1
- jupyterlab=3.5.0
- jupyterlab-nvdashboard=0.7.0
- jupyterlab_pygments=0.2.2
- jupyterlab_server=2.16.3
- keyutils=1.6.1
- kiwisolver=1.4.4
- krb5=1.19.3
- lame=3.100
- lcms2=2.14
- ld_impl_linux-64=2.39
- lerc=4.0.0
- libabseil=20220623.0
- libblas=3.9.0
- libbrotlicommon=1.0.9
- libbrotlidec=1.0.9
- libbrotlienc=1.0.9
- libcap=2.66
- libcblas=3.9.0
- libclang=15.0.6
- libclang13=15.0.6
- libcups=2.3.3
- libcurl=7.86.0
- libdb=6.2.32
- libdeflate=1.14
- libedit=3.1.20191231
- libev=4.33
- libevent=2.1.10
- libffi=3.4.2
- libflac=1.4.2
- libgcc-ng=12.2.0
- libgcrypt=1.10.1
- libgfortran-ng=12.2.0
- libgfortran5=12.2.0
- libglib=2.74.1
- libgomp=12.2.0
- libgpg-error=1.45
- libiconv=1.17
- liblapack=3.9.0
- libllvm15=15.0.6
- libnghttp2=1.47.0
- libnsl=2.0.0
- libogg=1.3.4
- libopenblas=0.3.21
- libopus=1.3.1
- libpng=1.6.39
- libpq=15.1
- libsndfile=1.1.0
- libsodium=1.0.18
- libsqlite=3.40.0
- libssh2=1.10.0
- libstdcxx-ng=12.2.0
- libsystemd0=252
- libtiff=4.4.0
- libtool=2.4.6
- libudev1=252
- libuuid=2.32.1
- libvorbis=1.3.7
- libwebp-base=1.2.4
- libxcb=1.13
- libxkbcommon=1.0.3
- libxml2=2.10.3
- libzlib=1.2.13
- lz4-c=1.9.3
- markupsafe=2.1.1
- matplotlib=3.6.2
- matplotlib-base=3.6.2
- matplotlib-inline=0.1.6
- mistune=2.0.4
- mongodb=6.0.2
- mpg123=1.31.1
- multidict=6.0.2
- munch=2.5.0
- munkres=1.1.4
- mysql-common=8.0.31
- mysql-libs=8.0.31
- nbclassic=0.4.8
- nbclient=0.7.0
- nbconvert=7.2.4
- nbconvert-core=7.2.4
- nbconvert-pandoc=7.2.4
- nbformat=5.7.0
- ncurses=6.3
- nest-asyncio=1.5.6
- notebook=6.5.2
- notebook-shim=0.2.2
- nspr=4.35
- nss=3.82
- numpy=1.23.4
- openjpeg=2.5.0
- openssl=3.0.7
- packaging=21.3
- pandas=1.5.1
- pandoc=2.19.2
- pandocfilters=1.5.0
- parso=0.8.3
- pcre=8.45
- pcre2=10.40
- pexpect=4.8.0
- pickleshare=0.7.5
- pillow=9.2.0
- pip=22.3.1
- pkgutil-resolve-name=1.3.10
- platformdirs=2.5.2
- ply=3.11
- prometheus_client=0.15.0
- prompt-toolkit=3.0.32
- psutil=5.9.4
- pthread-stubs=0.4
- ptyprocess=0.7.0
- pulseaudio=16.1
- pure_eval=0.2.2
- py-cpuinfo=9.0.0
- pycparser=2.21
- pygments=2.13.0
- pymongo=3.13.0
- pynvml=11.4.1
- pyopenssl=22.1.0
- pyparsing=3.0.9
- pyqt=5.15.7
- pyqt5-sip=12.11.0
- pyrsistent=0.19.2
- pysocks=1.7.1
- python=3.9.13
- python-dateutil=2.8.2
- python-fastjsonschema=2.16.2
- python_abi=3.9
- pytz=2022.6
- pyyaml=6.0
- pyzmq=24.0.1
- qt-main=5.15.6
- readline=8.1.2
- requests=2.28.1
- sacred=0.8.2
- seml=0.3.5
- send2trash=1.8.0
- setuptools=65.5.1
- simpervisor=0.4
- sip=6.7.5
- six=1.16.0
- smmap=3.0.5
- snappy=1.1.9
- sniffio=1.3.0
- soupsieve=2.3.2.post1
- sqlite=3.40.0
- stack_data=0.6.1
- terminado=0.17.0
- tinycss2=1.2.1
- tk=8.6.12
- toml=0.10.2
- tomli=2.0.1
- tornado=6.2
- tqdm=4.64.1
- traitlets=5.5.0
- typing-extensions=4.4.0
- typing_extensions=4.4.0
- tzdata=2022f
- unicodedata2=15.0.0
- urllib3=1.26.11
- wcwidth=0.2.5
- webencodings=0.5.1
- websocket-client=1.4.2
- wheel=0.38.4
- wrapt=1.14.1
- xcb-util=0.4.0
- xcb-util-image=0.4.0
- xcb-util-keysyms=0.4.0
- xcb-util-renderutil=0.3.9
- xcb-util-wm=0.4.1
- xorg-libxau=1.0.9
- xorg-libxdmcp=1.1.3
- xyzservices=2022.9.0
- xz=5.2.6
- yaml=0.2.5
- yaml-cpp=0.7.0
- yarl=1.8.1
- zeromq=4.3.4
- zipp=3.10.0
- zstd=1.5.2
Option 2: Use conda and pip to install the latest SEML version
This option will create a conda environment with the latest version of SEML that comes from pip installation and python 3.11. However, it will also need an extra step to fix a numpy bug inside of a library. Brace yourself; we start building the conda environment.
# Build the prefix environment from the pinned YAML spec; --force replaces any existing env at that path.
mamba env create -f environment.yaml -p $ENV_PREFIX --force
# Activate by path, since this environment is unnamed (name: null).
conda activate $ENV_PREFIX
This environment.yaml file looks like this:
name: null
channels:
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1
- _openmp_mutex=4.5
- aiofiles=22.1.0
- aiohttp=3.8.4
- aiosignal=1.3.1
- aiosqlite=0.18.0
- alsa-lib=1.2.8
- anyio=3.6.2
- argon2-cffi=21.3.0
- argon2-cffi-bindings=21.2.0
- asttokens=2.2.1
- async-timeout=4.0.2
- attr=2.5.1
- attrs=22.2.0
- babel=2.11.0
- backcall=0.2.0
- backports=1.0
- backports.functools_lru_cache=1.6.4
- beautifulsoup4=4.11.2
- bleach=6.0.0
- bokeh=3.0.3
- brotli=1.0.9
- brotli-bin=1.0.9
- brotlipy=0.7.0
- bzip2=1.0.8
- ca-certificates=2022.12.7
- cairo=1.16.0
- cffi=1.15.1
- comm=0.1.2
- contourpy=1.0.7
- cryptography=39.0.1
- cycler=0.11.0
- dbus=1.13.6
- debugpy=1.6.6
- decorator=5.1.1
- defusedxml=0.7.1
- entrypoints=0.4
- executing=1.2.0
- expat=2.5.0
- fftw=3.3.10
- flit-core=3.8.0
- font-ttf-dejavu-sans-mono=2.37
- font-ttf-inconsolata=3.000
- font-ttf-source-code-pro=2.038
- font-ttf-ubuntu=0.83
- fontconfig=2.14.2
- fonts-conda-ecosystem=1
- fonts-conda-forge=1
- fonttools=4.38.0
- freetype=2.12.1
- frozenlist=1.3.3
- gettext=0.21.1
- glib=2.74.1
- glib-tools=2.74.1
- graphite2=1.3.13
- gst-plugins-base=1.22.0
- gstreamer=1.22.0
- gstreamer-orc=0.4.33
- harfbuzz=6.0.0
- icu=70.1
- idna=3.4
- importlib-metadata=6.0.0
- importlib_metadata=6.0.0
- importlib_resources=5.10.2
- ipykernel=6.21.2
- ipython=8.10.0
- ipython_genutils=0.2.0
- jack=1.9.22
- jedi=0.18.2
- jinja2=3.1.2
- jpeg=9e
- json5=0.9.5
- jsonschema=4.17.3
- jupyter-server-proxy=3.2.2
- jupyter_client=8.0.2
- jupyter_core=5.2.0
- jupyter_events=0.5.0
- jupyter_server=2.2.1
- jupyter_server_fileid=0.6.0
- jupyter_server_terminals=0.4.4
- jupyter_server_ydoc=0.6.1
- jupyter_ydoc=0.2.2
- jupyterlab=3.6.1
- jupyterlab-nvdashboard=0.7.0
- jupyterlab_pygments=0.2.2
- jupyterlab_server=2.19.0
- keyutils=1.6.1
- kiwisolver=1.4.4
- krb5=1.20.1
- lame=3.100
- lcms2=2.14
- ld_impl_linux-64=2.40
- lerc=4.0.0
- libblas=3.9.0
- libbrotlicommon=1.0.9
- libbrotlidec=1.0.9
- libbrotlienc=1.0.9
- libcap=2.66
- libcblas=3.9.0
- libclang=15.0.7
- libclang13=15.0.7
- libcups=2.3.3
- libdb=6.2.32
- libdeflate=1.17
- libedit=3.1.20191231
- libevent=2.1.10
- libffi=3.4.2
- libflac=1.4.2
- libgcc-ng=12.2.0
- libgcrypt=1.10.1
- libgfortran-ng=12.2.0
- libgfortran5=12.2.0
- libglib=2.74.1
- libgomp=12.2.0
- libgpg-error=1.46
- libiconv=1.17
- liblapack=3.9.0
- libllvm15=15.0.7
- libnsl=2.0.0
- libogg=1.3.4
- libopenblas=0.3.21
- libopus=1.3.1
- libpng=1.6.39
- libpq=15.2
- libsndfile=1.2.0
- libsodium=1.0.18
- libsqlite=3.40.0
- libstdcxx-ng=12.2.0
- libsystemd0=252
- libtiff=4.5.0
- libtool=2.4.7
- libudev1=252
- libuuid=2.32.1
- libvorbis=1.3.7
- libwebp-base=1.2.4
- libxcb=1.13
- libxkbcommon=1.0.3
- libxml2=2.10.3
- libzlib=1.2.13
- lz4-c=1.9.4
- markupsafe=2.1.2
- matplotlib=3.6.3
- matplotlib-base=3.6.3
- matplotlib-inline=0.1.6
- mistune=2.0.5
- mpg123=1.31.2
- multidict=6.0.4
- munkres=1.1.4
- mysql-common=8.0.32
- mysql-libs=8.0.32
- nbclassic=0.5.1
- nbclient=0.7.2
- nbconvert=7.2.9
- nbconvert-core=7.2.9
- nbconvert-pandoc=7.2.9
- nbformat=5.7.3
- ncurses=6.3
- nest-asyncio=1.5.6
- notebook=6.5.2
- notebook-shim=0.2.2
- nspr=4.35
- nss=3.88
- numpy=1.24.2
- openjpeg=2.5.0
- openssl=3.0.8
- packaging=23.0
- pandoc=2.19.2
- pandocfilters=1.5.0
- parso=0.8.3
- pcre2=10.40
- pexpect=4.8.0
- pickleshare=0.7.5
- pillow=9.4.0
- pip=23.0
- pixman=0.40.0
- pkgutil-resolve-name=1.3.10
- platformdirs=3.0.0
- ply=3.11
- prometheus_client=0.16.0
- prompt-toolkit=3.0.36
- psutil=5.9.4
- pthread-stubs=0.4
- ptyprocess=0.7.0
- pulseaudio=16.1
- pure_eval=0.2.2
- pycparser=2.21
- pygments=2.14.0
- pynvml=11.4.1
- pyopenssl=23.0.0
- pyparsing=3.0.9
- pyqt=5.15.7
- pyqt5-sip=12.11.0
- pyrsistent=0.19.3
- pysocks=1.7.1
- python=3.11.0
- python-dateutil=2.8.2
- python-fastjsonschema=2.16.2
- python-json-logger=2.0.4
- python_abi=3.11
- pytz=2022.7.1
- pyyaml=6.0
- pyzmq=25.0.0
- qt-main=5.15.8
- readline=8.1.2
- requests=2.28.2
- send2trash=1.8.0
- setuptools=67.1.0
- simpervisor=0.4
- sip=6.7.7
- six=1.16.0
- sniffio=1.3.0
- soupsieve=2.3.2.post1
- stack_data=0.6.2
- terminado=0.17.1
- tinycss2=1.2.1
- tk=8.6.12
- toml=0.10.2
- tomli=2.0.1
- tornado=6.2
- traitlets=5.9.0
- typing-extensions=4.4.0
- typing_extensions=4.4.0
- tzdata=2022g
- urllib3=1.26.14
- wcwidth=0.2.6
- webencodings=0.5.1
- websocket-client=1.5.1
- wheel=0.38.4
- xcb-util=0.4.0
- xcb-util-image=0.4.0
- xcb-util-keysyms=0.4.0
- xcb-util-renderutil=0.3.9
- xcb-util-wm=0.4.1
- xorg-kbproto=1.0.7
- xorg-libice=1.0.10
- xorg-libsm=1.2.3
- xorg-libx11=1.7.2
- xorg-libxau=1.0.9
- xorg-libxdmcp=1.1.3
- xorg-libxext=1.3.4
- xorg-libxrender=0.9.10
- xorg-renderproto=0.11.1
- xorg-xextproto=7.3.0
- xorg-xproto=7.0.31
- xyzservices=2022.9.0
- xz=5.2.6
- y-py=0.5.5
- yaml=0.2.5
- yarl=1.8.2
- ypy-websocket=0.8.2
- zeromq=4.3.4
- zipp=3.13.0
- zlib=1.2.13
- zstd=1.5.2
- pip:
- certifi==2022.12.7
- charset-normalizer==3.0.1
- colorama==0.4.6
- dnspython==2.3.0
- docopt==0.6.2
- gitdb==4.0.10
- gitpython==3.1.30
- jsonpickle==1.5.2
- munch==2.5.0
- pandas==1.5.3
- py-cpuinfo==9.0.0
- pymongo==4.3.3
- sacred==0.8.4
- seml==0.3.7
- smmap==5.0.0
- tqdm==4.64.1
- wrapt==1.14.1
Fix the bug for numpy:
If you try to run SEML using this environment, you will get all your experiments killed and the following error related to numpy:
/ibex/scratch/barradd/seml_2/seml/env2/lib/python3.11/site-packages/jsonpickle/ext/numpy.py:139: FutureWarning: In the future `np.object` will be defined as the corresponding NumPy scalar.
if obj.dtype == np.object:
2023-02-13 14:40:07 (WARNING): An error ocurred in the '<sacred.observers.mongo.MongoObserver object at 0x2ac9012ca5d0>' observer: module 'numpy' has no attribute 'object'.
`np.object` was a deprecated alias for the builtin `object`. To avoid this error in existing code, use `object` by itself. Doing this will not modify any behavior and is safe.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
File "/ibex/scratch/barradd/seml_2/seml/env2/lib/python3.11/site-packages/jsonpickle/ext/numpy.py", line 139, in flatten
if obj.dtype == np.object:
^^^^^^^^^
To fix it, edit the numpy.py file inside that site-packages directory with your favorite text editor:
vi $ENV_PREFIX/lib/python3.11/site-packages/jsonpickle/ext/numpy.py
Then change line 139 (you will also see a warning nearby about the same bug); the correct line is if obj.dtype == "object":
else:
# encode as binary
if obj.dtype == "object": ## this is correct way
#if obj.dtype == np.object: ## this is not supported anymore
# There's a bug deep in the bowels of numpy that causes a
# segfault when round-tripping an ndarray of dtype object.
# E.g., the following will result in a segfault:
# import numpy as np
# arr = np.array([str(i) for i in range(3)],
# dtype=np.object)
# dtype = arr.dtype
# shape = arr.shape
# buf = arr.tobytes()
# del arr
# arr = np.ndarray(buffer=buf, dtype=dtype,
# shape=shape).copy()
# So, save as a binary-encoded list in this case
Setup MongoDB using a container
You can do this step on any folder, even your home directory
Clone the git repository containing the Singularity definition file to create the image:
git clone https://github.com/singularityhub/mongo.git
Submit a job to make the singularity image, and the following is an example of the job script
#!/bin/bash
#SBATCH --time=01:00:00
#SBATCH --ntasks=1
# Build the MongoDB Singularity image from the cloned definition file.
module load singularity
# Abort instead of building in the wrong directory if the repo is missing.
cd "$HOME/mongo" || exit 1
# Singularity needs a writable runtime directory; point it at $HOME on Ibex.
export XDG_RUNTIME_DIR="$HOME"
singularity build --fakeroot mongo.sif Singularity
Successful completion should result in creating a singularity image file mongo.sif
.
Launch the MongoDB
You can now copy the image file anywhere you want. The jobs are managed by SEML using MongoDB. A CPU partition must be assigned to the mongo database you will use. You also need a data folder to bind the instance to someplace.
If the mongo instance is killed due to time limits, you will have to launch another, but you may lose the files stored in them.
#!/bin/bash
#SBATCH --time=04:00:00
#SBATCH --nodes=1
#SBATCH --partition=batch
#SBATCH --job-name=mongoDB
#SBATCH --mail-type=ALL
#SBATCH --output=%x-%j-slurm.out
#SBATCH --error=%x-%j-slurm.err
module load singularity
# Extract the IPoIB (ib0) IPv4 address. awk keys on the "inet" field name,
# which is robust to ifconfig's column spacing, unlike `cut -d " " -f 10`,
# which silently yields an empty string if the spacing differs.
IP_ADDR=$(ifconfig ib0 | awk '/inet /{print $2; exit}')
export IP_ADDR
# Clients read this line from the job's .out file to find the server.
echo "IP_ADDRESS=$IP_ADDR"
# MongoDB data directory; the bind keeps data across runs, but note a new
# job gets a new IP, so re-read it from the log each time.
mkdir -p "$PWD/data"
singularity run "$PWD/mongo.sif" mongod --noauth --bind_ip "localhost,${IP_ADDR}" --dbpath="$PWD/data"
Create a user on the database.
Inside the out file generated by the script, you will find the IP address you need to connect to the mongo instance; in this example, use head
to see the IP_ADDRESS
where the database is running.
head mongoDB-22912150-slurm.out
IP_ADDRESS=10.109.201.49
Use an interactive session to try the connection and access the mongosh
interface
srun --time=02:00:00 --nodes=1 --pty singularity exec -B $PWD/data:/data/db $PWD/container/mongo.sif mongosh --host 10.109.201.49
After the resources are allocated, you will see the output like this below:
srun: job 22878644 queued and waiting for resources
srun: job 22878644 has been allocated resources
Current Mongosh Log ID: 6374e621ec85174afd042398
Connecting to: mongodb://10.109.197.13:27017/?directConnection=true&appName=mongosh+1.6.0
Using MongoDB: 6.0.2
Using Mongosh: 1.6.0
For mongosh info see: https://docs.mongodb.com/mongodb-shell/
To help improve our products, anonymous usage data is collected and sent to MongoDB periodically (https://www.mongodb.com/legal/privacy-policy).
You can opt-out by running the disableTelemetry() command.
------
The server generated these startup warnings when booting
2022-11-16T16:16:36.057+03:00: /sys/kernel/mm/transparent_hugepage/enabled is 'always'. We suggest setting it to 'never'
2022-11-16T16:16:36.058+03:00: /sys/kernel/mm/transparent_hugepage/defrag is 'always'. We suggest setting it to 'never'
2022-11-16T16:16:36.058+03:00: vm.max_map_count is too low
------
------
Enable MongoDB's free cloud-based monitoring service, which will then receive and display
metrics about your deployment (disk utilization, CPU, operation statistics, etc).
The monitoring data will be available on a MongoDB website with a unique URL accessible to you
and anyone you share the URL with. MongoDB may use this information to make product
improvements and to suggest MongoDB products and deployment options to you.
To enable free monitoring, run the following command: db.enableFreeMonitoring()
To permanently disable this reminder, run the following command: db.disableFreeMonitoring()
------
test>
Here you see the assigned port 27017
, the database name test,
and your shell command prompt from mongo test>
. Next, you have to create a user and a password in the database using the following line
test> db.createUser ( { user:"barradd" , pwd:"test1" , roles:[ { role:"readWrite" , db:"test" }, {role:"readWrite" ,db:"admin"} ] } )
This command will create the user barradd
with the password test1
and give it roles in the admin
and test
databases. You can modify this parameter as you like or create a new database for your project.
After this step, you can stop this interactive session
If you wish to see more of the commands available for the mongosh
interface you can type
singularity run $PWD/container/mongo.sif mongosh --help
Running the SEML configuration and setup
The next step is to create the corresponding setup for SEML. You need to activate the conda environment,
conda activate $ENV_PREFIX
Since the SEML library is active, it provides a CLI to use it. So first you type seml configure
Here you have to provide the corresponding data; the host
is the IP address; you can leave the default port (unless you change it in previous steps), the user, passwords, and the database name you created during the last step.
seml configure
Configuring SEML. Warning: Password will be stored in plain text.
Please input the MongoDB host: 10.109.201.49
Port (default: 27017):
Please input the database name: test
Please input the user name: barradd
Please input the password:
Saving the following configuration to /home/barradd/.config/seml/mongodb.config:
username: barradd
password: ********
port: 27017
database: test
host: 10.109.201.49
Changing the setup for ibex
As indicated by the authors, you can explore several options by looking at the file in the GitHub repo examples/example_config.yaml
.
# Experiment configuration file.
#
# There are two special blocks. The 'seml' block is required for every experiment.
# It has to contain the following values:
# executable: Name of the Python script containing the experiment. The path should be relative to the `project_root_dir`.
# For backward compatibility SEML also supports paths relative to the location of the config file.
# In case there are files present both relative to the project root and the config file,
# the former takes precedence.
# It can optionally also contain the following values:
# name: Prefix for output file and Slurm job name. Default: Collection name
# output_dir: Directory to store log files in. Default: Current directory
# conda_environment: Specifies which Anaconda virtual environment will be activated before the experiment is executed.
# Default: The environment used when queuing.
# project_root_dir: (Relative or absolute) path to the root of the project. seml will then upload all the source
# files imported by the experiment to the MongoDB. Moreover, the uploaded source files will be
# downloaded before starting an experiment, so any changes to the source files in the project
# between queueing and starting the experiment will have no effect.
#
# The special 'slurm' block contains the slurm parameters. This block and all values are optional. Possible values are:
# experiments_per_job: Number of parallel experiments to run in each Slurm job.
# Note that only experiments from the same batch share a job. Default: 1
# max_simultaneous_jobs: Maximum number of simultaneously running Slurm jobs per job array. Default: No restriction
# sbatch_options_template: Name of a custom template of `SBATCH` options. Define your own templates in `settings.py`
# under `SBATCH_OPTIONS_TEMPLATES`, e.g. for long-running jobs, CPU-only jobs, etc.
# sbatch_options: dictionary that contains custom values that will be passed to `sbatch`, specifying e.g.
# the memory and number of GPUs to be allocated (prepended dashes are not required). See
# https://slurm.schedmd.com/sbatch.html for all possible options.
#
# Parameters under 'fixed' will be used for all the experiments.
#
# Under 'grid' you can define parameters that should be sampled from a regular grid. Options are:
# - choice: List the different values you want to evaluate under 'choices' as in the example below.
# - range: Specify the min, max, and step. Parameter values will be generated using np.arange(min, max, step).
# - uniform: Specify the min, max, and num. Parameter values will be generated using
# np.linspace(min, max, num, endpoint=True)
# - loguniform: Specify min, max, and num. Parameter values will be uniformly generated in log space (base 10).
#
# Under 'random' you can specify parameters for which you want to try several random values. Specify the number
# of samples per parameter with the 'samples' value as in the examples below.
# Specify the the seed under the 'random' dict or directly for the desired parameter(s).
# Supported parameter types are:
# - choice: Randomly samples <samples> entries (with replacement) from the list in parameter['options']
# - uniform: Uniformly samples between 'min' and 'max' as specified in the parameter dict.
# - loguniform: Uniformly samples in log space between 'min' and 'max' as specified in the parameter dict.
# - randint: Randomly samples integers between 'min' (included) and 'max' (excluded).
#
# The configuration file can be nested (as the example below) so that we can run different parameter sets
# e.g. for different datasets or models.
# We take the cartesian product of all `grid` parameters on a path and sample all random parameters on the path.
# The number of random parameters sampled will be max{n_samples} of all n_samples on the path. This is done because
# we need the same number of samples from all random parameters in a configuration.
#
# More specific settings (i.e., further down the hierarchy) always overwrite more general ones.
Change the example_config.yaml
file
Here, we work with the examples/tutorial/example_config.yaml
file, which does not contain the whole explanation of the configuration settings. To be very explicit with the interface, you can put the complete path we have in the $ENV_PREFIX
variable to add the conda environment that SEML will use.
# 'seml' block of examples/tutorial/example_config.yaml, adapted for Ibex.
# Note: the nested keys must be indented under 'seml:' for the YAML to be valid.
seml:
  # Experiment script, relative to project_root_dir.
  executable: examples/example_experiment.py
  # Prefix for the Slurm job name and the output files.
  name: example_experiment
  # Directory where per-experiment log files are written.
  output_dir: examples/logs
  project_root_dir: ..
  # Absolute path of the conda env created earlier (the value of $ENV_PREFIX),
  # so the Slurm jobs activate the right environment.
  conda_environment: /ibex/scratch/barradd/seml/env
Change the slurm_template.sh
A necessary step is to edit the slurm template that the library uses to launch the job arrays. It is located deep inside the environment's libraries:
vi $ENV_PREFIX/lib/python3.9/site-packages/seml/slurm_template.sh
Then we modify line 1 to #!/bin/bash --login
and line 14 and change it to line 15 to source $CONDA_BASE/bin/activate
#!/bin/bash --login
# NOTE: --login makes bash source the profile files so conda is on PATH on
# Ibex compute nodes.
# This file is a Python str.format template: SEML substitutes the
# single-brace placeholders before submission, and doubled braces render
# as literal braces in the generated script. Keep added text brace-free.
{sbatch_options}
# Move either to project root dir or the config file path.
cd {working_dir}
# Print job information
echo "Starting job ${{SLURM_JOBID}}"
echo "SLURM assigned me the node(s): $(squeue -j ${{SLURM_JOBID}} -O nodelist:1000 | tail -n +2 | sed -e 's/[[:space:]]*$//')"
# Activate Anaconda environment
if {use_conda_env}; then
CONDA_BASE=$(conda info --base)
# On Ibex, sourcing bin/activate (rather than etc/profile.d/conda.sh)
# is what correctly brings up the base environment.
#source $CONDA_BASE/etc/profile.d/conda.sh
source $CONDA_BASE/bin/activate ## this activates base in Ibex
conda activate {conda_env}
fi
echo "Using Python executable: $(which python)"
# Chunked list with all experiment IDs
all_exp_ids=({exp_ids})
# Get experiment IDs for this Slurm task
# Each array task receives one semicolon-separated chunk of experiment IDs.
exp_ids_str="${{all_exp_ids[$SLURM_ARRAY_TASK_ID]}}"
IFS=";" read -r -a exp_ids <<< "$exp_ids_str"
# Create directory for the source files in MongoDB
if {with_sources}; then
tmpdir="/tmp/$(uuidgen)" # unique temp dir based on UUID
mkdir $tmpdir
# Prepend the temp dir to $PYTHONPATH so it will be used by python.
export PYTHONPATH="$tmpdir:$PYTHONPATH"
fi
# Start experiments in separate processes
process_ids=()
for exp_id in "${{exp_ids[@]}}"; do
# The prepare script prints the command to run; its exit status says whether
# the experiment is runnable (0), not PENDING (1), or missing from the DB (2).
cmd=$(python -c '{prepare_experiment_script}' --experiment_id ${{exp_id}} --db_collection_name {db_collection_name} {sources_argument} --verbose {verbose} --unobserved {unobserved} --debug-server {debug_server})
ret=$?
if [ $ret -eq 0 ]; then
# Run in the background so all experiments in this chunk execute in parallel.
eval $cmd &
process_ids+=($!)
elif [ $ret -eq 1 ]; then
echo "WARNING: Experiment with ID ${{exp_id}} does not have status PENDING and will not be run."
elif [ $ret -eq 2 ]; then
(>&2 echo "ERROR: Experiment with id ${{exp_id}} not found in the database.")
fi
done
# Print process information
echo "Experiments are running under the following process IDs:"
num_it=${{#process_ids[@]}}
for ((i=0; i<$num_it; i++)); do
echo "Experiment ID: ${{exp_ids[$i]}} Process ID: ${{process_ids[$i]}}"
done
echo
# Wait for all experiments to finish
wait
# Delete temporary source files
if {with_sources}; then
rm -rf $tmpdir
fi
Run the experiments
Then you should be able to start your experiments as indicated on the tutorial demo with the corresponding bash commands.
# Stage the experiment configurations from the YAML file into the MongoDB collection.
seml seml_example add example_config.yaml
# Report how many experiments are staged / pending / running / completed / failed.
seml seml_example status
# Submit the staged experiments to Slurm as a job array.
seml seml_example start
It should look like this:
(/ibex/scratch/barradd/seml/env) seml seml_tutorial add example_config.yaml
Adding 6 configs to the database (batch-ID 1).
(/ibex/scratch/barradd/seml/env) seml seml_tutorial start
Starting 6 experiments in 6 Slurm jobs in 1 Slurm job array.
(/ibex/scratch/barradd/seml/env) seml seml_tutorial status
********** Report for database collection 'seml_tutorial' **********
* - 0 staged experiments
* - 0 pending experiments
* - 0 running experiments
* - 6 completed experiments
* - 0 interrupted experiments
* - 0 failed experiments
* - 0 killed experiments
********************************************************************
All the output (successful or not) will be on the logs folders seml/examples/logs