Contents
view
Global view
Launching a computation on the platform means submitting a “job” in the queue among those available. This involves the following procedure:
- Cluster connection
- Data transfer
- BATCH script creation
- Job submission
Context
Nodes | Waiting queues (Partitions)
Monocore job sample : monocore.slurm
Requesting a computation core on a node and 5 MB for 10 minutes. Sending an email at each stage of the job’s life.
Create a sbatch file here named monocore.slurm
#!/bin/bash
# Serial (single-core) job: 1 task on 1 node of the "short" partition,
# 5 MB of RAM, 10-minute wall-time limit.
#SBATCH --nodes=1
#SBATCH --partition=short
#SBATCH --ntasks-per-node=1
#SBATCH --time=00:10:00
# Email at every stage of the job's life (begin, end, fail, ...).
#SBATCH --mail-type=ALL
#SBATCH --job-name=my_serial_job
# %j is replaced by the job ID, so each run gets its own output file.
#SBATCH --output=job_seq-%j.out
#SBATCH --mail-user=your.mail@your.domain
#SBATCH --mem=5M
# Placeholder workload: sleep 30 s (timed) and print the compute node name.
time sleep 30
hostname
Job submission
sbatch monocore.slurm
MPI job sample
hello_mpi.c
hello_mpi.c
#include <mpi.h>
#include <stdio.h>
#include <unistd.h>

/* MPI hello-world: every rank reports its rank, the host it runs on,
 * and the total number of processes in MPI_COMM_WORLD. */
int main(int argc, char** argv) {
    MPI_Init(NULL, NULL);

    int nprocs;
    int rank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Name of the compute node this rank landed on. */
    char node[256];
    gethostname(node, sizeof node);

    printf("Hello from row %d on the machine %s (Total: %d processes)\n",
           rank, node, nprocs);

    MPI_Finalize();
    return 0;
}
Compiling your code
ml libs/ompi gcc libs/ucx
mpicc hello_mpi.c -o hello_mpi
jobMPI.slurm
#!/bin/bash
# MPI job: 2 nodes x 16 tasks = 32 MPI ranks, 10-minute limit, 2 GB memory.
#SBATCH --nodes=2
#SBATCH --partition=normal-amd ## also works with --partition=normal
#SBATCH --ntasks-per-node=16
#SBATCH --time=00:10:00
#SBATCH --job-name=my_mpi_job
# %j is replaced by the job ID.
#SBATCH --output=mpi_job-%j.out
#SBATCH --mem=2G
#SBATCH --mail-type=ALL
#SBATCH --mail-user=your.mail@domain
# Load the same toolchain used to compile hello_mpi (Open MPI, GCC, UCX).
ml libs/ompi gcc libs/ucx
# You can force the display of UCX information to verify that it is functioning
#export UCX_LOG_LEVEL=info
# (French: "Launching with srun:")
echo "Lancement avec srun :"
# srun launches one ./hello_mpi process per allocated task.
srun ./hello_mpi
Job submission
sbatch jobMPI.slurm
OpenMP sample
omp.cc
#include <iostream>
#include <omp.h>

// OpenMP hello-world: each thread of the parallel region prints its id
// and the size of the thread team.
int main() {
  #pragma omp parallel
  {
    const int tid = omp_get_thread_num();
    const int nthreads = omp_get_num_threads();
    // Serialize the printing so each thread writes a whole line at once.
    #pragma omp critical
    std::cout << "Thread " << tid << " of " << nthreads << " is ready !" << std::endl;
  }
  return 0;
}
Compiling your code
ml gcc
g++ -O3 -fopenmp omp.cc -o omp
job_openMP.slurm
#!/bin/bash
# OpenMP job: a single task with 8 CPU cores (threads) on one node,
# 16 MB of RAM, 4-hour wall-time limit.
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --time=04:00:00
#SBATCH --job-name=my_openmp_job
#SBATCH --mem=16M
# Match the OpenMP thread count to the cores SLURM allocated to the task.
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
./omp
Job submission
sbatch job_openMP.slurm
GPU usage
To use GPUs, specify the following parameter --gres=gpu:X with X the number of GPUs.
Here is a sbatch script “mon_script.sh” requesting 2 GPUs and 28 cores (bigpu).
#!/bin/sh
# GPU job on the "bigpu" partition: 2 GPUs and 28 cores on one node,
# 60 GB of RAM, 10-minute wall-time limit.
#SBATCH --job-name=tensor
#SBATCH --partition=bigpu
# Request 2 GPUs via the generic-resource (gres) mechanism.
#SBATCH --gres=gpu:2
#SBATCH --time=0:10:00
#SBATCH --mail-type=ALL
# %j is replaced by the job ID.
#SBATCH --output=job-%j.out
#SBATCH --mem=60G
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=28
# Print the compute node name, then run the Python workload.
hostname
python hello.py
- To submit the job, use the following command :
sbatch mon_script.sh
- Interactive sample:
srun --ntasks=1 --mem=4G --gres=gpu:1 --time=1:00:00 --partition=bigpu --pty /bin/bash
The nvidia-smi command shows you GPU usage.