Kokkos Tools
Kokkos Tools can be used for debugging and for tracking memory allocation when executing Vertex-CFD. Kokkos Tools is an open-source project hosted on GitHub; users can refer to its wiki page for installation instructions and usage.
Some of the tools were tested on Frontier with a submission script that is provided below. All other Kokkos tools follow a similar syntax.
#!/bin/bash
#SBATCH -A account-id
#SBATCH -J job-name
#SBATCH -o output%j.log
#SBATCH -e error%j.log
#SBATCH -t 0-0:30:00
#SBATCH -p batch
#SBATCH -q debug # This is for debug; for production runs, delete this line
#SBATCH --mail-type=ALL
#SBATCH --mail-user=user-email-address
#SBATCH -N 2
#SBATCH -n 32
#SBATCH --cpus-per-task=1
#SBATCH --gpus-per-node=8

module load cmake craype-accel-amd-gfx90a rocm metis parmetis netcdf-c openblas zlib

# One OpenMP thread per MPI rank, pinned for consistent placement.
export OMP_NUM_THREADS=1
export OMP_PROC_BIND=spread
export OMP_PLACES=threads

# Set executable path and input file path
EXE=absolute_path_to_executable
INPUT=absolute_path_to_input_file

# Select ONE Kokkos tool below: un-comment its 'export KOKKOS_TOOLS_LIBS'
# line AND its matching 'srun' line, and keep the others commented.
# NOTE: only the last active export takes effect, so leave unused tools'
# export lines commented out as well.

# Kernel logger (one text file per rank - main solver output is in the first rank file)
#export KOKKOS_TOOLS_LIBS="/ccs/home/mxd/software/kokkos-tools/debugging/kernel-logger/kp_kernel_logger.so"
#srun --output=kernel_logger_%j_rank_%t.txt -N "$SLURM_JOB_NUM_NODES" -n "$SLURM_NTASKS" --cpus-per-task="$SLURM_CPUS_PER_TASK" --gpus-per-node="$SLURM_GPUS_PER_NODE" --gpu-bind=closest -u "$EXE" --i="$INPUT"

# Memory events (one text file per rank - main solver output is in the first rank file)
#export KOKKOS_TOOLS_LIBS="/ccs/home/mxd/software/kokkos-tools/profiling/memory-events/kp_memory_events.so"
#srun --output=memory_events_%j_rank_%t.txt -N "$SLURM_JOB_NUM_NODES" -n "$SLURM_NTASKS" --cpus-per-task="$SLURM_CPUS_PER_TASK" --gpus-per-node="$SLURM_GPUS_PER_NODE" --gpu-bind=closest -u "$EXE" --i="$INPUT"

# Memory high-water mark (one output file - output%j.log)
export KOKKOS_TOOLS_LIBS="/ccs/home/mxd/software/kokkos-tools/profiling/memory-hwm/kp_hwm.so"
srun -N "$SLURM_JOB_NUM_NODES" -n "$SLURM_NTASKS" --cpus-per-task="$SLURM_CPUS_PER_TASK" --gpus-per-node="$SLURM_GPUS_PER_NODE" --gpu-bind=closest -u "$EXE" --i="$INPUT"