# Benchmark Dispatch #345 (page header captured together with the workflow source)

# Manually dispatched benchmark workflow. The boolean inputs choose which
# hardware targets run; the string inputs are forwarded to the benchmark jobs.
name: Benchmark Dispatch
on:
  workflow_dispatch:
    inputs:
      # Per-target toggles. Only B200 is enabled by default.
      run_h100:
        description: 'Run benchmark on H100'
        required: false
        type: boolean
        default: false
      run_b200:
        description: 'Run benchmark on B200'
        required: false
        type: boolean
        default: true
      run_mi325x:
        description: 'Run benchmark on MI325X'
        required: false
        type: boolean
        default: false
      # Kernel selection; the default lists every kernel named below.
      kernels:
        description: 'Comma-separated list of kernels to benchmark'
        required: false
        type: string
        default: "softmax,jsd,welford,kl_div,int4_gemm,layer_norm,layer_norm-bwd,rms_norm,rms_norm-bwd,cross_entropy,flash_attention,gemm,grouped_gemm"
      # Optional pass-through strings; empty by default.
      env_vars:
        description: 'Environment variables for benchmark runner'
        required: false
        type: string
        default: ""
      custom_args:
        description: 'Custom arguments to append to benchmark commands'
        required: false
        type: string
        default: ""
# Each hardware target gets a pair of jobs:
#   gen-matrix-<target>: calls compute-benchmark-matrix.yml with the requested
#                        kernel list and a max-runners cap.
#   run-<target>:        calls benchmark.yml once per entry of the matrix that
#                        the gen-matrix job exposes as its `matrix` output.
# The `if` guards compare against the string 'true' because values read from
# github.event.inputs are strings, even for inputs declared as boolean.
# run-<target> needs gen-matrix-<target>, so it only runs when that job ran.
jobs:
  # ---- H100 ----
  gen-matrix-h100:
    if: ${{ github.event.inputs.run_h100 == 'true' }}
    uses: ./.github/workflows/compute-benchmark-matrix.yml
    with:
      max-runners: 13
      kernels: ${{ github.event.inputs.kernels }}

  run-h100:
    needs: gen-matrix-h100
    uses: ./.github/workflows/benchmark.yml
    strategy:
      # Let the remaining matrix entries finish even if one fails.
      fail-fast: false
      matrix: ${{ fromJSON(needs.gen-matrix-h100.outputs.matrix) }}
    permissions:
      id-token: write
      contents: read
    with:
      runner: linux.aws.h100
      python-version: "3.12"
      image: nvidia/cuda:12.8.1-devel-ubuntu24.04
      runtime-version: cu128
      container-options: --gpus all
      alias: h100
      kernels: ${{ matrix.kernels }}
      env-vars: ${{ github.event.inputs.env_vars }}
      custom-args: ${{ github.event.inputs.custom_args }}

  # ---- B200 ----
  gen-matrix-b200:
    if: ${{ github.event.inputs.run_b200 == 'true' }}
    uses: ./.github/workflows/compute-benchmark-matrix.yml
    with:
      max-runners: 13
      kernels: ${{ github.event.inputs.kernels }}

  run-b200:
    needs: gen-matrix-b200
    uses: ./.github/workflows/benchmark.yml
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.gen-matrix-b200.outputs.matrix) }}
    permissions:
      id-token: write
      contents: read
    with:
      runner: linux.dgx.b200
      python-version: "3.12"
      image: nvidia/cuda:13.0.1-devel-ubuntu24.04
      runtime-version: cu130
      container-options: --gpus all
      alias: b200
      kernels: ${{ matrix.kernels }}
      env-vars: ${{ github.event.inputs.env_vars }}
      custom-args: ${{ github.event.inputs.custom_args }}

  # ---- MI325X ----
  # Uses a lower max-runners cap (6) than the other two targets (13).
  gen-matrix-mi325x:
    if: ${{ github.event.inputs.run_mi325x == 'true' }}
    uses: ./.github/workflows/compute-benchmark-matrix.yml
    with:
      max-runners: 6
      kernels: ${{ github.event.inputs.kernels }}

  run-mi325x:
    needs: gen-matrix-mi325x
    uses: ./.github/workflows/benchmark.yml
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.gen-matrix-mi325x.outputs.matrix) }}
    permissions:
      id-token: write
      contents: read
    with:
      runner: linux.rocm.gpu.gfx942.2
      python-version: "3.12"
      image: rocm/dev-ubuntu-24.04:6.4.4-complete
      runtime-version: rocm6.4
      # Device nodes are passed explicitly here instead of `--gpus all`.
      container-options: --device=/dev/kfd --device=/dev/dri
      alias: mi325x
      kernels: ${{ matrix.kernels }}
      env-vars: ${{ github.event.inputs.env_vars }}
      custom-args: ${{ github.event.inputs.custom_args }}