Benchmark Dispatch #345

Workflow file for this run

.github/workflows/benchmark_dispatch.yml at 9cbfb30

	name: Benchmark Dispatch

	on:
	workflow_dispatch:
	inputs:
	run_h100:
	description: 'Run benchmark on H100'
	required: false
	type: boolean
	default: false
	run_b200:
	description: 'Run benchmark on B200'
	required: false
	type: boolean
	default: true
	run_mi325x:
	description: 'Run benchmark on MI325X'
	required: false
	type: boolean
	default: false
	kernels:
	description: 'Comma-separated list of kernels to benchmark'
	required: false
	type: string
	default: "softmax,jsd,welford,kl_div,int4_gemm,layer_norm,layer_norm-bwd,rms_norm,rms_norm-bwd,cross_entropy,flash_attention,gemm,grouped_gemm"
	env_vars:
	description: 'Environment variables for benchmark runner'
	required: false
	type: string
	default: ""
	custom_args:
	description: 'Custom arguments to append to benchmark commands'
	required: false
	type: string
	default: ""

	jobs:
	gen-matrix-h100:
	if: ${{ github.event.inputs.run_h100 == 'true' }}
	uses: ./.github/workflows/compute-benchmark-matrix.yml
	with:
	max-runners: 13
	kernels: ${{ github.event.inputs.kernels }}

	run-h100:
	needs: gen-matrix-h100
	uses: ./.github/workflows/benchmark.yml
	strategy:
	fail-fast: false
	matrix: ${{ fromJSON(needs.gen-matrix-h100.outputs.matrix) }}
	permissions:
	id-token: write
	contents: read
	with:
	runner: linux.aws.h100
	python-version: "3.12"
	image: nvidia/cuda:12.8.1-devel-ubuntu24.04
	runtime-version: cu128
	container-options: --gpus all
	alias: h100
	kernels: ${{ matrix.kernels }}
	env-vars: ${{ github.event.inputs.env_vars }}
	custom-args: ${{ github.event.inputs.custom_args }}

	gen-matrix-b200:
	uses: ./.github/workflows/compute-benchmark-matrix.yml
	if: ${{ github.event.inputs.run_b200 == 'true' }}
	with:
	max-runners: 13
	kernels: ${{ github.event.inputs.kernels }}

	run-b200:
	needs: gen-matrix-b200
	uses: ./.github/workflows/benchmark.yml
	strategy:
	fail-fast: false
	matrix: ${{ fromJSON(needs.gen-matrix-b200.outputs.matrix) }}
	permissions:
	id-token: write
	contents: read
	with:
	runner: linux.dgx.b200
	python-version: "3.12"
	image: nvidia/cuda:13.0.1-devel-ubuntu24.04
	runtime-version: cu130
	container-options: --gpus all
	alias: b200
	kernels: ${{ matrix.kernels }}
	env-vars: ${{ github.event.inputs.env_vars }}
	custom-args: ${{ github.event.inputs.custom_args }}

	gen-matrix-mi325x:
	uses: ./.github/workflows/compute-benchmark-matrix.yml
	if: ${{ github.event.inputs.run_mi325x == 'true' }}
	with:
	max-runners: 6
	kernels: ${{ github.event.inputs.kernels }}

	run-mi325x:
	needs: gen-matrix-mi325x
	uses: ./.github/workflows/benchmark.yml
	strategy:
	fail-fast: false
	matrix: ${{ fromJSON(needs.gen-matrix-mi325x.outputs.matrix) }}
	permissions:
	id-token: write
	contents: read
	with:
	runner: linux.rocm.gpu.gfx942.2
	python-version: "3.12"
	image: rocm/dev-ubuntu-24.04:6.4.4-complete
	runtime-version: rocm6.4
	container-options: --device=/dev/kfd --device=/dev/dri
	alias: mi325x
	kernels: ${{ matrix.kernels }}
	env-vars: ${{ github.event.inputs.env_vars }}
	custom-args: ${{ github.event.inputs.custom_args }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Benchmark Dispatch #345

Workflow file

Benchmark Dispatch #345

Uh oh!

Workflow file for this run