################################################################################
# GenAI Scorecard
################################################################################
# Tests AIMET quantization techniques on GenAI models in both Torch and ONNX
#
# Two testing modes:
# 1. Regression: Run all regression tests
# 2. Ad-hoc: Run all tests in provided config file
#
# Workflow:
# - Builds AIMET-ONNX (CUDA 12.1) and AIMET-Torch (CUDA 12.8) from source
# - Runs tests on morph-lsf74/75-gpulv runner with GPU access
# - Uploads CSV/HTML/JSON reports as downloadable artifacts
################################################################################
name: GenAI Scorecard

on:
  # SCHEDULED: Run automatically every Friday at 6PM PT (like a cron job)
  schedule:
    - cron: '0 1 * * 6' # Every Friday at 6PM PT (Saturday 01:00 UTC)
  # Re-run regressions whenever a regression config changes
  push:
    paths:
      - 'GenAITests/configs/torch_regression.yaml'
      - 'GenAITests/configs/onnx_regression.yaml'
  # Manual trigger from GitHub Actions UI
  workflow_dispatch:
    inputs:
      # What to run: full suite, single config, or filtered subset
      run_mode:
        description: 'What to run?'
        required: true
        type: choice
        options:
          - regression # All configs in suite
          - ad_hoc # One specific config
        default: 'regression'
      ad_hoc_config_b64:
        description: 'Config file to use converted to base64 (ad-hoc only). Can be generated via: cat <your config.yaml> | base64 -w 0'
        required: false
      ad_hoc_variants:
        description: 'Variants to test (ad-hoc only)'
        required: false
        type: choice
        options:
          - both
          - torch
          - onnx
        default: 'both'
################################################################################
# JOBS
################################################################################
jobs:
  # ===========================================================================
  # JOB 1: Build AIMET ONNX
  # ===========================================================================
  # Compiles AIMET from source with ONNX + GPU support
  # Outputs: onnx-gpu-wheel artifact containing the compiled .whl file
  #
  # NOTE(review): unlike JOB 2, this job has no `if` to skip the ONNX build
  # for torch-only ad-hoc runs. That may be intentional — the torch variant's
  # secondary ONNX evaluation step installs the ONNX wheel — confirm before
  # adding a symmetric skip condition.
  # ===========================================================================
  build-aimet-onnx-gpu:
    name: Build aimet_onnx
    # Reuses existing build workflow
    uses: ./.github/workflows/build-wheels.yml
    with:
      # Build variant: ONNX Runtime 1.19.2 + CUDA 12.1 + Python 3.10
      variants: >
        {"include":[{
        "id":"onnx-gpu",
        "runs-on":"a100",
        "VER_PYTHON":"3.10",
        "VER_ONNXRUNTIME":"1.19.2",
        "VER_CUDA":"12.1.1",
        "ENABLE_TESTS":"OFF",
        "PIP_INDEX":""
        }]}
      image-tag: latest
    # Pass repository secrets to build workflow
    secrets: inherit
# ===========================================================================
# JOB 2: Build AIMET Torch
# ===========================================================================
# Compiles AIMET from source with Torch + GPU support
# Outputs: torch-gpu-wheel artifact containing the compiled .whl file
# ===========================================================================
build-aimet-torch-gpu:
name: Build aimet_torch
if: |
github.event_name != 'workflow_dispatch' ||
github.event.inputs.run_mode != 'ad_hoc' ||
github.event.inputs.ad_hoc_variants != 'onnx'
# Reuses existing build workflow
uses: ./.github/workflows/build-wheels.yml
with:
variants: >
{"include":[{
"id":"torch-gpu",
"runs-on":"k8s-gpu",
"VER_PYTHON":"3.10",
"VER_TORCH":"2.*",
"VER_ONNXRUNTIME": "",
"VER_CUDA": "12.8",
"ENABLE_TESTS":"OFF",
"PIP_INDEX":""
}]}
image-tag: latest
# Pass repository secrets to build workflow
secrets: inherit
# ===========================================================================
# JOB 3: Run GenAI Tests
# ===========================================================================
# Downloads AIMET wheel, installs dependencies, runs tests based on mode
# Uploads reports as artifacts: test-reports-<run_id>
# ===========================================================================
test-gpu:
name: Run GenAI tests (${{ matrix.variant }})
needs: [build-aimet-onnx-gpu, build-aimet-torch-gpu]
runs-on: a100
# Even though this stage has a dependency on both builds, we may not actually require both to run successfully
# since there may be ONNX-only or Torch-only cases. In those cases, we can skip the unnecessary build and continue
# with this job anyway
if: |
!cancelled() &&
!(needs.build-aimet-onnx-gpu.result == 'skipped' && needs.build-aimet-torch-gpu.result == 'skipped')
strategy:
matrix:
variant: ${{
fromJSON(
github.event.inputs.run_mode == 'ad_hoc'
&& github.event.inputs.ad_hoc_variants != ''
&& github.event.inputs.ad_hoc_variants != 'both'
&& format('["{0}"]', github.event.inputs.ad_hoc_variants)
|| '["onnx","torch"]'
) }}
# Container required by morph-lsf74/75-gpulv runner
# Uses same image as build for consistency
container:
image: "${{ vars.DOCKER_REGISTRY }}/${{ vars.DOCKER_IMAGE }}-${{ matrix.variant }}-gpu:latest"
credentials:
username: ${{ secrets.DOCKER_LOGIN }}
password: ${{ secrets.DOCKER_CREDENTIALS }}
options: --gpus all
timeout-minutes: 10080 # 7 days (7 * 24 * 60)
env:
# GPU configuration
CUDA_VISIBLE_DEVICES: "6"
# CRITICAL: Auto-accept git clones for models that need external repos
# Without this, models like mobilenet_v2 will prompt "Ok to clone? [Y/n]"
# and block the pipeline waiting for input
GIT_CLONE_PROTECTION_ACTIVE: "false"
# Additional git safety settings
GIT_TERMINAL_PROMPT: "0" # Disable any git prompts
# Headless environment (prevent GUI-related errors)
MPLBACKEND: "Agg"
QT_QPA_PLATFORM: "offscreen"
steps:
# -----------------------------------------------------------------------
# Setup: Get code, cache models, download AIMET wheel
# -----------------------------------------------------------------------
- name: Checkout code
uses: actions/checkout@v4
# Configure git to be non-interactive (prevents any prompts)
- name: Configure git for non-interactive mode
run: |
git config --global core.askPass ""
git config --global credential.helper ""
git config --global --add safe.directory '*'
echo "Git configured for non-interactive cloning"
# Cache downloaded models to speed up subsequent runs
# Key changes when GenAITests/ code changes
- name: Cache models
uses: actions/cache@v4
with:
path: |
~/.cache
~/.cache/huggingface
~/.cache/torch
key: aimet-cache-${{ runner.os }}-${{ hashFiles('GenAITests/**') }}
restore-keys: aimet-cache-${{ runner.os }}-
# Download aimet-torch wheel only when the build succeeded
- name: Download aimet-torch wheel
if: needs.build-aimet-torch-gpu.result == 'success'
uses: actions/download-artifact@v3
with:
name: torch-gpu-wheel
path: downloads
# Download aimet-onnx wheel only when the build succeeded
- name: Download aimet-onnx wheel
if: needs.build-aimet-onnx-gpu.result == 'success'
uses: actions/download-artifact@v3
with:
name: onnx-gpu-wheel
path: downloads
# -----------------------------------------------------------------------
# Install: Python environment + dependencies + AIMET
# -----------------------------------------------------------------------
- name: Install system dependencies
run: |
apt-get update -qq
apt-get install -y \
libgl1-mesa-glx \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender-dev \
libgomp1 \
git
apt-get clean
rm -rf /var/lib/apt/lists/*
# Create Python virtual environment
- name: Setup Python
run: |
apt-get update -qq
apt-get install -y python3 python3-venv python3-pip
python3 -m venv .venv
. .venv/bin/activate
pip install --upgrade pip wheel setuptools
# Install all Python dependencies
# - onnxruntime-gpu: For GPU inference (version matches AIMET build)
# - transformers, tokenizers: For GenAI models
- name: Install dependencies
run: |
. .venv/bin/activate
pip install onnxruntime-gpu==1.19.2
pip install -U "huggingface_hub"
pip install -r GenAITests/requirements.txt
# Install AIMET from the wheel and verify
- name: Install AIMET
run: |
. .venv/bin/activate
pip install downloads/*.whl
python -c "import aimet_${{ matrix.variant }}; print('AIMET version:', aimet_${{ matrix.variant }}.__version__)"
# Set up config file based on run mode
- name: Prepare scorecard config file
run: |
if [ "${{ github.event.inputs.run_mode }}" = "ad_hoc" ] && [ -n "${{ github.event.inputs.ad_hoc_config_b64 }}" ]; then
if ! echo "${{ github.event.inputs.ad_hoc_config_b64 }}" | base64 -d > GenAITests/scorecard_config.yaml 2>/dev/null; then
echo "Error: Failed to decode base64 config. Please ensure the input is valid base64."
exit 1
fi
if [ ! -s GenAITests/scorecard_config.yaml ]; then
echo "Error: Decoded config file is empty."
exit 1
fi
echo "Saved ad-hoc config (shown below) to GenAITests/scorecard_config.yaml"
cat GenAITests/scorecard_config.yaml
else
cp GenAITests/configs/${{ matrix.variant }}_regression.yaml GenAITests/scorecard_config.yaml
echo "Copied ${{ matrix.variant }} regression config to GenAITests/scorecard_config.yaml"
fi
# -----------------------------------------------------------------------
# Run: Execute aimet tests
# -----------------------------------------------------------------------
# Run tests
- name: Run tests
run: |
. .venv/bin/activate
export PYTHONPATH=$PYTHONPATH:$(pwd)
export HF_HOME=/local/mnt2/workspace2/aimet_bot/huggingface
hf auth login --token ${{ secrets.HUGGINGFACE_TOKEN }}
pytest -s GenAITests/${{ matrix.variant }}/test_genai.py --config GenAITests/scorecard_config.yaml
- name: Run optional secondary evaluation using aimet-onnx
if: ${{ matrix.variant == 'torch' }}
run: |
OUT_FILE="GenAITests/secondary_onnx_eval.yaml"
rm -f "$OUT_FILE"
mapfile -t FILES < <(ls -1 artifacts/*/*onnx*.yml artifacts/*/*onnx*.yaml 2>/dev/null | sort)
if [ "${#FILES[@]}" -eq 0 ]; then
echo "No YAML files found in artifacts/"
exit 0
fi
for f in "${FILES[@]}"; do
cat "$f" >> "$OUT_FILE"
echo "---" >> "$OUT_FILE"
done
sed -i '$d' "$OUT_FILE"
echo "Created $OUT_FILE with ${#FILES[@]} documents."
cat "$OUT_FILE"
. .venv/bin/activate
export PYTHONPATH=$PYTHONPATH:$(pwd)
export HF_HOME=/local/mnt2/workspace2/aimet_bot/huggingface
hf auth login --token ${{ secrets.HUGGINGFACE_TOKEN }}
pytest -s GenAITests/onnx/test_genai.py --config "$OUT_FILE"
shell: bash
# (after reports are generated, before upload)
- name: Build file metadata string
id: file-metadata
run: |
COMMIT_TIMESTAMP=$(git show -s --format=%cd --date=format:%Y%m%d-%H%M%S ${{ github.sha }})
SAFE_REF_NAME=$(echo "${{ github.ref_name }}" | sed 's/[^a-zA-Z0-9._-]/_/g')
FILE_SUFFIX=${{ matrix.variant }}-${SAFE_REF_NAME}-${{ github.sha }}-${COMMIT_TIMESTAMP}-${{ github.run_id }}
echo "file_suffix=$FILE_SUFFIX" >> "$GITHUB_OUTPUT"
# Rename report using suffix
- name: Update report filename
run: |
mv genai_test_artifacts/profiling_data.json genai_test_artifacts/profiling_data-${{ steps.file-metadata.outputs.file_suffix }}.json
mv genai_test_artifacts/profiling_data.csv genai_test_artifacts/profiling_data-${{ steps.file-metadata.outputs.file_suffix }}.csv
# Upload generated reports (CSV, HTML, JSON)
# Note: Using v3 for GHES compatibility
# Artifacts available under "Artifacts" section in workflow run
- name: Upload reports
if: always() # Upload even if tests failed
uses: actions/upload-artifact@v3
with:
name: test-data-${{ steps.file-metadata.outputs.file_suffix }}
path: genai_test_artifacts/profiling_data-${{ steps.file-metadata.outputs.file_suffix }}*
retention-days: 30 # Keep for 1 week
# Clean up cached models to prevent disk space issues
- name: Clean up cache
if: always()
run: |
rm -rf ~/.cache ~/.cache/huggingface ~/.cache/torch || true
echo "✓ Cache cleared"