################################################################################
# GenAI Scorecard
################################################################################
# Tests AIMET quantization techniques on GenAI models in both Torch and ONNX
#
# Two testing modes:
# 1. Regression: Run all regression tests
# 2. Ad-hoc: Run all tests in provided config file
#
# Workflow:
# - Builds AIMET-ONNX (CUDA 12.1) and AIMET-Torch (CUDA 12.8) from source
# - Runs tests on morph-lsf74/75-gpulv runner with GPU access
# - Uploads CSV/HTML/JSON reports as downloadable artifacts
################################################################################
name: GenAI Scorecard

on:
  # SCHEDULED: Run automatically every Friday at 6PM PT (like a cron job)
  schedule:
    - cron: '0 1 * * 6' # Every Friday at 6PM PT (Saturday 01:00 UTC)
  # Re-run regressions whenever a regression config changes
  push:
    paths:
      - 'GenAITests/configs/torch_regression.yaml'
      - 'GenAITests/configs/onnx_regression.yaml'
  # Manual trigger from GitHub Actions UI
  workflow_dispatch:
    inputs:
      # What to run: full suite, single config, or filtered subset
      run_mode:
        description: 'What to run?'
        required: true
        type: choice
        options:
          - regression # All configs in suite
          - ad_hoc # One specific config
        default: 'regression'
      ad_hoc_config_b64:
        description: 'Config file to use converted to base64 (ad-hoc only). Can be generated via: cat <your config.yaml> | base64 -w 0'
        required: false
      ad_hoc_variants:
        description: 'Variants to test (ad-hoc only)'
        required: false
        type: choice
        options:
          - both
          - torch
          - onnx
        default: 'both'
################################################################################
# JOBS
################################################################################
jobs:
  # ===========================================================================
  # JOB 1: Build AIMET ONNX
  # ===========================================================================
  # Compiles AIMET from source with ONNX + GPU support
  # Outputs: onnx-gpu-wheel artifact containing the compiled .whl file
  #
  # NOTE(review): unlike JOB 2, this job has no `if` to skip the ONNX build
  # for torch-only ad-hoc runs. That may be intentional — the torch variant's
  # secondary ONNX evaluation step installs the ONNX wheel — confirm before
  # adding a symmetric skip condition.
  # ===========================================================================
  build-aimet-onnx-gpu:
    name: Build aimet_onnx
    # Reuses existing build workflow
    uses: ./.github/workflows/build-wheels.yml
    with:
      # Build variant: ONNX Runtime 1.19.2 + CUDA 12.1 + Python 3.10
      variants: >
        {"include":[{
        "id":"onnx-gpu",
        "runs-on":"a100",
        "VER_PYTHON":"3.10",
        "VER_ONNXRUNTIME":"1.19.2",
        "VER_CUDA":"12.1.1",
        "ENABLE_TESTS":"OFF",
        "PIP_INDEX":""
        }]}
      image-tag: latest
    # Pass repository secrets to build workflow
    secrets: inherit
# ===========================================================================
# JOB 2: Build AIMET Torch
# ===========================================================================
# Compiles AIMET from source with Torch + GPU support
# Outputs: torch-gpu-wheel artifact containing the compiled .whl file
# ===========================================================================
build-aimet-torch-gpu:
name: Build aimet_torch
if: |
github.event_name != 'workflow_dispatch' ||
github.event.inputs.run_mode != 'ad_hoc' ||
github.event.inputs.ad_hoc_variants != 'onnx'
# Reuses existing build workflow
uses: ./.github/workflows/build-wheels.yml
with:
variants: >
{"include":[{
"id":"torch-gpu",
"runs-on":"k8s-gpu",
"VER_PYTHON":"3.10",
"VER_TORCH":"2.*",
"VER_ONNXRUNTIME": "",
"VER_CUDA": "12.8",
"ENABLE_TESTS":"OFF",
"PIP_INDEX":""
}]}
image-tag: latest
# Pass repository secrets to build workflow
secrets: inherit
# ===========================================================================
# JOB 3: Run GenAI Tests
# ===========================================================================
# Downloads AIMET wheel, installs dependencies, runs tests based on mode
# Uploads reports as artifacts: test-reports-<run_id>
# ===========================================================================
test-gpu:
name: Run GenAI tests (${{ matrix.variant }})
needs: [build-aimet-onnx-gpu, build-aimet-torch-gpu]
runs-on: a100
# Even though this stage has a dependency on both builds, we may not actually require both to run successfully
# since there may be ONNX-only or Torch-only cases. In those cases, we can skip the unnecessary build and continue
# with this job anyway
if: |
!cancelled() &&
!(needs.build-aimet-onnx-gpu.result == 'skipped' && needs.build-aimet-torch-gpu.result == 'skipped')
strategy:
matrix:
variant: ${{
fromJSON(
github.event.inputs.run_mode == 'ad_hoc'
&& github.event.inputs.ad_hoc_variants != ''
&& github.event.inputs.ad_hoc_variants != 'both'
&& format('["{0}"]', github.event.inputs.ad_hoc_variants)
|| '["onnx","torch"]'
) }}
# Container required by morph-lsf74/75-gpulv runner
# Uses same image as build for consistency
container:
image: "${{ vars.DOCKER_REGISTRY }}/${{ vars.DOCKER_IMAGE }}-${{ matrix.variant }}-gpu:latest"
credentials:
username: ${{ secrets.DOCKER_LOGIN }}
password: ${{ secrets.DOCKER_CREDENTIALS }}
options: --gpus all
timeout-minutes: 10080 # 7 days (7 * 24 * 60)
env:
# GPU configuration
CUDA_VISIBLE_DEVICES: "6"
# CRITICAL: Auto-accept git clones for models that need external repos
# Without this, models like mobilenet_v2 will prompt "Ok to clone? [Y/n]"
# and block the pipeline waiting for input
GIT_CLONE_PROTECTION_ACTIVE: "false"
# Additional git safety settings
GIT_TERMINAL_PROMPT: "0" # Disable any git prompts
# Headless environment (prevent GUI-related errors)
MPLBACKEND: "Agg"
QT_QPA_PLATFORM: "offscreen"
steps:
# -----------------------------------------------------------------------
# Setup: Get code, cache models, download AIMET wheel
# -----------------------------------------------------------------------
- name: Checkout code
uses: actions/checkout@v4
# Configure git to be non-interactive (prevents any prompts)
- name: Configure git for non-interactive mode
run: |
git config --global core.askPass ""
git config --global credential.helper ""
git config --global --add safe.directory '*'
echo "Git configured for non-interactive cloning"
# Cache downloaded models to speed up subsequent runs
# Key changes when GenAITests/ code changes
- name: Cache models
uses: actions/cache@v4
with:
path: |
~/.cache
~/.cache/huggingface
~/.cache/torch
key: aimet-cache-${{ runner.os }}-${{ hashFiles('GenAITests/**') }}
restore-keys: aimet-cache-${{ runner.os }}-
# Download aimet-torch wheel only when the build succeeded
- name: Download aimet-torch wheel
if: needs.build-aimet-torch-gpu.result == 'success'
uses: actions/download-artifact@v3
with:
name: torch-gpu-wheel
path: downloads
# Download aimet-onnx wheel only when the build succeeded
- name: Download aimet-onnx wheel
if: needs.build-aimet-onnx-gpu.result == 'success'
uses: actions/download-artifact@v3
with:
name: onnx-gpu-wheel
path: downloads
# -----------------------------------------------------------------------
# Install: Python environment + dependencies + AIMET
# -----------------------------------------------------------------------
- name: Install system dependencies
run: |
apt-get update -qq
apt-get install -y \
libgl1-mesa-glx \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender-dev \
libgomp1 \
git
apt-get clean
rm -rf /var/lib/apt/lists/*
# Create Python virtual environment
- name: Setup Python
run: |
apt-get update -qq
apt-get install -y python3 python3-venv python3-pip
python3 -m venv .venv
. .venv/bin/activate
pip install --upgrade pip wheel setuptools
# Install all Python dependencies
# - onnxruntime-gpu: For GPU inference (version matches AIMET build)
# - transformers, tokenizers: For GenAI models
- name: Install dependencies
run: |
. .venv/bin/activate
pip install onnxruntime-gpu==1.19.2
pip install -U "huggingface_hub"
pip install -r GenAITests/requirements.txt
# Install AIMET from the wheel and verify
- name: Install AIMET
run: |
. .venv/bin/activate
pip install downloads/*.whl
python -c "import aimet_${{ matrix.variant }}; print('AIMET version:', aimet_${{ matrix.variant }}.__version__)"
# Set up config file based on run mode
- name: Prepare scorecard config file
run: |
if [ "${{ github.event.inputs.run_mode }}" = "ad_hoc" ] && [ -n "${{ github.event.inputs.ad_hoc_config_b64 }}" ]; then
if ! echo "${{ github.event.inputs.ad_hoc_config_b64 }}" | base64 -d > GenAITests/scorecard_config.yaml 2>/dev/null; then
echo "Error: Failed to decode base64 config. Please ensure the input is valid base64."
exit 1
fi
if [ ! -s GenAITests/scorecard_config.yaml ]; then
echo "Error: Decoded config file is empty."
exit 1
fi
echo "Saved ad-hoc config (shown below) to GenAITests/scorecard_config.yaml"
cat GenAITests/scorecard_config.yaml
else
cp GenAITests/configs/${{ matrix.variant }}_regression.yaml GenAITests/scorecard_config.yaml
echo "Copied ${{ matrix.variant }} regression config to GenAITests/scorecard_config.yaml"
fi
# -----------------------------------------------------------------------
# Run: Execute aimet tests
# -----------------------------------------------------------------------
# Run tests
- name: Run tests
run: |
. .venv/bin/activate
export PYTHONPATH=$PYTHONPATH:$(pwd)
export HF_HOME=/local/mnt2/workspace2/aimet_bot/huggingface
hf auth login --token ${{ secrets.HUGGINGFACE_TOKEN }}
pytest -s GenAITests/${{ matrix.variant }}/test_genai.py --config GenAITests/scorecard_config.yaml
- name: Run optional secondary evaluation using aimet-onnx
if: ${{ matrix.variant == 'torch' }}
run: |
OUT_FILE="GenAITests/secondary_onnx_eval.yaml"
rm -f "$OUT_FILE"
mapfile -t FILES < <(ls -1 artifacts/*/*onnx*.yml artifacts/*/*onnx*.yaml 2>/dev/null | sort)
if [ "${#FILES[@]}" -eq 0 ]; then
echo "No YAML files found in artifacts/"
exit 0
fi
for f in "${FILES[@]}"; do
cat "$f" >> "$OUT_FILE"
echo "---" >> "$OUT_FILE"
done
sed -i '$d' "$OUT_FILE"
echo "Created $OUT_FILE with ${#FILES[@]} documents."
cat "$OUT_FILE"
. .venv/bin/activate
export PYTHONPATH=$PYTHONPATH:$(pwd)
export HF_HOME=/local/mnt2/workspace2/aimet_bot/huggingface
hf auth login --token ${{ secrets.HUGGINGFACE_TOKEN }}
pytest -s GenAITests/onnx/test_genai.py --config "$OUT_FILE"
shell: bash
# (after reports are generated, before upload)
- name: Build file metadata string
id: file-metadata
run: |
COMMIT_TIMESTAMP=$(git show -s --format=%cd --date=format:%Y%m%d-%H%M%S ${{ github.sha }})
SAFE_REF_NAME=$(echo "${{ github.ref_name }}" | sed 's/[^a-zA-Z0-9._-]/_/g')
FILE_SUFFIX=${{ matrix.variant }}-${SAFE_REF_NAME}-${{ github.sha }}-${COMMIT_TIMESTAMP}-${{ github.run_id }}
echo "file_suffix=$FILE_SUFFIX" >> "$GITHUB_OUTPUT"
# Rename report using suffix
- name: Update report filename
run: |
mv genai_test_artifacts/profiling_data.json genai_test_artifacts/profiling_data-${{ steps.file-metadata.outputs.file_suffix }}.json
mv genai_test_artifacts/profiling_data.csv genai_test_artifacts/profiling_data-${{ steps.file-metadata.outputs.file_suffix }}.csv
# Upload generated reports (CSV, HTML, JSON)
# Note: Using v3 for GHES compatibility
# Artifacts available under "Artifacts" section in workflow run
- name: Upload reports
if: always() # Upload even if tests failed
uses: actions/upload-artifact@v3
with:
name: test-data-${{ steps.file-metadata.outputs.file_suffix }}
path: genai_test_artifacts/profiling_data-${{ steps.file-metadata.outputs.file_suffix }}*
retention-days: 30 # Keep for 1 week
# Clean up cached models to prevent disk space issues
- name: Clean up cache
if: always()
run: |
rm -rf ~/.cache ~/.cache/huggingface ~/.cache/torch || true
echo "✓ Cache cleared"