Skip to content

Commit 377c416

Browse files
gnurizenclaude
andcommitted
Use Docker registry for CUDA headers instead of git repo
Create lightweight Docker images (~50-60MB each) containing only CUDA headers and libcupti.so needed for compilation. These images are built once manually and pushed to ghcr.io, then pulled during CI builds. The slim Dockerfile is now the main Dockerfile, eliminating the need for 3GB CUDA development images. Changes: - Add Dockerfile.cuda-headers to extract headers from NVIDIA images - Replace heavyweight Dockerfile with slim version using header images - Add push-cuda-headers Makefile target to build and push header images - Update all Makefile targets to use local cuda-headers:12/13 images - Update GitHub Actions workflow to use header images from registry - CI pulls pre-built header images (no rebuild on every run) Benefits: - No git repo bloat (0 bytes vs 136MB of checked-in headers) - Faster CI (pulls 60MB header image vs 3GB CUDA devel image) - Eliminates "No space left on device" errors in GitHub Actions - Header images can be reused across builds and projects - Only libcupti.so and stub libcuda.so included (no runtime bloat) - Headers only rebuilt manually when CUDA versions change Usage: # Build header images locally for development (run once) docker buildx build -f Dockerfile.cuda-headers \ --build-arg CUDA_VERSION=12.9.1 \ --platform linux/amd64 \ --tag cuda-headers:12 \ --load . docker buildx build -f Dockerfile.cuda-headers \ --build-arg CUDA_VERSION=13.0.2 \ --platform linux/amd64 \ --tag cuda-headers:13 \ --load . # Push to registry when CUDA versions update make push-cuda-headers 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]>
1 parent d86f8b5 commit 377c416

File tree

4 files changed

+97
-27
lines changed

4 files changed

+97
-27
lines changed

.github/workflows/container.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ on:
1313
env:
1414
REGISTRY: ghcr.io
1515
IMAGE_NAME: ${{ github.repository }}
16+
CUDA_HEADERS_REGISTRY: ghcr.io/parca-dev/cuda-headers
1617

1718
jobs:
1819
build-and-push:
@@ -55,6 +56,9 @@ jobs:
5556
with:
5657
context: .
5758
file: ./Dockerfile
59+
build-args: |
60+
CUDA_12_HEADERS=${{ env.CUDA_HEADERS_REGISTRY }}:12
61+
CUDA_13_HEADERS=${{ env.CUDA_HEADERS_REGISTRY }}:13
5862
platforms: linux/amd64,linux/arm64
5963
target: runtime
6064
push: ${{ github.event_name != 'pull_request' }}
@@ -104,6 +108,8 @@ jobs:
104108
run: |
105109
mkdir -p build/${{ matrix.arch }}
106110
docker buildx build -f Dockerfile \
111+
--build-arg CUDA_12_HEADERS=${{ env.CUDA_HEADERS_REGISTRY }}:12 \
112+
--build-arg CUDA_13_HEADERS=${{ env.CUDA_HEADERS_REGISTRY }}:13 \
107113
--target export-cuda${{ matrix.cuda_major }} \
108114
--output type=local,dest=build/${{ matrix.arch }} \
109115
--platform ${{ matrix.platform }} \

Dockerfile

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,36 @@
1-
# Multi-platform build for libparcagpucupti.so
2-
# Supports both AMD64 and ARM64 architectures
3-
# Builds both CUDA 12 and 13 versions in a single container
4-
#
5-
# Build args:
6-
# CUDA_12_FULL_VERSION: Full CUDA 12 version (default: 12.9.1)
7-
# CUDA_13_FULL_VERSION: Full CUDA 13 version (default: 13.0.2)
8-
#
9-
# Stages:
10-
# builder-cuda12: Builds library for CUDA 12
11-
# builder-cuda13: Builds library for CUDA 13
12-
# runtime: Final image with both CUDA versions included
13-
14-
ARG CUDA_12_FULL_VERSION=12.9.1
15-
ARG CUDA_13_FULL_VERSION=13.0.2
1+
# Slim multi-platform build for libparcagpucupti.so
2+
# Uses pre-built CUDA header images instead of full CUDA development images
3+
# This significantly reduces build time and disk space requirements
4+
5+
# CUDA header images (can be overridden at build time)
6+
ARG CUDA_12_HEADERS=ghcr.io/parca-dev/cuda-headers:12
7+
ARG CUDA_13_HEADERS=ghcr.io/parca-dev/cuda-headers:13
8+
9+
# Import CUDA 12 headers
10+
FROM ${CUDA_12_HEADERS} AS cuda12-headers
11+
12+
# Import CUDA 13 headers
13+
FROM ${CUDA_13_HEADERS} AS cuda13-headers
1614

1715
# Build stage for CUDA 12
18-
FROM nvidia/cuda:${CUDA_12_FULL_VERSION}-devel-ubuntu22.04 AS builder-cuda12
16+
FROM ubuntu:22.04 AS builder-cuda12
1917

20-
# Install build tools
18+
# Install only build tools (no CUDA toolkit needed)
2119
RUN apt-get update && apt-get install -y \
2220
cmake \
2321
make \
2422
gcc \
23+
g++ \
2524
systemtap-sdt-dev \
2625
&& rm -rf /var/lib/apt/lists/*
2726

28-
# Copy source code
2927
WORKDIR /build/cupti
30-
COPY . .
28+
29+
# Copy CUDA headers and libraries from header image
30+
COPY --from=cuda12-headers /usr/local/cuda /usr/local/cuda
31+
32+
# Copy source code
33+
COPY cupti/cupti-prof.c cupti/CMakeLists.txt ./
3134

3235
# Build the library for CUDA 12
3336
ENV CUDA_ROOT=/usr/local/cuda
@@ -38,19 +41,24 @@ RUN mkdir -p build && \
3841
mv libparcagpucupti.so libparcagpucupti.so.12
3942

4043
# Build stage for CUDA 13
41-
FROM nvidia/cuda:${CUDA_13_FULL_VERSION}-devel-ubuntu22.04 AS builder-cuda13
44+
FROM ubuntu:22.04 AS builder-cuda13
4245

43-
# Install build tools
46+
# Install only build tools (no CUDA toolkit needed)
4447
RUN apt-get update && apt-get install -y \
4548
cmake \
4649
make \
4750
gcc \
51+
g++ \
4852
systemtap-sdt-dev \
4953
&& rm -rf /var/lib/apt/lists/*
5054

51-
# Copy source code
5255
WORKDIR /build/cupti
53-
COPY . .
56+
57+
# Copy CUDA headers and libraries from header image
58+
COPY --from=cuda13-headers /usr/local/cuda /usr/local/cuda
59+
60+
# Copy source code
61+
COPY cupti/cupti-prof.c cupti/CMakeLists.txt ./
5462

5563
# Build the library for CUDA 13
5664
ENV CUDA_ROOT=/usr/local/cuda

Dockerfile.cuda-headers

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Dockerfile to create minimal CUDA header images
2+
# These are pushed to ghcr.io and used as build dependencies
3+
# Usage: docker build --build-arg CUDA_VERSION=12.9.1 -t ghcr.io/parca-dev/cuda-headers:12 .
4+
5+
ARG CUDA_VERSION=12.9.1
6+
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS extractor
7+
8+
# Extract only headers and CUPTI library (no CUDA driver library needed for build)
9+
RUN mkdir -p /cuda-sdk/include /cuda-sdk/lib64 && \
10+
cp -r /usr/local/cuda/include/* /cuda-sdk/include/ && \
11+
cp /usr/local/cuda/lib64/libcupti.so* /cuda-sdk/lib64/ && \
12+
cp /usr/local/cuda/lib64/stubs/libcuda.so /cuda-sdk/lib64/
13+
14+
# Minimal runtime image with just the SDK files
15+
FROM busybox:latest
16+
COPY --from=extractor /cuda-sdk /usr/local/cuda

Makefile

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: all clean test cupti-amd64 cupti-arm64 cupti-all cupti-all-versions cross test-infra docker-push docker-test-build docker-test-run format
1+
.PHONY: all clean test cupti-amd64 cupti-arm64 cupti-all cupti-all-versions cross test-infra docker-push push-cuda-headers docker-test-build docker-test-run format
22

33
# CUDA version configuration
44
CUDA_MAJOR ?= 12
@@ -14,9 +14,11 @@ cupti-amd64:
1414
@mkdir -p /tmp/parcagpu-build-amd64
1515
@docker buildx use default
1616
@docker buildx build -f Dockerfile \
17+
--build-arg CUDA_12_HEADERS=$(CUDA_12_HEADERS) \
18+
--build-arg CUDA_13_HEADERS=$(CUDA_13_HEADERS) \
1719
--target export-cuda$(CUDA_MAJOR) \
1820
--output type=local,dest=/tmp/parcagpu-build-amd64 \
19-
--platform linux/amd64 cupti
21+
--platform linux/amd64 .
2022
@mkdir -p build/$(CUDA_MAJOR)/amd64
2123
@cp /tmp/parcagpu-build-amd64/$(LIB_NAME) build/$(CUDA_MAJOR)/amd64/
2224
@ln -sf $(LIB_NAME) build/$(CUDA_MAJOR)/amd64/libparcagpucupti.so
@@ -28,9 +30,11 @@ cupti-arm64:
2830
@mkdir -p /tmp/parcagpu-build-arm64
2931
@docker buildx create --name parcagpu-builder --use --bootstrap 2>/dev/null || docker buildx use parcagpu-builder
3032
@docker buildx build -f Dockerfile \
33+
--build-arg CUDA_12_HEADERS=$(CUDA_12_HEADERS) \
34+
--build-arg CUDA_13_HEADERS=$(CUDA_13_HEADERS) \
3135
--target export-cuda$(CUDA_MAJOR) \
3236
--output type=local,dest=/tmp/parcagpu-build-arm64 \
33-
--platform linux/arm64 cupti
37+
--platform linux/arm64 .
3438
@mkdir -p build/$(CUDA_MAJOR)/arm64
3539
@cp /tmp/parcagpu-build-arm64/$(LIB_NAME) build/$(CUDA_MAJOR)/arm64/
3640
@ln -sf $(LIB_NAME) build/$(CUDA_MAJOR)/arm64/libparcagpucupti.so
@@ -46,9 +50,11 @@ cross:
4650
@echo "=== Building runtime container for AMD64 and ARM64 (includes CUDA 12 and 13) ==="
4751
@docker buildx create --name parcagpu-builder --use --bootstrap 2>/dev/null || docker buildx use parcagpu-builder
4852
@docker buildx build -f Dockerfile \
53+
--build-arg CUDA_12_HEADERS=$(CUDA_12_HEADERS) \
54+
--build-arg CUDA_13_HEADERS=$(CUDA_13_HEADERS) \
4955
--target runtime \
5056
--platform linux/amd64,linux/arm64 \
51-
cupti
57+
.
5258
@echo "Runtime container built for both platforms (cached, not loaded into Docker)"
5359

5460
# Build all artifacts (CUDA 12 & 13 for both amd64 and arm64)
@@ -64,6 +70,37 @@ cupti-all-versions:
6470
@echo "CUDA 13: build/13/amd64/libparcagpucupti.so.13"
6571
@echo "CUDA 13: build/13/arm64/libparcagpucupti.so.13"
6672

73+
# CUDA header image configuration
74+
# Can be overridden to use local images (e.g., make cupti-all CUDA_12_HEADERS=cuda-headers:12)
75+
CUDA_HEADERS_REGISTRY ?= ghcr.io/parca-dev/cuda-headers
76+
CUDA_12_HEADERS ?= $(CUDA_HEADERS_REGISTRY):12
77+
CUDA_13_HEADERS ?= $(CUDA_HEADERS_REGISTRY):13
78+
79+
# Build and push CUDA header images to registry
80+
# These are lightweight images (~35MB each) containing only CUDA headers and libcupti
81+
# Note: Only needs to be run manually when:
82+
# - CUDA versions are updated (12.9.1 -> 12.x.x, 13.0.2 -> 13.x.x)
83+
# - New CUDA major versions are added
84+
# - CUPTI API changes require header updates
85+
push-cuda-headers:
86+
@echo "=== Building and pushing CUDA header images ==="
87+
@docker buildx create --name parcagpu-builder --use --bootstrap 2>/dev/null || docker buildx use parcagpu-builder
88+
@echo "Building CUDA 12 headers..."
89+
@docker buildx build -f Dockerfile.cuda-headers \
90+
--build-arg CUDA_VERSION=12.9.1 \
91+
--platform linux/amd64,linux/arm64 \
92+
--tag $(CUDA_HEADERS_REGISTRY):12 \
93+
--push \
94+
.
95+
@echo "Building CUDA 13 headers..."
96+
@docker buildx build -f Dockerfile.cuda-headers \
97+
--build-arg CUDA_VERSION=13.0.2 \
98+
--platform linux/amd64,linux/arm64 \
99+
--tag $(CUDA_HEADERS_REGISTRY):13 \
100+
--push \
101+
.
102+
@echo "CUDA header images pushed to $(CUDA_HEADERS_REGISTRY):12 and :13"
103+
67104
# Build test infrastructure with Zig
68105
test-infra:
69106
@echo "=== Building test infrastructure with Zig ==="
@@ -84,6 +121,7 @@ clean:
84121
# Build and push multi-arch Docker images to ghcr.io
85122
# Set IMAGE_TAG to override the default tag (e.g., make docker-push IMAGE_TAG=v1.0.0)
86123
# Set IMAGE to override the image name (e.g., make docker-push IMAGE=ghcr.io/myuser/parcagpu)
124+
# Set CUDA_12_HEADERS and CUDA_13_HEADERS to override header images (e.g., cuda-headers:12 for local)
87125
# Note: Runtime image includes both CUDA 12 and 13
88126
IMAGE ?= ghcr.io/parca-dev/parcagpu
89127
IMAGE_TAG ?= latest
@@ -92,6 +130,8 @@ docker-push:
92130
@docker buildx create --name parcagpu-builder --use --bootstrap 2>/dev/null || docker buildx use parcagpu-builder
93131
@echo "=== Building and pushing multi-arch Docker images to $(IMAGE):$(IMAGE_TAG) (includes CUDA 12 and 13) ==="
94132
@docker buildx build -f Dockerfile \
133+
--build-arg CUDA_12_HEADERS=$(CUDA_12_HEADERS) \
134+
--build-arg CUDA_13_HEADERS=$(CUDA_13_HEADERS) \
95135
--target runtime \
96136
--platform linux/amd64,linux/arm64 \
97137
--tag $(IMAGE):$(IMAGE_TAG) \

0 commit comments

Comments
 (0)