Skip to content

Commit e090985

Browse files
authored
test: bring back e2e tests on Github CI (#48)
1 parent 91dd2e1 commit e090985

16 files changed

Lines changed: 413 additions & 63 deletions

File tree

.github/workflows/checks.yml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,14 @@ jobs:
2626
steps:
2727
- uses: actions/checkout@v4
2828

29+
- name: Cache sccache
30+
uses: actions/cache@v4
31+
with:
32+
path: .cache/sccache
33+
key: sccache-rust-${{ runner.arch }}-${{ hashFiles('Cargo.lock') }}
34+
restore-keys: |
35+
sccache-rust-${{ runner.arch }}-
36+
2937
- name: Format
3038
run: mise run rust:format:check
3139

@@ -35,6 +43,10 @@ jobs:
3543
- name: Test
3644
run: mise run test:rust
3745

46+
- name: sccache stats
47+
if: always()
48+
run: sccache --show-stats
49+
3850
python:
3951
name: Python
4052
runs-on: build-amd64

.github/workflows/docker-build.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
name: Docker Build
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
component:
7+
description: "Component to build (server, sandbox, cluster)"
8+
required: true
9+
type: string
10+
timeout-minutes:
11+
description: "Job timeout in minutes"
12+
required: false
13+
type: number
14+
default: 20
15+
push:
16+
description: "Push image to registry"
17+
required: false
18+
type: boolean
19+
default: true
20+
21+
env:
22+
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
23+
GHCR_CACHE: ghcr.io/nvidia/nv-agent-env/cache
24+
25+
permissions:
26+
contents: read
27+
packages: write
28+
29+
jobs:
30+
build:
31+
name: Build ${{ inputs.component }}
32+
runs-on: build-amd64
33+
timeout-minutes: ${{ inputs.timeout-minutes }}
34+
container:
35+
image: ghcr.io/nvidia/nv-agent-env/ci:latest
36+
credentials:
37+
username: ${{ github.actor }}
38+
password: ${{ secrets.GITHUB_TOKEN }}
39+
options: --privileged
40+
volumes:
41+
- /var/run/docker.sock:/var/run/docker.sock
42+
env:
43+
IMAGE_TAG: ${{ github.sha }}
44+
IMAGE_REGISTRY: ghcr.io/nvidia/nv-agent-env
45+
DOCKER_PUSH: ${{ inputs.push && '1' || '0' }}
46+
steps:
47+
- uses: actions/checkout@v4
48+
49+
- name: Log in to GHCR
50+
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
51+
52+
- name: Set up Docker Buildx
53+
uses: ./.github/actions/setup-buildx
54+
55+
- name: Build ${{ inputs.component }} image
56+
env:
57+
DOCKER_BUILDER: navigator
58+
DOCKER_CACHE_FROM: "type=registry,ref=${{ env.GHCR_CACHE }}/${{ inputs.component }}:buildcache"
59+
DOCKER_CACHE_TO: "type=registry,ref=${{ env.GHCR_CACHE }}/${{ inputs.component }}:buildcache,mode=max"
60+
run: mise run --no-prepare docker:build:${{ inputs.component }}

.github/workflows/e2e.yml

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
name: E2E
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
types: [opened, synchronize, reopened, labeled]
8+
9+
permissions:
10+
contents: read
11+
packages: write
12+
13+
jobs:
14+
build-server:
15+
if: github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'e2e')
16+
uses: ./.github/workflows/docker-build.yml
17+
with:
18+
component: server
19+
20+
build-sandbox:
21+
if: github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'e2e')
22+
uses: ./.github/workflows/docker-build.yml
23+
with:
24+
component: sandbox
25+
26+
build-cluster:
27+
if: github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'e2e')
28+
uses: ./.github/workflows/docker-build.yml
29+
with:
30+
component: cluster
31+
32+
e2e:
33+
name: E2E
34+
needs: [build-server, build-sandbox, build-cluster]
35+
runs-on: build-amd64
36+
timeout-minutes: 30
37+
container:
38+
image: ghcr.io/nvidia/nv-agent-env/ci:latest
39+
credentials:
40+
username: ${{ github.actor }}
41+
password: ${{ secrets.GITHUB_TOKEN }}
42+
options: --privileged
43+
volumes:
44+
- /var/run/docker.sock:/var/run/docker.sock
45+
env:
46+
MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
47+
IMAGE_TAG: ${{ github.sha }}
48+
# Use GHCR as the image registry for both push (from CI) and pull (from k3s).
49+
NAVIGATOR_REGISTRY: ghcr.io/nvidia/nv-agent-env
50+
NAVIGATOR_REGISTRY_HOST: ghcr.io
51+
NAVIGATOR_REGISTRY_NAMESPACE: nvidia/nv-agent-env
52+
NAVIGATOR_REGISTRY_USERNAME: ${{ github.actor }}
53+
NAVIGATOR_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
54+
steps:
55+
- uses: actions/checkout@v4
56+
57+
- name: Log in to GHCR
58+
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
59+
60+
- name: Pull cluster image
61+
run: docker pull ghcr.io/nvidia/nv-agent-env/cluster:${{ github.sha }}
62+
63+
- name: Install Python dependencies and generate protobuf stubs
64+
run: uv sync --frozen && mise run --no-prepare python:proto
65+
66+
- name: Bootstrap and deploy cluster
67+
env:
68+
GATEWAY_HOST: host.docker.internal
69+
GATEWAY_PORT: "8080"
70+
SKIP_IMAGE_PUSH: "1"
71+
NAVIGATOR_CLUSTER_IMAGE: ghcr.io/nvidia/nv-agent-env/cluster:${{ github.sha }}
72+
run: mise run --no-prepare --skip-deps cluster
73+
74+
- name: Run E2E tests
75+
run: mise run --no-prepare --skip-deps test:e2e:sandbox

build/scripts/cluster-bootstrap.sh

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,13 +235,32 @@ if [ "${MODE}" = "fast" ]; then
235235
fi
236236
fi
237237

238-
if [ "${MODE}" = "build" ] || [ "${MODE}" = "fast" ]; then
238+
if [ "${SKIP_IMAGE_PUSH:-}" = "1" ]; then
239+
echo "Skipping image push (SKIP_IMAGE_PUSH=1; images already in registry)."
240+
elif [ "${MODE}" = "build" ] || [ "${MODE}" = "fast" ]; then
239241
for component in server sandbox; do
240242
build/scripts/cluster-push-component.sh "${component}"
241243
done
242244
fi
243245

244-
nav cluster admin deploy --name "${CLUSTER_NAME}" --port "${GATEWAY_PORT}" --update-kube-config
246+
GATEWAY_HOST_ARGS=()
247+
if [ -n "${GATEWAY_HOST:-}" ]; then
248+
GATEWAY_HOST_ARGS+=(--gateway-host "${GATEWAY_HOST}")
249+
250+
# Ensure the gateway host resolves from the current environment.
251+
# On Linux CI runners host.docker.internal is not set automatically
252+
# (it's a Docker Desktop feature). If the hostname doesn't resolve,
253+
# add it via the Docker bridge gateway IP.
254+
if ! getent hosts "${GATEWAY_HOST}" >/dev/null 2>&1; then
255+
BRIDGE_IP=$(docker network inspect bridge --format '{{(index .IPAM.Config 0).Gateway}}' 2>/dev/null || true)
256+
if [ -n "${BRIDGE_IP}" ]; then
257+
echo "Adding /etc/hosts entry: ${BRIDGE_IP} ${GATEWAY_HOST}"
258+
echo "${BRIDGE_IP} ${GATEWAY_HOST}" >> /etc/hosts
259+
fi
260+
fi
261+
fi
262+
263+
# Deploy the cluster gateway. GATEWAY_HOST_ARGS is empty unless GATEWAY_HOST is set,
# so expand it with the ${arr[@]+"${arr[@]}"} guard: bash < 4.4 reports an empty
# array expanded as "${arr[@]}" as an unbound variable when `set -u` is active.
nav cluster admin deploy --name "${CLUSTER_NAME}" --port "${GATEWAY_PORT}" ${GATEWAY_HOST_ARGS[@]+"${GATEWAY_HOST_ARGS[@]}"} --update-kube-config
245264

246265
echo ""
247266
echo "Cluster '${CLUSTER_NAME}' is ready."

build/scripts/docker-build-cluster.sh

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,17 @@
77

88
# DOCKER_PLATFORM - Target platform (optional)
99
# DOCKER_BUILDER - Buildx builder name (default: auto-select)
10+
# DOCKER_CACHE_FROM - Explicit --cache-from value (e.g. type=registry,ref=...)
11+
# DOCKER_CACHE_TO - Explicit --cache-to value (e.g. type=registry,ref=...,mode=max)
12+
# DOCKER_PUSH - When set to "1", push instead of loading into local daemon
13+
# IMAGE_REGISTRY - Registry prefix for image name (e.g. ghcr.io/org/repo)
1014
set -euo pipefail
1115

1216
IMAGE_TAG=${IMAGE_TAG:-dev}
17+
IMAGE_NAME="navigator/cluster"
18+
if [[ -n "${IMAGE_REGISTRY:-}" ]]; then
19+
IMAGE_NAME="${IMAGE_REGISTRY}/cluster"
20+
fi
1321
DOCKER_BUILD_CACHE_DIR=${DOCKER_BUILD_CACHE_DIR:-.cache/buildkit}
1422
CACHE_PATH="${DOCKER_BUILD_CACHE_DIR}/cluster"
1523

@@ -26,13 +34,18 @@ elif [[ -z "${DOCKER_PLATFORM:-}" && -z "${CI:-}" ]]; then
2634
fi
2735

2836
CACHE_ARGS=()
29-
if [[ -n "${CI:-}" ]]; then
30-
echo "CI environment detected; skipping local build cache export options."
31-
elif docker buildx inspect ${BUILDER_ARGS[@]+"${BUILDER_ARGS[@]}"} 2>/dev/null | grep -q "Driver: docker-container"; then
32-
CACHE_ARGS=(
33-
--cache-from "type=local,src=${CACHE_PATH}"
34-
--cache-to "type=local,dest=${CACHE_PATH},mode=max"
35-
)
37+
if [[ -n "${DOCKER_CACHE_FROM:-}" || -n "${DOCKER_CACHE_TO:-}" ]]; then
38+
# Explicit cache configuration from the caller (e.g. CI registry cache).
39+
[[ -n "${DOCKER_CACHE_FROM:-}" ]] && CACHE_ARGS+=(--cache-from "${DOCKER_CACHE_FROM}")
40+
[[ -n "${DOCKER_CACHE_TO:-}" ]] && CACHE_ARGS+=(--cache-to "${DOCKER_CACHE_TO}")
41+
elif [[ -z "${CI:-}" ]]; then
42+
# Local development: use filesystem cache with docker-container driver.
43+
if docker buildx inspect ${BUILDER_ARGS[@]+"${BUILDER_ARGS[@]}"} 2>/dev/null | grep -q "Driver: docker-container"; then
44+
CACHE_ARGS=(
45+
--cache-from "type=local,src=${CACHE_PATH}"
46+
--cache-to "type=local,dest=${CACHE_PATH},mode=max"
47+
)
48+
fi
3649
fi
3750

3851
# Create build directory for charts
@@ -45,14 +58,20 @@ helm package deploy/helm/navigator -d deploy/docker/.build/charts/
4558
# Build cluster image (no bundled component images — they are pulled at runtime
4659
# from the distribution registry; credentials are injected at deploy time)
4760
echo "Building cluster image..."
61+
62+
OUTPUT_FLAG="--load"
63+
if [[ "${DOCKER_PUSH:-}" == "1" ]]; then
64+
OUTPUT_FLAG="--push"
65+
fi
66+
4867
docker buildx build \
4968
${BUILDER_ARGS[@]+"${BUILDER_ARGS[@]}"} \
5069
${DOCKER_PLATFORM:+--platform ${DOCKER_PLATFORM}} \
5170
${CACHE_ARGS[@]+"${CACHE_ARGS[@]}"} \
5271
-f deploy/docker/Dockerfile.cluster \
53-
-t navigator/cluster:${IMAGE_TAG} \
72+
-t "${IMAGE_NAME}:${IMAGE_TAG}" \
5473
--build-arg K3S_VERSION=${K3S_VERSION} \
55-
--load \
74+
${OUTPUT_FLAG} \
5675
.
5776

58-
echo "Done! Cluster image: navigator/cluster:${IMAGE_TAG}"
77+
echo "Done! Cluster image: ${IMAGE_NAME}:${IMAGE_TAG}"

build/scripts/docker-build-component.sh

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
# IMAGE_TAG - Image tag (default: dev)
1515
# DOCKER_PLATFORM - Target platform (optional, e.g. linux/amd64)
1616
# DOCKER_BUILDER - Buildx builder name (default: auto-select)
17+
# DOCKER_CACHE_FROM - Explicit --cache-from value (e.g. type=registry,ref=...)
18+
# DOCKER_CACHE_TO - Explicit --cache-to value (e.g. type=registry,ref=...,mode=max)
19+
# DOCKER_PUSH - When set to "1", push instead of loading into local daemon
20+
# IMAGE_REGISTRY - Registry prefix for image name (e.g. ghcr.io/org/repo)
1721
set -euo pipefail
1822

1923
COMPONENT=${1:?"Usage: docker-build-component.sh <component> [variant] [extra-args...]"}
@@ -48,6 +52,13 @@ if [[ ! -f "${DOCKERFILE}" ]]; then
4852
exit 1
4953
fi
5054

55+
# Prefix with registry when set (e.g. ghcr.io/org/repo/server:tag).
56+
# Replaces the default "navigator/" prefix with the registry path.
57+
if [[ -n "${IMAGE_REGISTRY:-}" ]]; then
58+
_suffix="${IMAGE_NAME#navigator/}"
59+
IMAGE_NAME="${IMAGE_REGISTRY}/${_suffix}"
60+
fi
61+
5162
IMAGE_TAG=${IMAGE_TAG:-dev}
5263
DOCKER_BUILD_CACHE_DIR=${DOCKER_BUILD_CACHE_DIR:-.cache/buildkit}
5364
CACHE_PATH="${DOCKER_BUILD_CACHE_DIR}/${COMPONENT}${VARIANT:+-${VARIANT}}"
@@ -69,13 +80,23 @@ elif [[ -z "${DOCKER_PLATFORM:-}" && -z "${CI:-}" ]]; then
6980
fi
7081

7182
CACHE_ARGS=()
72-
if [[ -n "${CI:-}" ]]; then
73-
echo "CI environment detected; skipping local build cache export options."
74-
elif docker buildx inspect ${BUILDER_ARGS[@]+"${BUILDER_ARGS[@]}"} 2>/dev/null | grep -q "Driver: docker-container"; then
75-
CACHE_ARGS=(
76-
--cache-from "type=local,src=${CACHE_PATH}"
77-
--cache-to "type=local,dest=${CACHE_PATH},mode=max"
78-
)
83+
if [[ -n "${DOCKER_CACHE_FROM:-}" || -n "${DOCKER_CACHE_TO:-}" ]]; then
84+
# Explicit cache configuration from the caller (e.g. CI registry cache).
85+
[[ -n "${DOCKER_CACHE_FROM:-}" ]] && CACHE_ARGS+=(--cache-from "${DOCKER_CACHE_FROM}")
86+
[[ -n "${DOCKER_CACHE_TO:-}" ]] && CACHE_ARGS+=(--cache-to "${DOCKER_CACHE_TO}")
87+
elif [[ -z "${CI:-}" ]]; then
88+
# Local development: use filesystem cache with docker-container driver.
89+
if docker buildx inspect ${BUILDER_ARGS[@]+"${BUILDER_ARGS[@]}"} 2>/dev/null | grep -q "Driver: docker-container"; then
90+
CACHE_ARGS=(
91+
--cache-from "type=local,src=${CACHE_PATH}"
92+
--cache-to "type=local,dest=${CACHE_PATH},mode=max"
93+
)
94+
fi
95+
fi
96+
97+
OUTPUT_FLAG="--load"
98+
if [[ "${DOCKER_PUSH:-}" == "1" ]]; then
99+
OUTPUT_FLAG="--push"
79100
fi
80101

81102
docker buildx build \
@@ -86,5 +107,5 @@ docker buildx build \
86107
-t "${IMAGE_NAME}:${IMAGE_TAG}" \
87108
--provenance=false \
88109
"$@" \
89-
--load \
110+
${OUTPUT_FLAG} \
90111
.

0 commit comments

Comments
 (0)