Skip to content

Commit 17eaef1

Browse files
authored
Merge pull request #43 from MITLibraries/in-1500-new-workflows
* Updates For New Shared Workflows
* Updates to address some CI errors
2 parents 3b79a44 + b51eab5 commit 17eaef1

File tree

9 files changed

+1416
-1091
lines changed

9 files changed

+1416
-1091
lines changed

.github/workflows/ci.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
name: CI
2-
on: push
2+
on:
3+
pull_request:
4+
paths-ignore:
5+
- '.github/**'
6+
37
jobs:
48
test:
59
uses: mitlibraries/.github/.github/workflows/python-shared-test.yml@main

.github/workflows/dev-build.yml

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
### This is the Terraform-generated dev-build.yml workflow for the browsertrix-harvester-dev app repository ###
2-
### If this is a Lambda repo, uncomment the FUNCTION line at the end of the document ###
3-
### If the container requires any additional pre-build commands, uncomment and edit ###
4-
### the PREBUILD line at the end of the document. ###
1+
### This is the Terraform-generated dev-build.yml workflow for the ###
2+
### browsertrix-harvester-dev app repository. ###
3+
### If this is a Lambda repo, uncomment the FUNCTION line at the end of ###
4+
### the document. If the container requires any additional pre-build ###
5+
### commands, uncomment and edit the PREBUILD line at the end of the ###
6+
### document. ###
7+
58
name: Dev Container Build and Deploy
69
on:
710
workflow_dispatch:
@@ -11,14 +14,47 @@ on:
1114
paths-ignore:
1215
- '.github/**'
1316

17+
permissions:
18+
id-token: write
19+
contents: read
20+
1421
jobs:
22+
prep:
23+
name: Prep for Build
24+
runs-on: ubuntu-latest
25+
outputs:
26+
cpuarch: ${{ steps.setarch.outputs.cpuarch }}
27+
steps:
28+
- name: Checkout
29+
uses: actions/checkout@v5
30+
31+
- name: Set CPU Architecture
32+
id: setarch
33+
run: |
34+
echo "### :abacus: Architecture Selection" >> $GITHUB_STEP_SUMMARY
35+
if [[ -f .aws-architecture ]]; then
36+
ARCH=$(cat .aws-architecture)
37+
echo "\`$ARCH\` was read from \`.aws-architecture\` and passed to the deploy job." >> $GITHUB_STEP_SUMMARY
38+
else
39+
ARCH="linux/amd64"
40+
echo "No \`.aws-architecture\` file, so default \`$ARCH\` was passed to the deploy job." >> $GITHUB_STEP_SUMMARY
41+
fi
42+
if [[ "$ARCH" != "linux/arm64" && "$ARCH" != "linux/amd64" ]]; then
43+
echo "$ARCH is INVALID architecture!"
44+
echo "$ARCH is INVALID architecture!" >> $GITHUB_STEP_SUMMARY
45+
exit 1
46+
fi
47+
echo "cpuarch=$ARCH" >> $GITHUB_OUTPUT
48+
1549
deploy:
16-
name: Dev Container Deploy
17-
uses: mitlibraries/.github/.github/workflows/ecr-shared-deploy-dev.yml@main
50+
needs: prep
51+
name: Dev Deploy
52+
uses: mitlibraries/.github/.github/workflows/ecr-multi-arch-deploy-dev.yml@main
1853
secrets: inherit
1954
with:
2055
AWS_REGION: "us-east-1"
2156
GHA_ROLE: "browsertrix-harvester-gha-dev"
2257
ECR: "browsertrix-harvester-dev"
58+
CPU_ARCH: ${{ needs.prep.outputs.cpuarch }}
2359
# FUNCTION: ""
24-
# PREBUILD:
60+
# PREBUILD:

.github/workflows/prod-promote.yml

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,57 @@
1-
### This is the Terraform-generated prod-promote.yml workflow for the browsertrix-harvester-prod repository. ###
2-
### If this is a Lambda repo, uncomment the FUNCTION line at the end of the document. ###
1+
### This is the Terraform-generated prod-promote.yml workflow for the ###
2+
### browsertrix-harvester-prod repository. ###
3+
### If this is a Lambda repo, uncomment the FUNCTION line at the end of ###
4+
### the document. ###
5+
36
name: Prod Container Promote
47
on:
58
workflow_dispatch:
69
release:
710
types: [published]
811

12+
permissions:
13+
id-token: write
14+
contents: read
15+
916
jobs:
17+
prep:
18+
name: Prep for Promote
19+
runs-on: ubuntu-latest
20+
outputs:
21+
cpuarch: ${{ steps.setarch.outputs.cpuarch }}
22+
steps:
23+
- name: Checkout
24+
uses: actions/checkout@v5
25+
26+
- name: Set CPU Architecture
27+
id: setarch
28+
run: |
29+
echo "### :abacus: Architecture Selection" >> $GITHUB_STEP_SUMMARY
30+
if [[ -f .aws-architecture ]]; then
31+
ARCH=$(cat .aws-architecture)
32+
echo "\`$ARCH\` was read from \`.aws-architecture\` and passed to the deploy job." >> $GITHUB_STEP_SUMMARY
33+
else
34+
ARCH="linux/amd64"
35+
echo "No \`.aws-architecture\` file, so default \`$ARCH\` was passed to the deploy job." >> $GITHUB_STEP_SUMMARY
36+
fi
37+
if [[ "$ARCH" != "linux/arm64" && "$ARCH" != "linux/amd64" ]]; then
38+
echo "$ARCH is INVALID architecture!"
39+
echo "$ARCH is INVALID architecture!" >> $GITHUB_STEP_SUMMARY
40+
exit 1
41+
fi
42+
echo "cpuarch=$ARCH" >> $GITHUB_OUTPUT
43+
1044
deploy:
11-
name: Prod Container Promote
12-
uses: mitlibraries/.github/.github/workflows/ecr-shared-promote-prod.yml@main
45+
needs: prep
46+
name: Deploy
47+
uses: mitlibraries/.github/.github/workflows/ecr-multi-arch-promote-prod.yml@main
1348
secrets: inherit
1449
with:
1550
AWS_REGION: "us-east-1"
1651
GHA_ROLE_STAGE: browsertrix-harvester-gha-stage
1752
GHA_ROLE_PROD: browsertrix-harvester-gha-prod
1853
ECR_STAGE: "browsertrix-harvester-stage"
1954
ECR_PROD: "browsertrix-harvester-prod"
55+
CPU_ARCH: ${{ needs.prep.outputs.cpuarch }}
2056
# FUNCTION: ""
2157

.github/workflows/stage-build.yml

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
### This is the Terraform-generated dev-build.yml workflow for the browsertrix-harvester-stage app repository ###
2-
### If this is a Lambda repo, uncomment the FUNCTION line at the end of the document ###
3-
### If the container requires any additional pre-build commands, uncomment and edit ###
4-
### the PREBUILD line at the end of the document. ###
1+
### This is the Terraform-generated stage-build.yml workflow for the ###
2+
### browsertrix-harvester-stage app repository. ###
3+
### If this is a Lambda repo, uncomment the FUNCTION line at the end of ###
4+
### the document. If the container requires any additional pre-build ###
5+
### commands, uncomment and edit the PREBUILD line at the end of the ###
6+
### document. ###
7+
58
name: Stage Container Build and Deploy
69
on:
710
workflow_dispatch:
@@ -11,14 +14,47 @@ on:
1114
paths-ignore:
1215
- '.github/**'
1316

17+
permissions:
18+
id-token: write
19+
contents: read
20+
1421
jobs:
22+
prep:
23+
name: Prep for Build
24+
runs-on: ubuntu-latest
25+
outputs:
26+
cpuarch: ${{ steps.setarch.outputs.cpuarch }}
27+
steps:
28+
- name: Checkout
29+
uses: actions/checkout@v5
30+
31+
- name: Set CPU Architecture
32+
id: setarch
33+
run: |
34+
echo "### :abacus: Architecture Selection" >> $GITHUB_STEP_SUMMARY
35+
if [[ -f .aws-architecture ]]; then
36+
ARCH=$(cat .aws-architecture)
37+
echo "\`$ARCH\` was read from \`.aws-architecture\` and passed to the deploy job." >> $GITHUB_STEP_SUMMARY
38+
else
39+
ARCH="linux/amd64"
40+
echo "No \`.aws-architecture\` file, so default \`$ARCH\` was passed to the deploy job." >> $GITHUB_STEP_SUMMARY
41+
fi
42+
if [[ "$ARCH" != "linux/arm64" && "$ARCH" != "linux/amd64" ]]; then
43+
echo "$ARCH is INVALID architecture!"
44+
echo "$ARCH is INVALID architecture!" >> $GITHUB_STEP_SUMMARY
45+
exit 1
46+
fi
47+
echo "cpuarch=$ARCH" >> $GITHUB_OUTPUT
48+
1549
deploy:
16-
name: Stage Container Deploy
17-
uses: mitlibraries/.github/.github/workflows/ecr-shared-deploy-stage.yml@main
50+
needs: prep
51+
name: Stage Deploy
52+
uses: mitlibraries/.github/.github/workflows/ecr-multi-arch-deploy-stage.yml@main
1853
secrets: inherit
1954
with:
2055
AWS_REGION: "us-east-1"
2156
GHA_ROLE: "browsertrix-harvester-gha-stage"
2257
ECR: "browsertrix-harvester-stage"
58+
CPU_ARCH: ${{ needs.prep.outputs.cpuarch }}
2359
# FUNCTION: ""
2460
# PREBUILD:

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,5 @@ cython_debug/
155155
.DS_Store
156156
output/
157157
.vscode/
158-
.idea/
158+
.idea/
159+
.arch_tag

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ repos:
2424
types: ["python"]
2525
- id: pip-audit
2626
name: pip-audit
27-
entry: pipenv run pip-audit
27+
entry: pipenv run pip-audit --ignore-vuln GHSA-4xh5-x5gv-qwph
2828
language: system
2929
pass_filenames: false

Makefile

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
### This is the Terraform-generated header for browsertrix-harvester-dev. If ###
2-
### this is a Lambda repo, uncomment the FUNCTION line below ###
3-
### and review the other commented lines in the document. ###
4-
ECR_NAME_DEV:=browsertrix-harvester-dev
5-
ECR_URL_DEV:=222053980223.dkr.ecr.us-east-1.amazonaws.com/browsertrix-harvester-dev
6-
### End of Terraform-generated header ###
1+
### This is the Terraform-generated header for browsertrix-harvester-dev. If ###
2+
### this is a Lambda repo, uncomment the FUNCTION line below ###
3+
### and review the other commented lines in the document. ###
4+
ECR_NAME_DEV := browsertrix-harvester-dev
5+
ECR_URL_DEV := 222053980223.dkr.ecr.us-east-1.amazonaws.com/browsertrix-harvester-dev
6+
CPU_ARCH ?= $(shell cat .aws-architecture 2>/dev/null || echo "linux/amd64")
7+
### End of Terraform-generated header ###
78
SHELL=/bin/bash
89
DATETIME:=$(shell date -u +%Y%m%dT%H%M%SZ)
910

@@ -41,7 +42,7 @@ ruff:
4142
pipenv run ruff check .
4243

4344
safety: # Check for security vulnerabilities and verify Pipfile.lock is up-to-date
44-
pipenv run pip-audit
45+
pipenv run pip-audit --ignore-vuln GHSA-4xh5-x5gv-qwph
4546
pipenv verify
4647

4748
# apply changes to resolve any linting errors
@@ -85,30 +86,46 @@ test-parse-url-content:
8586
--wacz-input-file="tests/fixtures/example.wacz" \
8687
--url="https://example.com/hello-world"
8788

89+
8890
### Terraform-generated Developer Deploy Commands for Dev environment ###
89-
dist-dev: ## Build docker container (intended for developer-based manual build)
90-
docker build --platform linux/amd64 \
91-
-t $(ECR_URL_DEV):latest \
92-
-t $(ECR_URL_DEV):`git describe --always` \
93-
-t $(ECR_NAME_DEV):latest .
91+
check-arch:
92+
@ARCH_FILE=".aws-architecture"; \
93+
if [[ "$(CPU_ARCH)" != "linux/amd64" && "$(CPU_ARCH)" != "linux/arm64" ]]; then \
94+
echo "Invalid CPU_ARCH: $(CPU_ARCH)"; exit 1; \
95+
fi; \
96+
if [[ -f $$ARCH_FILE ]]; then \
97+
echo "latest-$(shell echo $(CPU_ARCH) | cut -d'/' -f2)" > .arch_tag; \
98+
else \
99+
echo "latest" > .arch_tag; \
100+
fi
101+
102+
dist-dev: check-arch ## Build docker container (intended for developer-based manual build)
103+
@ARCH_TAG=$$(cat .arch_tag); \
104+
docker buildx inspect $(ECR_NAME_DEV) >/dev/null 2>&1 || docker buildx create --name $(ECR_NAME_DEV) --use; \
105+
docker buildx use $(ECR_NAME_DEV); \
106+
docker buildx build --platform $(CPU_ARCH) \
107+
--load \
108+
--tag $(ECR_URL_DEV):$$ARCH_TAG \
109+
--tag $(ECR_URL_DEV):make-$$ARCH_TAG \
110+
--tag $(ECR_URL_DEV):make-$(shell git describe --always) \
111+
--tag $(ECR_NAME_DEV):$$ARCH_TAG \
112+
.
94113

95114
publish-dev: dist-dev ## Build, tag and push (intended for developer-based manual publish)
96-
docker login -u AWS -p $$(aws ecr get-login-password --region us-east-1) $(ECR_URL_DEV)
97-
docker push $(ECR_URL_DEV):latest
98-
docker push $(ECR_URL_DEV):`git describe --always`
99-
100-
### Terraform-generated manual shortcuts for deploying to Stage. This requires ###
101-
### that ECR_NAME_STAGE, ECR_URL_STAGE, and FUNCTION_STAGE environment ###
102-
### variables are set locally by the developer and that the developer has ###
103-
### authenticated to the correct AWS Account. The values for the environment ###
104-
### variables can be found in the stage_build.yml caller workflow. ###
105-
dist-stage: ## Only use in an emergency
106-
docker build --platform linux/amd64 \
107-
-t $(ECR_URL_STAGE):latest \
108-
-t $(ECR_URL_STAGE):`git describe --always` \
109-
-t $(ECR_NAME_STAGE):latest .
110-
111-
publish-stage: ## Only use in an emergency
112-
docker login -u AWS -p $$(aws ecr get-login-password --region us-east-1) $(ECR_URL_STAGE)
113-
docker push $(ECR_URL_STAGE):latest
114-
docker push $(ECR_URL_STAGE):`git describe --always`
115+
@ARCH_TAG=$$(cat .arch_tag); \
116+
aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $(ECR_URL_DEV); \
117+
docker push $(ECR_URL_DEV):$$ARCH_TAG; \
118+
docker push $(ECR_URL_DEV):make-$$ARCH_TAG; \
119+
docker push $(ECR_URL_DEV):make-$(shell git describe --always); \
120+
echo "Cleaning up dangling Docker images..."; \
121+
docker image prune -f --filter "dangling=true"
122+
123+
docker-clean: ## Clean up Docker detritus
124+
@ARCH_TAG=$$(cat .arch_tag); \
125+
echo "Cleaning up Docker leftovers (containers, images, builders)"; \
126+
docker rmi -f $(ECR_URL_DEV):$$ARCH_TAG; \
127+
docker rmi -f $(ECR_URL_DEV):make-$$ARCH_TAG; \
128+
docker rmi -f $(ECR_URL_DEV):make-$(shell git describe --always) || true; \
129+
docker rmi -f $(ECR_NAME_DEV):$$ARCH_TAG || true; \
130+
docker buildx rm $(ECR_NAME_DEV) || true
131+
@rm -rf .arch_tag

Pipfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,4 @@ python_version = "3.12"
3333

3434
[scripts]
3535
harvester = "python -c \"from harvester.cli import main; main()\""
36-
harvester-dockerized = "docker run -it -v $HOME/.aws:/root/.aws -v $PWD/output/crawls:/crawls browsertrix-harvester-dev:latest"
36+
harvester-dockerized = "docker run -it -v $PWD/output/crawls:/crawls browsertrix-harvester-dev:latest"

0 commit comments

Comments (0)