Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 53 additions & 19 deletions .github/workflows/base.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
name: PyDeequ V2 Tests

on:
push:
branches:
Expand All @@ -7,33 +9,65 @@ on:
- "master"

jobs:
test:
# V2 tests with Spark Connect (Python 3.12)
v2-tests:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
PYSPARK_VERSION: ["3.1.3", "3.2", "3.3", "3.5"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v2
name: Install Python 3.8
- uses: actions/setup-python@v5
name: Install Python 3.12
with:
python-version: 3.8
python-version: "3.12"

- uses: actions/setup-java@v1
name: Setup Java 11
if: startsWith(matrix.PYSPARK_VERSION, '3')
- uses: actions/setup-java@v4
name: Setup Java 17
with:
java-version: "11"
distribution: "corretto"
java-version: "17"

- name: Running tests with pyspark==${{matrix.PYSPARK_VERSION}}
env:
SPARK_VERSION: ${{matrix.PYSPARK_VERSION}}
- name: Download Spark 3.5
run: |
curl -L -o spark-3.5.0-bin-hadoop3.tgz \
https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
tar -xzf spark-3.5.0-bin-hadoop3.tgz
echo "SPARK_HOME=$PWD/spark-3.5.0-bin-hadoop3" >> $GITHUB_ENV

- name: Download Deequ JAR
run: |
pip install --upgrade pip
curl -L -o deequ_2.12-2.1.0b-spark-3.5.jar \
https://github.com/awslabs/python-deequ/releases/download/v2.0.0b1/deequ_2.12-2.1.0b-spark-3.5.jar

- name: Install Python dependencies
run: |
pip install --upgrade pip setuptools
pip install poetry==1.7.1
poetry install
poetry add pyspark==$SPARK_VERSION
poetry run python -m pytest -s tests
poetry add "pyspark[connect]==3.5.0"

- name: Run V2 unit tests
run: |
poetry run pytest tests/v2/test_unit.py -v

- name: Start Spark Connect Server
run: |
$SPARK_HOME/sbin/start-connect-server.sh \
--packages org.apache.spark:spark-connect_2.12:3.5.0 \
--jars $PWD/deequ_2.12-2.1.0b-spark-3.5.jar \
--conf spark.connect.extensions.relation.classes=com.amazon.deequ.connect.DeequRelationPlugin
# Wait for server to start
sleep 20
# Verify server is running
ps aux | grep SparkConnectServer | grep -v grep

- name: Run V2 integration tests
env:
SPARK_REMOTE: "sc://localhost:15002"
run: |
poetry run pytest tests/v2/ -v --ignore=tests/v2/test_unit.py

- name: Stop Spark Connect Server
if: always()
run: |
$SPARK_HOME/sbin/stop-connect-server.sh || true
Loading
Loading