diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml
index d5e4ec7..4c6c7cb 100644
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -1,3 +1,5 @@
+name: PyDeequ V2 Tests
+
on:
push:
branches:
@@ -7,33 +9,65 @@ on:
- "master"
jobs:
- test:
+ # V2 tests with Spark Connect (Python 3.12)
+ v2-tests:
runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- PYSPARK_VERSION: ["3.1.3", "3.2", "3.3", "3.5"]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- - uses: actions/setup-python@v2
- name: Install Python 3.8
+ - uses: actions/setup-python@v5
+ name: Install Python 3.12
with:
- python-version: 3.8
+ python-version: "3.12"
- - uses: actions/setup-java@v1
- name: Setup Java 11
- if: startsWith(matrix.PYSPARK_VERSION, '3')
+ - uses: actions/setup-java@v4
+ name: Setup Java 17
with:
- java-version: "11"
+ distribution: "corretto"
+ java-version: "17"
- - name: Running tests with pyspark==${{matrix.PYSPARK_VERSION}}
- env:
- SPARK_VERSION: ${{matrix.PYSPARK_VERSION}}
+ - name: Download Spark 3.5
+ run: |
+ curl -L -o spark-3.5.0-bin-hadoop3.tgz \
+ https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
+ tar -xzf spark-3.5.0-bin-hadoop3.tgz
+ echo "SPARK_HOME=$PWD/spark-3.5.0-bin-hadoop3" >> $GITHUB_ENV
+
+ - name: Download Deequ JAR
run: |
- pip install --upgrade pip
+ curl -L -o deequ_2.12-2.1.0b-spark-3.5.jar \
+ https://github.com/awslabs/python-deequ/releases/download/v2.0.0b1/deequ_2.12-2.1.0b-spark-3.5.jar
+
+ - name: Install Python dependencies
+ run: |
+ pip install --upgrade pip setuptools
pip install poetry==1.7.1
poetry install
- poetry add pyspark==$SPARK_VERSION
- poetry run python -m pytest -s tests
+ poetry add "pyspark[connect]==3.5.0"
+
+ - name: Run V2 unit tests
+ run: |
+ poetry run pytest tests/v2/test_unit.py -v
+
+ - name: Start Spark Connect Server
+ run: |
+ $SPARK_HOME/sbin/start-connect-server.sh \
+ --packages org.apache.spark:spark-connect_2.12:3.5.0 \
+ --jars $PWD/deequ_2.12-2.1.0b-spark-3.5.jar \
+ --conf spark.connect.extensions.relation.classes=com.amazon.deequ.connect.DeequRelationPlugin
+ # Wait for server to start
+ sleep 20
+ # Verify server is running
+ ps aux | grep SparkConnectServer | grep -v grep
+
+ - name: Run V2 integration tests
+ env:
+ SPARK_REMOTE: "sc://localhost:15002"
+ run: |
+ poetry run pytest tests/v2/ -v --ignore=tests/v2/test_unit.py
+
+ - name: Stop Spark Connect Server
+ if: always()
+ run: |
+ $SPARK_HOME/sbin/stop-connect-server.sh || true
diff --git a/README.md b/README.md
index a6003c9..2d19db5 100644
--- a/README.md
+++ b/README.md
@@ -1,103 +1,489 @@
# PyDeequ
-PyDeequ is a Python API for [Deequ](https://github.com/awslabs/deequ), a library built on top of Apache Spark for defining "unit tests for data", which measure data quality in large datasets. PyDeequ is written to support usage of Deequ in Python.
+PyDeequ is a Python API for [Deequ](https://github.com/awslabs/deequ), a library built on top of Apache Spark for defining "unit tests for data", which measure data quality in large datasets.
[](https://opensource.org/licenses/Apache-2.0) 
-There are 4 main components of Deequ, and they are:
-- Metrics Computation:
- - `Profiles` leverages Analyzers to analyze each column of a dataset.
- - `Analyzers` serve here as a foundational module that computes metrics for data profiling and validation at scale.
-- Constraint Suggestion:
- - Specify rules for various groups of Analyzers to be run over a dataset to return back a collection of constraints suggested to run in a Verification Suite.
-- Constraint Verification:
- - Perform data validation on a dataset with respect to various constraints set by you.
-- Metrics Repository
- - Allows for persistence and tracking of Deequ runs over time.
+## What's New in PyDeequ 2.0
+
+PyDeequ 2.0 introduces a new architecture using **Spark Connect**, bringing significant improvements:
+
+| Feature | PyDeequ 1.x | PyDeequ 2.0 |
+|---------|-------------|-------------|
+| Communication | Py4J (JVM bridge) | Spark Connect (gRPC) |
+| Assertions | Python lambdas | Serializable predicates |
+| Spark Session | Local only | Local or Remote |
+| Architecture | Tight JVM coupling | Clean client-server |
+
+**Key Benefits:**
+- **No Py4J dependency** - Uses Spark Connect protocol for communication
+- **Serializable predicates** - Replace Python lambdas with predicate objects (`eq`, `gte`, `between`, etc.)
+- **Remote execution** - Connect to remote Spark clusters via Spark Connect
+- **Cleaner API** - Simplified imports and more Pythonic interface
+
+### Architecture
+
+```mermaid
+flowchart LR
+ subgraph CLIENT["Python Client"]
+    A["Python Code"] --> B["Protobuf<br/>Serialization"]
+ end
+ B -- gRPC --> C["Spark Connect (gRPC)"]
+ subgraph SERVER["Spark Connect Server"]
+ D["DeequRelationPlugin"] --> E["Deequ Core"] --> F["Spark DataFrame API"] --> G["(Data)"]
+ end
+ G --> H["Results"] -- gRPC --> I["Python DataFrame"]
+ %% Styling for compactness and distinction
+ classDef code fill:#C8F2FB,stroke:#35a7c2,color:#13505B,font-weight:bold;
+ class A code;
+```
-
+**How it works:**
+1. **Client Side**: PyDeequ 2.0 builds checks and analyzers as Protobuf messages
+2. **Transport**: Messages are sent via gRPC to the Spark Connect server
+3. **Server Side**: The `DeequRelationPlugin` deserializes messages and executes Deequ operations
+4. **Results**: Verification results are returned as a Spark DataFrame
+
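+For example, the same client code can target a local server or a remote cluster simply by changing the Spark Connect endpoint (a minimal sketch; the remote hostname below is hypothetical):
+
+```python
+from pyspark.sql import SparkSession
+
+# Local Spark Connect server (default port 15002)
+spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+
+# Or a remote cluster exposed via Spark Connect (hypothetical host)
+# spark = SparkSession.builder.remote("sc://spark-connect.example.com:15002").getOrCreate()
+```
+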
+### Feature Support Matrix
+
+| Feature | PyDeequ 1.x | PyDeequ 2.0 |
+|---------|:-----------:|:-----------:|
+| **Constraint Verification** | | |
+| VerificationSuite | Yes | Yes |
+| Check constraints | Yes | Yes |
+| Custom SQL expressions | Yes | Yes |
+| **Metrics & Analysis** | | |
+| AnalysisRunner | Yes | Yes |
+| All standard analyzers | Yes | Yes |
+| **Column Profiling** | | |
+| ColumnProfilerRunner | Yes | Yes |
+| Numeric statistics | Yes | Yes |
+| KLL sketch profiling | Yes | Yes |
+| Low-cardinality histograms | Yes | Yes |
+| **Constraint Suggestions** | | |
+| ConstraintSuggestionRunner | Yes | Yes |
+| Rule sets (DEFAULT, EXTENDED, etc.) | Yes | Yes |
+| Train/test split evaluation | Yes | Yes |
+| **Metrics Repository** | | |
+| FileSystemMetricsRepository | Yes | Planned |
+| **Execution Mode** | | |
+| Local Spark | Yes | No |
+| Spark Connect (remote) | No | Yes |
+
+---
+
+## PyDeequ 2.0 Beta - Quick Start
+
+### Requirements
+
+- Python 3.9+
+- Apache Spark 3.5.0+
+- Java 17 (Java 21+ has known compatibility issues with Spark 3.5)
+
+### Step 1: Download Deequ Pre-release JAR
+
+Download the pre-compiled Deequ JAR with Spark Connect support from the [GitHub pre-releases](https://github.com/awslabs/python-deequ/releases):
-## 🎉 Announcements 🎉
-- **NEW!!!** The 1.4.0 release of Python Deequ has been published to PYPI https://pypi.org/project/pydeequ/. This release adds support for Spark 3.5.0.
-- The latest version of Deequ, 2.0.7, is made available With Python Deequ 1.3.0.
-- 1.1.0 release of Python Deequ has been published to PYPI https://pypi.org/project/pydeequ/. This release brings many recent upgrades including support up to Spark 3.3.0! Any feedbacks are welcome through github issues.
-- With PyDeequ v0.1.8+, we now officially support Spark3 ! Just make sure you have an environment variable `SPARK_VERSION` to specify your Spark version!
-- We've release a blogpost on integrating PyDeequ onto AWS leveraging services such as AWS Glue, Athena, and SageMaker! Check it out: [Monitor data quality in your data lake using PyDeequ and AWS Glue](https://aws.amazon.com/blogs/big-data/monitor-data-quality-in-your-data-lake-using-pydeequ-and-aws-glue/).
-- Check out the [PyDeequ Release Announcement Blogpost](https://aws.amazon.com/blogs/big-data/testing-data-quality-at-scale-with-pydeequ/) with a tutorial walkthrough the Amazon Reviews dataset!
-- Join the PyDeequ community on [PyDeequ Slack](https://join.slack.com/t/pydeequ/shared_invite/zt-te6bntpu-yaqPy7bhiN8Lu0NxpZs47Q) to chat with the devs!
+```bash
+mkdir -p ~/deequ-beta && cd ~/deequ-beta
-## Quickstart
+curl -L -o deequ_2.12-2.1.0b-spark-3.5.jar \
+ https://github.com/awslabs/python-deequ/releases/download/v2.0.0b1/deequ_2.12-2.1.0b-spark-3.5.jar
+```
-The following will quickstart you with some basic usage. For more in-depth examples, take a look in the [`tutorials/`](tutorials/) directory for executable Jupyter notebooks of each module. For documentation on supported interfaces, view the [`documentation`](https://pydeequ.readthedocs.io/).
+### Step 2: Set Up Spark (if needed)
-### Installation
+This step is optional and only needed for quick local testing if you do not already have a Spark 3.5 installation.
+```bash
+# Download Spark 3.5
+curl -L -o spark-3.5.0-bin-hadoop3.tgz \
+ https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
-You can install [PyDeequ via pip](https://pypi.org/project/pydeequ/).
+tar -xzf spark-3.5.0-bin-hadoop3.tgz
+export SPARK_HOME=~/deequ-beta/spark-3.5.0-bin-hadoop3
+export PATH=$SPARK_HOME/bin:$PATH
```
-pip install pydeequ
+
+### Step 3: Start Spark Connect Server
+
+Spark Connect is a client-server architecture introduced in Spark 3.4 that allows remote connectivity to Spark clusters. For more details, see the [Spark Connect Overview](https://spark.apache.org/docs/latest/spark-connect-overview.html).
+
+```bash
+export JAVA_HOME=/path/to/java17
+
+$SPARK_HOME/sbin/start-connect-server.sh \
+ --packages org.apache.spark:spark-connect_2.12:3.5.0 \
+ --jars ~/deequ-beta/deequ_2.12-2.1.0b-spark-3.5.jar \
+ --conf spark.connect.extensions.relation.classes=com.amazon.deequ.connect.DeequRelationPlugin
+```
+
+**Command explanation:**
+| Option | Description |
+|--------|-------------|
+| `--packages` | Downloads the Spark Connect package from Maven |
+| `--jars` | Loads the Deequ JAR with Spark Connect support |
+| `--conf spark.connect.extensions.relation.classes` | Registers the Deequ plugin to handle custom operations |
+
+The server starts on `localhost:15002` by default. You can verify it's running:
+```bash
+ps aux | grep SparkConnectServer
+```
+
+### Step 4: Install PyDeequ 2.0
+
+Install the beta wheel directly from the GitHub release:
+
+```bash
+pip install https://github.com/awslabs/python-deequ/releases/download/v2.0.0b1/pydeequ-2.0.0b1-py3-none-any.whl
+pip install "pyspark[connect]==3.5.0"
+
+# Python 3.12+ users: install setuptools (it provides distutils, which was removed in 3.12)
+pip install setuptools
```
-### Set up a PySpark session
+### Step 5: Run Your First Check
+
```python
from pyspark.sql import SparkSession, Row
-import pydeequ
+from pydeequ.v2.checks import Check, CheckLevel
+from pydeequ.v2.verification import VerificationSuite
+from pydeequ.v2.predicates import eq, gte
+
+# Connect to Spark Connect server
+spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+
+# Create sample data
+df = spark.createDataFrame([
+ Row(id=1, name="Alice", age=25),
+ Row(id=2, name="Bob", age=30),
+ Row(id=3, name="Charlie", age=None),
+])
+
+# Define checks using the new predicate API
+check = (Check(CheckLevel.Error, "Data quality checks")
+ .hasSize(eq(3))
+ .isComplete("id")
+ .isComplete("name")
+ .hasCompleteness("age", gte(0.5))
+ .isUnique("id"))
+
+# Run verification
+result = (VerificationSuite(spark)
+ .onData(df)
+ .addCheck(check)
+ .run())
+
+result.show(truncate=False)
+spark.stop()
+```
-spark = (SparkSession
- .builder
- .config("spark.jars.packages", pydeequ.deequ_maven_coord)
- .config("spark.jars.excludes", pydeequ.f2j_maven_coord)
- .getOrCreate())
+### Stop the Server
-df = spark.sparkContext.parallelize([
- Row(a="foo", b=1, c=5),
- Row(a="bar", b=2, c=6),
- Row(a="baz", b=3, c=None)]).toDF()
+```bash
+$SPARK_HOME/sbin/stop-connect-server.sh
```
+### Full Example
+
+For a comprehensive example covering data analysis, constraint verification, column profiling, and constraint suggestions, see [tutorials/data_quality_example_v2.py](tutorials/data_quality_example_v2.py).
+
+---
+
+## PyDeequ 2.0 API Reference
+
+### Predicates (replace lambdas)
+
+```python
+from pydeequ.v2.predicates import eq, gt, gte, lt, lte, between
+
+check.hasSize(eq(3)) # size == 3
+check.hasCompleteness("col", gte(0.9)) # completeness >= 0.9
+check.hasMean("value", between(10, 20)) # 10 <= mean <= 20
+```
+
+| Predicate | Description | Example |
+|-----------|-------------|---------|
+| `eq(v)` | Equal to v | `eq(1.0)` |
+| `gt(v)` | Greater than v | `gt(0)` |
+| `gte(v)` | Greater than or equal | `gte(0.9)` |
+| `lt(v)` | Less than v | `lt(100)` |
+| `lte(v)` | Less than or equal | `lte(1.0)` |
+| `between(a, b)` | Between a and b (inclusive) | `between(0, 1)` |
+
### Analyzers
```python
-from pydeequ.analyzers import *
+from pydeequ.v2.verification import AnalysisRunner
+from pydeequ.v2.analyzers import (
+ Size, Completeness, Mean, Sum, Minimum, Maximum,
+ StandardDeviation, ApproxCountDistinct, Distinctness,
+ Uniqueness, Entropy, Correlation
+)
+
+result = (AnalysisRunner(spark)
+ .onData(df)
+ .addAnalyzer(Size())
+ .addAnalyzer(Completeness("name"))
+ .addAnalyzer(Mean("age"))
+ .run())
+
+result.show()
+```
-analysisResult = AnalysisRunner(spark) \
- .onData(df) \
- .addAnalyzer(Size()) \
- .addAnalyzer(Completeness("b")) \
- .run()
+### Constraint Methods
+
+| Method | Description |
+|--------|-------------|
+| `hasSize(predicate)` | Check total row count |
+| `isComplete(column)` | Check column has no nulls |
+| `hasCompleteness(column, predicate)` | Check completeness ratio |
+| `areComplete(columns)` | Check multiple columns have no nulls |
+| `isUnique(column)` | Check column values are unique |
+| `hasUniqueness(columns, predicate)` | Check uniqueness ratio |
+| `hasDistinctness(columns, predicate)` | Check distinctness ratio |
+| `hasMin(column, predicate)` | Check minimum value |
+| `hasMax(column, predicate)` | Check maximum value |
+| `hasMean(column, predicate)` | Check mean value |
+| `hasSum(column, predicate)` | Check sum |
+| `hasStandardDeviation(column, predicate)` | Check standard deviation |
+| `hasApproxCountDistinct(column, predicate)` | Check approximate distinct count |
+| `hasCorrelation(col1, col2, predicate)` | Check correlation between columns |
+| `hasEntropy(column, predicate)` | Check entropy |
+| `hasApproxQuantile(column, quantile, predicate)` | Check approximate quantile |
+| `satisfies(expression, name, predicate)` | Custom SQL expression |
+| `hasPattern(column, pattern, predicate)` | Check regex pattern match ratio |
+| `containsEmail(column, predicate)` | Check email format ratio |
+| `containsCreditCardNumber(column, predicate)` | Check credit card format ratio |
+| `isNonNegative(column)` | Check all values >= 0 |
+| `isPositive(column)` | Check all values > 0 |
+
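+For example, several of these methods can be combined on a single `Check` (a sketch; the column names are illustrative):
+
+```python
+from pydeequ.v2.checks import Check, CheckLevel
+from pydeequ.v2.predicates import gte, lte
+
+# Hypothetical columns: "email", "status", "amount"
+check = (Check(CheckLevel.Error, "Order sanity checks")
+    .isComplete("status")
+    .isNonNegative("amount")
+    .hasMax("amount", lte(10000))
+    .containsEmail("email", gte(0.95))
+    .satisfies("amount > 0 OR status = 'FREE'", "paid or free", gte(1.0)))
+```
+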
+### Column Profiler
+
+Profile column distributions and statistics across your dataset:
-analysisResult_df = AnalyzerContext.successMetricsAsDataFrame(spark, analysisResult)
-analysisResult_df.show()
+```python
+from pydeequ.v2.profiles import ColumnProfilerRunner, KLLParameters
+
+# Basic profiling
+profiles = (ColumnProfilerRunner(spark)
+ .onData(df)
+ .run())
+
+profiles.show()
+
+# Advanced profiling with options
+profiles = (ColumnProfilerRunner(spark)
+ .onData(df)
+ .restrictToColumns(["id", "name", "age"]) # Profile specific columns
+ .withLowCardinalityHistogramThreshold(100) # Generate histograms for low-cardinality columns
+ .withKLLProfiling() # Enable KLL sketch for approximate quantiles
+ .setKLLParameters(KLLParameters(
+ sketch_size=2048,
+ shrinking_factor=0.64,
+ num_buckets=64
+ ))
+ .run())
```
-### Profile
+**Profile Result Schema:**
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `column` | STRING | Column name |
+| `completeness` | DOUBLE | Non-null ratio (0.0-1.0) |
+| `approx_distinct_values` | LONG | Approximate cardinality |
+| `data_type` | STRING | Detected data type |
+| `is_data_type_inferred` | BOOLEAN | Whether type was inferred |
+| `type_counts` | STRING | JSON of type distribution |
+| `histogram` | STRING | JSON histogram (low cardinality only) |
+| `mean` | DOUBLE | Mean (numeric columns only) |
+| `minimum` | DOUBLE | Minimum value (numeric only) |
+| `maximum` | DOUBLE | Maximum value (numeric only) |
+| `sum` | DOUBLE | Sum (numeric only) |
+| `std_dev` | DOUBLE | Standard deviation (numeric only) |
+| `approx_percentiles` | STRING | JSON percentiles (numeric only) |
+| `kll_buckets` | STRING | JSON KLL buckets (if enabled) |
+
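+Because the profiler returns a regular Spark DataFrame, you can slice the result with the usual DataFrame API (a sketch that continues the profiling example above, using column names from the schema):
+
+```python
+# Keep only the per-column statistics of interest
+(profiles
+    .select("column", "completeness", "approx_distinct_values", "data_type")
+    .orderBy("column")
+    .show(truncate=False))
+```
+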
+### Constraint Suggestions
+
+Auto-generate data quality constraints based on your data:
```python
-from pydeequ.profiles import *
+from pydeequ.v2.suggestions import ConstraintSuggestionRunner, Rules
+
+# Basic suggestion generation
+suggestions = (ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run())
+
+suggestions.show(truncate=False)
+
+# Advanced usage with train/test evaluation
+suggestions = (ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.DEFAULT)
+ .addConstraintRules(Rules.EXTENDED)
+ .restrictToColumns(["id", "status", "score"])
+ .useTrainTestSplitWithTestsetRatio(0.2, seed=42) # Evaluate suggestions on test set
+ .run())
+```
-result = ColumnProfilerRunner(spark) \
- .onData(df) \
- .run()
+**Available Rule Sets:**
+
+| Rule Set | Description |
+|----------|-------------|
+| `Rules.DEFAULT` | Completeness, type, categorical range, non-negative |
+| `Rules.STRING` | String length constraints (min/max length) |
+| `Rules.NUMERICAL` | Numeric constraints (min, max, mean, stddev) |
+| `Rules.COMMON` | Uniqueness for approximately unique columns |
+| `Rules.EXTENDED` | All rules combined |
+
+**Suggestion Result Schema:**
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `column_name` | STRING | Column the constraint applies to |
+| `constraint_name` | STRING | Type of constraint |
+| `current_value` | STRING | Current metric value |
+| `description` | STRING | Human-readable description |
+| `suggesting_rule` | STRING | Rule that generated this |
+| `code_for_constraint` | STRING | Python code snippet |
+| `evaluation_status` | STRING | "Success" or "Failure" (if train/test enabled) |
+| `evaluation_metric_value` | DOUBLE | Metric value on test set |
+
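+Since suggestions are also returned as a DataFrame, you can inspect just the generated constraint code (a sketch that continues the example above, using column names from the schema):
+
+```python
+# Show the ready-to-use constraint snippet suggested for each column
+(suggestions
+    .select("column_name", "constraint_name", "code_for_constraint")
+    .show(truncate=False))
+```
+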
+### Migration from 1.x to 2.0
+
+**Import changes:**
+```python
+# Before (1.x)
+from pydeequ.checks import Check, CheckLevel
+from pydeequ.verification import VerificationSuite
+
+# After (2.0)
+from pydeequ.v2.checks import Check, CheckLevel
+from pydeequ.v2.verification import VerificationSuite
+from pydeequ.v2.predicates import eq, gte, between
+```
+
+**Lambda to predicate:**
+```python
+# Before (1.x)
+check.hasSize(lambda x: x == 3)
+check.hasCompleteness("col", lambda x: x >= 0.9)
+# After (2.0)
+check.hasSize(eq(3))
+check.hasCompleteness("col", gte(0.9))
+```
+
+**Profiler changes:**
+```python
+# Before (1.x) - returns Python object
+from pydeequ.profiles import ColumnProfilerRunner
+result = ColumnProfilerRunner(spark).onData(df).run()
for col, profile in result.profiles.items():
print(profile)
+
+# After (2.0) - returns DataFrame
+from pydeequ.v2.profiles import ColumnProfilerRunner
+result = ColumnProfilerRunner(spark).onData(df).run()
+result.show()
```
-### Constraint Suggestions
+**Suggestions changes:**
+```python
+# Before (1.x) - returns Python object
+from pydeequ.suggestions import ConstraintSuggestionRunner, DEFAULT
+result = ConstraintSuggestionRunner(spark).onData(df).addConstraintRule(DEFAULT()).run()
+print(result)
+
+# After (2.0) - returns DataFrame
+from pydeequ.v2.suggestions import ConstraintSuggestionRunner, Rules
+result = ConstraintSuggestionRunner(spark).onData(df).addConstraintRules(Rules.DEFAULT).run()
+result.show()
+```
+
+---
+
+## PyDeequ 2.0 Troubleshooting
+
+### Server won't start
+1. Check Java version: `java -version` (must be Java 17, not 21+)
+2. Check port availability: `lsof -i :15002`
+3. Check logs: `tail -f $SPARK_HOME/logs/spark-*SparkConnectServer*.out`
+
+### Connection refused
+Ensure the Spark Connect server is running:
+```bash
+ps aux | grep SparkConnectServer
+```
+
+### ClassNotFoundException: DeequRelationPlugin
+Ensure the Deequ JAR is correctly specified in `--jars` when starting the server.
+
+### UnsupportedOperationException: sun.misc.Unsafe not available
+This error occurs when using Java 21+ with Spark 3.5. Use Java 17 instead:
+```bash
+export JAVA_HOME=/path/to/java17
+```
+
+### ModuleNotFoundError: No module named 'distutils'
+This occurs on Python 3.12+ because `distutils` was removed. Install setuptools:
+```bash
+pip install setuptools
+```
+
+---
+
+## PyDeequ 1.x (Legacy)
+
+The legacy PyDeequ API uses Py4J for JVM communication. It is still available for backward compatibility.
+
+### Installation
+
+```bash
+pip install pydeequ
+```
+
+**Note:** Set the `SPARK_VERSION` environment variable to match your Spark version.
+
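+For example, you can set it from Python before importing `pydeequ` (a sketch; `3.5` is a placeholder for your installed Spark version):
+
+```python
+import os
+
+# Must match the Spark version installed in your environment
+os.environ["SPARK_VERSION"] = "3.5"
+
+import pydeequ  # pydeequ reads SPARK_VERSION when resolving the Deequ Maven coordinate
+```
+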
+### Quick Start (1.x)
```python
-from pydeequ.suggestions import *
+from pyspark.sql import SparkSession, Row
+import pydeequ
-suggestionResult = ConstraintSuggestionRunner(spark) \
- .onData(df) \
- .addConstraintRule(DEFAULT()) \
- .run()
+spark = (SparkSession
+ .builder
+ .config("spark.jars.packages", pydeequ.deequ_maven_coord)
+ .config("spark.jars.excludes", pydeequ.f2j_maven_coord)
+ .getOrCreate())
-# Constraint Suggestions in JSON format
-print(suggestionResult)
+df = spark.sparkContext.parallelize([
+ Row(a="foo", b=1, c=5),
+ Row(a="bar", b=2, c=6),
+ Row(a="baz", b=3, c=None)
+]).toDF()
+```
+
+### Analyzers (1.x)
+
+```python
+from pydeequ.analyzers import *
+
+analysisResult = AnalysisRunner(spark) \
+ .onData(df) \
+ .addAnalyzer(Size()) \
+ .addAnalyzer(Completeness("b")) \
+ .run()
+
+analysisResult_df = AnalyzerContext.successMetricsAsDataFrame(spark, analysisResult)
+analysisResult_df.show()
```
-### Constraint Verification
+### Constraint Verification (1.x)
```python
from pydeequ.checks import *
@@ -110,8 +496,8 @@ checkResult = VerificationSuite(spark) \
.addCheck(
check.hasSize(lambda x: x >= 3) \
.hasMin("b", lambda x: x == 0) \
- .isComplete("c") \
- .isUnique("a") \
+ .isComplete("c") \
+ .isUnique("a") \
.isContainedIn("a", ["foo", "bar", "baz"]) \
.isNonNegative("b")) \
.run()
@@ -120,9 +506,34 @@ checkResult_df = VerificationResult.checkResultsAsDataFrame(spark, checkResult)
checkResult_df.show()
```
-### Repository
+### Profile (1.x)
+
+```python
+from pydeequ.profiles import *
+
+result = ColumnProfilerRunner(spark) \
+ .onData(df) \
+ .run()
+
+for col, profile in result.profiles.items():
+ print(profile)
+```
+
+### Constraint Suggestions (1.x)
+
+```python
+from pydeequ.suggestions import *
+
+suggestionResult = ConstraintSuggestionRunner(spark) \
+ .onData(df) \
+ .addConstraintRule(DEFAULT()) \
+ .run()
+
+print(suggestionResult)
+```
+
+### Repository (1.x)
-Save to a Metrics Repository by adding the `useRepository()` and `saveOrAppendResult()` calls to your Analysis Runner.
```python
from pydeequ.repository import *
from pydeequ.analyzers import *
@@ -140,120 +551,107 @@ analysisResult = AnalysisRunner(spark) \
.run()
```
-To load previous runs, use the `repository` object to load previous results back in.
-
-```python
-result_metrep_df = repository.load() \
- .before(ResultKey.current_milli_time()) \
- .forAnalyzers([ApproxCountDistinct('b')]) \
- .getSuccessMetricsAsDataFrame()
-```
-
-### Wrapping up
-
-After you've ran your jobs with PyDeequ, be sure to shut down your Spark session to prevent any hanging processes.
+### Wrapping Up (1.x)
```python
spark.sparkContext._gateway.shutdown_callback_server()
spark.stop()
```
-## [Contributing](https://github.com/awslabs/python-deequ/blob/master/CONTRIBUTING.md)
-Please refer to the [contributing doc](https://github.com/awslabs/python-deequ/blob/master/CONTRIBUTING.md) for how to contribute to PyDeequ.
+---
-## [License](https://github.com/awslabs/python-deequ/blob/master/LICENSE)
+## Deequ Components
-This library is licensed under the Apache 2.0 License.
+There are 4 main components of Deequ:
-******
+- **Metrics Computation**
+ - `Profiles` leverages Analyzers to analyze each column of a dataset.
+ - `Analyzers` compute metrics for data profiling and validation at scale.
+- **Constraint Suggestion**
+ - Specify rules for Analyzers to return suggested constraints.
+- **Constraint Verification**
+ - Validate data against constraints you define.
+- **Metrics Repository**
+ - Persist and track Deequ runs over time.
-## Contributing Developer Setup
+
-1. Setup [SDKMAN](#setup-sdkman)
-1. Setup [Java](#setup-java)
-1. Setup [Apache Spark](#setup-apache-spark)
-1. Install [Poetry](#poetry)
-1. Run [tests locally](#running-tests-locally)
+---
-### Setup SDKMAN
+## Feedback and Issues
-SDKMAN is a tool for managing parallel Versions of multiple Software Development Kits on any Unix based
-system. It provides a convenient command line interface for installing, switching, removing and listing
-Candidates. SDKMAN! installs smoothly on Mac OSX, Linux, WSL, Cygwin, etc... Support Bash and ZSH shells. See
-documentation on the [SDKMAN! website](https://sdkman.io).
+Please report any issues or share feedback via:
+- GitHub Issues: https://github.com/awslabs/deequ/issues
+- Tag PyDeequ 2.0 issues with `pydeequ-2.0`
-Open your favourite terminal and enter the following:
+When reporting issues, include:
+1. Python version
+2. Spark version
+3. Java version
+4. Operating system
+5. Full error message and stack trace
+6. Minimal code to reproduce
-```bash
-$ curl -s https://get.sdkman.io | bash
-If the environment needs tweaking for SDKMAN to be installed,
-the installer will prompt you accordingly and ask you to restart.
+---
-Next, open a new terminal or enter:
+## Contributing
-$ source "$HOME/.sdkman/bin/sdkman-init.sh"
+Please refer to the [contributing doc](https://github.com/awslabs/python-deequ/blob/master/CONTRIBUTING.md) for how to contribute to PyDeequ.
-Lastly, run the following code snippet to ensure that installation succeeded:
+## License
-$ sdk version
-```
+This library is licensed under the Apache 2.0 License.
-### Setup Java
+---
-Install Java Now open favourite terminal and enter the following:
+## Developer Setup
+
+1. Setup [SDKMAN](#setup-sdkman)
+2. Setup [Java](#setup-java)
+3. Setup [Apache Spark](#setup-apache-spark)
+4. Install [Poetry](#poetry)
+5. Run [tests locally](#running-tests-locally)
+
+### Setup SDKMAN
```bash
-List the AdoptOpenJDK OpenJDK versions
-$ sdk list java
+curl -s https://get.sdkman.io | bash
+source "$HOME/.sdkman/bin/sdkman-init.sh"
+sdk version
+```
-To install For Java 11
-$ sdk install java 11.0.10.hs-adpt
+### Setup Java
-To install For Java 11
-$ sdk install java 8.0.292.hs-adpt
+```bash
+sdk list java
+sdk install java 17.0.9-amzn # For PyDeequ 2.0
+sdk install java 11.0.10.hs-adpt # For PyDeequ 1.x
```
### Setup Apache Spark
-Install Java Now open favourite terminal and enter the following:
-
```bash
-List the Apache Spark versions:
-$ sdk list spark
-
-To install For Spark 3
-$ sdk install spark 3.0.2
+sdk list spark
+sdk install spark 3.5.0
```
### Poetry
-Poetry [Commands](https://python-poetry.org/docs/cli/#search)
-
```bash
poetry install
-
poetry update
-
-# --tree: List the dependencies as a tree.
-# --latest (-l): Show the latest version.
-# --outdated (-o): Show the latest version but only for packages that are outdated.
poetry show -o
```
-## Running Tests Locally
-
-Take a look at tests in `tests/dataquality` and `tests/jobs`
+### Running Tests Locally
```bash
-$ poetry run pytest
+poetry run pytest
```
-## Running Tests Locally (Docker)
-
-If you have issues installing the dependencies listed above, another way to run the tests and verify your changes is through Docker. There is a Dockerfile that will install the required dependencies and run the tests in a container.
+### Running Tests (Docker)
+```bash
+docker build . -t spark-3.5-docker-test
+docker run spark-3.5-docker-test
```
-docker build . -t spark-3.3-docker-test
-docker run spark-3.3-docker-test
-```
-
diff --git a/poetry.lock b/poetry.lock
index 164c6c3..5b439ef 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,201 +1,71 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
-
-[[package]]
-name = "atomicwrites"
-version = "1.4.1"
-description = "Atomic file writes."
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-files = [
- {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
-]
-
-[[package]]
-name = "attrs"
-version = "22.1.0"
-description = "Classes Without Boilerplate"
-optional = false
-python-versions = ">=3.5"
-files = [
- {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"},
- {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"},
-]
-
-[package.extras]
-dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"]
-docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"]
-tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"]
-tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"]
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "black"
-version = "21.12b0"
+version = "24.10.0"
description = "The uncompromising code formatter."
optional = false
-python-versions = ">=3.6.2"
-files = [
- {file = "black-21.12b0-py3-none-any.whl", hash = "sha256:a615e69ae185e08fdd73e4715e260e2479c861b5740057fde6e8b4e3b7dd589f"},
- {file = "black-21.12b0.tar.gz", hash = "sha256:77b80f693a569e2e527958459634f18df9b0ba2625ba4e0c2d5da5be42e6f2b3"},
+python-versions = ">=3.9"
+files = [
+ {file = "black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812"},
+ {file = "black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea"},
+ {file = "black-24.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:649fff99a20bd06c6f727d2a27f401331dc0cc861fb69cde910fe95b01b5928f"},
+ {file = "black-24.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe4d6476887de70546212c99ac9bd803d90b42fc4767f058a0baa895013fbb3e"},
+ {file = "black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad"},
+ {file = "black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50"},
+ {file = "black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392"},
+ {file = "black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175"},
+ {file = "black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3"},
+ {file = "black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65"},
+ {file = "black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f"},
+ {file = "black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8"},
+ {file = "black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981"},
+ {file = "black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b"},
+ {file = "black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2"},
+ {file = "black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b"},
+ {file = "black-24.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:17374989640fbca88b6a448129cd1745c5eb8d9547b464f281b251dd00155ccd"},
+ {file = "black-24.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:63f626344343083322233f175aaf372d326de8436f5928c042639a4afbbf1d3f"},
+ {file = "black-24.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfa1d0cb6200857f1923b602f978386a3a2758a65b52e0950299ea014be6800"},
+ {file = "black-24.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cd9c95431d94adc56600710f8813ee27eea544dd118d45896bb734e9d7a0dc7"},
+ {file = "black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d"},
+ {file = "black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875"},
]
[package.dependencies]
-click = ">=7.1.2"
+click = ">=8.0.0"
mypy-extensions = ">=0.4.3"
-pathspec = ">=0.9.0,<1"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
platformdirs = ">=2"
-tomli = ">=0.2.6,<2.0.0"
-typing-extensions = [
- {version = ">=3.10.0.0,<3.10.0.1 || >3.10.0.1", markers = "python_version >= \"3.10\""},
- {version = ">=3.10.0.0", markers = "python_version < \"3.10\""},
-]
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
[package.extras]
colorama = ["colorama (>=0.4.3)"]
-d = ["aiohttp (>=3.7.4)"]
+d = ["aiohttp (>=3.10)"]
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
-python2 = ["typed-ast (>=1.4.3)"]
uvloop = ["uvloop (>=0.15.2)"]
-[[package]]
-name = "bleach"
-version = "5.0.1"
-description = "An easy safelist-based HTML-sanitizing tool."
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "bleach-5.0.1-py3-none-any.whl", hash = "sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a"},
- {file = "bleach-5.0.1.tar.gz", hash = "sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c"},
-]
-
-[package.dependencies]
-six = ">=1.9.0"
-webencodings = "*"
-
-[package.extras]
-css = ["tinycss2 (>=1.1.0,<1.2)"]
-dev = ["Sphinx (==4.3.2)", "black (==22.3.0)", "build (==0.8.0)", "flake8 (==4.0.1)", "hashin (==0.17.0)", "mypy (==0.961)", "pip-tools (==6.6.2)", "pytest (==7.1.2)", "tox (==3.25.0)", "twine (==4.0.1)", "wheel (==0.37.1)"]
-
-[[package]]
-name = "certifi"
-version = "2024.7.4"
-description = "Python package for providing Mozilla's CA Bundle."
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
- {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
-]
-
-[[package]]
-name = "cffi"
-version = "1.15.1"
-description = "Foreign Function Interface for Python calling C code."
-optional = false
-python-versions = "*"
-files = [
- {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"},
- {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"},
- {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"},
- {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"},
- {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"},
- {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"},
- {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"},
- {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"},
- {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"},
- {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"},
- {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"},
- {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"},
- {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"},
- {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"},
- {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"},
- {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"},
- {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"},
- {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"},
- {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"},
- {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"},
- {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"},
- {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"},
- {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"},
- {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"},
- {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"},
- {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"},
- {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"},
- {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"},
- {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"},
- {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"},
- {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"},
- {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"},
- {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"},
- {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"},
- {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"},
- {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"},
- {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"},
- {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"},
- {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"},
- {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"},
- {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"},
- {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"},
- {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"},
- {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"},
- {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"},
- {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"},
- {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"},
- {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"},
- {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"},
- {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"},
- {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"},
- {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"},
- {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"},
- {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"},
- {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"},
- {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"},
- {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"},
- {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"},
- {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"},
- {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"},
- {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"},
- {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"},
- {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"},
- {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"},
-]
-
-[package.dependencies]
-pycparser = "*"
-
[[package]]
name = "cfgv"
-version = "3.3.1"
+version = "3.4.0"
description = "Validate configuration and produce human readable error messages."
optional = false
-python-versions = ">=3.6.1"
-files = [
- {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"},
- {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"},
-]
-
-[[package]]
-name = "charset-normalizer"
-version = "2.1.1"
-description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
-optional = false
-python-versions = ">=3.6.0"
+python-versions = ">=3.8"
files = [
- {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"},
- {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"},
+ {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"},
+ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
]
-[package.extras]
-unicode-backport = ["unicodedata2"]
-
[[package]]
name = "click"
-version = "8.1.3"
+version = "8.1.8"
description = "Composable command line interface toolkit"
optional = false
python-versions = ">=3.7"
files = [
- {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
- {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
+ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
+ {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
]
[package.dependencies]
@@ -203,535 +73,561 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
[[package]]
name = "colorama"
-version = "0.4.5"
+version = "0.4.6"
description = "Cross-platform colored terminal text."
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
files = [
- {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
- {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
+ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
[[package]]
name = "coverage"
-version = "5.5"
+version = "7.10.7"
description = "Code coverage measurement for Python"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
-files = [
- {file = "coverage-5.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:b6d534e4b2ab35c9f93f46229363e17f63c53ad01330df9f2d6bd1187e5eaacf"},
- {file = "coverage-5.5-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:b7895207b4c843c76a25ab8c1e866261bcfe27bfaa20c192de5190121770672b"},
- {file = "coverage-5.5-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:c2723d347ab06e7ddad1a58b2a821218239249a9e4365eaff6649d31180c1669"},
- {file = "coverage-5.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:900fbf7759501bc7807fd6638c947d7a831fc9fdf742dc10f02956ff7220fa90"},
- {file = "coverage-5.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:004d1880bed2d97151facef49f08e255a20ceb6f9432df75f4eef018fdd5a78c"},
- {file = "coverage-5.5-cp27-cp27m-win32.whl", hash = "sha256:06191eb60f8d8a5bc046f3799f8a07a2d7aefb9504b0209aff0b47298333302a"},
- {file = "coverage-5.5-cp27-cp27m-win_amd64.whl", hash = "sha256:7501140f755b725495941b43347ba8a2777407fc7f250d4f5a7d2a1050ba8e82"},
- {file = "coverage-5.5-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:372da284cfd642d8e08ef606917846fa2ee350f64994bebfbd3afb0040436905"},
- {file = "coverage-5.5-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8963a499849a1fc54b35b1c9f162f4108017b2e6db2c46c1bed93a72262ed083"},
- {file = "coverage-5.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:869a64f53488f40fa5b5b9dcb9e9b2962a66a87dab37790f3fcfb5144b996ef5"},
- {file = "coverage-5.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4a7697d8cb0f27399b0e393c0b90f0f1e40c82023ea4d45d22bce7032a5d7b81"},
- {file = "coverage-5.5-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:8d0a0725ad7c1a0bcd8d1b437e191107d457e2ec1084b9f190630a4fb1af78e6"},
- {file = "coverage-5.5-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:51cb9476a3987c8967ebab3f0fe144819781fca264f57f89760037a2ea191cb0"},
- {file = "coverage-5.5-cp310-cp310-win_amd64.whl", hash = "sha256:c0891a6a97b09c1f3e073a890514d5012eb256845c451bd48f7968ef939bf4ae"},
- {file = "coverage-5.5-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:3487286bc29a5aa4b93a072e9592f22254291ce96a9fbc5251f566b6b7343cdb"},
- {file = "coverage-5.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:deee1077aae10d8fa88cb02c845cfba9b62c55e1183f52f6ae6a2df6a2187160"},
- {file = "coverage-5.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:f11642dddbb0253cc8853254301b51390ba0081750a8ac03f20ea8103f0c56b6"},
- {file = "coverage-5.5-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:6c90e11318f0d3c436a42409f2749ee1a115cd8b067d7f14c148f1ce5574d701"},
- {file = "coverage-5.5-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:30c77c1dc9f253283e34c27935fded5015f7d1abe83bc7821680ac444eaf7793"},
- {file = "coverage-5.5-cp35-cp35m-win32.whl", hash = "sha256:9a1ef3b66e38ef8618ce5fdc7bea3d9f45f3624e2a66295eea5e57966c85909e"},
- {file = "coverage-5.5-cp35-cp35m-win_amd64.whl", hash = "sha256:972c85d205b51e30e59525694670de6a8a89691186012535f9d7dbaa230e42c3"},
- {file = "coverage-5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:af0e781009aaf59e25c5a678122391cb0f345ac0ec272c7961dc5455e1c40066"},
- {file = "coverage-5.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:74d881fc777ebb11c63736622b60cb9e4aee5cace591ce274fb69e582a12a61a"},
- {file = "coverage-5.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:92b017ce34b68a7d67bd6d117e6d443a9bf63a2ecf8567bb3d8c6c7bc5014465"},
- {file = "coverage-5.5-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:d636598c8305e1f90b439dbf4f66437de4a5e3c31fdf47ad29542478c8508bbb"},
- {file = "coverage-5.5-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:41179b8a845742d1eb60449bdb2992196e211341818565abded11cfa90efb821"},
- {file = "coverage-5.5-cp36-cp36m-win32.whl", hash = "sha256:040af6c32813fa3eae5305d53f18875bedd079960822ef8ec067a66dd8afcd45"},
- {file = "coverage-5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:5fec2d43a2cc6965edc0bb9e83e1e4b557f76f843a77a2496cbe719583ce8184"},
- {file = "coverage-5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:18ba8bbede96a2c3dde7b868de9dcbd55670690af0988713f0603f037848418a"},
- {file = "coverage-5.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2910f4d36a6a9b4214bb7038d537f015346f413a975d57ca6b43bf23d6563b53"},
- {file = "coverage-5.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f0b278ce10936db1a37e6954e15a3730bea96a0997c26d7fee88e6c396c2086d"},
- {file = "coverage-5.5-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:796c9c3c79747146ebd278dbe1e5c5c05dd6b10cc3bcb8389dfdf844f3ead638"},
- {file = "coverage-5.5-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:53194af30d5bad77fcba80e23a1441c71abfb3e01192034f8246e0d8f99528f3"},
- {file = "coverage-5.5-cp37-cp37m-win32.whl", hash = "sha256:184a47bbe0aa6400ed2d41d8e9ed868b8205046518c52464fde713ea06e3a74a"},
- {file = "coverage-5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2949cad1c5208b8298d5686d5a85b66aae46d73eec2c3e08c817dd3513e5848a"},
- {file = "coverage-5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:217658ec7187497e3f3ebd901afdca1af062b42cfe3e0dafea4cced3983739f6"},
- {file = "coverage-5.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1aa846f56c3d49205c952d8318e76ccc2ae23303351d9270ab220004c580cfe2"},
- {file = "coverage-5.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:24d4a7de75446be83244eabbff746d66b9240ae020ced65d060815fac3423759"},
- {file = "coverage-5.5-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:d1f8bf7b90ba55699b3a5e44930e93ff0189aa27186e96071fac7dd0d06a1873"},
- {file = "coverage-5.5-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:970284a88b99673ccb2e4e334cfb38a10aab7cd44f7457564d11898a74b62d0a"},
- {file = "coverage-5.5-cp38-cp38-win32.whl", hash = "sha256:01d84219b5cdbfc8122223b39a954820929497a1cb1422824bb86b07b74594b6"},
- {file = "coverage-5.5-cp38-cp38-win_amd64.whl", hash = "sha256:2e0d881ad471768bf6e6c2bf905d183543f10098e3b3640fc029509530091502"},
- {file = "coverage-5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1f9ce122f83b2305592c11d64f181b87153fc2c2bbd3bb4a3dde8303cfb1a6b"},
- {file = "coverage-5.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:13c4ee887eca0f4c5a247b75398d4114c37882658300e153113dafb1d76de529"},
- {file = "coverage-5.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:52596d3d0e8bdf3af43db3e9ba8dcdaac724ba7b5ca3f6358529d56f7a166f8b"},
- {file = "coverage-5.5-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2cafbbb3af0733db200c9b5f798d18953b1a304d3f86a938367de1567f4b5bff"},
- {file = "coverage-5.5-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:44d654437b8ddd9eee7d1eaee28b7219bec228520ff809af170488fd2fed3e2b"},
- {file = "coverage-5.5-cp39-cp39-win32.whl", hash = "sha256:d314ed732c25d29775e84a960c3c60808b682c08d86602ec2c3008e1202e3bb6"},
- {file = "coverage-5.5-cp39-cp39-win_amd64.whl", hash = "sha256:13034c4409db851670bc9acd836243aeee299949bd5673e11844befcb0149f03"},
- {file = "coverage-5.5-pp36-none-any.whl", hash = "sha256:f030f8873312a16414c0d8e1a1ddff2d3235655a2174e3648b4fa66b3f2f1079"},
- {file = "coverage-5.5-pp37-none-any.whl", hash = "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4"},
- {file = "coverage-5.5.tar.gz", hash = "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c"},
-]
-
-[package.extras]
-toml = ["toml"]
-
-[[package]]
-name = "cryptography"
-version = "42.0.4"
-description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449"},
- {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18"},
- {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2"},
- {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1"},
- {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b"},
- {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1"},
- {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992"},
- {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885"},
- {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824"},
- {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b"},
- {file = "cryptography-42.0.4-cp37-abi3-win32.whl", hash = "sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925"},
- {file = "cryptography-42.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923"},
- {file = "cryptography-42.0.4-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7"},
- {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52"},
- {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a"},
- {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9"},
- {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764"},
- {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff"},
- {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257"},
- {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929"},
- {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0"},
- {file = "cryptography-42.0.4-cp39-abi3-win32.whl", hash = "sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129"},
- {file = "cryptography-42.0.4-cp39-abi3-win_amd64.whl", hash = "sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854"},
- {file = "cryptography-42.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298"},
- {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88"},
- {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20"},
- {file = "cryptography-42.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce"},
- {file = "cryptography-42.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74"},
- {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd"},
- {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b"},
- {file = "cryptography-42.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660"},
- {file = "cryptography-42.0.4.tar.gz", hash = "sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb"},
+python-versions = ">=3.9"
+files = [
+ {file = "coverage-7.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc04cc7a3db33664e0c2d10eb8990ff6b3536f6842c9590ae8da4c614b9ed05a"},
+ {file = "coverage-7.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e201e015644e207139f7e2351980feb7040e6f4b2c2978892f3e3789d1c125e5"},
+ {file = "coverage-7.10.7-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:240af60539987ced2c399809bd34f7c78e8abe0736af91c3d7d0e795df633d17"},
+ {file = "coverage-7.10.7-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8421e088bc051361b01c4b3a50fd39a4b9133079a2229978d9d30511fd05231b"},
+ {file = "coverage-7.10.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6be8ed3039ae7f7ac5ce058c308484787c86e8437e72b30bf5e88b8ea10f3c87"},
+ {file = "coverage-7.10.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e28299d9f2e889e6d51b1f043f58d5f997c373cc12e6403b90df95b8b047c13e"},
+ {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4e16bd7761c5e454f4efd36f345286d6f7c5fa111623c355691e2755cae3b9e"},
+ {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b1c81d0e5e160651879755c9c675b974276f135558cf4ba79fee7b8413a515df"},
+ {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:606cc265adc9aaedcc84f1f064f0e8736bc45814f15a357e30fca7ecc01504e0"},
+ {file = "coverage-7.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:10b24412692df990dbc34f8fb1b6b13d236ace9dfdd68df5b28c2e39cafbba13"},
+ {file = "coverage-7.10.7-cp310-cp310-win32.whl", hash = "sha256:b51dcd060f18c19290d9b8a9dd1e0181538df2ce0717f562fff6cf74d9fc0b5b"},
+ {file = "coverage-7.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:3a622ac801b17198020f09af3eaf45666b344a0d69fc2a6ffe2ea83aeef1d807"},
+ {file = "coverage-7.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a609f9c93113be646f44c2a0256d6ea375ad047005d7f57a5c15f614dc1b2f59"},
+ {file = "coverage-7.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:65646bb0359386e07639c367a22cf9b5bf6304e8630b565d0626e2bdf329227a"},
+ {file = "coverage-7.10.7-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5f33166f0dfcce728191f520bd2692914ec70fac2713f6bf3ce59c3deacb4699"},
+ {file = "coverage-7.10.7-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35f5e3f9e455bb17831876048355dca0f758b6df22f49258cb5a91da23ef437d"},
+ {file = "coverage-7.10.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da86b6d62a496e908ac2898243920c7992499c1712ff7c2b6d837cc69d9467e"},
+ {file = "coverage-7.10.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6b8b09c1fad947c84bbbc95eca841350fad9cbfa5a2d7ca88ac9f8d836c92e23"},
+ {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4376538f36b533b46f8971d3a3e63464f2c7905c9800db97361c43a2b14792ab"},
+ {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:121da30abb574f6ce6ae09840dae322bef734480ceafe410117627aa54f76d82"},
+ {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:88127d40df529336a9836870436fc2751c339fbaed3a836d42c93f3e4bd1d0a2"},
+ {file = "coverage-7.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ba58bbcd1b72f136080c0bccc2400d66cc6115f3f906c499013d065ac33a4b61"},
+ {file = "coverage-7.10.7-cp311-cp311-win32.whl", hash = "sha256:972b9e3a4094b053a4e46832b4bc829fc8a8d347160eb39d03f1690316a99c14"},
+ {file = "coverage-7.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:a7b55a944a7f43892e28ad4bc0561dfd5f0d73e605d1aa5c3c976b52aea121d2"},
+ {file = "coverage-7.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:736f227fb490f03c6488f9b6d45855f8e0fd749c007f9303ad30efab0e73c05a"},
+ {file = "coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417"},
+ {file = "coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973"},
+ {file = "coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c"},
+ {file = "coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7"},
+ {file = "coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6"},
+ {file = "coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59"},
+ {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b"},
+ {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a"},
+ {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb"},
+ {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1"},
+ {file = "coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256"},
+ {file = "coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba"},
+ {file = "coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf"},
+ {file = "coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d"},
+ {file = "coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b"},
+ {file = "coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e"},
+ {file = "coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b"},
+ {file = "coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49"},
+ {file = "coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911"},
+ {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0"},
+ {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f"},
+ {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c"},
+ {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f"},
+ {file = "coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698"},
+ {file = "coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843"},
+ {file = "coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546"},
+ {file = "coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c"},
+ {file = "coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15"},
+ {file = "coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4"},
+ {file = "coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0"},
+ {file = "coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0"},
+ {file = "coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65"},
+ {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541"},
+ {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6"},
+ {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999"},
+ {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2"},
+ {file = "coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a"},
+ {file = "coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb"},
+ {file = "coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb"},
+ {file = "coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520"},
+ {file = "coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32"},
+ {file = "coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f"},
+ {file = "coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a"},
+ {file = "coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360"},
+ {file = "coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69"},
+ {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14"},
+ {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe"},
+ {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e"},
+ {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd"},
+ {file = "coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2"},
+ {file = "coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681"},
+ {file = "coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880"},
+ {file = "coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63"},
+ {file = "coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2"},
+ {file = "coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d"},
+ {file = "coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0"},
+ {file = "coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699"},
+ {file = "coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9"},
+ {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f"},
+ {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1"},
+ {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0"},
+ {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399"},
+ {file = "coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235"},
+ {file = "coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d"},
+ {file = "coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a"},
+ {file = "coverage-7.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fff7b9c3f19957020cac546c70025331113d2e61537f6e2441bc7657913de7d3"},
+ {file = "coverage-7.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bc91b314cef27742da486d6839b677b3f2793dfe52b51bbbb7cf736d5c29281c"},
+ {file = "coverage-7.10.7-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:567f5c155eda8df1d3d439d40a45a6a5f029b429b06648235f1e7e51b522b396"},
+ {file = "coverage-7.10.7-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2af88deffcc8a4d5974cf2d502251bc3b2db8461f0b66d80a449c33757aa9f40"},
+ {file = "coverage-7.10.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7315339eae3b24c2d2fa1ed7d7a38654cba34a13ef19fbcb9425da46d3dc594"},
+ {file = "coverage-7.10.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:912e6ebc7a6e4adfdbb1aec371ad04c68854cd3bf3608b3514e7ff9062931d8a"},
+ {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f49a05acd3dfe1ce9715b657e28d138578bc40126760efb962322c56e9ca344b"},
+ {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cce2109b6219f22ece99db7644b9622f54a4e915dad65660ec435e89a3ea7cc3"},
+ {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:f3c887f96407cea3916294046fc7dab611c2552beadbed4ea901cbc6a40cc7a0"},
+ {file = "coverage-7.10.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:635adb9a4507c9fd2ed65f39693fa31c9a3ee3a8e6dc64df033e8fdf52a7003f"},
+ {file = "coverage-7.10.7-cp39-cp39-win32.whl", hash = "sha256:5a02d5a850e2979b0a014c412573953995174743a3f7fa4ea5a6e9a3c5617431"},
+ {file = "coverage-7.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:c134869d5ffe34547d14e174c866fd8fe2254918cc0a95e99052903bc1543e07"},
+ {file = "coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260"},
+ {file = "coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239"},
]
[package.dependencies]
-cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""}
+tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""}
[package.extras]
-docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"]
-docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"]
-nox = ["nox"]
-pep8test = ["check-sdist", "click", "mypy", "ruff"]
-sdist = ["build"]
-ssh = ["bcrypt (>=3.1.5)"]
-test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
-test-randomorder = ["pytest-randomly"]
+toml = ["tomli"]
[[package]]
name = "distlib"
-version = "0.3.6"
+version = "0.4.0"
description = "Distribution utilities"
optional = false
python-versions = "*"
files = [
- {file = "distlib-0.3.6-py2.py3-none-any.whl", hash = "sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e"},
- {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"},
+ {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"},
+ {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"},
]
[[package]]
-name = "docutils"
-version = "0.19"
-description = "Docutils -- Python Documentation Utilities"
+name = "exceptiongroup"
+version = "1.3.1"
+description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
- {file = "docutils-0.19-py3-none-any.whl", hash = "sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc"},
- {file = "docutils-0.19.tar.gz", hash = "sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6"},
-]
-
-[[package]]
-name = "dparse"
-version = "0.6.0"
-description = "A parser for Python dependency files"
-optional = false
-python-versions = ">=3.5"
-files = [
- {file = "dparse-0.6.0-py3-none-any.whl", hash = "sha256:3cb489bd06bfa8d285c85f7dec69d9ee8f89c29dd5f4ab48e159746dc13b78b2"},
- {file = "dparse-0.6.0.tar.gz", hash = "sha256:57068bb61859b1676c6beb10f399906eecb41a75b5d3fbc99d0311059cb67213"},
+ {file = "exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598"},
+ {file = "exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219"},
]
[package.dependencies]
-packaging = "*"
-toml = "*"
+typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""}
[package.extras]
-conda = ["pyyaml"]
-pipenv = ["pipenv"]
+test = ["pytest (>=6)"]
[[package]]
name = "filelock"
-version = "3.8.0"
+version = "3.19.1"
description = "A platform independent file lock."
optional = false
-python-versions = ">=3.7"
-files = [
- {file = "filelock-3.8.0-py3-none-any.whl", hash = "sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4"},
- {file = "filelock-3.8.0.tar.gz", hash = "sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc"},
-]
-
-[package.extras]
-docs = ["furo (>=2022.6.21)", "sphinx (>=5.1.1)", "sphinx-autodoc-typehints (>=1.19.1)"]
-testing = ["covdefaults (>=2.2)", "coverage (>=6.4.2)", "pytest (>=7.1.2)", "pytest-cov (>=3)", "pytest-timeout (>=2.1)"]
-
-[[package]]
-name = "flake8"
-version = "3.9.2"
-description = "the modular source code checker: pep8 pyflakes and co"
-optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
-files = [
- {file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"},
- {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"},
-]
-
-[package.dependencies]
-mccabe = ">=0.6.0,<0.7.0"
-pycodestyle = ">=2.7.0,<2.8.0"
-pyflakes = ">=2.3.0,<2.4.0"
-
-[[package]]
-name = "flake8-docstrings"
-version = "1.6.0"
-description = "Extension for flake8 which uses pydocstyle to check docstrings"
-optional = false
-python-versions = "*"
+python-versions = ">=3.9"
files = [
- {file = "flake8-docstrings-1.6.0.tar.gz", hash = "sha256:9fe7c6a306064af8e62a055c2f61e9eb1da55f84bb39caef2b84ce53708ac34b"},
- {file = "flake8_docstrings-1.6.0-py2.py3-none-any.whl", hash = "sha256:99cac583d6c7e32dd28bbfbef120a7c0d1b6dde4adb5a9fd441c4227a6534bde"},
+ {file = "filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d"},
+ {file = "filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58"},
]
-[package.dependencies]
-flake8 = ">=3"
-pydocstyle = ">=2.1"
-
[[package]]
-name = "identify"
-version = "2.5.5"
-description = "File identification library for Python"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "identify-2.5.5-py2.py3-none-any.whl", hash = "sha256:ef78c0d96098a3b5fe7720be4a97e73f439af7cf088ebf47b620aeaa10fadf97"},
- {file = "identify-2.5.5.tar.gz", hash = "sha256:322a5699daecf7c6fd60e68852f36f2ecbb6a36ff6e6e973e0d2bb6fca203ee6"},
-]
-
-[package.extras]
-license = ["ukkonen"]
-
-[[package]]
-name = "idna"
-version = "3.4"
-description = "Internationalized Domain Names in Applications (IDNA)"
+name = "filelock"
+version = "3.20.3"
+description = "A platform independent file lock."
optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.10"
files = [
- {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
- {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
+ {file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
+ {file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
]
[[package]]
-name = "importlib-metadata"
-version = "4.12.0"
-description = "Read metadata from Python packages"
+name = "googleapis-common-protos"
+version = "1.72.0"
+description = "Common protobufs used in Google APIs"
optional = false
python-versions = ">=3.7"
files = [
- {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"},
- {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"},
+ {file = "googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038"},
+ {file = "googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5"},
]
[package.dependencies]
-zipp = ">=0.5"
+protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
[package.extras]
-docs = ["jaraco.packaging (>=9)", "rst.linker (>=1.9)", "sphinx"]
-perf = ["ipython"]
-testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"]
-
-[[package]]
-name = "iniconfig"
-version = "1.1.1"
-description = "iniconfig: brain-dead simple config-ini parsing"
-optional = false
-python-versions = "*"
-files = [
- {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
- {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
-]
-
-[[package]]
-name = "jaraco.classes"
-version = "3.2.2"
-description = "Utility functions for Python class constructs"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "jaraco.classes-3.2.2-py3-none-any.whl", hash = "sha256:e6ef6fd3fcf4579a7a019d87d1e56a883f4e4c35cfe925f86731abc58804e647"},
- {file = "jaraco.classes-3.2.2.tar.gz", hash = "sha256:6745f113b0b588239ceb49532aa09c3ebb947433ce311ef2f8e3ad64ebb74594"},
+grpc = ["grpcio (>=1.44.0,<2.0.0)"]
+
+[[package]]
+name = "grpcio"
+version = "1.76.0"
+description = "HTTP/2-based RPC framework"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "grpcio-1.76.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:65a20de41e85648e00305c1bb09a3598f840422e522277641145a32d42dcefcc"},
+ {file = "grpcio-1.76.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:40ad3afe81676fd9ec6d9d406eda00933f218038433980aa19d401490e46ecde"},
+ {file = "grpcio-1.76.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:035d90bc79eaa4bed83f524331d55e35820725c9fbb00ffa1904d5550ed7ede3"},
+ {file = "grpcio-1.76.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4215d3a102bd95e2e11b5395c78562967959824156af11fa93d18fdd18050990"},
+ {file = "grpcio-1.76.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:49ce47231818806067aea3324d4bf13825b658ad662d3b25fada0bdad9b8a6af"},
+ {file = "grpcio-1.76.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8cc3309d8e08fd79089e13ed4819d0af72aa935dd8f435a195fd152796752ff2"},
+ {file = "grpcio-1.76.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:971fd5a1d6e62e00d945423a567e42eb1fa678ba89072832185ca836a94daaa6"},
+ {file = "grpcio-1.76.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d9adda641db7207e800a7f089068f6f645959f2df27e870ee81d44701dd9db3"},
+ {file = "grpcio-1.76.0-cp310-cp310-win32.whl", hash = "sha256:063065249d9e7e0782d03d2bca50787f53bd0fb89a67de9a7b521c4a01f1989b"},
+ {file = "grpcio-1.76.0-cp310-cp310-win_amd64.whl", hash = "sha256:a6ae758eb08088d36812dd5d9af7a9859c05b1e0f714470ea243694b49278e7b"},
+ {file = "grpcio-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a"},
+ {file = "grpcio-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c"},
+ {file = "grpcio-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465"},
+ {file = "grpcio-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48"},
+ {file = "grpcio-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da"},
+ {file = "grpcio-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397"},
+ {file = "grpcio-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749"},
+ {file = "grpcio-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00"},
+ {file = "grpcio-1.76.0-cp311-cp311-win32.whl", hash = "sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054"},
+ {file = "grpcio-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d"},
+ {file = "grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8"},
+ {file = "grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280"},
+ {file = "grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4"},
+ {file = "grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11"},
+ {file = "grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6"},
+ {file = "grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8"},
+ {file = "grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980"},
+ {file = "grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882"},
+ {file = "grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958"},
+ {file = "grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347"},
+ {file = "grpcio-1.76.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:26ef06c73eb53267c2b319f43e6634c7556ea37672029241a056629af27c10e2"},
+ {file = "grpcio-1.76.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:45e0111e73f43f735d70786557dc38141185072d7ff8dc1829d6a77ac1471468"},
+ {file = "grpcio-1.76.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:83d57312a58dcfe2a3a0f9d1389b299438909a02db60e2f2ea2ae2d8034909d3"},
+ {file = "grpcio-1.76.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3e2a27c89eb9ac3d81ec8835e12414d73536c6e620355d65102503064a4ed6eb"},
+ {file = "grpcio-1.76.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:61f69297cba3950a524f61c7c8ee12e55c486cb5f7db47ff9dcee33da6f0d3ae"},
+ {file = "grpcio-1.76.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6a15c17af8839b6801d554263c546c69c4d7718ad4321e3166175b37eaacca77"},
+ {file = "grpcio-1.76.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:25a18e9810fbc7e7f03ec2516addc116a957f8cbb8cbc95ccc80faa072743d03"},
+ {file = "grpcio-1.76.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:931091142fd8cc14edccc0845a79248bc155425eee9a98b2db2ea4f00a235a42"},
+ {file = "grpcio-1.76.0-cp313-cp313-win32.whl", hash = "sha256:5e8571632780e08526f118f74170ad8d50fb0a48c23a746bef2a6ebade3abd6f"},
+ {file = "grpcio-1.76.0-cp313-cp313-win_amd64.whl", hash = "sha256:f9f7bd5faab55f47231ad8dba7787866b69f5e93bc306e3915606779bbfb4ba8"},
+ {file = "grpcio-1.76.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:ff8a59ea85a1f2191a0ffcc61298c571bc566332f82e5f5be1b83c9d8e668a62"},
+ {file = "grpcio-1.76.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06c3d6b076e7b593905d04fdba6a0525711b3466f43b3400266f04ff735de0cd"},
+ {file = "grpcio-1.76.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fd5ef5932f6475c436c4a55e4336ebbe47bd3272be04964a03d316bbf4afbcbc"},
+ {file = "grpcio-1.76.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b331680e46239e090f5b3cead313cc772f6caa7d0fc8de349337563125361a4a"},
+ {file = "grpcio-1.76.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2229ae655ec4e8999599469559e97630185fdd53ae1e8997d147b7c9b2b72cba"},
+ {file = "grpcio-1.76.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:490fa6d203992c47c7b9e4a9d39003a0c2bcc1c9aa3c058730884bbbb0ee9f09"},
+ {file = "grpcio-1.76.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:479496325ce554792dba6548fae3df31a72cef7bad71ca2e12b0e58f9b336bfc"},
+ {file = "grpcio-1.76.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1c9b93f79f48b03ada57ea24725d83a30284a012ec27eab2cf7e50a550cbbbcc"},
+ {file = "grpcio-1.76.0-cp314-cp314-win32.whl", hash = "sha256:747fa73efa9b8b1488a95d0ba1039c8e2dca0f741612d80415b1e1c560febf4e"},
+ {file = "grpcio-1.76.0-cp314-cp314-win_amd64.whl", hash = "sha256:922fa70ba549fce362d2e2871ab542082d66e2aaf0c19480ea453905b01f384e"},
+ {file = "grpcio-1.76.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:8ebe63ee5f8fa4296b1b8cfc743f870d10e902ca18afc65c68cf46fd39bb0783"},
+ {file = "grpcio-1.76.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:3bf0f392c0b806905ed174dcd8bdd5e418a40d5567a05615a030a5aeddea692d"},
+ {file = "grpcio-1.76.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b7604868b38c1bfd5cf72d768aedd7db41d78cb6a4a18585e33fb0f9f2363fd"},
+ {file = "grpcio-1.76.0-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:e6d1db20594d9daba22f90da738b1a0441a7427552cc6e2e3d1297aeddc00378"},
+ {file = "grpcio-1.76.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d099566accf23d21037f18a2a63d323075bebace807742e4b0ac210971d4dd70"},
+ {file = "grpcio-1.76.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ebea5cc3aa8ea72e04df9913492f9a96d9348db876f9dda3ad729cfedf7ac416"},
+ {file = "grpcio-1.76.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:0c37db8606c258e2ee0c56b78c62fc9dee0e901b5dbdcf816c2dd4ad652b8b0c"},
+ {file = "grpcio-1.76.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ebebf83299b0cb1721a8859ea98f3a77811e35dce7609c5c963b9ad90728f886"},
+ {file = "grpcio-1.76.0-cp39-cp39-win32.whl", hash = "sha256:0aaa82d0813fd4c8e589fac9b65d7dd88702555f702fb10417f96e2a2a6d4c0f"},
+ {file = "grpcio-1.76.0-cp39-cp39-win_amd64.whl", hash = "sha256:acab0277c40eff7143c2323190ea57b9ee5fd353d8190ee9652369fae735668a"},
+ {file = "grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73"},
]
[package.dependencies]
-more-itertools = "*"
+typing-extensions = ">=4.12,<5.0"
[package.extras]
-docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"]
-testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
+protobuf = ["grpcio-tools (>=1.76.0)"]
[[package]]
-name = "jeepney"
-version = "0.8.0"
-description = "Low-level, pure Python DBus protocol wrapper."
+name = "grpcio-status"
+version = "1.76.0"
+description = "Status proto mapping for gRPC"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
files = [
- {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"},
- {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"},
+ {file = "grpcio_status-1.76.0-py3-none-any.whl", hash = "sha256:380568794055a8efbbd8871162df92012e0228a5f6dffaf57f2a00c534103b18"},
+ {file = "grpcio_status-1.76.0.tar.gz", hash = "sha256:25fcbfec74c15d1a1cb5da3fab8ee9672852dc16a5a9eeb5baf7d7a9952943cd"},
]
-[package.extras]
-test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"]
-trio = ["async_generator", "trio"]
+[package.dependencies]
+googleapis-common-protos = ">=1.5.5"
+grpcio = ">=1.76.0"
+protobuf = ">=6.31.1,<7.0.0"
[[package]]
-name = "keyring"
-version = "23.9.1"
-description = "Store and access your passwords safely."
+name = "identify"
+version = "2.6.15"
+description = "File identification library for Python"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
files = [
- {file = "keyring-23.9.1-py3-none-any.whl", hash = "sha256:3565b9e4ea004c96e158d2d332a49f466733d565bb24157a60fd2e49f41a0fd1"},
- {file = "keyring-23.9.1.tar.gz", hash = "sha256:39e4f6572238d2615a82fcaa485e608b84b503cf080dc924c43bbbacb11c1c18"},
+ {file = "identify-2.6.15-py2.py3-none-any.whl", hash = "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757"},
+ {file = "identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf"},
]
-[package.dependencies]
-importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""}
-"jaraco.classes" = "*"
-jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""}
-pywin32-ctypes = {version = "<0.1.0 || >0.1.0,<0.1.1 || >0.1.1", markers = "sys_platform == \"win32\""}
-SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""}
-
[package.extras]
-docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"]
-testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
-
-[[package]]
-name = "mccabe"
-version = "0.6.1"
-description = "McCabe checker, plugin for flake8"
-optional = false
-python-versions = "*"
-files = [
- {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
- {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
-]
+license = ["ukkonen"]
[[package]]
-name = "more-itertools"
-version = "8.14.0"
-description = "More routines for operating on iterables, beyond itertools"
+name = "iniconfig"
+version = "2.1.0"
+description = "brain-dead simple config-ini parsing"
optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.8"
files = [
- {file = "more-itertools-8.14.0.tar.gz", hash = "sha256:c09443cd3d5438b8dafccd867a6bc1cb0894389e90cb53d227456b0b0bccb750"},
- {file = "more_itertools-8.14.0-py3-none-any.whl", hash = "sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2"},
+ {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"},
+ {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"},
]
[[package]]
name = "mypy-extensions"
-version = "0.4.3"
-description = "Experimental type system extensions for programs checked with the mypy typechecker."
+version = "1.1.0"
+description = "Type system extensions for programs checked with the mypy type checker."
optional = false
-python-versions = "*"
+python-versions = ">=3.8"
files = [
- {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
- {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
+ {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"},
+ {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"},
]
[[package]]
name = "nodeenv"
-version = "1.7.0"
+version = "1.10.0"
description = "Node.js virtual environment builder"
optional = false
-python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
files = [
- {file = "nodeenv-1.7.0-py2.py3-none-any.whl", hash = "sha256:27083a7b96a25f2f5e1d8cb4b6317ee8aeda3bdd121394e5ac54e498028a042e"},
- {file = "nodeenv-1.7.0.tar.gz", hash = "sha256:e0e7f7dfb85fc5394c6fe1e8fa98131a2473e04311a45afb6508f7cf1836fa2b"},
+ {file = "nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827"},
+ {file = "nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb"},
]
-[package.dependencies]
-setuptools = "*"
-
[[package]]
name = "numpy"
-version = "1.23.3"
-description = "NumPy is the fundamental package for array computing with Python."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "numpy-1.23.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c9f707b5bb73bf277d812ded9896f9512a43edff72712f31667d0a8c2f8e71ee"},
- {file = "numpy-1.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffcf105ecdd9396e05a8e58e81faaaf34d3f9875f137c7372450baa5d77c9a54"},
- {file = "numpy-1.23.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ea3f98a0ffce3f8f57675eb9119f3f4edb81888b6874bc1953f91e0b1d4f440"},
- {file = "numpy-1.23.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004f0efcb2fe1c0bd6ae1fcfc69cc8b6bf2407e0f18be308612007a0762b4089"},
- {file = "numpy-1.23.3-cp310-cp310-win32.whl", hash = "sha256:98dcbc02e39b1658dc4b4508442a560fe3ca5ca0d989f0df062534e5ca3a5c1a"},
- {file = "numpy-1.23.3-cp310-cp310-win_amd64.whl", hash = "sha256:39a664e3d26ea854211867d20ebcc8023257c1800ae89773cbba9f9e97bae036"},
- {file = "numpy-1.23.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1f27b5322ac4067e67c8f9378b41c746d8feac8bdd0e0ffede5324667b8a075c"},
- {file = "numpy-1.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2ad3ec9a748a8943e6eb4358201f7e1c12ede35f510b1a2221b70af4bb64295c"},
- {file = "numpy-1.23.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdc9febce3e68b697d931941b263c59e0c74e8f18861f4064c1f712562903411"},
- {file = "numpy-1.23.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:301c00cf5e60e08e04d842fc47df641d4a181e651c7135c50dc2762ffe293dbd"},
- {file = "numpy-1.23.3-cp311-cp311-win32.whl", hash = "sha256:7cd1328e5bdf0dee621912f5833648e2daca72e3839ec1d6695e91089625f0b4"},
- {file = "numpy-1.23.3-cp311-cp311-win_amd64.whl", hash = "sha256:8355fc10fd33a5a70981a5b8a0de51d10af3688d7a9e4a34fcc8fa0d7467bb7f"},
- {file = "numpy-1.23.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc6e8da415f359b578b00bcfb1d08411c96e9a97f9e6c7adada554a0812a6cc6"},
- {file = "numpy-1.23.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:22d43376ee0acd547f3149b9ec12eec2f0ca4a6ab2f61753c5b29bb3e795ac4d"},
- {file = "numpy-1.23.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a64403f634e5ffdcd85e0b12c08f04b3080d3e840aef118721021f9b48fc1460"},
- {file = "numpy-1.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efd9d3abe5774404becdb0748178b48a218f1d8c44e0375475732211ea47c67e"},
- {file = "numpy-1.23.3-cp38-cp38-win32.whl", hash = "sha256:f8c02ec3c4c4fcb718fdf89a6c6f709b14949408e8cf2a2be5bfa9c49548fd85"},
- {file = "numpy-1.23.3-cp38-cp38-win_amd64.whl", hash = "sha256:e868b0389c5ccfc092031a861d4e158ea164d8b7fdbb10e3b5689b4fc6498df6"},
- {file = "numpy-1.23.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09f6b7bdffe57fc61d869a22f506049825d707b288039d30f26a0d0d8ea05164"},
- {file = "numpy-1.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8c79d7cf86d049d0c5089231a5bcd31edb03555bd93d81a16870aa98c6cfb79d"},
- {file = "numpy-1.23.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5d5420053bbb3dd64c30e58f9363d7a9c27444c3648e61460c1237f9ec3fa14"},
- {file = "numpy-1.23.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5422d6a1ea9b15577a9432e26608c73a78faf0b9039437b075cf322c92e98e7"},
- {file = "numpy-1.23.3-cp39-cp39-win32.whl", hash = "sha256:c1ba66c48b19cc9c2975c0d354f24058888cdc674bebadceb3cdc9ec403fb5d1"},
- {file = "numpy-1.23.3-cp39-cp39-win_amd64.whl", hash = "sha256:78a63d2df1d947bd9d1b11d35564c2f9e4b57898aae4626638056ec1a231c40c"},
- {file = "numpy-1.23.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:17c0e467ade9bda685d5ac7f5fa729d8d3e76b23195471adae2d6a6941bd2c18"},
- {file = "numpy-1.23.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91b8d6768a75247026e951dce3b2aac79dc7e78622fc148329135ba189813584"},
- {file = "numpy-1.23.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:94c15ca4e52671a59219146ff584488907b1f9b3fc232622b47e2cf832e94fb8"},
- {file = "numpy-1.23.3.tar.gz", hash = "sha256:51bf49c0cd1d52be0a240aa66f3458afc4b95d8993d2d04f0d91fa60c10af6cd"},
+version = "2.0.2"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"},
+ {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"},
+ {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66"},
+ {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b"},
+ {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd"},
+ {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318"},
+ {file = "numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8"},
+ {file = "numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326"},
+ {file = "numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97"},
+ {file = "numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131"},
+ {file = "numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448"},
+ {file = "numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195"},
+ {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57"},
+ {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a"},
+ {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669"},
+ {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951"},
+ {file = "numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9"},
+ {file = "numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15"},
+ {file = "numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4"},
+ {file = "numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc"},
+ {file = "numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b"},
+ {file = "numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e"},
+ {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c"},
+ {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c"},
+ {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692"},
+ {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a"},
+ {file = "numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c"},
+ {file = "numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded"},
+ {file = "numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5"},
+ {file = "numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a"},
+ {file = "numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c"},
+ {file = "numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd"},
+ {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b"},
+ {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729"},
+ {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1"},
+ {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd"},
+ {file = "numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d"},
+ {file = "numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d"},
+ {file = "numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa"},
+ {file = "numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73"},
+ {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8"},
+ {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4"},
+ {file = "numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c"},
+ {file = "numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385"},
+ {file = "numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78"},
]
[[package]]
name = "packaging"
-version = "21.3"
+version = "25.0"
description = "Core utilities for Python packages"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
files = [
- {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
- {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
+ {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"},
+ {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"},
]
-[package.dependencies]
-pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
-
[[package]]
name = "pandas"
-version = "1.4.4"
+version = "2.3.3"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pandas-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:799e6a25932df7e6b1f8dabf63de064e2205dc309abb75956126a0453fd88e97"},
- {file = "pandas-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd1d69a387f7d5e1a5a06a87574d9ef2433847c0e78113ab51c84d3a8bcaeaa"},
- {file = "pandas-1.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:94f2ed1fd51e545ebf71da1e942fe1822ee01e10d3dd2a7276d01351333b7c6b"},
- {file = "pandas-1.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4591cadd06fbbbd16fafc2de6e840c1aaefeae3d5864b688004777ef1bbdede3"},
- {file = "pandas-1.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0022fe6a313df1c4869b5edc012d734c6519a6fffa3cf70930f32e6a1078e49"},
- {file = "pandas-1.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:785e878a6e6d8ddcdb8c181e600855402750052497d7fc6d6b508894f6b8830b"},
- {file = "pandas-1.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c4bb8b0ab9f94207d07e401d24baebfc63057246b1a5e0cd9ee50df85a656871"},
- {file = "pandas-1.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:51c424ca134fdaeac9a4acd719d1ab48046afc60943a489028f0413fdbe9ef1c"},
- {file = "pandas-1.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ce35f947202b0b99c660221d82beb91d2e6d553d55a40b30128204e3e2c63848"},
- {file = "pandas-1.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee6f1848148ed3204235967613b0a32be2d77f214e9623f554511047705c1e04"},
- {file = "pandas-1.4.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7cc960959be28d064faefc0cb2aef854d46b827c004ebea7e79b5497ed83e7d"},
- {file = "pandas-1.4.4-cp38-cp38-win32.whl", hash = "sha256:9d805bce209714b1c1fa29bfb1e42ad87e4c0a825e4b390c56a3e71593b7e8d8"},
- {file = "pandas-1.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:afbddad78a98ec4d2ce08b384b81730de1ccc975b99eb663e6dac43703f36d98"},
- {file = "pandas-1.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a08ceb59db499864c58a9bf85ab6219d527d91f14c0240cc25fa2c261032b2a7"},
- {file = "pandas-1.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0959c41004e3d2d16f39c828d6da66ebee329836a7ecee49fb777ac9ad8a7501"},
- {file = "pandas-1.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87b4194f344dcd14c0f885cecb22005329b38bda10f1aaf7b9596a00ec8a4768"},
- {file = "pandas-1.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d2a7a3c1fea668d56bd91edbd5f2732e0af8feb9d2bf8d9bfacb2dea5fa9536"},
- {file = "pandas-1.4.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a981cfabf51c318a562deb4ae7deec594c07aee7cf18b4594a92c23718ec8275"},
- {file = "pandas-1.4.4-cp39-cp39-win32.whl", hash = "sha256:050aada67a5ec6699a7879e769825b510018a95fb9ac462bb1867483d0974a97"},
- {file = "pandas-1.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:8d4d2fe2863ecddb0ba1979bdda26c8bc2ea138f5a979abe3ba80c0fa4015c91"},
- {file = "pandas-1.4.4.tar.gz", hash = "sha256:ab6c0d738617b675183e5f28db32b5148b694ad9bba0a40c3ea26d96b431db67"},
+python-versions = ">=3.9"
+files = [
+ {file = "pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c"},
+ {file = "pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a"},
+ {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1"},
+ {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838"},
+ {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250"},
+ {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4"},
+ {file = "pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826"},
+ {file = "pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523"},
+ {file = "pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45"},
+ {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66"},
+ {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b"},
+ {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791"},
+ {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151"},
+ {file = "pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c"},
+ {file = "pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53"},
+ {file = "pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35"},
+ {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908"},
+ {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89"},
+ {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98"},
+ {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084"},
+ {file = "pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b"},
+ {file = "pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713"},
+ {file = "pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8"},
+ {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d"},
+ {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac"},
+ {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c"},
+ {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493"},
+ {file = "pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee"},
+ {file = "pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5"},
+ {file = "pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21"},
+ {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78"},
+ {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110"},
+ {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86"},
+ {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc"},
+ {file = "pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0"},
+ {file = "pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593"},
+ {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c"},
+ {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b"},
+ {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6"},
+ {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3"},
+ {file = "pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5"},
+ {file = "pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec"},
+ {file = "pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7"},
+ {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450"},
+ {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5"},
+ {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788"},
+ {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87"},
+ {file = "pandas-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2"},
+ {file = "pandas-2.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8"},
+ {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff"},
+ {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29"},
+ {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73"},
+ {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9"},
+ {file = "pandas-2.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa"},
+ {file = "pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b"},
]
[package.dependencies]
numpy = [
- {version = ">=1.21.0", markers = "python_version >= \"3.10\""},
- {version = ">=1.18.5", markers = "(platform_machine != \"aarch64\" and platform_machine != \"arm64\") and python_version < \"3.10\""},
- {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""},
- {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""},
+ {version = ">=1.22.4", markers = "python_version < \"3.11\""},
+ {version = ">=1.23.2", markers = "python_version == \"3.11\""},
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
-python-dateutil = ">=2.8.1"
+python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
+tzdata = ">=2022.7"
[package.extras]
-test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"]
+all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"]
+aws = ["s3fs (>=2022.11.0)"]
+clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"]
+compression = ["zstandard (>=0.19.0)"]
+computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"]
+consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
+excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"]
+feather = ["pyarrow (>=10.0.1)"]
+fss = ["fsspec (>=2022.11.0)"]
+gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"]
+hdf5 = ["tables (>=3.8.0)"]
+html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"]
+mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"]
+output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"]
+parquet = ["pyarrow (>=10.0.1)"]
+performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"]
+plot = ["matplotlib (>=3.6.3)"]
+postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"]
+pyarrow = ["pyarrow (>=10.0.1)"]
+spss = ["pyreadstat (>=1.2.0)"]
+sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"]
+test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
+xml = ["lxml (>=4.9.2)"]
[[package]]
name = "pathspec"
-version = "0.10.1"
+version = "1.0.3"
description = "Utility library for gitignore style pattern matching of file paths."
optional = false
-python-versions = ">=3.7"
-files = [
- {file = "pathspec-0.10.1-py3-none-any.whl", hash = "sha256:46846318467efc4556ccfd27816e004270a9eeeeb4d062ce5e6fc7a87c573f93"},
- {file = "pathspec-0.10.1.tar.gz", hash = "sha256:7ace6161b621d31e7902eb6b5ae148d12cfd23f4a249b9ffb6b9fee12084323d"},
-]
-
-[[package]]
-name = "pkginfo"
-version = "1.8.3"
-description = "Query metadatdata from sdists / bdists / installed packages."
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+python-versions = ">=3.9"
files = [
- {file = "pkginfo-1.8.3-py2.py3-none-any.whl", hash = "sha256:848865108ec99d4901b2f7e84058b6e7660aae8ae10164e015a6dcf5b242a594"},
- {file = "pkginfo-1.8.3.tar.gz", hash = "sha256:a84da4318dd86f870a9447a8c98340aa06216bfc6f2b7bdc4b8766984ae1867c"},
+ {file = "pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c"},
+ {file = "pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d"},
]
[package.extras]
-testing = ["coverage", "nose"]
+hyperscan = ["hyperscan (>=0.7)"]
+optional = ["typing-extensions (>=4)"]
+re2 = ["google-re2 (>=1.1)"]
+tests = ["pytest (>=9)", "typing-extensions (>=4.15)"]
[[package]]
name = "platformdirs"
-version = "2.5.2"
-description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+version = "4.4.0"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
files = [
- {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"},
- {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"},
+ {file = "platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85"},
+ {file = "platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf"},
]
[package.extras]
-docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx (>=4)", "sphinx-autodoc-typehints (>=1.12)"]
-test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"]
+docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"]
+type = ["mypy (>=1.14.1)"]
[[package]]
name = "pluggy"
-version = "1.0.0"
+version = "1.6.0"
description = "plugin and hook calling mechanisms for python"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.9"
files = [
- {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
- {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
+ {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"},
+ {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"},
]
[package.extras]
dev = ["pre-commit", "tox"]
-testing = ["pytest", "pytest-benchmark"]
+testing = ["coverage", "pytest", "pytest-benchmark"]
[[package]]
name = "pre-commit"
-version = "2.20.0"
+version = "3.8.0"
description = "A framework for managing and maintaining multi-language pre-commit hooks."
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
files = [
- {file = "pre_commit-2.20.0-py2.py3-none-any.whl", hash = "sha256:51a5ba7c480ae8072ecdb6933df22d2f812dc897d5fe848778116129a681aac7"},
- {file = "pre_commit-2.20.0.tar.gz", hash = "sha256:a978dac7bc9ec0bcee55c18a277d553b0f419d259dadb4b9418ff2d00eb43959"},
+ {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"},
+ {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"},
]
[package.dependencies]
@@ -739,225 +635,198 @@ cfgv = ">=2.0.0"
identify = ">=1.0.0"
nodeenv = ">=0.11.1"
pyyaml = ">=5.1"
-toml = "*"
-virtualenv = ">=20.0.8"
+virtualenv = ">=20.10.0"
[[package]]
-name = "py"
-version = "1.11.0"
-description = "library with cross-python path, ini-parsing, io, code, log facilities"
+name = "protobuf"
+version = "6.33.4"
+description = ""
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = ">=3.9"
files = [
- {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
- {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
+ {file = "protobuf-6.33.4-cp310-abi3-win32.whl", hash = "sha256:918966612c8232fc6c24c78e1cd89784307f5814ad7506c308ee3cf86662850d"},
+ {file = "protobuf-6.33.4-cp310-abi3-win_amd64.whl", hash = "sha256:8f11ffae31ec67fc2554c2ef891dcb561dae9a2a3ed941f9e134c2db06657dbc"},
+ {file = "protobuf-6.33.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2fe67f6c014c84f655ee06f6f66213f9254b3a8b6bda6cda0ccd4232c73c06f0"},
+ {file = "protobuf-6.33.4-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:757c978f82e74d75cba88eddec479df9b99a42b31193313b75e492c06a51764e"},
+ {file = "protobuf-6.33.4-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c7c64f259c618f0bef7bee042075e390debbf9682334be2b67408ec7c1c09ee6"},
+ {file = "protobuf-6.33.4-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:3df850c2f8db9934de4cf8f9152f8dc2558f49f298f37f90c517e8e5c84c30e9"},
+ {file = "protobuf-6.33.4-cp39-cp39-win32.whl", hash = "sha256:955478a89559fa4568f5a81dce77260eabc5c686f9e8366219ebd30debf06aa6"},
+ {file = "protobuf-6.33.4-cp39-cp39-win_amd64.whl", hash = "sha256:0f12ddbf96912690c3582f9dffb55530ef32015ad8e678cd494312bd78314c4f"},
+ {file = "protobuf-6.33.4-py3-none-any.whl", hash = "sha256:1fe3730068fcf2e595816a6c34fe66eeedd37d51d0400b72fabc848811fdc1bc"},
+ {file = "protobuf-6.33.4.tar.gz", hash = "sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91"},
]
[[package]]
name = "py4j"
-version = "0.10.9.5"
+version = "0.10.9.7"
description = "Enables Python programs to dynamically access arbitrary Java objects"
-optional = true
-python-versions = "*"
-files = [
- {file = "py4j-0.10.9.5-py2.py3-none-any.whl", hash = "sha256:52d171a6a2b031d8a5d1de6efe451cf4f5baff1a2819aabc3741c8406539ba04"},
- {file = "py4j-0.10.9.5.tar.gz", hash = "sha256:276a4a3c5a2154df1860ef3303a927460e02e97b047dc0a47c1c3fb8cce34db6"},
-]
-
-[[package]]
-name = "pycodestyle"
-version = "2.7.0"
-description = "Python style guide checker"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-files = [
- {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"},
- {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"},
-]
-
-[[package]]
-name = "pycparser"
-version = "2.21"
-description = "C parser in Python"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-files = [
- {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
- {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
-]
-
-[[package]]
-name = "pydocstyle"
-version = "6.1.1"
-description = "Python docstring style checker"
optional = false
-python-versions = ">=3.6"
+python-versions = "*"
files = [
- {file = "pydocstyle-6.1.1-py3-none-any.whl", hash = "sha256:6987826d6775056839940041beef5c08cc7e3d71d63149b48e36727f70144dc4"},
- {file = "pydocstyle-6.1.1.tar.gz", hash = "sha256:1d41b7c459ba0ee6c345f2eb9ae827cab14a7533a88c5c6f7e94923f72df92dc"},
+ {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"},
+ {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"},
+]
+
+[[package]]
+name = "pyarrow"
+version = "21.0.0"
+description = "Python library for Apache Arrow"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "pyarrow-21.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e563271e2c5ff4d4a4cbeb2c83d5cf0d4938b891518e676025f7268c6fe5fe26"},
+ {file = "pyarrow-21.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fee33b0ca46f4c85443d6c450357101e47d53e6c3f008d658c27a2d020d44c79"},
+ {file = "pyarrow-21.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7be45519b830f7c24b21d630a31d48bcebfd5d4d7f9d3bdb49da9cdf6d764edb"},
+ {file = "pyarrow-21.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:26bfd95f6bff443ceae63c65dc7e048670b7e98bc892210acba7e4995d3d4b51"},
+ {file = "pyarrow-21.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd04ec08f7f8bd113c55868bd3fc442a9db67c27af098c5f814a3091e71cc61a"},
+ {file = "pyarrow-21.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9b0b14b49ac10654332a805aedfc0147fb3469cbf8ea951b3d040dab12372594"},
+ {file = "pyarrow-21.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9d9f8bcb4c3be7738add259738abdeddc363de1b80e3310e04067aa1ca596634"},
+ {file = "pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b"},
+ {file = "pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10"},
+ {file = "pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e"},
+ {file = "pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569"},
+ {file = "pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e"},
+ {file = "pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c"},
+ {file = "pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6"},
+ {file = "pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd"},
+ {file = "pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876"},
+ {file = "pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d"},
+ {file = "pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e"},
+ {file = "pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82"},
+ {file = "pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623"},
+ {file = "pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18"},
+ {file = "pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a"},
+ {file = "pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe"},
+ {file = "pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd"},
+ {file = "pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61"},
+ {file = "pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d"},
+ {file = "pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99"},
+ {file = "pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636"},
+ {file = "pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da"},
+ {file = "pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7"},
+ {file = "pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6"},
+ {file = "pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8"},
+ {file = "pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503"},
+ {file = "pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79"},
+ {file = "pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10"},
+ {file = "pyarrow-21.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a7f6524e3747e35f80744537c78e7302cd41deee8baa668d56d55f77d9c464b3"},
+ {file = "pyarrow-21.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:203003786c9fd253ebcafa44b03c06983c9c8d06c3145e37f1b76a1f317aeae1"},
+ {file = "pyarrow-21.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b4d97e297741796fead24867a8dabf86c87e4584ccc03167e4a811f50fdf74d"},
+ {file = "pyarrow-21.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:898afce396b80fdda05e3086b4256f8677c671f7b1d27a6976fa011d3fd0a86e"},
+ {file = "pyarrow-21.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:067c66ca29aaedae08218569a114e413b26e742171f526e828e1064fcdec13f4"},
+ {file = "pyarrow-21.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0c4e75d13eb76295a49e0ea056eb18dbd87d81450bfeb8afa19a7e5a75ae2ad7"},
+ {file = "pyarrow-21.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdc4c17afda4dab2a9c0b79148a43a7f4e1094916b3e18d8975bfd6d6d52241f"},
+ {file = "pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc"},
]
-[package.dependencies]
-snowballstemmer = "*"
-
[package.extras]
-toml = ["toml"]
-
-[[package]]
-name = "pyflakes"
-version = "2.3.1"
-description = "passive checker of Python programs"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-files = [
- {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"},
- {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"},
-]
+test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
[[package]]
name = "pygments"
-version = "2.13.0"
+version = "2.19.2"
description = "Pygments is a syntax highlighting package written in Python."
optional = false
-python-versions = ">=3.6"
-files = [
- {file = "Pygments-2.13.0-py3-none-any.whl", hash = "sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42"},
- {file = "Pygments-2.13.0.tar.gz", hash = "sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1"},
-]
-
-[package.extras]
-plugins = ["importlib-metadata"]
-
-[[package]]
-name = "pyparsing"
-version = "3.0.9"
-description = "pyparsing module - Classes and methods to define and execute parsing grammars"
-optional = false
-python-versions = ">=3.6.8"
+python-versions = ">=3.8"
files = [
- {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
- {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
+ {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"},
+ {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"},
]
[package.extras]
-diagrams = ["jinja2", "railroad-diagrams"]
+windows-terminal = ["colorama (>=0.4.6)"]
[[package]]
name = "pyspark"
-version = "3.2.2"
+version = "3.5.0"
description = "Apache Spark Python API"
-optional = true
-python-versions = ">=3.6"
+optional = false
+python-versions = ">=3.8"
files = [
- {file = "pyspark-3.2.2.tar.gz", hash = "sha256:5455214cf0b83d4a184cda25ca3b0812481915353b180cf7d7ac227728a4d99e"},
+ {file = "pyspark-3.5.0.tar.gz", hash = "sha256:d41a9b76bd2aca370a6100d075c029e22ba44c5940927877e9435a3a9c566558"},
]
[package.dependencies]
-py4j = "0.10.9.5"
+googleapis-common-protos = {version = ">=1.56.4", optional = true, markers = "extra == \"connect\""}
+grpcio = {version = ">=1.56.0", optional = true, markers = "extra == \"connect\""}
+grpcio-status = {version = ">=1.56.0", optional = true, markers = "extra == \"connect\""}
+numpy = {version = ">=1.15", optional = true, markers = "extra == \"connect\""}
+pandas = {version = ">=1.0.5", optional = true, markers = "extra == \"connect\""}
+py4j = "0.10.9.7"
+pyarrow = {version = ">=4.0.0", optional = true, markers = "extra == \"connect\""}
[package.extras]
-ml = ["numpy (>=1.7)"]
-mllib = ["numpy (>=1.7)"]
-pandas-on-spark = ["numpy (>=1.14)", "pandas (>=0.23.2)", "pyarrow (>=1.0.0)"]
-sql = ["pandas (>=0.23.2)", "pyarrow (>=1.0.0)"]
+connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+ml = ["numpy (>=1.15)"]
+mllib = ["numpy (>=1.15)"]
+pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
[[package]]
name = "pytest"
-version = "6.2.5"
+version = "8.4.2"
description = "pytest: simple powerful testing with Python"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.9"
files = [
- {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"},
- {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
+ {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"},
+ {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"},
]
[package.dependencies]
-atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
-attrs = ">=19.2.0"
-colorama = {version = "*", markers = "sys_platform == \"win32\""}
-iniconfig = "*"
-packaging = "*"
-pluggy = ">=0.12,<2.0"
-py = ">=1.8.2"
-toml = "*"
+colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""}
+iniconfig = ">=1"
+packaging = ">=20"
+pluggy = ">=1.5,<2"
+pygments = ">=2.7.2"
+tomli = {version = ">=1", markers = "python_version < \"3.11\""}
[package.extras]
-testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
+dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"]
[[package]]
name = "pytest-cov"
-version = "2.12.1"
+version = "4.1.0"
description = "Pytest plugin for measuring coverage."
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = ">=3.7"
files = [
- {file = "pytest-cov-2.12.1.tar.gz", hash = "sha256:261ceeb8c227b726249b376b8526b600f38667ee314f910353fa318caa01f4d7"},
- {file = "pytest_cov-2.12.1-py2.py3-none-any.whl", hash = "sha256:261bb9e47e65bd099c89c3edf92972865210c36813f80ede5277dceb77a4a62a"},
+ {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"},
+ {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"},
]
[package.dependencies]
-coverage = ">=5.2.1"
+coverage = {version = ">=5.2.1", extras = ["toml"]}
pytest = ">=4.6"
-toml = "*"
[package.extras]
testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"]
-[[package]]
-name = "pytest-flake8"
-version = "1.1.0"
-description = "pytest plugin to check FLAKE8 requirements"
-optional = false
-python-versions = "*"
-files = [
- {file = "pytest-flake8-1.1.0.tar.gz", hash = "sha256:358d449ca06b80dbadcb43506cd3e38685d273b4968ac825da871bd4cc436202"},
- {file = "pytest_flake8-1.1.0-py2.py3-none-any.whl", hash = "sha256:f1b19dad0b9f0aa651d391c9527ebc20ac1a0f847aa78581094c747462bfa182"},
-]
-
-[package.dependencies]
-flake8 = ">=3.5"
-pytest = ">=3.5"
-
[[package]]
name = "pytest-rerunfailures"
-version = "9.1.1"
+version = "14.0"
description = "pytest plugin to re-run tests to eliminate flaky failures"
optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.8"
files = [
- {file = "pytest-rerunfailures-9.1.1.tar.gz", hash = "sha256:1cb11a17fc121b3918414eb5eaf314ee325f2e693ac7cb3f6abf7560790827f2"},
- {file = "pytest_rerunfailures-9.1.1-py3-none-any.whl", hash = "sha256:2eb7d0ad651761fbe80e064b0fd415cf6730cdbc53c16a145fd84b66143e609f"},
+ {file = "pytest-rerunfailures-14.0.tar.gz", hash = "sha256:4a400bcbcd3c7a4ad151ab8afac123d90eca3abe27f98725dc4d9702887d2e92"},
+ {file = "pytest_rerunfailures-14.0-py3-none-any.whl", hash = "sha256:4197bdd2eaeffdbf50b5ea6e7236f47ff0e44d1def8dae08e409f536d84e7b32"},
]
[package.dependencies]
-pytest = ">=5.0"
-setuptools = ">=40.0"
-
-[[package]]
-name = "pytest-runner"
-version = "5.3.2"
-description = "Invoke py.test as distutils command with dependency resolution"
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "pytest-runner-5.3.2.tar.gz", hash = "sha256:48934ec94301f6727d30615af1960539ff62063f6c9b71b7227174e51ba5fb34"},
- {file = "pytest_runner-5.3.2-py3-none-any.whl", hash = "sha256:c7d785ea6c612396c11ddbaf467764d2cc746ef96a713fbe1a296c221503b7c3"},
-]
-
-[package.extras]
-docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
-testing = ["pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy", "pytest-virtualenv"]
+packaging = ">=17.1"
+pytest = ">=7.2"
[[package]]
name = "python-dateutil"
-version = "2.8.2"
+version = "2.9.0.post0"
description = "Extensions to the standard Python datetime module"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
files = [
- {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
- {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
+ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+ {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
]
[package.dependencies]
@@ -965,355 +834,231 @@ six = ">=1.5"
[[package]]
name = "pytz"
-version = "2022.2.1"
+version = "2025.2"
description = "World timezone definitions, modern and historical"
optional = false
python-versions = "*"
files = [
- {file = "pytz-2022.2.1-py2.py3-none-any.whl", hash = "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197"},
- {file = "pytz-2022.2.1.tar.gz", hash = "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5"},
-]
-
-[[package]]
-name = "pywin32-ctypes"
-version = "0.2.0"
-description = ""
-optional = false
-python-versions = "*"
-files = [
- {file = "pywin32-ctypes-0.2.0.tar.gz", hash = "sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942"},
- {file = "pywin32_ctypes-0.2.0-py2.py3-none-any.whl", hash = "sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"},
+ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"},
+ {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
]
[[package]]
name = "pyyaml"
-version = "6.0"
+version = "6.0.3"
description = "YAML parser and emitter for Python"
optional = false
-python-versions = ">=3.6"
-files = [
- {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
- {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
- {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
- {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
- {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
- {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
- {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
- {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
- {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
- {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
- {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
- {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
- {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
- {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
- {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
- {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
- {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
- {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
- {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
- {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
- {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
- {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
- {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
- {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
- {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
- {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
- {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
- {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
- {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
- {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
- {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
- {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
- {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
- {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
- {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
- {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
- {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
- {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
- {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
- {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
-]
-
-[[package]]
-name = "readme-renderer"
-version = "37.1"
-description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "readme_renderer-37.1-py3-none-any.whl", hash = "sha256:16c914ca7731fd062a316a2a8e5434a175ee34661a608af771a60c881f528a34"},
- {file = "readme_renderer-37.1.tar.gz", hash = "sha256:96768c069729f69176f514477e57f2f8cd543fbb2cd7bad372976249fa509a0c"},
-]
-
-[package.dependencies]
-bleach = ">=2.1.0"
-docutils = ">=0.13.1"
-Pygments = ">=2.5.1"
-
-[package.extras]
-md = ["cmarkgfm (>=0.8.0)"]
-
-[[package]]
-name = "requests"
-version = "2.31.0"
-description = "Python HTTP for Humans."
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
- {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
-]
-
-[package.dependencies]
-certifi = ">=2017.4.17"
-charset-normalizer = ">=2,<4"
-idna = ">=2.5,<4"
-urllib3 = ">=1.21.1,<3"
-
-[package.extras]
-socks = ["PySocks (>=1.5.6,!=1.5.7)"]
-use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
-
-[[package]]
-name = "requests-toolbelt"
-version = "0.9.1"
-description = "A utility belt for advanced users of python-requests"
-optional = false
-python-versions = "*"
-files = [
- {file = "requests-toolbelt-0.9.1.tar.gz", hash = "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0"},
- {file = "requests_toolbelt-0.9.1-py2.py3-none-any.whl", hash = "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f"},
-]
-
-[package.dependencies]
-requests = ">=2.0.1,<3.0.0"
-
-[[package]]
-name = "rfc3986"
-version = "2.0.0"
-description = "Validating URI References per RFC 3986"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "rfc3986-2.0.0-py2.py3-none-any.whl", hash = "sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd"},
- {file = "rfc3986-2.0.0.tar.gz", hash = "sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c"},
-]
-
-[package.extras]
-idna2008 = ["idna"]
-
-[[package]]
-name = "safety"
-version = "1.10.3"
-description = "Checks installed dependencies for known vulnerabilities."
-optional = false
-python-versions = ">=3.5"
-files = [
- {file = "safety-1.10.3-py2.py3-none-any.whl", hash = "sha256:5f802ad5df5614f9622d8d71fedec2757099705c2356f862847c58c6dfe13e84"},
- {file = "safety-1.10.3.tar.gz", hash = "sha256:30e394d02a20ac49b7f65292d19d38fa927a8f9582cdfd3ad1adbbc66c641ad5"},
-]
-
-[package.dependencies]
-Click = ">=6.0"
-dparse = ">=0.5.1"
-packaging = "*"
-requests = "*"
-setuptools = "*"
-
-[[package]]
-name = "secretstorage"
-version = "3.3.3"
-description = "Python bindings to FreeDesktop.org Secret Service API"
-optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
files = [
- {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"},
- {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"},
+ {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"},
+ {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"},
+ {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3"},
+ {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6"},
+ {file = "PyYAML-6.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369"},
+ {file = "PyYAML-6.0.3-cp38-cp38-win32.whl", hash = "sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295"},
+ {file = "PyYAML-6.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b"},
+ {file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"},
+ {file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"},
+ {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8"},
+ {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198"},
+ {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b"},
+ {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0"},
+ {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69"},
+ {file = "pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e"},
+ {file = "pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c"},
+ {file = "pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e"},
+ {file = "pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824"},
+ {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c"},
+ {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00"},
+ {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d"},
+ {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a"},
+ {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4"},
+ {file = "pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b"},
+ {file = "pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf"},
+ {file = "pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196"},
+ {file = "pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0"},
+ {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28"},
+ {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c"},
+ {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc"},
+ {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e"},
+ {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea"},
+ {file = "pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5"},
+ {file = "pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b"},
+ {file = "pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd"},
+ {file = "pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8"},
+ {file = "pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1"},
+ {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c"},
+ {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5"},
+ {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6"},
+ {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6"},
+ {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be"},
+ {file = "pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26"},
+ {file = "pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c"},
+ {file = "pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb"},
+ {file = "pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac"},
+ {file = "pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310"},
+ {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7"},
+ {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788"},
+ {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5"},
+ {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764"},
+ {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35"},
+ {file = "pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac"},
+ {file = "pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9"},
+ {file = "pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b"},
+ {file = "pyyaml-6.0.3-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da"},
+ {file = "pyyaml-6.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917"},
+ {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9"},
+ {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5"},
+ {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a"},
+ {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926"},
+ {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7"},
+ {file = "pyyaml-6.0.3-cp39-cp39-win32.whl", hash = "sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0"},
+ {file = "pyyaml-6.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007"},
+ {file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"},
]
-[package.dependencies]
-cryptography = ">=2.0"
-jeepney = ">=0.6"
-
[[package]]
name = "setuptools"
-version = "70.0.0"
+version = "80.9.0"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
files = [
- {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"},
- {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"},
+ {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
+ {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
]
[package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
-testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.8.0)"]
+core = ["importlib_metadata (>=6)", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
+type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.14.*)", "pytest-mypy"]
[[package]]
name = "six"
-version = "1.16.0"
+version = "1.17.0"
description = "Python 2 and 3 compatibility utilities"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
-files = [
- {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
- {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
-]
-
-[[package]]
-name = "snowballstemmer"
-version = "2.2.0"
-description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms."
-optional = false
-python-versions = "*"
-files = [
- {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
- {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
-]
-
-[[package]]
-name = "toml"
-version = "0.10.2"
-description = "Python Library for Tom's Obvious, Minimal Language"
-optional = false
-python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
files = [
- {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
- {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
+ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
+ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
]
[[package]]
name = "tomli"
-version = "1.2.3"
+version = "2.4.0"
description = "A lil' TOML parser"
optional = false
-python-versions = ">=3.6"
-files = [
- {file = "tomli-1.2.3-py3-none-any.whl", hash = "sha256:e3069e4be3ead9668e21cb9b074cd948f7b3113fd9c8bba083f48247aab8b11c"},
- {file = "tomli-1.2.3.tar.gz", hash = "sha256:05b6166bff487dc068d322585c7ea4ef78deed501cc124060e0f238e89a9231f"},
-]
-
-[[package]]
-name = "tqdm"
-version = "4.64.1"
-description = "Fast, Extensible Progress Meter"
-optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
-files = [
- {file = "tqdm-4.64.1-py2.py3-none-any.whl", hash = "sha256:6fee160d6ffcd1b1c68c65f14c829c22832bc401726335ce92c52d395944a6a1"},
- {file = "tqdm-4.64.1.tar.gz", hash = "sha256:5f4f682a004951c1b450bc753c710e9280c5746ce6ffedee253ddbcbf54cf1e4"},
-]
-
-[package.dependencies]
-colorama = {version = "*", markers = "platform_system == \"Windows\""}
-
-[package.extras]
-dev = ["py-make (>=0.1.0)", "twine", "wheel"]
-notebook = ["ipywidgets (>=6)"]
-slack = ["slack-sdk"]
-telegram = ["requests"]
-
-[[package]]
-name = "twine"
-version = "3.8.0"
-description = "Collection of utilities for publishing packages on PyPI"
-optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
files = [
- {file = "twine-3.8.0-py3-none-any.whl", hash = "sha256:d0550fca9dc19f3d5e8eadfce0c227294df0a2a951251a4385797c8a6198b7c8"},
- {file = "twine-3.8.0.tar.gz", hash = "sha256:8efa52658e0ae770686a13b675569328f1fba9837e5de1867bfe5f46a9aefe19"},
+ {file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"},
+ {file = "tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9"},
+ {file = "tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95"},
+ {file = "tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76"},
+ {file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d"},
+ {file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576"},
+ {file = "tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a"},
+ {file = "tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa"},
+ {file = "tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614"},
+ {file = "tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1"},
+ {file = "tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8"},
+ {file = "tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a"},
+ {file = "tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1"},
+ {file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b"},
+ {file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51"},
+ {file = "tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729"},
+ {file = "tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da"},
+ {file = "tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3"},
+ {file = "tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0"},
+ {file = "tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e"},
+ {file = "tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4"},
+ {file = "tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e"},
+ {file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c"},
+ {file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f"},
+ {file = "tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86"},
+ {file = "tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87"},
+ {file = "tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132"},
+ {file = "tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6"},
+ {file = "tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc"},
+ {file = "tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66"},
+ {file = "tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d"},
+ {file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702"},
+ {file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8"},
+ {file = "tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776"},
+ {file = "tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475"},
+ {file = "tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2"},
+ {file = "tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9"},
+ {file = "tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0"},
+ {file = "tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df"},
+ {file = "tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d"},
+ {file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f"},
+ {file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b"},
+ {file = "tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087"},
+ {file = "tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd"},
+ {file = "tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4"},
+ {file = "tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"},
+ {file = "tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c"},
]
-[package.dependencies]
-colorama = ">=0.4.3"
-importlib-metadata = ">=3.6"
-keyring = ">=15.1"
-pkginfo = ">=1.8.1"
-readme-renderer = ">=21.0"
-requests = ">=2.20"
-requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0"
-rfc3986 = ">=1.4.0"
-tqdm = ">=4.14"
-urllib3 = ">=1.26.0"
-
[[package]]
name = "typing-extensions"
-version = "4.3.0"
-description = "Backported and Experimental Type Hints for Python 3.7+"
+version = "4.15.0"
+description = "Backported and Experimental Type Hints for Python 3.9+"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.9"
files = [
- {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"},
- {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"},
+ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"},
+ {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"},
]
[[package]]
-name = "urllib3"
-version = "1.26.12"
-description = "HTTP library with thread-safe connection pooling, file post, and more."
+name = "tzdata"
+version = "2025.3"
+description = "Provider of IANA time zone data"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4"
+python-versions = ">=2"
files = [
- {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"},
- {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"},
+ {file = "tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1"},
+ {file = "tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7"},
]
-[package.extras]
-brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
-secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
-socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
-
[[package]]
name = "virtualenv"
-version = "20.16.5"
+version = "20.36.1"
description = "Virtual Python Environment builder"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
files = [
- {file = "virtualenv-20.16.5-py3-none-any.whl", hash = "sha256:d07dfc5df5e4e0dbc92862350ad87a36ed505b978f6c39609dc489eadd5b0d27"},
- {file = "virtualenv-20.16.5.tar.gz", hash = "sha256:227ea1b9994fdc5ea31977ba3383ef296d7472ea85be9d6732e42a91c04e80da"},
+ {file = "virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f"},
+ {file = "virtualenv-20.36.1.tar.gz", hash = "sha256:8befb5c81842c641f8ee658481e42641c68b5eab3521d8e092d18320902466ba"},
]
[package.dependencies]
-distlib = ">=0.3.5,<1"
-filelock = ">=3.4.1,<4"
-platformdirs = ">=2.4,<3"
-
-[package.extras]
-docs = ["proselint (>=0.13)", "sphinx (>=5.1.1)", "sphinx-argparse (>=0.3.1)", "sphinx-rtd-theme (>=1)", "towncrier (>=21.9)"]
-testing = ["coverage (>=6.2)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=21.3)", "pytest (>=7.0.1)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.2)", "pytest-mock (>=3.6.1)", "pytest-randomly (>=3.10.3)", "pytest-timeout (>=2.1)"]
-
-[[package]]
-name = "webencodings"
-version = "0.5.1"
-description = "Character encoding aliases for legacy web content"
-optional = false
-python-versions = "*"
-files = [
- {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
- {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
-]
-
-[[package]]
-name = "zipp"
-version = "3.8.1"
-description = "Backport of pathlib-compatible object wrapper for zip files"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"},
- {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"},
+distlib = ">=0.3.7,<1"
+filelock = [
+ {version = ">=3.16.1,<4", markers = "python_version < \"3.10\""},
+ {version = ">=3.20.1,<4", markers = "python_version >= \"3.10\""},
]
+platformdirs = ">=3.9.1,<5"
+typing-extensions = {version = ">=4.13.2", markers = "python_version < \"3.11\""}
[package.extras]
-docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"]
-testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
-
-[extras]
-pyspark = ["pyspark"]
+docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
+test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
[metadata]
lock-version = "2.0"
-python-versions = ">=3.8,<4"
-content-hash = "19b8066a730bfeebe017b2f1be860d76a005a46d64784d9458c555e018c77be5"
+python-versions = ">=3.9,<4"
+content-hash = "18db29f1829ab8baebdd68c486c74b5e7e4304a6d344a26773685b07b85fe7c3"
diff --git a/pydeequ/__init__.py b/pydeequ/__init__.py
index 49a06e5..6d2202f 100644
--- a/pydeequ/__init__.py
+++ b/pydeequ/__init__.py
@@ -11,35 +11,89 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
-"""Placeholder docstrings"""
-__version__ = "1.2.0"
+"""
+PyDeequ - Python API for Deequ data quality library.
-from pyspark.sql import SparkSession
+For PyDeequ 2.0 (Spark Connect), use:
+ from pydeequ.v2 import VerificationSuite, Check, CheckLevel
+ from pydeequ.v2.predicates import eq, gte
-from pydeequ.analyzers import AnalysisRunner
-from pydeequ.checks import Check, CheckLevel
-from pydeequ.configs import DEEQU_MAVEN_COORD
-from pydeequ.profiles import ColumnProfilerRunner
+For PyDeequ 1.x (Legacy Py4J), set SPARK_VERSION env var and use:
+ from pydeequ import deequ_maven_coord
+ from pydeequ.checks import Check, CheckLevel
+"""
+__version__ = "2.0.0b1"
-deequ_maven_coord = DEEQU_MAVEN_COORD
-f2j_maven_coord = "net.sourceforge.f2j:arpack_combined_all"
+# Legacy imports are deferred to avoid requiring SPARK_VERSION for V2 users.
+# V2 users should import from pydeequ.v2 directly.
+_deequ_maven_coord = None
+_f2j_maven_coord = "net.sourceforge.f2j:arpack_combined_all"
-class PyDeequSession:
- """
- For interacting with PyDeequ Modules at the "Runner" Level
- """
- def __init__(self, spark_session: SparkSession):
- self._spark_session = spark_session
- self._sc = spark_session.sparkContext
- self._jvm = spark_session._jvm
+def __getattr__(name):
+ """Lazy loading for legacy module attributes."""
+ global _deequ_maven_coord
- def createColumnProfileRunner(self):
- return ColumnProfilerRunner(self._spark_session)
+ if name == "deequ_maven_coord":
+ if _deequ_maven_coord is None:
+ from pydeequ.configs import DEEQU_MAVEN_COORD
+ _deequ_maven_coord = DEEQU_MAVEN_COORD
+ return _deequ_maven_coord
- def createAnalysisRunner(self):
- return AnalysisRunner(self._spark_session)
+ if name == "f2j_maven_coord":
+ return _f2j_maven_coord
- def createCheck(self, level: CheckLevel, description: str, constraints=None):
- return Check(self._spark_session, level, description, constraints)
+ if name in ("AnalysisRunner", "Check", "CheckLevel", "ColumnProfilerRunner",
+ "PyDeequSession", "DEEQU_MAVEN_COORD"):
+ # Import legacy modules on demand
+ if name == "AnalysisRunner":
+ from pydeequ.analyzers import AnalysisRunner
+ return AnalysisRunner
+ elif name == "Check":
+ from pydeequ.checks import Check
+ return Check
+ elif name == "CheckLevel":
+ from pydeequ.checks import CheckLevel
+ return CheckLevel
+ elif name == "ColumnProfilerRunner":
+ from pydeequ.profiles import ColumnProfilerRunner
+ return ColumnProfilerRunner
+ elif name == "DEEQU_MAVEN_COORD":
+ from pydeequ.configs import DEEQU_MAVEN_COORD
+ return DEEQU_MAVEN_COORD
+
+ if name == "PyDeequSession":
+ # Return the lazily-defined class
+ return _get_pydeequ_session_class()
+
+ raise AttributeError(f"module 'pydeequ' has no attribute '{name}'")
+
+
+def _get_pydeequ_session_class():
+ """Lazily create PyDeequSession class to avoid importing SparkSession at module load."""
+ from pyspark.sql import SparkSession
+ from pydeequ.analyzers import AnalysisRunner
+ from pydeequ.checks import Check, CheckLevel
+ from pydeequ.profiles import ColumnProfilerRunner
+
+ class PyDeequSession:
+ """
+ For interacting with PyDeequ Modules at the "Runner" Level
+ """
+
+ def __init__(self, spark_session: SparkSession):
+ self._spark_session = spark_session
+ self._sc = spark_session.sparkContext
+ self._jvm = spark_session._jvm
+
+ def createColumnProfileRunner(self):
+ return ColumnProfilerRunner(self._spark_session)
+
+ def createAnalysisRunner(self):
+ return AnalysisRunner(self._spark_session)
+
+ def createCheck(self, level: CheckLevel, description: str, constraints=None):
+ return Check(self._spark_session, level, description, constraints)
+
+ return PyDeequSession
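The new `__getattr__` above relies on PEP 562 module-level attribute hooks (Python 3.7+), so importing `pydeequ` itself no longer loads `pyspark` or `pydeequ.configs`. A minimal sketch of the resulting behaviour, assuming the legacy modules are still installed alongside V2:

```python
# Sketch: importing the package touches no legacy code paths.
import pydeequ

print(pydeequ.__version__)               # "2.0.0b1"

# First access of a legacy name goes through __getattr__, which performs
# the deferred import at that moment.
CheckLevel = pydeequ.CheckLevel          # imports pydeequ.checks on demand
PyDeequSession = pydeequ.PyDeequSession  # built lazily by _get_pydeequ_session_class()
```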
diff --git a/pydeequ/analyzers.py b/pydeequ/analyzers.py
index 3952c93..fa711c1 100644
--- a/pydeequ/analyzers.py
+++ b/pydeequ/analyzers.py
@@ -10,7 +10,6 @@
from pydeequ.repository import MetricsRepository, ResultKey
from enum import Enum
from pydeequ.scala_utils import to_scala_seq
-from pydeequ.configs import SPARK_VERSION
class _AnalyzerObject:
"""
@@ -852,4 +851,4 @@ def _create_java_object(self, jvm):
elif self == DataTypeInstances.Fractional:
return dataType_analyzers_class.Fractional()
else:
- raise ValueError(f"{jvm} is not a valid datatype Object")
\ No newline at end of file
+ raise ValueError(f"{jvm} is not a valid datatype Object")
diff --git a/pydeequ/checks.py b/pydeequ/checks.py
index 749f74d..c0c6796 100644
--- a/pydeequ/checks.py
+++ b/pydeequ/checks.py
@@ -6,7 +6,6 @@
from pydeequ.check_functions import is_one
from pydeequ.scala_utils import ScalaFunction1, to_scala_seq
-from pydeequ.configs import SPARK_VERSION
# TODO implement custom assertions
# TODO implement all methods without outside class dependencies
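With the eager `SPARK_VERSION` import removed from `checks.py` and `analyzers.py`, the environment is only consulted when a Deequ Maven coordinate is actually resolved. A rough sketch of the legacy Py4J session wiring under that assumption; the `spark.jars.*` options follow the conventional PyDeequ 1.x setup and are not part of this diff:

```python
import os
os.environ["SPARK_VERSION"] = "3.5"  # assumption: still needed before the legacy config lookup

from pyspark.sql import SparkSession
import pydeequ  # safe even without SPARK_VERSION; only the lookup below needs it

spark = (SparkSession.builder
         .config("spark.jars.packages", pydeequ.deequ_maven_coord)  # resolved lazily here
         .config("spark.jars.excludes", pydeequ.f2j_maven_coord)
         .getOrCreate())
```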
diff --git a/pydeequ/v2/__init__.py b/pydeequ/v2/__init__.py
new file mode 100644
index 0000000..cefe70d
--- /dev/null
+++ b/pydeequ/v2/__init__.py
@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+"""
+PyDeequ Spark Connect Module.
+
+This module provides Spark Connect compatible implementations of PyDeequ's
+data quality verification capabilities. It replaces the Py4J-based bridge
+with a protobuf-based communication protocol that works with Spark Connect's
+client-server architecture.
+
+Key differences from the legacy Py4J-based PyDeequ:
+1. Uses serializable predicates instead of Python lambdas
+2. Communicates via protobuf messages over gRPC
+3. No direct JVM access required
+
+Example usage:
+ from pyspark.sql import SparkSession
+ from pydeequ.v2 import VerificationSuite, Check, CheckLevel
+ from pydeequ.v2.predicates import gte, eq
+
+ # Connect to Spark Connect server
+ spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+
+ # Create a check with constraints
+ check = (Check(CheckLevel.Error, "Data quality check")
+ .isComplete("id")
+ .hasCompleteness("email", gte(0.95))
+ .hasSize(eq(1000)))
+
+ # Run verification
+ result = (VerificationSuite(spark)
+ .onData(df)
+ .addCheck(check)
+ .run())
+
+ # Result is a DataFrame with check results
+ result.show()
+"""
+
+# Import analyzers
+from pydeequ.v2.analyzers import (
+ ApproxCountDistinct,
+ ApproxQuantile,
+ Completeness,
+ Compliance,
+ Correlation,
+ CountDistinct,
+ DataType,
+ Distinctness,
+ Entropy,
+ Histogram,
+ Maximum,
+ MaxLength,
+ Mean,
+ Minimum,
+ MinLength,
+ MutualInformation,
+ PatternMatch,
+ Size,
+ StandardDeviation,
+ Sum,
+ Uniqueness,
+ UniqueValueRatio,
+)
+
+# Import checks
+from pydeequ.v2.checks import (
+ Check,
+ CheckLevel,
+)
+
+# Import predicates
+from pydeequ.v2.predicates import (
+ Predicate,
+ between,
+ eq,
+ gt,
+ gte,
+ is_non_negative,
+ is_one,
+ is_positive,
+ is_zero,
+ lt,
+ lte,
+ neq,
+)
+
+# Import profiles
+from pydeequ.v2.profiles import (
+ ColumnProfilerRunner,
+ ColumnProfilerRunBuilder,
+ KLLParameters,
+)
+
+# Import suggestions
+from pydeequ.v2.suggestions import (
+ ConstraintSuggestionRunner,
+ ConstraintSuggestionRunBuilder,
+ Rules,
+)
+
+# Import verification
+from pydeequ.v2.verification import (
+ AnalysisRunBuilder,
+ AnalysisRunner,
+ VerificationRunBuilder,
+ VerificationSuite,
+)
+
+__all__ = [
+ # Predicates
+ "Predicate",
+ "eq",
+ "neq",
+ "gt",
+ "gte",
+ "lt",
+ "lte",
+ "between",
+ "is_one",
+ "is_zero",
+ "is_positive",
+ "is_non_negative",
+ # Checks
+ "Check",
+ "CheckLevel",
+ # Analyzers
+ "Size",
+ "Completeness",
+ "Mean",
+ "Sum",
+ "Maximum",
+ "Minimum",
+ "StandardDeviation",
+ "Distinctness",
+ "Uniqueness",
+ "UniqueValueRatio",
+ "CountDistinct",
+ "ApproxCountDistinct",
+ "ApproxQuantile",
+ "Correlation",
+ "MutualInformation",
+ "MaxLength",
+ "MinLength",
+ "PatternMatch",
+ "Compliance",
+ "Entropy",
+ "Histogram",
+ "DataType",
+ # Profiles
+ "ColumnProfilerRunner",
+ "ColumnProfilerRunBuilder",
+ "KLLParameters",
+ # Suggestions
+ "ConstraintSuggestionRunner",
+ "ConstraintSuggestionRunBuilder",
+ "Rules",
+ # Verification
+ "VerificationSuite",
+ "VerificationRunBuilder",
+ "AnalysisRunner",
+ "AnalysisRunBuilder",
+]
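Because every V2 building block is re-exported here, a check can be assembled from a single import, and since it only accumulates protobuf messages it needs no Spark session or JVM until it is run. A small sketch using the constraint builders defined in `pydeequ/v2/checks.py` later in this diff (column names and thresholds are illustrative):

```python
from pydeequ.v2 import Check, CheckLevel
from pydeequ.v2.predicates import between, gte, lte

# Session-free construction: only ConstraintMessage protos are collected.
check = (Check(CheckLevel.Warning, "orders sanity")
         .isComplete("order_id")
         .isUnique("order_id")
         .hasMin("amount", gte(0))
         .hasMax("amount", lte(10_000))
         .hasApproxQuantile("amount", 0.5, between(10, 500)))

assert len(check._constraints) == 5  # one message per builder call (private field, shown for illustration)
```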
diff --git a/pydeequ/v2/analyzers.py b/pydeequ/v2/analyzers.py
new file mode 100644
index 0000000..53a979c
--- /dev/null
+++ b/pydeequ/v2/analyzers.py
@@ -0,0 +1,712 @@
+# -*- coding: utf-8 -*-
+"""
+Analyzer classes for Deequ Spark Connect.
+
+This module provides Spark Connect compatible analyzer classes that build
+protobuf messages instead of using Py4J to call Scala code directly.
+
+Example usage:
+    from pydeequ.v2 import (
+        AnalysisRunner, Size, Completeness, Mean, Maximum, Minimum
+    )
+    from pydeequ.v2.verification import AnalyzerContext
+
+ result = (AnalysisRunner(spark)
+ .onData(df)
+ .addAnalyzer(Size())
+ .addAnalyzer(Completeness("email"))
+ .addAnalyzer(Mean("amount"))
+ .run())
+
+ metrics = AnalyzerContext.successMetricsAsDataFrame(result)
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Optional, Sequence, Union
+
+from pydeequ.v2.proto import deequ_connect_pb2 as proto
+
+
+class _ConnectAnalyzer(ABC):
+ """Base class for Spark Connect compatible analyzers."""
+
+ @abstractmethod
+ def to_proto(self) -> proto.AnalyzerMessage:
+ """Convert analyzer to protobuf message."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def __repr__(self) -> str:
+ raise NotImplementedError
+
+
+# ============================================================================
+# Size Analyzer
+# ============================================================================
+
+
+@dataclass
+class Size(_ConnectAnalyzer):
+ """
+ Computes the number of rows in a DataFrame.
+
+ Args:
+ where: Optional SQL WHERE clause to filter rows before counting
+
+ Example:
+ Size() # Count all rows
+ Size(where="status = 'active'") # Count only active rows
+ """
+
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Size")
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ if self.where:
+ return f"Size(where='{self.where}')"
+ return "Size()"
+
+
+# ============================================================================
+# Completeness Analyzers
+# ============================================================================
+
+
+@dataclass
+class Completeness(_ConnectAnalyzer):
+ """
+ Computes the fraction of non-null values in a column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+
+ Example:
+ Completeness("email")
+ Completeness("email", where="status = 'active'")
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Completeness", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ if self.where:
+ return f"Completeness('{self.column}', where='{self.where}')"
+ return f"Completeness('{self.column}')"
+
+
+# ============================================================================
+# Statistical Analyzers
+# ============================================================================
+
+
+@dataclass
+class Mean(_ConnectAnalyzer):
+ """
+ Computes the mean of a numeric column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Mean", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ if self.where:
+ return f"Mean('{self.column}', where='{self.where}')"
+ return f"Mean('{self.column}')"
+
+
+@dataclass
+class Sum(_ConnectAnalyzer):
+ """
+ Computes the sum of a numeric column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Sum", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ if self.where:
+ return f"Sum('{self.column}', where='{self.where}')"
+ return f"Sum('{self.column}')"
+
+
+@dataclass
+class Maximum(_ConnectAnalyzer):
+ """
+ Computes the maximum value of a numeric column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Maximum", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ if self.where:
+ return f"Maximum('{self.column}', where='{self.where}')"
+ return f"Maximum('{self.column}')"
+
+
+@dataclass
+class Minimum(_ConnectAnalyzer):
+ """
+ Computes the minimum value of a numeric column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Minimum", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ if self.where:
+ return f"Minimum('{self.column}', where='{self.where}')"
+ return f"Minimum('{self.column}')"
+
+
+@dataclass
+class StandardDeviation(_ConnectAnalyzer):
+ """
+ Computes the standard deviation of a numeric column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="StandardDeviation", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ if self.where:
+ return f"StandardDeviation('{self.column}', where='{self.where}')"
+ return f"StandardDeviation('{self.column}')"
+
+
+# ============================================================================
+# Uniqueness Analyzers
+# ============================================================================
+
+
+@dataclass
+class Distinctness(_ConnectAnalyzer):
+ """
+ Computes the fraction of distinct values in column(s).
+
+ Args:
+ columns: Column name(s) to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ columns: Union[str, Sequence[str]]
+ where: Optional[str] = None
+
+ def __post_init__(self):
+ if isinstance(self.columns, str):
+ self.columns = [self.columns]
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Distinctness")
+ msg.columns.extend(self.columns)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"Distinctness({self.columns})"
+
+
+@dataclass
+class Uniqueness(_ConnectAnalyzer):
+ """
+ Computes the fraction of unique values (appearing exactly once) in column(s).
+
+ Args:
+ columns: Column name(s) to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ columns: Union[str, Sequence[str]]
+ where: Optional[str] = None
+
+ def __post_init__(self):
+ if isinstance(self.columns, str):
+ self.columns = [self.columns]
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Uniqueness")
+ msg.columns.extend(self.columns)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"Uniqueness({self.columns})"
+
+
+@dataclass
+class UniqueValueRatio(_ConnectAnalyzer):
+ """
+ Computes the ratio of unique values to total distinct values.
+
+ Args:
+ columns: Column name(s) to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ columns: Union[str, Sequence[str]]
+ where: Optional[str] = None
+
+ def __post_init__(self):
+ if isinstance(self.columns, str):
+ self.columns = [self.columns]
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="UniqueValueRatio")
+ msg.columns.extend(self.columns)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"UniqueValueRatio({self.columns})"
+
+
+@dataclass
+class CountDistinct(_ConnectAnalyzer):
+ """
+ Computes the count of distinct values in column(s).
+
+ Args:
+ columns: Column name(s) to analyze
+ """
+
+ columns: Union[str, Sequence[str]]
+
+ def __post_init__(self):
+ if isinstance(self.columns, str):
+ self.columns = [self.columns]
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="CountDistinct")
+ msg.columns.extend(self.columns)
+ return msg
+
+ def __repr__(self) -> str:
+ return f"CountDistinct({self.columns})"
+
+
+@dataclass
+class ApproxCountDistinct(_ConnectAnalyzer):
+ """
+ Computes approximate count distinct using HyperLogLog.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="ApproxCountDistinct", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"ApproxCountDistinct('{self.column}')"
+
+
+# ============================================================================
+# Quantile Analyzers
+# ============================================================================
+
+
+@dataclass
+class ApproxQuantile(_ConnectAnalyzer):
+ """
+ Computes an approximate quantile of a numeric column.
+
+ Args:
+ column: Column name to analyze
+ quantile: Quantile to compute (0.0 to 1.0)
+ relative_error: Relative error tolerance (default 0.01)
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ quantile: float
+ relative_error: float = 0.01
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(
+ type="ApproxQuantile",
+ column=self.column,
+ quantile=self.quantile,
+ relative_error=self.relative_error,
+ )
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"ApproxQuantile('{self.column}', {self.quantile})"
+
+
+# ============================================================================
+# Correlation Analyzers
+# ============================================================================
+
+
+@dataclass
+class Correlation(_ConnectAnalyzer):
+ """
+ Computes Pearson correlation between two columns.
+
+ Args:
+ column1: First column name
+ column2: Second column name
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column1: str
+ column2: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Correlation")
+ msg.columns.extend([self.column1, self.column2])
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"Correlation('{self.column1}', '{self.column2}')"
+
+
+@dataclass
+class MutualInformation(_ConnectAnalyzer):
+ """
+ Computes mutual information between columns.
+
+ Args:
+ columns: Column names to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ columns: Sequence[str]
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="MutualInformation")
+ msg.columns.extend(self.columns)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"MutualInformation({self.columns})"
+
+
+# ============================================================================
+# String Analyzers
+# ============================================================================
+
+
+@dataclass
+class MaxLength(_ConnectAnalyzer):
+ """
+ Computes the maximum string length in a column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="MaxLength", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"MaxLength('{self.column}')"
+
+
+@dataclass
+class MinLength(_ConnectAnalyzer):
+ """
+ Computes the minimum string length in a column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="MinLength", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"MinLength('{self.column}')"
+
+
+# ============================================================================
+# Pattern Analyzers
+# ============================================================================
+
+
+@dataclass
+class PatternMatch(_ConnectAnalyzer):
+ """
+ Computes the fraction of values matching a regex pattern.
+
+ Args:
+ column: Column name to analyze
+ pattern: Regex pattern to match
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ pattern: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(
+ type="PatternMatch", column=self.column, pattern=self.pattern
+ )
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"PatternMatch('{self.column}', '{self.pattern}')"
+
+
+# ============================================================================
+# Compliance Analyzer
+# ============================================================================
+
+
+@dataclass
+class Compliance(_ConnectAnalyzer):
+ """
+ Computes the fraction of rows satisfying a SQL condition.
+
+ Args:
+ instance: Name for this compliance check
+ predicate: SQL predicate (WHERE clause condition)
+ where: Optional additional SQL WHERE clause to filter rows
+ """
+
+ instance: str
+ predicate: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ # Use column for instance name and pattern for predicate
+ msg = proto.AnalyzerMessage(
+ type="Compliance", column=self.instance, pattern=self.predicate
+ )
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"Compliance('{self.instance}', '{self.predicate}')"
+
+
+# ============================================================================
+# Entropy Analyzer
+# ============================================================================
+
+
+@dataclass
+class Entropy(_ConnectAnalyzer):
+ """
+ Computes the entropy of a column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Entropy", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"Entropy('{self.column}')"
+
+
+# ============================================================================
+# Histogram Analyzer
+# ============================================================================
+
+
+@dataclass
+class Histogram(_ConnectAnalyzer):
+ """
+ Computes histogram of values in a column.
+
+ Args:
+ column: Column name to analyze
+ max_detail_bins: Maximum number of bins for detailed output
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ max_detail_bins: Optional[int] = None
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="Histogram", column=self.column)
+ if self.max_detail_bins is not None:
+ msg.max_detail_bins = self.max_detail_bins
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"Histogram('{self.column}')"
+
+
+# ============================================================================
+# DataType Analyzer
+# ============================================================================
+
+
+@dataclass
+class DataType(_ConnectAnalyzer):
+ """
+ Analyzes the data types present in a column.
+
+ Args:
+ column: Column name to analyze
+ where: Optional SQL WHERE clause to filter rows
+ """
+
+ column: str
+ where: Optional[str] = None
+
+ def to_proto(self) -> proto.AnalyzerMessage:
+ msg = proto.AnalyzerMessage(type="DataType", column=self.column)
+ if self.where:
+ msg.where = self.where
+ return msg
+
+ def __repr__(self) -> str:
+ return f"DataType('{self.column}')"
+
+
+# Export all public symbols
+__all__ = [
+ # Base class
+ "_ConnectAnalyzer",
+ # Size
+ "Size",
+ # Completeness
+ "Completeness",
+ # Statistical
+ "Mean",
+ "Sum",
+ "Maximum",
+ "Minimum",
+ "StandardDeviation",
+ # Uniqueness
+ "Distinctness",
+ "Uniqueness",
+ "UniqueValueRatio",
+ "CountDistinct",
+ "ApproxCountDistinct",
+ # Quantile
+ "ApproxQuantile",
+ # Correlation
+ "Correlation",
+ "MutualInformation",
+ # String
+ "MaxLength",
+ "MinLength",
+ # Pattern
+ "PatternMatch",
+ # Compliance
+ "Compliance",
+ # Entropy
+ "Entropy",
+ # Histogram
+ "Histogram",
+ # DataType
+ "DataType",
+]
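Since every analyzer is a plain dataclass, its protobuf translation can be inspected locally without any Spark session. A short sketch (column names are illustrative; the printed strings mirror the `type=` values used above):

```python
from pydeequ.v2.analyzers import Completeness, Size, Uniqueness

# Declarative state only: no Spark session involved.
analyzers = [
    Size(where="status = 'active'"),
    Completeness("email"),
    Uniqueness(["customer_id", "order_id"]),
]

# to_proto() turns each analyzer into an AnalyzerMessage for the
# Deequ Spark Connect plugin.
for analyzer in analyzers:
    msg = analyzer.to_proto()
    print(repr(analyzer), "->", msg.type)
```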
diff --git a/pydeequ/v2/checks.py b/pydeequ/v2/checks.py
new file mode 100644
index 0000000..2a86ba8
--- /dev/null
+++ b/pydeequ/v2/checks.py
@@ -0,0 +1,938 @@
+# -*- coding: utf-8 -*-
+"""
+Check class for Deequ Spark Connect.
+
+This module provides a Spark Connect compatible Check class that builds
+protobuf messages instead of using Py4J to call Scala code directly.
+
+Example usage:
+ from pydeequ.v2.checks import Check, CheckLevel
+ from pydeequ.v2.predicates import gte, eq, between
+
+ check = (Check(CheckLevel.Error, "Data quality check")
+ .isComplete("id")
+ .hasCompleteness("email", gte(0.95))
+ .hasSize(eq(1000))
+ .hasMean("amount", between(100, 500)))
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import List, Optional, Sequence, Union
+
+from pydeequ.v2.predicates import Predicate, is_one
+from pydeequ.v2.proto import deequ_connect_pb2 as proto
+
+
+class CheckLevel(Enum):
+ """Check severity level."""
+
+ Error = "Error"
+ Warning = "Warning"
+
+
+class Check:
+ """
+ Check class for Spark Connect - builds protobuf messages.
+
+ A Check is a collection of constraints that can be applied to a DataFrame.
+ When the Check is run, each constraint is evaluated and the results are
+ aggregated based on the Check's level (Error or Warning).
+
+ Unlike the Py4J-based Check, this class does not require a SparkSession
+ at construction time since it only builds protobuf messages.
+
+ Example:
+ check = (Check(CheckLevel.Error, "Data quality check")
+ .isComplete("id")
+ .hasCompleteness("email", gte(0.95))
+ .hasSize(eq(1000)))
+ """
+
+ def __init__(self, level: CheckLevel, description: str):
+ """
+ Create a new Check.
+
+ Args:
+ level: The severity level (Error or Warning)
+ description: Human-readable description of this check
+ """
+ self.level = level
+ self.description = description
+ self._constraints: List[proto.ConstraintMessage] = []
+
+ def _add_constraint(
+ self,
+ constraint_type: str,
+ column: Optional[str] = None,
+ columns: Optional[Sequence[str]] = None,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ where: Optional[str] = None,
+ pattern: Optional[str] = None,
+ column_condition: Optional[str] = None,
+ constraint_name: Optional[str] = None,
+ allowed_values: Optional[Sequence[str]] = None,
+ quantile: Optional[float] = None,
+ ) -> "Check":
+ """Internal method to add a constraint."""
+ constraint = proto.ConstraintMessage(type=constraint_type)
+
+ if column is not None:
+ constraint.column = column
+ if columns is not None:
+ constraint.columns.extend(columns)
+ if assertion is not None:
+ constraint.assertion.CopyFrom(assertion.to_proto())
+ if hint is not None:
+ constraint.hint = hint
+ if where is not None:
+ constraint.where = where
+ if pattern is not None:
+ constraint.pattern = pattern
+ if column_condition is not None:
+ constraint.column_condition = column_condition
+ if constraint_name is not None:
+ constraint.constraint_name = constraint_name
+ if allowed_values is not None:
+ constraint.allowed_values.extend(allowed_values)
+ if quantile is not None:
+ constraint.quantile = quantile
+
+ self._constraints.append(constraint)
+ return self
+
+ # ========================================================================
+ # Size Constraints
+ # ========================================================================
+
+ def hasSize(self, assertion: Predicate, hint: Optional[str] = None) -> "Check":
+ """
+ Check that the DataFrame has a size satisfying the assertion.
+
+ Args:
+ assertion: Predicate to apply to the row count
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.hasSize(eq(1000)) # Must have exactly 1000 rows
+ check.hasSize(gte(100)) # Must have at least 100 rows
+ """
+ return self._add_constraint("hasSize", assertion=assertion, hint=hint)
+
+ # ========================================================================
+ # Completeness Constraints
+ # ========================================================================
+
+ def isComplete(self, column: str, hint: Optional[str] = None) -> "Check":
+ """
+ Check that a column has no null values (100% complete).
+
+ Args:
+ column: Column name to check
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.isComplete("id") # id column must have no nulls
+ """
+ return self._add_constraint(
+ "isComplete", column=column, assertion=is_one(), hint=hint
+ )
+
+ def hasCompleteness(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that a column's completeness satisfies the assertion.
+
+ Completeness is the fraction of non-null values (0.0 to 1.0).
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to completeness value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.hasCompleteness("email", gte(0.95)) # At least 95% complete
+ """
+ return self._add_constraint(
+ "hasCompleteness", column=column, assertion=assertion, hint=hint
+ )
+
+ def areComplete(
+ self, columns: Sequence[str], hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that all specified columns have no null values.
+
+ Args:
+ columns: Column names to check
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.areComplete(["id", "name", "email"])
+ """
+ return self._add_constraint(
+ "areComplete", columns=columns, assertion=is_one(), hint=hint
+ )
+
+ def haveCompleteness(
+ self, columns: Sequence[str], assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that combined completeness of columns satisfies the assertion.
+
+ Args:
+ columns: Column names to check
+ assertion: Predicate to apply to completeness value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "haveCompleteness", columns=columns, assertion=assertion, hint=hint
+ )
+
+ # ========================================================================
+ # Uniqueness Constraints
+ # ========================================================================
+
+ def isUnique(self, column: str, hint: Optional[str] = None) -> "Check":
+ """
+ Check that a column has only unique values.
+
+ Args:
+ column: Column name to check
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.isUnique("id") # id must be unique
+ """
+ return self._add_constraint("isUnique", column=column, hint=hint)
+
+ def hasUniqueness(
+ self, columns: Sequence[str], assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that uniqueness of column(s) satisfies the assertion.
+
+ Uniqueness is the fraction of unique values.
+
+ Args:
+ columns: Column names to check
+ assertion: Predicate to apply to uniqueness value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "hasUniqueness", columns=columns, assertion=assertion, hint=hint
+ )
+
+ def hasDistinctness(
+ self, columns: Sequence[str], assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that distinctness of column(s) satisfies the assertion.
+
+ Distinctness is the fraction of distinct values.
+
+ Args:
+ columns: Column names to check
+ assertion: Predicate to apply to distinctness value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "hasDistinctness", columns=columns, assertion=assertion, hint=hint
+ )
+
+ def hasUniqueValueRatio(
+ self, columns: Sequence[str], assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that unique value ratio of column(s) satisfies the assertion.
+
+ Args:
+ columns: Column names to check
+ assertion: Predicate to apply to ratio value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "hasUniqueValueRatio", columns=columns, assertion=assertion, hint=hint
+ )
+
+ # ========================================================================
+ # Statistical Constraints
+ # ========================================================================
+
+ def hasMin(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the minimum value of a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to minimum value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.hasMin("age", gte(0)) # Age must be non-negative
+ """
+ return self._add_constraint(
+ "hasMin", column=column, assertion=assertion, hint=hint
+ )
+
+ def hasMax(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the maximum value of a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to maximum value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.hasMax("price", lte(10000)) # Price must be <= 10000
+ """
+ return self._add_constraint(
+ "hasMax", column=column, assertion=assertion, hint=hint
+ )
+
+ def hasMean(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the mean value of a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to mean value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.hasMean("score", between(70, 90))
+ """
+ return self._add_constraint(
+ "hasMean", column=column, assertion=assertion, hint=hint
+ )
+
+ def hasSum(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the sum of a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to sum value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "hasSum", column=column, assertion=assertion, hint=hint
+ )
+
+ def hasStandardDeviation(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the standard deviation of a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to std dev value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
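+
+ Example:
+ check.hasStandardDeviation("latency_ms", lte(250)) # illustrative column and bound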
+ """
+ return self._add_constraint(
+ "hasStandardDeviation", column=column, assertion=assertion, hint=hint
+ )
+
+ def hasApproxCountDistinct(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the approximate number of distinct values in a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to count distinct value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
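+
+ Example:
+ check.hasApproxCountDistinct("customer_id", gte(1000)) # at least ~1000 distinct customers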
+ """
+ return self._add_constraint(
+ "hasApproxCountDistinct", column=column, assertion=assertion, hint=hint
+ )
+
+ def hasApproxQuantile(
+ self,
+ column: str,
+ quantile: float,
+ assertion: Predicate,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that an approximate quantile satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ quantile: Quantile to compute (0.0 to 1.0)
+ assertion: Predicate to apply to quantile value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.hasApproxQuantile("income", 0.5, between(30000, 80000)) # Median
+ """
+ return self._add_constraint(
+ "hasApproxQuantile",
+ column=column,
+ quantile=quantile,
+ assertion=assertion,
+ hint=hint,
+ )
+
+ def hasCorrelation(
+ self,
+ column_a: str,
+ column_b: str,
+ assertion: Predicate,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that correlation between two columns satisfies the assertion.
+
+ Args:
+ column_a: First column name
+ column_b: Second column name
+ assertion: Predicate to apply to correlation value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
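+
+ Example:
+ check.hasCorrelation("height", "weight", between(0.3, 1.0)) # illustrative columns and range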
+ """
+ return self._add_constraint(
+ "hasCorrelation",
+ columns=[column_a, column_b],
+ assertion=assertion,
+ hint=hint,
+ )
+
+ def hasEntropy(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the entropy of a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to entropy value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "hasEntropy", column=column, assertion=assertion, hint=hint
+ )
+
+ def hasMutualInformation(
+ self,
+ column_a: str,
+ column_b: str,
+ assertion: Predicate,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that mutual information between columns satisfies the assertion.
+
+ Args:
+ column_a: First column name
+ column_b: Second column name
+ assertion: Predicate to apply to mutual information value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "hasMutualInformation",
+ columns=[column_a, column_b],
+ assertion=assertion,
+ hint=hint,
+ )
+
+ # ========================================================================
+ # String Length Constraints
+ # ========================================================================
+
+ def hasMinLength(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the minimum string length in a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to min length value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
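+
+ Example:
+ check.hasMinLength("username", gte(3)) # illustrative column name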
+ """
+ return self._add_constraint(
+ "hasMinLength", column=column, assertion=assertion, hint=hint
+ )
+
+ def hasMaxLength(
+ self, column: str, assertion: Predicate, hint: Optional[str] = None
+ ) -> "Check":
+ """
+ Check that the maximum string length in a column satisfies the assertion.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to max length value
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
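+
+ Example:
+ check.hasMaxLength("country_code", lte(2)) # illustrative column name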
+ """
+ return self._add_constraint(
+ "hasMaxLength", column=column, assertion=assertion, hint=hint
+ )
+
+ # ========================================================================
+ # Pattern & Content Constraints
+ # ========================================================================
+
+ def hasPattern(
+ self,
+ column: str,
+ pattern: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that values match a regex pattern.
+
+ Args:
+ column: Column name to check
+ pattern: Regex pattern to match
+ assertion: Predicate to apply to match fraction (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.hasPattern("phone", r"^\\d{3}-\\d{3}-\\d{4}$")
+ """
+ return self._add_constraint(
+ "hasPattern",
+ column=column,
+ pattern=pattern,
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ def containsEmail(
+ self,
+ column: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that values contain valid email addresses.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to match fraction (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
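+
+ Example:
+ check.containsEmail("contact_email") # all values must look like email addresses (default is_one)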
+ """
+ return self._add_constraint(
+ "containsEmail", column=column, assertion=assertion or is_one(), hint=hint
+ )
+
+ def containsURL(
+ self,
+ column: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that values contain valid URLs.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to match fraction (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "containsURL", column=column, assertion=assertion or is_one(), hint=hint
+ )
+
+ def containsCreditCardNumber(
+ self,
+ column: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that values contain valid credit card numbers.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to match fraction (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "containsCreditCardNumber",
+ column=column,
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ def containsSocialSecurityNumber(
+ self,
+ column: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that values contain valid SSNs.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to match fraction (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "containsSocialSecurityNumber",
+ column=column,
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ # ========================================================================
+ # Comparison Constraints
+ # ========================================================================
+
+ def isPositive(
+ self,
+ column: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that all values in a column are positive.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to compliance (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "isPositive", column=column, assertion=assertion or is_one(), hint=hint
+ )
+
+ def isNonNegative(
+ self,
+ column: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that all values in a column are non-negative.
+
+ Args:
+ column: Column name to check
+ assertion: Predicate to apply to compliance (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
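+
+ Example:
+ check.isNonNegative("balance") # no negative balances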
+ """
+ return self._add_constraint(
+ "isNonNegative", column=column, assertion=assertion or is_one(), hint=hint
+ )
+
+ def isLessThan(
+ self,
+ column_a: str,
+ column_b: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that column_a < column_b for all rows.
+
+ Args:
+ column_a: First column name
+ column_b: Second column name
+ assertion: Predicate to apply to compliance (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
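+
+ Example:
+ check.isLessThan("discount", "price") # discount < price on every row (illustrative columns)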
+ """
+ return self._add_constraint(
+ "isLessThan",
+ columns=[column_a, column_b],
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ def isLessThanOrEqualTo(
+ self,
+ column_a: str,
+ column_b: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that column_a <= column_b for all rows.
+
+ Args:
+ column_a: First column name
+ column_b: Second column name
+ assertion: Predicate to apply to compliance (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "isLessThanOrEqualTo",
+ columns=[column_a, column_b],
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ def isGreaterThan(
+ self,
+ column_a: str,
+ column_b: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that column_a > column_b for all rows.
+
+ Args:
+ column_a: First column name
+ column_b: Second column name
+ assertion: Predicate to apply to compliance (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "isGreaterThan",
+ columns=[column_a, column_b],
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ def isGreaterThanOrEqualTo(
+ self,
+ column_a: str,
+ column_b: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that column_a >= column_b for all rows.
+
+ Args:
+ column_a: First column name
+ column_b: Second column name
+ assertion: Predicate to apply to compliance (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+ """
+ return self._add_constraint(
+ "isGreaterThanOrEqualTo",
+ columns=[column_a, column_b],
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ def isContainedIn(
+ self,
+ column: str,
+ allowed_values: Sequence[str],
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that all values are in the allowed set.
+
+ Args:
+ column: Column name to check
+ allowed_values: List of allowed values
+ assertion: Predicate to apply to compliance (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.isContainedIn("status", ["active", "inactive", "pending"])
+ """
+ return self._add_constraint(
+ "isContainedIn",
+ column=column,
+ allowed_values=allowed_values,
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ # ========================================================================
+ # Custom Constraints
+ # ========================================================================
+
+ def satisfies(
+ self,
+ column_condition: str,
+ constraint_name: str,
+ assertion: Optional[Predicate] = None,
+ hint: Optional[str] = None,
+ ) -> "Check":
+ """
+ Check that rows satisfy a SQL condition.
+
+ Args:
+ column_condition: SQL WHERE clause condition
+ constraint_name: Name for this constraint
+ assertion: Predicate to apply to compliance (default: is_one)
+ hint: Optional hint message for failures
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.satisfies("price > 0 AND quantity > 0", "positive_values")
+ """
+ return self._add_constraint(
+ "satisfies",
+ column_condition=column_condition,
+ constraint_name=constraint_name,
+ assertion=assertion or is_one(),
+ hint=hint,
+ )
+
+ # ========================================================================
+ # Filter (WHERE clause)
+ # ========================================================================
+
+ def where(self, filter_condition: str) -> "Check":
+ """
+ Apply a filter to the last added constraint.
+
+ Args:
+ filter_condition: SQL WHERE clause to filter rows
+
+ Returns:
+ self for method chaining
+
+ Example:
+ check.isComplete("email").where("status = 'active'")
+ """
+ if self._constraints:
+ self._constraints[-1].where = filter_condition
+ return self
+
+ # ========================================================================
+ # Serialization
+ # ========================================================================
+
+ def to_proto(self) -> proto.CheckMessage:
+ """
+ Convert this Check to a protobuf message.
+
+ Returns:
+ CheckMessage protobuf
+ """
+ level = (
+ proto.CheckMessage.Level.ERROR
+ if self.level == CheckLevel.Error
+ else proto.CheckMessage.Level.WARNING
+ )
+
+ check_msg = proto.CheckMessage(level=level, description=self.description)
+ check_msg.constraints.extend(self._constraints)
+
+ return check_msg
+
+ def __repr__(self) -> str:
+ return f"Check(level={self.level.value}, description='{self.description}', constraints={len(self._constraints)})"
+
+
+# Export all public symbols
+__all__ = [
+ "Check",
+ "CheckLevel",
+]
diff --git a/pydeequ/v2/predicates.py b/pydeequ/v2/predicates.py
new file mode 100644
index 0000000..adaf23d
--- /dev/null
+++ b/pydeequ/v2/predicates.py
@@ -0,0 +1,274 @@
+# -*- coding: utf-8 -*-
+"""
+Serializable predicates for Deequ Spark Connect.
+
+These predicates replace Python lambda functions that were used in the Py4J-based
+PyDeequ. Since lambdas cannot be serialized over Spark Connect's gRPC channel,
+we use these predicate classes that serialize to protobuf messages.
+
+Example usage:
+ # Old (Py4J) - NOT serializable
+ check.hasSize(lambda x: x >= 100)
+ check.hasCompleteness("col", lambda x: x >= 0.95)
+
+ # New (Spark Connect) - Serializable
+ from pydeequ.v2.predicates import gte, eq, between
+
+ check.hasSize(gte(100))
+ check.hasCompleteness("col", gte(0.95))
+ check.hasMean("amount", between(100, 200))
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Union
+
+from pydeequ.v2.proto import deequ_connect_pb2 as proto
+
+
+class Predicate(ABC):
+ """Base class for serializable predicates."""
+
+ @abstractmethod
+ def to_proto(self) -> proto.PredicateMessage:
+ """Convert predicate to protobuf message."""
+ raise NotImplementedError
+
+ @abstractmethod
+ def __repr__(self) -> str:
+ raise NotImplementedError
+
+
+@dataclass
+class Comparison(Predicate):
+ """Comparison predicate for single-value comparisons."""
+
+ operator: proto.PredicateMessage.Operator
+ value: float
+
+ def to_proto(self) -> proto.PredicateMessage:
+ return proto.PredicateMessage(operator=self.operator, value=self.value)
+
+ def __repr__(self) -> str:
+ op_map = {
+ proto.PredicateMessage.Operator.EQ: "==",
+ proto.PredicateMessage.Operator.NE: "!=",
+ proto.PredicateMessage.Operator.GT: ">",
+ proto.PredicateMessage.Operator.GE: ">=",
+ proto.PredicateMessage.Operator.LT: "<",
+ proto.PredicateMessage.Operator.LE: "<=",
+ }
+ return f"x {op_map.get(self.operator, '?')} {self.value}"
+
+
+@dataclass
+class Between(Predicate):
+ """Between predicate for range checks (inclusive)."""
+
+ lower: float
+ upper: float
+
+ def to_proto(self) -> proto.PredicateMessage:
+ return proto.PredicateMessage(
+ operator=proto.PredicateMessage.Operator.BETWEEN,
+ lower_bound=self.lower,
+ upper_bound=self.upper,
+ )
+
+ def __repr__(self) -> str:
+ return f"{self.lower} <= x <= {self.upper}"
+
+
+# ============================================================================
+# Factory Functions - Convenient way to create predicates
+# ============================================================================
+
+
+def eq(value: Union[int, float]) -> Predicate:
+ """
+ Create an equality predicate (x == value).
+
+ Args:
+ value: The value to compare against
+
+ Returns:
+ Predicate that checks if metric equals value
+
+ Example:
+ check.hasSize(eq(100)) # size must equal 100
+ """
+ return Comparison(proto.PredicateMessage.Operator.EQ, float(value))
+
+
+def neq(value: Union[int, float]) -> Predicate:
+ """
+ Create a not-equal predicate (x != value).
+
+ Args:
+ value: The value to compare against
+
+ Returns:
+ Predicate that checks if metric does not equal value
+
+ Example:
+ check.hasSize(neq(0)) # size must not be zero
+ """
+ return Comparison(proto.PredicateMessage.Operator.NE, float(value))
+
+
+def gt(value: Union[int, float]) -> Predicate:
+ """
+ Create a greater-than predicate (x > value).
+
+ Args:
+ value: The value to compare against
+
+ Returns:
+ Predicate that checks if metric is greater than value
+
+ Example:
+ check.hasSize(gt(0)) # size must be greater than 0
+ """
+ return Comparison(proto.PredicateMessage.Operator.GT, float(value))
+
+
+def gte(value: Union[int, float]) -> Predicate:
+ """
+ Create a greater-than-or-equal predicate (x >= value).
+
+ Args:
+ value: The value to compare against
+
+ Returns:
+ Predicate that checks if metric is >= value
+
+ Example:
+ check.hasCompleteness("col", gte(0.95)) # at least 95% complete
+ """
+ return Comparison(proto.PredicateMessage.Operator.GE, float(value))
+
+
+def lt(value: Union[int, float]) -> Predicate:
+ """
+ Create a less-than predicate (x < value).
+
+ Args:
+ value: The value to compare against
+
+ Returns:
+ Predicate that checks if metric is less than value
+
+ Example:
+ check.hasMean("errors", lt(10)) # mean errors less than 10
+ """
+ return Comparison(proto.PredicateMessage.Operator.LT, float(value))
+
+
+def lte(value: Union[int, float]) -> Predicate:
+ """
+ Create a less-than-or-equal predicate (x <= value).
+
+ Args:
+ value: The value to compare against
+
+ Returns:
+ Predicate that checks if metric is <= value
+
+ Example:
+ check.hasMax("price", lte(1000)) # max price <= 1000
+ """
+ return Comparison(proto.PredicateMessage.Operator.LE, float(value))
+
+
+def between(lower: Union[int, float], upper: Union[int, float]) -> Predicate:
+ """
+ Create a between predicate (lower <= x <= upper).
+
+ Args:
+ lower: Lower bound (inclusive)
+ upper: Upper bound (inclusive)
+
+ Returns:
+ Predicate that checks if metric is within range
+
+ Example:
+ check.hasMean("age", between(18, 65)) # mean age between 18 and 65
+ """
+ return Between(float(lower), float(upper))
+
+
+def is_one() -> Predicate:
+ """
+ Create a predicate that checks if value equals 1.0.
+
+ This is the default assertion for many constraints like isComplete().
+
+ Returns:
+ Predicate that checks if metric equals 1.0
+
+ Example:
+ check.hasCompleteness("col", is_one()) # 100% complete
+ """
+ return eq(1.0)
+
+
+def is_zero() -> Predicate:
+ """
+ Create a predicate that checks if value equals 0.0.
+
+ Returns:
+ Predicate that checks if metric equals 0.0
+
+ Example:
+ check.hasMean("null_count", is_zero()) # no nulls
+ """
+ return eq(0.0)
+
+
+def is_positive() -> Predicate:
+ """
+ Create a predicate that checks if value is positive (> 0).
+
+ Returns:
+ Predicate that checks if metric is greater than 0
+
+ Example:
+ check.hasMin("quantity", is_positive()) # all quantities positive
+ """
+ return gt(0.0)
+
+
+def is_non_negative() -> Predicate:
+ """
+ Create a predicate that checks if value is non-negative (>= 0).
+
+ Returns:
+ Predicate that checks if metric is >= 0
+
+ Example:
+ check.hasMin("balance", is_non_negative()) # no negative balances
+ """
+ return gte(0.0)
+
+
+# Export all public symbols
+__all__ = [
+ # Base classes
+ "Predicate",
+ "Comparison",
+ "Between",
+ # Factory functions
+ "eq",
+ "neq",
+ "gt",
+ "gte",
+ "lt",
+ "lte",
+ "between",
+ "is_one",
+ "is_zero",
+ "is_positive",
+ "is_non_negative",
+]
diff --git a/pydeequ/v2/profiles.py b/pydeequ/v2/profiles.py
new file mode 100644
index 0000000..97f71ef
--- /dev/null
+++ b/pydeequ/v2/profiles.py
@@ -0,0 +1,282 @@
+# -*- coding: utf-8 -*-
+"""
+Column Profiler for Deequ Spark Connect.
+
+This module provides column profiling capabilities that analyze DataFrame columns
+to compute statistics such as completeness and data type distribution, and,
+optionally, KLL sketch-based quantile estimates.
+
+Example usage:
+ from pyspark.sql import SparkSession
+ from pydeequ.v2.profiles import ColumnProfilerRunner, KLLParameters
+
+ spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+
+ # Basic profiling
+ profiles = (ColumnProfilerRunner(spark)
+ .onData(df)
+ .run())
+
+ # With KLL profiling for quantile estimation
+ profiles = (ColumnProfilerRunner(spark)
+ .onData(df)
+ .withKLLProfiling()
+ .setKLLParameters(KLLParameters(sketch_size=2048))
+ .run())
+
+ profiles.show() # Result is a DataFrame with one row per column
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Dict, Optional, Sequence
+
+from google.protobuf import any_pb2
+
+from pydeequ.v2.proto import deequ_connect_pb2 as proto
+from pydeequ.v2.spark_helpers import create_deequ_plan, dataframe_from_plan
+
+if TYPE_CHECKING:
+ from pyspark.sql import DataFrame, SparkSession
+
+
+@dataclass
+class KLLParameters:
+ """
+ Parameters for KLL sketch-based quantile estimation.
+
+ KLL sketches provide memory-efficient approximate quantile computation
+ for numeric columns.
+
+ Attributes:
+ sketch_size: Size of the KLL sketch (default: 2048)
+ shrinking_factor: Factor for sketch compaction (default: 0.64)
+ num_buckets: Number of histogram buckets for distribution (default: 64)
+ """
+
+ sketch_size: int = 2048
+ shrinking_factor: float = 0.64
+ num_buckets: int = 64
+
+ def to_proto(self) -> proto.KLLParameters:
+ """Convert to protobuf message."""
+ return proto.KLLParameters(
+ sketch_size=self.sketch_size,
+ shrinking_factor=self.shrinking_factor,
+ number_of_buckets=self.num_buckets,
+ )
+
+
+class ColumnProfilerRunner:
+ """
+ Entry point for running column profiling.
+
+ ColumnProfilerRunner analyzes DataFrame columns to compute statistics
+ including completeness, data type, distinct values, and optionally
+ KLL sketches for numeric columns.
+
+ Example:
+ profiles = (ColumnProfilerRunner(spark)
+ .onData(df)
+ .restrictToColumns(["col1", "col2"])
+ .withKLLProfiling()
+ .run())
+ """
+
+ def __init__(self, spark: "SparkSession"):
+ """
+ Create a new ColumnProfilerRunner.
+
+ Args:
+ spark: SparkSession (V2 requires a Spark Connect session)
+ """
+ self._spark = spark
+
+ def onData(self, df: "DataFrame") -> "ColumnProfilerRunBuilder":
+ """
+ Specify the DataFrame to profile.
+
+ Args:
+ df: DataFrame to profile
+
+ Returns:
+ ColumnProfilerRunBuilder for method chaining
+ """
+ return ColumnProfilerRunBuilder(self._spark, df)
+
+
+class ColumnProfilerRunBuilder:
+ """
+ Builder for configuring and executing a column profiling run.
+
+ This class collects profiling options and executes the profiling
+ when run() is called.
+ """
+
+ def __init__(self, spark: "SparkSession", df: "DataFrame"):
+ """
+ Create a new ColumnProfilerRunBuilder.
+
+ Args:
+ spark: SparkSession
+ df: DataFrame to profile
+ """
+ self._spark = spark
+ self._df = df
+ self._restrict_to_columns: Optional[Sequence[str]] = None
+ self._low_cardinality_threshold: int = 0
+ self._enable_kll: bool = False
+ self._kll_parameters: Optional[KLLParameters] = None
+ self._predefined_types: Optional[Dict[str, str]] = None
+
+ def restrictToColumns(self, columns: Sequence[str]) -> "ColumnProfilerRunBuilder":
+ """
+ Restrict profiling to specific columns.
+
+ Args:
+ columns: List of column names to profile
+
+ Returns:
+ self for method chaining
+ """
+ self._restrict_to_columns = columns
+ return self
+
+ def withLowCardinalityHistogramThreshold(
+ self, threshold: int
+ ) -> "ColumnProfilerRunBuilder":
+ """
+ Set threshold for computing histograms.
+
+ Columns with distinct values <= threshold will have histograms computed.
+
+ Args:
+ threshold: Maximum distinct values for histogram computation
+
+ Returns:
+ self for method chaining
+ """
+ self._low_cardinality_threshold = threshold
+ return self
+
+ def withKLLProfiling(self) -> "ColumnProfilerRunBuilder":
+ """
+ Enable KLL sketch profiling for numeric columns.
+
+ KLL sketches provide approximate quantile estimation.
+
+ Returns:
+ self for method chaining
+ """
+ self._enable_kll = True
+ return self
+
+ def setKLLParameters(self, params: KLLParameters) -> "ColumnProfilerRunBuilder":
+ """
+ Set KLL sketch parameters.
+
+ Args:
+ params: KLLParameters configuration
+
+ Returns:
+ self for method chaining
+ """
+ self._kll_parameters = params
+ return self
+
+ def setPredefinedTypes(
+ self, types: Dict[str, str]
+ ) -> "ColumnProfilerRunBuilder":
+ """
+ Set predefined data types for columns.
+
+ This overrides automatic type inference for specified columns.
+
+ Args:
+ types: Dictionary mapping column names to type names.
+ Supported types: "String", "Integer", "Long", "Double", "Boolean"
+
+ Returns:
+ self for method chaining
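+
+ Example:
+ ColumnProfilerRunner(spark).onData(df).setPredefinedTypes({"zip_code": "String"}).run() # illustrative column name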
+ """
+ self._predefined_types = types
+ return self
+
+ def run(self) -> "DataFrame":
+ """
+ Execute the profiling and return results as a DataFrame.
+
+ The result DataFrame contains columns:
+ - column: Column name
+ - completeness: Non-null ratio (0.0-1.0)
+ - approx_distinct_values: Approximate cardinality
+ - data_type: Detected/provided type
+ - is_data_type_inferred: Whether type was inferred
+ - type_counts: JSON string of type counts
+ - histogram: JSON string of histogram (or null)
+ - mean, minimum, maximum, sum, std_dev: Numeric stats (null for non-numeric)
+ - approx_percentiles: JSON array of percentiles (null if not computed)
+ - kll_buckets: JSON string of KLL buckets (null if KLL disabled)
+
+ Returns:
+ DataFrame with profiling results (one row per column)
+
+ Raises:
+ RuntimeError: If the Deequ plugin is not available on the server
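+
+ Example:
+ result = ColumnProfilerRunner(spark).onData(df).run()
+ result.filter("completeness < 0.9").show() # flag columns with missing values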
+ """
+ # Build the protobuf message
+ profiler_msg = self._build_profiler_message()
+
+ # V2 only supports Spark Connect
+ return self._run_via_spark_connect(profiler_msg)
+
+ def _build_profiler_message(self) -> proto.DeequColumnProfilerRelation:
+ """Build the protobuf profiler message."""
+ msg = proto.DeequColumnProfilerRelation()
+
+ # Set column restrictions
+ if self._restrict_to_columns:
+ msg.restrict_to_columns.extend(self._restrict_to_columns)
+
+ # Set histogram threshold
+ if self._low_cardinality_threshold > 0:
+ msg.low_cardinality_histogram_threshold = self._low_cardinality_threshold
+
+ # Set KLL profiling
+ msg.enable_kll_profiling = self._enable_kll
+ if self._kll_parameters:
+ msg.kll_parameters.CopyFrom(self._kll_parameters.to_proto())
+
+ # Set predefined types
+ if self._predefined_types:
+ for col, dtype in self._predefined_types.items():
+ msg.predefined_types[col] = dtype
+
+ return msg
+
+ def _run_via_spark_connect(
+ self, msg: proto.DeequColumnProfilerRelation
+ ) -> "DataFrame":
+ """Execute profiling via Spark Connect plugin."""
+ # Get the input DataFrame's plan as serialized bytes
+ input_plan = self._df._plan.to_proto(self._spark._client)
+ msg.input_relation = input_plan.root.SerializeToString()
+
+ # Wrap our Deequ message in a google.protobuf.Any
+ extension = any_pb2.Any()
+ extension.Pack(msg, type_url_prefix="type.googleapis.com")
+
+ # Create a proper LogicalPlan subclass with the extension
+ plan = create_deequ_plan(extension)
+
+ # Create DataFrame from the plan (handles Spark 3.x vs 4.x)
+ return dataframe_from_plan(plan, self._spark)
+
+
+# Export all public symbols
+__all__ = [
+ "ColumnProfilerRunner",
+ "ColumnProfilerRunBuilder",
+ "KLLParameters",
+]
diff --git a/pydeequ/v2/proto/__init__.py b/pydeequ/v2/proto/__init__.py
new file mode 100644
index 0000000..b33a48d
--- /dev/null
+++ b/pydeequ/v2/proto/__init__.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+"""
+Generated protobuf classes for Deequ Spark Connect.
+"""
+
+from pydeequ.v2.proto.deequ_connect_pb2 import (
+ AnalyzerMessage,
+ CheckMessage,
+ CheckStatus,
+ ConstraintMessage,
+ ConstraintStatus,
+ DeequAnalysisRelation,
+ DeequColumnProfilerRelation,
+ DeequConstraintSuggestionRelation,
+ DeequVerificationRelation,
+ KLLParameters,
+ MetricEntity,
+ PredicateMessage,
+ VerificationStatus,
+)
+
+__all__ = [
+ "DeequVerificationRelation",
+ "DeequAnalysisRelation",
+ "DeequColumnProfilerRelation",
+ "DeequConstraintSuggestionRelation",
+ "CheckMessage",
+ "ConstraintMessage",
+ "PredicateMessage",
+ "AnalyzerMessage",
+ "KLLParameters",
+ "VerificationStatus",
+ "CheckStatus",
+ "ConstraintStatus",
+ "MetricEntity",
+]
diff --git a/pydeequ/v2/proto/deequ_connect.proto b/pydeequ/v2/proto/deequ_connect.proto
new file mode 100644
index 0000000..e2fe1c1
--- /dev/null
+++ b/pydeequ/v2/proto/deequ_connect.proto
@@ -0,0 +1,236 @@
+/**
+ * Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not
+ * use this file except in compliance with the License. A copy of the License
+ * is located at
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+syntax = "proto3";
+
+package com.amazon.deequ.connect;
+
+option java_multiple_files = true;
+option java_package = "com.amazon.deequ.connect.proto";
+option java_outer_classname = "DeequConnectProtos";
+
+// ============================================================================
+// Main Request Messages - Used as Spark Connect Relation Extensions
+// ============================================================================
+
+// Verification request - runs checks and returns results as DataFrame
+message DeequVerificationRelation {
+ // Reference to the input DataFrame (serialized Spark Connect Relation)
+ bytes input_relation = 1;
+
+ // Checks to run
+ repeated CheckMessage checks = 2;
+
+ // Additional analyzers to run (beyond those required by checks)
+ repeated AnalyzerMessage required_analyzers = 3;
+}
+
+// Analysis request - runs analyzers and returns metrics as DataFrame
+message DeequAnalysisRelation {
+ // Reference to the input DataFrame
+ bytes input_relation = 1;
+
+ // Analyzers to run
+ repeated AnalyzerMessage analyzers = 2;
+}
+
+// ============================================================================
+// Check Messages
+// ============================================================================
+
+// Check definition - a named collection of constraints
+message CheckMessage {
+ // Check severity level
+ enum Level {
+ ERROR = 0;
+ WARNING = 1;
+ }
+
+ Level level = 1;
+ string description = 2;
+ repeated ConstraintMessage constraints = 3;
+}
+
+// ============================================================================
+// Constraint Messages
+// ============================================================================
+
+// Constraint definition - a single data quality rule
+message ConstraintMessage {
+ // Constraint type identifier
+ string type = 1;
+
+ // Common fields
+ string column = 2; // Single column name
+ repeated string columns = 3; // Multiple column names
+ PredicateMessage assertion = 4; // Assertion predicate
+ string hint = 5; // Hint message for failures
+ string where = 6; // SQL WHERE clause filter
+
+ // Type-specific fields
+ string pattern = 7; // Regex pattern (for hasPattern, containsEmail, etc.)
+ string column_condition = 8; // SQL condition (for satisfies)
+ string constraint_name = 9; // Name for custom constraints (for satisfies)
+ repeated string allowed_values = 10; // Allowed values (for isContainedIn)
+
+ // Numeric parameters
+ double quantile = 11; // For hasApproxQuantile
+}
+
+// ============================================================================
+// Predicate Messages - Replaces Python Lambda Assertions
+// ============================================================================
+
+// Predicate for numeric assertions
+message PredicateMessage {
+ enum Operator {
+ UNSPECIFIED = 0; // Default/unset - used to detect "no predicate" vs "EQ 0.0"
+ EQ = 1; // ==
+ NE = 2; // !=
+ GT = 3; // >
+ GE = 4; // >=
+ LT = 5; // <
+ LE = 6; // <=
+ BETWEEN = 7; // lower <= x <= upper
+ }
+
+ Operator operator = 1;
+ double value = 2; // For comparison operators
+ double lower_bound = 3; // For BETWEEN
+ double upper_bound = 4; // For BETWEEN
+}
+
+// ============================================================================
+// Analyzer Messages
+// ============================================================================
+
+// Analyzer definition - computes a metric on data
+message AnalyzerMessage {
+ // Analyzer type identifier
+ string type = 1;
+
+ // Common fields
+ string column = 2; // Single column name
+ repeated string columns = 3; // Multiple column names
+ string where = 4; // SQL WHERE clause filter
+
+ // Type-specific parameters
+ double quantile = 5; // For ApproxQuantile
+ double relative_error = 6; // For ApproxQuantile, ApproxCountDistinct
+ string pattern = 7; // For PatternMatch
+ int32 max_detail_bins = 8; // For Histogram
+
+ // KLL Sketch parameters
+ KLLParameters kll_parameters = 9;
+}
+
+// Parameters for KLL Sketch analyzer
+message KLLParameters {
+ int32 sketch_size = 1;
+ double shrinking_factor = 2;
+ int32 number_of_buckets = 3;
+}
+
+// ============================================================================
+// Result Messages
+// ============================================================================
+
+// Verification result status
+enum VerificationStatus {
+ VERIFICATION_SUCCESS = 0;
+ VERIFICATION_WARNING = 1;
+ VERIFICATION_ERROR = 2;
+}
+
+// Check result status
+enum CheckStatus {
+ CHECK_SUCCESS = 0;
+ CHECK_WARNING = 1;
+ CHECK_ERROR = 2;
+}
+
+// Constraint result status
+enum ConstraintStatus {
+ CONSTRAINT_SUCCESS = 0;
+ CONSTRAINT_FAILURE = 1;
+}
+
+// Metric entity type
+enum MetricEntity {
+ DATASET = 0;
+ COLUMN = 1;
+ MULTICOLUMN = 2;
+}
+
+// ============================================================================
+// Column Profiler Messages
+// ============================================================================
+
+// Column profiler request - analyzes column distributions and statistics
+message DeequColumnProfilerRelation {
+ // Reference to the input DataFrame (serialized Spark Connect Relation)
+ bytes input_relation = 1;
+
+ // Restrict profiling to specific columns (empty = all columns)
+ repeated string restrict_to_columns = 2;
+
+ // Threshold for computing histograms (columns with distinct values <= threshold get histograms)
+ int32 low_cardinality_histogram_threshold = 3;
+
+ // Enable KLL sketch profiling for approximate quantiles
+ bool enable_kll_profiling = 4;
+
+ // KLL sketch parameters (only used if enable_kll_profiling is true)
+ KLLParameters kll_parameters = 5;
+
+ // Predefined data types for columns (column_name -> type_name)
+ // Supported types: "String", "Integer", "Long", "Double", "Boolean"
+ map<string, string> predefined_types = 6;
+}
+
+// ============================================================================
+// Constraint Suggestion Messages
+// ============================================================================
+
+// Constraint suggestion request - auto-generates data quality rules
+message DeequConstraintSuggestionRelation {
+ // Reference to the input DataFrame (serialized Spark Connect Relation)
+ bytes input_relation = 1;
+
+ // Constraint rule sets to apply
+ // Values: "DEFAULT", "STRING", "NUMERICAL", "COMMON", "EXTENDED"
+ repeated string constraint_rules = 2;
+
+ // Restrict suggestions to specific columns (empty = all columns)
+ repeated string restrict_to_columns = 3;
+
+ // Threshold for computing histograms
+ int32 low_cardinality_histogram_threshold = 4;
+
+ // Enable KLL sketch profiling
+ bool enable_kll_profiling = 5;
+
+ // KLL sketch parameters
+ KLLParameters kll_parameters = 6;
+
+ // Predefined data types for columns
+ map<string, string> predefined_types = 7;
+
+ // Train/test split ratio (0.0 = disabled; otherwise the fraction of rows held out as the test set)
+ double testset_ratio = 8;
+
+ // Random seed for train/test split (0 = no seed)
+ int64 testset_split_random_seed = 9;
+}
diff --git a/pydeequ/v2/proto/deequ_connect_pb2.py b/pydeequ/v2/proto/deequ_connect_pb2.py
new file mode 100644
index 0000000..61aadf3
--- /dev/null
+++ b/pydeequ/v2/proto/deequ_connect_pb2.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
+# source: deequ_connect.proto
+# Protobuf Python Version: 6.33.2
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+_runtime_version.ValidateProtobufRuntimeVersion(
+ _runtime_version.Domain.PUBLIC,
+ 6,
+ 33,
+ 2,
+ '',
+ 'deequ_connect.proto'
+)
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13\x64\x65\x65qu_connect.proto\x12\x18\x63om.amazon.deequ.connect\"\xb2\x01\n\x19\x44\x65\x65quVerificationRelation\x12\x16\n\x0einput_relation\x18\x01 \x01(\x0c\x12\x36\n\x06\x63hecks\x18\x02 \x03(\x0b\x32&.com.amazon.deequ.connect.CheckMessage\x12\x45\n\x12required_analyzers\x18\x03 \x03(\x0b\x32).com.amazon.deequ.connect.AnalyzerMessage\"m\n\x15\x44\x65\x65quAnalysisRelation\x12\x16\n\x0einput_relation\x18\x01 \x01(\x0c\x12<\n\tanalyzers\x18\x02 \x03(\x0b\x32).com.amazon.deequ.connect.AnalyzerMessage\"\xc3\x01\n\x0c\x43heckMessage\x12;\n\x05level\x18\x01 \x01(\x0e\x32,.com.amazon.deequ.connect.CheckMessage.Level\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12@\n\x0b\x63onstraints\x18\x03 \x03(\x0b\x32+.com.amazon.deequ.connect.ConstraintMessage\"\x1f\n\x05Level\x12\t\n\x05\x45RROR\x10\x00\x12\x0b\n\x07WARNING\x10\x01\"\x8c\x02\n\x11\x43onstraintMessage\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x0e\n\x06\x63olumn\x18\x02 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x03 \x03(\t\x12=\n\tassertion\x18\x04 \x01(\x0b\x32*.com.amazon.deequ.connect.PredicateMessage\x12\x0c\n\x04hint\x18\x05 \x01(\t\x12\r\n\x05where\x18\x06 \x01(\t\x12\x0f\n\x07pattern\x18\x07 \x01(\t\x12\x18\n\x10\x63olumn_condition\x18\x08 \x01(\t\x12\x17\n\x0f\x63onstraint_name\x18\t \x01(\t\x12\x16\n\x0e\x61llowed_values\x18\n \x03(\t\x12\x10\n\x08quantile\x18\x0b \x01(\x01\"\xec\x01\n\x10PredicateMessage\x12\x45\n\x08operator\x18\x01 \x01(\x0e\x32\x33.com.amazon.deequ.connect.PredicateMessage.Operator\x12\r\n\x05value\x18\x02 \x01(\x01\x12\x13\n\x0blower_bound\x18\x03 \x01(\x01\x12\x13\n\x0bupper_bound\x18\x04 \x01(\x01\"X\n\x08Operator\x12\x0f\n\x0bUNSPECIFIED\x10\x00\x12\x06\n\x02\x45Q\x10\x01\x12\x06\n\x02NE\x10\x02\x12\x06\n\x02GT\x10\x03\x12\x06\n\x02GE\x10\x04\x12\x06\n\x02LT\x10\x05\x12\x06\n\x02LE\x10\x06\x12\x0b\n\x07\x42\x45TWEEN\x10\x07\"\xe4\x01\n\x0f\x41nalyzerMessage\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x0e\n\x06\x63olumn\x18\x02 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x03 \x03(\t\x12\r\n\x05where\x18\x04 \x01(\t\x12\x10\n\x08quantile\x18\x05 \x01(\x01\x12\x16\n\x0erelative_error\x18\x06 \x01(\x01\x12\x0f\n\x07pattern\x18\x07 \x01(\t\x12\x17\n\x0fmax_detail_bins\x18\x08 \x01(\x05\x12?\n\x0ekll_parameters\x18\t \x01(\x0b\x32\'.com.amazon.deequ.connect.KLLParameters\"Y\n\rKLLParameters\x12\x13\n\x0bsketch_size\x18\x01 \x01(\x05\x12\x18\n\x10shrinking_factor\x18\x02 \x01(\x01\x12\x19\n\x11number_of_buckets\x18\x03 \x01(\x05\"\xfc\x02\n\x1b\x44\x65\x65quColumnProfilerRelation\x12\x16\n\x0einput_relation\x18\x01 \x01(\x0c\x12\x1b\n\x13restrict_to_columns\x18\x02 \x03(\t\x12+\n#low_cardinality_histogram_threshold\x18\x03 \x01(\x05\x12\x1c\n\x14\x65nable_kll_profiling\x18\x04 \x01(\x08\x12?\n\x0ekll_parameters\x18\x05 \x01(\x0b\x32\'.com.amazon.deequ.connect.KLLParameters\x12\x64\n\x10predefined_types\x18\x06 \x03(\x0b\x32J.com.amazon.deequ.connect.DeequColumnProfilerRelation.PredefinedTypesEntry\x1a\x36\n\x14PredefinedTypesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xdc\x03\n!DeequConstraintSuggestionRelation\x12\x16\n\x0einput_relation\x18\x01 \x01(\x0c\x12\x18\n\x10\x63onstraint_rules\x18\x02 \x03(\t\x12\x1b\n\x13restrict_to_columns\x18\x03 \x03(\t\x12+\n#low_cardinality_histogram_threshold\x18\x04 \x01(\x05\x12\x1c\n\x14\x65nable_kll_profiling\x18\x05 \x01(\x08\x12?\n\x0ekll_parameters\x18\x06 \x01(\x0b\x32\'.com.amazon.deequ.connect.KLLParameters\x12j\n\x10predefined_types\x18\x07 \x03(\x0b\x32P.com.amazon.deequ.connect.DeequConstraintSuggestionRelation.PredefinedTypesEntry\x12\x15\n\rtestset_ratio\x18\x08 \x01(\x01\x12!\n\x19testset_split_random_seed\x18\t \x01(\x03\x1a\x36\n\x14PredefinedTypesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01*`\n\x12VerificationStatus\x12\x18\n\x14VERIFICATION_SUCCESS\x10\x00\x12\x18\n\x14VERIFICATION_WARNING\x10\x01\x12\x16\n\x12VERIFICATION_ERROR\x10\x02*D\n\x0b\x43heckStatus\x12\x11\n\rCHECK_SUCCESS\x10\x00\x12\x11\n\rCHECK_WARNING\x10\x01\x12\x0f\n\x0b\x43HECK_ERROR\x10\x02*B\n\x10\x43onstraintStatus\x12\x16\n\x12\x43ONSTRAINT_SUCCESS\x10\x00\x12\x16\n\x12\x43ONSTRAINT_FAILURE\x10\x01*8\n\x0cMetricEntity\x12\x0b\n\x07\x44\x41TASET\x10\x00\x12\n\n\x06\x43OLUMN\x10\x01\x12\x0f\n\x0bMULTICOLUMN\x10\x02\x42\x36\n\x1e\x63om.amazon.deequ.connect.protoB\x12\x44\x65\x65quConnectProtosP\x01\x62\x06proto3')
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'deequ_connect_pb2', _globals)
+if not _descriptor._USE_C_DESCRIPTORS:
+ _globals['DESCRIPTOR']._loaded_options = None
+ _globals['DESCRIPTOR']._serialized_options = b'\n\036com.amazon.deequ.connect.protoB\022DeequConnectProtosP\001'
+ _globals['_DEEQUCOLUMNPROFILERRELATION_PREDEFINEDTYPESENTRY']._loaded_options = None
+ _globals['_DEEQUCOLUMNPROFILERRELATION_PREDEFINEDTYPESENTRY']._serialized_options = b'8\001'
+ _globals['_DEEQUCONSTRAINTSUGGESTIONRELATION_PREDEFINEDTYPESENTRY']._loaded_options = None
+ _globals['_DEEQUCONSTRAINTSUGGESTIONRELATION_PREDEFINEDTYPESENTRY']._serialized_options = b'8\001'
+ _globals['_VERIFICATIONSTATUS']._serialized_start=2233
+ _globals['_VERIFICATIONSTATUS']._serialized_end=2329
+ _globals['_CHECKSTATUS']._serialized_start=2331
+ _globals['_CHECKSTATUS']._serialized_end=2399
+ _globals['_CONSTRAINTSTATUS']._serialized_start=2401
+ _globals['_CONSTRAINTSTATUS']._serialized_end=2467
+ _globals['_METRICENTITY']._serialized_start=2469
+ _globals['_METRICENTITY']._serialized_end=2525
+ _globals['_DEEQUVERIFICATIONRELATION']._serialized_start=50
+ _globals['_DEEQUVERIFICATIONRELATION']._serialized_end=228
+ _globals['_DEEQUANALYSISRELATION']._serialized_start=230
+ _globals['_DEEQUANALYSISRELATION']._serialized_end=339
+ _globals['_CHECKMESSAGE']._serialized_start=342
+ _globals['_CHECKMESSAGE']._serialized_end=537
+ _globals['_CHECKMESSAGE_LEVEL']._serialized_start=506
+ _globals['_CHECKMESSAGE_LEVEL']._serialized_end=537
+ _globals['_CONSTRAINTMESSAGE']._serialized_start=540
+ _globals['_CONSTRAINTMESSAGE']._serialized_end=808
+ _globals['_PREDICATEMESSAGE']._serialized_start=811
+ _globals['_PREDICATEMESSAGE']._serialized_end=1047
+ _globals['_PREDICATEMESSAGE_OPERATOR']._serialized_start=959
+ _globals['_PREDICATEMESSAGE_OPERATOR']._serialized_end=1047
+ _globals['_ANALYZERMESSAGE']._serialized_start=1050
+ _globals['_ANALYZERMESSAGE']._serialized_end=1278
+ _globals['_KLLPARAMETERS']._serialized_start=1280
+ _globals['_KLLPARAMETERS']._serialized_end=1369
+ _globals['_DEEQUCOLUMNPROFILERRELATION']._serialized_start=1372
+ _globals['_DEEQUCOLUMNPROFILERRELATION']._serialized_end=1752
+ _globals['_DEEQUCOLUMNPROFILERRELATION_PREDEFINEDTYPESENTRY']._serialized_start=1698
+ _globals['_DEEQUCOLUMNPROFILERRELATION_PREDEFINEDTYPESENTRY']._serialized_end=1752
+ _globals['_DEEQUCONSTRAINTSUGGESTIONRELATION']._serialized_start=1755
+ _globals['_DEEQUCONSTRAINTSUGGESTIONRELATION']._serialized_end=2231
+ _globals['_DEEQUCONSTRAINTSUGGESTIONRELATION_PREDEFINEDTYPESENTRY']._serialized_start=1698
+ _globals['_DEEQUCONSTRAINTSUGGESTIONRELATION_PREDEFINEDTYPESENTRY']._serialized_end=1752
+# @@protoc_insertion_point(module_scope)
diff --git a/pydeequ/v2/proto/deequ_connect_pb2.pyi b/pydeequ/v2/proto/deequ_connect_pb2.pyi
new file mode 100644
index 0000000..b46b22f
--- /dev/null
+++ b/pydeequ/v2/proto/deequ_connect_pb2.pyi
@@ -0,0 +1,216 @@
+from google.protobuf.internal import containers as _containers
+from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from collections.abc import Iterable as _Iterable, Mapping as _Mapping
+from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
+
+DESCRIPTOR: _descriptor.FileDescriptor
+
+class VerificationStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+ __slots__ = ()
+ VERIFICATION_SUCCESS: _ClassVar[VerificationStatus]
+ VERIFICATION_WARNING: _ClassVar[VerificationStatus]
+ VERIFICATION_ERROR: _ClassVar[VerificationStatus]
+
+class CheckStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+ __slots__ = ()
+ CHECK_SUCCESS: _ClassVar[CheckStatus]
+ CHECK_WARNING: _ClassVar[CheckStatus]
+ CHECK_ERROR: _ClassVar[CheckStatus]
+
+class ConstraintStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+ __slots__ = ()
+ CONSTRAINT_SUCCESS: _ClassVar[ConstraintStatus]
+ CONSTRAINT_FAILURE: _ClassVar[ConstraintStatus]
+
+class MetricEntity(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+ __slots__ = ()
+ DATASET: _ClassVar[MetricEntity]
+ COLUMN: _ClassVar[MetricEntity]
+ MULTICOLUMN: _ClassVar[MetricEntity]
+VERIFICATION_SUCCESS: VerificationStatus
+VERIFICATION_WARNING: VerificationStatus
+VERIFICATION_ERROR: VerificationStatus
+CHECK_SUCCESS: CheckStatus
+CHECK_WARNING: CheckStatus
+CHECK_ERROR: CheckStatus
+CONSTRAINT_SUCCESS: ConstraintStatus
+CONSTRAINT_FAILURE: ConstraintStatus
+DATASET: MetricEntity
+COLUMN: MetricEntity
+MULTICOLUMN: MetricEntity
+
+class DeequVerificationRelation(_message.Message):
+ __slots__ = ()
+ INPUT_RELATION_FIELD_NUMBER: _ClassVar[int]
+ CHECKS_FIELD_NUMBER: _ClassVar[int]
+ REQUIRED_ANALYZERS_FIELD_NUMBER: _ClassVar[int]
+ input_relation: bytes
+ checks: _containers.RepeatedCompositeFieldContainer[CheckMessage]
+ required_analyzers: _containers.RepeatedCompositeFieldContainer[AnalyzerMessage]
+ def __init__(self, input_relation: _Optional[bytes] = ..., checks: _Optional[_Iterable[_Union[CheckMessage, _Mapping]]] = ..., required_analyzers: _Optional[_Iterable[_Union[AnalyzerMessage, _Mapping]]] = ...) -> None: ...
+
+class DeequAnalysisRelation(_message.Message):
+ __slots__ = ()
+ INPUT_RELATION_FIELD_NUMBER: _ClassVar[int]
+ ANALYZERS_FIELD_NUMBER: _ClassVar[int]
+ input_relation: bytes
+ analyzers: _containers.RepeatedCompositeFieldContainer[AnalyzerMessage]
+ def __init__(self, input_relation: _Optional[bytes] = ..., analyzers: _Optional[_Iterable[_Union[AnalyzerMessage, _Mapping]]] = ...) -> None: ...
+
+class CheckMessage(_message.Message):
+ __slots__ = ()
+ class Level(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+ __slots__ = ()
+ ERROR: _ClassVar[CheckMessage.Level]
+ WARNING: _ClassVar[CheckMessage.Level]
+ ERROR: CheckMessage.Level
+ WARNING: CheckMessage.Level
+ LEVEL_FIELD_NUMBER: _ClassVar[int]
+ DESCRIPTION_FIELD_NUMBER: _ClassVar[int]
+ CONSTRAINTS_FIELD_NUMBER: _ClassVar[int]
+ level: CheckMessage.Level
+ description: str
+ constraints: _containers.RepeatedCompositeFieldContainer[ConstraintMessage]
+ def __init__(self, level: _Optional[_Union[CheckMessage.Level, str]] = ..., description: _Optional[str] = ..., constraints: _Optional[_Iterable[_Union[ConstraintMessage, _Mapping]]] = ...) -> None: ...
+
+class ConstraintMessage(_message.Message):
+ __slots__ = ()
+ TYPE_FIELD_NUMBER: _ClassVar[int]
+ COLUMN_FIELD_NUMBER: _ClassVar[int]
+ COLUMNS_FIELD_NUMBER: _ClassVar[int]
+ ASSERTION_FIELD_NUMBER: _ClassVar[int]
+ HINT_FIELD_NUMBER: _ClassVar[int]
+ WHERE_FIELD_NUMBER: _ClassVar[int]
+ PATTERN_FIELD_NUMBER: _ClassVar[int]
+ COLUMN_CONDITION_FIELD_NUMBER: _ClassVar[int]
+ CONSTRAINT_NAME_FIELD_NUMBER: _ClassVar[int]
+ ALLOWED_VALUES_FIELD_NUMBER: _ClassVar[int]
+ QUANTILE_FIELD_NUMBER: _ClassVar[int]
+ type: str
+ column: str
+ columns: _containers.RepeatedScalarFieldContainer[str]
+ assertion: PredicateMessage
+ hint: str
+ where: str
+ pattern: str
+ column_condition: str
+ constraint_name: str
+ allowed_values: _containers.RepeatedScalarFieldContainer[str]
+ quantile: float
+ def __init__(self, type: _Optional[str] = ..., column: _Optional[str] = ..., columns: _Optional[_Iterable[str]] = ..., assertion: _Optional[_Union[PredicateMessage, _Mapping]] = ..., hint: _Optional[str] = ..., where: _Optional[str] = ..., pattern: _Optional[str] = ..., column_condition: _Optional[str] = ..., constraint_name: _Optional[str] = ..., allowed_values: _Optional[_Iterable[str]] = ..., quantile: _Optional[float] = ...) -> None: ...
+
+class PredicateMessage(_message.Message):
+ __slots__ = ()
+ class Operator(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+ __slots__ = ()
+ UNSPECIFIED: _ClassVar[PredicateMessage.Operator]
+ EQ: _ClassVar[PredicateMessage.Operator]
+ NE: _ClassVar[PredicateMessage.Operator]
+ GT: _ClassVar[PredicateMessage.Operator]
+ GE: _ClassVar[PredicateMessage.Operator]
+ LT: _ClassVar[PredicateMessage.Operator]
+ LE: _ClassVar[PredicateMessage.Operator]
+ BETWEEN: _ClassVar[PredicateMessage.Operator]
+ UNSPECIFIED: PredicateMessage.Operator
+ EQ: PredicateMessage.Operator
+ NE: PredicateMessage.Operator
+ GT: PredicateMessage.Operator
+ GE: PredicateMessage.Operator
+ LT: PredicateMessage.Operator
+ LE: PredicateMessage.Operator
+ BETWEEN: PredicateMessage.Operator
+ OPERATOR_FIELD_NUMBER: _ClassVar[int]
+ VALUE_FIELD_NUMBER: _ClassVar[int]
+ LOWER_BOUND_FIELD_NUMBER: _ClassVar[int]
+ UPPER_BOUND_FIELD_NUMBER: _ClassVar[int]
+ operator: PredicateMessage.Operator
+ value: float
+ lower_bound: float
+ upper_bound: float
+ def __init__(self, operator: _Optional[_Union[PredicateMessage.Operator, str]] = ..., value: _Optional[float] = ..., lower_bound: _Optional[float] = ..., upper_bound: _Optional[float] = ...) -> None: ...
+
+class AnalyzerMessage(_message.Message):
+ __slots__ = ()
+ TYPE_FIELD_NUMBER: _ClassVar[int]
+ COLUMN_FIELD_NUMBER: _ClassVar[int]
+ COLUMNS_FIELD_NUMBER: _ClassVar[int]
+ WHERE_FIELD_NUMBER: _ClassVar[int]
+ QUANTILE_FIELD_NUMBER: _ClassVar[int]
+ RELATIVE_ERROR_FIELD_NUMBER: _ClassVar[int]
+ PATTERN_FIELD_NUMBER: _ClassVar[int]
+ MAX_DETAIL_BINS_FIELD_NUMBER: _ClassVar[int]
+ KLL_PARAMETERS_FIELD_NUMBER: _ClassVar[int]
+ type: str
+ column: str
+ columns: _containers.RepeatedScalarFieldContainer[str]
+ where: str
+ quantile: float
+ relative_error: float
+ pattern: str
+ max_detail_bins: int
+ kll_parameters: KLLParameters
+ def __init__(self, type: _Optional[str] = ..., column: _Optional[str] = ..., columns: _Optional[_Iterable[str]] = ..., where: _Optional[str] = ..., quantile: _Optional[float] = ..., relative_error: _Optional[float] = ..., pattern: _Optional[str] = ..., max_detail_bins: _Optional[int] = ..., kll_parameters: _Optional[_Union[KLLParameters, _Mapping]] = ...) -> None: ...
+
+class KLLParameters(_message.Message):
+ __slots__ = ()
+ SKETCH_SIZE_FIELD_NUMBER: _ClassVar[int]
+ SHRINKING_FACTOR_FIELD_NUMBER: _ClassVar[int]
+ NUMBER_OF_BUCKETS_FIELD_NUMBER: _ClassVar[int]
+ sketch_size: int
+ shrinking_factor: float
+ number_of_buckets: int
+ def __init__(self, sketch_size: _Optional[int] = ..., shrinking_factor: _Optional[float] = ..., number_of_buckets: _Optional[int] = ...) -> None: ...
+
+class DeequColumnProfilerRelation(_message.Message):
+ __slots__ = ()
+ class PredefinedTypesEntry(_message.Message):
+ __slots__ = ()
+ KEY_FIELD_NUMBER: _ClassVar[int]
+ VALUE_FIELD_NUMBER: _ClassVar[int]
+ key: str
+ value: str
+ def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
+ INPUT_RELATION_FIELD_NUMBER: _ClassVar[int]
+ RESTRICT_TO_COLUMNS_FIELD_NUMBER: _ClassVar[int]
+ LOW_CARDINALITY_HISTOGRAM_THRESHOLD_FIELD_NUMBER: _ClassVar[int]
+ ENABLE_KLL_PROFILING_FIELD_NUMBER: _ClassVar[int]
+ KLL_PARAMETERS_FIELD_NUMBER: _ClassVar[int]
+ PREDEFINED_TYPES_FIELD_NUMBER: _ClassVar[int]
+ input_relation: bytes
+ restrict_to_columns: _containers.RepeatedScalarFieldContainer[str]
+ low_cardinality_histogram_threshold: int
+ enable_kll_profiling: bool
+ kll_parameters: KLLParameters
+ predefined_types: _containers.ScalarMap[str, str]
+ def __init__(self, input_relation: _Optional[bytes] = ..., restrict_to_columns: _Optional[_Iterable[str]] = ..., low_cardinality_histogram_threshold: _Optional[int] = ..., enable_kll_profiling: _Optional[bool] = ..., kll_parameters: _Optional[_Union[KLLParameters, _Mapping]] = ..., predefined_types: _Optional[_Mapping[str, str]] = ...) -> None: ...
+
+class DeequConstraintSuggestionRelation(_message.Message):
+ __slots__ = ()
+ class PredefinedTypesEntry(_message.Message):
+ __slots__ = ()
+ KEY_FIELD_NUMBER: _ClassVar[int]
+ VALUE_FIELD_NUMBER: _ClassVar[int]
+ key: str
+ value: str
+ def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
+ INPUT_RELATION_FIELD_NUMBER: _ClassVar[int]
+ CONSTRAINT_RULES_FIELD_NUMBER: _ClassVar[int]
+ RESTRICT_TO_COLUMNS_FIELD_NUMBER: _ClassVar[int]
+ LOW_CARDINALITY_HISTOGRAM_THRESHOLD_FIELD_NUMBER: _ClassVar[int]
+ ENABLE_KLL_PROFILING_FIELD_NUMBER: _ClassVar[int]
+ KLL_PARAMETERS_FIELD_NUMBER: _ClassVar[int]
+ PREDEFINED_TYPES_FIELD_NUMBER: _ClassVar[int]
+ TESTSET_RATIO_FIELD_NUMBER: _ClassVar[int]
+ TESTSET_SPLIT_RANDOM_SEED_FIELD_NUMBER: _ClassVar[int]
+ input_relation: bytes
+ constraint_rules: _containers.RepeatedScalarFieldContainer[str]
+ restrict_to_columns: _containers.RepeatedScalarFieldContainer[str]
+ low_cardinality_histogram_threshold: int
+ enable_kll_profiling: bool
+ kll_parameters: KLLParameters
+ predefined_types: _containers.ScalarMap[str, str]
+ testset_ratio: float
+ testset_split_random_seed: int
+ def __init__(self, input_relation: _Optional[bytes] = ..., constraint_rules: _Optional[_Iterable[str]] = ..., restrict_to_columns: _Optional[_Iterable[str]] = ..., low_cardinality_histogram_threshold: _Optional[int] = ..., enable_kll_profiling: _Optional[bool] = ..., kll_parameters: _Optional[_Union[KLLParameters, _Mapping]] = ..., predefined_types: _Optional[_Mapping[str, str]] = ..., testset_ratio: _Optional[float] = ..., testset_split_random_seed: _Optional[int] = ...) -> None: ...
diff --git a/pydeequ/v2/spark_helpers.py b/pydeequ/v2/spark_helpers.py
new file mode 100644
index 0000000..56c72e2
--- /dev/null
+++ b/pydeequ/v2/spark_helpers.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+"""
+Spark helper functions for PyDeequ v2.
+
+This module provides helper functions for working with Spark Connect,
+including compatibility shims for different Spark versions.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from google.protobuf import any_pb2
+
+if TYPE_CHECKING:
+ from pyspark.sql import DataFrame, SparkSession
+ from pyspark.sql.connect.plan import LogicalPlan
+
+
+def dataframe_from_plan(plan: "LogicalPlan", session: "SparkSession") -> "DataFrame":
+ """
+ Create a DataFrame from a LogicalPlan, handling Spark version differences.
+
+ Spark 3.x uses DataFrame.withPlan(plan, session)
+ Spark 4.x uses DataFrame(plan, session)
+
+ Args:
+ plan: LogicalPlan to create DataFrame from
+ session: SparkSession
+
+ Returns:
+ DataFrame wrapping the plan
+ """
+ from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
+
+ if hasattr(ConnectDataFrame, "withPlan"):
+ # Spark 3.x
+ return ConnectDataFrame.withPlan(plan, session=session)
+
+ # Spark 4.x
+ return ConnectDataFrame(plan, session)
+
+
+def create_deequ_plan(extension: any_pb2.Any) -> "LogicalPlan":
+ """
+ Create a LogicalPlan subclass for Deequ that properly integrates with PySpark.
+
+ We dynamically import and subclass LogicalPlan to avoid import issues
+ when Spark Connect is not available.
+
+ Args:
+ extension: Protobuf Any message containing the Deequ operation
+
+ Returns:
+ LogicalPlan instance for the Deequ operation
+ """
+ import pyspark.sql.connect.proto as spark_proto
+ from pyspark.sql.connect.plan import LogicalPlan
+
+ class _DeequExtensionPlan(LogicalPlan):
+ """
+ Custom LogicalPlan for Deequ operations via Spark Connect.
+
+ This plan wraps our protobuf message as a Relation extension,
+ which is sent to the server and handled by DeequRelationPlugin.
+ """
+
+ def __init__(self, ext: any_pb2.Any):
+ # Pass None as child - this is a leaf node
+ super().__init__(child=None)
+ self._extension = ext
+
+ def plan(self, session) -> spark_proto.Relation:
+ """Return the Relation proto for this plan."""
+ rel = self._create_proto_relation()
+ rel.extension.CopyFrom(self._extension)
+ return rel
+
+ def __repr__(self) -> str:
+ return "DeequExtensionPlan"
+
+ return _DeequExtensionPlan(extension)
+
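+# Illustrative sketch (not part of the public API): the two helpers above are
+# intended to be used together by the v2 runners, mirroring
+# pydeequ/v2/verification.py. `msg` is assumed to be an already-built Deequ
+# protobuf message and `spark` a Spark Connect SparkSession:
+#
+#     extension = any_pb2.Any()
+#     extension.Pack(msg, type_url_prefix="type.googleapis.com")
+#     plan = create_deequ_plan(extension)
+#     df = dataframe_from_plan(plan, spark)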
+
+__all__ = [
+ "dataframe_from_plan",
+ "create_deequ_plan",
+]
diff --git a/pydeequ/v2/suggestions.py b/pydeequ/v2/suggestions.py
new file mode 100644
index 0000000..b89b07b
--- /dev/null
+++ b/pydeequ/v2/suggestions.py
@@ -0,0 +1,340 @@
+# -*- coding: utf-8 -*-
+"""
+Constraint Suggestions for Deequ Spark Connect.
+
+This module provides automatic constraint suggestion capabilities that analyze
+DataFrame columns and suggest appropriate data quality constraints based on
+the data characteristics.
+
+Example usage:
+ from pyspark.sql import SparkSession
+ from pydeequ.v2.suggestions import ConstraintSuggestionRunner, Rules
+
+ spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+
+ # Basic suggestions with default rules
+ suggestions = (ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run())
+
+ # With train/test evaluation
+ suggestions = (ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.EXTENDED)
+ .useTrainTestSplitWithTestsetRatio(0.2, seed=42)
+ .run())
+
+ suggestions.show() # Result is a DataFrame with suggested constraints
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import TYPE_CHECKING, Dict, List, Optional, Sequence
+
+from google.protobuf import any_pb2
+
+from pydeequ.v2.profiles import KLLParameters
+from pydeequ.v2.proto import deequ_connect_pb2 as proto
+from pydeequ.v2.spark_helpers import create_deequ_plan, dataframe_from_plan
+
+if TYPE_CHECKING:
+ from pyspark.sql import DataFrame, SparkSession
+
+
+class Rules(Enum):
+ """
+ Constraint suggestion rule sets.
+
+ Different rule sets analyze different aspects of the data:
+
+ - DEFAULT: Core rules for completeness, type retention, categorical ranges
+ - STRING: String-specific rules for length constraints
+ - NUMERICAL: Numeric rules for min/max/mean/stddev
+ - COMMON: Common patterns like uniqueness
+ - EXTENDED: All rules combined
+ """
+
+ DEFAULT = "DEFAULT"
+ """Core rules: CompleteIfComplete, RetainCompleteness, RetainType,
+ CategoricalRange, FractionalCategoricalRange, NonNegativeNumbers"""
+
+ STRING = "STRING"
+ """String rules: HasMinLength, HasMaxLength"""
+
+ NUMERICAL = "NUMERICAL"
+ """Numeric rules: HasMin, HasMax, HasMean, HasStandardDeviation"""
+
+ COMMON = "COMMON"
+ """Common patterns: UniqueIfApproximatelyUnique"""
+
+ EXTENDED = "EXTENDED"
+ """All rules combined: DEFAULT + STRING + NUMERICAL + COMMON"""
+
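+# Illustrative sketch: rule sets are additive, so several can be combined in a
+# single run by chaining addConstraintRules() on the builder below (`df` is
+# assumed to be an existing DataFrame):
+#
+#     suggestions = (ConstraintSuggestionRunner(spark)
+#         .onData(df)
+#         .addConstraintRules(Rules.DEFAULT)
+#         .addConstraintRules(Rules.STRING)
+#         .addConstraintRules(Rules.NUMERICAL)
+#         .run())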
+
+class ConstraintSuggestionRunner:
+ """
+ Entry point for generating constraint suggestions.
+
+ ConstraintSuggestionRunner analyzes DataFrame columns to suggest
+ appropriate data quality constraints based on the data characteristics.
+
+ Example:
+ suggestions = (ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run())
+ """
+
+ def __init__(self, spark: "SparkSession"):
+ """
+ Create a new ConstraintSuggestionRunner.
+
+ Args:
+            spark: SparkSession connected via Spark Connect
+ """
+ self._spark = spark
+
+ def onData(self, df: "DataFrame") -> "ConstraintSuggestionRunBuilder":
+ """
+ Specify the DataFrame to analyze.
+
+ Args:
+ df: DataFrame to analyze for constraint suggestions
+
+ Returns:
+ ConstraintSuggestionRunBuilder for method chaining
+ """
+ return ConstraintSuggestionRunBuilder(self._spark, df)
+
+
+class ConstraintSuggestionRunBuilder:
+ """
+ Builder for configuring and executing a constraint suggestion run.
+
+ This class collects suggestion options and executes the analysis
+ when run() is called.
+ """
+
+ def __init__(self, spark: "SparkSession", df: "DataFrame"):
+ """
+ Create a new ConstraintSuggestionRunBuilder.
+
+ Args:
+ spark: SparkSession
+ df: DataFrame to analyze
+ """
+ self._spark = spark
+ self._df = df
+ self._rules: List[Rules] = []
+ self._restrict_to_columns: Optional[Sequence[str]] = None
+ self._low_cardinality_threshold: int = 0
+ self._enable_kll: bool = False
+ self._kll_parameters: Optional[KLLParameters] = None
+ self._predefined_types: Optional[Dict[str, str]] = None
+ self._testset_ratio: float = 0.0
+ self._testset_seed: Optional[int] = None
+
+ def addConstraintRules(self, rules: Rules) -> "ConstraintSuggestionRunBuilder":
+ """
+ Add a constraint rule set.
+
+ Can be called multiple times to add multiple rule sets.
+
+ Args:
+ rules: Rules enum value specifying which rules to use
+
+ Returns:
+ self for method chaining
+ """
+ self._rules.append(rules)
+ return self
+
+ def restrictToColumns(
+ self, columns: Sequence[str]
+ ) -> "ConstraintSuggestionRunBuilder":
+ """
+ Restrict suggestions to specific columns.
+
+ Args:
+ columns: List of column names to analyze
+
+ Returns:
+ self for method chaining
+ """
+ self._restrict_to_columns = columns
+ return self
+
+ def withLowCardinalityHistogramThreshold(
+ self, threshold: int
+ ) -> "ConstraintSuggestionRunBuilder":
+ """
+ Set threshold for computing histograms during profiling.
+
+ Args:
+ threshold: Maximum distinct values for histogram computation
+
+ Returns:
+ self for method chaining
+ """
+ self._low_cardinality_threshold = threshold
+ return self
+
+ def withKLLProfiling(self) -> "ConstraintSuggestionRunBuilder":
+ """
+ Enable KLL sketch profiling for numeric columns.
+
+ Returns:
+ self for method chaining
+ """
+ self._enable_kll = True
+ return self
+
+ def setKLLParameters(
+ self, params: KLLParameters
+ ) -> "ConstraintSuggestionRunBuilder":
+ """
+ Set KLL sketch parameters.
+
+ Args:
+ params: KLLParameters configuration
+
+ Returns:
+ self for method chaining
+ """
+ self._kll_parameters = params
+ return self
+
+ def setPredefinedTypes(
+ self, types: Dict[str, str]
+ ) -> "ConstraintSuggestionRunBuilder":
+ """
+ Set predefined data types for columns.
+
+ Args:
+ types: Dictionary mapping column names to type names
+
+ Returns:
+ self for method chaining
+ """
+ self._predefined_types = types
+ return self
+
+ def useTrainTestSplitWithTestsetRatio(
+ self, ratio: float, seed: Optional[int] = None
+ ) -> "ConstraintSuggestionRunBuilder":
+ """
+ Enable train/test split for evaluating suggestions.
+
+ When enabled, the data is split into training and test sets.
+ Suggestions are generated from the training set and then
+ evaluated against the test set.
+
+ Args:
+ ratio: Fraction of data to use as test set (0.0-1.0)
+ seed: Optional random seed for reproducibility
+
+ Returns:
+ self for method chaining
+ """
+ if not 0.0 < ratio < 1.0:
+ raise ValueError("testset_ratio must be between 0.0 and 1.0 (exclusive)")
+ self._testset_ratio = ratio
+ self._testset_seed = seed
+ return self
+
+ def run(self) -> "DataFrame":
+ """
+ Execute the suggestion analysis and return results as a DataFrame.
+
+ The result DataFrame contains columns:
+ - column_name: Column the constraint applies to
+ - constraint_name: Type of constraint (e.g., "Completeness", "IsIn")
+ - current_value: Current metric value that triggered suggestion
+ - description: Human-readable description
+ - suggesting_rule: Rule that generated this suggestion
+ - code_for_constraint: Python code snippet for the constraint
+
+ If train/test split is enabled:
+ - evaluation_status: "Success" or "Failure" on test set
+ - evaluation_metric_value: Actual metric on test set
+
+ Returns:
+ DataFrame with constraint suggestions
+
+ Raises:
+ RuntimeError: If the Deequ plugin is not available on the server
+ ValueError: If no rules have been added
+ """
+ if not self._rules:
+ raise ValueError(
+ "At least one constraint rule set must be added. "
+ "Use .addConstraintRules(Rules.DEFAULT) to add rules."
+ )
+
+ # Build the protobuf message
+ suggestion_msg = self._build_suggestion_message()
+
+ # V2 only supports Spark Connect
+ return self._run_via_spark_connect(suggestion_msg)
+
+ def _build_suggestion_message(self) -> proto.DeequConstraintSuggestionRelation:
+ """Build the protobuf suggestion message."""
+ msg = proto.DeequConstraintSuggestionRelation()
+
+ # Add constraint rules
+ for rule in self._rules:
+ msg.constraint_rules.append(rule.value)
+
+ # Set column restrictions
+ if self._restrict_to_columns:
+ msg.restrict_to_columns.extend(self._restrict_to_columns)
+
+ # Set histogram threshold
+ if self._low_cardinality_threshold > 0:
+ msg.low_cardinality_histogram_threshold = self._low_cardinality_threshold
+
+ # Set KLL profiling
+ msg.enable_kll_profiling = self._enable_kll
+ if self._kll_parameters:
+ msg.kll_parameters.CopyFrom(self._kll_parameters.to_proto())
+
+ # Set predefined types
+ if self._predefined_types:
+ for col, dtype in self._predefined_types.items():
+ msg.predefined_types[col] = dtype
+
+ # Set train/test split
+ if self._testset_ratio > 0:
+ msg.testset_ratio = self._testset_ratio
+ if self._testset_seed is not None:
+ msg.testset_split_random_seed = self._testset_seed
+
+ return msg
+
+ def _run_via_spark_connect(
+ self, msg: proto.DeequConstraintSuggestionRelation
+ ) -> "DataFrame":
+ """Execute suggestion analysis via Spark Connect plugin."""
+ # Get the input DataFrame's plan as serialized bytes
+ input_plan = self._df._plan.to_proto(self._spark._client)
+ msg.input_relation = input_plan.root.SerializeToString()
+
+ # Wrap our Deequ message in a google.protobuf.Any
+ extension = any_pb2.Any()
+ extension.Pack(msg, type_url_prefix="type.googleapis.com")
+
+ # Create a proper LogicalPlan subclass with the extension
+ plan = create_deequ_plan(extension)
+
+ # Create DataFrame from the plan (handles Spark 3.x vs 4.x)
+ return dataframe_from_plan(plan, self._spark)
+
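+# Illustrative note (a sketch, not executed here): the DataFrame returned by
+# run() can be narrowed with ordinary PySpark operations, for example to pull
+# out the generated constraint code for a single column:
+#
+#     (suggestions
+#         .filter("column_name = 'id'")
+#         .select("constraint_name", "code_for_constraint")
+#         .show(truncate=False))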
+
+# Export all public symbols
+__all__ = [
+ "ConstraintSuggestionRunner",
+ "ConstraintSuggestionRunBuilder",
+ "Rules",
+]
diff --git a/pydeequ/v2/verification.py b/pydeequ/v2/verification.py
new file mode 100644
index 0000000..c6d8d2f
--- /dev/null
+++ b/pydeequ/v2/verification.py
@@ -0,0 +1,279 @@
+# -*- coding: utf-8 -*-
+"""
+VerificationSuite for Deequ Spark Connect.
+
+This module provides the main entry point for running data quality checks
+via Spark Connect. It builds protobuf messages and sends them to the
+server-side Deequ plugin.
+
+Example usage:
+ from pyspark.sql import SparkSession
+ from pydeequ.v2.verification import VerificationSuite
+ from pydeequ.v2.checks import Check, CheckLevel
+ from pydeequ.v2.predicates import gte, eq
+
+ spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+
+ check = (Check(CheckLevel.Error, "Data quality check")
+ .isComplete("id")
+ .hasCompleteness("email", gte(0.95)))
+
+ result = (VerificationSuite(spark)
+ .onData(df)
+ .addCheck(check)
+ .run())
+
+ result.show() # Result is a DataFrame
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, List
+
+from google.protobuf import any_pb2
+
+from pydeequ.v2.analyzers import _ConnectAnalyzer
+from pydeequ.v2.checks import Check
+from pydeequ.v2.proto import deequ_connect_pb2 as proto
+from pydeequ.v2.spark_helpers import create_deequ_plan, dataframe_from_plan
+
+if TYPE_CHECKING:
+ from pyspark.sql import DataFrame, SparkSession
+
+
+class VerificationSuite:
+ """
+ Main entry point for running data quality verification.
+
+ VerificationSuite allows you to define checks and analyzers to run
+ on a DataFrame. When run() is called, the checks and analyzers are
+ serialized to protobuf and sent to the Spark Connect server where
+ the Deequ plugin executes them.
+
+ Example:
+ suite = VerificationSuite(spark)
+ result = (suite
+ .onData(df)
+ .addCheck(check)
+ .run())
+ """
+
+ def __init__(self, spark: "SparkSession"):
+ """
+ Create a new VerificationSuite.
+
+ Args:
+ spark: SparkSession connected via Spark Connect
+ """
+ self._spark = spark
+
+ def onData(self, df: "DataFrame") -> "VerificationRunBuilder":
+ """
+ Specify the DataFrame to run verification on.
+
+ Args:
+ df: DataFrame to verify
+
+ Returns:
+ VerificationRunBuilder for method chaining
+ """
+ return VerificationRunBuilder(self._spark, df)
+
+
+class VerificationRunBuilder:
+ """
+ Builder for configuring and executing a verification run.
+
+ This class collects checks and analyzers, then executes them
+ when run() is called.
+ """
+
+ def __init__(self, spark: "SparkSession", df: "DataFrame"):
+ """
+ Create a new VerificationRunBuilder.
+
+ Args:
+ spark: SparkSession
+ df: DataFrame to verify
+ """
+ self._spark = spark
+ self._df = df
+ self._checks: List[Check] = []
+ self._analyzers: List[_ConnectAnalyzer] = []
+
+ def addCheck(self, check: Check) -> "VerificationRunBuilder":
+ """
+ Add a check to run.
+
+ Args:
+ check: Check to add
+
+ Returns:
+ self for method chaining
+ """
+ self._checks.append(check)
+ return self
+
+ def addAnalyzer(self, analyzer: _ConnectAnalyzer) -> "VerificationRunBuilder":
+ """
+ Add an analyzer to run (in addition to those required by checks).
+
+ Args:
+ analyzer: Analyzer to add
+
+ Returns:
+ self for method chaining
+ """
+ self._analyzers.append(analyzer)
+ return self
+
+ def run(self) -> "DataFrame":
+ """
+ Execute the verification and return results as a DataFrame.
+
+ The result DataFrame contains columns:
+ - check: Check description
+ - check_level: Error or Warning
+ - check_status: Success, Warning, or Error
+ - constraint: Constraint description
+ - constraint_status: Success or Failure
+ - constraint_message: Details about failures
+
+ Returns:
+ DataFrame with verification results
+
+ Raises:
+ RuntimeError: If the Deequ plugin is not available on the server
+ """
+ # Build the protobuf message
+ msg = proto.DeequVerificationRelation()
+
+ # Add checks
+ for check in self._checks:
+ msg.checks.append(check.to_proto())
+
+ # Add required analyzers
+ for analyzer in self._analyzers:
+ msg.required_analyzers.append(analyzer.to_proto())
+
+ # Get the input DataFrame's plan as serialized bytes
+ # We serialize just the Relation (plan.root), not the full Plan,
+ # because Scala expects to parse it as a Relation
+ input_plan = self._df._plan.to_proto(self._spark._client)
+ msg.input_relation = input_plan.root.SerializeToString()
+
+ # Wrap our Deequ message in a google.protobuf.Any
+ extension = any_pb2.Any()
+ extension.Pack(msg, type_url_prefix="type.googleapis.com")
+
+ # Create a proper LogicalPlan subclass with the extension
+ plan = create_deequ_plan(extension)
+
+ # Create DataFrame from the plan (handles Spark 3.x vs 4.x)
+ return dataframe_from_plan(plan, self._spark)
+
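+# Illustrative note (a sketch using the column names documented in run() above):
+# the verification result is a plain DataFrame, so failures can be inspected
+# with standard PySpark operations:
+#
+#     (result
+#         .filter("constraint_status = 'Failure'")
+#         .select("constraint", "constraint_message")
+#         .show(truncate=False))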
+
+class AnalysisRunner:
+ """
+ Entry point for running analyzers without checks.
+
+ Use this when you want to compute metrics without defining
+ pass/fail constraints.
+
+ Example:
+ from pydeequ.v2.analyzers import Size, Completeness, Mean
+
+ result = (AnalysisRunner(spark)
+ .onData(df)
+ .addAnalyzer(Size())
+ .addAnalyzer(Completeness("email"))
+ .addAnalyzer(Mean("amount"))
+ .run())
+ """
+
+ def __init__(self, spark: "SparkSession"):
+ """
+ Create a new AnalysisRunner.
+
+ Args:
+ spark: SparkSession connected via Spark Connect
+ """
+ self._spark = spark
+
+ def onData(self, df: "DataFrame") -> "AnalysisRunBuilder":
+ """
+ Specify the DataFrame to analyze.
+
+ Args:
+ df: DataFrame to analyze
+
+ Returns:
+ AnalysisRunBuilder for method chaining
+ """
+ return AnalysisRunBuilder(self._spark, df)
+
+
+class AnalysisRunBuilder:
+ """Builder for configuring and executing an analysis run."""
+
+ def __init__(self, spark: "SparkSession", df: "DataFrame"):
+ """
+ Create a new AnalysisRunBuilder.
+
+ Args:
+ spark: SparkSession
+ df: DataFrame to analyze
+ """
+ self._spark = spark
+ self._df = df
+ self._analyzers: List[_ConnectAnalyzer] = []
+
+ def addAnalyzer(self, analyzer: _ConnectAnalyzer) -> "AnalysisRunBuilder":
+ """
+ Add an analyzer to run.
+
+ Args:
+ analyzer: Analyzer to add
+
+ Returns:
+ self for method chaining
+ """
+ self._analyzers.append(analyzer)
+ return self
+
+ def run(self) -> "DataFrame":
+ """
+ Execute the analysis and return metrics as DataFrame.
+
+ Returns:
+ DataFrame with computed metrics
+ """
+ # Build protobuf message
+ msg = proto.DeequAnalysisRelation()
+ for analyzer in self._analyzers:
+ msg.analyzers.append(analyzer.to_proto())
+
+ # Get the input DataFrame's plan as serialized bytes
+ # We serialize just the Relation (plan.root), not the full Plan,
+ # because Scala expects to parse it as a Relation
+ input_plan = self._df._plan.to_proto(self._spark._client)
+ msg.input_relation = input_plan.root.SerializeToString()
+
+ # Wrap our Deequ message in a google.protobuf.Any
+ extension = any_pb2.Any()
+ extension.Pack(msg, type_url_prefix="type.googleapis.com")
+
+ # Create a proper LogicalPlan subclass with the extension
+ plan = create_deequ_plan(extension)
+
+ # Create DataFrame from the plan (handles Spark 3.x vs 4.x)
+ return dataframe_from_plan(plan, self._spark)
+
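+# Illustrative note: the metrics DataFrame returned by AnalysisRunBuilder.run()
+# is consumed in this change's tests via the "name", "instance", and "value"
+# columns, e.g. collecting results into a plain dict:
+#
+#     metrics = {r["name"]: float(r["value"]) for r in result.collect()}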
+
+# Export all public symbols
+__all__ = [
+ "VerificationSuite",
+ "VerificationRunBuilder",
+ "AnalysisRunner",
+ "AnalysisRunBuilder",
+]
diff --git a/pyproject.toml b/pyproject.toml
index 0a2fafa..8168444 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pydeequ"
-version = "1.5.0"
+version = "2.0.0b1"
description = "PyDeequ - Unit Tests for Data"
authors = ["Chenyang Liu ", "Rahul Sharma "]
maintainers = ["Chenyang Liu ","Rahul Sharma "]
@@ -28,27 +28,22 @@ classifiers = [
[tool.poetry.dependencies]
-python = ">=3.8,<4"
-numpy = ">=1.14.1"
-pandas = ">=0.23.0"
-pyspark = { version = ">=2.4.7, <3.3.0", optional = true }
+python = ">=3.9,<4"
+numpy = ">=1.23.0"
+pandas = ">=1.5.0"
+protobuf = ">=4.21.0"
+setuptools = ">=69.0.0" # Required for Python 3.12+ (distutils removed)
+pyspark = {version = "3.5.0", extras = ["connect"]}
-[tool.poetry.dev-dependencies]
-pytest = "^6.2.4"
-pytest-cov = "^2.11.1"
-coverage = "^5.5"
-pytest-runner = "^5.3.0"
-black = "^21.5b1"
-flake8 = "^3.9.2"
-flake8-docstrings = "^1.6.0"
-pytest-flake8 = "^1.0.7"
-pre-commit = "^2.12.1"
-pytest-rerunfailures = "^9.1.1"
-twine = "^3.4.1"
-safety = "^1.10.3"
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.0.0"
+pytest-cov = "^4.1.0"
+coverage = "^7.4.0"
+black = "^24.0.0"
+pre-commit = "^3.6.0"
+pytest-rerunfailures = "^14.0"
[tool.poetry.extras]
-pyspark = ["pyspark"]
[build-system]
requires = ["poetry-core>=1.0.0"]
diff --git a/tests/conftest.py b/tests/conftest.py
index 34926a4..543a27e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,22 +1,80 @@
# -*- coding: utf-8 -*-
-# pylint: disable=redefined-outer-name
-import logging
-from pydeequ import deequ_maven_coord, f2j_maven_coord
-# @pytest.yield_fixture(autouse=True)
+"""
+Pytest configuration for PyDeequ tests using Spark Connect.
+All tests use the Spark Connect server which must be running before tests.
+Start it with: scripts/start-spark-connect.sh
+"""
+import os
+import pytest
+from pyspark.sql import SparkSession
+
+# Set environment variables required for pydeequ
+os.environ.setdefault("SPARK_VERSION", "3.5")
+
+
+def create_spark_connect_session() -> SparkSession:
+ """
+ Create a Spark Connect session for testing.
+
+ Requires Spark Connect server to be running on localhost:15002.
+ Start the server with the Deequ plugin loaded.
+
+ Returns:
+ SparkSession connected to Spark Connect server
+ """
+ return SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+
+
+@pytest.fixture(scope="module")
+def spark() -> SparkSession:
+ """
+ Pytest fixture providing a Spark Connect session.
+
+ The session is shared within each test module for efficiency.
+
+ Yields:
+ SparkSession for testing
+ """
+ session = create_spark_connect_session()
+ yield session
+ session.stop()
+
+
+# Alias for backward compatibility with existing tests
+spark_session = spark
+
+
+# Legacy function for unittest-based tests
def setup_pyspark():
- from pyspark.sql import SparkSession
-
- return (
- SparkSession.builder.master("local[*]")
- .config("spark.executor.memory", "2g")
- .config("spark.jars.packages", deequ_maven_coord)
- .config("spark.pyspark.python", "/usr/bin/python3")
- .config("spark.pyspark.driver.python", "/usr/bin/python3")
- .config("spark.jars.excludes", f2j_maven_coord)
- .config("spark.driver.extraJavaOptions", "-XX:+UseG1GC")
- .config("spark.executor.extraJavaOptions", "-XX:+UseG1GC")
- .config("spark.sql.autoBroadcastJoinThreshold", "-1")
- )
+ """
+ Legacy setup function for unittest-based tests.
+
+ Returns a SparkSession builder configured for Spark Connect.
+ This is used by existing unittest classes that call setup_pyspark().getOrCreate().
+ """
+
+ class SparkConnectBuilder:
+ """Builder that creates Spark Connect sessions."""
+
+ def __init__(self):
+ self._app_name = "pydeequ-test"
+
+ def appName(self, name):
+ self._app_name = name
+ return self
+
+ def master(self, master):
+ # Ignored - we always use Spark Connect
+ return self
+
+ def config(self, key, value):
+ # Ignored - Spark Connect doesn't need these configs
+ return self
+
+ def getOrCreate(self):
+            return create_spark_connect_session()
+
+ return SparkConnectBuilder()
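+
+
+# Illustrative legacy usage (the unittest pattern described in the docstring
+# above); "my-test" is just a placeholder app name:
+#
+#     spark = setup_pyspark().appName("my-test").getOrCreate()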
diff --git a/tests/v2/__init__.py b/tests/v2/__init__.py
new file mode 100644
index 0000000..386bcea
--- /dev/null
+++ b/tests/v2/__init__.py
@@ -0,0 +1 @@
+# PyDeequ v2 tests using Spark Connect
diff --git a/tests/v2/conftest.py b/tests/v2/conftest.py
new file mode 100644
index 0000000..0474335
--- /dev/null
+++ b/tests/v2/conftest.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+"""
+Pytest configuration for PyDeequ v2 tests using Spark Connect.
+
+Requirements:
+- Spark Connect server running on localhost:15002
+- Deequ plugin loaded on the server
+
+Start server with:
+ $SPARK_HOME/sbin/start-connect-server.sh \
+ --jars /path/to/deequ-2.0.9-spark-3.5.jar \
+ --conf spark.connect.extensions.relation.classes=com.amazon.deequ.connect.DeequRelationPlugin
+
+Run tests with:
+ SPARK_REMOTE=sc://localhost:15002 pytest tests/v2/ -v
+"""
+
+import os
+
+import pytest
+from pyspark.sql import Row, SparkSession
+
+@pytest.fixture(scope="session")
+def spark():
+ """
+ Session-scoped Spark Connect session.
+ Shared across all tests for efficiency.
+ """
+ remote_url = os.environ.get("SPARK_REMOTE", "sc://localhost:15002")
+ session = SparkSession.builder.remote(remote_url).getOrCreate()
+ yield session
+ session.stop()
+
+
+@pytest.fixture(scope="module")
+def sample_df(spark):
+ """
+ Sample DataFrame used across multiple tests.
+
+ Schema:
+ - a: string (complete)
+ - b: int (complete, unique: 1,2,3)
+ - c: int (has null)
+ - d: int (all same value: 5)
+ """
+ return spark.createDataFrame(
+ [
+ Row(a="foo", b=1, c=5, d=5),
+ Row(a="bar", b=2, c=6, d=5),
+ Row(a="baz", b=3, c=None, d=5),
+ ]
+ )
+
+
+@pytest.fixture(scope="module")
+def extended_df(spark):
+ """
+ Extended DataFrame with more columns for comprehensive tests.
+ """
+ return spark.createDataFrame(
+ [
+ Row(
+ a="foo",
+ b=1,
+ c=5,
+ d=5,
+ e=3,
+ f=1,
+ g="a",
+ email="foo@example.com",
+ creditCard="5130566665286573",
+ ),
+ Row(
+ a="bar",
+ b=2,
+ c=6,
+ d=5,
+ e=2,
+ f=2,
+ g="b",
+ email="bar@example.com",
+ creditCard="4532677117740914",
+ ),
+ Row(
+ a="baz",
+ b=3,
+ c=None,
+ d=5,
+ e=1,
+ f=1,
+ g=None,
+ email="baz@example.com",
+ creditCard="340145324521741",
+ ),
+ ]
+ )
+
+
+@pytest.fixture(scope="module")
+def profiler_df(spark):
+ """
+ DataFrame with varied data types for Column Profiler testing.
+
+ Schema:
+ - id: int (complete, unique)
+ - name: string (complete)
+ - age: int (has 1 null)
+ - salary: double (has 1 null)
+ - active: boolean (complete)
+ - email: string (has 1 null)
+ - score: double (has 1 null)
+ """
+ return spark.createDataFrame(
+ [
+ Row(id=1, name="Alice", age=30, salary=75000.0, active=True,
+ email="alice@example.com", score=85.5),
+ Row(id=2, name="Bob", age=25, salary=65000.0, active=True,
+ email="bob@example.com", score=92.0),
+ Row(id=3, name="Charlie", age=35, salary=None, active=False,
+ email=None, score=78.5),
+ Row(id=4, name="Diana", age=28, salary=80000.0, active=True,
+ email="diana@example.com", score=95.0),
+ Row(id=5, name="Eve", age=None, salary=70000.0, active=True,
+ email="eve@example.com", score=None),
+ Row(id=6, name="Frank", age=45, salary=95000.0, active=True,
+ email="frank@example.com", score=88.0),
+ Row(id=7, name="Grace", age=32, salary=72000.0, active=False,
+ email="grace@example.com", score=91.5),
+ Row(id=8, name="Henry", age=29, salary=68000.0, active=True,
+ email="henry@example.com", score=82.0),
+ ]
+ )
+
+
+@pytest.fixture(scope="module")
+def suggestion_df(spark):
+ """
+ DataFrame designed to trigger specific constraint suggestions.
+
+ Characteristics:
+ - id: complete and unique -> should suggest NotNull + Unique
+ - status: categorical (3 values) -> should suggest IsIn
+ - score: numeric range -> should suggest Min/Max
+ - category: categorical (3 values) -> should suggest IsIn
+ """
+ return spark.createDataFrame(
+ [
+ Row(id=1, status="active", score=85, category="A"),
+ Row(id=2, status="active", score=92, category="B"),
+ Row(id=3, status="inactive", score=78, category="A"),
+ Row(id=4, status="active", score=95, category="C"),
+ Row(id=5, status="pending", score=88, category="B"),
+ Row(id=6, status="active", score=91, category="A"),
+ Row(id=7, status="inactive", score=82, category="C"),
+ Row(id=8, status="active", score=89, category="B"),
+ ]
+ )
+
+
+@pytest.fixture(scope="module")
+def e2e_df(spark):
+ """
+ DataFrame for end-to-end testing with realistic data.
+
+ Characteristics:
+ - Mixed data types (int, string, double)
+ - Some null values
+ - Valid email patterns
+ - Range of numeric values
+ """
+ return spark.createDataFrame(
+ [
+ Row(id=1, name="Alice", email="alice@example.com", age=30, score=85.5),
+ Row(id=2, name="Bob", email="bob@example.com", age=25, score=92.0),
+ Row(id=3, name="Charlie", email=None, age=35, score=78.5),
+ Row(id=4, name="Diana", email="diana@example.com", age=28, score=95.0),
+ Row(id=5, name="Eve", email="eve@example.com", age=None, score=88.0),
+ ]
+ )
diff --git a/tests/v2/test_analyzers.py b/tests/v2/test_analyzers.py
new file mode 100644
index 0000000..89da7c4
--- /dev/null
+++ b/tests/v2/test_analyzers.py
@@ -0,0 +1,339 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for Analyzers using Spark Connect.
+
+These tests verify the core analyzer functionality of PyDeequ v2.
+"""
+
+import pytest
+from pyspark.sql import Row
+
+from pydeequ.v2.verification import AnalysisRunner
+from pydeequ.v2.analyzers import (
+ Size,
+ Completeness,
+ Mean,
+ Sum,
+ Minimum,
+ Maximum,
+ StandardDeviation,
+ ApproxCountDistinct,
+ Distinctness,
+ Uniqueness,
+ UniqueValueRatio,
+ Entropy,
+ MinLength,
+ MaxLength,
+ Correlation,
+ ApproxQuantile,
+ PatternMatch,
+ Compliance,
+)
+
+
+class TestBasicAnalyzers:
+ """Test basic analyzer types."""
+
+ def test_size(self, spark, sample_df):
+ """Test Size analyzer."""
+ result = AnalysisRunner(spark).onData(sample_df).addAnalyzer(Size()).run()
+
+ rows = result.collect()
+ size_row = [r for r in rows if r["name"] == "Size"][0]
+ assert size_row["value"] == 3.0
+
+ def test_completeness(self, spark, sample_df):
+ """Test Completeness analyzer on complete column."""
+ result = (
+ AnalysisRunner(spark).onData(sample_df).addAnalyzer(Completeness("a")).run()
+ )
+
+ rows = result.collect()
+ assert rows[0]["value"] == 1.0
+
+ def test_completeness_with_nulls(self, spark, sample_df):
+ """Test Completeness analyzer on column with nulls."""
+ result = (
+ AnalysisRunner(spark).onData(sample_df).addAnalyzer(Completeness("c")).run()
+ )
+
+ rows = result.collect()
+ assert abs(rows[0]["value"] - 2 / 3) < 0.001
+
+ def test_mean(self, spark, sample_df):
+ """Test Mean analyzer."""
+ result = AnalysisRunner(spark).onData(sample_df).addAnalyzer(Mean("b")).run()
+
+ rows = result.collect()
+ assert rows[0]["value"] == 2.0
+
+ def test_sum(self, spark, sample_df):
+ """Test Sum analyzer."""
+ result = AnalysisRunner(spark).onData(sample_df).addAnalyzer(Sum("b")).run()
+
+ rows = result.collect()
+ assert rows[0]["value"] == 6.0
+
+ def test_minimum(self, spark, sample_df):
+ """Test Minimum analyzer."""
+ result = AnalysisRunner(spark).onData(sample_df).addAnalyzer(Minimum("b")).run()
+
+ rows = result.collect()
+ assert rows[0]["value"] == 1.0
+
+ def test_maximum(self, spark, sample_df):
+ """Test Maximum analyzer."""
+ result = AnalysisRunner(spark).onData(sample_df).addAnalyzer(Maximum("b")).run()
+
+ rows = result.collect()
+ assert rows[0]["value"] == 3.0
+
+ def test_standard_deviation(self, spark, sample_df):
+ """Test StandardDeviation analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(StandardDeviation("b"))
+ .run()
+ )
+
+ rows = result.collect()
+ # std of [1,2,3] is approximately 0.816
+ assert abs(rows[0]["value"] - 0.816496580927726) < 0.001
+
+
+class TestDistinctnessAnalyzers:
+ """Test distinctness-related analyzers."""
+
+ def test_approx_count_distinct(self, spark, sample_df):
+ """Test ApproxCountDistinct analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(ApproxCountDistinct("b"))
+ .run()
+ )
+
+ rows = result.collect()
+ assert rows[0]["value"] == 3.0
+
+ def test_distinctness(self, spark, sample_df):
+ """Test Distinctness analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Distinctness(["b"]))
+ .run()
+ )
+
+ rows = result.collect()
+ assert rows[0]["value"] == 1.0 # All values are distinct
+
+ def test_distinctness_non_unique(self, spark, sample_df):
+ """Test Distinctness analyzer on non-unique column."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Distinctness(["d"]))
+ .run()
+ )
+
+ rows = result.collect()
+ # Column d has all same values, so 1 distinct / 3 rows = 1/3
+ assert abs(rows[0]["value"] - 1 / 3) < 0.001
+
+ def test_uniqueness(self, spark, sample_df):
+ """Test Uniqueness analyzer."""
+ result = (
+ AnalysisRunner(spark).onData(sample_df).addAnalyzer(Uniqueness(["b"])).run()
+ )
+
+ rows = result.collect()
+ assert rows[0]["value"] == 1.0
+
+ def test_unique_value_ratio(self, spark, sample_df):
+ """Test UniqueValueRatio analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(UniqueValueRatio(["b"]))
+ .run()
+ )
+
+ rows = result.collect()
+ assert rows[0]["value"] == 1.0
+
+
+class TestStringAnalyzers:
+ """Test string-related analyzers."""
+
+ def test_min_length(self, spark, sample_df):
+ """Test MinLength analyzer."""
+ result = (
+ AnalysisRunner(spark).onData(sample_df).addAnalyzer(MinLength("a")).run()
+ )
+
+ rows = result.collect()
+ # "foo", "bar", "baz" all have length 3
+ assert rows[0]["value"] == 3.0
+
+ def test_max_length(self, spark, sample_df):
+ """Test MaxLength analyzer."""
+ result = (
+ AnalysisRunner(spark).onData(sample_df).addAnalyzer(MaxLength("a")).run()
+ )
+
+ rows = result.collect()
+ assert rows[0]["value"] == 3.0
+
+ def test_pattern_match(self, spark, sample_df):
+ """Test PatternMatch analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(PatternMatch("a", r"ba.*"))
+ .run()
+ )
+
+ rows = result.collect()
+ # "bar" and "baz" match, "foo" doesn't = 2/3
+ assert abs(rows[0]["value"] - 2 / 3) < 0.001
+
+
+class TestStatisticalAnalyzers:
+ """Test statistical analyzers."""
+
+ def test_entropy(self, spark, sample_df):
+ """Test Entropy analyzer."""
+ result = AnalysisRunner(spark).onData(sample_df).addAnalyzer(Entropy("a")).run()
+
+ rows = result.collect()
+ # 3 distinct values with equal frequency -> log(3) ~ 1.099
+ assert abs(rows[0]["value"] - 1.0986122886681096) < 0.001
+
+ def test_correlation(self, spark, sample_df):
+ """Test Correlation analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Correlation("b", "c"))
+ .run()
+ )
+
+ rows = result.collect()
+ # b=[1,2,3], c=[5,6,None] -> perfect correlation on non-null pairs
+ assert rows[0]["value"] == 1.0
+
+ def test_approx_quantile(self, spark, sample_df):
+ """Test ApproxQuantile analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(ApproxQuantile("b", 0.5))
+ .run()
+ )
+
+ rows = result.collect()
+ # Median of [1,2,3] is 2
+ assert rows[0]["value"] == 2.0
+
+ def test_compliance(self, spark, sample_df):
+ """Test Compliance analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Compliance("positive_b", "b > 0"))
+ .run()
+ )
+
+ rows = result.collect()
+ # All values are positive
+ assert rows[0]["value"] == 1.0
+
+
+class TestMultipleAnalyzers:
+ """Test running multiple analyzers together."""
+
+ def test_multiple_analyzers(self, spark, sample_df):
+ """Test running multiple analyzers in one run."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Size())
+ .addAnalyzer(Completeness("a"))
+ .addAnalyzer(Mean("b"))
+ .addAnalyzer(Maximum("b"))
+ .addAnalyzer(Minimum("b"))
+ .run()
+ )
+
+ rows = result.collect()
+
+ # Check we got results for all analyzers
+ names = [r["name"] for r in rows]
+ assert "Size" in names
+ assert "Completeness" in names
+ assert "Mean" in names
+ assert "Maximum" in names
+ assert "Minimum" in names
+
+ def test_multiple_completeness(self, spark, sample_df):
+ """Test Completeness on multiple columns."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Completeness("a"))
+ .addAnalyzer(Completeness("b"))
+ .addAnalyzer(Completeness("c"))
+ .run()
+ )
+
+ rows = result.collect()
+ values = {r["instance"]: r["value"] for r in rows}
+
+ assert values["a"] == 1.0
+ assert values["b"] == 1.0
+ assert abs(values["c"] - 2 / 3) < 0.001
+
+
+class TestAnalyzerWithWhere:
+ """Test analyzers with where clause filtering."""
+
+ def test_size_with_where(self, spark, sample_df):
+ """Test Size analyzer with where clause."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Size(where="b > 1"))
+ .run()
+ )
+
+ rows = result.collect()
+ # Only rows where b > 1 (b=2 and b=3) = 2 rows
+ assert rows[0]["value"] == 2.0
+
+ def test_completeness_with_where(self, spark, sample_df):
+ """Test Completeness analyzer with where clause."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Completeness("c", where="b <= 2"))
+ .run()
+ )
+
+ rows = result.collect()
+ # Rows where b <= 2: (b=1, c=5), (b=2, c=6) -> both have c values
+ assert rows[0]["value"] == 1.0
+
+ def test_mean_with_where(self, spark, sample_df):
+ """Test Mean analyzer with where clause."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Mean("b", where="b > 1"))
+ .run()
+ )
+
+ rows = result.collect()
+ # Mean of [2, 3] = 2.5
+ assert rows[0]["value"] == 2.5
diff --git a/tests/v2/test_checks.py b/tests/v2/test_checks.py
new file mode 100644
index 0000000..a92a81b
--- /dev/null
+++ b/tests/v2/test_checks.py
@@ -0,0 +1,320 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for Check constraints using Spark Connect.
+
+These tests verify the core constraint functionality of PyDeequ v2.
+"""
+
+import pytest
+from pyspark.sql import Row
+
+from pydeequ.v2.checks import Check, CheckLevel
+from pydeequ.v2.verification import VerificationSuite
+from pydeequ.v2.predicates import eq, gt, gte, lt, lte, between
+
+
+class TestCheckConstraints:
+ """Test individual constraint types."""
+
+ def test_hasSize(self, spark, sample_df):
+ """Test hasSize constraint."""
+ check = Check(CheckLevel.Error, "size check").hasSize(eq(3))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert len(rows) == 1
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_hasSize_failure(self, spark, sample_df):
+ """Test hasSize constraint failure."""
+ check = Check(CheckLevel.Error, "size check").hasSize(eq(5))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Failure"
+
+ def test_isComplete(self, spark, sample_df):
+ """Test isComplete constraint on complete column."""
+ check = Check(CheckLevel.Error, "completeness check").isComplete("a")
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_isComplete_failure(self, spark, sample_df):
+ """Test isComplete constraint on incomplete column."""
+ check = Check(CheckLevel.Error, "completeness check").isComplete("c")
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Failure"
+
+ def test_hasCompleteness(self, spark, sample_df):
+ """Test hasCompleteness with threshold."""
+ # Column c has 2/3 completeness
+ check = Check(CheckLevel.Error, "completeness check").hasCompleteness(
+ "c", gte(0.5)
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_hasCompleteness_failure(self, spark, sample_df):
+ """Test hasCompleteness failure."""
+ check = Check(CheckLevel.Error, "completeness check").hasCompleteness(
+ "c", gte(0.9)
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Failure"
+
+ def test_isUnique(self, spark, sample_df):
+ """Test isUnique constraint."""
+ check = Check(CheckLevel.Error, "uniqueness check").isUnique("b")
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_isUnique_failure(self, spark, sample_df):
+ """Test isUnique constraint failure on non-unique column."""
+ check = Check(CheckLevel.Error, "uniqueness check").isUnique("d")
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Failure"
+
+ def test_hasUniqueness(self, spark, sample_df):
+ """Test hasUniqueness with multiple columns."""
+ check = Check(CheckLevel.Error, "uniqueness check").hasUniqueness(
+ ["a", "b"], eq(1.0)
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_hasMin(self, spark, sample_df):
+ """Test hasMin constraint."""
+ check = Check(CheckLevel.Error, "min check").hasMin("b", eq(1.0))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_hasMax(self, spark, sample_df):
+ """Test hasMax constraint."""
+ check = Check(CheckLevel.Error, "max check").hasMax("b", eq(3.0))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_hasMean(self, spark, sample_df):
+ """Test hasMean constraint."""
+ check = Check(CheckLevel.Error, "mean check").hasMean("b", eq(2.0))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_hasSum(self, spark, sample_df):
+ """Test hasSum constraint."""
+ check = Check(CheckLevel.Error, "sum check").hasSum("b", eq(6.0))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_hasStandardDeviation(self, spark, sample_df):
+ """Test hasStandardDeviation constraint."""
+ # std of [1,2,3] is ~0.816
+ check = Check(CheckLevel.Error, "std check").hasStandardDeviation(
+ "b", between(0.8, 0.9)
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+
+class TestCheckChaining:
+ """Test chaining multiple constraints."""
+
+ def test_multiple_constraints_all_pass(self, spark, sample_df):
+ """Test multiple constraints that all pass."""
+ check = (
+ Check(CheckLevel.Error, "multi check")
+ .hasSize(eq(3))
+ .isComplete("a")
+ .isUnique("b")
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert len(rows) == 3
+ assert all(row["constraint_status"] == "Success" for row in rows)
+
+ def test_multiple_constraints_some_fail(self, spark, sample_df):
+ """Test multiple constraints with some failures."""
+ check = (
+ Check(CheckLevel.Error, "multi check")
+ .hasSize(eq(3)) # pass
+ .isComplete("c") # fail (has null)
+ .isUnique("d")
+ ) # fail (all same value)
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert len(rows) == 3
+ statuses = [row["constraint_status"] for row in rows]
+ assert statuses.count("Success") == 1
+ assert statuses.count("Failure") == 2
+
+
+class TestCheckLevels:
+ """Test check level (Error vs Warning)."""
+
+ def test_error_level(self, spark, sample_df):
+ """Test Error level check."""
+ check = Check(CheckLevel.Error, "error check").hasSize(eq(3))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["check_level"] == "Error"
+
+ def test_warning_level(self, spark, sample_df):
+ """Test Warning level check."""
+ check = Check(CheckLevel.Warning, "warning check").hasSize(eq(3))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["check_level"] == "Warning"
+
+
+class TestPredicates:
+ """Test different predicate types."""
+
+ def test_eq_predicate(self, spark, sample_df):
+ """Test eq (equals) predicate."""
+ check = Check(CheckLevel.Error, "eq test").hasSize(eq(3))
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_gt_predicate(self, spark, sample_df):
+ """Test gt (greater than) predicate."""
+ check = Check(CheckLevel.Error, "gt test").hasSize(gt(2))
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_gte_predicate(self, spark, sample_df):
+ """Test gte (greater than or equal) predicate."""
+ check = Check(CheckLevel.Error, "gte test").hasSize(gte(3))
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_lt_predicate(self, spark, sample_df):
+ """Test lt (less than) predicate."""
+ check = Check(CheckLevel.Error, "lt test").hasSize(lt(4))
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_lte_predicate(self, spark, sample_df):
+ """Test lte (less than or equal) predicate."""
+ check = Check(CheckLevel.Error, "lte test").hasSize(lte(3))
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_between_predicate(self, spark, sample_df):
+ """Test between predicate."""
+ check = Check(CheckLevel.Error, "between test").hasSize(between(2, 4))
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+
+class TestAdditionalConstraints:
+ """Test additional constraint types."""
+
+ def test_areComplete(self, spark, sample_df):
+ """Test areComplete constraint."""
+ check = Check(CheckLevel.Error, "are complete").areComplete(["a", "b"])
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_hasDistinctness(self, spark, sample_df):
+ """Test hasDistinctness constraint."""
+ # Column b has 3 distinct values out of 3 rows = 1.0 distinctness
+ check = Check(CheckLevel.Error, "distinctness").hasDistinctness(["b"], eq(1.0))
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_hasApproxCountDistinct(self, spark, sample_df):
+ """Test hasApproxCountDistinct constraint."""
+ check = Check(CheckLevel.Error, "approx count").hasApproxCountDistinct(
+ "b", eq(3.0)
+ )
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_satisfies(self, spark, sample_df):
+ """Test satisfies constraint with SQL expression."""
+ check = Check(CheckLevel.Error, "satisfies").satisfies(
+ "b > 0", "positive_b", eq(1.0)
+ )
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_hasPattern(self, spark, extended_df):
+ """Test hasPattern constraint."""
+ # All emails match the pattern
+ check = Check(CheckLevel.Error, "pattern").hasPattern(
+ "email", r".*@.*\.com", eq(1.0)
+ )
+ result = VerificationSuite(spark).onData(extended_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_containsEmail(self, spark, extended_df):
+ """Test containsEmail constraint."""
+ check = Check(CheckLevel.Error, "email").containsEmail("email", eq(1.0))
+ result = VerificationSuite(spark).onData(extended_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_containsCreditCardNumber(self, spark, extended_df):
+ """Test containsCreditCardNumber constraint."""
+ check = Check(CheckLevel.Error, "credit card").containsCreditCardNumber(
+ "creditCard", eq(1.0)
+ )
+ result = VerificationSuite(spark).onData(extended_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_isNonNegative(self, spark, sample_df):
+ """Test isNonNegative constraint."""
+ check = Check(CheckLevel.Error, "non negative").isNonNegative("b")
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
+
+ def test_isPositive(self, spark, sample_df):
+ """Test isPositive constraint."""
+ check = Check(CheckLevel.Error, "positive").isPositive("b")
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+ assert result.collect()[0]["constraint_status"] == "Success"
diff --git a/tests/v2/test_e2e_spark_connect.py b/tests/v2/test_e2e_spark_connect.py
new file mode 100644
index 0000000..58c18fd
--- /dev/null
+++ b/tests/v2/test_e2e_spark_connect.py
@@ -0,0 +1,647 @@
+# -*- coding: utf-8 -*-
+"""
+End-to-End tests for PyDeequ via Spark Connect.
+
+These tests verify that the full Spark Connect pipeline works correctly,
+from Python client through the gRPC protocol to the Scala DeequRelationPlugin.
+
+Prerequisites:
+1. Build the Deequ JAR with Spark Connect plugin:
+ cd deequ && mvn package -DskipTests
+
+2. Start Spark Connect server with the plugin:
+ ./scripts/start-spark-connect.sh
+
+3. Run these tests:
+   SPARK_REMOTE=sc://localhost:15002 pytest tests/v2/test_e2e_spark_connect.py -v
+
+Note: These tests do NOT use Py4J fallback - they test the actual Spark Connect
+protocol with the DeequRelationPlugin on the server side.
+"""
+
+import os
+
+import pytest
+from pyspark.sql import Row
+
+from pydeequ.v2.analyzers import (
+ Completeness,
+ Distinctness,
+ Maximum,
+ Mean,
+ Minimum,
+ Size,
+ StandardDeviation,
+ Uniqueness,
+)
+from pydeequ.v2.checks import Check, CheckLevel
+from pydeequ.v2.predicates import between, eq, gt, gte, is_one, lt, lte
+from pydeequ.v2.profiles import ColumnProfilerRunner, KLLParameters
+from pydeequ.v2.suggestions import ConstraintSuggestionRunner, Rules
+
+# Import the new Spark Connect API
+from pydeequ.v2.verification import AnalysisRunner, VerificationSuite
+
+# Skip all tests if SPARK_REMOTE is not set
+pytestmark = pytest.mark.skipif(
+ "SPARK_REMOTE" not in os.environ,
+ reason="SPARK_REMOTE environment variable not set. Start Spark Connect server first.",
+)
+
+
+# Note: spark fixture is defined in conftest.py (session-scoped)
+
+
+@pytest.fixture(scope="module")
+def sample_df(e2e_df):
+ """
+ Alias for e2e_df from conftest.py.
+
+ Schema: id (int), name (string), email (string), age (int), score (double)
+ - 5 rows total
+ - email has 1 null (80% complete)
+ - age has 1 null (80% complete)
+ """
+ return e2e_df
+
+
+class TestVerificationSuiteE2E:
+ """End-to-end tests for VerificationSuite via Spark Connect."""
+
+ def test_size_check(self, spark, sample_df):
+ """Test that hasSize check works via Spark Connect."""
+ check = Check(CheckLevel.Error, "Size check").hasSize(eq(5))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ # Result should be a DataFrame
+ assert result is not None
+
+ # Collect results
+ rows = result.collect()
+ assert len(rows) > 0
+
+ # Check should pass (we have exactly 5 rows)
+ row = rows[0]
+ assert row["constraint_status"] == "Success"
+
+ def test_completeness_check_passing(self, spark, sample_df):
+ """Test completeness check that should pass."""
+ check = (
+ Check(CheckLevel.Error, "Completeness check")
+ .isComplete("id")
+ .isComplete("name")
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+
+ # Both constraints should pass (id and name are complete)
+ for row in rows:
+ assert row["constraint_status"] == "Success"
+
+ def test_completeness_check_failing(self, spark, sample_df):
+ """Test completeness check that should fail."""
+ check = Check(CheckLevel.Error, "Completeness check").isComplete(
+ "email"
+ ) # email has NULL values
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+
+ # Should fail because email has NULL
+ assert len(rows) > 0
+ assert rows[0]["constraint_status"] == "Failure"
+
+ def test_has_completeness_with_threshold(self, spark, sample_df):
+ """Test hasCompleteness with a threshold."""
+ # email is 80% complete (4 out of 5)
+ check = Check(CheckLevel.Warning, "Completeness threshold").hasCompleteness(
+ "email", gte(0.8)
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert len(rows) > 0
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_uniqueness_check(self, spark, sample_df):
+ """Test uniqueness check."""
+ check = Check(CheckLevel.Error, "Uniqueness check").isUnique("id")
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert len(rows) > 0
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_mean_check(self, spark, sample_df):
+ """Test mean check with range assertion."""
+        # Mean score is 87.8 (average of 85.5, 92.0, 78.5, 95.0, 88.0)
+ check = Check(CheckLevel.Error, "Mean check").hasMean(
+ "score", between(80.0, 95.0)
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert len(rows) > 0
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_multiple_checks(self, spark, sample_df):
+ """Test multiple checks in a single verification run."""
+ check = (
+ Check(CheckLevel.Error, "Multiple checks")
+ .hasSize(eq(5))
+ .isComplete("id")
+ .isComplete("name")
+ .isUnique("id")
+ .hasCompleteness("email", gte(0.7))
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+
+ # All 5 constraints should pass
+ assert len(rows) == 5
+ for row in rows:
+ assert row["constraint_status"] == "Success"
+
+ def test_check_levels(self, spark, sample_df):
+ """Test both Error and Warning check levels."""
+ error_check = Check(CheckLevel.Error, "Error level check").isComplete("id")
+
+ warning_check = Check(CheckLevel.Warning, "Warning level check").isComplete(
+ "email"
+ ) # Will fail
+
+ result = (
+ VerificationSuite(spark)
+ .onData(sample_df)
+ .addCheck(error_check)
+ .addCheck(warning_check)
+ .run()
+ )
+
+ rows = result.collect()
+
+ # Find the results for each check
+ error_result = [r for r in rows if r["check"] == "Error level check"][0]
+ warning_result = [r for r in rows if r["check"] == "Warning level check"][0]
+
+ assert error_result["check_level"] == "Error"
+ assert error_result["constraint_status"] == "Success"
+
+ assert warning_result["check_level"] == "Warning"
+ assert warning_result["constraint_status"] == "Failure"
+
+
+class TestAnalysisRunnerE2E:
+ """End-to-end tests for AnalysisRunner via Spark Connect."""
+
+ def test_size_analyzer(self, spark, sample_df):
+ """Test Size analyzer."""
+ result = AnalysisRunner(spark).onData(sample_df).addAnalyzer(Size()).run()
+
+ rows = result.collect()
+ assert len(rows) > 0
+
+ # Find the Size metric
+ size_row = [r for r in rows if r["name"] == "Size"][0]
+ assert float(size_row["value"]) == 5.0
+
+ def test_completeness_analyzer(self, spark, sample_df):
+ """Test Completeness analyzer."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Completeness("id"))
+ .addAnalyzer(Completeness("email"))
+ .run()
+ )
+
+ rows = result.collect()
+
+ # id should be 100% complete
+ id_row = [r for r in rows if r["instance"] == "id"][0]
+ assert float(id_row["value"]) == 1.0
+
+ # email should be 80% complete
+ email_row = [r for r in rows if r["instance"] == "email"][0]
+ assert float(email_row["value"]) == 0.8
+
+ def test_statistical_analyzers(self, spark, sample_df):
+ """Test statistical analyzers (Mean, Min, Max, StdDev)."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Mean("score"))
+ .addAnalyzer(Minimum("score"))
+ .addAnalyzer(Maximum("score"))
+ .addAnalyzer(StandardDeviation("score"))
+ .run()
+ )
+
+ rows = result.collect()
+
+ # Extract values by metric name
+ metrics = {r["name"]: float(r["value"]) for r in rows}
+
+ # Verify expected ranges
+ assert 85.0 <= metrics["Mean"] <= 90.0 # Mean of scores
+ assert metrics["Minimum"] == 78.5
+ assert metrics["Maximum"] == 95.0
+ assert metrics["StandardDeviation"] > 0 # Should have some variance
+
+ def test_multiple_analyzers(self, spark, sample_df):
+ """Test running multiple analyzers together."""
+ result = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Size())
+ .addAnalyzer(Completeness("id"))
+ .addAnalyzer(Completeness("email"))
+ .addAnalyzer(Mean("age"))
+ .addAnalyzer(Mean("score"))
+ .run()
+ )
+
+ rows = result.collect()
+
+ # Should have results for all analyzers
+ assert len(rows) >= 5
+
+
+class TestEdgeCasesE2E:
+ """Test edge cases and error handling."""
+
+ def test_empty_dataframe(self, spark):
+ """Test verification on empty DataFrame."""
+ empty_df = spark.createDataFrame([], "id: int, name: string")
+
+ check = Check(CheckLevel.Error, "Empty DF check").hasSize(eq(0))
+
+ result = VerificationSuite(spark).onData(empty_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert len(rows) > 0
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_all_null_column(self, spark):
+ """Test completeness on all-NULL column."""
+ from pyspark.sql.types import IntegerType, StringType, StructField, StructType
+
+ schema = StructType([
+ StructField("id", IntegerType(), False),
+ StructField("val", StringType(), True),
+ ])
+ data = [Row(id=1, val=None), Row(id=2, val=None)]
+ df = spark.createDataFrame(data, schema=schema)
+
+ check = Check(CheckLevel.Error, "Null column check").hasCompleteness(
+ "val", eq(0.0)
+ )
+
+ result = VerificationSuite(spark).onData(df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_single_row(self, spark):
+ """Test verification on single-row DataFrame."""
+ data = [Row(id=1, name="Test")]
+ df = spark.createDataFrame(data)
+
+ check = (
+ Check(CheckLevel.Error, "Single row check")
+ .hasSize(eq(1))
+ .isComplete("id")
+ .isUnique("id")
+ )
+
+ result = VerificationSuite(spark).onData(df).addCheck(check).run()
+
+ rows = result.collect()
+ for row in rows:
+ assert row["constraint_status"] == "Success"
+
+
+class TestPredicatesE2E:
+ """Test various predicates via Spark Connect."""
+
+ def test_eq_predicate(self, spark, sample_df):
+ """Test eq() predicate."""
+ check = Check(CheckLevel.Error, "EQ test").hasSize(eq(5))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_gte_predicate(self, spark, sample_df):
+ """Test gte() predicate."""
+ check = Check(CheckLevel.Error, "GTE test").hasCompleteness("id", gte(1.0))
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_between_predicate(self, spark, sample_df):
+ """Test between() predicate."""
+ check = Check(CheckLevel.Error, "Between test").hasMean(
+ "score", between(80.0, 95.0)
+ )
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_lt_predicate(self, spark, sample_df):
+ """Test lt() predicate - should fail when condition not met."""
+ check = Check(CheckLevel.Error, "LT test").hasSize(
+ lt(3)
+ ) # We have 5 rows, so this should fail
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Failure"
+
+
+class TestColumnProfilerE2E:
+ """End-to-end tests for Column Profiler via Spark Connect."""
+
+ def test_basic_profiling(self, spark, sample_df):
+ """Test basic column profiling."""
+ result = ColumnProfilerRunner(spark).onData(sample_df).run()
+
+ rows = result.collect()
+
+ # Should have one profile per column
+ assert len(rows) == len(sample_df.columns)
+
+ # Verify columns are profiled
+ profiled_columns = {r["column"] for r in rows}
+ expected_columns = set(sample_df.columns)
+ assert profiled_columns == expected_columns
+
+ def test_completeness_profiling(self, spark, sample_df):
+ """Test completeness values in profiles."""
+ result = ColumnProfilerRunner(spark).onData(sample_df).run()
+
+ rows = {r["column"]: r for r in result.collect()}
+
+ # id is complete (100%)
+ assert rows["id"]["completeness"] == 1.0
+
+ # email has one null (80%)
+ assert abs(rows["email"]["completeness"] - 0.8) < 0.001
+
+ # age has one null (80%)
+ assert abs(rows["age"]["completeness"] - 0.8) < 0.001
+
+ def test_numeric_statistics_profiling(self, spark, sample_df):
+ """Test numeric statistics in profiles."""
+ result = ColumnProfilerRunner(spark).onData(sample_df).run()
+
+ rows = {r["column"]: r for r in result.collect()}
+
+ # Verify score statistics
+ score_profile = rows["score"]
+ assert score_profile["minimum"] == 78.5
+ assert score_profile["maximum"] == 95.0
+ assert score_profile["mean"] is not None
+
+ def test_restrict_to_columns(self, spark, sample_df):
+ """Test profiling restricted to specific columns."""
+ result = (
+ ColumnProfilerRunner(spark)
+ .onData(sample_df)
+ .restrictToColumns(["id", "name"])
+ .run()
+ )
+
+ rows = result.collect()
+ profiled_columns = {r["column"] for r in rows}
+
+ assert profiled_columns == {"id", "name"}
+
+ def test_kll_profiling(self, spark, sample_df):
+ """Test KLL sketch profiling for numeric columns."""
+ result = (
+ ColumnProfilerRunner(spark)
+ .onData(sample_df)
+ .withKLLProfiling()
+ .run()
+ )
+
+ rows = {r["column"]: r for r in result.collect()}
+
+ # Numeric columns should have KLL buckets
+ assert rows["score"]["kll_buckets"] is not None
+ assert rows["age"]["kll_buckets"] is not None
+
+ # String columns should not have KLL buckets
+ assert rows["name"]["kll_buckets"] is None
+
+ def test_kll_custom_parameters(self, spark, sample_df):
+ """Test KLL profiling with custom parameters."""
+ params = KLLParameters(sketch_size=1024, shrinking_factor=0.5, num_buckets=32)
+ result = (
+ ColumnProfilerRunner(spark)
+ .onData(sample_df)
+ .withKLLProfiling()
+ .setKLLParameters(params)
+ .run()
+ )
+
+ # Verify it runs without error
+ assert result.count() > 0
+
+ def test_histogram_threshold(self, spark, sample_df):
+ """Test histogram computation for low cardinality columns."""
+ result = (
+ ColumnProfilerRunner(spark)
+ .onData(sample_df)
+ .withLowCardinalityHistogramThreshold(10)
+ .run()
+ )
+
+ rows = {r["column"]: r for r in result.collect()}
+
+ # id has 5 distinct values, should have histogram
+ assert rows["id"]["histogram"] is not None
+
+
+class TestConstraintSuggestionsE2E:
+ """End-to-end tests for Constraint Suggestions via Spark Connect."""
+
+ def test_default_rules(self, spark, sample_df):
+ """Test DEFAULT rules generate suggestions."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(sample_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ rows = result.collect()
+
+ # Should generate some suggestions
+ assert len(rows) > 0
+
+ # Check required columns
+ columns = result.columns
+ assert "column_name" in columns
+ assert "constraint_name" in columns
+ assert "code_for_constraint" in columns
+
+ def test_extended_rules(self, spark, sample_df):
+ """Test EXTENDED rules generate comprehensive suggestions."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(sample_df)
+ .addConstraintRules(Rules.EXTENDED)
+ .run()
+ )
+
+ # Smoke test: extended rules should run without error (suggestion count may vary)
+ assert result.count() >= 0
+
+ def test_restrict_to_columns(self, spark, sample_df):
+ """Test suggestions restricted to specific columns."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(sample_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .restrictToColumns(["id", "name"])
+ .run()
+ )
+
+ rows = result.collect()
+ columns_with_suggestions = set(r["column_name"] for r in rows)
+
+ # Only restricted columns should have suggestions
+ assert columns_with_suggestions.issubset({"id", "name"})
+
+ def test_train_test_split(self, spark, sample_df):
+ """Test train/test split evaluation."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(sample_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .useTrainTestSplitWithTestsetRatio(0.3, seed=42)
+ .run()
+ )
+
+ # Should have evaluation columns
+ assert "evaluation_status" in result.columns
+ assert "evaluation_metric_value" in result.columns
+
+ def test_code_for_constraint(self, spark, sample_df):
+ """Test code_for_constraint is properly formatted."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(sample_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ rows = result.collect()
+ for row in rows:
+ code = row["code_for_constraint"]
+ # Should be non-empty
+ assert code is not None
+ assert len(code) > 0
+ # Should not have Scala-specific syntax
+ assert "Some(" not in code
+ assert "Seq(" not in code
+
+ def test_suggestion_to_check_workflow(self, spark, sample_df):
+ """Test end-to-end workflow: get suggestions and verify data."""
+ # Step 1: Get suggestions
+ suggestions = (
+ ConstraintSuggestionRunner(spark)
+ .onData(sample_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ suggestion_rows = suggestions.collect()
+ assert len(suggestion_rows) > 0
+
+ # Step 2: Use suggestions to build verification
+ # Find a completeness suggestion for 'id'
+ id_suggestions = [
+ s for s in suggestion_rows
+ if s["column_name"] == "id" and "Completeness" in s["constraint_name"]
+ ]
+
+ if id_suggestions:
+ # We have a completeness suggestion - verify it with a check
+ check = Check(CheckLevel.Error, "From suggestion").isComplete("id")
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+
+class TestCombinedFeaturesE2E:
+ """Test combining multiple V2 features in workflows."""
+
+ def test_profile_then_verify(self, spark, sample_df):
+ """Test workflow: profile data, then verify based on findings."""
+ # Step 1: Profile the data
+ profiles = ColumnProfilerRunner(spark).onData(sample_df).run()
+
+ profile_rows = {r["column"]: r for r in profiles.collect()}
+
+ # Step 2: Create checks based on profile findings
+ # If id is 100% complete, verify that
+ if profile_rows["id"]["completeness"] == 1.0:
+ check = Check(CheckLevel.Error, "Profile-based check").isComplete("id")
+
+ result = VerificationSuite(spark).onData(sample_df).addCheck(check).run()
+
+ rows = result.collect()
+ assert rows[0]["constraint_status"] == "Success"
+
+ def test_analyze_profile_suggest(self, spark, sample_df):
+ """Test combined workflow: analyze, profile, and get suggestions."""
+ # Step 1: Run analysis
+ analysis = (
+ AnalysisRunner(spark)
+ .onData(sample_df)
+ .addAnalyzer(Size())
+ .addAnalyzer(Completeness("id"))
+ .run()
+ )
+ analysis_rows = analysis.collect()
+ assert len(analysis_rows) >= 2
+
+ # Step 2: Profile columns
+ profiles = ColumnProfilerRunner(spark).onData(sample_df).run()
+ profile_rows = profiles.collect()
+ assert len(profile_rows) == len(sample_df.columns)
+
+ # Step 3: Get suggestions
+ suggestions = (
+ ConstraintSuggestionRunner(spark)
+ .onData(sample_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+ suggestion_rows = suggestions.collect()
+ assert len(suggestion_rows) >= 0 # May be empty for small datasets
+
+
+if __name__ == "__main__":
+ # Run tests directly
+ pytest.main([__file__, "-v"])
diff --git a/tests/v2/test_profiles.py b/tests/v2/test_profiles.py
new file mode 100644
index 0000000..e1c8030
--- /dev/null
+++ b/tests/v2/test_profiles.py
@@ -0,0 +1,254 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for Column Profiler functionality.
+
+These tests verify that the Column Profiler correctly analyzes DataFrame columns
+and returns expected statistics.
+"""
+
+import json
+
+import pytest
+from pyspark.sql import Row
+
+from pydeequ.v2.profiles import ColumnProfilerRunner, KLLParameters
+
+
+class TestBasicProfiling:
+ """Test basic profiling metrics."""
+
+ def test_completeness_calculation(self, spark, profiler_df):
+ """Test completeness is correctly calculated."""
+ result = ColumnProfilerRunner(spark).onData(profiler_df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ # id column is complete (8/8 = 1.0)
+ assert rows["id"]["completeness"] == 1.0
+
+ # salary has 1 null out of 8 (7/8 = 0.875)
+ assert abs(rows["salary"]["completeness"] - 7 / 8) < 0.001
+
+ # age has 1 null out of 8
+ assert abs(rows["age"]["completeness"] - 7 / 8) < 0.001
+
+ def test_data_type_inference(self, spark, profiler_df):
+ """Test data types are correctly inferred."""
+ result = ColumnProfilerRunner(spark).onData(profiler_df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ # Check data types contain expected type indicators
+ # Deequ returns "Integral" for integer types
+ assert (
+ "Integral" in rows["id"]["data_type"]
+ or "Integer" in rows["id"]["data_type"]
+ or "Long" in rows["id"]["data_type"]
+ )
+ assert "String" in rows["name"]["data_type"]
+ # Deequ returns "Fractional" for double types
+ assert (
+ "Fractional" in rows["salary"]["data_type"]
+ or "Double" in rows["salary"]["data_type"]
+ )
+ assert "Boolean" in rows["active"]["data_type"]
+
+ def test_approx_distinct_values(self, spark, profiler_df):
+ """Test approximate distinct value count."""
+ result = ColumnProfilerRunner(spark).onData(profiler_df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ # id should have 8 distinct values
+ assert rows["id"]["approx_distinct_values"] == 8
+
+ # active (boolean) should have 2 distinct values
+ assert rows["active"]["approx_distinct_values"] == 2
+
+ def test_all_columns_profiled(self, spark, profiler_df):
+ """Test that all columns are profiled by default."""
+ result = ColumnProfilerRunner(spark).onData(profiler_df).run()
+ rows = result.collect()
+
+ expected_columns = {"id", "name", "age", "salary", "active", "email", "score"}
+ profiled_columns = {r["column"] for r in rows}
+
+ assert profiled_columns == expected_columns
+
+
+class TestNumericProfiling:
+ """Test numeric column profiling."""
+
+ def test_numeric_statistics(self, spark, profiler_df):
+ """Test mean, min, max, sum, stddev for numeric columns."""
+ result = ColumnProfilerRunner(spark).onData(profiler_df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ age_profile = rows["age"]
+ # age values: 30, 25, 35, 28, None, 45, 32, 29
+ # min=25, max=45
+ assert age_profile["minimum"] == 25.0
+ assert age_profile["maximum"] == 45.0
+ assert age_profile["mean"] is not None
+ assert age_profile["std_dev"] is not None
+
+ def test_non_numeric_has_null_stats(self, spark, profiler_df):
+ """Test non-numeric columns have null for numeric stats."""
+ result = ColumnProfilerRunner(spark).onData(profiler_df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ name_profile = rows["name"]
+ assert name_profile["mean"] is None
+ assert name_profile["minimum"] is None
+ assert name_profile["maximum"] is None
+
+
+class TestKLLProfiling:
+ """Test KLL sketch profiling."""
+
+ def test_kll_disabled_by_default(self, spark, profiler_df):
+ """Test KLL is not computed by default."""
+ result = ColumnProfilerRunner(spark).onData(profiler_df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ assert rows["age"]["kll_buckets"] is None
+
+ def test_kll_enabled(self, spark, profiler_df):
+ """Test KLL buckets are computed when enabled."""
+ result = (
+ ColumnProfilerRunner(spark).onData(profiler_df).withKLLProfiling().run()
+ )
+ rows = {r["column"]: r for r in result.collect()}
+
+ # Numeric columns should have KLL buckets
+ assert rows["age"]["kll_buckets"] is not None
+ assert rows["salary"]["kll_buckets"] is not None
+ # Non-numeric should not
+ assert rows["name"]["kll_buckets"] is None
+
+ def test_kll_custom_parameters(self, spark, profiler_df):
+ """Test custom KLL parameters are applied."""
+ params = KLLParameters(sketch_size=1024, shrinking_factor=0.5, num_buckets=32)
+ result = (
+ ColumnProfilerRunner(spark)
+ .onData(profiler_df)
+ .withKLLProfiling()
+ .setKLLParameters(params)
+ .run()
+ )
+ # Just verify it runs without error
+ assert result.count() > 0
+
+
+class TestProfilerOptions:
+ """Test profiler configuration options."""
+
+ def test_restrict_to_columns(self, spark, profiler_df):
+ """Test restricting profiling to specific columns."""
+ result = (
+ ColumnProfilerRunner(spark)
+ .onData(profiler_df)
+ .restrictToColumns(["id", "name"])
+ .run()
+ )
+
+ columns = [r["column"] for r in result.collect()]
+ assert set(columns) == {"id", "name"}
+
+ def test_low_cardinality_histogram(self, spark, profiler_df):
+ """Test histogram is computed for low cardinality columns."""
+ result = (
+ ColumnProfilerRunner(spark)
+ .onData(profiler_df)
+ .withLowCardinalityHistogramThreshold(10)
+ .run()
+ )
+ rows = {r["column"]: r for r in result.collect()}
+
+ # active (2 values) should have histogram
+ assert rows["active"]["histogram"] is not None
+ # Verify histogram is valid JSON
+ histogram = json.loads(rows["active"]["histogram"])
+ assert len(histogram) > 0
+
+ def test_predefined_types(self, spark, profiler_df):
+ """Test predefined types override inference."""
+ result = (
+ ColumnProfilerRunner(spark)
+ .onData(profiler_df)
+ .setPredefinedTypes({"id": "String"})
+ .run()
+ )
+ rows = {r["column"]: r for r in result.collect()}
+
+ assert rows["id"]["is_data_type_inferred"] is False
+
+
+class TestProfilerEdgeCases:
+ """Test edge cases for profiler."""
+
+ def test_all_null_column(self, spark):
+ """Test profiling column with all nulls."""
+ from pyspark.sql.types import IntegerType, StringType, StructField, StructType
+
+ schema = StructType(
+ [
+ StructField("id", IntegerType(), False),
+ StructField("value", StringType(), True),
+ ]
+ )
+ df = spark.createDataFrame(
+ [(1, None), (2, None)],
+ schema=schema,
+ )
+ result = ColumnProfilerRunner(spark).onData(df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ assert rows["value"]["completeness"] == 0.0
+
+ def test_single_row(self, spark):
+ """Test profiling single row DataFrame."""
+ df = spark.createDataFrame([Row(id=1, value=100)])
+ result = ColumnProfilerRunner(spark).onData(df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ assert rows["value"]["minimum"] == 100.0
+ assert rows["value"]["maximum"] == 100.0
+ assert rows["value"]["completeness"] == 1.0
+
+ def test_large_dataframe(self, spark):
+ """Test profiling larger DataFrame."""
+ df = spark.createDataFrame(
+ [Row(id=i, value=i * 10, category=f"cat_{i % 5}") for i in range(1000)]
+ )
+ result = ColumnProfilerRunner(spark).onData(df).run()
+ rows = {r["column"]: r for r in result.collect()}
+
+ # Allow some approximation error for HyperLogLog-based distinct count
+ assert rows["id"]["approx_distinct_values"] >= 950
+ assert rows["category"]["approx_distinct_values"] == 5
+
+
+class TestKLLParametersUnit:
+ """Unit tests for KLLParameters (no Spark needed)."""
+
+ def test_default_parameters(self):
+ """Test default KLL parameters."""
+ params = KLLParameters()
+ assert params.sketch_size == 2048
+ assert params.shrinking_factor == 0.64
+ assert params.num_buckets == 64
+
+ def test_custom_parameters(self):
+ """Test custom KLL parameters."""
+ params = KLLParameters(sketch_size=1024, shrinking_factor=0.5, num_buckets=32)
+ assert params.sketch_size == 1024
+ assert params.shrinking_factor == 0.5
+ assert params.num_buckets == 32
+
+ def test_to_proto(self):
+ """Test conversion to protobuf."""
+ params = KLLParameters(sketch_size=512, shrinking_factor=0.7, num_buckets=16)
+ proto_msg = params.to_proto()
+
+ # Proto uses snake_case field names
+ assert proto_msg.sketch_size == 512
+ assert proto_msg.shrinking_factor == 0.7
+ assert proto_msg.number_of_buckets == 16
diff --git a/tests/v2/test_suggestions.py b/tests/v2/test_suggestions.py
new file mode 100644
index 0000000..360b10b
--- /dev/null
+++ b/tests/v2/test_suggestions.py
@@ -0,0 +1,330 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for Constraint Suggestion functionality.
+
+These tests verify that the Constraint Suggestion module correctly analyzes
+DataFrame columns and suggests appropriate data quality constraints.
+"""
+
+import pytest
+from pyspark.sql import Row
+
+from pydeequ.v2.suggestions import ConstraintSuggestionRunner, Rules
+
+
+class TestBasicSuggestions:
+ """Test basic constraint suggestion generation."""
+
+ def test_default_rules_generate_suggestions(self, spark, suggestion_df):
+ """Test DEFAULT rules generate suggestions."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ rows = result.collect()
+ assert len(rows) > 0
+
+ # Check required columns exist
+ columns = result.columns
+ assert "column_name" in columns
+ assert "constraint_name" in columns
+ assert "code_for_constraint" in columns
+ assert "description" in columns
+ assert "suggesting_rule" in columns
+
+ def test_completeness_suggestion(self, spark, suggestion_df):
+ """Test completeness constraints are suggested for complete columns."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ rows = result.collect()
+ id_suggestions = [r for r in rows if r["column_name"] == "id"]
+
+ # id column is complete, should have completeness-related suggestion
+ constraint_names = [s["constraint_name"] for s in id_suggestions]
+ assert any(
+ "Complete" in name or "NotNull" in name or "Completeness" in name
+ for name in constraint_names
+ )
+
+ def test_categorical_suggestion(self, spark, suggestion_df):
+ """Test categorical constraints are suggested for low-cardinality columns."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ rows = result.collect()
+ status_suggestions = [r for r in rows if r["column_name"] == "status"]
+
+ constraint_names = [s["constraint_name"] for s in status_suggestions]
+ # Should suggest IsIn/Contained for categorical column (3 distinct values)
+ has_categorical = any(
+ "IsIn" in name or "Contained" in name or "Categorical" in name
+ for name in constraint_names
+ )
+ # If no categorical suggestion, at least verify we got some suggestions
+ assert has_categorical or len(constraint_names) > 0
+
+
+class TestRulesCombinations:
+ """Test different rule combinations."""
+
+ def test_numerical_rules(self, spark, suggestion_df):
+ """Test NUMERICAL rules generate statistical constraints."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.NUMERICAL)
+ .run()
+ )
+
+ rows = result.collect()
+ score_suggestions = [r for r in rows if r["column_name"] == "score"]
+
+ # Numerical rules should suggest min/max/mean constraints for numeric column
+ constraint_names = [s["constraint_name"] for s in score_suggestions]
+ has_numeric_constraint = any(
+ name in ["HasMin", "HasMax", "HasMean", "Minimum", "Maximum", "Mean"]
+ or "Min" in name
+ or "Max" in name
+ for name in constraint_names
+ )
+ # Either we have numeric constraints or the rule set is empty
+ assert has_numeric_constraint or len(rows) == 0
+
+ def test_extended_rules(self, spark, suggestion_df):
+ """Test EXTENDED rules include all rule types."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.EXTENDED)
+ .run()
+ )
+
+ extended_count = result.count()
+
+ # Smoke test: extended rules should run without error (suggestion count may vary)
+ assert extended_count >= 0
+
+ def test_multiple_rules_combined(self, spark, suggestion_df):
+ """Test adding multiple rule sets."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .addConstraintRules(Rules.NUMERICAL)
+ .run()
+ )
+
+ assert result.count() >= 0
+
+ def test_common_rules_uniqueness(self, spark, suggestion_df):
+ """Test COMMON rules suggest uniqueness for unique columns."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.COMMON)
+ .run()
+ )
+
+ rows = result.collect()
+ id_suggestions = [r for r in rows if r["column_name"] == "id"]
+
+ constraint_names = [s["constraint_name"] for s in id_suggestions]
+ # id column is unique, should potentially get uniqueness suggestion
+ has_unique = any("Unique" in name for name in constraint_names)
+ # If no unique suggestion, at least verify we ran without error
+ assert has_unique or len(rows) >= 0
+
+
+class TestTrainTestSplit:
+ """Test train/test split evaluation."""
+
+ def test_train_test_split_evaluation(self, spark, suggestion_df):
+ """Test suggestions are evaluated on test set."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .useTrainTestSplitWithTestsetRatio(0.3)
+ .run()
+ )
+
+ rows = result.collect()
+ # When train/test split is used, evaluation columns should exist
+ assert "evaluation_status" in result.columns
+ assert "evaluation_metric_value" in result.columns
+
+ def test_train_test_with_seed(self, spark, suggestion_df):
+ """Test reproducible train/test split with seed."""
+ result1 = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .useTrainTestSplitWithTestsetRatio(0.3, seed=42)
+ .run()
+ )
+
+ result2 = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .useTrainTestSplitWithTestsetRatio(0.3, seed=42)
+ .run()
+ )
+
+ # Same seed should produce same suggestion count
+ assert result1.count() == result2.count()
+
+ def test_train_test_invalid_ratio(self, spark, suggestion_df):
+ """Test invalid train/test ratio raises error."""
+ with pytest.raises(ValueError, match="between 0.0 and 1.0"):
+ (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .useTrainTestSplitWithTestsetRatio(1.5)
+ .run()
+ )
+
+ with pytest.raises(ValueError, match="between 0.0 and 1.0"):
+ (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .useTrainTestSplitWithTestsetRatio(0.0)
+ .run()
+ )
+
+
+class TestSuggestionOptions:
+ """Test suggestion configuration options."""
+
+ def test_restrict_to_columns(self, spark, suggestion_df):
+ """Test restricting suggestions to specific columns."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .restrictToColumns(["id", "status"])
+ .run()
+ )
+
+ rows = result.collect()
+ columns_with_suggestions = set(r["column_name"] for r in rows)
+
+ # Should only have suggestions for restricted columns
+ assert columns_with_suggestions.issubset({"id", "status"})
+
+ def test_code_for_constraint_format(self, spark, suggestion_df):
+ """Test code_for_constraint is valid Python-like syntax."""
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(suggestion_df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ rows = result.collect()
+ for row in rows:
+ code = row["code_for_constraint"]
+ # Should be non-empty string
+ assert code is not None
+ assert len(code) > 0
+ # Should not contain Scala-specific syntax (after conversion)
+ assert "Some(" not in code
+ assert "Seq(" not in code
+
+ def test_no_rules_raises_error(self, spark, suggestion_df):
+ """Test that running without rules raises an error."""
+ with pytest.raises(ValueError, match="At least one constraint rule"):
+ ConstraintSuggestionRunner(spark).onData(suggestion_df).run()
+
+
+class TestSuggestionEdgeCases:
+ """Test edge cases for suggestions."""
+
+ def test_single_row(self, spark):
+ """Test suggestions on single row DataFrame."""
+ df = spark.createDataFrame([Row(id=1, value="test")])
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ # Should handle gracefully
+ assert result.count() >= 0
+
+ def test_high_cardinality_column(self, spark):
+ """Test suggestions for high cardinality column."""
+ df = spark.createDataFrame(
+ [Row(id=i, unique_value=f"value_{i}") for i in range(100)]
+ )
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ rows = result.collect()
+ unique_suggestions = [r for r in rows if r["column_name"] == "unique_value"]
+
+ # Should NOT suggest IsIn for high cardinality
+ constraint_names = [s["constraint_name"] for s in unique_suggestions]
+ assert not any("IsIn" in name for name in constraint_names)
+
+ def test_all_null_column(self, spark):
+ """Test suggestions for column with all nulls."""
+ from pyspark.sql.types import IntegerType, StringType, StructField, StructType
+
+ schema = StructType(
+ [
+ StructField("id", IntegerType(), False),
+ StructField("value", StringType(), True),
+ ]
+ )
+ df = spark.createDataFrame(
+ [(1, None), (2, None), (3, None)],
+ schema=schema,
+ )
+ result = (
+ ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run()
+ )
+
+ rows = result.collect()
+ # Should handle all-null column gracefully
+ assert len(rows) >= 0
+
+
+class TestRulesEnum:
+ """Unit tests for Rules enum (no Spark needed)."""
+
+ def test_rules_values(self):
+ """Test Rules enum has expected values."""
+ assert Rules.DEFAULT.value == "DEFAULT"
+ assert Rules.STRING.value == "STRING"
+ assert Rules.NUMERICAL.value == "NUMERICAL"
+ assert Rules.COMMON.value == "COMMON"
+ assert Rules.EXTENDED.value == "EXTENDED"
+
+ def test_all_rules_defined(self):
+ """Test all expected rules are defined."""
+ expected_rules = {"DEFAULT", "STRING", "NUMERICAL", "COMMON", "EXTENDED"}
+ actual_rules = {r.value for r in Rules}
+ assert actual_rules == expected_rules
diff --git a/tests/v2/test_unit.py b/tests/v2/test_unit.py
new file mode 100644
index 0000000..766e6ef
--- /dev/null
+++ b/tests/v2/test_unit.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for PyDeequ V2 Spark Connect module.
+
+These tests verify the Python client API works correctly without
+requiring a Spark session. They test protobuf serialization of
+predicates, checks, and analyzers.
+"""
+
+import unittest
+
+from pydeequ.v2 import (
+ # Checks
+ Check,
+ CheckLevel,
+ # Analyzers
+ Completeness,
+ Mean,
+ Size,
+ # Predicates
+ between,
+ eq,
+ gte,
+ is_one,
+)
+
+
+class TestPredicates(unittest.TestCase):
+ """Test predicate serialization.
+
+ These tests use hardcoded numeric values for operator enums to detect
+ proto sync issues between deequ (source of truth) and python-deequ.
+
+ Expected values from deequ_connect.proto:
+ UNSPECIFIED = 0, EQ = 1, NE = 2, GT = 3, GE = 4, LT = 5, LE = 6, BETWEEN = 7
+ """
+
+ def test_eq_predicate(self):
+ p = eq(100)
+ proto = p.to_proto()
+ self.assertEqual(proto.operator, 1) # EQ
+ self.assertEqual(proto.value, 100.0)
+
+ def test_gte_predicate(self):
+ p = gte(0.95)
+ proto = p.to_proto()
+ self.assertEqual(proto.operator, 4) # GE
+ self.assertEqual(proto.value, 0.95)
+
+ def test_between_predicate(self):
+ p = between(10, 20)
+ proto = p.to_proto()
+ self.assertEqual(proto.operator, 7) # BETWEEN
+ self.assertEqual(proto.lower_bound, 10.0)
+ self.assertEqual(proto.upper_bound, 20.0)
+
+ def test_is_one_predicate(self):
+ p = is_one()
+ proto = p.to_proto()
+ self.assertEqual(proto.operator, 1) # EQ
+ self.assertEqual(proto.value, 1.0)
+
+
+class TestCheckBuilder(unittest.TestCase):
+ """Test Check class protobuf building."""
+
+ def test_check_with_constraints(self):
+ check = (
+ Check(CheckLevel.Error, "Test check")
+ .isComplete("id")
+ .hasCompleteness("email", gte(0.95))
+ .hasSize(eq(100))
+ )
+
+ proto = check.to_proto()
+
+ self.assertEqual(proto.level, 0) # ERROR
+ self.assertEqual(proto.description, "Test check")
+ self.assertEqual(len(proto.constraints), 3)
+
+ # Check constraint types
+ self.assertEqual(proto.constraints[0].type, "isComplete")
+ self.assertEqual(proto.constraints[0].column, "id")
+
+ self.assertEqual(proto.constraints[1].type, "hasCompleteness")
+ self.assertEqual(proto.constraints[1].column, "email")
+
+ self.assertEqual(proto.constraints[2].type, "hasSize")
+
+ def test_check_warning_level(self):
+ check = Check(CheckLevel.Warning, "Warning check")
+ proto = check.to_proto()
+ self.assertEqual(proto.level, 1) # WARNING
+
+
+class TestAnalyzerBuilder(unittest.TestCase):
+ """Test Analyzer classes protobuf building."""
+
+ def test_size_analyzer(self):
+ analyzer = Size()
+ proto = analyzer.to_proto()
+ self.assertEqual(proto.type, "Size")
+
+ def test_completeness_analyzer(self):
+ analyzer = Completeness("email")
+ proto = analyzer.to_proto()
+ self.assertEqual(proto.type, "Completeness")
+ self.assertEqual(proto.column, "email")
+
+ def test_mean_analyzer(self):
+ analyzer = Mean("amount")
+ proto = analyzer.to_proto()
+ self.assertEqual(proto.type, "Mean")
+ self.assertEqual(proto.column, "amount")
+
+ def test_analyzer_with_where(self):
+ analyzer = Size(where="status = 'active'")
+ proto = analyzer.to_proto()
+ self.assertEqual(proto.type, "Size")
+ self.assertEqual(proto.where, "status = 'active'")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tutorials/data_quality_example_v2.py b/tutorials/data_quality_example_v2.py
new file mode 100644
index 0000000..2f343c6
--- /dev/null
+++ b/tutorials/data_quality_example_v2.py
@@ -0,0 +1,289 @@
+#!/usr/bin/env python3
+"""
+Testing Data Quality at Scale with PyDeequ V2
+
+This example demonstrates the PyDeequ 2.0 API using Spark Connect.
+It covers data analysis, constraint verification, column profiling,
+and constraint suggestions.
+
+Prerequisites:
+1. Start the Spark Connect server with the Deequ plugin:
+
+ $SPARK_HOME/sbin/start-connect-server.sh \
+ --packages org.apache.spark:spark-connect_2.12:3.5.0 \
+ --jars /path/to/deequ_2.12-2.1.0b-spark-3.5.jar \
+ --conf spark.connect.extensions.relation.classes=com.amazon.deequ.connect.DeequRelationPlugin
+
+2. Run this script:
+ SPARK_REMOTE=sc://localhost:15002 python data_quality_example_v2.py
+"""
+
+import os
+from pyspark.sql import SparkSession, Row
+
+# PyDeequ V2 imports
+from pydeequ.v2.analyzers import (
+ Size,
+ Completeness,
+ Distinctness,
+ Mean,
+ Minimum,
+ Maximum,
+ StandardDeviation,
+ Correlation,
+)
+from pydeequ.v2.checks import Check, CheckLevel
+from pydeequ.v2.verification import AnalysisRunner, VerificationSuite
+from pydeequ.v2.predicates import eq, gte, lte, between
+from pydeequ.v2.profiles import ColumnProfilerRunner
+from pydeequ.v2.suggestions import ConstraintSuggestionRunner, Rules
+
+
+def create_sample_data(spark: SparkSession):
+ """Create a sample product reviews dataset for demonstration."""
+ data = [
+ # Normal reviews
+ Row(review_id="R001", customer_id="C100", product_id="P001",
+ marketplace="US", star_rating=5, helpful_votes=10, total_votes=12,
+ review_year=2023, product_title="Great Product", insight="Y"),
+ Row(review_id="R002", customer_id="C101", product_id="P002",
+ marketplace="US", star_rating=4, helpful_votes=8, total_votes=10,
+ review_year=2023, product_title="Good Value", insight="Y"),
+ Row(review_id="R003", customer_id="C102", product_id="P001",
+ marketplace="UK", star_rating=5, helpful_votes=15, total_votes=18,
+ review_year=2022, product_title="Great Product", insight="N"),
+ Row(review_id="R004", customer_id="C103", product_id="P003",
+ marketplace="DE", star_rating=3, helpful_votes=5, total_votes=8,
+ review_year=2022, product_title="Decent Item", insight="Y"),
+ Row(review_id="R005", customer_id="C104", product_id="P002",
+ marketplace="FR", star_rating=4, helpful_votes=12, total_votes=15,
+ review_year=2021, product_title="Good Value", insight="N"),
+ Row(review_id="R006", customer_id="C105", product_id="P004",
+ marketplace="JP", star_rating=5, helpful_votes=20, total_votes=22,
+ review_year=2023, product_title="Excellent!", insight="Y"),
+ Row(review_id="R007", customer_id="C106", product_id="P001",
+ marketplace="US", star_rating=2, helpful_votes=3, total_votes=10,
+ review_year=2020, product_title="Great Product", insight="N"),
+ Row(review_id="R008", customer_id="C107", product_id="P005",
+ marketplace="UK", star_rating=1, helpful_votes=25, total_votes=30,
+ review_year=2021, product_title="Disappointing", insight="Y"),
+ # Review with missing marketplace (data quality issue)
+ Row(review_id="R009", customer_id="C108", product_id="P002",
+ marketplace=None, star_rating=4, helpful_votes=7, total_votes=9,
+ review_year=2023, product_title="Good Value", insight="Y"),
+ # Duplicate review_id (data quality issue)
+ Row(review_id="R001", customer_id="C109", product_id="P003",
+ marketplace="US", star_rating=3, helpful_votes=4, total_votes=6,
+ review_year=2022, product_title="Decent Item", insight="N"),
+ ]
+ return spark.createDataFrame(data)
+
+
+def run_data_analysis(spark: SparkSession, df):
+ """
+ Run data analysis using AnalysisRunner.
+
+ This demonstrates computing various metrics on the dataset:
+ - Size: Total row count
+ - Completeness: Ratio of non-null values
+ - Distinctness: Ratio of distinct values
+ - Mean, Min, Max: Statistical measures
+ - Correlation: Relationship between columns
+ """
+ print("\n" + "=" * 60)
+ print("DATA ANALYSIS")
+ print("=" * 60)
+
+ result = (AnalysisRunner(spark)
+ .onData(df)
+ .addAnalyzer(Size())
+ .addAnalyzer(Completeness("review_id"))
+ .addAnalyzer(Completeness("marketplace"))
+ .addAnalyzer(Distinctness("review_id"))
+ .addAnalyzer(Mean("star_rating"))
+ .addAnalyzer(Minimum("star_rating"))
+ .addAnalyzer(Maximum("star_rating"))
+ .addAnalyzer(StandardDeviation("star_rating"))
+ .addAnalyzer(Correlation("total_votes", "helpful_votes"))
+ .run())
+
+ print("\nAnalysis Results:")
+ result.show(truncate=False)
+
+ # Extract key insights
+ rows = result.collect()
+ metrics = {(r["name"], r["instance"]): float(r["value"]) for r in rows}
+
+ print("\nKey Insights:")
+ print(f" - Dataset contains {int(metrics.get(('Size', '*'), 0))} reviews")
+ print(f" - review_id completeness: {metrics.get(('Completeness', 'review_id'), 0):.1%}")
+ print(f" - marketplace completeness: {metrics.get(('Completeness', 'marketplace'), 0):.1%}")
+ print(f" - review_id distinctness: {metrics.get(('Distinctness', 'review_id'), 0):.1%}")
+ print(f" - Average star rating: {metrics.get(('Mean', 'star_rating'), 0):.2f}")
+ print(f" - Star rating range: {metrics.get(('Minimum', 'star_rating'), 0):.0f} - {metrics.get(('Maximum', 'star_rating'), 0):.0f}")
+
+ return result
+
+
+def run_constraint_verification(spark: SparkSession, df):
+ """
+ Run constraint verification using VerificationSuite.
+
+ This demonstrates defining and verifying data quality rules:
+ - Size checks
+ - Completeness checks
+ - Uniqueness checks
+ - Range checks (min/max)
+ - Categorical value checks
+ """
+ print("\n" + "=" * 60)
+ print("CONSTRAINT VERIFICATION")
+ print("=" * 60)
+
+ # Define checks using the V2 predicate API
+ # Note: In V2, we use predicates like eq(), gte(), between() instead of lambdas
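+ # For illustration only (V1-style syntax shown purely for contrast, not used here):
+ # a V1 assertion such as .hasSize(lambda x: x >= 5) is written as .hasSize(gte(5)) in V2.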
+ check = (Check(CheckLevel.Warning, "Product Reviews Quality Check")
+ # Size check: at least 5 reviews
+ .hasSize(gte(5))
+ # Completeness checks
+ .isComplete("review_id")
+ .isComplete("customer_id")
+ .hasCompleteness("marketplace", gte(0.8)) # Allow some missing
+ # Uniqueness check
+ .isUnique("review_id")
+ # Star rating range check
+ .hasMin("star_rating", eq(1.0))
+ .hasMax("star_rating", eq(5.0))
+ .hasMean("star_rating", between(1.0, 5.0))
+ # Year range check
+ .hasMin("review_year", gte(2015))
+ .hasMax("review_year", lte(2025))
+ # Categorical check
+ .isContainedIn("marketplace", ["US", "UK", "DE", "JP", "FR"])
+ .isContainedIn("insight", ["Y", "N"])
+ )
+
+ result = (VerificationSuite(spark)
+ .onData(df)
+ .addCheck(check)
+ .run())
+
+ print("\nVerification Results:")
+ result.show(truncate=False)
+
+ # Summarize results
+ rows = result.collect()
+ passed = sum(1 for r in rows if r["constraint_status"] == "Success")
+ failed = sum(1 for r in rows if r["constraint_status"] == "Failure")
+
+ print(f"\nSummary: {passed} passed, {failed} failed out of {len(rows)} constraints")
+
+ if failed > 0:
+ print("\nFailed Constraints:")
+ for r in rows:
+ if r["constraint_status"] == "Failure":
+ print(f" - {r['constraint']}")
+ if r["constraint_message"]:
+ print(f" Message: {r['constraint_message']}")
+
+ return result
+
+
+def run_column_profiling(spark: SparkSession, df):
+ """
+ Run column profiling using ColumnProfilerRunner.
+
+ This automatically computes statistics for each column:
+ - Completeness
+ - Approximate distinct values
+ - Data type detection
+ - Numeric statistics (mean, min, max, etc.)
+ - Optional: KLL sketches for approximate quantiles
+ """
+ print("\n" + "=" * 60)
+ print("COLUMN PROFILING")
+ print("=" * 60)
+
+ result = (ColumnProfilerRunner(spark)
+ .onData(df)
+ .withLowCardinalityHistogramThreshold(10) # Generate histograms for low-cardinality columns
+ .run())
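+ # KLL sketches for approximate quantiles are optional. A minimal sketch of enabling
+ # them, using the same API exercised in tests/v2/test_profiles.py (the parameter
+ # values shown are the defaults asserted there):
+ # from pydeequ.v2.profiles import KLLParameters
+ # profiles_with_kll = (ColumnProfilerRunner(spark)
+ # .onData(df)
+ # .withKLLProfiling()
+ # .setKLLParameters(KLLParameters(sketch_size=2048, shrinking_factor=0.64, num_buckets=64))
+ # .run())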
+
+ print("\nColumn Profiles:")
+ # Show selected columns for readability
+ result.select(
+ "column", "completeness", "approx_distinct_values",
+ "data_type", "mean", "minimum", "maximum"
+ ).show(truncate=False)
+
+ return result
+
+
+def run_constraint_suggestions(spark: SparkSession, df):
+ """
+ Run automated constraint suggestion using ConstraintSuggestionRunner.
+
+ This analyzes the data and suggests appropriate constraints:
+ - Completeness constraints for complete columns
+ - Uniqueness constraints for unique columns
+ - Categorical range constraints for low-cardinality columns
+ - Non-negative constraints for numeric columns
+ """
+ print("\n" + "=" * 60)
+ print("CONSTRAINT SUGGESTIONS")
+ print("=" * 60)
+
+ result = (ConstraintSuggestionRunner(spark)
+ .onData(df)
+ .addConstraintRules(Rules.DEFAULT)
+ .run())
+
+ print("\nSuggested Constraints:")
+ result.select(
+ "column_name", "constraint_name", "description", "code_for_constraint"
+ ).show(truncate=False)
+
+ # Count suggestions per column
+ rows = result.collect()
+ print(f"\nTotal suggestions: {len(rows)}")
+
+ return result
+
+
+def main():
+ # Get Spark Connect URL from environment
+ spark_remote = os.environ.get("SPARK_REMOTE", "sc://localhost:15002")
+
+ print("PyDeequ V2 Data Quality Example")
+ print(f"Connecting to: {spark_remote}")
+
+ # Create Spark Connect session
+ spark = SparkSession.builder.remote(spark_remote).getOrCreate()
+
+ try:
+ # Create sample data
+ print("\nCreating sample product reviews dataset...")
+ df = create_sample_data(spark)
+
+ print("\nDataset Schema:")
+ df.printSchema()
+
+ print("\nSample Data:")
+ df.show(truncate=False)
+
+ # Run all examples
+ run_data_analysis(spark, df)
+ run_constraint_verification(spark, df)
+ run_column_profiling(spark, df)
+ run_constraint_suggestions(spark, df)
+
+ print("\n" + "=" * 60)
+ print("EXAMPLE COMPLETE")
+ print("=" * 60)
+
+ finally:
+ spark.stop()
+
+
+if __name__ == "__main__":
+ main()