Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ their individual contributions.

.. NOTE - this list is in alphabetical order by first name (or handle).

* `A. Jesse Jiryu Davis <https://github.com/ajdavis>`_
* `Aaron Meurer <https://github.com/asmeurer>`_
* `Adam Johnson <https://github.com/adamchainz>`_
* `Adam Matan <https://github.com/adamatan/adamatan>`_
Expand Down
8 changes: 8 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
RELEASE_TYPE: patch

This patch makes Hypothesis more tolerant of slow-to-satisfy ``assume()`` calls.
Previously, Hypothesis would give up after ``max(max_examples * 10, 1000)``
attempts; now it uses a statistical test and stops only when it is 99% confident
that fewer than 1% of examples would pass (:issue:`4623`).

Thanks to @ajdavis for this improvement!
40 changes: 31 additions & 9 deletions hypothesis-python/src/hypothesis/internal/conjecture/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,30 @@ def timing_report(self) -> str:
return "\n".join(out)


# Stop when 99% confident the true valid rate is below 1%.
# For k valid examples, we need n invalid such that:
# P(seeing <= k valid in n+k trials | rate=1%) <= 1%
# k=0: (0.99)^n <= 0.01 -> n >= ln(0.01)/ln(0.99)
# Each additional valid example adds ~ln(0.01)/ln(0.99)/3 to threshold.
def _calculate_thresholds(
confidence: float = 0.99, min_valid_rate: float = 0.01
) -> tuple[int, int]:
log_confidence = math.log(1 - confidence)
log_invalid_rate = math.log(1 - min_valid_rate)
base = math.ceil(log_confidence / log_invalid_rate)
# Approximate increase per valid example (from binomial CDF)
per_valid = math.ceil(base / 3)
return base, per_valid


# Computed once at import time with the default arguments (99% confidence,
# 1% minimum valid rate). The engine stops generating once the number of
# invalid + overrun examples exceeds
# INVALID_THRESHOLD_BASE + INVALID_PER_VALID * valid_examples.
INVALID_THRESHOLD_BASE, INVALID_PER_VALID = _calculate_thresholds()


class ExitReason(Enum):
max_examples = "settings.max_examples={s.max_examples}"
max_iterations = (
"settings.max_examples={s.max_examples}, "
"but < 10% of examples satisfied assumptions"
"but < 1% of examples satisfied assumptions"
)
max_shrinks = f"shrunk example {MAX_SHRINKS} times"
finished = "nothing left to do"
Expand Down Expand Up @@ -713,12 +732,11 @@ def test_function(self, data: ConjectureData) -> None:
# while in the other case below we just want to move on to shrinking.)
if self.valid_examples >= self.settings.max_examples:
self.exit_with(ExitReason.max_examples)
if self.call_count >= max(
self.settings.max_examples * 10,
# We have a high-ish default max iterations, so that tests
# don't become flaky when max_examples is too low.
1000,
):
# Stop when we're 99% confident the true valid rate is below 1%.
invalid_threshold = (
INVALID_THRESHOLD_BASE + INVALID_PER_VALID * self.valid_examples
)
if (self.invalid_examples + self.overrun_examples) > invalid_threshold:
self.exit_with(ExitReason.max_iterations)

if self.__tree_is_exhausted():
Expand Down Expand Up @@ -1077,8 +1095,12 @@ def should_generate_more(self) -> bool:
# but with the important distinction that this clause will move on to
# the shrinking phase having found one or more bugs, while the other
# will exit having found zero bugs.
if self.valid_examples >= self.settings.max_examples or self.call_count >= max(
self.settings.max_examples * 10, 1000
invalid_threshold = (
INVALID_THRESHOLD_BASE + INVALID_PER_VALID * self.valid_examples
)
if (
self.valid_examples >= self.settings.max_examples
or (self.invalid_examples + self.overrun_examples) > invalid_threshold
): # pragma: no cover
return False

Expand Down
54 changes: 51 additions & 3 deletions hypothesis-python/tests/conjecture/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
from hypothesis.internal.conjecture.data import ConjectureData, Overrun, Status
from hypothesis.internal.conjecture.datatree import compute_max_children
from hypothesis.internal.conjecture.engine import (
INVALID_PER_VALID,
INVALID_THRESHOLD_BASE,
MIN_TEST_CALLS,
ConjectureRunner,
ExitReason,
Expand Down Expand Up @@ -883,6 +885,52 @@ def f(data):
assert runner.exit_reason == ExitReason.max_iterations


def test_max_iterations_with_all_invalid():
    """When every example is rejected, the run ends one call past the base
    invalid threshold with ``ExitReason.max_iterations``.
    """

    def always_invalid(data):
        data.draw_integer(0, 2**64 - 1)
        data.mark_invalid()

    # The engine compares with ``>`` rather than ``>=``, so it takes one more
    # invalid attempt than the threshold itself before it gives up.
    expected_calls = INVALID_THRESHOLD_BASE + 1

    run_settings = settings(
        max_examples=10_000,
        database=None,
        suppress_health_check=list(HealthCheck),
    )
    runner = ConjectureRunner(always_invalid, settings=run_settings)
    runner.run()

    assert runner.call_count == expected_calls
    assert runner.exit_reason == ExitReason.max_iterations


@pytest.mark.parametrize("n_valid", [1, 2, 5])
def test_max_iterations_with_some_valid(n_valid):
    """The first ``n_valid`` calls are accepted and every later call is marked
    invalid; each accepted call raises the invalid threshold by
    ``INVALID_PER_VALID``.
    """
    accepted = 0

    def valid_then_invalid(data):
        nonlocal accepted
        data.draw_integer(0, 2**64 - 1)
        if accepted >= n_valid:
            data.mark_invalid()
        else:
            accepted += 1

    run_settings = settings(
        max_examples=10_000,
        database=None,
        suppress_health_check=list(HealthCheck),
    )
    runner = ConjectureRunner(valid_then_invalid, settings=run_settings)
    runner.run()

    # Total calls = the valid ones, plus enough invalid ones to exceed the
    # threshold (strictly greater, hence the trailing + 1).
    invalid_threshold = INVALID_THRESHOLD_BASE + n_valid * INVALID_PER_VALID
    expected_calls = n_valid + invalid_threshold + 1

    assert runner.call_count == expected_calls
    assert runner.exit_reason == ExitReason.max_iterations


def test_exit_because_shrink_phase_timeout(monkeypatch):
val = 0

Expand Down Expand Up @@ -1215,11 +1263,11 @@ def test(data):


def test_shrink_after_max_iterations():
"""If we find a bug, keep looking for more, and then hit the test call
limit, we should still proceed to shrinking.
"""If we find a bug, keep looking for more, and then hit the invalid
examples limit, we should still proceed to shrinking.
"""
max_examples = 10
max_iterations = max_examples * 10
max_iterations = INVALID_THRESHOLD_BASE
fail_at = max_iterations - 5

invalid = set()
Expand Down
9 changes: 5 additions & 4 deletions hypothesis-python/tests/cover/test_testdecorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
strategies as st,
)
from hypothesis.errors import Unsatisfiable
from hypothesis.internal.conjecture.engine import INVALID_THRESHOLD_BASE
from hypothesis.strategies import (
binary,
booleans,
Expand Down Expand Up @@ -507,8 +508,8 @@ def f(v):
with pytest.raises(
Unsatisfiable,
match=(
r"Unable to satisfy assumptions of f\. 1000 of 1000 examples "
r"failed a \.filter\(\) or assume\(\)"
rf"Unable to satisfy assumptions of f\. {INVALID_THRESHOLD_BASE+1} of "
rf"{INVALID_THRESHOLD_BASE+1} examples failed a \.filter\(\) or assume\(\)"
),
):
f()
Expand All @@ -532,8 +533,8 @@ def f(v):
pass

match = (
r"1000 of 1000 examples were too large to finish generating; try "
r"reducing the typical size of your inputs\?"
rf"{INVALID_THRESHOLD_BASE+1} of {INVALID_THRESHOLD_BASE+1} examples were too large to"
rf" finish generating; try reducing the typical size of your inputs\?"
)
with (
pytest.raises(Unsatisfiable, match=match),
Expand Down
2 changes: 1 addition & 1 deletion hypothesis-python/tests/nocover/test_conjecture_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_lot_of_dead_nodes():
@run_to_nodes
def nodes(data):
for i in range(4):
if data.draw_integer(0, 2**8 - 1) != i:
if data.draw_integer(0, 2**7 - 1) != i:
data.mark_invalid()
data.mark_interesting(interesting_origin())

Expand Down
8 changes: 4 additions & 4 deletions hypothesis-python/tests/pytest/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,21 +53,21 @@ def test_prints_statistics_given_option(testdir):
out = get_output(testdir, TESTSUITE, PRINT_STATISTICS_OPTION)
assert "Hypothesis Statistics" in out
assert "max_examples=100" in out
assert "< 10% of examples satisfied assumptions" in out
assert "< 1% of examples satisfied assumptions" in out


def test_prints_statistics_given_option_under_xdist(testdir):
out = get_output(testdir, TESTSUITE, PRINT_STATISTICS_OPTION, "-n", "2")
assert "Hypothesis Statistics" in out
assert "max_examples=100" in out
assert "< 10% of examples satisfied assumptions" in out
assert "< 1% of examples satisfied assumptions" in out


def test_prints_statistics_given_option_with_junitxml(testdir):
out = get_output(testdir, TESTSUITE, PRINT_STATISTICS_OPTION, "--junit-xml=out.xml")
assert "Hypothesis Statistics" in out
assert "max_examples=100" in out
assert "< 10% of examples satisfied assumptions" in out
assert "< 1% of examples satisfied assumptions" in out


@skipif_threading
Expand All @@ -80,7 +80,7 @@ def test_prints_statistics_given_option_under_xdist_with_junitxml(testdir):
)
assert "Hypothesis Statistics" in out
assert "max_examples=100" in out
assert "< 10% of examples satisfied assumptions" in out
assert "< 1% of examples satisfied assumptions" in out


UNITTEST_TESTSUITE = """
Expand Down