Skip to content

Commit 76d3207

Browse files
committed
permit up to 99% assume() failures #4623
1 parent 800b401 commit 76d3207

File tree

7 files changed

+50
-21
lines changed

7 files changed

+50
-21
lines changed

AUTHORS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ their individual contributions.
88

99
.. NOTE - this list is in alphabetical order by first name (or handle).
1010
11+
* `A. Jesse Jiryu Davis <https://github.com/ajdavis>`_
1112
* `Aaron Meurer <https://github.com/asmeurer>`_
1213
* `Adam Johnson <https://github.com/adamchainz>`_
1314
* `Adam Matan <https://github.com/adamatan/adamatan>`_

hypothesis-python/RELEASE.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
RELEASE_TYPE: patch
2+
3+
This patch makes Hypothesis more tolerant of slow-to-satisfy ``assume()`` calls.
4+
Previously, Hypothesis would give up after ``max(max_examples * 10, 1000)`` attempts; now it
5+
uses a statistical test to stop only when 99% confident that <1% of examples
6+
would pass (:issue:`4623`).
7+
8+
Thanks to @ajdavis for this improvement!

hypothesis-python/src/hypothesis/internal/conjecture/engine.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -156,11 +156,26 @@ def timing_report(self) -> str:
156156
return "\n".join(out)
157157

158158

159+
# Statistical thresholds for assumption satisfaction rate.
160+
# We want to stop when we're 99% confident the true valid rate is below 1%.
161+
#
162+
# With k valid examples, we need n invalid examples such that:
163+
# P(seeing <=k valid in n+k trials | true rate = 1%) <= 1%
164+
#
165+
# For k=0: (0.99)^n <= 0.01 → n >= ln(0.01)/ln(0.99) ~= 458.2, i.e. n >= 459
166+
# Each additional valid example adds ~153 to the threshold (solving the
167+
# cumulative binomial for subsequent k values).
168+
#
169+
# Formula: stop when invalid_examples > INVALID_THRESHOLD_BASE + INVALID_PER_VALID * valid_examples
170+
INVALID_THRESHOLD_BASE = 459
171+
INVALID_PER_VALID = 153
172+
173+
159174
class ExitReason(Enum):
160175
max_examples = "settings.max_examples={s.max_examples}"
161176
max_iterations = (
162177
"settings.max_examples={s.max_examples}, "
163-
"but < 10% of examples satisfied assumptions"
178+
"but < 1% of examples satisfied assumptions"
164179
)
165180
max_shrinks = f"shrunk example {MAX_SHRINKS} times"
166181
finished = "nothing left to do"
@@ -713,12 +728,11 @@ def test_function(self, data: ConjectureData) -> None:
713728
# while in the other case below we just want to move on to shrinking.)
714729
if self.valid_examples >= self.settings.max_examples:
715730
self.exit_with(ExitReason.max_examples)
716-
if self.call_count >= max(
717-
self.settings.max_examples * 10,
718-
# We have a high-ish default max iterations, so that tests
719-
# don't become flaky when max_examples is too low.
720-
1000,
721-
):
731+
# Stop when we're 99% confident the true valid rate is below 1%.
732+
invalid_threshold = (
733+
INVALID_THRESHOLD_BASE + INVALID_PER_VALID * self.valid_examples
734+
)
735+
if (self.invalid_examples + self.overrun_examples) > invalid_threshold:
722736
self.exit_with(ExitReason.max_iterations)
723737

724738
if self.__tree_is_exhausted():
@@ -1077,8 +1091,12 @@ def should_generate_more(self) -> bool:
10771091
# but with the important distinction that this clause will move on to
10781092
# the shrinking phase having found one or more bugs, while the other
10791093
# will exit having found zero bugs.
1080-
if self.valid_examples >= self.settings.max_examples or self.call_count >= max(
1081-
self.settings.max_examples * 10, 1000
1094+
invalid_threshold = (
1095+
INVALID_THRESHOLD_BASE + INVALID_PER_VALID * self.valid_examples
1096+
)
1097+
if (
1098+
self.valid_examples >= self.settings.max_examples
1099+
or (self.invalid_examples + self.overrun_examples) > invalid_threshold
10821100
): # pragma: no cover
10831101
return False
10841102

hypothesis-python/tests/conjecture/test_engine.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from hypothesis.internal.conjecture.data import ConjectureData, Overrun, Status
3737
from hypothesis.internal.conjecture.datatree import compute_max_children
3838
from hypothesis.internal.conjecture.engine import (
39+
INVALID_THRESHOLD_BASE,
3940
MIN_TEST_CALLS,
4041
ConjectureRunner,
4142
ExitReason,
@@ -1215,11 +1216,11 @@ def test(data):
12151216

12161217

12171218
def test_shrink_after_max_iterations():
1218-
"""If we find a bug, keep looking for more, and then hit the test call
1219-
limit, we should still proceed to shrinking.
1219+
"""If we find a bug, keep looking for more, and then hit the invalid
1220+
examples limit, we should still proceed to shrinking.
12201221
"""
12211222
max_examples = 10
1222-
max_iterations = max_examples * 10
1223+
max_iterations = INVALID_THRESHOLD_BASE
12231224
fail_at = max_iterations - 5
12241225

12251226
invalid = set()

hypothesis-python/tests/cover/test_testdecorators.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
strategies as st,
2626
)
2727
from hypothesis.errors import Unsatisfiable
28+
from hypothesis.internal.conjecture.engine import INVALID_THRESHOLD_BASE
2829
from hypothesis.strategies import (
2930
binary,
3031
booleans,
@@ -507,8 +508,8 @@ def f(v):
507508
with pytest.raises(
508509
Unsatisfiable,
509510
match=(
510-
r"Unable to satisfy assumptions of f\. 1000 of 1000 examples "
511-
r"failed a \.filter\(\) or assume\(\)"
511+
rf"Unable to satisfy assumptions of f\. {INVALID_THRESHOLD_BASE+1} of "
512+
rf"{INVALID_THRESHOLD_BASE+1} examples failed a \.filter\(\) or assume\(\)"
512513
),
513514
):
514515
f()
@@ -532,8 +533,8 @@ def f(v):
532533
pass
533534

534535
match = (
535-
r"1000 of 1000 examples were too large to finish generating; try "
536-
r"reducing the typical size of your inputs\?"
536+
rf"{INVALID_THRESHOLD_BASE+1} of {INVALID_THRESHOLD_BASE+1} examples were too large to"
537+
rf" finish generating; try reducing the typical size of your inputs\?"
537538
)
538539
with (
539540
pytest.raises(Unsatisfiable, match=match),

hypothesis-python/tests/nocover/test_conjecture_engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def test_lot_of_dead_nodes():
2828
@run_to_nodes
2929
def nodes(data):
3030
for i in range(4):
31-
if data.draw_integer(0, 2**8 - 1) != i:
31+
if data.draw_integer(0, 2**7 - 1) != i:
3232
data.mark_invalid()
3333
data.mark_interesting(interesting_origin())
3434

hypothesis-python/tests/pytest/test_statistics.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,21 +53,21 @@ def test_prints_statistics_given_option(testdir):
5353
out = get_output(testdir, TESTSUITE, PRINT_STATISTICS_OPTION)
5454
assert "Hypothesis Statistics" in out
5555
assert "max_examples=100" in out
56-
assert "< 10% of examples satisfied assumptions" in out
56+
assert "< 1% of examples satisfied assumptions" in out
5757

5858

5959
def test_prints_statistics_given_option_under_xdist(testdir):
6060
out = get_output(testdir, TESTSUITE, PRINT_STATISTICS_OPTION, "-n", "2")
6161
assert "Hypothesis Statistics" in out
6262
assert "max_examples=100" in out
63-
assert "< 10% of examples satisfied assumptions" in out
63+
assert "< 1% of examples satisfied assumptions" in out
6464

6565

6666
def test_prints_statistics_given_option_with_junitxml(testdir):
6767
out = get_output(testdir, TESTSUITE, PRINT_STATISTICS_OPTION, "--junit-xml=out.xml")
6868
assert "Hypothesis Statistics" in out
6969
assert "max_examples=100" in out
70-
assert "< 10% of examples satisfied assumptions" in out
70+
assert "< 1% of examples satisfied assumptions" in out
7171

7272

7373
@skipif_threading
@@ -80,7 +80,7 @@ def test_prints_statistics_given_option_under_xdist_with_junitxml(testdir):
8080
)
8181
assert "Hypothesis Statistics" in out
8282
assert "max_examples=100" in out
83-
assert "< 10% of examples satisfied assumptions" in out
83+
assert "< 1% of examples satisfied assumptions" in out
8484

8585

8686
UNITTEST_TESTSUITE = """

0 commit comments

Comments
 (0)