@@ -156,11 +156,26 @@ def timing_report(self) -> str:
156156 return "\n " .join (out )
157157
158158
159+ # Statistical thresholds for assumption satisfaction rate.
160+ # We want to stop when we're 99% confident the true valid rate is below 1%.
161+ #
162+ # With k valid examples, we need n invalid examples such that:
163+ # P(seeing <=k valid in n+k trials | true rate = 1%) <= 1%
164+ #
165+ # For k=0: (0.99)^n <= 0.01 → n >= ln(0.01)/ln(0.99) ~= 459
166+ # Each additional valid example adds ~153 to the threshold (solving the
167+ # cumulative binomial for subsequent k values).
168+ #
169+ # Formula: stop when (invalid_examples + overrun_examples) > INVALID_THRESHOLD_BASE + INVALID_PER_VALID * valid_examples
170+ INVALID_THRESHOLD_BASE = 459
171+ INVALID_PER_VALID = 153
172+
173+
159174class ExitReason (Enum ):
160175 max_examples = "settings.max_examples={s.max_examples}"
161176 max_iterations = (
162177 "settings.max_examples={s.max_examples}, "
163- "but < 10 % of examples satisfied assumptions"
178+ "but < 1 % of examples satisfied assumptions"
164179 )
165180 max_shrinks = f"shrunk example { MAX_SHRINKS } times"
166181 finished = "nothing left to do"
@@ -713,12 +728,11 @@ def test_function(self, data: ConjectureData) -> None:
713728 # while in the other case below we just want to move on to shrinking.)
714729 if self .valid_examples >= self .settings .max_examples :
715730 self .exit_with (ExitReason .max_examples )
716- if self .call_count >= max (
717- self .settings .max_examples * 10 ,
718- # We have a high-ish default max iterations, so that tests
719- # don't become flaky when max_examples is too low.
720- 1000 ,
721- ):
731+ # Stop when we're 99% confident the true valid rate is below 1%.
732+ invalid_threshold = (
733+ INVALID_THRESHOLD_BASE + INVALID_PER_VALID * self .valid_examples
734+ )
735+ if (self .invalid_examples + self .overrun_examples ) > invalid_threshold :
722736 self .exit_with (ExitReason .max_iterations )
723737
724738 if self .__tree_is_exhausted ():
@@ -1077,8 +1091,12 @@ def should_generate_more(self) -> bool:
10771091 # but with the important distinction that this clause will move on to
10781092 # the shrinking phase having found one or more bugs, while the other
10791093 # will exit having found zero bugs.
1080- if self .valid_examples >= self .settings .max_examples or self .call_count >= max (
1081- self .settings .max_examples * 10 , 1000
1094+ invalid_threshold = (
1095+ INVALID_THRESHOLD_BASE + INVALID_PER_VALID * self .valid_examples
1096+ )
1097+ if (
1098+ self .valid_examples >= self .settings .max_examples
1099+ or (self .invalid_examples + self .overrun_examples ) > invalid_threshold
10821100 ): # pragma: no cover
10831101 return False
10841102
0 commit comments