@@ -199,6 +199,7 @@ def done(self, *nodes):
                     self._ready_nodes.append(successor)
             self._nfinished += 1
 
+    # See note "On Finding Cycles" at the bottom.
     def _find_cycle(self):
         n2i = self._node2info
         stack = []
@@ -212,8 +213,6 @@ def _find_cycle(self):
 
             while True:
                 if node in seen:
-                    # If we have seen already the node and is in the
-                    # current stack we have found a cycle.
                     if node in node2stacki:
                         return stack[node2stacki[node] :] + [node]
                     # else go on to get next successor
@@ -228,11 +227,15 @@ def _find_cycle(self):
                 while stack:
                     try:
                         node = itstack[-1]()
-                        break
+                        break  # resume at top of "while True:"
                     except StopIteration:
+                        # no more successors; pop the stack
+                        # and continue looking up
                         del node2stacki[stack.pop()]
                         itstack.pop()
                 else:
+                    # stack is empty; look for a fresh node to
+                    # start over from (a node not yet in seen)
                     break
         return None
 
@@ -252,3 +255,55 @@ def static_order(self):
             self.done(*node_group)
 
     __class_getitem__ = classmethod(GenericAlias)
+
+
+# On Finding Cycles
+# -----------------
+# There is at least one total order consistent with the graph if and
+# only if the graph is acyclic.
+#
+# When it is cyclic, "there's a cycle - somewhere!" isn't very helpful.
+# In theory, it would be most helpful to partition the graph into
+# strongly connected components (SCCs) and display those with more than
+# one node. Then all cycles could easily be identified "by eyeball".
+#
+# That's a lot of work, though, and we can get most of the benefit much
+# more easily just by showing a single specific cycle.
+#
+# Approaches to that are based on breadth-first or depth-first search
+# (BFS or DFS). BFS is the most natural, and can easily be arranged to
+# find a shortest-possible cycle. But the memory burden can be high,
+# because every path-in-progress has to keep its own idea of what "the
+# path" is so far.
+#
+# DFS is much easier on RAM, requiring only _the_ path from the
+# starting node to the current node at the current recursion level. But
+# there may be any number of nodes, so there's no bound on recursion
+# depth short of the total number of nodes.
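+#
+# (A hypothetical recursive sketch - not this module's code - makes the
+# depth problem concrete; `graph` here maps each node to a list of its
+# successors:
+#
+#     def find_cycle(graph, node, path):
+#         if node in path:  # back on the current path: a cycle
+#             return path[path.index(node):] + [node]
+#         for succ in graph[node]:
+#             cycle = find_cycle(graph, succ, path + [node])
+#             if cycle is not None:
+#                 return cycle
+#         return None
+#
+# On a chain graph {1: [2], 2: [3], ..., n: []} this recurses n levels
+# deep, blowing past CPython's default recursion limit for large n.)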
+#
+# So we use an iterative version of DFS, keeping an explicit list
+# (`stack`) of the path so far. A parallel stack (`itstack`) holds the
+# `__next__` method of an iterator over the current level's node's
+# successors, so when backtracking to a shallower level we can just call
+# that to get the node's next successor. This is state that a recursive
+# version would implicitly store in a `for` loop's internals.
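+#
+# (For illustration - the names are made up - a stored bound `__next__`
+# resumes an iteration exactly where it left off, which is just what
+# backtracking needs:
+#
+#     get_next = iter(["B", "C"]).__next__
+#     get_next()  # -> "B"
+#     get_next()  # -> "C"
+#     get_next()  # raises StopIteration: this level is exhausted
+# )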
+#
+# `seen` is a set recording which nodes have already been, at some
+# time, pushed on the stack. If a node has been pushed on the stack,
+# DFS will find any cycle it's part of, so there's no need to ever look
+# at it again.
+#
+# Finally, `node2stacki` maps a node to its index on the current stack,
+# for and only for nodes currently _on_ the stack. If a successor to be
+# pushed on the stack is in that dict, the node is already on the path,
+# at that index. The cycle is then `stack[that_index :] + [node]`.
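+#
+# (Worked example with made-up nodes: if stack == ["A", "B", "C"] and
+# "C"'s next successor is "B", then node2stacki["B"] == 1, so the cycle
+# is stack[1 :] + ["B"] == ["B", "C", "B"], i.e. B -> C -> B.)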
+#
+# As is often the case when removing recursion, the control flow looks
+# a bit off. The "while stack:" loop here rarely actually loops - it's
+# only looking to go "up the stack" until finding a level that has
+# another successor to consider, emulating a chain of returns in a
+# recursive version.
+#
+# Worst cases: O(V+E) for time, and O(V) for memory, where V is the
+# number of nodes and E the number of edges (which may be quadratic in
+# V!). It requires care to ensure these bounds are met.
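+#
+# (A quick sanity check through the public API; the tiny graph is
+# illustrative only, and the exact node order in the reported cycle may
+# vary:
+#
+#     >>> ts = TopologicalSorter({"A": {"B"}, "B": {"C"}, "C": {"A"}})
+#     >>> ts.prepare()
+#     Traceback (most recent call last):
+#       ...
+#     graphlib.CycleError: ('nodes are in a cycle', ['A', 'C', 'B', 'A'])
+#
+# The cycle found is the second element of the exception's args.)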