@@ -187,19 +187,23 @@ func (w *When) PromoteRollout() *When {
187187 if w .rollout == nil {
188188 w .t .Fatal ("Rollout not set" )
189189 }
190+ w .waitForPauseConditionsSet ()
190191 _ , err := promote .PromoteRollout (w .rolloutClient .ArgoprojV1alpha1 ().Rollouts (w .namespace ), w .rollout .GetName (), false , false , false )
191192 w .CheckError (err )
192193 w .log .Info ("Promoted rollout" )
194+ w .clearControllerPauseIfNeeded ()
193195 return w
194196}
195197
196198func (w * When ) PromoteRolloutFull () * When {
197199 if w .rollout == nil {
198200 w .t .Fatal ("Rollout not set" )
199201 }
202+ w .waitForPauseConditionsSet ()
200203 _ , err := promote .PromoteRollout (w .rolloutClient .ArgoprojV1alpha1 ().Rollouts (w .namespace ), w .rollout .GetName (), false , false , true )
201204 w .CheckError (err )
202205 w .log .Info ("Promoted rollout fully" )
206+ w .clearControllerPauseIfNeeded ()
203207 return w
204208}
205209
@@ -729,6 +733,109 @@ func (w *When) StopLoad() *When {
729733 return w
730734}
731735
736+ // waitForPauseConditionsSet waits for the controller to finish setting up the pause state.
737+ // This ensures pauseConditions is populated when controllerPause is true, which indicates
738+ // the controller has completed its reconciliation and is ready for a promote.
739+ func (w * When ) waitForPauseConditionsSet () {
740+ rolloutIf := w .rolloutClient .ArgoprojV1alpha1 ().Rollouts (w .namespace )
741+ err := retryutil .OnError (retryutil .DefaultBackoff , func (err error ) bool {
742+ return true
743+ }, func () error {
744+ ro , err := rolloutIf .Get (w .Context , w .rollout .GetName (), metav1.GetOptions {})
745+ if err != nil {
746+ return err
747+ }
748+ if ro .Status .ObservedGeneration != strconv .FormatInt (ro .Generation , 10 ) {
749+ return fmt .Errorf ("waiting for observedGeneration (%s) to match generation (%d)" , ro .Status .ObservedGeneration , ro .Generation )
750+ }
751+ if ro .Status .ControllerPause && len (ro .Status .PauseConditions ) == 0 {
752+ return fmt .Errorf ("waiting for pauseConditions to be set (controllerPause=true)" )
753+ }
754+ return nil
755+ })
756+ w .CheckError (err )
757+ }
758+
759+ // clearControllerPauseIfNeeded checks if the controller has processed the promote.
760+ // Due to the controller's writeBackToInformer function, watch events may be missed
761+ // after a reconciliation. If the controller hasn't processed the promote (indicated
762+ // by controllerPause=true with empty pauseConditions), we force a reconciliation
763+ // by scaling the rollout, which triggers ReplicaSet changes.
764+ // Note: This workaround is only applied to canary rollouts. Bluegreen rollouts have
765+ // different pause/promote semantics where the scale workaround causes the controller
766+ // to re-add the pause when reconciling with stale informer cache.
767+ func (w * When ) clearControllerPauseIfNeeded () {
768+ rolloutIf := w .rolloutClient .ArgoprojV1alpha1 ().Rollouts (w .namespace )
769+
770+ // Poll until the controller processes the promote (clears controllerPause).
771+ err := retryutil .OnError (retryutil .DefaultBackoff , func (err error ) bool {
772+ return true
773+ }, func () error {
774+ ro , err := rolloutIf .Get (w .Context , w .rollout .GetName (), metav1.GetOptions {})
775+ if err != nil {
776+ return err
777+ }
778+ if len (ro .Status .PauseConditions ) > 0 {
779+ return nil
780+ }
781+ if ! ro .Status .ControllerPause {
782+ return nil
783+ }
784+ return fmt .Errorf ("waiting for controller to process promote" )
785+ })
786+
787+ ro , getErr := rolloutIf .Get (w .Context , w .rollout .GetName (), metav1.GetOptions {})
788+ w .CheckError (getErr )
789+
790+ if len (ro .Status .PauseConditions ) > 0 || ! ro .Status .ControllerPause {
791+ return
792+ }
793+
794+ // Force a reconciliation to make the controller process the promote
795+ if err != nil {
796+ w .log .Info ("Forcing reconciliation (controller race condition workaround)" )
797+
798+ if ro .Spec .Strategy .Canary != nil {
799+ currentReplicas := int32 (1 )
800+ if ro .Spec .Replicas != nil {
801+ currentReplicas = * ro .Spec .Replicas
802+ }
803+ scalePatch := []byte (fmt .Sprintf (`{"spec":{"replicas":%d}}` , currentReplicas + 1 ))
804+ _ , scaleErr := rolloutIf .Patch (w .Context , w .rollout .GetName (), types .MergePatchType , scalePatch , metav1.PatchOptions {})
805+ w .CheckError (scaleErr )
806+
807+ err = retryutil .OnError (retryutil .DefaultBackoff , func (err error ) bool {
808+ return true
809+ }, func () error {
810+ ro , err := rolloutIf .Get (w .Context , w .rollout .GetName (), metav1.GetOptions {})
811+ if err != nil {
812+ return err
813+ }
814+
815+ observedGen , err := strconv .Atoi (ro .Status .ObservedGeneration )
816+ if err != nil {
817+ return err
818+ }
819+
820+ if int64 (observedGen ) >= ro .Generation {
821+ return nil
822+ }
823+
824+ return fmt .Errorf ("waiting for controller to reconcile" )
825+ })
826+ w .CheckError (err )
827+
828+ scalePatch = []byte (fmt .Sprintf (`{"spec":{"replicas":%d}}` , currentReplicas ))
829+ _ , scaleErr = rolloutIf .Patch (w .Context , w .rollout .GetName (), types .MergePatchType , scalePatch , metav1.PatchOptions {})
830+ w .CheckError (scaleErr )
831+ } else if ro .Spec .Strategy .BlueGreen != nil {
832+ annotationPatch := []byte (fmt .Sprintf (`{"metadata":{"annotations":{"e2e-reconcile-trigger":"%d"}}}` , time .Now ().UnixNano ()))
833+ _ , patchErr := rolloutIf .Patch (w .Context , w .rollout .GetName (), types .MergePatchType , annotationPatch , metav1.PatchOptions {})
834+ w .CheckError (patchErr )
835+ }
836+ }
837+ }
838+
732839func (w * When ) Then () * Then {
733840 return & Then {
734841 Common : w .Common ,
0 commit comments