◐ Shell
clean mode source ↗

fix: allow lifecycle code path to retry failed stop jobs (#26277) · coder/coder@0847137

@@ -653,6 +653,72 @@ func TestWorkspaceAutobuild(t *testing.T) {

653653

require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop)

654654

})

655655656+

// FailureTTLStopOK verifies that a workspace whose latest build is a failed

657+

// stop is retried by issuing another stop after the failure TTL elapses.

658+

t.Run("FailureTTLStopOK", func(t *testing.T) {

659+

t.Parallel()

660+661+

var (

662+

ticker = make(chan time.Time)

663+

statCh = make(chan autobuild.Stats)

664+

logger = slogtest.Make(t, &slogtest.Options{

665+

// We ignore errors here since we expect to fail

666+

// builds.

667+

IgnoreErrors: true,

668+

})

669+

failureTTL = time.Minute

670+

)

671+672+

client, db, user := coderdenttest.NewWithDatabase(t, &coderdenttest.Options{

673+

Options: &coderdtest.Options{

674+

Logger: &logger,

675+

AutobuildTicker: ticker,

676+

IncludeProvisionerDaemon: true,

677+

AutobuildStats: statCh,

678+

TemplateScheduleStore: schedule.NewEnterpriseTemplateScheduleStore(agplUserQuietHoursScheduleStore(), notifications.NewNoopEnqueuer(), logger, nil),

679+

},

680+

LicenseOptions: &coderdenttest.LicenseOptions{

681+

Features: license.Features{codersdk.FeatureAdvancedTemplateScheduling: 1},

682+

},

683+

})

684+685+

// The start build succeeds, but the stop build fails. This leaves the

686+

// workspace's latest build as a failed stop.

687+

version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{

688+

Parse: echo.ParseComplete,

689+

ProvisionPlan: echo.PlanComplete,

690+

ProvisionApplyMap: map[proto.WorkspaceTransition][]*proto.Response{

691+

proto.WorkspaceTransition_START: echo.ApplyComplete,

692+

proto.WorkspaceTransition_STOP: echo.ApplyFailed,

693+

},

694+

})

695+

template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) {

696+

ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds())

697+

})

698+

coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)

699+

ws := coderdtest.CreateWorkspace(t, client, template.ID)

700+

coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, ws.LatestBuild.ID)

701+702+

ctx := testutil.Context(t, testutil.WaitLong)

703+

stopBuild, err := client.CreateWorkspaceBuild(ctx, ws.ID, codersdk.CreateWorkspaceBuildRequest{

704+

Transition: codersdk.WorkspaceTransitionStop,

705+

})

706+

require.NoError(t, err)

707+

build := coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, stopBuild.ID)

708+

require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status)

709+

require.Equal(t, codersdk.WorkspaceTransitionStop, build.Transition)

710+

tickTime := build.Job.CompletedAt.Add(failureTTL * 2)

711+712+

p, err := coderdtest.GetProvisionerForTags(db, time.Now(), ws.OrganizationID, nil)

713+

require.NoError(t, err)

714+

coderdtest.UpdateProvisionerLastSeenAt(t, db, p.ID, tickTime)

715+

ticker <- tickTime

716+

stats := <-statCh

717+

// Expect the workspace to be stopped again for breaching failure TTL.

718+

require.Len(t, stats.Transitions, 1)

719+

require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop)

720+

})

721+656722

t.Run("FailureTTLTooEarly", func(t *testing.T) {

657723

t.Parallel()

658724