Skip to content

Commit c1333ed

Browse files
committed
Fix workflow failing because of function timeouts
1 parent 1e9535f commit c1333ed

File tree

2 files changed

+24
-9
lines changed

2 files changed

+24
-9
lines changed

.changeset/cyan-ducks-wonder.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@workflow/core": patch
3+
---
4+
5+
Allow step retrying if it fails without proper cleanup

packages/core/src/runtime.ts

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -640,9 +640,11 @@ export const stepEntrypoint =
640640
let result: unknown;
641641
const attempt = step.attempt + 1;
642642
try {
643-
if (step.status !== 'pending') {
644-
// We should only be running the step if it's pending
645-
// (initial state, or state set on re-try), so the step has been
643+
if (!['pending', 'running'].includes(step.status)) {
644+
// We should only be running the step if it's either
645+
// a) pending - initial state, or state set on re-try
646+
// b) running - if a step fails mid-execution, like a function timeout
647+
// so the step has been
646648
// invoked erroneously.
647649
console.error(
648650
`[Workflows] "${workflowRunId}" - Step invoked erroneously, expected status "pending", got "${step.status}" instead, skipping execution`
@@ -698,11 +700,24 @@ export const stepEntrypoint =
698700
() => stepFn(...args)
699701
);
700702

703+
// NOTE: None of the code from this point is guaranteed to run
704+
// Since the step might fail or cause a function timeout and the process might be SIGKILL'd
705+
// The workflow runtime must be resilient to the below code not executing on a failed step
706+
701707
result = dehydrateStepReturnValue(result, ops);
702708

703709
waitUntil(Promise.all(ops));
704710

705-
// Update the event log with the step result
711+
// Mark the step as completed first. This order is important. If a concurrent
712+
// execution marked the step as complete, this request should throw, and
713+
// this prevent the step_completed event in the event log
714+
// TODO: this should really be atomic and handled by the world
715+
await world.steps.update(workflowRunId, stepId, {
716+
status: 'completed',
717+
output: result as Serializable,
718+
});
719+
720+
// Then, append the event log with the step result
706721
await world.events.create(workflowRunId, {
707722
eventType: 'step_completed',
708723
correlationId: stepId,
@@ -711,11 +726,6 @@ export const stepEntrypoint =
711726
},
712727
});
713728

714-
await world.steps.update(workflowRunId, stepId, {
715-
status: 'completed',
716-
output: result as Serializable,
717-
});
718-
719729
span?.setAttributes({
720730
...Attribute.StepStatus('completed'),
721731
...Attribute.StepResultType(typeof result),

0 commit comments

Comments
 (0)