@@ -233,6 +233,10 @@ enum InstanceStateChangeRequestAction {
233233 /// Request the appropriate state change from the sled with the specified
234234 /// UUID.
235235 SendToSled { sled_id : SledUuid , propolis_id : PropolisUuid } ,
236+
237+ /// The instance is not currently incarnated on a sled, so just update its
238+ /// runtime state in the database without communicating with a sled-agent.
239+ UpdateRuntime ( db:: model:: InstanceRuntimeState ) ,
236240}
237241
238242/// What is the higher level operation that is calling
@@ -811,13 +815,39 @@ impl super::Nexus {
811815 still being created"
812816 ) )
813817 }
818+ // Failed instances may transition to Stopped by just changing
819+ // the Nexus state in the database to NoVmm.
820+ //
821+ // An instance's effective state will never be Failed while it
822+ // is linked with a VMM. If the instance has an active VMM which
823+ // is Failed, the instance's effective state will be Stopping,
824+ // rather than Failed, until an instance-update saga has
825+ // unlinked the Failed VMM. We can guarantee this is the case,
826+ // as a database CHECK constraint will not permit an instance
827+ // with an active Propolis ID to be Failed. Therefore, we know
828+ // that a Failed instance is definitely not incarnated on a
829+ // sled, so all we need to do to "stop" it is to update its
830+ // state in the database.
831+ InstanceState :: Failed if matches ! ( requested, InstanceStateChangeRequest :: Stop ) => {
832+ // As discussed above, this shouldn't happen, so return an
833+ // internal error and complain about it in the logs.
834+ if vmm_state. is_some ( ) {
835+ return Err ( Error :: internal_error (
836+ "an instance should not be in the Failed \
837+ effective state if it has an active VMM"
838+ ) ) ;
839+ }
814840
841+ let prev_runtime = instance_state. runtime ( ) ;
842+ return Ok ( InstanceStateChangeRequestAction :: UpdateRuntime ( db:: model:: InstanceRuntimeState {
843+ time_updated : chrono:: Utc :: now ( ) ,
844+ r#gen : prev_runtime. r#gen . 0 . next ( ) . into ( ) ,
845+ nexus_state : db:: model:: InstanceState :: NoVmm ,
846+ ..prev_runtime. clone ( )
847+ } ) ) ;
848+ }
815849 // If the instance has no sled because it's been destroyed or
816850 // has fallen over, reject the state change.
817- //
818- // TODO(#2825): Failed instances should be allowed to stop, but
819- // this requires a special action because there is no sled to
820- // send the request to.
821851 InstanceState :: Failed | InstanceState :: Destroyed => {
822852 return Err ( Error :: invalid_request ( & format ! (
823853 "instance state cannot be changed from {}" ,
@@ -898,6 +928,26 @@ impl super::Nexus {
898928 & requested,
899929 ) ? {
900930 InstanceStateChangeRequestAction :: AlreadyDone => Ok ( ( ) ) ,
931+ InstanceStateChangeRequestAction :: UpdateRuntime ( new_runtime) => {
932+ let instance_id =
933+ InstanceUuid :: from_untyped_uuid ( prev_instance_state. id ( ) ) ;
934+ let changed = self
935+ . datastore ( )
936+ . instance_update_runtime ( & instance_id, & new_runtime)
937+ . await
938+ . map_err ( InstanceStateChangeError :: Other ) ?;
939+ if !changed {
940+ // TODO(eliza): perhaps we should refetch the instance here
941+ // and return Ok if it was in the desired state...
942+ Err ( InstanceStateChangeError :: Other ( Error :: conflict (
943+ "The instance was previously in a state that allowed \
944+ the requested state change, but the instance's state \
945+ changed before the request could be completed",
946+ ) ) )
947+ } else {
948+ Ok ( ( ) )
949+ }
950+ }
901951 InstanceStateChangeRequestAction :: SendToSled {
902952 sled_id,
903953 propolis_id,
0 commit comments