28 changes: 25 additions & 3 deletions arbnode/node.go
@@ -48,6 +48,7 @@ import (
"github.com/offchainlabs/nitro/util/containers"
"github.com/offchainlabs/nitro/util/contracts"
"github.com/offchainlabs/nitro/util/headerreader"
"github.com/offchainlabs/nitro/util/livedbsnapshotter"
"github.com/offchainlabs/nitro/util/redisutil"
"github.com/offchainlabs/nitro/util/rpcclient"
"github.com/offchainlabs/nitro/util/signature"
@@ -74,6 +75,7 @@ type Config struct {
ResourceMgmt resourcemanager.Config `koanf:"resource-mgmt" reload:"hot"`
BlockMetadataFetcher BlockMetadataFetcherConfig `koanf:"block-metadata-fetcher" reload:"hot"`
ConsensusExecutionSyncer ConsensusExecutionSyncerConfig `koanf:"consensus-execution-syncer"`
LiveDBSnapshotter livedbsnapshotter.Config `koanf:"live-db-snapshotter"`
// SnapSyncConfig is only used for testing purposes, these should not be configured in production.
SnapSyncTest SnapSyncConfig
}
@@ -145,6 +147,7 @@ func ConfigAddOptions(prefix string, f *flag.FlagSet, feedInputEnable bool, feed
MaintenanceConfigAddOptions(prefix+".maintenance", f)
BlockMetadataFetcherConfigAddOptions(prefix+".block-metadata-fetcher", f)
ConsensusExecutionSyncerConfigAddOptions(prefix+".consensus-execution-syncer", f)
livedbsnapshotter.ConfigAddOptions(prefix+".live-db-snapshotter", f)
}

var ConfigDefault = Config{
@@ -168,6 +171,7 @@ var ConfigDefault = Config{
Maintenance: DefaultMaintenanceConfig,
ConsensusExecutionSyncer: DefaultConsensusExecutionSyncerConfig,
SnapSyncTest: DefaultSnapSyncConfig,
LiveDBSnapshotter: livedbsnapshotter.DefaultConfig,
}

func ConfigDefaultL1Test() *Config {
@@ -270,6 +274,7 @@ type Node struct {
configFetcher ConfigFetcher
ctx context.Context
ConsensusExecutionSyncer *ConsensusExecutionSyncer
liveDBSnapshotter *livedbsnapshotter.LiveDBSnapshotter
}

type SnapSyncConfig struct {
@@ -942,6 +947,7 @@ func getNodeParentChainReaderDisabled(
syncMonitor *SyncMonitor,
configFetcher ConfigFetcher,
blockMetadataFetcher *BlockMetadataFetcher,
liveDBSnapshotter *livedbsnapshotter.LiveDBSnapshotter,
) *Node {
return &Node{
ArbDB: arbDb,
@@ -970,6 +976,7 @@
configFetcher: configFetcher,
ctx: ctx,
blockMetadataFetcher: blockMetadataFetcher,
liveDBSnapshotter: liveDBSnapshotter,
}
}

@@ -991,6 +998,7 @@ func createNodeImpl(
fatalErrChan chan error,
parentChainID *big.Int,
blobReader daprovider.BlobReader,
dBSnapshotTrigger chan struct{},
) (*Node, error) {
config := configFetcher.Get()

@@ -1041,8 +1049,13 @@
return nil, err
}

var liveDBSnapshotter *livedbsnapshotter.LiveDBSnapshotter
if config.LiveDBSnapshotter.Enable {
liveDBSnapshotter = livedbsnapshotter.NewLiveDBSnapshotter(arbDb, "arbitrumdata", dBSnapshotTrigger, config.LiveDBSnapshotter.Dir, false, nil)
}

if !config.ParentChainReader.Enable {
- return getNodeParentChainReaderDisabled(ctx, arbDb, stack, executionClient, executionSequencer, executionRecorder, txStreamer, blobReader, broadcastServer, broadcastClients, coordinator, maintenanceRunner, syncMonitor, configFetcher, blockMetadataFetcher), nil
+ return getNodeParentChainReaderDisabled(ctx, arbDb, stack, executionClient, executionSequencer, executionRecorder, txStreamer, blobReader, broadcastServer, broadcastClients, coordinator, maintenanceRunner, syncMonitor, configFetcher, blockMetadataFetcher, liveDBSnapshotter), nil
}

delayedBridge, sequencerInbox, err := getDelayedBridgeAndSequencerInbox(deployInfo, l1client)
@@ -1118,6 +1131,7 @@
configFetcher: configFetcher,
ctx: ctx,
ConsensusExecutionSyncer: consensusExecutionSyncer,
liveDBSnapshotter: liveDBSnapshotter,
}, nil
}

@@ -1221,11 +1235,12 @@ func CreateNodeExecutionClient(
fatalErrChan chan error,
parentChainID *big.Int,
blobReader daprovider.BlobReader,
dBSnapshotTrigger chan struct{},
) (*Node, error) {
if executionClient == nil {
return nil, errors.New("execution client must be non-nil")
}
- currentNode, err := createNodeImpl(ctx, stack, executionClient, nil, nil, nil, arbDb, configFetcher, l2Config, l1client, deployInfo, txOptsValidator, txOptsBatchPoster, dataSigner, fatalErrChan, parentChainID, blobReader)
+ currentNode, err := createNodeImpl(ctx, stack, executionClient, nil, nil, nil, arbDb, configFetcher, l2Config, l1client, deployInfo, txOptsValidator, txOptsBatchPoster, dataSigner, fatalErrChan, parentChainID, blobReader, dBSnapshotTrigger)
if err != nil {
return nil, err
}
@@ -1251,11 +1266,12 @@ func CreateNodeFullExecutionClient(
fatalErrChan chan error,
parentChainID *big.Int,
blobReader daprovider.BlobReader,
dBSnapshotTrigger chan struct{},
) (*Node, error) {
if (executionClient == nil) || (executionSequencer == nil) || (executionRecorder == nil) || (executionBatchPoster == nil) {
return nil, errors.New("execution client, sequencer, recorder, and batch poster must be non-nil")
}
- currentNode, err := createNodeImpl(ctx, stack, executionClient, executionSequencer, executionRecorder, executionBatchPoster, arbDb, configFetcher, l2Config, l1client, deployInfo, txOptsValidator, txOptsBatchPoster, dataSigner, fatalErrChan, parentChainID, blobReader)
+ currentNode, err := createNodeImpl(ctx, stack, executionClient, executionSequencer, executionRecorder, executionBatchPoster, arbDb, configFetcher, l2Config, l1client, deployInfo, txOptsValidator, txOptsBatchPoster, dataSigner, fatalErrChan, parentChainID, blobReader, dBSnapshotTrigger)
if err != nil {
return nil, err
}
@@ -1403,10 +1419,16 @@ func (n *Node) Start(ctx context.Context) error {
if n.ConsensusExecutionSyncer != nil {
n.ConsensusExecutionSyncer.Start(ctx)
}
if n.liveDBSnapshotter != nil {
n.liveDBSnapshotter.Start(ctx)
}
return nil
}

func (n *Node) StopAndWait() {
if n.liveDBSnapshotter != nil {
n.liveDBSnapshotter.StopAndWait()
}
if n.MaintenanceRunner != nil && n.MaintenanceRunner.Started() {
n.MaintenanceRunner.StopAndWait()
}
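The util/livedbsnapshotter package referenced throughout this file is not included in the diff. From the call sites above (the Enable and Dir config fields, ConfigAddOptions, DefaultConfig, and the constructor call NewLiveDBSnapshotter(arbDb, "arbitrumdata", dBSnapshotTrigger, config.LiveDBSnapshotter.Dir, false, nil)), its configuration surface plausibly looks like the sketch below. The flag names, descriptions, and pflag import are inferred rather than confirmed by the PR, and the meaning of the constructor's two trailing arguments (false, nil) is not visible in this diff.

// Sketch only: inferred from the call sites in this PR, not the actual
// util/livedbsnapshotter implementation.
package livedbsnapshotter

import (
	flag "github.com/spf13/pflag"
)

type Config struct {
	Enable bool   `koanf:"enable"`
	Dir    string `koanf:"dir"`
}

var DefaultConfig = Config{
	Enable: false,
	Dir:    "",
}

func ConfigAddOptions(prefix string, f *flag.FlagSet) {
	// Registered above under the "live-db-snapshotter" prefix.
	f.Bool(prefix+".enable", DefaultConfig.Enable, "enable live snapshotting of the database")
	f.String(prefix+".dir", DefaultConfig.Dir, "directory in which database snapshots are created")
}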
29 changes: 29 additions & 0 deletions cmd/nitro/nitro.go
@@ -523,6 +523,32 @@ func mainImpl() int {
}
}

// TODO: after the consensus-execution split, remove this code block and modify LiveDBSnapshotter to
// read SIGUSR2 directly. Currently execution triggers snapshot creation on the consensus side once
// its own snapshotting is complete.
var executionDBSnapShotTrigger, consensusDBSnapShotTrigger chan struct{}
Contributor commented (nitpick):

Suggested change:
- var executionDBSnapShotTrigger, consensusDBSnapShotTrigger chan struct{}
+ var executionDBSnapshotTrigger, consensusDBSnapshotTrigger chan struct{}

if nodeConfig.Execution.LiveDBSnapshotter.Enable {
executionDBSnapShotTrigger = make(chan struct{}, 1)
go func() {
sigusr := make(chan os.Signal, 1)
signal.Notify(sigusr, syscall.SIGUSR2)
for {
select {
case <-ctx.Done():
return
case <-sigusr:
select {
case executionDBSnapShotTrigger <- struct{}{}:
default:
}
}
}
}()
}
if nodeConfig.Node.LiveDBSnapshotter.Enable {
consensusDBSnapShotTrigger = make(chan struct{}, 1)
}

execNode, err := gethexec.CreateExecutionNode(
ctx,
stack,
@@ -531,6 +557,8 @@
l1Client,
func() *gethexec.Config { return &liveNodeConfig.Get().Execution },
liveNodeConfig.Get().Node.TransactionStreamer.SyncTillBlock,
executionDBSnapShotTrigger,
consensusDBSnapShotTrigger, // execution will invoke consensus's db snapshotting
@diegoximenes (Contributor) commented on Apr 15, 2025:

Today we have a circular call dependency between Consensus and Execution, but ideally we would like to get rid of that, and we are moving in that direction.

In the current model, Consensus calls Execution; e.g., Consensus retrieves messages from different sources and then calls Execution to generate blocks for those messages. We want to avoid the need for Execution to call Consensus. This could also enable us, in the future, to have a single Consensus node controlling multiple Execution nodes.

That said, I think we should move the snapshot trigger API from Execution to Consensus, and have Consensus trigger Execution's DB snapshot. How about that? We already have some RPC APIs on Consensus, so we can reuse the API framework that is already implemented; examples are the BlockValidatorAPI and the MaintenanceAPI.

Contributor commented:

Also, in the near future we intend to split Consensus and Execution into different processes, so Execution communicating with Consensus directly through a channel can make the split a little less straightforward.

Today we rely on functions on the ConsensusSequencer interface that Execution can use to call Consensus. If we decide to keep this snapshot trigger API on Execution, we could use that interface to enable Execution -> Consensus communication here too.

@ganeshvanahalli (Contributor, Author) commented on Apr 18, 2025:

Thanks for the context! But for a working snapshot to be generated we need to first snapshot Execution's DBs and then Consensus's DB, i.e. block height in chainDB <= block height in arbDB.

I understand your suggestion that Execution shouldn't call Consensus actions, but this is merely a signal transfer to start DB snapshotting. Moreover, it is implemented in a way that can be easily removed after the split, with the idea that Consensus and Execution will each have a snapshotting API that snapshots their respective DBs.

Making Consensus trigger Execution's database snapshot creation is essentially the reverse of what we currently have, just with the code moved to the Consensus side, and that code will need to be removed after the split anyway.

If this is really a priority then I can completely decouple it, so that livedbsnapshotter returns a success signal after DB creation and nitro.go handles signals entirely; let me know.

Contributor commented:

I will ping you so we can chat 🙂, it will be easier to give you more context, but here goes a summary.

> But for a working snapshot to be generated we need to first snapshot Execution's DBs and then Consensus's DB, i.e. block height in chainDB <= block height in arbDB.

There is a common misconception that the procedure coordinating an action between multiple components should be placed where the action starts to happen. The analogy in computer networking, and in distributed systems, is that the control plane doesn't need to live where the data plane is.

Nitro, in some parts, follows this colocation pattern today, which has some issues. One example is that the procedure that triggers regular sequencing is placed where the sequencing takes place, Execution. PR #3111 moves away from that pattern and moves sequencing triggering to Consensus; there are several benefits to that. So, with that PR, sequencing triggering coordination is no longer placed where sequencing takes place.

That said, live snapshot coordination can be placed in Consensus, and we will still be able to snapshot Execution's DB before Consensus's DB. We could even place it in another component, like a CLI similar to dbconv.

> Moreover, it is implemented in a way that can be easily removed after the split, with the idea that Consensus and Execution will each have a snapshotting API that snapshots their respective DBs. Making Consensus trigger Execution's database snapshot creation is essentially the reverse of what we currently have, just with the code moved to the Consensus side, and that code will need to be removed after the split anyway.

I don't fully understand that 😕 Why will the code related to live snapshots need to be removed after the split?

If I inferred correctly, you are saying that only after the split will we have an API on Consensus that triggers a snapshot of Consensus's DB, so a human node operator will be able to first trigger Execution's DB snapshot and after that trigger Consensus's DB snapshot, right?

I am more inclined toward a UX in which the human node operator triggers a snapshot only once, and the software is responsible for coordinating snapshotting of the Execution and Consensus sides in the correct order. It is simpler for human operators and less error prone too 🙂. Also, we should move forward in a way that does not require redoing a big chunk of this feature to make it ready for the Consensus/Execution split, which will happen soon 🙂; we want this feature to be fully compliant with the Consensus and Execution split from the start.

Also, we intend to have other Execution clients, such as one based on reth, but we do not intend to have multiple Consensus clients, at least in the near future. If we keep live snapshot coordination on the Execution side then we are unnecessarily increasing the effort to develop new Execution clients, which would need to implement that coordination behavior.

Contributor commented:

Also, to snapshot Consensus's DB we will need to hold insertionMutex, right?

)
if err != nil {
log.Error("failed to create execution node", "err", err)
@@ -555,6 +583,7 @@
fatalErrChan,
new(big.Int).SetUint64(nodeConfig.ParentChain.ID),
blobReader,
consensusDBSnapShotTrigger,
)
if err != nil {
log.Error("failed to create node", "err", err)
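CreateExecutionNode's side of this wiring is not shown in this diff, but the TODO above and the inline note on the call describe the intended flow: SIGUSR2 feeds executionDBSnapShotTrigger, execution snapshots its own databases, and only then is consensusDBSnapShotTrigger fired, preserving the invariant that the block height in chainDB stays <= the block height in arbDB. A minimal, self-contained sketch of that relay (all names here are hypothetical, not the actual implementation):

package main

import (
	"context"
	"fmt"
	"time"
)

// relaySnapshotTriggers mirrors the ordering described above: each trigger
// first snapshots the execution side, then nudges the consensus side.
func relaySnapshotTriggers(ctx context.Context, execTrigger <-chan struct{}, consensusTrigger chan<- struct{}, snapshotExecutionDBs func()) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-execTrigger:
			snapshotExecutionDBs() // chainDB is flushed and snapshotted first
			select {
			case consensusTrigger <- struct{}{}: // then arbDB is snapshotted
			default: // a consensus snapshot is already pending; don't pile up
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	execTrigger := make(chan struct{}, 1)
	consensusTrigger := make(chan struct{}, 1)
	go relaySnapshotTriggers(ctx, execTrigger, consensusTrigger, func() { fmt.Println("execution DBs snapshotted") })
	execTrigger <- struct{}{}
	<-consensusTrigger
	fmt.Println("consensus DB snapshot triggered")
}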
4 changes: 4 additions & 0 deletions cmd/replay/db.go
@@ -18,6 +18,10 @@ import (

type PreimageDb struct{}

func (db PreimageDb) CreateDBSnapshot(dir string) error {
return errors.New("createDBSnapshot method is not supported by PreimageDb")
}

func (db PreimageDb) Has(key []byte) (bool, error) {
if len(key) != 32 {
return false, nil
19 changes: 14 additions & 5 deletions execution/gethexec/api.go
@@ -11,6 +11,7 @@ import (
"math/big"
"sync"
"sync/atomic"
"syscall"
"time"

"github.com/ethereum/go-ethereum/arbitrum"
@@ -84,13 +85,21 @@ func (a *ArbTimeboostAPI) SendExpressLaneTransaction(ctx context.Context, msg *t
}

type ArbDebugAPI struct {
- blockchain *core.BlockChain
- blockRangeBound uint64
- timeoutQueueBound uint64
+ blockchain *core.BlockChain
+ blockRangeBound uint64
+ timeoutQueueBound uint64
+ enableLiveDBSnapshotting bool
}

- func NewArbDebugAPI(blockchain *core.BlockChain, blockRangeBound uint64, timeoutQueueBound uint64) *ArbDebugAPI {
- return &ArbDebugAPI{blockchain, blockRangeBound, timeoutQueueBound}
+ func NewArbDebugAPI(blockchain *core.BlockChain, blockRangeBound uint64, timeoutQueueBound uint64, enableLiveDBSnapshotting bool) *ArbDebugAPI {
+ return &ArbDebugAPI{blockchain, blockRangeBound, timeoutQueueBound, enableLiveDBSnapshotting}
}

func (api *ArbDebugAPI) CreateDBSnapshot(ctx context.Context) error {
if !api.enableLiveDBSnapshotting {
return errors.New("live database snapshot creation is not enabled")
}
return syscall.Kill(syscall.Getpid(), syscall.SIGUSR2)
}

type PricingModelHistory struct {
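With the method above in place, an operator could request a snapshot over RPC instead of sending the signal by hand. The sketch below assumes the API is registered under the arbdebug namespace (suggested by the ArbDebugAPI type name but not shown in this diff) and that the node serves HTTP RPC on nitro's default port; the call simply makes the node deliver SIGUSR2 to itself, as the hunk above shows.

package main

import (
	"context"
	"log"

	"github.com/ethereum/go-ethereum/rpc"
)

func main() {
	// Endpoint and namespace are assumptions; adjust to your node's config.
	client, err := rpc.Dial("http://localhost:8547")
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()
	// CreateDBSnapshot takes no arguments and returns only an error.
	if err := client.CallContext(context.Background(), nil, "arbdebug_createDBSnapshot"); err != nil {
		log.Fatal(err)
	}
	log.Println("database snapshot requested (SIGUSR2 sent to the node process)")
}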
17 changes: 17 additions & 0 deletions execution/gethexec/executionengine.go
@@ -47,6 +47,7 @@ import (
"github.com/offchainlabs/nitro/cmd/chaininfo"
"github.com/offchainlabs/nitro/execution"
"github.com/offchainlabs/nitro/util/arbmath"
"github.com/offchainlabs/nitro/util/livedbsnapshotter"
"github.com/offchainlabs/nitro/util/sharedmetrics"
"github.com/offchainlabs/nitro/util/stopwaiter"
)
@@ -103,6 +104,8 @@ type ExecutionEngine struct {

cachedL1PriceData *L1PriceData
syncTillBlock uint64

liveDBSnapshotter *livedbsnapshotter.LiveDBSnapshotter
}

func NewL1PriceData() *L1PriceData {
@@ -226,6 +229,16 @@ func (s *ExecutionEngine) SetReorgEventsNotifier(reorgEventsNotifier chan struct
s.reorgEventsNotifier = reorgEventsNotifier
}

func (s *ExecutionEngine) SetLiveDBSnapshotter(liveDBSnapshotter *livedbsnapshotter.LiveDBSnapshotter) {
if s.Started() {
panic("trying to set liveDBSnapshotter after start")
}
if s.liveDBSnapshotter != nil {
panic("trying to set liveDBSnapshotter when already set")
}
s.liveDBSnapshotter = liveDBSnapshotter
}

func (s *ExecutionEngine) EnableReorgSequencing() {
if s.Started() {
panic("trying to enable reorg sequencing after start")
@@ -798,6 +811,10 @@ func (s *ExecutionEngine) appendBlock(block *types.Block, statedb *state.StateDB
blockGasUsedHistogram.Update(int64(blockGasused))
gasUsedSinceStartupCounter.Inc(int64(blockGasused))
s.updateL1GasPriceEstimateMetric()
if s.liveDBSnapshotter != nil && s.liveDBSnapshotter.IsSnapshotDue() {
s.bc.FlushToDisk(false)
s.liveDBSnapshotter.CreateDBSnapshot()
}
return nil
}

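The hook in appendBlock above runs after every block: FlushToDisk persists the chain state first, and only then is the snapshot cut, so the on-disk execution snapshot is complete up to the block just appended. How IsSnapshotDue and CreateDBSnapshot cooperate internally is not part of this diff; one plausible shape, assuming the key-value store gained the CreateDBSnapshot(dir string) error method implied by the PreimageDb stub in cmd/replay/db.go, is:

// Assumed internals, for illustration only; the real package is not in this diff.
package livedbsnapshotter

import (
	"context"
	"sync/atomic"

	"github.com/ethereum/go-ethereum/log"
)

// snapshotDB is implied by the PreimageDb stub in cmd/replay/db.go.
type snapshotDB interface {
	CreateDBSnapshot(dir string) error
}

type LiveDBSnapshotter struct {
	db      snapshotDB
	name    string
	dir     string
	trigger <-chan struct{}
	due     atomic.Bool
}

func (s *LiveDBSnapshotter) watchTrigger(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-s.trigger:
			s.due.Store(true) // observed by IsSnapshotDue in appendBlock
		}
	}
}

func (s *LiveDBSnapshotter) IsSnapshotDue() bool { return s.due.Load() }

func (s *LiveDBSnapshotter) CreateDBSnapshot() {
	if !s.due.Swap(false) { // consume the pending request exactly once
		return
	}
	if err := s.db.CreateDBSnapshot(s.dir); err != nil {
		log.Error("live database snapshot failed", "db", s.name, "err", err)
	}
}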