diff --git a/HA_Design/create_backup.sh b/HA_Design/create_backup.sh
new file mode 100644
index 0000000..388080e
--- /dev/null
+++ b/HA_Design/create_backup.sh
@@ -0,0 +1,130 @@
+#!/bin/sh
+
+PATH=/usr/bin:/sbin:; export PATH
+
+date=$(date '+%Y_%m_%d_%H_%M_%S')
+
+echo "=== starting backup: now is $date ==="
+
+name="backup_$date.tgz"
+tmp_name="backup_$date.tmp.tgz"
+name2="backup_$date.evm.tgz"
+tmp_name2="backup_$date.evm.tmp.tgz"
+
+curl --location --request POST "127.0.0.1:8881/" --header "Content-Type: application/json" --data-raw "{\"method\":\"eth_blockNumber\",\"params\":[\"0x1\",false],\"id\":0}"
+r=$?
+
+if [ $r -eq 0 ]
+then
+ echo "rpc seems responding"
+else
+ echo "OH! rpc not working!!!"
+ exit 1
+fi
+
+echo "=== try to grep error from rpc response ==="
+r=`curl --location --request POST "127.0.0.1:8881/" --header "Content-Type: application/json" --data-raw "{\"method\":\"eth_blockNumber\",\"params\":[\"0x1\",false],\"id\":0}" | grep error | wc -l`
+
+if [ $r -eq 0 ]
+then
+ echo "rpc seems working fine without error"
+else
+ echo "OH! rpc response has error!!!"
+ exit 1
+fi
+
+LEAP_BASE=/home/ubuntu/leap
+${LEAP_BASE}/cleos get info
+r=$?
+
+if [ $r -eq 0 ]
+then
+ echo "cleos get info seems working fine"
+else
+ echo "OH! cleos get info not working!!!"
+ exit 2
+fi
+
+echo "stop eos-evm node & rpc"
+pkill eos-evm-node
+pkill eos-evm-rpc
+sleep 2.0
+
+echo "create leap snapshot..."
+rm -rf ${LEAP_BASE}/data-dir/snapshots/*
+curl http://127.0.0.1:8888/v1/producer/create_snapshot
+
+echo "snapshot created.. stop nodeos"
+pkill nodeos
+sleep 30.0
+
+curl --location --request POST "127.0.0.1:8881/" --header "Content-Type: application/json" --data-raw "{\"method\":\"eth_blockNumber\",\"params\":[\"0x1\",false],\"id\":0}"
+r=$?
+
+if [ $r -eq 0 ]
+then
+ echo "rpc not killed!!!"
+ exit 2
+fi
+
+${LEAP_BASE}/cleos get info
+r=$?
+if [ $r -eq 0 ]
+then
+ echo "nodeos not killed!!!"
+ exit 3
+fi
+
+cd /home/ubuntu
+r=`ls ./backups/ -ltr | wc -l`
+if [ $r -gt 6 ]
+then
+ # remove old backups first to save space
+ find ./backups/ -mtime +1 -type f -delete
+ echo "removed old backups"
+fi
+rm ${LEAP_BASE}/nodeos.log
+rm /home/ubuntu/node/eos-evm/node.log
+
+mkdir backups
+tar zcvf backups/$tmp_name2 ./node/eos-evm/chain-data
+tar zcvf backups/$tmp_name ./leap/data-dir/state-history ./leap/data-dir/blocks ./leap/data-dir/snapshots ./leap/data-dir/protocol_features
+
+echo "now bring back nodeos"
+cd ${LEAP_BASE}
+./start.sh > nodeos.log 2>&1 &
+sleep 30.0
+
+echo "now bring back eos-evm"
+cd /home/ubuntu/node/eos-evm
+./start_evm_node.sh > node.log 2>&1 &
+./start_rpc.sh > rpc.log 2>&1 &
+
+cd /home/ubuntu
+mv backups/$tmp_name backups/$name
+mv backups/$tmp_name2 backups/$name2
+
+ln -sf backups/$name ./last_full_backup.tgz
+ln -sf backups/$name2 ./last_evm_backup.tgz
+
+echo "backup files $name & $name2 created successfully!"
+
+r=`ls ./backups/ -ltr | wc -l`
+if [ $r -gt 6 ]
+then
+ find ./backups/ -mtime +1 -type f -delete
+ echo "removed old backups"
+fi
+
+if [ -z "$(ls -A ${LEAP_BASE}/data-dir/snapshots)" ]; then
+ echo "snapshot dir ${LEAP_BASE}/data-dir/snapshots contain no files, please fix snapshot"
+ exit 4
+else
+ rm -rf /home/ubuntu/snapshots.old
+ mv /home/ubuntu/snapshots /home/ubuntu/snapshots.old
+ mv ${LEAP_BASE}/data-dir/snapshots /home/ubuntu/snapshots
+ ln -sf ./snapshots/* last_snapshot.bin
+ echo "snapshot created at /home/ubuntu/snapshots"
+fi
+exit 0
+
diff --git a/HA_Design/readme.md b/HA_Design/readme.md
new file mode 100644
index 0000000..6a15cb9
--- /dev/null
+++ b/HA_Design/readme.md
@@ -0,0 +1,180 @@
+# High availability design for EOS EVM Infrastructure
+
+This document describes how to set up an EOS EVM infrastructure with high availability.
+
+## Prerequisite: set up a minimum EOS EVM service infrastructure
+This is a minimum EOS EVM infrastructure setup without high availability support. Follow the steps from https://github.com/eosnetworkfoundation/evm-public-docs/tree/main/deployments_plan_for_CEX#RMS to set up this infrastructure step by step.
+```
+Real-time service:
+ +--VM1 (leap VM) -------------------+ +-- VM2 (EVM node VM) -------+
+ | leap node running in head mode | <-- | eos-evm-node & eos-evm-rpc | <-- read requests
+ | with state_history_plugin enabled | +----------------------------+ \ +------ VM2 -------+
+ +-----------------------------------+ ---- | proxy |
+ ^ / | web-socket-proxy |
+ | +-- VM2 (EVM node VM) ----+ / +------------------+
+ \-- push EOS ---| eos-evm-miner (wrapper) | <-- write requests
+ transactions +-------------------------+
+
+Periodic Backup service (no need to scale):
+ +--VM3 (Backup VM) ------------------------+ +--VM3 (Backup VM) ----------+
+ | leap node running in irreversible mode | <----- | eos-evm-node & eos-evm-rpc |
+ | with state_history_plugin enabled | +----------------------------+
+ +------------------------------------------+
+```
+
+## High availability design step 1: deploy leap nodes to multiple VMs
+We first scale the leap node (in the real-time service) from 1 leap VM instance to 2 or more leap VM instances in the same region.
+```
+Real-time service:
+ +-- VM11 ----------+
+ | leap node | <---
+ +------------------+ \ +---- VM2 -----------------+
+ select the available leap to connect --| eos-evm-node, rpc, miner |
+ +-- VM12 ----------+ / | proxy, web-socket-proxy |
+ | leap node | <--- +--------------------------+
+ +------------------+
+```
+We can use the `get_info` request via a script to find out the available leap node to connect/reconnect.
+
+
+## High availability design step 2: deploy eos-evm-node, rpc, miner, proxy and other services to multiple VMs
+We then scale up the number of eos-evm-node VM instances from 1 to 2 or even more. Each of them will independently detect and select the available leap node to connect with.
+```
+Real-time service:
+ +----- VM21 -------------+
+ /| eos-evm-node, rpc, ... |
+ +-- VM11 ----------+ / +------------------------+
+ | leap node | <------\ /
+ +------------------+ \ VM21, VM22, VM23 independently / +----- VM22 -------------+
+ select the available leap to connect ---| eos-evm-node, rpc, ... |
+ +-- VM12 ----------+ / \ +------------------------+
+ | leap node | <------/ \
+ +------------------+ \ +----- VM23 -------------+
+ \ | eos-evm-node, rpc, ... |
+ +------------------------+
+```
+
+
+## High availability design step 3: Using script & pm2 service to manage leap node process
+To make sure every leap node keeps running all the time, we need an auto restart & recovery script that will:
+
+- 1. detect if there's already a running leap (nodeos) process
+- 2. try start leap process normally.
+- 3. if leap start fails, clean up the state, recover the state via snapshot generated from backup VM, and restart leap process with snapshot
+
+[This is the template for leap's start.sh script](start_leap.sh)
+
+We also need to use the pm2 service to run the above script as a service.
+
+
+## High availability design step 4: Using script & pm2 service to manage eos-evm-node, rpc, miner, proxy..
+We also need to use a script `start_evm_node.sh` to auto restart & recover eos-evm-node. The script will do:
+
+- 1. detect which leap node is available
+- 2. try to start eos-evm-node normally, connecting to the state-history-plugin endpoint of the available leap node
+- 3. if eos-evm-node start fails, clean up evm-node database, download the evm backup from backup VM, try step 2 one more time.
+
+[This is the template for start_evm_node.sh script](start_evm_node.sh)
+
+
+we also need to use pm2 service to run the script as a service. for example:
+`cd eos-evm && pm2 start start_evm_node.sh -l node.log --name evm_node --kill-timeout 10000`
+
+use pm2 to run eos-evm-rpc. for example:
+`cd eos-evm && pm2 start start_rpc.sh -l rpc.log --name evm_rpc1 --kill-timeout 10000`
+in which start_rpc.sh is:
+`./eos-evm-rpc --api-spec=eth,debug,net,trace --chain-id=17777 --http-port=0.0.0.0:8881 --eos-evm-node=127.0.0.1:8080 --chaindata=./chain-data`
+
+use pm2 to run miner. for example:
+`cd tx_wrapper && pm2 start index.js -l wrapper.log --name tx_wrapper --kill-timeout 10000`
+
+use docker to run proxy when VM starts.
+```
+cd tx_proxy
+sudo mkdir -p logs
+sudo mkdir -p logs/error
+sudo docker run --add-host=host.docker.internal:host-gateway -p 80:80 -v ${PWD}/logs:/var/log/nginx -d --restart=always --name=tx_proxy evm/tx_proxy
+sudo docker restart tx_proxy
+```
+see https://github.com/eosnetworkfoundation/eos-evm-node/tree/main/peripherals/proxy for more details.
+
+
+use pm2 to run web-socket-proxy. for example:
+```
+cd eos-evm-ws-proxy
+WS_LISTENING_HOST=0.0.0.0 pm2 start main.js -l ws_proxy.log --name ws_proxy --kill-timeout 10000 --update-env
+```
+see https://github.com/eosnetworkfoundation/eos-evm-node/tree/main/peripherals/eos-evm-ws-proxy for more details.
+
+
+
+
+## High availability design step 5 (Optional): scale up multiple miners in each EVM VM:
+To further scale up transactions per second, we may also consider running multiple miners on each EVM machine:
+```
+Real-time service:
+ +--VM1 (leap VM) -------------------+ +-- VM2 (EVM node VM) -------+
+ | leap node running in head mode | <-- | eos-evm-node & eos-evm-rpc | <-- read requests
+ | with state_history_plugin enabled | +----------------------------+ \ +------ VM2 -------+
+ +-----------------------------------+ ---- | proxy |
+ ^ / | web-socket-proxy |
+ | +-- VM2 (EVM node VM) ----+ / +------------------+
+ \-- push EOS ---| eos-evm-miner1 | <-- write requests
+ transactions | eos-evm-miner2 |
+ | eos-evm-miner3 |
+ | eos-evm-miner4 |
+ +-------------------------+
+```
+This can be easily done by appending `-i 4` into the pm2 command:
+```
+pm2 start ./dist/index.js --name evm-miner -l miner.log --kill-timeout 10000 -i 4
+```
+
+## High availability design step 6: set up the same infrastructure in a second region
+We further scale the infrastructure from 1 region to 2 or more regions.
+In each region, we can set up a target group for load balancing the traffic between multiple evm-nodes.
+And finally, set up a global DNS load balancer to split the traffic between different regions according to their geographical locations.
+
+However, the backup service is not required to be scaled.
+```
+ +---- Region 1 (Real-time service) -------------------------- +
+ | VM11 (leap) |
+ | VM12 (leap) |
+ | VM21 (evm-node, rpc, miners, proxy,...)-\ |
+ | - target group1 | <----\
+ | VM22 (evm-node, rpc, miners, proxy,...)-/ | \
+ +-------------------------------------------------------------+ \--- Global DNS Load balancer
+ /
+ +---- Region 2 (Real-time service) ---------------------------+ /
+ | VM11 (leap) | <----/
+ | VM12 (leap) |
+ | VM21 (evm-node, rpc, miners, proxy,...)-\ |
+ | - target group1 |
+ | VM22 (evm-node, rpc, miners, proxy,...)-/ |
+ +-------------------------------------------------------------+
+
+ +--- Backup VM in region 1 --------+
+ | leap (backup, irreversible mode) |
+ | evm-node, rpc |
+ +----------------------------------+
+```
+
+
+
+## Generate leap & evm backup periodically
+Here are the steps to generate leap backup and EVM backup:
+
+- 1. ensure leap & eos-evm-node is up
+- 2. gracefully stop eos-evm-node & eos-evm-rpc
+- 3. create leap snapshot
+- 4. gracefully stop leap
+- 5. backup evm chain-data folder
+- 6. backup leap's snapshot, state_history, block logs
+- 7. bring up leap
+- 8. bring up eos-evm
+- 9. remove old backups
+
+[This sample script can be used in backup VM to create leap & evm backup](create_backup.sh) Each time a backup is generated, both the leap node and the evm node need to be gracefully shut down.
+
+
+
diff --git a/HA_Design/start_evm_node.sh b/HA_Design/start_evm_node.sh
new file mode 100644
index 0000000..889c242
--- /dev/null
+++ b/HA_Design/start_evm_node.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+PATH=/usr/bin:/sbin:; export PATH
+
+curpwd=${PWD}
+
+leap1=
+leap2=
+backupip=
+backup_sshkeyfile=
+
+url=$leap1
+date=$(date '+%Y_%m_%d_%H_%M_%S')
+sec=`date +%s`
+
+curl http://$leap1:8888/v1/chain/get_info
+r1=$?
+curl http://$leap2:8888/v1/chain/get_info
+r2=$?
+
+if [ $r1 -eq 0 ]
+then
+ url=$leap1
+else
+ if [ $r2 -eq 0 ]
+ then
+ url=$leap2
+ else
+ echo "no leap connection available!!!"
+ exit 1
+ fi
+fi
+
+sleep 5.0
+
+sec1=`date +%s`
+./eos-evm-node --ship-endpoint=$url:8999 --ship-core-account eosio.evm --chain-data ./chain-data --plugin block_conversion_plugin --plugin blockchain_plugin --nocolor 1 --verbosity=4 --genesis-json=./genesis.json
+
+sec2=`date +%s`
+diff=$((sec2-sec1))
+
+if [ $diff -lt 5 ]
+then
+ echo "=== failed to start eos-evm-node, try to restore from backup from $backupip==="
+ # wait for port 8080 released
+ sleep 60.0
+ scp -i $backup_sshkeyfile -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@$backupip:~/last_evm_backup.tgz ~/
+ mv ./chain-data ./chain-data.$date
+ mkdir ./chain-data
+ cd ~/
+ tar zxvf ~/last_evm_backup.tgz
+ echo "=== killing eos-evm-rpc process & start eos-evm-node again ==="
+ pkill eos-evm-rpc
+ cd $curpwd
+ ./eos-evm-node --ship-endpoint=$url:8999 --ship-core-account eosio.evm --chain-data ./chain-data --plugin block_conversion_plugin --plugin blockchain_plugin --nocolor 1 --verbosity=4 --genesis-json=./genesis.json
+fi
+
diff --git a/HA_Design/start_leap.sh b/HA_Design/start_leap.sh
new file mode 100644
index 0000000..d97a054
--- /dev/null
+++ b/HA_Design/start_leap.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+# modify the following parameters if necessary
+backupip=
+backup_sshkeyfile=
+
+sysctl -w kernel.core_pattern=core.leap
+
+cd /home/ubuntu/leap/
+
+./cleos get info
+r=$?
+if [ $r -eq 0 ]
+then
+ echo "=== seems nodeos is running already ==="
+ # avoid this script frequently being called
+ sleep 30
+ exit 1
+fi
+
+sec1=`date +%s`
+
+sudo sh -c "ulimit -c unlimited && ulimit -n 30000 && ulimit -s 64000 && ./nodeos --p2p-accept-transactions=0 --database-map-mode=locked --data-dir=./data-dir --config-dir=./data-dir --http-max-response-time-ms=1000 --disable-replay-opts --max-body-size=10000000 $@"
+
+sec2=`date +%s`
+diff=$((sec2-sec1))
+
+if [ $diff -lt 60 ]
+then
+ echo "=== failed to start nodeos, try to recover from snapshot file ==="
+ sudo pkill -9 nodeos
+ sleep 10
+ scp -i $backup_sshkeyfile -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@$backupip:~/last_snapshot.bin ~/last_snapshot.bin
+ sudo rm -rf ./data-dir/state/*
+ sudo sh -c "ulimit -c unlimited && ulimit -n 30000 && ulimit -s 64000 && ./nodeos --p2p-accept-transactions=0 --database-map-mode=locked --data-dir=./data-dir --config-dir=./data-dir --http-max-response-time-ms=1000 --disable-replay-opts --max-body-size=10000000 --snapshot /home/ubuntu/last_snapshot.bin $@"
+fi
+