# 0L epoch-archive (BACKUPS) for v6.9 # RESOURCES: https://aptos.dev/nodes/full-node/aptos-db-restore/#restore-a-db-using-the-public-backup-files https://aptos.dev/nodes/full-node/aptos-db-restore/#public-backup-files https://github.com/0LNetworkCommunity/diem/blob/0de006b7ff52c32f0e267d9188975f52a826eb39/specifications/db_backup/spec.md?plain=1#L314 https://github.com/0LNetworkCommunity/diem/blob/0de006b7ff52c32f0e267d9188975f52a826eb39/storage/backup/backup-cli/src/utils/backup_service_client.rs#L75 https://github.com/0LNetworkCommunity/diem/blob/0de006b7ff52c32f0e267d9188975f52a826eb39/storage/backup/backup-service/src/lib.rs https://github.com/aptos-labs/aptos-networks/blob/main/mainnet/backups/s3-public.yaml https://github.com/0LNetworkCommunity/diem/blob/0de006b7ff52c32f0e267d9188975f52a826eb39/storage/backup/backup-service/src/handlers/mod.rs#L17 https://github.com/0LNetworkCommunity/diem/blob/0de006b7ff52c32f0e267d9188975f52a826eb39/helm/fullnode/templates/backup.yaml#L52 https://github.com/facebook/rocksdb/blob/master/include/rocksdb/utilities/backupable_db.h # OS packages sudo apt update && sudo apt -y upgrade sudo apt -y install git zip unzip jq build-essential cmake clang llvm libgmp-dev secure-delete pkg-config libssl-dev lld tmux # Ubuntu 22.04 does not have libssl1.1 support so use the following # you might get dpkg: warning: downgrading libssl-dev:amd64 from 3.0.2-0ubuntu1.10 to 1.1.1f-1ubuntu2 # ok no problem cd ~ wget http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2_amd64.deb wget http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.1.1f-1ubuntu2_amd64.deb sudo dpkg -i libssl-dev_1.1.1f-1ubuntu2_amd64.deb # Cargo rustup self uninstall -y rm -rf ~/.cargo ~/.rustup unset RUSTC_WRAPPER unset RUSTC_WORKSPACE_WRAPPER sudo apt remove rustc curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y source "$HOME/.cargo/env" rustup default nightly && rustup update cd ~ && cargo install toml-cli --force # clone the upstream vendorized diem repo cd ~ git clone https://github.com/0LNetworkCommunity/libra-framework cd libra-framework # build the bins (and get some coffee) cargo build -r # or build the backup cli tool from release (much less time) cargo build -r -p diem-db-tool # - OR - TESTING: to run from the diem repo cargo run -p libra -- diem-db-tool # copy the bins mkdir -p ~/bin cp -f ~/libra-framework/target/release/libra ~/bin/libra cp -f ~/libra-framework/target/release/diem-db-tool ~/bin/diem-db-tool # start a fresh epoch-archive and grab the Makefile from the epoch-archive repo mkdir -p ~/test-epoch-archive cd ~/test-epoch-archive # First: either perform genesis or restore from backup # METHOD 1: developing the Makefile (on pause as we need to first use the Aptos tooling) # METHOD 2: Amazon S3 Backups using vendorized diem repo as an upstream from libra-framework # https://github.com/aptos-labs/aptos-core/blob/8a7bcab84b167c1f2dc47792887f3cdec392f5ae/storage/README.md?plain=1#L163 # make sure the backup coordinator template is as follows (modify the FOLDER value) nano /home/sirouk/test-epoch-archive/test-epoch-archive.yaml # PREVIOUSLY: ``` env_vars: - key: "FOLDER" value: "/home/sirouk/test-epoch-archive" commands: create_backup: 'cd "$FOLDER" && mkdir $BACKUP_NAME && echo $BACKUP_NAME' create_for_write: 'cd "$FOLDER" && cd "$BACKUP_HANDLE" && test ! -f $FILE_NAME && touch $FILE_NAME && echo $BACKUP_HANDLE/$FILE_NAME && exec >&- && gzip -c > $FILE_NAME' open_for_read: 'cat "$FOLDER/$FILE_HANDLE" | gzip -cd' save_metadata_line: 'cd "$FOLDER" && mkdir -p metadata && cd metadata && FILE_HANDLE="metadata/$FILE_NAME" && echo "$FILE_HANDLE"; exec 1>&- && gzip -c > $FILE_NAME' list_metadata_files: 'cd "$FOLDER" && (test -d metadata && cd metadata && ls -1 || exec) | while read f; do echo metadata/$f; done' backup_metadata_file: 'cd "$FOLDER" && mkdir -p metadata_backup && mv metadata/$FILE_NAME metadata_backup/$FILE_NAME' ``` # NEW (modeled after the Aptos s3-public.yaml) ``` env_vars: - key: "FOLDER" value: "/home/sirouk/test-epoch-archive" commands: create_backup: 'echo $BACKUP_NAME' create_for_write: | mkdir -p "$FOLDER/$BACKUP_HANDLE" FILE_HANDLE="$BACKUP_HANDLE/$FILE_NAME" echo "$FILE_HANDLE" exec 1>&- gzip -c > "$FOLDER/$FILE_HANDLE" open_for_read: 'cat "$FOLDER/$FILE_HANDLE" | gzip -cd' save_metadata_line: | mkdir -p "$FOLDER/metadata" FILE_HANDLE="metadata/$FILE_NAME" echo "$FILE_HANDLE" exec 1>&- gzip -c > "$FOLDER/$FILE_HANDLE" list_metadata_files: '(test -d $FOLDER/metadata && cd $FOLDER/metadata && ls -1 || exec) | while read f; do echo metadata/$f; done' backup_metadata_file: 'mkdir -p $FOLDER/metadata_backup && mv $FOLDER/metadata/$FILE_NAME $FOLDER/metadata_backup/$FILE_NAME' ``` # Start the node libra node --config-path ~/.libra/validator.yaml # Take a Continuous Backup (state snapshot each epoch, every 100K transactions) diem-db-tool backup continuously --command-adapter-config ~/test-epoch-archive/test-epoch-archive.yaml --state-snapshot-interval-epochs 1 --transaction-batch-size 100000 # Without transaction 0, the transaction portion of the command above will fail # if so, we will need to specify a transaction range starting point - so get the height curl 127.0.0.1:9101/metrics 2> /dev/null | grep "state_sync_version{type=\"synced\"}" # Use the transaction version to fire off the transaction backup. This will be helpful in providing a backup with a node that does not have the height from genesis diem-db-tool backup oneoff transaction --command-adapter-config ~/test-epoch-archive/test-epoch-archive.yaml \ --start-version 3393207 \ --num_transactions 100 # Try again for a Continuous Backup (state snapshot each epoch, every 1K transactions) diem-db-tool backup continuously --command-adapter-config ~/test-epoch-archive/test-epoch-archive.yaml --state-snapshot-interval-epochs 1 --transaction-batch-size 1000 # Test the backups against the current DB # 1) stop the node # 2) test! diem-db-tool replay-verify --target-db-dir ~/.libra/data/db --command-adapter-config ~/test-epoch-archive/test-epoch-archive.yaml # you will see Error: DB version is already beyond start_version requested. naturally as the DB it was taken from was live # Wipe/Backup and Restore the DB # move the current db rm -Rf ~/.libra/data/db-bak mv ~/.libra/data/db ~/.libra/data/db-bak # OPTION A: kill db, restore cache, and restore rm -Rf ~/.libra/data/db rm -Rf ~/test-epoch-archive/metacache diem-db-tool restore bootstrap-db \ --target-db-dir ~/.libra/data/db \ --metadata-cache-dir ~/test-epoch-archive/metacache \ --command-adapter-config ~/test-epoch-archive/test-epoch-archive.yaml # OPTION B: restore to a specific version rm -Rf ~/.libra/data/db rm -Rf ~/test-epoch-archive/metacache diem-db-tool restore bootstrap-db \ --concurrent-downloads 2 \ --target-db-dir ~/.libra/data/db \ --ledger-history-start-version 3393207 \ --target-version 3393207 \ --metadata-cache-dir ~/test-epoch-archive/metacache \ --command-adapter-config ~/test-epoch-archive/test-epoch-archive.yaml # Start the node libra node --config-path ~/.libra/validator.yaml # While your node is syncing, you'll be able to see the state_sync_version{type="synced"} metric gradually increase watch -n1 'curl 127.0.0.1:9101/metrics 2> /dev/null | grep "state_sync_version{type=\"synced\"}"' #### OTHER THINGS TO TAKE NOTE OF: ### STATE SYNC DRIVER SELECTION ### NOTE: we might want to use C) Intelligent syncing # consider modifying the validator yaml to match the scenario as needed # https://aptos.dev/guides/state-sync/#configuring-the-state-sync-modes nano ~/.libra/validator.yaml ### By default the ledger pruner keeps 150 million recent transactions. The approximate amount of disk space required for every 150M transactions is 200G. Unless bootstrapped from the genesis and configured to disable the pruner or a long prune window, the node doesn't carry the entirety of the ledger history. Majority of the nodes on both the testnet and mainnet have a partial history of 150 million transactions according to this configuration. # INDEXER - Validator # There should be one foundation and one community node that performs copmlete indexing ``` storage: enable_indexer: true storage_pruner_config: ledger_pruner_config: enable: false ``` # VALIDATOR: add this to disable the indexer ``` storage: enable_indexer: false storage_pruner_config: ledger_pruner_config: enable: true # By default the ledger pruner keeps 150 million # Setting the pruning window smaller than 100 million can lead to runtime errors and damage the health of the node prune_window: 100000000 ``` # A) Executing all transactions (To execute all the transactions since genesis and continue to execute new transactions as they are committed) ``` state_sync: state_sync_driver: bootstrapping_mode: ExecuteTransactionsFromGenesis continuous_syncing_mode: ExecuteTransactions ``` # B) Applying all transaction outputs (To apply all transaction outputs since genesis and continue to apply new transaction outputs as transactions are committed) ``` state_sync: state_sync_driver: bootstrapping_mode: ApplyTransactionOutputsFromGenesis continuous_syncing_mode: ApplyTransactionOutputs ``` # C) Intelligent syncing (To execute or apply all transactions and outputs since genesis and continue to do the same as new transactions are committed) ``` state_sync: state_sync_driver: bootstrapping_mode: ExecuteOrApplyFromGenesis continuous_syncing_mode: ExecuteTransactionsOrApplyOutputs ``` # D) Fast syncing (This is the fastest and cheapest method of syncing your node. It requires the node to start from an empty state, for example not have any existing storage data) # Fast sync should only be used as a last resort for validators and validator fullnodes. This is because fast sync skips all of the blockchain history and as a result: (i) reduces the data availability in the network; and (ii) may hinder validator consensus performance if too much data has been skipped. Thus, validator and validator fullnode operators should be careful to consider alternate ways of syncing before resorting to fast sync. ``` state_sync: state_sync_driver: bootstrapping_mode: DownloadLatestStates continuous_syncing_mode: ApplyTransactionOutputs ``` # 0D chat w/siruok - NOTES: - Backups should backup the things that are necessary for a successful restore - Restore needs to be able to increment the db from the backup to any state that is valid "bootstrapping the db" RocksDb = low level system that stores files MerkleDb = figures a way of spreading out that through a tree (libra uses a sparse Merkle Tree - Jellyfish Db) Storage Drivers = Things that read/write to the Jellyfish DB depending on data types. Each piece of data has a path to it. The storage driver is the thing that knows how to read/write to that path. Jellyfish > RocksDb > files on system. (state management) Storage Management = How we backup, extract, compress Node Layer = node software reads and writes unto the merkle tree # Compare the Aptos bucket files: https://raw.githubusercontent.com/aptos-labs/aptos-networks/main/mainnet/backups/s3-public.yaml https://console.aws.amazon.com/s3/buckets/aptos-mainnet-backup-backup-831a69a8?region=us-east-1&tab=objects # Testing with Aptos rm -Rf ~/test-epoch-archive/metacache cd ~/aptos-core/target/release/ ./aptos-db-tool restore bootstrap-db \ --target-db-dir ~/aptos-core/config/data/db \ --metadata-cache-dir ~/test-epoch-archive/metacache \ --command-adapter-config ~/test-epoch-archive/test-epoch-archive.yaml # SUCESSFUL APTOS RESTORE ``` ... 2023-09-19T07:47:21.309828Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:200 epoch ending restore succeeded. 2023-09-19T07:47:21.310359Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:192 epoch ending restore started. Manifest: epoch_ending_4097-.87c4/epoch_ending.manifest 2023-09-19T07:47:21.312356Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:200 epoch ending restore succeeded. 2023-09-19T07:47:21.887034Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:192 epoch ending restore started. Manifest: epoch_ending_4098-.6b22/epoch_ending.manifest 2023-09-19T07:47:21.888411Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:200 epoch ending restore succeeded. 2023-09-19T07:47:23.567793Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:192 epoch ending restore started. Manifest: epoch_ending_4099-.819f/epoch_ending.manifest 2023-09-19T07:47:23.569211Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:200 epoch ending restore succeeded. 2023-09-19T07:47:27.809284Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:192 epoch ending restore started. Manifest: epoch_ending_4100-.9c83/epoch_ending.manifest 2023-09-19T07:47:27.811689Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:200 epoch ending restore succeeded. 2023-09-19T07:47:27.811717Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:407 Epoch history recovered in 7012.51 seconds 2023-09-19T07:47:27.811906Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/epoch_ending/restore.rs:342 epoch history restore succeeded. 2023-09-19T07:47:27.811917Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/coordinators/restore.rs:307 Starting restore DB from version 0 to target version 267000000 2023-09-19T07:47:27.811926Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/coordinators/restore.rs:319 Start restoring tree snapshot at 266967614 with db_next_version 0 2023-09-19T07:47:27.811932Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/state_snapshot/restore.rs:99 state snapshot restore started. Manifest: state_epoch_4100_ver_266967614.4af5/state.manifest 2023-09-19T07:48:28.977486Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/state_snapshot/restore.rs:219 State chunk added. {"chunk":0,"chunks_to_add":78,"last_idx":555654,"values_per_second":135983} ... 2023-09-19T07:59:56.079404Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/state_snapshot/restore.rs:219 State chunk added. {"chunk":76,"chunks_to_add":78,"last_idx":42691135,"values_per_second":61764} 2023-09-19T07:59:59.266303Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/state_snapshot/restore.rs:219 State chunk added. {"chunk":77,"chunks_to_add":78,"last_idx":43131397,"values_per_second":62115} 2023-09-19T08:00:00.890357Z [tokio-runtime-worker] INFO storage/aptosdb/src/state_store/mod.rs:481 Initializing BufferedState. {"latest_snapshot_version":null,"num_transactions":0} 2023-09-19T08:00:00.893786Z [tokio-runtime-worker] INFO storage/aptosdb/src/state_store/mod.rs:568 StateStore initialization finished. {"latest_in_memory_root_hash":"5350415253455f4d45524b4c455f504c414345484f4c4445525f484153480000","latest_in_memory_version":null,"latest_snapshot_root_hash":"5350415253455f4d45524b4c455f504c414345484f4c4445525f484153480000","latest_snapshot_version":null} 2023-09-19T08:00:00.900327Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/state_snapshot/restore.rs:103 state snapshot restore succeeded. {"time":753} 2023-09-19T08:00:00.902751Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:261 transaction restore started. 2023-09-19T08:00:43.515481Z [tokio-runtime-worker] WARN storage/aptosdb/src/lib.rs:2319 AptosDB API returned error. {"api_name":"get_latest_version","error":"No progress in db."} 2023-09-19T08:00:45.798571Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:474 Transactions saved. {"accumulative_tps":23934,"version":266054643} 2023-09-19T08:00:48.320255Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:474 Transactions saved. {"accumulative_tps":23122,"version":266111096} ... 2023-09-19T08:01:32.451902Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:474 Transactions saved. {"accumulative_tps":19145,"version":266936933} 2023-09-19T08:01:37.592470Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:474 Transactions saved. {"accumulative_tps":17893,"version":266967614} 2023-09-19T08:01:37.594368Z [tokio-runtime-worker] INFO storage/aptosdb/src/state_store/mod.rs:481 Initializing BufferedState. {"latest_snapshot_version":266967614,"num_transactions":266967615} 2023-09-19T08:01:37.596412Z [tokio-runtime-worker] INFO storage/aptosdb/src/state_store/mod.rs:568 StateStore initialization finished. {"latest_in_memory_root_hash":"0f0427df2d532674d964b6ad04d7663bf2aaa8c251098326695d9ee1c1d11e99","latest_in_memory_version":266967614,"latest_snapshot_root_hash":"0f0427df2d532674d964b6ad04d7663bf2aaa8c251098326695d9ee1c1d11e99","latest_snapshot_version":266967614} 2023-09-19T08:01:39.318405Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:636 Transactions replayed. {"accumulative_tps":5807,"version":266977614} 2023-09-19T08:01:40.313150Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:636 Transactions replayed. {"accumulative_tps":7362,"version":266987614} 2023-09-19T08:01:40.985047Z [tokio-runtime-worker] INFO storage/aptosdb/src/state_store/buffered_state.rs:123 Sent StateDelta to async commit thread. {"base_version":266967614,"version":266997613} 2023-09-19T08:01:41.017875Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:636 Transactions replayed. {"accumulative_tps":8768,"version":266997614} 2023-09-19T08:01:41.141470Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:636 Transactions replayed. {"accumulative_tps":9135,"version":267000000} 2023-09-19T08:01:41.146187Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:650 Replay finished. {"accumulative_tps":75219637,"total_replayed":267000000} 2023-09-19T08:01:41.202664Z [state_batch_committer] INFO storage/aptosdb/src/state_merkle_db.rs:229 Committing StateMerkleDb. {"version":266997613} 2023-09-19T08:01:41.202900Z [state_batch_committer] INFO storage/aptosdb/src/state_store/state_merkle_batch_committer.rs:77 State snapshot committed. {"base_version":266967614,"root_hash":"d819bdeac8523157f3d6d3db0623eb45a094301ad4ee156eb5cb89705332955f","version":266997613} 2023-09-19T08:01:41.375555Z [state_batch_committer] INFO storage/aptosdb/src/state_merkle_db.rs:229 Committing StateMerkleDb. {"version":266999999} 2023-09-19T08:01:41.375763Z [state_batch_committer] INFO storage/aptosdb/src/state_store/state_merkle_batch_committer.rs:77 State snapshot committed. {"base_version":266997613,"root_hash":"da74fafc8cd9bce9e77b6d58e5c4fd9e46a90b0f8508f320716dad46c9b0b64c","version":266999999} 2023-09-19T08:01:41.391802Z [state-committer] INFO storage/schemadb/src/lib.rs:304 Dropped RocksDB. {"rocksdb_name":"state_merkle_db"} 2023-09-19T08:01:41.462656Z [tokio-runtime-worker] INFO storage/schemadb/src/lib.rs:304 Dropped RocksDB. {"rocksdb_name":"ledger_db"} 2023-09-19T08:01:41.513492Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/backup_types/transaction/restore.rs:265 transaction restore succeeded. 2023-09-19T08:01:41.535693Z [tokio-runtime-worker] INFO storage/backup/backup-cli/src/coordinators/restore.rs:85 Restore coordinator exiting with success. { "Result": null } ``` # STARTING THE NODE: ``` 500} 2023-09-19T12:52:13.824772Z [state_kv_pruner] INFO storage/aptosdb/src/pruner/state_kv_pruner/mod.rs:58 Pruning state kv data. {"progress":117110500,"target_version":117111000} 2023-09-19T12:52:13.824954Z [state_kv_pruner] INFO storage/aptosdb/src/pruner/state_kv_pruner/mod.rs:73 Pruning state kv data is done. {"progress":117111000} ```