Skip to content

Commit e78de64

Browse files
Nathaniel BajoNathaniel Bajo
authored andcommitted
prevent fork-choice race condition with shared weight storage
1 parent 781c777 commit e78de64

File tree

2 files changed

+87
-6
lines changed

2 files changed

+87
-6
lines changed

substrate/client/consensus/babe/src/lib.rs

Lines changed: 84 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ use std::{
7676
time::Duration,
7777
};
7878

79+
use parking_lot::RwLock;
80+
use std::collections::HashMap;
81+
7982
use codec::{Decode, Encode};
8083
use futures::{
8184
channel::{
@@ -956,11 +959,49 @@ fn find_next_config_digest<B: BlockT>(
956959
Ok(config_digest)
957960
}
958961

962+
/// Shared storage for block weights to ensure consistent fork-choice.
963+
///
964+
/// This stores block weights in memory to provide immediate access for
965+
/// fork-choice decisions, preventing race conditions that occur when
966+
/// weights are loaded from the database.
967+
#[derive(Clone)]
968+
pub struct SharedBlockWeightStorage<Block: BlockT> {
969+
weights: Arc<RwLock<HashMap<Block::Hash, BabeBlockWeight>>>,
970+
}
971+
972+
impl<Block: BlockT> SharedBlockWeightStorage<Block> {
973+
/// Create a new shared weight storage
974+
pub fn new() -> Self {
975+
Self { weights: Arc::new(RwLock::new(HashMap::new())) }
976+
}
977+
978+
/// Store a block weight
979+
pub fn store(&self, hash: Block::Hash, weight: BabeBlockWeight) {
980+
self.weights.write().insert(hash, weight);
981+
}
982+
983+
/// Load a block weight
984+
pub fn load(&self, hash: &Block::Hash) -> Option<BabeBlockWeight> {
985+
self.weights.read().get(hash).copied()
986+
}
987+
988+
/// Remove a block weight
989+
pub fn remove(&self, hash: &Block::Hash) {
990+
self.weights.write().remove(hash);
991+
}
992+
993+
/// Clear all weights
994+
pub fn clear(&self) {
995+
self.weights.write().clear();
996+
}
997+
}
998+
959999
/// State that must be shared between the import queue and the authoring logic.
9601000
#[derive(Clone)]
9611001
pub struct BabeLink<Block: BlockT> {
9621002
epoch_changes: SharedEpochChanges<Block, Epoch>,
9631003
config: BabeConfiguration,
1004+
weight_storage: SharedBlockWeightStorage<Block>,
9641005
}
9651006

9661007
impl<Block: BlockT> BabeLink<Block> {
@@ -973,6 +1014,11 @@ impl<Block: BlockT> BabeLink<Block> {
9731014
pub fn config(&self) -> &BabeConfiguration {
9741015
&self.config
9751016
}
1017+
1018+
/// Get the weight storage of this link.
1019+
pub fn weight_storage(&self) -> &SharedBlockWeightStorage<Block> {
1020+
&self.weight_storage
1021+
}
9761022
}
9771023

9781024
/// A verifier for Babe blocks.
@@ -1121,6 +1167,8 @@ pub struct BabeBlockImport<Block: BlockT, Client, I, CIDP, SC> {
11211167
//
11221168
// Will be used when sending equivocation reports.
11231169
offchain_tx_pool_factory: OffchainTransactionPoolFactory<Block>,
1170+
// Shared block weight storage for consistent fork-choice
1171+
weight_storage: SharedBlockWeightStorage<Block>,
11241172
}
11251173

11261174
impl<Block: BlockT, I: Clone, Client, CIDP: Clone, SC: Clone> Clone
@@ -1135,6 +1183,7 @@ impl<Block: BlockT, I: Clone, Client, CIDP: Clone, SC: Clone> Clone
11351183
create_inherent_data_providers: self.create_inherent_data_providers.clone(),
11361184
select_chain: self.select_chain.clone(),
11371185
offchain_tx_pool_factory: self.offchain_tx_pool_factory.clone(),
1186+
weight_storage: self.weight_storage.clone(),
11381187
}
11391188
}
11401189
}
@@ -1148,6 +1197,7 @@ impl<Block: BlockT, Client, I, CIDP, SC> BabeBlockImport<Block, Client, I, CIDP,
11481197
create_inherent_data_providers: CIDP,
11491198
select_chain: SC,
11501199
offchain_tx_pool_factory: OffchainTransactionPoolFactory<Block>,
1200+
weight_storage: SharedBlockWeightStorage<Block>,
11511201
) -> Self {
11521202
BabeBlockImport {
11531203
client,
@@ -1157,6 +1207,7 @@ impl<Block: BlockT, Client, I, CIDP, SC> BabeBlockImport<Block, Client, I, CIDP,
11571207
create_inherent_data_providers,
11581208
select_chain,
11591209
offchain_tx_pool_factory,
1210+
weight_storage,
11601211
}
11611212
}
11621213
}
@@ -1671,6 +1722,8 @@ where
16711722
});
16721723
}
16731724

1725+
self.weight_storage.store(hash, total_weight);
1726+
16741727
aux_schema::write_block_weight(hash, total_weight, |values| {
16751728
block
16761729
.auxiliary
@@ -1688,8 +1741,12 @@ where
16881741
// so we don't need to cover again here.
16891742
parent_weight
16901743
} else {
1691-
aux_schema::load_block_weight(&*self.client, last_best)
1692-
.map_err(|e| ConsensusError::ChainLookup(e.to_string()))?
1744+
// Try to get weight from shared storage first, then from aux storage
1745+
self.weight_storage
1746+
.load(&last_best)
1747+
.or_else(|| {
1748+
aux_schema::load_block_weight(&*self.client, last_best).ok().flatten()
1749+
})
16931750
.ok_or_else(|| {
16941751
ConsensusError::ChainLookup(
16951752
"No block weight for parent header.".to_string(),
@@ -1788,17 +1845,33 @@ where
17881845
+ 'static,
17891846
{
17901847
let epoch_changes = aux_schema::load_epoch_changes::<Block, _>(&*client, &config)?;
1791-
let link = BabeLink { epoch_changes: epoch_changes.clone(), config: config.clone() };
1848+
1849+
// Create shared weight storage
1850+
let weight_storage = SharedBlockWeightStorage::new();
1851+
1852+
let link =
1853+
BabeLink { epoch_changes: epoch_changes.clone(), config: config.clone(), weight_storage };
17921854

17931855
// NOTE: this isn't entirely necessary, but since we didn't use to prune the
17941856
// epoch tree it is useful as a migration, so that nodes prune long trees on
17951857
// startup rather than waiting until importing the next epoch change block.
17961858
prune_finalized(client.clone(), &mut epoch_changes.shared_data())?;
17971859

17981860
let client_weak = Arc::downgrade(&client);
1861+
let weight_storage_for_closure = link.weight_storage.clone();
17991862
let on_finality = move |summary: &FinalityNotification<Block>| {
18001863
if let Some(client) = client_weak.upgrade() {
1801-
aux_storage_cleanup(client.as_ref(), summary)
1864+
// Also clean up from shared storage
1865+
let hashes = aux_storage_cleanup(client.as_ref(), summary);
1866+
for op in &hashes {
1867+
if let (key, None) = op {
1868+
// Extract hash from key and remove from shared storage
1869+
if let Ok(hash) = Block::Hash::decode(&mut &key[..]) {
1870+
weight_storage_for_closure.remove(&hash);
1871+
}
1872+
}
1873+
}
1874+
hashes
18021875
} else {
18031876
Default::default()
18041877
}
@@ -1813,6 +1886,7 @@ where
18131886
create_inherent_data_providers,
18141887
select_chain,
18151888
offchain_tx_pool_factory,
1889+
link.weight_storage.clone(),
18161890
);
18171891

18181892
Ok((import, link))
@@ -1901,6 +1975,7 @@ pub fn revert<Block, Client, Backend>(
19011975
client: Arc<Client>,
19021976
backend: Arc<Backend>,
19031977
blocks: NumberFor<Block>,
1978+
weight_storage: Option<SharedBlockWeightStorage<Block>>,
19041979
) -> ClientResult<()>
19051980
where
19061981
Block: BlockT,
@@ -1959,6 +2034,11 @@ where
19592034
// We've reached the revert point or an already processed branch, stop here.
19602035
break
19612036
}
2037+
2038+
if let Some(weight_storage) = &weight_storage {
2039+
weight_storage.remove(&hash);
2040+
}
2041+
19622042
hash = meta.parent;
19632043
}
19642044
}

substrate/client/consensus/babe/src/tests.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -789,7 +789,7 @@ async fn revert_prunes_epoch_changes_and_removes_weights() {
789789
assert_eq!(epoch_changes.shared_data().tree().roots().count(), 1);
790790

791791
// Revert canon chain to block #10 (best(21) - 11)
792-
revert(client.clone(), backend, 11).expect("revert should work for baked test scenario");
792+
revert(client.clone(), backend, 11, None).expect("revert should work for baked test scenario");
793793

794794
// Load and check epoch changes.
795795

@@ -853,7 +853,7 @@ async fn revert_not_allowed_for_finalized() {
853853
client.finalize_block(canon[2], None, false).unwrap();
854854

855855
// Revert canon chain to last finalized block
856-
revert(client.clone(), backend, 100).expect("revert should work for baked test scenario");
856+
revert(client.clone(), backend, 100, None).expect("revert should work for baked test scenario");
857857

858858
let weight_data_check = |hashes: &[Hash], expected: bool| {
859859
hashes.iter().all(|hash| {
@@ -1343,3 +1343,4 @@ async fn allows_skipping_epochs_on_some_forks() {
13431343

13441344
assert_eq!(epoch_data, epoch3);
13451345
}
1346+

0 commit comments

Comments
 (0)