Reduce MR + ME Routing hot-path contention

This commit is contained in:
Alexey
2026-05-22 20:19:09 +03:00
parent 77a7f89075
commit dc8951eae8
8 changed files with 293 additions and 283 deletions
+8 -6
View File
@@ -77,26 +77,24 @@ struct HotBindingTable {
struct BindingState {
inner: Mutex<BindingInner>,
writer_idle_since_epoch_secs: DashMap<u64, u64>,
bound_clients_by_writer: DashMap<u64, usize>,
active_sessions_by_target_dc: DashMap<i16, usize>,
last_meta_for_writer: DashMap<u64, ConnMeta>,
}
struct BindingInner {
writers: HashMap<u64, mpsc::Sender<WriterCommand>>,
writer_for_conn: HashMap<u64, u64>,
conns_for_writer: HashMap<u64, HashSet<u64>>,
meta: HashMap<u64, ConnMeta>,
last_meta_for_writer: HashMap<u64, ConnMeta>,
writer_idle_since_epoch_secs: HashMap<u64, u64>,
}
impl BindingInner {
fn new() -> Self {
Self {
writers: HashMap::new(),
writer_for_conn: HashMap::new(),
conns_for_writer: HashMap::new(),
meta: HashMap::new(),
last_meta_for_writer: HashMap::new(),
writer_idle_since_epoch_secs: HashMap::new(),
}
}
}
@@ -149,6 +147,10 @@ impl ConnRegistry {
},
binding: BindingState {
inner: Mutex::new(BindingInner::new()),
writer_idle_since_epoch_secs: DashMap::new(),
bound_clients_by_writer: DashMap::new(),
active_sessions_by_target_dc: DashMap::new(),
last_meta_for_writer: DashMap::new(),
},
next_id: AtomicU64::new(start),
route_channel_capacity,
+122 -70
View File
@@ -13,13 +13,55 @@ use super::{
};
impl ConnRegistry {
fn set_writer_bound_count(&self, writer_id: u64, count: usize) {
self.binding.bound_clients_by_writer.insert(writer_id, count);
if count == 0 {
self.binding
.writer_idle_since_epoch_secs
.entry(writer_id)
.or_insert_with(Self::now_epoch_secs);
} else {
self.binding.writer_idle_since_epoch_secs.remove(&writer_id);
}
}
fn adjust_active_target_dc(&self, target_dc: i16, delta: isize) {
if target_dc == 0 || delta == 0 {
return;
}
if delta > 0 {
self.binding
.active_sessions_by_target_dc
.entry(target_dc)
.and_modify(|count| *count = count.saturating_add(delta as usize))
.or_insert(delta as usize);
return;
}
let remove =
if let Some(mut count) = self.binding.active_sessions_by_target_dc.get_mut(&target_dc) {
let decrement = delta.unsigned_abs();
*count = count.saturating_sub(decrement);
*count == 0
} else {
false
};
if remove {
self.binding.active_sessions_by_target_dc.remove(&target_dc);
}
}
pub async fn register_writer(&self, writer_id: u64, tx: mpsc::Sender<WriterCommand>) {
let mut binding = self.binding.inner.lock().await;
binding.writers.insert(writer_id, tx.clone());
binding
.conns_for_writer
.entry(writer_id)
.or_insert_with(HashSet::new);
self.binding.bound_clients_by_writer.entry(writer_id).or_insert(0);
self.binding
.writer_idle_since_epoch_secs
.entry(writer_id)
.or_insert_with(Self::now_epoch_secs);
self.writers.map.insert(writer_id, tx);
}
@@ -29,19 +71,18 @@ impl ConnRegistry {
self.routing.byte_budget.remove(&id);
self.hot_binding.map.remove(&id);
let mut binding = self.binding.inner.lock().await;
binding.meta.remove(&id);
let previous_meta = binding.meta.remove(&id);
if let Some(meta) = previous_meta.as_ref() {
self.adjust_active_target_dc(meta.target_dc, -1);
}
if let Some(writer_id) = binding.writer_for_conn.remove(&id) {
let became_empty = if let Some(set) = binding.conns_for_writer.get_mut(&writer_id) {
let next_count = if let Some(set) = binding.conns_for_writer.get_mut(&writer_id) {
set.remove(&id);
set.is_empty()
set.len()
} else {
false
0
};
if became_empty {
binding
.writer_idle_since_epoch_secs
.insert(writer_id, Self::now_epoch_secs());
}
self.set_writer_bound_count(writer_id, next_count);
return Some(writer_id);
}
None
@@ -248,7 +289,7 @@ impl ConnRegistry {
if !self.routing.map.contains_key(&conn_id) {
return false;
}
if !binding.writers.contains_key(&writer_id) {
if !self.writers.map.contains_key(&writer_id) {
return false;
}
@@ -256,28 +297,32 @@ impl ConnRegistry {
if let Some(previous_writer_id) = previous_writer_id
&& previous_writer_id != writer_id
{
let became_empty =
let next_count =
if let Some(set) = binding.conns_for_writer.get_mut(&previous_writer_id) {
set.remove(&conn_id);
set.is_empty()
set.len()
} else {
false
0
};
if became_empty {
binding
.writer_idle_since_epoch_secs
.insert(previous_writer_id, Self::now_epoch_secs());
}
self.set_writer_bound_count(previous_writer_id, next_count);
}
binding.meta.insert(conn_id, meta.clone());
binding.last_meta_for_writer.insert(writer_id, meta.clone());
binding.writer_idle_since_epoch_secs.remove(&writer_id);
binding
.conns_for_writer
.entry(writer_id)
.or_insert_with(HashSet::new)
.insert(conn_id);
if let Some(previous_meta) = binding.meta.insert(conn_id, meta.clone()) {
self.adjust_active_target_dc(previous_meta.target_dc, -1);
}
self.adjust_active_target_dc(meta.target_dc, 1);
self.binding
.last_meta_for_writer
.insert(writer_id, meta.clone());
let next_count = {
let set = binding
.conns_for_writer
.entry(writer_id)
.or_insert_with(HashSet::new);
set.insert(conn_id);
set.len()
};
self.set_writer_bound_count(writer_id, next_count);
self.hot_binding
.map
.insert(conn_id, HotConnBinding { writer_id, meta });
@@ -290,27 +335,38 @@ impl ConnRegistry {
.conns_for_writer
.entry(writer_id)
.or_insert_with(HashSet::new);
binding
.writer_idle_since_epoch_secs
.entry(writer_id)
.or_insert(Self::now_epoch_secs());
let count = binding
.conns_for_writer
.get(&writer_id)
.map(|set| set.len())
.unwrap_or(0);
self.set_writer_bound_count(writer_id, count);
}
pub async fn get_last_writer_meta(&self, writer_id: u64) -> Option<ConnMeta> {
let binding = self.binding.inner.lock().await;
binding.last_meta_for_writer.get(&writer_id).cloned()
self.binding
.last_meta_for_writer
.get(&writer_id)
.map(|entry| entry.value().clone())
}
pub async fn writer_idle_since_snapshot(&self) -> HashMap<u64, u64> {
let binding = self.binding.inner.lock().await;
binding.writer_idle_since_epoch_secs.clone()
self.binding
.writer_idle_since_epoch_secs
.iter()
.map(|entry| (*entry.key(), *entry.value()))
.collect()
}
pub async fn writer_idle_since_for_writer_ids(&self, writer_ids: &[u64]) -> HashMap<u64, u64> {
let binding = self.binding.inner.lock().await;
let mut out = HashMap::<u64, u64>::with_capacity(writer_ids.len());
for writer_id in writer_ids {
if let Some(idle_since) = binding.writer_idle_since_epoch_secs.get(writer_id).copied() {
if let Some(idle_since) = self
.binding
.writer_idle_since_epoch_secs
.get(writer_id)
.map(|entry| *entry.value())
{
out.insert(*writer_id, idle_since);
}
}
@@ -320,25 +376,19 @@ impl ConnRegistry {
pub(in crate::transport::middle_proxy) async fn writer_activity_snapshot(
&self,
) -> WriterActivitySnapshot {
let binding = self.binding.inner.lock().await;
let mut bound_clients_by_writer = HashMap::<u64, usize>::new();
let mut active_sessions_by_target_dc = HashMap::<i16, usize>::new();
for (writer_id, conn_ids) in &binding.conns_for_writer {
bound_clients_by_writer.insert(*writer_id, conn_ids.len());
}
for conn_meta in binding.meta.values() {
if conn_meta.target_dc == 0 {
continue;
}
*active_sessions_by_target_dc
.entry(conn_meta.target_dc)
.or_insert(0) += 1;
}
WriterActivitySnapshot {
bound_clients_by_writer,
active_sessions_by_target_dc,
bound_clients_by_writer: self
.binding
.bound_clients_by_writer
.iter()
.map(|entry| (*entry.key(), *entry.value()))
.collect(),
active_sessions_by_target_dc: self
.binding
.active_sessions_by_target_dc
.iter()
.map(|entry| (*entry.key(), *entry.value()))
.collect(),
}
}
@@ -393,10 +443,10 @@ impl ConnRegistry {
pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
let mut binding = self.binding.inner.lock().await;
binding.writers.remove(&writer_id);
self.writers.map.remove(&writer_id);
binding.last_meta_for_writer.remove(&writer_id);
binding.writer_idle_since_epoch_secs.remove(&writer_id);
self.binding.last_meta_for_writer.remove(&writer_id);
self.binding.writer_idle_since_epoch_secs.remove(&writer_id);
self.binding.bound_clients_by_writer.remove(&writer_id);
let conns = binding
.conns_for_writer
.remove(&writer_id)
@@ -410,6 +460,10 @@ impl ConnRegistry {
continue;
}
binding.writer_for_conn.remove(&conn_id);
let meta = binding.meta.remove(&conn_id);
if let Some(meta) = meta.as_ref() {
self.adjust_active_target_dc(meta.target_dc, -1);
}
let remove_hot = self
.hot_binding
.map
@@ -419,10 +473,10 @@ impl ConnRegistry {
if remove_hot {
self.hot_binding.map.remove(&conn_id);
}
if let Some(m) = binding.meta.get(&conn_id) {
if let Some(m) = meta {
out.push(BoundConn {
conn_id,
meta: m.clone(),
meta: m,
});
}
}
@@ -438,11 +492,10 @@ impl ConnRegistry {
}
pub async fn is_writer_empty(&self, writer_id: u64) -> bool {
let binding = self.binding.inner.lock().await;
binding
.conns_for_writer
self.binding
.bound_clients_by_writer
.get(&writer_id)
.map(|s| s.is_empty())
.map(|count| *count.value() == 0)
.unwrap_or(true)
}
@@ -457,21 +510,20 @@ impl ConnRegistry {
return false;
}
binding.writers.remove(&writer_id);
self.writers.map.remove(&writer_id);
binding.last_meta_for_writer.remove(&writer_id);
binding.writer_idle_since_epoch_secs.remove(&writer_id);
self.binding.last_meta_for_writer.remove(&writer_id);
self.binding.writer_idle_since_epoch_secs.remove(&writer_id);
self.binding.bound_clients_by_writer.remove(&writer_id);
binding.conns_for_writer.remove(&writer_id);
true
}
#[allow(dead_code)]
pub(super) async fn non_empty_writer_ids(&self, writer_ids: &[u64]) -> HashSet<u64> {
let binding = self.binding.inner.lock().await;
let mut out = HashSet::<u64>::with_capacity(writer_ids.len());
for writer_id in writer_ids {
if let Some(conns) = binding.conns_for_writer.get(writer_id)
&& !conns.is_empty()
if let Some(count) = self.binding.bound_clients_by_writer.get(writer_id)
&& *count.value() > 0
{
out.insert(*writer_id);
}
+22 -27
View File
@@ -15,7 +15,6 @@ use super::registry::ConnMeta;
use super::wire::build_proxy_req_payload;
use crate::config::{MeRouteNoWriterMode, MeWriterPickMode};
use crate::error::{ProxyError, Result};
use crate::network::IpFamily;
use crate::stream::PooledBuffer;
use rand::seq::SliceRandom;
@@ -124,9 +123,8 @@ impl MePool {
}
let mut writers_snapshot = {
let ws = self.writers.read().await;
let ws = self.writers.snapshot();
if ws.is_empty() {
drop(ws);
match no_writer_mode {
MeRouteNoWriterMode::AsyncRecoveryFailfast => {
let deadline = *no_writer_deadline.get_or_insert_with(|| {
@@ -154,38 +152,32 @@ impl MePool {
for _ in
0..self.route_runtime.me_route_inline_recovery_attempts.max(1)
{
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
IpFamily::V6 => self.proxy_map_v6.read().await.clone(),
};
for (dc, addrs) in &map {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
let _ = self
.connect_one_for_dc(
addr,
*dc,
self.rng.as_ref(),
)
.await;
}
let preferred = self.preferred_endpoints_by_dc.load_full();
for (dc, addrs) in preferred.iter() {
for addr in addrs {
let _ = self
.connect_one_for_dc(
*addr,
*dc,
self.rng.as_ref(),
)
.await;
}
}
if !self.writers.read().await.is_empty() {
if !self.writers.snapshot().is_empty() {
break;
}
}
}
if !self.writers.read().await.is_empty() {
if !self.writers.snapshot().is_empty() {
continue;
}
let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.route_runtime.me_route_inline_recovery_wait
});
if !self.wait_for_writer_until(deadline).await {
if !self.writers.read().await.is_empty() {
if !self.writers.snapshot().is_empty() {
continue;
}
self.stats.increment_me_no_writer_failfast_total();
@@ -222,7 +214,7 @@ impl MePool {
}
}
}
ws.clone()
ws
};
let mut candidate_indices = self
@@ -285,7 +277,12 @@ impl MePool {
));
}
emergency_attempts += 1;
let mut endpoints = self.endpoint_candidates_for_target_dc(routed_dc).await;
let mut endpoints = self
.preferred_endpoints_by_dc
.load()
.get(&routed_dc)
.cloned()
.unwrap_or_default();
endpoints.shuffle(&mut rand::rng());
for addr in endpoints {
if self
@@ -298,9 +295,7 @@ impl MePool {
}
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts as u64))
.await;
let ws2 = self.writers.read().await;
writers_snapshot = ws2.clone();
drop(ws2);
writers_snapshot = self.writers.snapshot();
candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, routed_dc, false)
.await;
+15 -33
View File
@@ -1,13 +1,9 @@
use std::collections::HashSet;
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use std::time::{Duration, Instant};
use tracing::warn;
use crate::network::IpFamily;
use super::super::MePool;
use super::{
HYBRID_GLOBAL_BURST_PERIOD_ROUNDS, HYBRID_RECENT_SUCCESS_WINDOW_MS,
@@ -17,18 +13,18 @@ use super::{
impl MePool {
pub(super) async fn wait_for_writer_until(&self, deadline: Instant) -> bool {
let mut rx = self.writer_epoch.subscribe();
if !self.writers.read().await.is_empty() {
if !self.writers.snapshot().is_empty() {
return true;
}
let now = Instant::now();
if now >= deadline {
return !self.writers.read().await.is_empty();
return !self.writers.snapshot().is_empty();
}
let timeout = deadline.saturating_duration_since(now);
if tokio::time::timeout(timeout, rx.changed()).await.is_ok() {
return !self.writers.read().await.is_empty();
return !self.writers.snapshot().is_empty();
}
!self.writers.read().await.is_empty()
!self.writers.snapshot().is_empty()
}
pub(super) async fn wait_for_candidate_until(&self, routed_dc: i32, deadline: Instant) -> bool {
@@ -58,11 +54,11 @@ impl MePool {
pub(super) async fn has_candidate_for_target_dc(&self, routed_dc: i32) -> bool {
let writers_snapshot = {
let ws = self.writers.read().await;
let ws = self.writers.snapshot();
if ws.is_empty() {
return false;
}
ws.clone()
ws
};
let mut candidate_indices = self
.candidate_indices_for_dc(&writers_snapshot, routed_dc, false)
@@ -79,7 +75,7 @@ impl MePool {
self: &Arc<Self>,
routed_dc: i32,
) -> bool {
let endpoints = self.endpoint_candidates_for_target_dc(routed_dc).await;
let endpoints = self.preferred_endpoints_for_dc(routed_dc).await;
if endpoints.is_empty() {
return false;
}
@@ -92,33 +88,19 @@ impl MePool {
pub(super) async fn trigger_async_recovery_global(self: &Arc<Self>) {
self.stats.increment_me_async_recovery_trigger_total();
let mut seen = HashSet::<(i32, SocketAddr)>::new();
for family in self.family_order() {
let map_guard = match family {
IpFamily::V4 => self.proxy_map_v4.read().await,
IpFamily::V6 => self.proxy_map_v6.read().await,
};
for (dc, addrs) in map_guard.iter() {
for (ip, port) in addrs {
let addr = SocketAddr::new(*ip, *port);
if seen.insert((*dc, addr)) {
self.trigger_immediate_refill_for_dc(addr, *dc);
}
if seen.len() >= 8 {
return;
}
let preferred = self.preferred_endpoints_by_dc.load();
let mut triggered = 0usize;
for (dc, addrs) in preferred.iter() {
for addr in addrs {
self.trigger_immediate_refill_for_dc(*addr, *dc);
triggered = triggered.saturating_add(1);
if triggered >= 8 {
return;
}
}
}
}
pub(super) async fn endpoint_candidates_for_target_dc(
&self,
routed_dc: i32,
) -> Vec<SocketAddr> {
self.preferred_endpoints_for_dc(routed_dc).await
}
pub(super) async fn maybe_trigger_hybrid_recovery(
self: &Arc<Self>,
routed_dc: i32,
+5 -2
View File
@@ -15,7 +15,10 @@ impl MePool {
routed_dc: i32,
include_warm: bool,
) -> Vec<usize> {
let preferred = self.preferred_endpoints_for_dc(routed_dc).await;
let preferred_snapshot = self.preferred_endpoints_by_dc.load();
let Some(preferred) = preferred_snapshot.get(&routed_dc) else {
return Vec::new();
};
if preferred.is_empty() {
return Vec::new();
}
@@ -25,7 +28,7 @@ impl MePool {
if !self.writer_eligible_for_selection(w, include_warm) {
continue;
}
if w.writer_dc == routed_dc && preferred.contains(&w.addr) {
if w.writer_dc == routed_dc && preferred.binary_search(&w.addr).is_ok() {
out.push(idx);
}
}