Skip to content

Commit 1308394

Browse files
emmaling27 authored and Convex, Inc. committed
Remove timeout for acquiring an isolate permit, fail immediately instead (#41988)
Instead of waiting for a 100ms timeout to acquire a permit when the isolate concurrency limit is reached, fail immediately.
GitOrigin-RevId: 287cdd4620a6bdc152634bbb530ebc8e72a103ce
1 parent 128c37a commit 1308394

File tree

3 files changed

+20
-16
lines changed

3 files changed

+20
-16
lines changed

crates/common/src/knobs.rs

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1036,11 +1036,6 @@ pub static SEARCHLIGHT_CLUSTER_NAME: LazyLock<String> = LazyLock::new(|| {
10361036
pub static FUNRUN_ISOLATE_ACTIVE_THREADS: LazyLock<usize> =
10371037
LazyLock::new(|| env_config("FUNRUN_ISOLATE_ACTIVE_THREADS", 0));
10381038

1039-
/// The maximum length of time to wait to start running a function (when the
1040-
/// FUNRUN_ISOLATE_ACTIVE_THREADS limit is reached).
1041-
pub static FUNRUN_INITIAL_PERMIT_TIMEOUT: LazyLock<Duration> =
1042-
LazyLock::new(|| Duration::from_millis(env_config("FUNRUN_INITIAL_PERMIT_TIMEOUT_MS", 100)));
1043-
10441039
/// How long to splay deploying AWS Lambdas due to changes in the backend. This
10451040
/// knob doesn't delay deploys that are required due to the user pushing new
10461041
/// node actions. Only affects deploys on startup triggered by changes to

crates/isolate/src/concurrency_limiter.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,17 @@ impl ConcurrencyLimiter {
7070
}
7171
}
7272

73+
pub fn try_acquire(&self, client_id: Arc<String>) -> anyhow::Result<ConcurrencyPermit> {
74+
self.tx.try_send(())?;
75+
let permit_id = self.tracker.lock().register(client_id.clone());
76+
Ok(ConcurrencyPermit {
77+
permit_id,
78+
rx: self.rx.clone(),
79+
limiter: self.clone(),
80+
client_id,
81+
})
82+
}
83+
7384
pub async fn acquire(&self, client_id: Arc<String>) -> ConcurrencyPermit {
7485
let timer = concurrency_permit_acquire_timer();
7586
self.tx

crates/isolate/src/isolate.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use std::{
99
use anyhow::Context as _;
1010
use common::{
1111
knobs::{
12-
FUNRUN_INITIAL_PERMIT_TIMEOUT,
1312
ISOLATE_MAX_ARRAY_BUFFER_TOTAL_SIZE,
1413
ISOLATE_MAX_USER_HEAP_SIZE,
1514
},
@@ -296,18 +295,17 @@ impl<RT: Runtime> Isolate<RT> {
296295
"IsolateNotClean",
297296
"Selected isolate was not clean",
298297
))?;
299-
// Acquire a concurrency permit without counting it against the timeout.
300-
let permit = tokio::select! {
301-
biased;
302-
permit = self.limiter.acquire(client_id) => permit,
303-
// Do not apply a timeout for subfunctions that can't be retried
304-
() = self.rt.wait(*FUNRUN_INITIAL_PERMIT_TIMEOUT),
305-
if !environment.is_nested_function() => {
306-
anyhow::bail!(ErrorMetadata::rejected_before_execution(
298+
// Wait for a permit for subfunctions that cannot be retried, otherwise try to
299+
// acquire and fail immediately if there are no permits available.
300+
let permit = if environment.is_nested_function() {
301+
self.limiter.acquire(client_id).await
302+
} else {
303+
self.limiter.try_acquire(client_id).context(
304+
ErrorMetadata::rejected_before_execution(
307305
"InitialPermitTimeoutError",
308306
"Couldn't acquire a permit on this funrun",
309-
));
310-
}
307+
),
308+
)?
311309
};
312310
let context_handle = self.handle.new_context_created();
313311
let mut user_timeout = environment.user_timeout();

0 commit comments

Comments
 (0)