Skip to content

Commit 07e45a1

Browse files
emmaling27 authored and Convex, Inc. committed
Revert "Remove timeout for acquiring an isolate permit, fail immediately instead" (#42174)
GitOrigin-RevId: dd33425af61ac7c2f95d407232551e9f3ba31465
1 parent afddfc5 commit 07e45a1

File tree

3 files changed

+16
-20
lines changed

3 files changed

+16
-20
lines changed

crates/common/src/knobs.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1036,6 +1036,11 @@ pub static SEARCHLIGHT_CLUSTER_NAME: LazyLock<String> = LazyLock::new(|| {
10361036
pub static FUNRUN_ISOLATE_ACTIVE_THREADS: LazyLock<usize> =
10371037
LazyLock::new(|| env_config("FUNRUN_ISOLATE_ACTIVE_THREADS", 0));
10381038

1039+
/// The maximum length of time to wait to start running a function (when the
1040+
/// FUNRUN_ISOLATE_ACTIVE_THREADS limit is reached).
1041+
pub static FUNRUN_INITIAL_PERMIT_TIMEOUT: LazyLock<Duration> =
1042+
LazyLock::new(|| Duration::from_millis(env_config("FUNRUN_INITIAL_PERMIT_TIMEOUT_MS", 100)));
1043+
10391044
/// How long to splay deploying AWS Lambdas due to changes in the backend. This
10401045
/// knob doesn't delay deploys that are required due to the user pushing new
10411046
/// node actions. Only affects deploys on startup triggered by changes to

crates/isolate/src/concurrency_limiter.rs

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -70,17 +70,6 @@ impl ConcurrencyLimiter {
7070
}
7171
}
7272

73-
pub fn try_acquire(&self, client_id: Arc<String>) -> anyhow::Result<ConcurrencyPermit> {
74-
self.tx.try_send(())?;
75-
let permit_id = self.tracker.lock().register(client_id.clone());
76-
Ok(ConcurrencyPermit {
77-
permit_id,
78-
rx: self.rx.clone(),
79-
limiter: self.clone(),
80-
client_id,
81-
})
82-
}
83-
8473
pub async fn acquire(&self, client_id: Arc<String>) -> ConcurrencyPermit {
8574
let timer = concurrency_permit_acquire_timer();
8675
self.tx

crates/isolate/src/isolate.rs

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ use std::{
99
use anyhow::Context as _;
1010
use common::{
1111
knobs::{
12+
FUNRUN_INITIAL_PERMIT_TIMEOUT,
1213
ISOLATE_MAX_ARRAY_BUFFER_TOTAL_SIZE,
1314
ISOLATE_MAX_USER_HEAP_SIZE,
1415
},
@@ -295,17 +296,18 @@ impl<RT: Runtime> Isolate<RT> {
295296
"IsolateNotClean",
296297
"Selected isolate was not clean",
297298
))?;
298-
// Wait for a permit for subfunctions that cannot be retried, otherwise try to
299-
// acquire and fail immediately if there are no permits available.
300-
let permit = if environment.is_nested_function() {
301-
self.limiter.acquire(client_id).await
302-
} else {
303-
self.limiter.try_acquire(client_id).context(
304-
ErrorMetadata::rejected_before_execution(
299+
// Acquire a concurrency permit without counting it against the timeout.
300+
let permit = tokio::select! {
301+
biased;
302+
permit = self.limiter.acquire(client_id) => permit,
303+
// Do not apply a timeout for subfunctions that can't be retried
304+
() = self.rt.wait(*FUNRUN_INITIAL_PERMIT_TIMEOUT),
305+
if !environment.is_nested_function() => {
306+
anyhow::bail!(ErrorMetadata::rejected_before_execution(
305307
"InitialPermitTimeoutError",
306308
"Couldn't acquire a permit on this funrun",
307-
),
308-
)?
309+
));
310+
}
309311
};
310312
let context_handle = self.handle.new_context_created();
311313
let mut user_timeout = environment.user_timeout();

0 commit comments

Comments
 (0)