Skip to content

Commit d24d086

Browse files
Adist319meta-codesync[bot]
authored andcommitted
Fix sequence pattern matching type narrowing (#2049)
Summary: Adds proper type narrowing for sequence patterns in match/case statements. Previously, sequence patterns like [*values] did not narrow union types correctly, and captured variables were not properly typed. This PR: - Adds Sequence type to the stdlib for type checking - Introduces IsSequence/IsNotSequence narrowing operations - Emits IsSequence narrowing for sequence patterns to properly filter union types - Correctly types captured variables (e.g., [*values] gives values: list[T] when matching Sequence[T]) - Excludes str, bytes, and bytearray from sequence pattern matching per PEP 634 Fixes #1806. Pull Request resolved: #2049 Test Plan: Added 9 new test cases covering: - Basic star capture patterns ([*values]) - Union type narrowing (int | Sequence[int]) - Fixed-length patterns ([a, b]) - Mixed patterns ([first, *middle, last]) - String exclusion per PEP 634 - List and tuple patterns Reviewed By: yangdanny97 Differential Revision: D90423128 Pulled By: stroxler fbshipit-source-id: 359a6441285870de71157476e6562eb50dcd39dc
1 parent e34b42a commit d24d086

File tree

6 files changed

+260
-31
lines changed

6 files changed

+260
-31
lines changed

crates/pyrefly_types/src/stdlib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ pub struct Stdlib {
6262
async_iterable: StdlibResult<(Class, Arc<TParams>)>,
6363
async_iterator: StdlibResult<(Class, Arc<TParams>)>,
6464
mutable_sequence: StdlibResult<(Class, Arc<TParams>)>,
65+
sequence: StdlibResult<(Class, Arc<TParams>)>,
6566
generator: StdlibResult<(Class, Arc<TParams>)>,
6667
async_generator: StdlibResult<(Class, Arc<TParams>)>,
6768
awaitable: StdlibResult<(Class, Arc<TParams>)>,
@@ -202,6 +203,7 @@ impl Stdlib {
202203
async_iterable: lookup_generic(typing, "AsyncIterable", 1),
203204
async_iterator: lookup_generic(typing, "AsyncIterator", 1),
204205
mutable_sequence: lookup_generic(typing, "MutableSequence", 1),
206+
sequence: lookup_generic(typing, "Sequence", 1),
205207
generator: lookup_generic(typing, "Generator", 3),
206208
async_generator: lookup_generic(typing, "AsyncGenerator", 2),
207209
awaitable: lookup_generic(typing, "Awaitable", 1),
@@ -460,6 +462,10 @@ impl Stdlib {
460462
Self::apply(&self.mutable_sequence, vec![x])
461463
}
462464

465+
pub fn sequence(&self, x: Type) -> ClassType {
466+
Self::apply(&self.sequence, vec![x])
467+
}
468+
463469
pub fn generator(&self, yield_ty: Type, send_ty: Type, return_ty: Type) -> ClassType {
464470
Self::apply(&self.generator, vec![yield_ty, send_ty, return_ty])
465471
}

pyrefly/lib/alt/narrow.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,30 @@ impl<'a, Ans: LookupAnswer> AnswersSolver<'a, Ans> {
624624
};
625625
self.narrow_length_less_than(ty, len + 1)
626626
}
627+
AtomicNarrowOp::IsSequence => {
628+
// Narrow to only sequence types (for sequence pattern matching)
629+
self.distribute_over_union(ty, |t| {
630+
if self.is_sequence_for_pattern(t) {
631+
t.clone()
632+
} else {
633+
Type::never()
634+
}
635+
})
636+
}
637+
AtomicNarrowOp::IsNotSequence => {
638+
// Narrow to exclude sequence types (negation of sequence pattern)
639+
// Note: Any and classes that extend Any must be preserved (not narrowed to Never)
640+
// since we can't know at static analysis time whether they're sequences or not
641+
self.distribute_over_union(ty, |t| {
642+
if self.behaves_like_any(t) {
643+
t.clone()
644+
} else if self.is_sequence_for_pattern(t) {
645+
Type::never()
646+
} else {
647+
t.clone()
648+
}
649+
})
650+
}
627651
AtomicNarrowOp::In(v) => {
628652
let exprs = match v {
629653
Expr::List(list) => Some(list.elts.clone()),

pyrefly/lib/alt/unwrap.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,41 @@ impl<'a, Ans: LookupAnswer> AnswersSolver<'a, Ans> {
161161
self.is_subset_eq(ty, &coroutine_ty)
162162
}
163163

164+
/// Check if a type is a sequence type for pattern matching purposes (PEP 634).
165+
///
166+
/// Per PEP 634, sequence patterns match:
167+
/// - Builtins with Py_TPFLAGS_SEQUENCE: list, tuple, range, memoryview,
168+
/// collections.deque, array.array
169+
/// - Classes that inherit from collections.abc.Sequence
170+
/// - Classes registered as collections.abc.Sequence (cannot detect statically)
171+
///
172+
/// Explicitly excluded (even though they're sequences in other contexts):
173+
/// - str, bytes, bytearray
174+
///
175+
/// Warning: this returns `true` if the type is `Any` or a class that extends `Any`
176+
pub fn is_sequence_for_pattern(&self, ty: &Type) -> bool {
177+
// Handle special exclusions first - str, bytes, bytearray are NOT sequences
178+
// for pattern matching per PEP 634
179+
match ty {
180+
Type::ClassType(cls)
181+
if cls.is_builtin("str")
182+
|| cls.is_builtin("bytes")
183+
|| cls.is_builtin("bytearray") =>
184+
{
185+
return false;
186+
}
187+
Type::LiteralString(_) => return false,
188+
// Tuples are always sequences for pattern matching
189+
Type::Tuple(_) => return true,
190+
_ => {}
191+
}
192+
193+
// Check if the type is a subtype of Sequence[T] for some T
194+
let var = self.fresh_var();
195+
let sequence_ty = self.stdlib.sequence(var.to_type()).to_type();
196+
self.is_subset_eq(ty, &sequence_ty)
197+
}
198+
164199
/// Warning: this returns `Some` if the type is `Any` or a class that extends `Any`
165200
pub fn unwrap_coroutine(&self, ty: &Type) -> Option<(Type, Type, Type)> {
166201
let yield_ty = self.fresh_var();

pyrefly/lib/binding/narrow.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ pub enum AtomicNarrowOp {
8686
LenGte(Expr),
8787
LenLt(Expr),
8888
LenLte(Expr),
89+
/// Narrowing for sequence pattern matching - confirms the subject is a sequence type
90+
IsSequence,
91+
/// Negation of IsSequence - confirms the subject is NOT a sequence type
92+
IsNotSequence,
8993
/// (func, args) for a function call that may narrow the type of its first argument.
9094
Call(Box<Expr>, Arguments),
9195
NotCall(Box<Expr>, Arguments),
@@ -164,6 +168,8 @@ impl DisplayWith<ModuleInfo> for AtomicNarrowOp {
164168
AtomicNarrowOp::LenGte(expr) => write!(f, "LenGte({})", expr.display_with(ctx)),
165169
AtomicNarrowOp::LenLt(expr) => write!(f, "LenLt({})", expr.display_with(ctx)),
166170
AtomicNarrowOp::LenLte(expr) => write!(f, "LenLte({})", expr.display_with(ctx)),
171+
AtomicNarrowOp::IsSequence => write!(f, "IsSequence"),
172+
AtomicNarrowOp::IsNotSequence => write!(f, "IsNotSequence"),
167173
AtomicNarrowOp::Call(expr, arguments) => write!(
168174
f,
169175
"Call({}, {})",
@@ -233,6 +239,8 @@ impl AtomicNarrowOp {
233239
Self::LenLte(v) => Self::LenGt(v.clone()),
234240
Self::LenLt(v) => Self::LenGte(v.clone()),
235241
Self::LenNotEq(v) => Self::LenEq(v.clone()),
242+
Self::IsSequence => Self::IsNotSequence,
243+
Self::IsNotSequence => Self::IsSequence,
236244
Self::TypeGuard(ty, args) => Self::NotTypeGuard(ty.clone(), args.clone()),
237245
Self::NotTypeGuard(ty, args) => Self::TypeGuard(ty.clone(), args.clone()),
238246
Self::TypeIs(ty, args) => Self::NotTypeIs(ty.clone(), args.clone()),
@@ -419,7 +427,8 @@ impl NarrowOps {
419427
seen.insert(name.clone());
420428
self.and(name, op, range);
421429
}
422-
// For names present in `self` but not `other`, `And` their narrows with a placeholder
430+
// For names present in `self` but not `other`, `And` their narrows with a placeholder.
431+
// This ensures that if a sub-pattern can't prove something, we don't claim we proved it.
423432
let unmerged_names: Vec<_> = self
424433
.0
425434
.keys()

pyrefly/lib/binding/pattern.rs

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -119,25 +119,38 @@ impl<'a> BindingsBuilder<'a> {
119119
value: Number::Int(Int::from(num_non_star_patterns as u64)),
120120
});
121121
if let Some(subject) = &match_subject {
122-
// Narrow the match subject by length
123-
let narrow_op = if num_patterns == num_non_star_patterns {
122+
// Narrow the match subject by:
123+
// 1. IsSequence - confirms the subject is a sequence type
124+
// 2. Length - confirms the sequence has the right length
125+
let len_narrow_op = if num_patterns == num_non_star_patterns {
124126
AtomicNarrowOp::LenEq(synthesized_len)
125127
} else {
126128
AtomicNarrowOp::LenGte(synthesized_len)
127129
};
130+
131+
// Create a combined narrowing: IsSequence AND LenXxx
132+
let combined_narrow_op = NarrowOp::And(vec![
133+
NarrowOp::Atomic(None, AtomicNarrowOp::IsSequence),
134+
NarrowOp::Atomic(None, len_narrow_op.clone()),
135+
]);
136+
128137
subject_idx = self.insert_binding(
129138
Key::PatternNarrow(x.range()),
130139
Binding::Narrow(
131140
subject_idx,
132-
Box::new(NarrowOp::Atomic(None, narrow_op.clone())),
141+
Box::new(combined_narrow_op.clone()),
133142
NarrowUseLocation::Span(x.range()),
134143
),
135144
);
136-
narrow_ops.and_all(NarrowOps::from_single_narrow_op_for_subject(
137-
subject.clone(),
138-
narrow_op,
139-
x.range,
140-
));
145+
146+
// Add the combined narrow op to the returned narrow_ops.
147+
// We insert directly instead of using and_all twice to avoid
148+
// Placeholder issues when starting from an empty NarrowOps.
149+
let name = match subject {
150+
NarrowingSubject::Name(name) => name.clone(),
151+
NarrowingSubject::Facets(name, _) => name.clone(),
152+
};
153+
narrow_ops.0.insert(name, (combined_narrow_op, x.range));
141154
}
142155
let mut seen_star = false;
143156
for (i, x) in x.patterns.into_iter().enumerate() {
@@ -240,6 +253,24 @@ impl<'a> BindingsBuilder<'a> {
240253
NarrowUseLocation::Span(x.cls.range()),
241254
),
242255
);
256+
257+
// Check if this is a single-positional-slot builtin type
258+
// These types (bool, bytearray, bytes, dict, float, frozenset, int, list, set, str, tuple)
259+
// bind the entire narrowed value when used with a single positional pattern
260+
let is_single_slot_builtin = if let Expr::Name(name) = x.cls.as_ref() {
261+
SpecialExport::new(&name.id)
262+
.map(|se| se.is_single_positional_slot_builtin())
263+
.unwrap_or(false)
264+
} else {
265+
false
266+
};
267+
268+
// For single-slot builtins with exactly one positional arg, the pattern matches
269+
// all instances of the type, so we don't need a placeholder
270+
let is_exhaustive_single_slot = is_single_slot_builtin
271+
&& x.arguments.patterns.len() == 1
272+
&& x.arguments.keywords.is_empty();
273+
243274
let mut narrow_ops = if let Some(ref subject) = match_subject {
244275
let mut narrow_for_subject = NarrowOps::from_single_narrow_op_for_subject(
245276
subject.clone(),
@@ -249,8 +280,10 @@ impl<'a> BindingsBuilder<'a> {
249280
// We're not sure whether the pattern matches all possible instances of a class, and
250281
// the placeholder prevents negative narrowing from removing the class in later branches.
251282
// However, if there are no arguments, it's just an isinstance check, so we don't need
252-
// the placeholder.
253-
if !x.arguments.patterns.is_empty() || !x.arguments.keywords.is_empty() {
283+
// the placeholder. Similarly, single-slot builtins with one positional arg are exhaustive.
284+
if (!x.arguments.patterns.is_empty() || !x.arguments.keywords.is_empty())
285+
&& !is_exhaustive_single_slot
286+
{
254287
let placeholder = NarrowOps::from_single_narrow_op_for_subject(
255288
subject.clone(),
256289
AtomicNarrowOp::Placeholder,
@@ -263,30 +296,19 @@ impl<'a> BindingsBuilder<'a> {
263296
NarrowOps::new()
264297
};
265298

266-
// Check if this is a single-positional-slot builtin type
267-
// These types (bool, bytearray, bytes, dict, float, frozenset, int, list, set, str, tuple)
268-
// bind the entire narrowed value when used with a single positional pattern
269-
let is_single_slot_builtin = if let Expr::Name(name) = x.cls.as_ref() {
270-
SpecialExport::new(&name.id)
271-
.map(|se| se.is_single_positional_slot_builtin())
272-
.unwrap_or(false)
273-
} else {
274-
false
275-
};
276-
277299
// Handle positional patterns
278-
if is_single_slot_builtin
279-
&& x.arguments.patterns.len() == 1
280-
&& x.arguments.keywords.is_empty()
281-
{
300+
if is_exhaustive_single_slot {
282301
// For single-positional-slot builtins with exactly one positional pattern,
283302
// bind the pattern directly to the narrowed subject (like MatchAs)
284303
let pattern = x.arguments.patterns.into_iter().next().unwrap();
285-
narrow_ops.and_all(self.bind_pattern(
286-
match_subject.clone(),
287-
pattern,
288-
subject_idx,
289-
));
304+
let inner_narrow_ops =
305+
self.bind_pattern(match_subject.clone(), pattern, subject_idx);
306+
// Only combine if the inner pattern produced narrow ops.
307+
// If it's empty (e.g., a simple MatchAs like `value`), we don't want
308+
// and_all to add Placeholders that would invalidate our outer narrow.
309+
if !inner_narrow_ops.0.is_empty() {
310+
narrow_ops.and_all(inner_narrow_ops);
311+
}
290312
return narrow_ops;
291313
}
292314
// Normal MatchClass handling

0 commit comments

Comments
 (0)