Commit d6f5b7f
Be more careful about iterator invalidation during recursive invalidation (JuliaLang#57934)

It is possible for one MethodInstance to have a backedge to itself (`typejoin` is a prime example). This wasn't much of a problem prior to JuliaLang#57625, because we would simply delete the entire backedges list at the top of invalidation. Now that we are more selective, however, we must not move backedges around while a frame higher on the stack may still be looking at the array. To that end, move the compaction part of deletion into a separate pass: while a frame higher on the stack may be iterating, backedges are only deleted in place, never moved. Fixes JuliaLang#57696.
1 parent: 13311f3
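The fix follows a classic pattern for containers that may be mutated during a recursive traversal: delete by tombstoning in place, and defer compaction until the outermost walker finishes. A minimal, self-contained C sketch of that pattern (all names here are invented for illustration; this is not the Julia runtime's code):

#include <stddef.h>

#define EDGE_TOMBSTONE (-1)

typedef struct {
    int *edges;
    size_t len;
    int dirty; /* set when entries were tombstoned and compaction is pending */
} edge_list;

/* Delete in place: indices held by frames higher on the stack stay valid,
 * because nothing is moved. */
static void delete_edge(edge_list *l, size_t i) {
    l->edges[i] = EDGE_TOMBSTONE;
    l->dirty = 1;
}

/* Compact in a separate pass, run only once no frame is iterating `edges`. */
static void compact_edges(edge_list *l) {
    if (!l->dirty)
        return;
    size_t ins = 0;
    for (size_t i = 0; i < l->len; i++)
        if (l->edges[i] != EDGE_TOMBSTONE)
            l->edges[ins++] = l->edges[i];
    l->len = ins;
    l->dirty = 0;
}

In the commit itself, the tombstone is a cleared entry written by the new clear_next_edge, the dirty bit is MI_FLAG_BACKEDGES_DIRTY, and the compaction pass is deferred via jl_mi_done_backedges once no frame is walking the list.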

File tree: 6 files changed, +204 −107 lines

Compiler/test/invalidation.jl: 5 additions & 0 deletions

@@ -283,3 +283,8 @@ begin take!(GLOBAL_BUFFER)
     @test isnothing(pr48932_caller_inlined(42))
     @test "42" == String(take!(GLOBAL_BUFFER))
 end
+
+# Issue #57696
+# This test checks for invalidation of recursive backedges. However, unfortunately, the original failure
+# manifestation was an unreliable segfault or an assertion failure, so we don't have a more compact test.
+@test success(`$(Base.julia_cmd()) -e 'Base.typejoin(x, ::Type) = 0; exit()'`)

src/gf.c: 134 additions & 103 deletions

@@ -1831,43 +1831,133 @@ JL_DLLEXPORT void jl_invalidate_code_instance(jl_code_instance_t *replaced, size
 }
 
 static void _invalidate_backedges(jl_method_instance_t *replaced_mi, jl_code_instance_t *replaced_ci, size_t max_world, int depth) {
-    jl_array_t *backedges = replaced_mi->backedges;
-    if (backedges) {
-        // invalidate callers (if any)
+    uint8_t recursion_flags = 0;
+    jl_array_t *backedges = jl_mi_get_backedges_mutate(replaced_mi, &recursion_flags);
+    if (!backedges)
+        return;
+    // invalidate callers (if any)
+    if (!replaced_ci) {
+        // We know all backedges are deleted - clear them eagerly
+        // Clears both array and flags
         replaced_mi->backedges = NULL;
-        JL_GC_PUSH1(&backedges);
-        size_t i = 0, l = jl_array_nrows(backedges);
-        size_t ins = 0;
-        jl_code_instance_t *replaced;
-        while (i < l) {
-            jl_value_t *invokesig = NULL;
-            i = get_next_edge(backedges, i, &invokesig, &replaced);
-            JL_GC_PROMISE_ROOTED(replaced); // propagated by get_next_edge from backedges
-            if (replaced_ci) {
-                // If we're invalidating a particular codeinstance, only invalidate
-                // this backedge it actually has an edge for our codeinstance.
-                jl_svec_t *edges = jl_atomic_load_relaxed(&replaced->edges);
-                for (size_t j = 0; j < jl_svec_len(edges); ++j) {
-                    jl_value_t *edge = jl_svecref(edges, j);
-                    if (edge == (jl_value_t*)replaced_mi || edge == (jl_value_t*)replaced_ci)
-                        goto found;
-                }
-                // Keep this entry in the backedge list, but compact it
-                ins = set_next_edge(backedges, ins, invokesig, replaced);
-                continue;
-            found:;
+        jl_atomic_fetch_and_relaxed(&replaced_mi->flags, ~MI_FLAG_BACKEDGES_ALL);
+    }
+    JL_GC_PUSH1(&backedges);
+    size_t i = 0, l = jl_array_nrows(backedges);
+    size_t ins = 0;
+    jl_code_instance_t *replaced;
+    while (i < l) {
+        jl_value_t *invokesig = NULL;
+        i = get_next_edge(backedges, i, &invokesig, &replaced);
+        if (!replaced) {
+            ins = i;
+            continue;
+        }
+        JL_GC_PROMISE_ROOTED(replaced); // propagated by get_next_edge from backedges
+        if (replaced_ci) {
+            // If we're invalidating a particular codeinstance, only invalidate
+            // this backedge it actually has an edge for our codeinstance.
+            jl_svec_t *edges = jl_atomic_load_relaxed(&replaced->edges);
+            for (size_t j = 0; j < jl_svec_len(edges); ++j) {
+                jl_value_t *edge = jl_svecref(edges, j);
+                if (edge == (jl_value_t*)replaced_mi || edge == (jl_value_t*)replaced_ci)
+                    goto found;
             }
-            invalidate_code_instance(replaced, max_world, depth);
+            ins = set_next_edge(backedges, ins, invokesig, replaced);
+            continue;
+        found:;
+            ins = clear_next_edge(backedges, ins, invokesig, replaced);
+            jl_atomic_fetch_or(&replaced_mi->flags, MI_FLAG_BACKEDGES_DIRTY);
+            /* fallthrough */
+        }
+        invalidate_code_instance(replaced, max_world, depth);
+        if (replaced_ci && !replaced_mi->backedges) {
+            // Fast-path early out. If `invalidate_code_instance` invalidated
+            // the entire mi via a recursive edge, there's no point to keep
+            // iterating - they'll already have been invalidated.
+            break;
         }
-        if (replaced_ci && ins != 0) {
-            jl_array_del_end(backedges, l - ins);
-            // If we're only invalidating one ci, we don't know which ci any particular
-            // backedge was for, so we can't delete them. Put them back.
-            replaced_mi->backedges = backedges;
-            jl_gc_wb(replaced_mi, backedges);
-        }
-        JL_GC_POP();
     }
+    if (replaced_ci)
+        jl_mi_done_backedges(replaced_mi, recursion_flags);
+    JL_GC_POP();
+}
+
+enum morespec_options {
+    morespec_unknown,
+    morespec_isnot,
+    morespec_is
+};
+
+// check if `type` is replacing `m` with an ambiguity here, given other methods in `d` that already match it
+static int is_replacing(char ambig, jl_value_t *type, jl_method_t *m, jl_method_t *const *d, size_t n, jl_value_t *isect, jl_value_t *isect2, char *morespec)
+{
+    size_t k;
+    for (k = 0; k < n; k++) {
+        jl_method_t *m2 = d[k];
+        // see if m2 also fully covered this intersection
+        if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect2 && jl_subtype(isect2, m2->sig))))
+            continue;
+        if (morespec[k] == (char)morespec_unknown)
+            morespec[k] = (char)(jl_type_morespecific(m2->sig, type) ? morespec_is : morespec_isnot);
+        if (morespec[k] == (char)morespec_is)
+            // not actually shadowing this--m2 will still be better
+            return 0;
+        // if type is not more specific than m (thus now dominating it)
+        // then there is a new ambiguity here,
+        // since m2 was also a previous match over isect,
+        // see if m was previously dominant over all m2
+        // or if this was already ambiguous before
+        if (ambig != morespec_is && !jl_type_morespecific(m->sig, m2->sig)) {
+            // m and m2 were previously ambiguous over the full intersection of mi with type, and will still be ambiguous with addition of type
+            return 0;
+        }
+    }
+    return 1;
+}
+
+static int _invalidate_dispatch_backedges(jl_method_instance_t *mi, jl_value_t *type, jl_method_t *m,
+                                          jl_method_t *const *d, size_t n, int replaced_dispatch, int ambig,
+                                          size_t max_world, char *morespec)
+{
+    uint8_t backedge_recursion_flags = 0;
+    jl_array_t *backedges = jl_mi_get_backedges_mutate(mi, &backedge_recursion_flags);
+    if (!backedges)
+        return 0;
+    size_t ib = 0, insb = 0, nb = jl_array_nrows(backedges);
+    jl_value_t *invokeTypes;
+    jl_code_instance_t *caller;
+    int invalidated_any = 0;
+    while (mi->backedges && ib < nb) {
+        ib = get_next_edge(backedges, ib, &invokeTypes, &caller);
+        if (!caller) {
+            insb = ib;
+            continue;
+        }
+        JL_GC_PROMISE_ROOTED(caller); // propagated by get_next_edge from backedges
+        int replaced_edge;
+        if (invokeTypes) {
+            // n.b. normally we must have mi.specTypes <: invokeTypes <: m.sig (though it might not strictly hold), so we only need to check the other subtypes
+            if (jl_egal(invokeTypes, jl_get_ci_mi(caller)->def.method->sig))
+                replaced_edge = 0; // if invokeTypes == m.sig, then the only way to change this invoke is to replace the method itself
+            else
+                replaced_edge = jl_subtype(invokeTypes, type) && is_replacing(ambig, type, m, d, n, invokeTypes, NULL, morespec);
+        }
+        else {
+            replaced_edge = replaced_dispatch;
+        }
+        if (replaced_edge) {
+            invalidate_code_instance(caller, max_world, 1);
+            insb = clear_next_edge(backedges, insb, invokeTypes, caller);
+            jl_atomic_fetch_or(&mi->flags, MI_FLAG_BACKEDGES_DIRTY);
+            invalidated_any = 1;
+        }
+        else {
+            insb = set_next_edge(backedges, insb, invokeTypes, caller);
+        }
+    }
+    jl_mi_done_backedges(mi, backedge_recursion_flags);
+    return invalidated_any;
+}
 
 // invalidate cached methods that overlap this definition

@@ -1898,20 +1988,22 @@ JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee,
     JL_LOCK(&callee->def.method->writelock);
     if (jl_atomic_load_relaxed(&allow_new_worlds)) {
         int found = 0;
+        jl_array_t *backedges = jl_mi_get_backedges(callee);
         // TODO: use jl_cache_type_(invokesig) like cache_method does to save memory
-        if (!callee->backedges) {
+        if (!backedges) {
             // lazy-init the backedges array
-            callee->backedges = jl_alloc_vec_any(0);
-            jl_gc_wb(callee, callee->backedges);
+            backedges = jl_alloc_vec_any(0);
+            callee->backedges = backedges;
+            jl_gc_wb(callee, backedges);
         }
         else {
-            size_t i = 0, l = jl_array_nrows(callee->backedges);
+            size_t i = 0, l = jl_array_nrows(backedges);
             for (i = 0; i < l; i++) {
                 // optimized version of while (i < l) i = get_next_edge(callee->backedges, i, &invokeTypes, &mi);
-                jl_value_t *ciedge = jl_array_ptr_ref(callee->backedges, i);
+                jl_value_t *ciedge = jl_array_ptr_ref(backedges, i);
                 if (ciedge != (jl_value_t*)caller)
                     continue;
-                jl_value_t *invokeTypes = i > 0 ? jl_array_ptr_ref(callee->backedges, i - 1) : NULL;
+                jl_value_t *invokeTypes = i > 0 ? jl_array_ptr_ref(backedges, i - 1) : NULL;
                 if (invokeTypes && jl_is_method_instance(invokeTypes))
                     invokeTypes = NULL;
                 if ((invokesig == NULL && invokeTypes == NULL) ||

@@ -1922,7 +2014,7 @@ JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee,
             }
         }
         if (!found)
-            push_edge(callee->backedges, invokesig, caller);
+            push_edge(backedges, invokesig, caller);
     }
     JL_UNLOCK(&callee->def.method->writelock);
 }

@@ -2111,13 +2203,13 @@ static int erase_method_backedges(jl_typemap_entry_t *def, void *closure)
         for (i = 0; i < l; i++) {
             jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
             if ((jl_value_t*)mi != jl_nothing) {
-                mi->backedges = NULL;
+                mi->backedges = 0;
             }
         }
     }
     else {
        jl_method_instance_t *mi = (jl_method_instance_t*)specializations;
-        mi->backedges = NULL;
+        mi->backedges = 0;
     }
     JL_UNLOCK(&method->writelock);
     return 1;

@@ -2189,39 +2281,6 @@ static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **is
     return 1;
 }
 
-enum morespec_options {
-    morespec_unknown,
-    morespec_isnot,
-    morespec_is
-};
-
-// check if `type` is replacing `m` with an ambiguity here, given other methods in `d` that already match it
-static int is_replacing(char ambig, jl_value_t *type, jl_method_t *m, jl_method_t *const *d, size_t n, jl_value_t *isect, jl_value_t *isect2, char *morespec)
-{
-    size_t k;
-    for (k = 0; k < n; k++) {
-        jl_method_t *m2 = d[k];
-        // see if m2 also fully covered this intersection
-        if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect2 && jl_subtype(isect2, m2->sig))))
-            continue;
-        if (morespec[k] == (char)morespec_unknown)
-            morespec[k] = (char)(jl_type_morespecific(m2->sig, type) ? morespec_is : morespec_isnot);
-        if (morespec[k] == (char)morespec_is)
-            // not actually shadowing this--m2 will still be better
-            return 0;
-        // if type is not more specific than m (thus now dominating it)
-        // then there is a new ambiguity here,
-        // since m2 was also a previous match over isect,
-        // see if m was previously dominant over all m2
-        // or if this was already ambiguous before
-        if (ambig != morespec_is && !jl_type_morespecific(m->sig, m2->sig)) {
-            // m and m2 were previously ambiguous over the full intersection of mi with type, and will still be ambiguous with addition of type
-            return 0;
-        }
-    }
-    return 1;
-}
-
 jl_typemap_entry_t *jl_method_table_add(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype)
 {
     JL_TIMING(ADD_METHOD, ADD_METHOD);

@@ -2386,35 +2445,7 @@ void jl_method_table_activate(jl_methtable_t *mt, jl_typemap_entry_t *newentry)
                 // found that this specialization dispatch got replaced by m
                 // call invalidate_backedges(mi, max_world, "jl_method_table_insert");
                 // but ignore invoke-type edges
-                jl_array_t *backedges = mi->backedges;
-                if (backedges) {
-                    size_t ib = 0, insb = 0, nb = jl_array_nrows(backedges);
-                    jl_value_t *invokeTypes;
-                    jl_code_instance_t *caller;
-                    while (ib < nb) {
-                        ib = get_next_edge(backedges, ib, &invokeTypes, &caller);
-                        JL_GC_PROMISE_ROOTED(caller); // propagated by get_next_edge from backedges
-                        int replaced_edge;
-                        if (invokeTypes) {
-                            // n.b. normally we must have mi.specTypes <: invokeTypes <: m.sig (though it might not strictly hold), so we only need to check the other subtypes
-                            if (jl_egal(invokeTypes, jl_get_ci_mi(caller)->def.method->sig))
-                                replaced_edge = 0; // if invokeTypes == m.sig, then the only way to change this invoke is to replace the method itself
-                            else
-                                replaced_edge = jl_subtype(invokeTypes, type) && is_replacing(ambig, type, m, d, n, invokeTypes, NULL, morespec);
-                        }
-                        else {
-                            replaced_edge = replaced_dispatch;
-                        }
-                        if (replaced_edge) {
-                            invalidate_code_instance(caller, max_world, 1);
-                            invalidated = 1;
-                        }
-                        else {
-                            insb = set_next_edge(backedges, insb, invokeTypes, caller);
-                        }
-                    }
-                    jl_array_del_end(backedges, nb - insb);
-                }
+                invalidated = _invalidate_dispatch_backedges(mi, type, m, d, n, replaced_dispatch, ambig, max_world, morespec);
                 jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi);
                 if (_jl_debug_method_invalidation && invalidated) {
                     jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi);

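To see why deletion must not move entries, consider the self-edge case from the commit message. A hypothetical trace of invalidating a MethodInstance mi that has a backedge to itself (as typejoin's does); the frame layout below is illustrative, not taken from a debugger:

/* outer frame */
_invalidate_backedges(mi, ci, ...)
    backedges = jl_mi_get_backedges_mutate(mi, &flags);  // sets MI_FLAG_BACKEDGES_INUSE
    i = get_next_edge(backedges, i, ...);                // holds an index into backedges
    invalidate_code_instance(replaced, ...)              // recurses through the self-edge
        /* inner frame, walking the same backedges array */
        _invalidate_backedges(mi, replaced_ci, ...)
            ins = clear_next_edge(backedges, ins, ...);  // tombstone in place, nothing moves
            jl_atomic_fetch_or(&mi->flags, MI_FLAG_BACKEDGES_DIRTY);
            jl_mi_done_backedges(mi, flags);             // INUSE was already set: compaction deferred
    /* outer frame resumes: its index i is still valid */
jl_mi_done_backedges(mi, flags);                         // last walker out can compact the array

Under the pre-JuliaLang#57625 scheme the whole list was detached up front, so the inner frame could not disturb the outer one; under the selective scheme, compacting inside the inner frame would shift entries underneath the outer frame's index, which is the corruption JuliaLang#57696 observed.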
src/julia.h: 4 additions & 1 deletion

@@ -404,13 +404,16 @@ struct _jl_method_instance_t {
     } def; // pointer back to the context for this code
     jl_value_t *specTypes; // argument types this was specialized for
     jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sig
-    jl_array_t *backedges; // list of code-instances which call this method-instance; `invoke` records (invokesig, caller) pairs
+    // list of code-instances which call this method-instance; `invoke` records (invokesig, caller) pairs
+    jl_array_t *backedges;
     _Atomic(struct _jl_code_instance_t*) cache;
     uint8_t cache_with_orig; // !cache_with_specTypes
 
     // flags for this method instance
     // bit 0: generated by an explicit `precompile(...)`
     // bit 1: dispatched
+    // bit 2: The ->backedges field is currently being walked higher up the stack - entries may be deleted, but not moved
+    // bit 3: The ->backedges field was modified and should be compacted when clearing bit 2
     _Atomic(uint8_t) flags;
 };
 #define JL_MI_FLAGS_MASK_PRECOMPILED 0x01
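The two new flag bits correspond to the MI_FLAG_BACKEDGES_INUSE (bit 2, 0b0100) and MI_FLAG_BACKEDGES_DIRTY (bit 3, 0b1000) masks introduced in src/julia_internal.h below; MI_FLAG_BACKEDGES_ALL (0b1100) covers both.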

src/julia_internal.h: 20 additions & 0 deletions

@@ -726,9 +726,29 @@ JL_DLLEXPORT void jl_resolve_definition_effects_in_ir(jl_array_t *stmts, jl_modu
 JL_DLLEXPORT int jl_maybe_add_binding_backedge(jl_binding_t *b, jl_value_t *edge, jl_method_t *in_method);
 JL_DLLEXPORT void jl_add_binding_backedge(jl_binding_t *b, jl_value_t *edge);
 
+static const uint8_t MI_FLAG_BACKEDGES_INUSE = 0b0100;
+static const uint8_t MI_FLAG_BACKEDGES_DIRTY = 0b1000;
+static const uint8_t MI_FLAG_BACKEDGES_ALL = 0b1100;
+
+STATIC_INLINE jl_array_t *jl_mi_get_backedges_mutate(jl_method_instance_t *mi JL_PROPAGATES_ROOT, uint8_t *flags) {
+    *flags = jl_atomic_load_relaxed(&mi->flags) & (MI_FLAG_BACKEDGES_ALL);
+    jl_array_t *ret = mi->backedges;
+    if (ret)
+        jl_atomic_fetch_or_relaxed(&mi->flags, MI_FLAG_BACKEDGES_INUSE);
+    return ret;
+}
+
+STATIC_INLINE jl_array_t *jl_mi_get_backedges(jl_method_instance_t *mi JL_PROPAGATES_ROOT) {
+    assert(!(jl_atomic_load_relaxed(&mi->flags) & MI_FLAG_BACKEDGES_ALL));
+    jl_array_t *ret = mi->backedges;
+    return ret;
+}
+
 int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_code_instance_t **caller) JL_NOTSAFEPOINT;
 int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_code_instance_t *caller);
+int clear_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_code_instance_t *caller);
 void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_code_instance_t *caller);
+void jl_mi_done_backedges(jl_method_instance_t *mi JL_PROPAGATES_ROOT, uint8_t old_flags);
 
 JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root);
 void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots);