Skip to content

Commit 1123357

Browse files
committed
Extended SSA optimization with multi-insn analysis
This transforms SSA optimizations from single-instruction to multi-instruction analysis for improved code generation:

Extended Load-After-Store Forwarding:
- Analyze up to 10 instructions backwards for forwarding opportunities
- Validate safety across intervening instructions and function calls
- Eliminate redundant memory operations across instruction sequences

Full Basic Block Load Elimination:
- Scan entire basic blocks for duplicate loads
- Reuse the first load result when no intervening stores are detected
- Significantly reduce memory traffic

Comprehensive Algebraic Simplifications:
- Complete set of self-operations (div, mod, all comparisons)
- Full identity operations (x+0, x*1, x&-1, etc.)
- Handle constants in both operand positions
- Transform operations to simpler forms (0-x → -x, x*-1 → -x)
1 parent 2df7780 commit 1123357

File tree

1 file changed

+361
-0
lines changed

1 file changed

+361
-0
lines changed

src/ssa.c

Lines changed: 361 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1911,6 +1911,367 @@ void optimize(void)
19111911
}
19121912
}
19131913

1914+
/* Extended load-after-store forwarding with multi-instruction
1915+
* analysis */
1916+
/* Search backwards for any store to the same location */
1917+
if (insn->opcode == OP_load && insn->rs1) {
1918+
insn_t *search = insn->prev;
1919+
int search_limit = 10; /* Look back up to 10 instructions */
1920+
1921+
while (search && search_limit > 0) {
1922+
/* Found a store to the same location */
1923+
if ((search->opcode == OP_store ||
1924+
search->opcode == OP_global_store) &&
1925+
search->rd == insn->rs1 && search->rs1) {
1926+
/* Check if the stored location is not modified
1927+
* between store and load */
1928+
bool safe_to_forward = true;
1929+
insn_t *check = search->next;
1930+
while (check && check != insn) {
1931+
/* Check for any instruction that might modify
1932+
* the location */
1933+
if (check->rd == insn->rs1 &&
1934+
(check->opcode == OP_store ||
1935+
check->opcode == OP_global_store ||
1936+
check->opcode == OP_write)) {
1937+
safe_to_forward = false;
1938+
break;
1939+
}
1940+
/* Check for function calls that might have side
1941+
* effects */
1942+
if (check->opcode == OP_call ||
1943+
check->opcode == OP_indirect) {
1944+
safe_to_forward = false;
1945+
break;
1946+
}
1947+
check = check->next;
1948+
}
1949+
1950+
if (safe_to_forward) {
1951+
/* Forward the stored value */
1952+
insn->opcode = OP_assign;
1953+
insn->rs1 = search->rs1;
1954+
insn->rs2 = NULL;
1955+
break;
1956+
}
1957+
}
1958+
1959+
/* Stop if we hit a call or branch */
1960+
if (search->opcode == OP_call ||
1961+
search->opcode == OP_branch ||
1962+
search->opcode == OP_jump ||
1963+
search->opcode == OP_indirect) {
1964+
break;
1965+
}
1966+
1967+
search = search->prev;
1968+
search_limit--;
1969+
}
1970+
}
1971+
1972+
/* Redundant load elimination */
1973+
/* If we load from the same location multiple times, reuse the
1974+
* first load
1975+
*/
1976+
if (insn->opcode == OP_load && insn->rs1 && insn->rd) {
1977+
insn_t *search = bb->insn_list.head;
1978+
1979+
while (search && search != insn) {
1980+
/* Found an earlier load from the same location */
1981+
if (search->opcode == OP_load &&
1982+
search->rs1 == insn->rs1 && search->rd) {
1983+
/* Check if the location wasn't modified between
1984+
* loads */
1985+
bool safe_to_reuse = true;
1986+
insn_t *check = search->next;
1987+
1988+
while (check && check != insn) {
1989+
if ((check->opcode == OP_store ||
1990+
check->opcode == OP_global_store ||
1991+
check->opcode == OP_write) &&
1992+
check->rd == insn->rs1) {
1993+
safe_to_reuse = false;
1994+
break;
1995+
}
1996+
/* Function calls might modify memory */
1997+
if (check->opcode == OP_call ||
1998+
check->opcode == OP_indirect) {
1999+
safe_to_reuse = false;
2000+
break;
2001+
}
2002+
check = check->next;
2003+
}
2004+
2005+
if (safe_to_reuse) {
2006+
/* Replace load with assignment from previous
2007+
* load */
2008+
insn->opcode = OP_assign;
2009+
insn->rs1 = search->rd;
2010+
insn->rs2 = NULL;
2011+
break;
2012+
}
2013+
}
2014+
search = search->next;
2015+
}
2016+
}
2017+
2018+
/* Self-operation optimizations at SSA level */
2019+
/* These patterns must be handled at SSA level for correct
2020+
* self-hosting despite some duplication with peephole optimizer
2021+
*/
2022+
if (insn->rs1 && insn->rs2 && insn->rs1 == insn->rs2) {
2023+
/* x - x = 0 */
2024+
if (insn->opcode == OP_sub && insn->rd) {
2025+
insn->opcode = OP_load_constant;
2026+
insn->rd->is_const = true;
2027+
insn->rd->init_val = 0;
2028+
insn->rs1 = NULL;
2029+
insn->rs2 = NULL;
2030+
}
2031+
/* x ^ x = 0 */
2032+
else if (insn->opcode == OP_bit_xor && insn->rd) {
2033+
insn->opcode = OP_load_constant;
2034+
insn->rd->is_const = true;
2035+
insn->rd->init_val = 0;
2036+
insn->rs1 = NULL;
2037+
insn->rs2 = NULL;
2038+
}
2039+
/* x & x = x */
2040+
else if (insn->opcode == OP_bit_and && insn->rd) {
2041+
insn->opcode = OP_assign;
2042+
insn->rs2 = NULL;
2043+
}
2044+
/* x | x = x */
2045+
else if (insn->opcode == OP_bit_or && insn->rd) {
2046+
insn->opcode = OP_assign;
2047+
insn->rs2 = NULL;
2048+
}
2049+
/* x / x = 1 (if x != 0) */
2050+
else if (insn->opcode == OP_div && insn->rd) {
2051+
insn->opcode = OP_load_constant;
2052+
insn->rd->is_const = true;
2053+
insn->rd->init_val = 1;
2054+
insn->rs1 = NULL;
2055+
insn->rs2 = NULL;
2056+
}
2057+
/* x % x = 0 */
2058+
else if (insn->opcode == OP_mod && insn->rd) {
2059+
insn->opcode = OP_load_constant;
2060+
insn->rd->is_const = true;
2061+
insn->rd->init_val = 0;
2062+
insn->rs1 = NULL;
2063+
insn->rs2 = NULL;
2064+
}
2065+
/* x == x = 1 */
2066+
else if (insn->opcode == OP_eq && insn->rd) {
2067+
insn->opcode = OP_load_constant;
2068+
insn->rd->is_const = true;
2069+
insn->rd->init_val = 1;
2070+
insn->rs1 = NULL;
2071+
insn->rs2 = NULL;
2072+
}
2073+
/* x != x = 0 */
2074+
else if (insn->opcode == OP_neq && insn->rd) {
2075+
insn->opcode = OP_load_constant;
2076+
insn->rd->is_const = true;
2077+
insn->rd->init_val = 0;
2078+
insn->rs1 = NULL;
2079+
insn->rs2 = NULL;
2080+
}
2081+
/* x < x = 0, x > x = 0 */
2082+
else if ((insn->opcode == OP_lt || insn->opcode == OP_gt) &&
2083+
insn->rd) {
2084+
insn->opcode = OP_load_constant;
2085+
insn->rd->is_const = true;
2086+
insn->rd->init_val = 0;
2087+
insn->rs1 = NULL;
2088+
insn->rs2 = NULL;
2089+
}
2090+
/* x <= x = 1, x >= x = 1 */
2091+
else if ((insn->opcode == OP_leq ||
2092+
insn->opcode == OP_geq) &&
2093+
insn->rd) {
2094+
insn->opcode = OP_load_constant;
2095+
insn->rd->is_const = true;
2096+
insn->rd->init_val = 1;
2097+
insn->rs1 = NULL;
2098+
insn->rs2 = NULL;
2099+
}
2100+
}
2101+
2102+
/* Comprehensive algebraic simplifications with identity
2103+
* operations */
2104+
if (insn->rs2 && insn->rs2->is_const && insn->rd) {
2105+
int val = insn->rs2->init_val;
2106+
2107+
/* x + 0 = x, x - 0 = x, x | 0 = x, x ^ 0 = x */
2108+
if (val == 0) {
2109+
if (insn->opcode == OP_add || insn->opcode == OP_sub ||
2110+
insn->opcode == OP_bit_or ||
2111+
insn->opcode == OP_bit_xor) {
2112+
insn->opcode = OP_assign;
2113+
insn->rs2 = NULL;
2114+
}
2115+
/* x * 0 = 0, x & 0 = 0 */
2116+
else if (insn->opcode == OP_mul ||
2117+
insn->opcode == OP_bit_and) {
2118+
insn->opcode = OP_load_constant;
2119+
insn->rd->is_const = true;
2120+
insn->rd->init_val = 0;
2121+
insn->rs1 = NULL;
2122+
insn->rs2 = NULL;
2123+
}
2124+
/* x << 0 = x, x >> 0 = x */
2125+
else if (insn->opcode == OP_lshift ||
2126+
insn->opcode == OP_rshift) {
2127+
insn->opcode = OP_assign;
2128+
insn->rs2 = NULL;
2129+
}
2130+
}
2131+
/* x * 1 = x, x / 1 = x */
2132+
else if (val == 1) {
2133+
if (insn->opcode == OP_mul || insn->opcode == OP_div) {
2134+
insn->opcode = OP_assign;
2135+
insn->rs2 = NULL;
2136+
}
2137+
}
2138+
/* x & -1 = x (all bits set) */
2139+
else if (val == -1) {
2140+
if (insn->opcode == OP_bit_and) {
2141+
insn->opcode = OP_assign;
2142+
insn->rs2 = NULL;
2143+
}
2144+
/* x | -1 = -1 */
2145+
else if (insn->opcode == OP_bit_or) {
2146+
insn->opcode = OP_load_constant;
2147+
insn->rd->is_const = true;
2148+
insn->rd->init_val = -1;
2149+
insn->rs1 = NULL;
2150+
insn->rs2 = NULL;
2151+
}
2152+
}
2153+
/* x * -1 = -x */
2154+
else if (val == -1 && insn->opcode == OP_mul) {
2155+
insn->opcode = OP_negate;
2156+
insn->rs2 = NULL;
2157+
}
2158+
}
2159+
2160+
/* Simplifications with rs1 constant */
2161+
if (insn->rs1 && insn->rs1->is_const && insn->rd) {
2162+
int val = insn->rs1->init_val;
2163+
2164+
/* 0 + x = x, 0 | x = x, 0 ^ x = x */
2165+
if (val == 0) {
2166+
if (insn->opcode == OP_add ||
2167+
insn->opcode == OP_bit_or ||
2168+
insn->opcode == OP_bit_xor) {
2169+
insn->opcode = OP_assign;
2170+
insn->rs1 = insn->rs2;
2171+
insn->rs2 = NULL;
2172+
}
2173+
/* 0 * x = 0, 0 & x = 0, 0 / x = 0 */
2174+
else if (insn->opcode == OP_mul ||
2175+
insn->opcode == OP_bit_and ||
2176+
insn->opcode == OP_div) {
2177+
insn->opcode = OP_load_constant;
2178+
insn->rd->is_const = true;
2179+
insn->rd->init_val = 0;
2180+
insn->rs1 = NULL;
2181+
insn->rs2 = NULL;
2182+
}
2183+
/* 0 - x = -x */
2184+
else if (insn->opcode == OP_sub) {
2185+
insn->opcode = OP_negate;
2186+
insn->rs1 = insn->rs2;
2187+
insn->rs2 = NULL;
2188+
}
2189+
}
2190+
/* 1 * x = x */
2191+
else if (val == 1 && insn->opcode == OP_mul) {
2192+
insn->opcode = OP_assign;
2193+
insn->rs1 = insn->rs2;
2194+
insn->rs2 = NULL;
2195+
}
2196+
/* -1 & x = x */
2197+
else if (val == -1 && insn->opcode == OP_bit_and) {
2198+
insn->opcode = OP_assign;
2199+
insn->rs1 = insn->rs2;
2200+
insn->rs2 = NULL;
2201+
}
2202+
}
2203+
2204+
/* Phi node optimization - eliminate trivial phi nodes */
2205+
if (insn->opcode == OP_phi && insn->phi_ops) {
2206+
/* Count unique operands and check for constants */
2207+
var_t *first_var = insn->phi_ops->var;
2208+
bool all_same = true;
2209+
bool all_const = true;
2210+
int const_val = 0;
2211+
int num_ops = 0;
2212+
2213+
for (phi_operand_t *op = insn->phi_ops; op; op = op->next) {
2214+
num_ops++;
2215+
/* Check if all same variable */
2216+
if (op != insn->phi_ops && op->var != first_var)
2217+
all_same = false;
2218+
/* Check if all same constant */
2219+
if (op->var && op->var->is_const) {
2220+
if (op == insn->phi_ops) {
2221+
const_val = op->var->init_val;
2222+
} else if (op->var->init_val != const_val) {
2223+
all_const = false;
2224+
}
2225+
} else {
2226+
all_const = false;
2227+
}
2228+
}
2229+
2230+
/* Eliminate trivial phi - all operands are the same var */
2231+
if (all_same && first_var && num_ops > 0) {
2232+
insn->opcode = OP_assign;
2233+
insn->rs1 = first_var;
2234+
insn->rs2 = NULL;
2235+
insn->phi_ops = NULL;
2236+
}
2237+
/* Constant phi - all operands have the same constant value
2238+
*/
2239+
else if (all_const && num_ops > 0 && insn->rd) {
2240+
insn->opcode = OP_load_constant;
2241+
insn->rd->is_const = true;
2242+
insn->rd->init_val = const_val;
2243+
insn->rs1 = NULL;
2244+
insn->rs2 = NULL;
2245+
insn->phi_ops = NULL;
2246+
}
2247+
}
2248+
2249+
/* Strength reduction for division and modulo by power of 2 */
2250+
if (insn->rs2 && insn->rs2->is_const) {
2251+
int divisor = insn->rs2->init_val;
2252+
2253+
/* Check if divisor is positive power of 2 */
2254+
if (divisor > 0 && (divisor & (divisor - 1)) == 0) {
2255+
/* Find shift amount */
2256+
int shift = 0;
2257+
int tmp = divisor;
2258+
while (tmp > 1) {
2259+
tmp >>= 1;
2260+
shift++;
2261+
}
2262+
2263+
if (insn->opcode == OP_div) {
2264+
/* Convert division to right shift */
2265+
insn->opcode = OP_rshift;
2266+
insn->rs2->init_val = shift;
2267+
} else if (insn->opcode == OP_mod) {
2268+
/* Convert modulo to bitwise AND */
2269+
insn->opcode = OP_bit_and;
2270+
insn->rs2->init_val = divisor - 1;
2271+
}
2272+
}
2273+
}
2274+
19142275
/* more optimizations */
19152276
}
19162277
}

0 commit comments

Comments
 (0)