From 5cf569218796cdc9116a00333480bef5a5c4b9c3 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Sun, 28 Sep 2025 16:48:21 -0500
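Subject: [PATCH 1/9] hypothetically enabled the math unit

Summary of what this patch changes in gs4510.vhdl:

* Default the math_unit_enable generic to true.
* Change the initial phase of math_output_counter (3 to 2) and
  prev_math_output_counter (2 to 1).
* Advance the math input/output counters only while math_unit_flags(0)
  is '0'. While it is '1' the counters are held at their reset values
  (input 0, output 2, previous output 1), and math_input_number and
  math_input_value are no longer updated.
* Commit math unit outputs to the math registers only when
  math_unit_flags(1) = '1' and math_unit_flags(0) = '0'.
* Move the "reg_math_write <= '0'" assignment into an else branch.
  It used to follow the if statement unconditionally, so it overrode
  the '1' assigned when the write toggle changed.
* Update the math cycle counter whenever math_output_counter = 1 and
  math_unit_flags(1) = '1', instead of only when the latch counter
  wraps. When the counter plus one equals reg_math_cycle_compare, set
  math_unit_flags(1 downto 0) to "01" and request a counter reset.
* Add reg_math_cycle_counter_reset, which is also set by CPU writes
  to $D7E1, and clear reg_math_cycle_counter when it is asserted.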

---
 src/vhdl/gs4510.vhdl | 77 ++++++++++++++++++++++++++++++++------------
 1 file changed, 57 insertions(+), 20 deletions(-)

diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl
index 77d89b3e2..01f042483 100755
--- a/src/vhdl/gs4510.vhdl
+++ b/src/vhdl/gs4510.vhdl
@@ -37,7 +37,7 @@ use work.victypes.all;
 
 entity gs4510 is
   generic(
-    math_unit_enable : boolean := false;
+    math_unit_enable : boolean := true;
     chipram_1mb : std_logic := '0';
 
     cpufrequency : integer := 40;
@@ -1471,9 +1471,11 @@ architecture Behavioural of gs4510 is
   -- We have the output counter out of phase with the input counter, so that we
   -- have time to catch an output, and store it, ready for presenting as an input
   -- very soon after.
+  -- note: for whatever reason the way this was phased meant that the math cycle would
+  -- count up by 1 before math unit 1 had actually output anything
   signal math_input_counter : integer range 0 to 15 := 0;
-  signal math_output_counter : integer range 0 to 15 := 3;
-  signal prev_math_output_counter : integer range 0 to 15 := 2;
+  signal math_output_counter : integer range 0 to 15 := 2;  -- originally 3
+  signal prev_math_output_counter : integer range 0 to 15 := 1;  -- originally 2
 
   signal math_input_number : integer range 0 to 15 := 0;
   signal math_input_value : unsigned(31 downto 0) := (others => '0');
@@ -1494,6 +1496,8 @@ architecture Behavioural of gs4510 is
   -- Count # of math cycles since cycle latch last written to
   signal reg_math_cycle_counter : unsigned(31 downto 0) := to_unsigned(0,32);
   signal reg_math_cycle_counter_plus_one : unsigned(31 downto 0) := to_unsigned(0,32);
+  -- Reset math cycle counters
+  signal reg_math_cycle_counter_reset : std_logic := '1';
   -- # of math cycles to trigger end of job / math interrupt
   signal reg_math_cycle_compare : unsigned(31 downto 0) := to_unsigned(0,32);
 
@@ -1687,23 +1691,38 @@ begin
       -- counters at the CPU speed.
 
       -- Present input value to all math units
-      if math_input_counter /= 15 then
-        math_input_counter <= math_input_counter + 1;
+      -- reset the counter if bit 0 is set (write enabled)
+      if math_unit_flags(0) = '0' then
+        if math_input_counter /= 15 then
+          math_input_counter <= math_input_counter + 1;
+        else
+          math_input_counter <= 0;
+        end if;
+        -- only update the input value and reg when the counter is running
+        -- to prevent register updates while the inputs are offline from messing with
+        -- the math unit's internal registers
+        math_input_number <= math_input_counter;
+        math_input_value <= reg_math_regs(math_input_counter);
+        report "MATH: Presenting math reg #" & integer'image(math_input_counter)
+          &" = $" & to_hstring(reg_math_regs(math_input_counter));
       else
         math_input_counter <= 0;
       end if;
-      math_input_number <= math_input_counter;
-      math_input_value <= reg_math_regs(math_input_counter);
-      report "MATH: Presenting math reg #" & integer'image(math_input_counter)
-        &" = $" & to_hstring(reg_math_regs(math_input_counter));
 
       -- Update output counter being shown to math units
-      if math_output_counter /= 15 then
-        math_output_counter <= math_output_counter + 1;
+      -- reset counters when bit 0 is set (write enabled)
+      if math_unit_flags(0) = '0' then
+        if math_output_counter /= 15 then
+          math_output_counter <= math_output_counter + 1;
+        else
+          math_output_counter <= 0;
+        end if;
+        prev_math_output_counter <= math_output_counter;
       else
-        math_output_counter <= 0;
+        math_output_counter <= 2;
+        prev_math_output_counter <= 1;
       end if;
-      prev_math_output_counter <= math_output_counter;
+
       -- Based on the configuration for the previously selected unit,
       -- stash the results in the appropriate place
       if true then
@@ -1715,7 +1734,8 @@ begin
           & std_logic'image(reg_math_config(prev_math_output_counter).latched) & ".";
       end if;
 
-      if math_unit_flags(1) = '1' then
+      -- Make sure output counter is running before starting to stash outputs, to avoid constantly writing a register
+      if math_unit_flags(1) = '1' and math_unit_flags(0) = '0' then
         if (reg_math_config_drive(prev_math_output_counter).latched='0') or (reg_math_latch_counter = x"00") then
           if reg_math_config_drive(prev_math_output_counter).output_high = '0' then
             if reg_math_config_drive(prev_math_output_counter).output_low = '0' then
@@ -1755,8 +1775,10 @@ begin
       if reg_math_write_toggle /= last_reg_math_write_toggle then
         last_reg_math_write_toggle <= reg_math_write_toggle;
         reg_math_write <= '1';
+      else
+        reg_math_write <= '0';
       end if;
-      reg_math_write <= '0';
+
       if math_unit_flags(0) = '1' then
         if reg_math_write = '1' then
           case reg_math_regbyte is
@@ -1773,18 +1795,32 @@ begin
       -- output to appear on the inputs again, i.e., once per lap of the input
       -- and output propagation.
       -- TODO: implement reg_math_cycle_counter_reset signal, see D7E1
-      reg_math_cycle_counter_plus_one <= reg_math_cycle_counter + 1;
       if math_output_counter = 1 then
         -- Decrement latch counter
         if reg_math_latch_counter = x"00" then
           reg_math_latch_counter <= reg_math_latch_interval;
-          -- And update math cycle counter, if math unit is active
-          if math_unit_flags(1) = '1' then
-            reg_math_cycle_counter <= reg_math_cycle_counter_plus_one;
-          end if;
         else
           reg_math_latch_counter <= reg_math_latch_counter - 1;
         end if;
+        -- And update math cycle counter, if math unit is active
+        -- include a case for the reset, to avoid a possible edge case resulting in a double-drive
+        if math_unit_flags(1) = '1' and reg_math_cycle_counter_reset = '0' then
+          if reg_math_cycle_counter_plus_one = reg_math_cycle_compare then
+            math_unit_flags(1 downto 0) <= "01";  -- disable calculation, enable writing to regs from CPU (disables counters)
+            reg_math_cycle_counter_reset <= '1';
+          else
+            reg_math_cycle_counter <= reg_math_cycle_counter_plus_one;
+          end if;
+        end if;
+      end if;
+
+      -- handle resetting the cycle counter, as well as updating reg_math_cycle_counter_plus_one, to avoid a multiple drive situation
+      if reg_math_cycle_counter_reset = '1' then
+        reg_math_cycle_counter <= (others => '0');
+        reg_math_cycle_counter_plus_one <= (0 => '1', others => '0');
+        reg_math_cycle_counter_reset <= '0';
+      else
+        reg_math_cycle_counter_plus_one <= reg_math_cycle_counter + 1;
       end if;
     end if;
   end process;
@@ -3582,6 +3618,7 @@ begin
           -- @IO:GS $D7E1.1 MATH:CALCEN Enable committing of output values from math units back to math registers (clearing effectively pauses iterative formulae)
           math_unit_flags <= value;
           -- reg_math_cycle_counter <= to_unsigned(0,32); -- TODO: Should generate a reg_math_cycle_counter_reset signal
+          reg_math_cycle_counter_reset <= '1';
         elsif long_address(7 downto 0) = x"E8" then
           reg_math_cycle_compare(7 downto 0) <= value;
         elsif long_address(7 downto 0) = x"E9" then

From ee020306cded41469580d7864c77387beaeb58f8 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Sun, 28 Sep 2025 21:43:15 -0500
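Subject: [PATCH 2/9] actually enabled the math unit

Summary of what this patch changes:

multiply32 / shifter32 / divider32:
* "unit" becomes a generic instead of a port.
* The output_select port is commented out. output_value is driven on
  every clock, defaults to all zeros, and is never set to 'Z'.
* The 33-bit sum/difference "s" is computed from the operands
  extended with their bit 31 instead of going through to_integer.

gs4510:
* Each math unit drives its own entry of the new math_output_values
  array. The register commit logic reads the entry selected by
  prev_math_output_counter instead of the shared
  math_output_value_low / math_output_value_high signals.
* The shifter and divider instances get their own labels
  (shift_unit, div_unit).
* math_unit_flags now starts at x"01" instead of x"03", and writes
  to $D7E1 update only bits 3 downto 0.
* The cycle comparator no longer writes math_unit_flags. It sets
  math_unit_halted to "not last_math_unit_halted" instead. While the
  two signals differ the counters are held in reset, CPU writes to
  the math registers are accepted, and $D7E1 reads back bits
  1 downto 0 as "01". A write to $D7E1 copies math_unit_halted into
  last_math_unit_halted.
* reg_math_cycle_counter_reset now starts at '0' and is pulsed from
  reg_math_cycle_counter_reset_toggle, which is flipped by writes to
  $D7E1.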

---
 src/vhdl/divider32.vhdl  |  20 ++++----
 src/vhdl/gs4510.vhdl     | 105 ++++++++++++++++++++++++---------------
 src/vhdl/multiply32.vhdl |  20 ++++----
 src/vhdl/shifter32.vhdl  |  20 ++++----
 4 files changed, 97 insertions(+), 68 deletions(-)

diff --git a/src/vhdl/divider32.vhdl b/src/vhdl/divider32.vhdl
index c00189422..cf60e2a62 100644
--- a/src/vhdl/divider32.vhdl
+++ b/src/vhdl/divider32.vhdl
@@ -28,16 +28,18 @@ use Std.TextIO.all;
 use work.debugtools.all;
   
 entity divider32 is
+  generic (
+    unit : integer range 0 to 15
+    );
   port (
     clock : in std_logic;
-    unit : in integer range 0 to 15;
     do_add : in std_logic;
     input_a : in integer range 0 to 15;
     input_b : in integer range 0 to 15;
     input_value_number : in integer range 0 to 15;
     input_value : unsigned(31 downto 0);
-    output_select : in integer range 0 to 15;
-    output_value : out unsigned(63 downto 0)
+    -- output_select : in integer range 0 to 15;
+    output_value : out unsigned(63 downto 0) := (others => '0')
     );
 end entity;
 
@@ -84,13 +86,13 @@ begin
       p <= p4;
       -- Even units do addition, odd ones do subtraction
       if (unit mod 2) = 0 then
-        s <= to_unsigned(to_integer(a)+to_integer(b),33);
+        s <= unsigned((a(31) & a)+(b(31) & b));
       else
-        s <= to_unsigned(to_integer(a)-to_integer(b),33);
+        s <= unsigned((a(31) & a)-(b(31) & b));
       end if;
 
       -- Display output value when requested, and tri-state outputs otherwise
-      if output_select = unit then
+      -- if output_select = unit then
         if do_add='1' then
           -- Output sign-extended 33 bit addition result
           output_value(63 downto 33) <= (others => s(32));
@@ -102,9 +104,9 @@ begin
           report "MATH: Unit #" & integer'image(unit)
             & " outputting multiplication product $" & to_hstring(unsigned(p));
         end if;
-      else
-        output_value <= (others => 'Z');
-      end if;
+      -- else
+      --   output_value <= (others => 'Z');
+      -- end if;
     end if;
   end process;
 end neo_gregorian;
diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl
index 01f042483..41a287c54 100755
--- a/src/vhdl/gs4510.vhdl
+++ b/src/vhdl/gs4510.vhdl
@@ -1462,6 +1462,7 @@ architecture Behavioural of gs4510 is
   constant math_unit_count : integer := 16;
   type math_reg_array is array(0 to 15) of unsigned(31 downto 0);
   type math_config_array is array(0 to math_unit_count - 1) of math_unit_config;
+  type math_output_array is array(0 to math_unit_count - 1) of unsigned(63 downto 0);
   signal reg_math_regs : math_reg_array := (others => to_unsigned(0,32));
   signal reg_math_config : math_config_array := (others => math_unit_config_v);
   signal reg_math_config_drive : math_config_array := (others => math_unit_config_v);
@@ -1479,11 +1480,15 @@ architecture Behavioural of gs4510 is
 
   signal math_input_number : integer range 0 to 15 := 0;
   signal math_input_value : unsigned(31 downto 0) := (others => '0');
-  signal math_output_value_low : unsigned(31 downto 0) := (others => '0');
-  signal math_output_value_high : unsigned(31 downto 0) := (others => '0');
+  signal math_output_values : math_output_array := (others => (others => '0'));
+  -- signal math_output_value_low : unsigned(31 downto 0) := (others => 'Z');
+  -- signal math_output_value_high : unsigned(31 downto 0) := (others => 'Z');
 
   -- Start with input and outputting enabled
-  signal math_unit_flags : unsigned(7 downto 0) := x"03";
+  signal math_unit_flags : unsigned(7 downto 0) := x"01";
+  -- halt math unit when math_unit_halted /= last_math_unit_halted
+  signal math_unit_halted : std_logic := '0';
+  signal last_math_unit_halted : std_logic := '0';
   -- Each write to the math registers is passed to the math unit to handle
   -- (this is to avoid ISE doing really weird things in synthesis, thinking
   -- that each bit of each register was a clock or something similarly odd.)
@@ -1497,7 +1502,9 @@ architecture Behavioural of gs4510 is
   signal reg_math_cycle_counter : unsigned(31 downto 0) := to_unsigned(0,32);
   signal reg_math_cycle_counter_plus_one : unsigned(31 downto 0) := to_unsigned(0,32);
   -- Reset math cycle counters
-  signal reg_math_cycle_counter_reset : std_logic := '1';
+  signal reg_math_cycle_counter_reset : std_logic := '0';
+  signal reg_math_cycle_counter_reset_toggle : std_logic := '0';
+  signal last_reg_math_cycle_counter_reset_toggle : std_logic := '0';
   -- # of math cycles to trigger end of job / math interrupt
   signal reg_math_cycle_compare : unsigned(31 downto 0) := to_unsigned(0,32);
 
@@ -1605,47 +1612,53 @@ begin
 
 
   multipliers: for unit in 0 to 7 generate
-    mult_unit : entity work.multiply32 port map (
+    mult_unit : entity work.multiply32 generic map (
+      unit => unit
+      ) port map (
       clock => mathclock,
-      unit => unit,
       do_add => reg_math_config_drive(unit).do_add,
       input_a => reg_math_config_drive(unit).source_a,
       input_b => reg_math_config_drive(unit).source_b,
       input_value_number => math_input_number,
       input_value => math_input_value,
-      output_select => math_output_counter,
-      output_value(31 downto 0) => math_output_value_low,
-      output_value(63 downto 32) => math_output_value_high
+      output_value => math_output_values(unit)
+      -- output_select => math_output_counter,
+      -- output_value(31 downto 0) => math_output_value_low,
+      -- output_value(63 downto 32) => math_output_value_high
       );
   end generate;
 
   shifters: for unit in 8 to 11 generate
-    mult_unit : entity work.shifter32 port map (
+    shift_unit : entity work.shifter32 generic map (
+      unit => unit
+      ) port map (
       clock => mathclock,
-      unit => unit,
       do_add => reg_math_config_drive(unit).do_add,
       input_a => reg_math_config_drive(unit).source_a,
       input_b => reg_math_config_drive(unit).source_b,
       input_value_number => math_input_number,
       input_value => math_input_value,
-      output_select => math_output_counter,
-      output_value(31 downto 0) => math_output_value_low,
-      output_value(63 downto 32) => math_output_value_high
+      output_value => math_output_values(unit)
+      -- output_select => math_output_counter,
+      -- output_value(31 downto 0) => math_output_value_low,
+      -- output_value(63 downto 32) => math_output_value_high
       );
   end generate;
 
   dividerrs: for unit in 12 to 15 generate
-    mult_unit : entity work.divider32 port map (
+    div_unit : entity work.divider32 generic map (
+      unit => unit
+      ) port map (
       clock => mathclock,
-      unit => unit,
       do_add => reg_math_config_drive(unit).do_add,
       input_a => reg_math_config_drive(unit).source_a,
       input_b => reg_math_config_drive(unit).source_b,
       input_value_number => math_input_number,
       input_value => math_input_value,
-      output_select => math_output_counter,
-      output_value(31 downto 0) => math_output_value_low,
-      output_value(63 downto 32) => math_output_value_high
+      output_value => math_output_values(unit)
+      -- output_select => math_output_counter,
+      -- output_value(31 downto 0) => math_output_value_low,
+      -- output_value(63 downto 32) => math_output_value_high
       );
   end generate;
 
@@ -1692,7 +1705,7 @@ begin
 
       -- Present input value to all math units
       -- reset the counter if bit 0 is set (write enabled)
-      if math_unit_flags(0) = '0' then
+      if math_unit_flags(0) = '0' and math_unit_halted = last_math_unit_halted then
         if math_input_counter /= 15 then
           math_input_counter <= math_input_counter + 1;
         else
@@ -1711,7 +1724,7 @@ begin
 
       -- Update output counter being shown to math units
       -- reset counters when bit 0 is set (write enabled)
-      if math_unit_flags(0) = '0' then
+      if math_unit_flags(0) = '0' and math_unit_halted = last_math_unit_halted then
         if math_output_counter /= 15 then
           math_output_counter <= math_output_counter + 1;
         else
@@ -1735,7 +1748,7 @@ begin
       end if;
 
       -- Make sure output counter is running before starting to stash outputs, to avoid constantly writing a register
-      if math_unit_flags(1) = '1' and math_unit_flags(0) = '0' then
+      if math_unit_flags(1) = '1' and math_unit_flags(0) = '0' and math_unit_halted = last_math_unit_halted then
         if (reg_math_config_drive(prev_math_output_counter).latched='0') or (reg_math_latch_counter = x"00") then
           if reg_math_config_drive(prev_math_output_counter).output_high = '0' then
             if reg_math_config_drive(prev_math_output_counter).output_low = '0' then
@@ -1745,24 +1758,24 @@ begin
               -- Only low output being kept
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output)
                 & ") from output of math unit #" & integer'image(prev_math_output_counter)
-                & " ( = $" & to_hstring(math_output_value_low) & ")";
-              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_value_low;
+                & " ( = $" & to_hstring(math_output_values(prev_math_output_counter)(31 downto 0)) & ")";
+              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_values(prev_math_output_counter)(31 downto 0);
             end if;
           else
             if reg_math_config_drive(prev_math_output_counter).output_low = '0' then
               -- Only high half of output is being kept, so stash it
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output)
                 & ") from output of math unit #" & integer'image(prev_math_output_counter);
-              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_value_high;
+              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_values(prev_math_output_counter)(63 downto 32);
             else
               -- Both are being stashed, so store in consecutive slots
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output)
                 & ") (and next) from output of math unit #" & integer'image(prev_math_output_counter);
-              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_value_low;
+              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_values(prev_math_output_counter)(31 downto 0);
               if reg_math_config_drive(prev_math_output_counter).output /= 15 then
-                reg_math_regs(reg_math_config_drive(prev_math_output_counter).output + 1) <= math_output_value_high;
+                reg_math_regs(reg_math_config_drive(prev_math_output_counter).output + 1) <= math_output_values(prev_math_output_counter)(63 downto 32);
               else
-                reg_math_regs(0) <= math_output_value_high;
+                reg_math_regs(0) <= math_output_values(prev_math_output_counter)(63 downto 32);
               end if;
             end if;
           end if;
@@ -1779,7 +1792,15 @@ begin
         reg_math_write <= '0';
       end if;
 
-      if math_unit_flags(0) = '1' then
+      if reg_math_cycle_counter_reset_toggle /= last_reg_math_cycle_counter_reset_toggle then
+        last_reg_math_cycle_counter_reset_toggle <= reg_math_cycle_counter_reset_toggle;
+        reg_math_cycle_counter_reset <= '1';
+      else
+        reg_math_cycle_counter_reset <= '0';
+      end if;
+
+      -- when math unit has been halted by the comparator, behave as if math_unit_flags(1 downto 0) = "01"
+      if math_unit_flags(0) = '1' or math_unit_halted /= last_math_unit_halted then
         if reg_math_write = '1' then
           case reg_math_regbyte is
             when 0 => reg_math_regs(reg_math_regnum)(7 downto 0) <= reg_math_write_value;
@@ -1804,24 +1825,20 @@ begin
         end if;
         -- And update math cycle counter, if math unit is active
         -- include a case for the reset, to avoid a possible edge case resulting in a double-drive
-        if math_unit_flags(1) = '1' and reg_math_cycle_counter_reset = '0' then
+        if math_unit_flags(1) = '1' and reg_math_cycle_counter_reset = '0' and math_unit_halted = last_math_unit_halted then
           if reg_math_cycle_counter_plus_one = reg_math_cycle_compare then
-            math_unit_flags(1 downto 0) <= "01";  -- disable calculation, enable writing to regs from CPU (disables counters)
-            reg_math_cycle_counter_reset <= '1';
-          else
-            reg_math_cycle_counter <= reg_math_cycle_counter_plus_one;
+            math_unit_halted <= not last_math_unit_halted;  -- disable calculation, enable writing to regs from CPU (disables counters)
           end if;
+          reg_math_cycle_counter <= reg_math_cycle_counter_plus_one;
         end if;
       end if;
 
       -- handle resetting the cycle counter, as well as updating reg_math_cycle_counter_plus_one, to avoid a multiple drive situation
       if reg_math_cycle_counter_reset = '1' then
         reg_math_cycle_counter <= (others => '0');
-        reg_math_cycle_counter_plus_one <= (0 => '1', others => '0');
-        reg_math_cycle_counter_reset <= '0';
-      else
-        reg_math_cycle_counter_plus_one <= reg_math_cycle_counter + 1;
+        -- reg_math_cycle_counter_plus_one <= x"00000001";
       end if;
+      reg_math_cycle_counter_plus_one <= reg_math_cycle_counter + 1;
     end if;
   end process;
 
@@ -3025,7 +3042,12 @@ begin
               -- @IO:GS $D7E0 MATH:LATCHINT Latch interval for latched outputs (in CPU cycles)
               -- $D7E1 is documented higher up
             when x"E0" => return reg_math_latch_interval;
-            when x"E1" => return math_unit_flags;
+            when x"E1" =>
+              if math_unit_halted = last_math_unit_halted then
+                return math_unit_flags;
+              else
+                return math_unit_flags(7 downto 2) & "01";
+              end if;
             -- @IO:GS $D7E2 MATH:RESERVED Reserved
             -- @IO:GS $D7E3 MATH:RESERVED Reserved
             --@IO:GS $D7E4 MATH:ITERCNT Iteration Counter (32 bit)
@@ -3616,9 +3638,10 @@ begin
           -- @IO:GS $D7E1 - Math unit general settings (writing also clears math cycle counter)
           -- @IO:GS $D7E1.0 MATH:WREN Enable setting of math registers (must normally be set)
           -- @IO:GS $D7E1.1 MATH:CALCEN Enable committing of output values from math units back to math registers (clearing effectively pauses iterative formulae)
-          math_unit_flags <= value;
+          math_unit_flags(3 downto 0) <= value(3 downto 0);
           -- reg_math_cycle_counter <= to_unsigned(0,32); -- TODO: Should generate a reg_math_cycle_counter_reset signal
-          reg_math_cycle_counter_reset <= '1';
+          reg_math_cycle_counter_reset_toggle <= not last_reg_math_cycle_counter_reset_toggle;
+          last_math_unit_halted <= math_unit_halted;
         elsif long_address(7 downto 0) = x"E8" then
           reg_math_cycle_compare(7 downto 0) <= value;
         elsif long_address(7 downto 0) = x"E9" then
diff --git a/src/vhdl/multiply32.vhdl b/src/vhdl/multiply32.vhdl
index 58a612941..54012803b 100644
--- a/src/vhdl/multiply32.vhdl
+++ b/src/vhdl/multiply32.vhdl
@@ -28,16 +28,18 @@ use Std.TextIO.all;
 use work.debugtools.all;
   
 entity multiply32 is
+  generic (
+    unit : integer range 0 to 15
+    );
   port (
     clock : in std_logic;
-    unit : in integer range 0 to 15;
     do_add : in std_logic;
     input_a : in integer range 0 to 15;
     input_b : in integer range 0 to 15;
     input_value_number : in integer range 0 to 15;
     input_value : unsigned(31 downto 0);
-    output_select : in integer range 0 to 15;
-    output_value : out unsigned(63 downto 0)
+    -- output_select : in integer range 0 to 15;
+    output_value : out unsigned(63 downto 0) := (others => '0')
     );
 end entity;
 
@@ -84,13 +86,13 @@ begin
       p <= p4;
       -- Even units do addition, odd ones do subtraction
       if (unit mod 2) = 0 then
-        s <= to_unsigned(to_integer(a)+to_integer(b),33);
+        s <= unsigned((a(31) & a)+(b(31) & b));
       else
-        s <= to_unsigned(to_integer(a)-to_integer(b),33);
+        s <= unsigned((a(31) & a)-(b(31) & b));
       end if;
 
       -- Display output value when requested, and tri-state outputs otherwise
-      if output_select = unit then
+      -- if output_select = unit then
         if do_add='1' then
           -- Output sign-extended 33 bit addition result
           output_value(63 downto 33) <= (others => s(32));
@@ -102,9 +104,9 @@ begin
 --          report "MATH: Unit #" & integer'image(unit)
 --            & " outputting multiplication product $" & to_hstring(unsigned(p));
         end if;
-      else
-        output_value <= (others => 'Z');
-      end if;
+      -- else
+      --   output_value <= (others => 'Z');
+      -- end if;
     end if;
   end process;
 end neo_gregorian;
diff --git a/src/vhdl/shifter32.vhdl b/src/vhdl/shifter32.vhdl
index dace057d8..e15a79dd4 100644
--- a/src/vhdl/shifter32.vhdl
+++ b/src/vhdl/shifter32.vhdl
@@ -28,16 +28,18 @@ use Std.TextIO.all;
 use work.debugtools.all;
   
 entity shifter32 is
+  generic (
+    unit : integer range 0 to 15
+    );
   port (
     clock : in std_logic;
-    unit : in integer range 0 to 15;
     do_add : in std_logic;
     input_a : in integer range 0 to 15;
     input_b : in integer range 0 to 15;
     input_value_number : in integer range 0 to 15;
     input_value : unsigned(31 downto 0);
-    output_select : in integer range 0 to 15;
-    output_value : out unsigned(63 downto 0)
+    -- output_select : in integer range 0 to 15;
+    output_value : out unsigned(63 downto 0) := (others => '0')
     );
 end entity;
 
@@ -65,9 +67,9 @@ begin
       -- Calculate the result
       -- Even units do addition, odd ones do subtraction
       if (unit mod 2) = 0 then
-        s <= to_unsigned(to_integer(a)+to_integer(b),33);
+        s <= unsigned((a(31) & a)+(b(31) & b));
       else
-        s <= to_unsigned(to_integer(a)-to_integer(b),33);
+        s <= unsigned((a(31) & a)-(b(31) & b));
       end if;
 
       if b(7 downto 0) = x"00" then
@@ -87,7 +89,7 @@ begin
       end if;
 
       -- Display output value when requested, and tri-state outputs otherwise
-      if output_select = unit then
+      -- if output_select = unit then
         if do_add='1' then
           -- Output sign-extended 33 bit addition result
           output_value(63 downto 33) <= (others => s(32));
@@ -95,9 +97,9 @@ begin
         else
           output_value <= p;
         end if;
-      else
-        output_value <= (others => 'Z');
-      end if;
+      -- else
+      --   output_value <= (others => 'Z');
+      -- end if;
     end if;
   end process;
 end neo_gregorian;

From b3a07030c6f23a293cd9c628e3074d87a950f4c3 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Mon, 29 Sep 2025 13:03:25 -0500
Subject: [PATCH 3/9] modified the phasing, fixed a bug that broke STQ when
 writing to the math registers

---
 src/vhdl/gs4510.vhdl | 76 ++++++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 41 deletions(-)

diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl
index 41a287c54..e80ec2b5d 100755
--- a/src/vhdl/gs4510.vhdl
+++ b/src/vhdl/gs4510.vhdl
@@ -1474,9 +1474,10 @@ architecture Behavioural of gs4510 is
   -- very soon after.
   -- note: for whatever reason the way this was phased meant that the math cycle would
   -- count up by 1 before math unit 1 had actually output anything
-  signal math_input_counter : integer range 0 to 15 := 0;
-  signal math_output_counter : integer range 0 to 15 := 2;  -- originally 3
-  signal prev_math_output_counter : integer range 0 to 15 := 1;  -- originally 2
+  constant math_input_counter_init : integer range 0 to 15 := 0;
+  signal math_input_counter : integer range 0 to 15 := math_input_counter_init;
+  constant math_output_counter_init : integer range 0 to 15 := 0;
+  signal math_output_counter : integer range 0 to 15 := math_output_counter_init;  -- originally 3
 
   signal math_input_number : integer range 0 to 15 := 0;
   signal math_input_value : unsigned(31 downto 0) := (others => '0');
@@ -1492,7 +1493,9 @@ architecture Behavioural of gs4510 is
   -- Each write to the math registers is passed to the math unit to handle
   -- (this is to avoid ISE doing really weird things in synthesis, thinking
   -- that each bit of each register was a clock or something similarly odd.)
-  signal reg_math_write : std_logic := '0';
+  -- The reset and write systems need to directly read the toggle state, since
+  -- any intermediate adds enough latency to cause writes on every cycle to fail.
+  -- This is particularly bad with math register writing, since it breaks STQ.
   signal reg_math_write_toggle : std_logic := '0';
   signal last_reg_math_write_toggle : std_logic := '0';
   signal reg_math_regnum : integer range 0 to 15 := 0;
@@ -1502,7 +1505,6 @@ architecture Behavioural of gs4510 is
   signal reg_math_cycle_counter : unsigned(31 downto 0) := to_unsigned(0,32);
   signal reg_math_cycle_counter_plus_one : unsigned(31 downto 0) := to_unsigned(0,32);
   -- Reset math cycle counters
-  signal reg_math_cycle_counter_reset : std_logic := '0';
   signal reg_math_cycle_counter_reset_toggle : std_logic := '0';
   signal last_reg_math_cycle_counter_reset_toggle : std_logic := '0';
   -- # of math cycles to trigger end of job / math interrupt
@@ -1719,7 +1721,7 @@ begin
         report "MATH: Presenting math reg #" & integer'image(math_input_counter)
           &" = $" & to_hstring(reg_math_regs(math_input_counter));
       else
-        math_input_counter <= 0;
+        math_input_counter <= math_input_counter_init;
       end if;
 
       -- Update output counter being shown to math units
@@ -1730,52 +1732,50 @@ begin
         else
           math_output_counter <= 0;
         end if;
-        prev_math_output_counter <= math_output_counter;
       else
-        math_output_counter <= 2;
-        prev_math_output_counter <= 1;
+        math_output_counter <= math_output_counter_init;
       end if;
 
       -- Based on the configuration for the previously selected unit,
       -- stash the results in the appropriate place
       if true then
-        report "MATH: output flags for unit #" & integer'image(prev_math_output_counter)
+        report "MATH: output flags for unit #" & integer'image(math_output_counter)
           & " = "
-          & std_logic'image(reg_math_config(prev_math_output_counter).output_low) & ", "
-          & std_logic'image(reg_math_config(prev_math_output_counter).output_high) & ", "
-          & integer'image(reg_math_config(prev_math_output_counter).output) & ", "
-          & std_logic'image(reg_math_config(prev_math_output_counter).latched) & ".";
+          & std_logic'image(reg_math_config(math_output_counter).output_low) & ", "
+          & std_logic'image(reg_math_config(math_output_counter).output_high) & ", "
+          & integer'image(reg_math_config(math_output_counter).output) & ", "
+          & std_logic'image(reg_math_config(math_output_counter).latched) & ".";
       end if;
 
       -- Make sure output counter is running before starting to stash outputs, to avoid constantly writing a register
       if math_unit_flags(1) = '1' and math_unit_flags(0) = '0' and math_unit_halted = last_math_unit_halted then
-        if (reg_math_config_drive(prev_math_output_counter).latched='0') or (reg_math_latch_counter = x"00") then
-          if reg_math_config_drive(prev_math_output_counter).output_high = '0' then
-            if reg_math_config_drive(prev_math_output_counter).output_low = '0' then
+        if (reg_math_config_drive(math_output_counter).latched='0') or (reg_math_latch_counter = x"00") then
+          if reg_math_config_drive(math_output_counter).output_high = '0' then
+            if reg_math_config_drive(math_output_counter).output_low = '0' then
               -- No output being kept, so nothing to do.
               null;
             else
               -- Only low output being kept
-              report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output)
-                & ") from output of math unit #" & integer'image(prev_math_output_counter)
-                & " ( = $" & to_hstring(math_output_values(prev_math_output_counter)(31 downto 0)) & ")";
-              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_values(prev_math_output_counter)(31 downto 0);
+              report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
+                & ") from output of math unit #" & integer'image(math_output_counter)
+                & " ( = $" & to_hstring(math_output_values(math_output_counter)(31 downto 0)) & ")";
+              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
             end if;
           else
-            if reg_math_config_drive(prev_math_output_counter).output_low = '0' then
+            if reg_math_config_drive(math_output_counter).output_low = '0' then
               -- Only high half of output is being kept, so stash it
-              report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output)
-                & ") from output of math unit #" & integer'image(prev_math_output_counter);
-              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_values(prev_math_output_counter)(63 downto 32);
+              report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
+                & ") from output of math unit #" & integer'image(math_output_counter);
+              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(63 downto 32);
             else
               -- Both are being stashed, so store in consecutive slots
-              report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output)
-                & ") (and next) from output of math unit #" & integer'image(prev_math_output_counter);
-              reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_values(prev_math_output_counter)(31 downto 0);
-              if reg_math_config_drive(prev_math_output_counter).output /= 15 then
-                reg_math_regs(reg_math_config_drive(prev_math_output_counter).output + 1) <= math_output_values(prev_math_output_counter)(63 downto 32);
+              report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
+                & ") (and next) from output of math unit #" & integer'image(math_output_counter);
+              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
+              if reg_math_config_drive(math_output_counter).output /= 15 then
+                reg_math_regs(reg_math_config_drive(math_output_counter).output + 1) <= math_output_values(math_output_counter)(63 downto 32);
               else
-                reg_math_regs(0) <= math_output_values(prev_math_output_counter)(63 downto 32);
+                reg_math_regs(0) <= math_output_values(math_output_counter)(63 downto 32);
               end if;
             end if;
           end if;
@@ -1787,21 +1787,15 @@ begin
       -- Implement writing to math registers
       if reg_math_write_toggle /= last_reg_math_write_toggle then
         last_reg_math_write_toggle <= reg_math_write_toggle;
-        reg_math_write <= '1';
-      else
-        reg_math_write <= '0';
       end if;
 
       if reg_math_cycle_counter_reset_toggle /= last_reg_math_cycle_counter_reset_toggle then
         last_reg_math_cycle_counter_reset_toggle <= reg_math_cycle_counter_reset_toggle;
-        reg_math_cycle_counter_reset <= '1';
-      else
-        reg_math_cycle_counter_reset <= '0';
       end if;
 
       -- when math unit has been halted by the comparator, behave as if math_unit_flags(1 downto 0) = "01"
       if math_unit_flags(0) = '1' or math_unit_halted /= last_math_unit_halted then
-        if reg_math_write = '1' then
+        if reg_math_write_toggle /= last_reg_math_write_toggle then
           case reg_math_regbyte is
             when 0 => reg_math_regs(reg_math_regnum)(7 downto 0) <= reg_math_write_value;
             when 1 => reg_math_regs(reg_math_regnum)(15 downto 8) <= reg_math_write_value;
@@ -1816,7 +1810,7 @@ begin
       -- output to appear on the inputs again, i.e., once per lap of the input
       -- and output propagation.
       -- TODO: implement reg_math_cycle_counter_reset signal, see D7E1
-      if math_output_counter = 1 then
+      if math_output_counter = (15 + math_output_counter_init) mod 16 then
         -- Decrement latch counter
         if reg_math_latch_counter = x"00" then
           reg_math_latch_counter <= reg_math_latch_interval;
@@ -1834,7 +1828,7 @@ begin
       end if;
 
       -- handle resetting the cycle counter, as well as updating reg_math_cycle_counter_plus_one, to avoid a multiple drive situation
-      if reg_math_cycle_counter_reset = '1' then
+      if reg_math_cycle_counter_reset_toggle /= last_reg_math_cycle_counter_reset_toggle then
         reg_math_cycle_counter <= (others => '0');
         -- reg_math_cycle_counter_plus_one <= x"00000001";
       end if;
@@ -3640,7 +3634,7 @@ begin
           -- @IO:GS $D7E1.1 MATH:CALCEN Enable committing of output values from math units back to math registers (clearing effectively pauses iterative formulae)
           math_unit_flags(3 downto 0) <= value(3 downto 0);
           -- reg_math_cycle_counter <= to_unsigned(0,32); -- TODO: Should generate a reg_math_cycle_counter_reset signal
-          reg_math_cycle_counter_reset_toggle <= not last_reg_math_cycle_counter_reset_toggle;
+          reg_math_cycle_counter_reset_toggle <= not reg_math_cycle_counter_reset_toggle;
           last_math_unit_halted <= math_unit_halted;
         elsif long_address(7 downto 0) = x"E8" then
           reg_math_cycle_compare(7 downto 0) <= value;

From 2208c90f275923150f5c99e583d48df8492bd351 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Mon, 29 Sep 2025 13:07:16 -0500
Subject: [PATCH 4/9] forgot to replace all instances of
 reg_math_cycle_counter_reset

---
 src/vhdl/gs4510.vhdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl
index e80ec2b5d..e22aa3c4c 100755
--- a/src/vhdl/gs4510.vhdl
+++ b/src/vhdl/gs4510.vhdl
@@ -1819,7 +1819,7 @@ begin
         end if;
         -- And update math cycle counter, if math unit is active
         -- include a case for the reset, to avoid a possible edge case resulting in a double-drive
-        if math_unit_flags(1) = '1' and reg_math_cycle_counter_reset = '0' and math_unit_halted = last_math_unit_halted then
+        if math_unit_flags(1) = '1' and reg_math_cycle_counter_reset_toggle = last_reg_math_cycle_counter_reset_toggle and math_unit_halted = last_math_unit_halted then
           if reg_math_cycle_counter_plus_one = reg_math_cycle_compare then
             math_unit_halted <= not last_math_unit_halted;  -- disable calculation, enable writing to regs from CPU (disables counters)
           end if;

From b61fa13677b6b14abb2626bc6b3990c9ca6ef6b6 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Thu, 2 Oct 2025 11:36:55 -0500
Subject: [PATCH 5/9] implemented dividers, implemented some experimental stuff
 to allow for inverting input B instead of making every other unit a
 subtractor

---
 src/vhdl/divider32.vhdl  | 218 ++++++++++++++++++++++++++++++++++-----
 src/vhdl/gs4510.vhdl     |  83 +++++++++++----
 src/vhdl/multiply32.vhdl |  15 ++-
 src/vhdl/shifter32.vhdl  |  15 ++-
 4 files changed, 280 insertions(+), 51 deletions(-)

diff --git a/src/vhdl/divider32.vhdl b/src/vhdl/divider32.vhdl
index cf60e2a62..da6a105a8 100644
--- a/src/vhdl/divider32.vhdl
+++ b/src/vhdl/divider32.vhdl
@@ -34,6 +34,7 @@ entity divider32 is
   port (
     clock : in std_logic;
     do_add : in std_logic;
+    invert_b : in std_logic;
     input_a : in integer range 0 to 15;
     input_b : in integer range 0 to 15;
     input_value_number : in integer range 0 to 15;
@@ -45,18 +46,178 @@ end entity;
 
 architecture neo_gregorian of divider32 is
 
-  signal a : signed(31 downto 0) := to_signed(0,32);
-  signal b : signed(31 downto 0) := to_signed(0,32);
-  signal p : signed(63 downto 0) := to_signed(0,64);
+  signal a : unsigned(31 downto 0) := to_unsigned(0,32);
+  signal b : unsigned(31 downto 0) := to_unsigned(0,32);
+  signal p : unsigned(63 downto 0) := to_unsigned(0,64);
+  signal q : unsigned(63 downto 0) := to_unsigned(0,64);
   signal s : unsigned(32 downto 0) := to_unsigned(0,33);
-  
-  signal p1 : signed(63 downto 0);
-  signal p2 : signed(63 downto 0);
-  signal p3 : signed(63 downto 0);
-  signal p4 : signed(63 downto 0);
 
+  signal busy : std_logic := '0';
+  signal start_over : std_logic := '0';
+
+  type state_t is (idle, start_1, start_2, start_3, step_1, step_2, output);
+  signal state : state_t := idle;
+  signal steps_remaining : integer range 0 to 5 := 0;
+
+  signal mult_a : unsigned(67 downto 0) := (others => '0');
+  signal mult_b : unsigned(69 downto 0) := (others => '0');
+  signal mult_out : unsigned(137 downto 0) := (others => '0');
+
+  signal dd : unsigned(67 downto 0) := to_unsigned(0,68);
+  signal nn : unsigned(67 downto 0) := to_unsigned(0,68);
+
+  pure function count_leading_zeros(arg : unsigned(31 downto 0)) return natural is
+  begin  -- counts the '0' bits above the most significant '1' of arg
+    for msb in 31 downto 0 loop  -- scan from the top bit downwards
+      if arg(msb) = '1' then
+        return 31 - msb;
+      end if;
+    end loop;
+    return 0;  -- arg has no '1' bit at all: yields 0, same as when arg(31) = '1'
+  end function count_leading_zeros;
 begin
 
+  -- instance "fast_divide_1"
+  -- fast_divide_1: entity work.fast_divide
+  --   port map (
+  --     clock      => clock,
+  --     n          => a,
+  --     d          => b,
+  --     q          => q,
+  --     start_over => start_over,
+  --     busy       => busy);
+
+  process (clock) is  -- iterative divider state machine: reads a and b, drives q and busy
+    variable temp64 : unsigned(73 downto 0) := to_unsigned(0,74);  -- scratch used to round the final quotient
+    variable temp96 : unsigned(105 downto 0) := to_unsigned(0,106);  -- NOTE(review): not referenced in this process
+    -- variable temp138 : unsigned(137 downto 0) := to_unsigned(0,138);
+    variable f : unsigned(69 downto 0) := to_unsigned(0,70);  -- scaling factor, formed as (bit 69 set) minus dd
+    variable leading_zeros : natural range 0 to 31;
+    variable new_dd : unsigned( 35 downto 0);
+    variable new_nn : unsigned( 67 downto 0);
+    variable padded_d : unsigned(63 downto 0);
+  begin
+    if rising_edge(clock) then
+      report "state is " & state_t'image(state);
+      -- only for vunit test
+      -- report "q$" & to_hstring(q) & " = n$" & to_hstring(n) & " / d$" & to_hstring(d);
+      mult_out <= mult_a * mult_b;  -- one shared registered multiplier: operands assigned in a state are read back from mult_out two clocks later
+      case state is
+        when idle =>
+          null;
+        -- special startup case to allow for multiplier outputs to settle
+        when start_1 =>
+          -- f = 2 - dd
+          f := to_unsigned(0,70);
+          f(69) := '1';
+          f := f - dd;
+          -- Now multiply both nn and dd by f
+          -- temp138 := nn * f;
+          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+          mult_a <= nn;
+          mult_b <= f;
+          state <= start_2;
+        when start_2 =>
+          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+          mult_a <= dd; 
+          mult_b <= f;
+          state <= start_3;
+        when start_3 =>
+          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+          mult_a <= nn;
+          mult_b <= f;
+          state <= step_2;  -- step_2 then picks up the dd * f product queued in start_2
+        when step_1 =>  -- consumes the pending nn * f product and queues the next one
+          report "nn=$" & to_hstring(nn(67 downto 36)) & "." & to_hstring(nn(35 downto 4)) & "." & to_hstring(nn(3 downto 0))
+            & " / dd=$" & to_hstring(dd(67 downto 36)) & "." & to_hstring(dd(35 downto 4)) & "." & to_hstring(dd(3 downto 0));
+          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+        -- f = 2 - dd
+          -- f := to_unsigned(0,70);
+          -- f(69) := '1';
+          -- f := f - dd;
+          report "f = $" & to_hstring(f);
+
+          -- Check whether to round up
+          if mult_out(67) = '1' then  -- bit 67 is the first bit dropped by the (135 downto 68) slice
+             nn <= mult_out(135 downto 68) + 1;
+             mult_a <= mult_out(135 downto 68) + 1;
+          else
+             nn <= mult_out(135 downto 68);
+             mult_a <= mult_out(135 downto 68);
+          end if;
+          -- Now multiply both nn and dd by f
+          -- temp138 := nn * f;
+          mult_b <= f;
+          state <= step_2;
+          -- report "temp138=$" & to_hstring(temp138);
+        when step_2 =>  -- consumes the pending dd * f product and derives the next f from it
+          report "nn=$" & to_hstring(nn(67 downto 36)) & "." & to_hstring(nn(35 downto 4)) & "." & to_hstring(nn(3 downto 0))
+            & " / dd=$" & to_hstring(dd(67 downto 36)) & "." & to_hstring(dd(35 downto 4)) & "." & to_hstring(dd(3 downto 0));
+          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+          -- temp138 := dd * f;
+          -- Check whether to round up, but avoid overflow
+          f := to_unsigned(0,70);
+          f(69) := '1';
+          -- f := f - dd;
+          if mult_out(67) = '1' and mult_out(135 downto 68) /= X"FFFFFFFFFFFFFFFFF" then
+             dd <= mult_out(135 downto 68) + 1;
+             mult_a <= mult_out(135 downto 68) + 1;
+             f := f - (mult_out(135 downto 68) + 1);
+          else
+             dd <= mult_out(135 downto 68);
+             mult_a <= mult_out(135 downto 68);
+             f := f - mult_out(135 downto 68);
+          end if;
+          -- report "temp138=$" & to_hstring(temp138);          
+          mult_b <= f;
+          -- Perform number of required steps, or abort early if we can
+          if steps_remaining /= 0 and dd /= x"FFFFFFFFFFFFFFFFF" then  -- dd read here is still the value from before this state's update
+            steps_remaining <= steps_remaining - 1;
+            state <= step_1;
+          else
+            state <= output;
+          end if;
+        when output =>
+          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+          -- No idea why we need to add one, but we do to stop things like 4/2
+          -- giving a result of 1.999999999
+          if mult_out(67) = '1' then
+            temp64(67 downto 0) := mult_out(135 downto 68) + 1;
+          else
+            temp64(67 downto 0) := mult_out(135 downto 68);
+          end if;
+          -- temp64(67 downto 0) := nn;
+          temp64(73 downto 68) := (others => '0');
+          temp64 := temp64 + 8;  -- adding 8 before bits 3..0 are dropped below rounds to nearest at bit 4
+          report "temp64=$" & to_hstring(temp64);
+          busy <= '0';
+          q <= temp64(67 downto 4);
+          state <= idle;
+      end case;
+
+      if start_over='1' and b /= to_unsigned(0,32) then  -- placed after the case, so a new request overrides the state chosen above
+        report "Calculating $" & to_hstring(a) & " / $" & to_hstring(b);
+        leading_zeros := count_leading_zeros(b);
+        padded_d := b & X"00000000";
+        new_dd := (others => '0');
+        new_dd(35 downto 4) := padded_d(63-leading_zeros downto 32-leading_zeros);  -- normalise: b's most significant '1' lands in new_dd(35)
+        new_nn := (others => '0');
+        new_nn(35+leading_zeros downto 4+leading_zeros) := a;  -- a is shifted left by the same leading_zeros amount
+        report "Normalised to $" & to_hstring(new_nn(67 downto 36)) & "." &
+          to_hstring(new_nn(35 downto 4)) & "." & to_hstring(new_nn(3 downto 0))
+          & " / $" & to_hstring(new_dd(35 downto 4)) & "." & to_hstring(new_dd(3 downto 0));
+        dd <= new_dd & X"00000000";
+        nn <= new_nn;
+        state <= start_1;
+        steps_remaining <= 5;
+        busy <= '1';
+      elsif start_over='1' then  -- b = 0: only reports; no new job is started
+        report "Ignoring divide by zero";
+      end if;
+
+    end if;
+  end process;
+  
   process(clock) is
   begin
     if rising_edge(clock) then
@@ -70,27 +231,38 @@ begin
       if input_value_number = input_a then
 --        report "MATH: Unit #" & integer'image(unit)
 --          & ": Setting a=$" & to_hstring(input_value);
-        a <= signed(input_value);
+        a <= input_value;
+        if a /= input_value then
+          start_over <= '1';
+        end if;
       end if;
       if input_value_number = input_b then
 --        report "MATH: Unit #" & integer'image(unit)
  --         & ": Setting b=$" & to_hstring(input_value);
-        b <= signed(input_value);
+        if invert_b = '1' then
+          b <= unsigned(-signed(input_value));
+          if b /= unsigned(-signed(input_value)) then
+            start_over <= '1';
+          end if;
+        else
+          b <= input_value;
+          if b /= input_value then
+            start_over <= '1';
+          end if;
+        end if;
       end if;
 
-      -- Calculate the result
-      p1 <= a*b;
-      p2 <= p1;
-      p3 <= p2;
-      p4 <= p3;
-      p <= p4;
-      -- Even units do addition, odd ones do subtraction
-      if (unit mod 2) = 0 then
-        s <= unsigned((a(31) & a)+(b(31) & b));
-      else
-        s <= unsigned((a(31) & a)-(b(31) & b));
+      if start_over = '1' then
+        start_over <= '0';
       end if;
 
+      -- Even units do addition, odd ones do subtraction
+      -- if (unit mod 2) = 0 then
+        s <= unsigned((a(31) & a) + (b(31) & b));
+      -- else
+      --   s <= unsigned((a(31) & a)-(b(31) & b));
+      -- end if;
+
       -- Display output value when requested, and tri-state outputs otherwise
       -- if output_select = unit then
         if do_add='1' then
@@ -100,9 +272,9 @@ begin
           report "MATH: Unit #" & integer'image(unit)
             & " outputting addition sum $" & to_hstring(s);
         else
-          output_value <= unsigned(p);
+          output_value <= q;
           report "MATH: Unit #" & integer'image(unit)
-            & " outputting multiplication product $" & to_hstring(unsigned(p));
+            & " outputting division quotient $" & to_hstring(q);
         end if;
       -- else
       --   output_value <= (others => 'Z');
diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl
index e22aa3c4c..845954b4a 100755
--- a/src/vhdl/gs4510.vhdl
+++ b/src/vhdl/gs4510.vhdl
@@ -1463,11 +1463,16 @@ architecture Behavioural of gs4510 is
   type math_reg_array is array(0 to 15) of unsigned(31 downto 0);
   type math_config_array is array(0 to math_unit_count - 1) of math_unit_config;
   type math_output_array is array(0 to math_unit_count - 1) of unsigned(63 downto 0);
+  type math_latch_array is array(0 to math_unit_count - 1) of unsigned(3 downto 0);
   signal reg_math_regs : math_reg_array := (others => to_unsigned(0,32));
   signal reg_math_config : math_config_array := (others => math_unit_config_v);
   signal reg_math_config_drive : math_config_array := (others => math_unit_config_v);
   signal reg_math_latch_counter : unsigned(7 downto 0) := x"00";
   signal reg_math_latch_interval : unsigned(7 downto 0) := x"00";
+  -- signal math_latch_value : unsigned(3 downto 0);  -- Latch value to write
+  -- signal math_latch_address : unsigned(3 downto 0);  -- Which unit to write latch value to
+  signal reg_math_latch_counters : math_latch_array := (others => (others => '0'));
+  signal reg_math_latch_intervals : math_latch_array := (others => (others => '0'));
 
   -- We have the output counter out of phase with the input counter, so that we
   -- have time to catch an output, and store it, ready for presenting as an input
@@ -1482,11 +1487,13 @@ architecture Behavioural of gs4510 is
   signal math_input_number : integer range 0 to 15 := 0;
   signal math_input_value : unsigned(31 downto 0) := (others => '0');
   signal math_output_values : math_output_array := (others => (others => '0'));
+  -- signal math_output_values_alt : math_alt_output_array := (others => (others => '0'));
   -- signal math_output_value_low : unsigned(31 downto 0) := (others => 'Z');
   -- signal math_output_value_high : unsigned(31 downto 0) := (others => 'Z');
 
   -- Start with input and outputting enabled
   signal math_unit_flags : unsigned(7 downto 0) := x"01";
+  signal math_unit_invert_b : std_logic_vector(15 downto 0) := (others => '0');
   -- halt math unit when math_unit_halted /= last_math_unit_halted
   signal math_unit_halted : std_logic := '0';
   signal last_math_unit_halted : std_logic := '0';
@@ -1613,12 +1620,13 @@ begin
       );
 
 
-  multipliers: for unit in 0 to 7 generate
+  multipliers_a: for unit in 0 to 7 generate
     mult_unit : entity work.multiply32 generic map (
       unit => unit
       ) port map (
       clock => mathclock,
       do_add => reg_math_config_drive(unit).do_add,
+      invert_b => math_unit_invert_b(unit),
       input_a => reg_math_config_drive(unit).source_a,
       input_b => reg_math_config_drive(unit).source_b,
       input_value_number => math_input_number,
@@ -1629,6 +1637,21 @@ begin
       -- output_value(63 downto 32) => math_output_value_high
       );
   end generate;
+  
+  -- multipliers_b: for unit in 8 to 15 generate
+  --   mult_unit_alt: entity work.multiply32 generic map (
+  --     unit => unit
+  --     ) port map (
+  --     clock => mathclock,
+  --     do_add => reg_math_config_drive(unit).do_add,
+  --     invert_b => math_unit_invert_b(unit),
+  --     input_a => reg_math_config_drive(unit).source_a,
+  --     input_b => reg_math_config_drive(unit).source_b,
+  --     input_value_number => math_input_number,
+  --     input_value => math_input_value,
+  --     output_value => math_output_values_alt(unit mod 8)
+  --     );
+  -- end generate;       
 
   shifters: for unit in 8 to 11 generate
     shift_unit : entity work.shifter32 generic map (
@@ -1636,6 +1659,7 @@ begin
       ) port map (
       clock => mathclock,
       do_add => reg_math_config_drive(unit).do_add,
+      invert_b => math_unit_invert_b(unit),
       input_a => reg_math_config_drive(unit).source_a,
       input_b => reg_math_config_drive(unit).source_b,
       input_value_number => math_input_number,
@@ -1653,6 +1677,7 @@ begin
       ) port map (
       clock => mathclock,
       do_add => reg_math_config_drive(unit).do_add,
+      invert_b => math_unit_invert_b(unit),
       input_a => reg_math_config_drive(unit).source_a,
       input_b => reg_math_config_drive(unit).source_b,
       input_value_number => math_input_number,
@@ -1759,23 +1784,43 @@ begin
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
                 & ") from output of math unit #" & integer'image(math_output_counter)
                 & " ( = $" & to_hstring(math_output_values(math_output_counter)(31 downto 0)) & ")";
-              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
+              -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
+              --   reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values_alt(math_output_counter mod 8)(31 downto 0);
+              -- else
+                reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
+              -- end if;
             end if;
           else
             if reg_math_config_drive(math_output_counter).output_low = '0' then
               -- Only high half of output is being kept, so stash it
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
                 & ") from output of math unit #" & integer'image(math_output_counter);
-              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(63 downto 32);
+              -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
+              --   reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values_alt(math_output_counter mod 8)(63 downto 32);
+              -- else
+                reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(63 downto 32);
+              -- end if;
             else
               -- Both are being stashed, so store in consecutive slots
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
                 & ") (and next) from output of math unit #" & integer'image(math_output_counter);
-              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
+              -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
+              --   reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values_alt(math_output_counter mod 8)(31 downto 0);
+              -- else
+                reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
+              -- end if;
               if reg_math_config_drive(math_output_counter).output /= 15 then
-                reg_math_regs(reg_math_config_drive(math_output_counter).output + 1) <= math_output_values(math_output_counter)(63 downto 32);
+                -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
+                --   reg_math_regs(reg_math_config(math_output_counter).output + 1) <= math_output_values_alt(math_output_counter mod 8)(63 downto 32);
+                -- else
+                  reg_math_regs(reg_math_config_drive(math_output_counter).output + 1) <= math_output_values(math_output_counter)(63 downto 32);  -- index from the same _drive copy the /= 15 guard tested, so +1 cannot reach 16
+                -- end if;
               else
-                reg_math_regs(0) <= math_output_values(math_output_counter)(63 downto 32);
+                -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
+                --   reg_math_regs(0) <= math_output_values_alt(math_output_counter mod 8)(63 downto 32);
+                -- else
+                  reg_math_regs(0) <= math_output_values(math_output_counter)(63 downto 32);
+                -- end if;
               end if;
             end if;
           end if;
@@ -3044,6 +3089,8 @@ begin
               end if;
             -- @IO:GS $D7E2 MATH:RESERVED Reserved
             -- @IO:GS $D7E3 MATH:RESERVED Reserved
+            when x"E2" => return unsigned(math_unit_invert_b(7 downto 0));
+            when x"E3" => return unsigned(math_unit_invert_b(15 downto 8));
             --@IO:GS $D7E4 MATH:ITERCNT Iteration Counter (32 bit)
             --@IO:GS $D7E5 MATH:ITERCNT Iteration Counter (32 bit)
             --@IO:GS $D7E6 MATH:ITERCNT Iteration Counter (32 bit)
@@ -3636,6 +3683,10 @@ begin
           -- reg_math_cycle_counter <= to_unsigned(0,32); -- TODO: Should generate a reg_math_cycle_counter_reset signal
           reg_math_cycle_counter_reset_toggle <= not reg_math_cycle_counter_reset_toggle;
           last_math_unit_halted <= math_unit_halted;
+        elsif long_address(7 downto 0) = x"E2" then
+          math_unit_invert_b(7 downto 0) <= std_logic_vector(value);
+        elsif long_address(7 downto 0) = x"E3" then
+          math_unit_invert_b(15 downto 8) <= std_logic_vector(value);
         elsif long_address(7 downto 0) = x"E8" then
           reg_math_cycle_compare(7 downto 0) <= value;
         elsif long_address(7 downto 0) = x"E9" then
@@ -4303,31 +4354,27 @@ begin
         -- on the equality of math registers 14 and 15
         if reg_math_regs(14) = reg_math_regs(15) then
           math_unit_flags(6) <= '1';
-          if math_unit_flags(3 downto 2) = "00" then
-            math_unit_flags(7) <= '1' ;
-          end if;
         else
           math_unit_flags(6) <= '0';
-          if math_unit_flags(3 downto 2) = "11" then
-            math_unit_flags(7) <= '1' ;
-          end if;
         end if;
         if reg_math_regs(14) < reg_math_regs(15) then
           math_unit_flags(5) <= '1';
-          if math_unit_flags(3 downto 2) = "10" then
-            math_unit_flags(7) <= '1' ;
-          end if;
+          -- if math_unit_flags(3 downto 2) = "10" then
+          --   math_unit_flags(7) <= '1' ;
+          -- end if;
         else
           math_unit_flags(5) <= '0';
         end if;
         if reg_math_regs(14) > reg_math_regs(15) then
           math_unit_flags(4) <= '1';
-          if math_unit_flags(3 downto 2) = "01" then
-            math_unit_flags(7) <= '1' ;
-          end if;
+          -- if math_unit_flags(3 downto 2) = "01" then
+          --   math_unit_flags(7) <= '1' ;
+          -- end if;
         else
           math_unit_flags(4) <= '0';
         end if;
+        -- Temporary: maybe use $D7E1.7 as an interrupt indicator later?
+        math_unit_flags(7) <= '0';
       end if;
 
     end if;
diff --git a/src/vhdl/multiply32.vhdl b/src/vhdl/multiply32.vhdl
index 54012803b..519a102f1 100644
--- a/src/vhdl/multiply32.vhdl
+++ b/src/vhdl/multiply32.vhdl
@@ -34,6 +34,7 @@ entity multiply32 is
   port (
     clock : in std_logic;
     do_add : in std_logic;
+    invert_b : in std_logic;
     input_a : in integer range 0 to 15;
     input_b : in integer range 0 to 15;
     input_value_number : in integer range 0 to 15;
@@ -75,7 +76,11 @@ begin
       if input_value_number = input_b then
 --        report "MATH: Unit #" & integer'image(unit)
 --          & ": Setting b=$" & to_hstring(input_value);
-        b <= signed(input_value);
+        if invert_b = '1' then
+          b <= -signed(input_value);
+        else
+          b <= signed(input_value);
+        end if;
       end if;
 
       -- Calculate the result
@@ -85,11 +90,11 @@ begin
       p4 <= p3;
       p <= p4;
       -- Even units do addition, odd ones do subtraction
-      if (unit mod 2) = 0 then
+      -- if (unit mod 2) = 0 then
         s <= unsigned((a(31) & a)+(b(31) & b));
-      else
-        s <= unsigned((a(31) & a)-(b(31) & b));
-      end if;
+      -- else
+      --   s <= unsigned((a(31) & a)-(b(31) & b));
+      -- end if;
 
       -- Display output value when requested, and tri-state outputs otherwise
       -- if output_select = unit then
diff --git a/src/vhdl/shifter32.vhdl b/src/vhdl/shifter32.vhdl
index e15a79dd4..26c0f53ca 100644
--- a/src/vhdl/shifter32.vhdl
+++ b/src/vhdl/shifter32.vhdl
@@ -34,6 +34,7 @@ entity shifter32 is
   port (
     clock : in std_logic;
     do_add : in std_logic;
+    invert_b : in std_logic;
     input_a : in integer range 0 to 15;
     input_b : in integer range 0 to 15;
     input_value_number : in integer range 0 to 15;
@@ -61,16 +62,20 @@ begin
         a <= input_value;
       end if;
       if input_value_number = input_b then
-        b <= input_value;
+        if invert_b = '1' then
+          b <= unsigned(-signed(input_value));
+        else
+          b <= input_value;
+        end if;
       end if;
 
       -- Calculate the result
       -- Even units do addition, odd ones do subtraction
-      if (unit mod 2) = 0 then
+      -- if (unit mod 2) = 0 then
         s <= unsigned((a(31) & a)+(b(31) & b));
-      else
-        s <= unsigned((a(31) & a)-(b(31) & b));
-      end if;
+      -- else
+      --   s <= unsigned((a(31) & a)-(b(31) & b));
+      -- end if;
 
       if b(7 downto 0) = x"00" then
         p(63 downto 32) <= (others => '0');

From d53dfe8ad7ebdbbd0ebfb7995100267e565732e1 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Thu, 2 Oct 2025 13:57:44 -0500
Subject: [PATCH 6/9] implemented new latching system

---
 src/vhdl/gs4510.vhdl | 137 ++++++++++++++++++++++++++++++-------------
 1 file changed, 96 insertions(+), 41 deletions(-)

diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl
index 845954b4a..29de1e873 100755
--- a/src/vhdl/gs4510.vhdl
+++ b/src/vhdl/gs4510.vhdl
@@ -1463,16 +1463,25 @@ architecture Behavioural of gs4510 is
   type math_reg_array is array(0 to 15) of unsigned(31 downto 0);
   type math_config_array is array(0 to math_unit_count - 1) of math_unit_config;
   type math_output_array is array(0 to math_unit_count - 1) of unsigned(63 downto 0);
-  type math_latch_array is array(0 to math_unit_count - 1) of unsigned(3 downto 0);
+  type math_latch_array is array(0 to math_unit_count - 1) of integer range 0 to 15;
   signal reg_math_regs : math_reg_array := (others => to_unsigned(0,32));
   signal reg_math_config : math_config_array := (others => math_unit_config_v);
   signal reg_math_config_drive : math_config_array := (others => math_unit_config_v);
-  signal reg_math_latch_counter : unsigned(7 downto 0) := x"00";
-  signal reg_math_latch_interval : unsigned(7 downto 0) := x"00";
-  -- signal math_latch_value : unsigned(3 downto 0);  -- Latch value to write
-  -- signal math_latch_address : unsigned(3 downto 0);  -- Which unit to write latch value to
-  signal reg_math_latch_counters : math_latch_array := (others => (others => '0'));
-  signal reg_math_latch_intervals : math_latch_array := (others => (others => '0'));
+  -- signal reg_math_latch_counter : unsigned(7 downto 0) := x"00";
+  -- signal reg_math_latch_interval : unsigned(7 downto 0) := x"00";
+  signal math_latch_value : integer range 0 to 15;  -- Latch value to write
+  signal math_latch_address : integer range 0 to 15;  -- Which unit to write latch value to
+  signal math_latch_write_toggle : std_logic := '0';
+  signal last_math_latch_write_toggle : std_logic := '0';
+  signal math_latch_reset_toggle : std_logic := '0';
+  signal last_math_latch_reset_toggle : std_logic := '0';
+  signal reg_math_latch_counters : math_latch_array := (others => 0);
+  signal reg_math_latch_fired : std_logic_vector(15 downto 0) := (others => '0');
+  signal reg_math_latch_intervals : math_latch_array := (others => 0);
+  -- Unit 15 needs to write to the last cycle status instead of current cycle, since
+  -- the register is copied over at the same time.
+  signal math_was_latched_current_cycle : std_logic_vector(14 downto 0) := (others => '0');
+  signal math_was_latched_last_cycle : std_logic_vector(15 downto 0) := (others => '0');
 
   -- We have the output counter out of phase with the input counter, so that we
   -- have time to catch an output, and store it, ready for presenting as an input
@@ -1713,6 +1722,7 @@ begin
     );
 
   process (mathclock)
+    variable math_current_unit_has_latched : std_logic := '0';
   begin
     if rising_edge(mathclock) and math_unit_enable then
       -- For the plumbed math units, we want to avoid having two huge 16x32x32
@@ -1773,8 +1783,58 @@ begin
       end if;
 
       -- Make sure output counter is running before starting to stash outputs, to avoid constantly writing a register
+      -- Math config latch bit indicates whether to treat its latch interval as a counter (unset) or as a unit index (set).
       if math_unit_flags(1) = '1' and math_unit_flags(0) = '0' and math_unit_halted = last_math_unit_halted then
-        if (reg_math_config_drive(math_output_counter).latched='0') or (reg_math_latch_counter = x"00") then
+        math_current_unit_has_latched := '0';
+        if reg_math_config_drive(math_output_counter).latched = '0' then
+          -- Latched bit unset, use latch interval and counter to determine when to latch.
+          if math_latch_reset_toggle = last_math_latch_reset_toggle then
+            -- Math latches are not resetting, proceed with checks.
+            if reg_math_latch_counters(math_output_counter) = 0 then
+              math_current_unit_has_latched := '1';
+              reg_math_latch_counters(math_output_counter) <= reg_math_latch_intervals(math_output_counter);
+            elsif reg_math_latch_counters(math_output_counter) = 8 then
+              reg_math_latch_fired(math_output_counter) <= '1';
+              if reg_math_latch_fired(math_output_counter) = '0' then
+                math_current_unit_has_latched := '1';
+              end if;
+            else
+              reg_math_latch_counters(math_output_counter) <= reg_math_latch_counters(math_output_counter) - 1;
+            end if;
+          else
+            -- Math latches are resetting, so latch immediately only if the interval is 0 or 8; otherwise just reload the counter.
+            if reg_math_latch_intervals(math_output_counter) = 0 then
+              math_current_unit_has_latched := '1';
+              reg_math_latch_fired(math_output_counter) <= '0';
+              reg_math_latch_counters(math_output_counter) <= reg_math_latch_intervals(math_output_counter);
+            elsif reg_math_latch_intervals(math_output_counter) = 8 then
+              math_current_unit_has_latched := '1';
+              reg_math_latch_fired(math_output_counter) <= '1';
+              reg_math_latch_counters(math_output_counter) <= reg_math_latch_intervals(math_output_counter);
+            else
+              reg_math_latch_fired(math_output_counter) <= '0';
+              reg_math_latch_counters(math_output_counter) <= reg_math_latch_intervals(math_output_counter) - 1;
+            end if;
+          end if;
+        else
+          -- Latched bit set, use a math unit's previous latch state to determine when to latch.
+          -- When resetting, assume no units were latched last cycle.
+          if math_latch_reset_toggle = last_math_latch_reset_toggle then
+            if math_was_latched_last_cycle(reg_math_latch_intervals(math_output_counter)) = '1' then
+              math_current_unit_has_latched := '1';
+            end if;
+          end if;
+        end if;
+
+        if math_output_counter = 15 then
+          -- Since this is the last unit, no intermediate is required.
+          math_was_latched_last_cycle(15) <= math_current_unit_has_latched;
+        else
+          math_was_latched_current_cycle(math_output_counter) <= math_current_unit_has_latched;
+        end if;
+
+        -- Process output if current unit has latched
+        if math_current_unit_has_latched = '1' then
           if reg_math_config_drive(math_output_counter).output_high = '0' then
             if reg_math_config_drive(math_output_counter).output_low = '0' then
               -- No output being kept, so nothing to do.
@@ -1784,43 +1844,23 @@ begin
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
                 & ") from output of math unit #" & integer'image(math_output_counter)
                 & " ( = $" & to_hstring(math_output_values(math_output_counter)(31 downto 0)) & ")";
-              -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
-              --   reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values_alt(math_output_counter mod 8)(31 downto 0);
-              -- else
-                reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
-              -- end if;
+              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
             end if;
           else
             if reg_math_config_drive(math_output_counter).output_low = '0' then
               -- Only high half of output is being kept, so stash it
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
                 & ") from output of math unit #" & integer'image(math_output_counter);
-              -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
-              --   reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values_alt(math_output_counter mod 8)(63 downto 32);
-              -- else
-                reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(63 downto 32);
-              -- end if;
+              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(63 downto 32);
             else
               -- Both are being stashed, so store in consecutive slots
               report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output)
                 & ") (and next) from output of math unit #" & integer'image(math_output_counter);
-              -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
-              --   reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values_alt(math_output_counter mod 8)(31 downto 0);
-              -- else
-                reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
-              -- end if;
+              reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0);
               if reg_math_config_drive(math_output_counter).output /= 15 then
-                -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
-                --   reg_math_regs(reg_math_config(math_output_counter).output + 1) <= math_output_values_alt(math_output_counter mod 8)(63 downto 32);
-                -- else
-                  reg_math_regs(reg_math_config(math_output_counter).output + 1) <= math_output_values(math_output_counter)(63 downto 32);
-                -- end if;
+                reg_math_regs(reg_math_config(math_output_counter).output + 1) <= math_output_values(math_output_counter)(63 downto 32);
               else
-                -- if math_output_counter >= 8 and math_unit_flags(2) = '1' then
-                --   reg_math_regs(0) <= math_output_values_alt(math_output_counter mod 8)(63 downto 32);
-                -- else
-                  reg_math_regs(0) <= math_output_values(math_output_counter)(63 downto 32);
-                -- end if;
+                reg_math_regs(0) <= math_output_values(math_output_counter)(63 downto 32);
               end if;
             end if;
           end if;
@@ -1834,6 +1874,10 @@ begin
         last_reg_math_write_toggle <= reg_math_write_toggle;
       end if;
 
+      if math_latch_write_toggle /= last_math_latch_write_toggle then
+        last_math_latch_write_toggle <= math_latch_write_toggle;
+      end if;
+
       if reg_math_cycle_counter_reset_toggle /= last_reg_math_cycle_counter_reset_toggle then
         last_reg_math_cycle_counter_reset_toggle <= reg_math_cycle_counter_reset_toggle;
       end if;
@@ -1849,6 +1893,10 @@ begin
             when others =>
           end case;
         end if;
+        if math_latch_write_toggle /= last_math_latch_write_toggle then
+          reg_math_latch_intervals(math_latch_address) <= math_latch_value;
+          reg_math_latch_counters(math_latch_address) <= math_latch_value;
+        end if;
       end if;
 
       -- Latch counter counts "math cycles", which is the time it takes for an
@@ -1856,13 +1904,17 @@ begin
       -- and output propagation.
       -- TODO: implement reg_math_cycle_counter_reset signal, see D7E1
       if math_output_counter = (15 + math_output_counter_init) mod 16 then
-        -- Decrement latch counter
-        if reg_math_latch_counter = x"00" then
-          reg_math_latch_counter <= reg_math_latch_interval;
-        else
-          reg_math_latch_counter <= reg_math_latch_counter - 1;
+        -- If a bit is set in math_was_latched_current_cycle, then that unit reset its latch counter and wrote an output.
+        -- For sequential latching, a unit needs to know the latch status from the previous cycle,
+        -- so the FPU needs to store which units latched on this cycle.
+        -- Unit 15 is an exception, since when this code runs, it still hasn't finished processing.
+        -- In order to avoid weird glitchiness with 15, it will write to the last cycle reg directly.
+        math_was_latched_last_cycle(14 downto 0) <= math_was_latched_current_cycle;
+        -- All units have been cycled through, so no more resetting to do.
+        if math_latch_reset_toggle /= last_math_latch_reset_toggle then
+          last_math_latch_reset_toggle <= math_latch_reset_toggle;
         end if;
-        -- And update math cycle counter, if math unit is active
+        -- Update math cycle counter, if math unit is active
         -- include a case for the reset, to avoid a possible edge case resulting in a double-drive
         if math_unit_flags(1) = '1' and reg_math_cycle_counter_reset_toggle = last_reg_math_cycle_counter_reset_toggle and math_unit_halted = last_math_unit_halted then
           if reg_math_cycle_counter_plus_one = reg_math_cycle_compare then
@@ -3080,7 +3132,7 @@ begin
                 &to_unsigned(reg_math_config(to_integer(the_read_address(3 downto 0))).output,4);
               -- @IO:GS $D7E0 MATH:LATCHINT Latch interval for latched outputs (in CPU cycles)
               -- $D7E1 is documented higher up
-            when x"E0" => return reg_math_latch_interval;
+            when x"E0" => return to_unsigned(math_latch_address,4) & to_unsigned(reg_math_latch_intervals(math_latch_address),4);
             when x"E1" =>
               if math_unit_halted = last_math_unit_halted then
                 return math_unit_flags;
@@ -3674,7 +3726,9 @@ begin
           reg_math_config(to_integer(long_address(3 downto 0))).output <= to_integer(value(3 downto 0));
         elsif long_address(7 downto 0) = x"E0" then
           -- @IO:GS $D7E0 - Math unit latch interval (only update output of math function units every this many cycles, if they have the latch output flag set)
-          reg_math_latch_interval <= value;
+          math_latch_address <= to_integer(value(7 downto 4));
+          math_latch_value <= to_integer(value(3 downto 0));
+          math_latch_write_toggle <= not math_latch_write_toggle;
         elsif long_address(7 downto 0) = x"E1" then
           -- @IO:GS $D7E1 - Math unit general settings (writing also clears math cycle counter)
           -- @IO:GS $D7E1.0 MATH:WREN Enable setting of math registers (must normally be set)
@@ -3682,6 +3736,7 @@ begin
           math_unit_flags(3 downto 0) <= value(3 downto 0);
           -- reg_math_cycle_counter <= to_unsigned(0,32); -- TODO: Should generate a reg_math_cycle_counter_reset signal
           reg_math_cycle_counter_reset_toggle <= not reg_math_cycle_counter_reset_toggle;
+          math_latch_reset_toggle <= not last_math_latch_reset_toggle;
           last_math_unit_halted <= math_unit_halted;
         elsif long_address(7 downto 0) = x"E2" then
           math_unit_invert_b(7 downto 0) <= std_logic_vector(value);

From 5489d2f959a986a55ea871ef364b83ddd7863789 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Fri, 3 Oct 2025 16:14:50 -0500
Subject: [PATCH 7/9] Implement divider multiply mode; fix dividers sometimes
 not restarting correctly. Known issue: this commit occasionally produces
 multi-driven net errors on math_unit_flags (cause not yet identified).

---
 src/vhdl/divider32.vhdl | 210 ++++++++++++++++++++++------------------
 src/vhdl/gs4510.vhdl    |   8 +-
 2 files changed, 117 insertions(+), 101 deletions(-)

diff --git a/src/vhdl/divider32.vhdl b/src/vhdl/divider32.vhdl
index da6a105a8..d4c8fbd52 100644
--- a/src/vhdl/divider32.vhdl
+++ b/src/vhdl/divider32.vhdl
@@ -35,6 +35,7 @@ entity divider32 is
     clock : in std_logic;
     do_add : in std_logic;
     invert_b : in std_logic;
+    do_mult : in std_logic;
     input_a : in integer range 0 to 15;
     input_b : in integer range 0 to 15;
     input_value_number : in integer range 0 to 15;
@@ -102,98 +103,102 @@ begin
       -- only for vunit test
       -- report "q$" & to_hstring(q) & " = n$" & to_hstring(n) & " / d$" & to_hstring(d);
       mult_out <= mult_a * mult_b;
-      case state is
-        when idle =>
-          null;
-        -- special startup case to allow for multiplier outputs to settle
-        when start_1 =>
-          -- f = 2 - dd
-          f := to_unsigned(0,70);
-          f(69) := '1';
-          f := f - dd;
-          -- Now multiply both nn and dd by f
-          -- temp138 := nn * f;
-          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
-          mult_a <= nn;
-          mult_b <= f;
-          state <= start_2;
-        when start_2 =>
-          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
-          mult_a <= dd; 
-          mult_b <= f;
-          state <= start_3;
-        when start_3 =>
-          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
-          mult_a <= nn;
-          mult_b <= f;
-          state <= step_2;
-        when step_1 =>
-          report "nn=$" & to_hstring(nn(67 downto 36)) & "." & to_hstring(nn(35 downto 4)) & "." & to_hstring(nn(3 downto 0))
-            & " / dd=$" & to_hstring(dd(67 downto 36)) & "." & to_hstring(dd(35 downto 4)) & "." & to_hstring(dd(3 downto 0));
-          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
-        -- f = 2 - dd
-          -- f := to_unsigned(0,70);
-          -- f(69) := '1';
-          -- f := f - dd;
-          report "f = $" & to_hstring(f);
+      if start_over = '0' then
+        case state is
+          when idle =>
+            null;
+            -- special startup case to allow for multiplier outputs to settle
+          when start_1 =>
+            -- f = 2 - dd
+            f := to_unsigned(0,70);
+            f(69) := '1';
+            f := f - dd;
+            -- Now multiply both nn and dd by f
+            -- temp138 := nn * f;
+            report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+            mult_a <= nn;
+            mult_b <= f;
+            state <= start_2;
+          when start_2 =>
+            report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+            mult_a <= dd; 
+            mult_b <= f;
+            -- multiplier gets set to a * b when start_over is asserted, so store the product.
+            p <= mult_out(135 downto 72);
+            state <= start_3;
+          when start_3 =>
+            report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+            mult_a <= nn;
+            mult_b <= f;
+            state <= step_2;
+          when step_1 =>
+            report "nn=$" & to_hstring(nn(67 downto 36)) & "." & to_hstring(nn(35 downto 4)) & "." & to_hstring(nn(3 downto 0))
+              & " / dd=$" & to_hstring(dd(67 downto 36)) & "." & to_hstring(dd(35 downto 4)) & "." & to_hstring(dd(3 downto 0));
+            report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+            -- f = 2 - dd
+            -- f := to_unsigned(0,70);
+            -- f(69) := '1';
+            -- f := f - dd;
+            report "f = $" & to_hstring(f);
 
-          -- Check whether to round up
-          if mult_out(67) = '1' then
-             nn <= mult_out(135 downto 68) + 1;
-             mult_a <= mult_out(135 downto 68) + 1;
-          else
-             nn <= mult_out(135 downto 68);
-             mult_a <= mult_out(135 downto 68);
-          end if;
-          -- Now multiply both nn and dd by f
-          -- temp138 := nn * f;
-          mult_b <= f;
-          state <= step_2;
-          -- report "temp138=$" & to_hstring(temp138);
-        when step_2 =>
-          report "nn=$" & to_hstring(nn(67 downto 36)) & "." & to_hstring(nn(35 downto 4)) & "." & to_hstring(nn(3 downto 0))
-            & " / dd=$" & to_hstring(dd(67 downto 36)) & "." & to_hstring(dd(35 downto 4)) & "." & to_hstring(dd(3 downto 0));
-          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
-          -- temp138 := dd * f;
-          -- Check whether to round up, but avoid overflow
-          f := to_unsigned(0,70);
-          f(69) := '1';
-          -- f := f - dd;
-          if mult_out(67) = '1' and mult_out(135 downto 68) /= X"FFFFFFFFFFFFFFFFF" then
-             dd <= mult_out(135 downto 68) + 1;
-             mult_a <= mult_out(135 downto 68) + 1;
-             f := f - (mult_out(135 downto 68) + 1);
-          else
-             dd <= mult_out(135 downto 68);
-             mult_a <= mult_out(135 downto 68);
-             f := f - mult_out(135 downto 68);
-          end if;
-          -- report "temp138=$" & to_hstring(temp138);          
-          mult_b <= f;
-          -- Perform number of required steps, or abort early if we can
-          if steps_remaining /= 0 and dd /= x"FFFFFFFFFFFFFFFFF" then
-            steps_remaining <= steps_remaining - 1;
-            state <= step_1;
-          else
-            state <= output;
-          end if;
-        when output =>
-          report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
-          -- No idea why we need to add one, but we do to stop things like 4/2
-          -- giving a result of 1.999999999
-          if mult_out(67) = '1' then
-            temp64(67 downto 0) := mult_out(135 downto 68) + 1;
-          else
-            temp64(67 downto 0) := mult_out(135 downto 68);
-          end if;
-          -- temp64(67 downto 0) := nn;
-          temp64(73 downto 68) := (others => '0');
-          temp64 := temp64 + 8;
-          report "temp64=$" & to_hstring(temp64);
-          busy <= '0';
-          q <= temp64(67 downto 4);
-          state <= idle;
-      end case;
+            -- Check whether to round up
+            if mult_out(67) = '1' then
+              nn <= mult_out(135 downto 68) + 1;
+              mult_a <= mult_out(135 downto 68) + 1;
+            else
+              nn <= mult_out(135 downto 68);
+              mult_a <= mult_out(135 downto 68);
+            end if;
+            -- Now multiply both nn and dd by f
+            -- temp138 := nn * f;
+            mult_b <= f;
+            state <= step_2;
+            -- report "temp138=$" & to_hstring(temp138);
+          when step_2 =>
+            report "nn=$" & to_hstring(nn(67 downto 36)) & "." & to_hstring(nn(35 downto 4)) & "." & to_hstring(nn(3 downto 0))
+              & " / dd=$" & to_hstring(dd(67 downto 36)) & "." & to_hstring(dd(35 downto 4)) & "." & to_hstring(dd(3 downto 0));
+            report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+            -- temp138 := dd * f;
+            -- Check whether to round up, but avoid overflow
+            f := to_unsigned(0,70);
+            f(69) := '1';
+            -- f := f - dd;
+            if mult_out(67) = '1' and mult_out(135 downto 68) /= X"FFFFFFFFFFFFFFFFF" then
+              dd <= mult_out(135 downto 68) + 1;
+              mult_a <= mult_out(135 downto 68) + 1;
+              f := f - (mult_out(135 downto 68) + 1);
+            else
+              dd <= mult_out(135 downto 68);
+              mult_a <= mult_out(135 downto 68);
+              f := f - mult_out(135 downto 68);
+            end if;
+            -- report "temp138=$" & to_hstring(temp138);          
+            mult_b <= f;
+            -- Perform number of required steps, or abort early if we can
+            if steps_remaining /= 0 and dd /= x"FFFFFFFFFFFFFFFFF" then
+              steps_remaining <= steps_remaining - 1;
+              state <= step_1;
+            else
+              state <= output;
+            end if;
+          when output =>
+            report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
+            -- No idea why we need to add one, but we do to stop things like 4/2
+            -- giving a result of 1.999999999
+            if mult_out(67) = '1' then
+              temp64(67 downto 0) := mult_out(135 downto 68) + 1;
+            else
+              temp64(67 downto 0) := mult_out(135 downto 68);
+            end if;
+            -- temp64(67 downto 0) := nn;
+            temp64(73 downto 68) := (others => '0');
+            temp64 := temp64 + 8;
+            report "temp64=$" & to_hstring(temp64);
+            busy <= '0';
+            q <= temp64(67 downto 4);
+            state <= idle;
+        end case;
+      end if;
 
       if start_over='1' and b /= to_unsigned(0,32) then
         report "Calculating $" & to_hstring(a) & " / $" & to_hstring(b);
@@ -211,10 +216,21 @@ begin
         state <= start_1;
         steps_remaining <= 5;
         busy <= '1';
+        -- calculate multiplication
+        mult_a(35 downto 0) <= (others => '0');
+        mult_a(67 downto 36) <= a;
+        mult_b(35 downto 0) <= (others => '0');
+        mult_b(67 downto 36) <= b;
+        mult_b(69 downto 68) <= (others => '0');
       elsif start_over='1' then
+        -- define divide by zero as zero
         report "Ignoring divide by zero";
+        state <= idle;
+        busy <= '0';
+        q <= (others => '0');
+        -- zero product of a * b, since we know b = 0
+        p <= (others => '0');
       end if;
-
     end if;
   end process;
   
@@ -232,7 +248,7 @@ begin
 --        report "MATH: Unit #" & integer'image(unit)
 --          & ": Setting a=$" & to_hstring(input_value);
         a <= input_value;
-        if a /= input_value then
+        if a /= input_value or busy = '0' then
           start_over <= '1';
         end if;
       end if;
@@ -241,12 +257,12 @@ begin
  --         & ": Setting b=$" & to_hstring(input_value);
         if invert_b = '1' then
           b <= unsigned(-signed(input_value));
-          if b /= unsigned(-signed(input_value)) then
+          if b /= unsigned(-signed(input_value)) or busy = '0' then
             start_over <= '1';
           end if;
         else
           b <= input_value;
-          if b /= input_value then
+          if b /= input_value or busy = '0' then
             start_over <= '1';
           end if;
         end if;
@@ -271,6 +287,10 @@ begin
           output_value(32 downto 0) <= s;
           report "MATH: Unit #" & integer'image(unit)
             & " outputting addition sum $" & to_hstring(s);
+        elsif do_mult = '1' then
+          output_value <= p;
+          report "MATH: Unit #" & integer'image(unit)
+            & " outputting multiplication product $" & to_hstring(p);
         else
           output_value <= q;
           report "MATH: Unit #" & integer'image(unit)
diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl
index 29de1e873..378a8084a 100755
--- a/src/vhdl/gs4510.vhdl
+++ b/src/vhdl/gs4510.vhdl
@@ -1686,6 +1686,7 @@ begin
       ) port map (
       clock => mathclock,
       do_add => reg_math_config_drive(unit).do_add,
+      do_mult => math_unit_flags(2),
       invert_b => math_unit_invert_b(unit),
       input_a => reg_math_config_drive(unit).source_a,
       input_b => reg_math_config_drive(unit).source_b,
@@ -4407,26 +4408,21 @@ begin
       if math_unit_enable then
         -- We also provide some flags (which will later trigger interrupts) based
         -- on the equality of math registers 14 and 15
+        math_unit_flags(6 downto 4) <= (others => '0');
         if reg_math_regs(14) = reg_math_regs(15) then
           math_unit_flags(6) <= '1';
-        else
-          math_unit_flags(6) <= '0';
         end if;
         if reg_math_regs(14) < reg_math_regs(15) then
           math_unit_flags(5) <= '1';
           -- if math_unit_flags(3 downto 2) = "10" then
           --   math_unit_flags(7) <= '1' ;
           -- end if;
-        else
-          math_unit_flags(5) <= '0';
         end if;
         if reg_math_regs(14) > reg_math_regs(15) then
           math_unit_flags(4) <= '1';
           -- if math_unit_flags(3 downto 2) = "01" then
           --   math_unit_flags(7) <= '1' ;
           -- end if;
-        else
-          math_unit_flags(4) <= '0';
         end if;
         -- temp, maybe use $D7E1.7 as an interrupt indicate later?
         math_unit_flags(7) <= '0';

From 428e6488133406028a0a4246d49d28186be3e079 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Sun, 5 Oct 2025 13:43:16 -0500
Subject: [PATCH 8/9] Fix divider multiply mode not being signed; add
 byte shift option

---
 src/vhdl/divider32.vhdl  | 19 ++++++---
 src/vhdl/gs4510.vhdl     | 92 +++++++++++++++++++++-------------------
 src/vhdl/multiply32.vhdl |  5 ++-
 3 files changed, 64 insertions(+), 52 deletions(-)

diff --git a/src/vhdl/divider32.vhdl b/src/vhdl/divider32.vhdl
index d4c8fbd52..c8907a46a 100644
--- a/src/vhdl/divider32.vhdl
+++ b/src/vhdl/divider32.vhdl
@@ -41,6 +41,7 @@ entity divider32 is
     input_value_number : in integer range 0 to 15;
     input_value : unsigned(31 downto 0);
     -- output_select : in integer range 0 to 15;
+    mult_shift : in unsigned(2 downto 0);
     output_value : out unsigned(63 downto 0) := (others => '0')
     );
 end entity;
@@ -62,6 +63,7 @@ architecture neo_gregorian of divider32 is
 
   signal mult_a : unsigned(67 downto 0) := (others => '0');
   signal mult_b : unsigned(69 downto 0) := (others => '0');
+  signal mult_signed : std_logic := '0';
   signal mult_out : unsigned(137 downto 0) := (others => '0');
 
   signal dd : unsigned(67 downto 0) := to_unsigned(0,68);
@@ -102,7 +104,11 @@ begin
       report "state is " & state_t'image(state);
       -- only for vunit test
       -- report "q$" & to_hstring(q) & " = n$" & to_hstring(n) & " / d$" & to_hstring(d);
-      mult_out <= mult_a * mult_b;
+      if mult_signed = '0' then
+        mult_out <= mult_a * mult_b;
+      else
+        mult_out <= unsigned(signed(mult_a) * signed(mult_b));
+      end if;
       if start_over = '0' then
         case state is
           when idle =>
@@ -118,13 +124,14 @@ begin
             report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
             mult_a <= nn;
             mult_b <= f;
+            mult_signed <= '0';
             state <= start_2;
           when start_2 =>
             report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
             mult_a <= dd; 
             mult_b <= f;
             -- multiplier gets set to a * b when start_over is asserted, so store the product.
-            p <= mult_out(135 downto 72);
+            p <= mult_out(137 downto 74);
             state <= start_3;
           when start_3 =>
             report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out);
@@ -219,9 +226,9 @@ begin
         -- calculate multiplication
         mult_a(35 downto 0) <= (others => '0');
         mult_a(67 downto 36) <= a;
-        mult_b(35 downto 0) <= (others => '0');
-        mult_b(67 downto 36) <= b;
-        mult_b(69 downto 68) <= (others => '0');
+        mult_b(37 downto 0) <= (others => '0');
+        mult_b(69 downto 38) <= b;
+        mult_signed <= '1';
       elsif start_over='1' then
         -- define divide by zero as zero
         report "Ignoring divide by zero";
@@ -288,7 +295,7 @@ begin
           report "MATH: Unit #" & integer'image(unit)
             & " outputting addition sum $" & to_hstring(s);
         elsif do_mult = '1' then
-          output_value <= p;
+          output_value <= shift_right(p, to_integer(mult_shift & "000"));
           report "MATH: Unit #" & integer'image(unit)
             & " outputting multiplication product $" & to_hstring(p);
         else
diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl
index 378a8084a..c2adadcb6 100755
--- a/src/vhdl/gs4510.vhdl
+++ b/src/vhdl/gs4510.vhdl
@@ -1502,6 +1502,10 @@ architecture Behavioural of gs4510 is
 
   -- Start with input and outputting enabled
   signal math_unit_flags : unsigned(7 downto 0) := x"01";
+  signal math_unit_mult_out_shift : unsigned(2 downto 0) := "000";
+  signal math_unit_less_than : std_logic := '0';
+  signal math_unit_greater_than : std_logic := '0';
+  signal math_unit_equal_to : std_logic := '0';
   signal math_unit_invert_b : std_logic_vector(15 downto 0) := (others => '0');
   -- halt math unit when math_unit_halted /= last_math_unit_halted
   signal math_unit_halted : std_logic := '0';
@@ -1629,7 +1633,7 @@ begin
       );
 
 
-  multipliers_a: for unit in 0 to 7 generate
+  multipliers: for unit in 0 to 7 generate
     mult_unit : entity work.multiply32 generic map (
       unit => unit
       ) port map (
@@ -1640,27 +1644,13 @@ begin
       input_b => reg_math_config_drive(unit).source_b,
       input_value_number => math_input_number,
       input_value => math_input_value,
+      output_shift => math_unit_mult_out_shift,
       output_value => math_output_values(unit)
       -- output_select => math_output_counter,
       -- output_value(31 downto 0) => math_output_value_low,
       -- output_value(63 downto 32) => math_output_value_high
       );
   end generate;
-  
-  -- multipliers_b: for unit in 8 to 15 generate
-  --   mult_unit_alt: entity work.multiply32 generic map (
-  --     unit => unit
-  --     ) port map (
-  --     clock => mathclock,
-  --     do_add => reg_math_config_drive(unit).do_add,
-  --     invert_b => math_unit_invert_b(unit),
-  --     input_a => reg_math_config_drive(unit).source_a,
-  --     input_b => reg_math_config_drive(unit).source_b,
-  --     input_value_number => math_input_number,
-  --     input_value => math_input_value,
-  --     output_value => math_output_values_alt(unit mod 8)
-  --     );
-  -- end generate;       
 
   shifters: for unit in 8 to 11 generate
     shift_unit : entity work.shifter32 generic map (
@@ -1692,6 +1682,7 @@ begin
       input_b => reg_math_config_drive(unit).source_b,
       input_value_number => math_input_number,
       input_value => math_input_value,
+      mult_shift => math_unit_mult_out_shift,
       output_value => math_output_values(unit)
       -- output_select => math_output_counter,
       -- output_value(31 downto 0) => math_output_value_low,
@@ -1931,6 +1922,29 @@ begin
         -- reg_math_cycle_counter_plus_one <= x"00000001";
       end if;
       reg_math_cycle_counter_plus_one <= reg_math_cycle_counter + 1;
+
+      -- We also provide some flags (which will later trigger interrupts) based
+      -- on comparing math registers 14 and 15 (equal, less than, greater than)
+      math_unit_flags(6) <= math_unit_equal_to;
+      math_unit_flags(5) <= math_unit_less_than;
+      math_unit_flags(4) <= math_unit_greater_than;
+      if reg_math_regs(14) = reg_math_regs(15) then
+        math_unit_equal_to <= '1';
+      else
+        math_unit_equal_to <= '0';
+      end if;
+      if reg_math_regs(14) < reg_math_regs(15) then
+        math_unit_less_than <= '1';
+      else
+        math_unit_less_than <= '0';
+      end if;
+      if reg_math_regs(14) > reg_math_regs(15) then
+        math_unit_greater_than <= '1';
+      else
+        math_unit_greater_than <= '0';
+      end if;
+      -- Temporary: maybe use $D7E1.7 as an interrupt indicator later?
+      math_unit_flags(7) <= '0';
     end if;
   end process;
 
@@ -3135,10 +3149,14 @@ begin
               -- $D7E1 is documented higher up
             when x"E0" => return to_unsigned(math_latch_address,4) & to_unsigned(reg_math_latch_intervals(math_latch_address),4);
             when x"E1" =>
-              if math_unit_halted = last_math_unit_halted then
-                return math_unit_flags;
+              if math_unit_flags(3) = '0' then
+                if math_unit_halted = last_math_unit_halted then
+                  return math_unit_flags;
+                else
+                  return math_unit_flags(7 downto 2) & "01";
+                end if;
               else
-                return math_unit_flags(7 downto 2) & "01";
+                return math_unit_flags(7 downto 3) & math_unit_mult_out_shift(2 downto 0);
               end if;
             -- @IO:GS $D7E2 MATH:RESERVED Reserved
             -- @IO:GS $D7E3 MATH:RESERVED Reserved
@@ -3734,7 +3752,17 @@ begin
           -- @IO:GS $D7E1 - Math unit general settings (writing also clears math cycle counter)
           -- @IO:GS $D7E1.0 MATH:WREN Enable setting of math registers (must normally be set)
           -- @IO:GS $D7E1.1 MATH:CALCEN Enable committing of output values from math units back to math registers (clearing effectively pauses iterative formulae)
-          math_unit_flags(3 downto 0) <= value(3 downto 0);
+          math_unit_flags(3) <= value(3);
+          if value(3) = '1' then
+            if math_unit_flags(0) = '1' then
+              math_unit_mult_out_shift <= value(2 downto 0);
+            elsif math_unit_halted /= last_math_unit_halted then
+              math_unit_mult_out_shift <= value(2 downto 0);
+              math_unit_flags(1 downto 0) <= "01";  -- reset flags to halted state, since halted state is cleared.
+            end if;
+          else
+            math_unit_flags(2 downto 0) <= value(2 downto 0);
+          end if;
           -- reg_math_cycle_counter <= to_unsigned(0,32); -- TODO: Should generate a reg_math_cycle_counter_reset signal
           reg_math_cycle_counter_reset_toggle <= not reg_math_cycle_counter_reset_toggle;
           math_latch_reset_toggle <= not last_math_latch_reset_toggle;
@@ -4404,30 +4432,6 @@ begin
       else
         chipselect_enables <= x"EF";
       end if;
-
-      if math_unit_enable then
-        -- We also provide some flags (which will later trigger interrupts) based
-        -- on the equality of math registers 14 and 15
-        math_unit_flags(6 downto 4) <= (others => '0');
-        if reg_math_regs(14) = reg_math_regs(15) then
-          math_unit_flags(6) <= '1';
-        end if;
-        if reg_math_regs(14) < reg_math_regs(15) then
-          math_unit_flags(5) <= '1';
-          -- if math_unit_flags(3 downto 2) = "10" then
-          --   math_unit_flags(7) <= '1' ;
-          -- end if;
-        end if;
-        if reg_math_regs(14) > reg_math_regs(15) then
-          math_unit_flags(4) <= '1';
-          -- if math_unit_flags(3 downto 2) = "01" then
-          --   math_unit_flags(7) <= '1' ;
-          -- end if;
-        end if;
-        -- temp, maybe use $D7E1.7 as an interrupt indicate later?
-        math_unit_flags(7) <= '0';
-      end if;
-
     end if;
 
     -- BEGINNING OF MAIN PROCESS FOR CPU
diff --git a/src/vhdl/multiply32.vhdl b/src/vhdl/multiply32.vhdl
index 519a102f1..490e25eac 100644
--- a/src/vhdl/multiply32.vhdl
+++ b/src/vhdl/multiply32.vhdl
@@ -38,8 +38,9 @@ entity multiply32 is
     input_a : in integer range 0 to 15;
     input_b : in integer range 0 to 15;
     input_value_number : in integer range 0 to 15;
-    input_value : unsigned(31 downto 0);
+    input_value : in unsigned(31 downto 0);
     -- output_select : in integer range 0 to 15;
+    output_shift : in unsigned(2 downto 0); 
     output_value : out unsigned(63 downto 0) := (others => '0')
     );
 end entity;
@@ -105,7 +106,7 @@ begin
           report "MATH: Unit #" & integer'image(unit)
             & " outputting addition sum $" & to_hstring(s);
         else
-          output_value <= unsigned(p);
+          output_value <= shift_right(unsigned(p), to_integer(output_shift & "000"));
 --          report "MATH: Unit #" & integer'image(unit)
 --            & " outputting multiplication product $" & to_hstring(unsigned(p));
         end if;

From 745464fca6b093dc3ce267ba035d0acbbc0b9f94 Mon Sep 17 00:00:00 2001
From: Unkn0wn <teamlightning2018@outlook.com>
Date: Wed, 22 Oct 2025 10:22:35 -0500
Subject: [PATCH 9/9] Removed a bunch of commented-out code, and cleaned up
 formatting for divider32, multiply32, and shifter32.

---
 src/vhdl/divider32.vhdl  | 56 ++++++++++++++--------------------------
 src/vhdl/multiply32.vhdl | 38 ++++++++++++---------------
 src/vhdl/shifter32.vhdl  | 30 ++++++++-------------
 3 files changed, 47 insertions(+), 77 deletions(-)

diff --git a/src/vhdl/divider32.vhdl b/src/vhdl/divider32.vhdl
index c8907a46a..9119e91a7 100644
--- a/src/vhdl/divider32.vhdl
+++ b/src/vhdl/divider32.vhdl
@@ -80,16 +80,6 @@ architecture neo_gregorian of divider32 is
   end function count_leading_zeros;
 begin
 
-  -- instance "fast_divide_1"
-  -- fast_divide_1: entity work.fast_divide
-  --   port map (
-  --     clock      => clock,
-  --     n          => a,
-  --     d          => b,
-  --     q          => q,
-  --     start_over => start_over,
-  --     busy       => busy);
-
   process (clock) is
     variable temp64 : unsigned(73 downto 0) := to_unsigned(0,74);
     variable temp96 : unsigned(105 downto 0) := to_unsigned(0,106);
@@ -279,33 +269,27 @@ begin
         start_over <= '0';
       end if;
 
-      -- Even units do addition, odd ones do subtraction
-      -- if (unit mod 2) = 0 then
-        s <= unsigned((a(31) & a) + (b(31) & b));
-      -- else
-      --   s <= unsigned((a(31) & a)-(b(31) & b));
-      -- end if;
+      -- Compute sum of inputs
+      s <= unsigned((a(31) & a) + (b(31) & b));
 
-      -- Display output value when requested, and tri-state outputs otherwise
-      -- if output_select = unit then
-        if do_add='1' then
-          -- Output sign-extended 33 bit addition result
-          output_value(63 downto 33) <= (others => s(32));
-          output_value(32 downto 0) <= s;
-          report "MATH: Unit #" & integer'image(unit)
-            & " outputting addition sum $" & to_hstring(s);
-        elsif do_mult = '1' then
-          output_value <= shift_right(p, to_integer(mult_shift & "000"));
-          report "MATH: Unit #" & integer'image(unit)
-            & " outputting multiplication product $" & to_hstring(p);
-        else
-          output_value <= q;
-          report "MATH: Unit #" & integer'image(unit)
-            & " outputting division quotient $" & to_hstring(q);
-        end if;
-      -- else
-      --   output_value <= (others => 'Z');
-      -- end if;
+      -- Output result, stored in output register on the CPU side
+      if do_add='1' then
+        -- Output sign-extended 33 bit addition result
+        output_value(63 downto 33) <= (others => s(32));
+        output_value(32 downto 0) <= s;
+        report "MATH: Unit #" & integer'image(unit)
+          & " outputting addition sum $" & to_hstring(s);
+      elsif do_mult = '1' then
+        -- Output product shifted right by mult_shift bytes (mult_shift * 8 bits)
+        output_value <= shift_right(p, to_integer(mult_shift & "000"));
+        report "MATH: Unit #" & integer'image(unit)
+          & " outputting multiplication product $" & to_hstring(p);
+      else
+        -- Output quotient and fractional part
+        output_value <= q;
+        report "MATH: Unit #" & integer'image(unit)
+          & " outputting division quotient $" & to_hstring(q);
+      end if;
     end if;
   end process;
 end neo_gregorian;
diff --git a/src/vhdl/multiply32.vhdl b/src/vhdl/multiply32.vhdl
index 490e25eac..5221dcebf 100644
--- a/src/vhdl/multiply32.vhdl
+++ b/src/vhdl/multiply32.vhdl
@@ -90,29 +90,23 @@ begin
       p3 <= p2;
       p4 <= p3;
       p <= p4;
-      -- Even units do addition, odd ones do subtraction
-      -- if (unit mod 2) = 0 then
-        s <= unsigned((a(31) & a)+(b(31) & b));
-      -- else
-      --   s <= unsigned((a(31) & a)-(b(31) & b));
-      -- end if;
 
-      -- Display output value when requested, and tri-state outputs otherwise
-      -- if output_select = unit then
-        if do_add='1' then
-          -- Output sign-extended 33 bit addition result
-          output_value(63 downto 33) <= (others => s(32));
-          output_value(32 downto 0) <= s;
-          report "MATH: Unit #" & integer'image(unit)
-            & " outputting addition sum $" & to_hstring(s);
-        else
-          output_value <= shift_right(unsigned(p), to_integer(output_shift & "000"));
---          report "MATH: Unit #" & integer'image(unit)
---            & " outputting multiplication product $" & to_hstring(unsigned(p));
-        end if;
-      -- else
-      --   output_value <= (others => 'Z');
-      -- end if;
+      -- Calculate sum of inputs
+      s <= unsigned((a(31) & a)+(b(31) & b));
+
+      -- Output result, stored in output register on the CPU side
+      if do_add='1' then
+        -- Output sign-extended 33 bit addition result
+        output_value(63 downto 33) <= (others => s(32));
+        output_value(32 downto 0) <= s;
+        -- report "MATH: Unit #" & integer'image(unit)
+        --   & " outputting addition sum $" & to_hstring(s);
+      else
+        -- Output product shifted right by output_shift bytes (output_shift * 8 bits)
+        output_value <= shift_right(unsigned(p), to_integer(output_shift & "000"));
+        -- report "MATH: Unit #" & integer'image(unit)
+        --   & " outputting multiplication product $" & to_hstring(unsigned(p));
+      end if;
     end if;
   end process;
 end neo_gregorian;
diff --git a/src/vhdl/shifter32.vhdl b/src/vhdl/shifter32.vhdl
index 26c0f53ca..efc4ad0cc 100644
--- a/src/vhdl/shifter32.vhdl
+++ b/src/vhdl/shifter32.vhdl
@@ -69,13 +69,8 @@ begin
         end if;
       end if;
 
-      -- Calculate the result
-      -- Even units do addition, odd ones do subtraction
-      -- if (unit mod 2) = 0 then
-        s <= unsigned((a(31) & a)+(b(31) & b));
-      -- else
-      --   s <= unsigned((a(31) & a)-(b(31) & b));
-      -- end if;
+      -- Calculate sum of inputs
+      s <= unsigned((a(31) & a)+(b(31) & b));
 
       if b(7 downto 0) = x"00" then
         p(63 downto 32) <= (others => '0');
@@ -93,18 +88,15 @@ begin
         end if;
       end if;
 
-      -- Display output value when requested, and tri-state outputs otherwise
-      -- if output_select = unit then
-        if do_add='1' then
-          -- Output sign-extended 33 bit addition result
-          output_value(63 downto 33) <= (others => s(32));
-          output_value(32 downto 0) <= s;
-        else
-          output_value <= p;
-        end if;
-      -- else
-      --   output_value <= (others => 'Z');
-      -- end if;
+      -- Output result, stored in output register on the CPU side
+      if do_add='1' then
+        -- Output sign-extended 33 bit addition result
+        output_value(63 downto 33) <= (others => s(32));
+        output_value(32 downto 0) <= s;
+      else
+        -- Output shifted result
+        output_value <= p;
+      end if;
     end if;
   end process;
 end neo_gregorian;