diff --git a/src/vhdl/divider32.vhdl b/src/vhdl/divider32.vhdl index c00189422..9119e91a7 100644 --- a/src/vhdl/divider32.vhdl +++ b/src/vhdl/divider32.vhdl @@ -28,33 +28,209 @@ use Std.TextIO.all; use work.debugtools.all; entity divider32 is + generic ( + unit : integer range 0 to 15 + ); port ( clock : in std_logic; - unit : in integer range 0 to 15; do_add : in std_logic; + invert_b : in std_logic; + do_mult : in std_logic; input_a : in integer range 0 to 15; input_b : in integer range 0 to 15; input_value_number : in integer range 0 to 15; input_value : unsigned(31 downto 0); - output_select : in integer range 0 to 15; - output_value : out unsigned(63 downto 0) + -- output_select : in integer range 0 to 15; + mult_shift : in unsigned(2 downto 0); + output_value : out unsigned(63 downto 0) := (others => '0') ); end entity; architecture neo_gregorian of divider32 is - signal a : signed(31 downto 0) := to_signed(0,32); - signal b : signed(31 downto 0) := to_signed(0,32); - signal p : signed(63 downto 0) := to_signed(0,64); + signal a : unsigned(31 downto 0) := to_unsigned(0,32); + signal b : unsigned(31 downto 0) := to_unsigned(0,32); + signal p : unsigned(63 downto 0) := to_unsigned(0,64); + signal q : unsigned(63 downto 0) := to_unsigned(0,64); signal s : unsigned(32 downto 0) := to_unsigned(0,33); - - signal p1 : signed(63 downto 0); - signal p2 : signed(63 downto 0); - signal p3 : signed(63 downto 0); - signal p4 : signed(63 downto 0); + signal busy : std_logic := '0'; + signal start_over : std_logic := '0'; + + type state_t is (idle, start_1, start_2, start_3, step_1, step_2, output); + signal state : state_t := idle; + signal steps_remaining : integer range 0 to 5 := 0; + + signal mult_a : unsigned(67 downto 0) := (others => '0'); + signal mult_b : unsigned(69 downto 0) := (others => '0'); + signal mult_signed : std_logic := '0'; + signal mult_out : unsigned(137 downto 0) := (others => '0'); + + signal dd : unsigned(67 downto 0) := to_unsigned(0,68); + signal nn : unsigned(67 downto 0) := to_unsigned(0,68); + + pure function count_leading_zeros(arg : unsigned(31 downto 0)) return natural is + begin + for i in 0 to 31 loop + if arg(31-i) = '1' then + return i; + end if; + end loop; + return 0; + end function count_leading_zeros; begin + process (clock) is + variable temp64 : unsigned(73 downto 0) := to_unsigned(0,74); + variable temp96 : unsigned(105 downto 0) := to_unsigned(0,106); + -- variable temp138 : unsigned(137 downto 0) := to_unsigned(0,138); + variable f : unsigned(69 downto 0) := to_unsigned(0,70); + variable leading_zeros : natural range 0 to 31; + variable new_dd : unsigned( 35 downto 0); + variable new_nn : unsigned( 67 downto 0); + variable padded_d : unsigned(63 downto 0); + begin + if rising_edge(clock) then + report "state is " & state_t'image(state); + -- only for vunit test + -- report "q$" & to_hstring(q) & " = n$" & to_hstring(n) & " / d$" & to_hstring(d); + if mult_signed = '0' then + mult_out <= mult_a * mult_b; + else + mult_out <= unsigned(signed(mult_a) * signed(mult_b)); + end if; + if start_over = '0' then + case state is + when idle => + null; + -- special startup case to allow for multiplier outputs to settle + when start_1 => + -- f = 2 - dd + f := to_unsigned(0,70); + f(69) := '1'; + f := f - dd; + -- Now multiply both nn and dd by f + -- temp138 := nn * f; + report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out); + mult_a <= nn; + mult_b <= f; + mult_signed <= '0'; + state <= start_2; + when start_2 => + report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out); + mult_a <= dd; + mult_b <= f; + -- multiplier gets set to a * b when start_over is asserted, so store the product. + p <= mult_out(137 downto 74); + state <= start_3; + when start_3 => + report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out); + mult_a <= nn; + mult_b <= f; + state <= step_2; + when step_1 => + report "nn=$" & to_hstring(nn(67 downto 36)) & "." & to_hstring(nn(35 downto 4)) & "." & to_hstring(nn(3 downto 0)) + & " / dd=$" & to_hstring(dd(67 downto 36)) & "." & to_hstring(dd(35 downto 4)) & "." & to_hstring(dd(3 downto 0)); + report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out); + -- f = 2 - dd + -- f := to_unsigned(0,70); + -- f(69) := '1'; + -- f := f - dd; + report "f = $" & to_hstring(f); + + -- Check whether to round up + if mult_out(67) = '1' then + nn <= mult_out(135 downto 68) + 1; + mult_a <= mult_out(135 downto 68) + 1; + else + nn <= mult_out(135 downto 68); + mult_a <= mult_out(135 downto 68); + end if; + -- Now multiply both nn and dd by f + -- temp138 := nn * f; + mult_b <= f; + state <= step_2; + -- report "temp138=$" & to_hstring(temp138); + when step_2 => + report "nn=$" & to_hstring(nn(67 downto 36)) & "." & to_hstring(nn(35 downto 4)) & "." & to_hstring(nn(3 downto 0)) + & " / dd=$" & to_hstring(dd(67 downto 36)) & "." & to_hstring(dd(35 downto 4)) & "." & to_hstring(dd(3 downto 0)); + report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out); + -- temp138 := dd * f; + -- Check whether to round up, but avoid overflow + f := to_unsigned(0,70); + f(69) := '1'; + -- f := f - dd; + if mult_out(67) = '1' and mult_out(135 downto 68) /= X"FFFFFFFFFFFFFFFFF" then + dd <= mult_out(135 downto 68) + 1; + mult_a <= mult_out(135 downto 68) + 1; + f := f - (mult_out(135 downto 68) + 1); + else + dd <= mult_out(135 downto 68); + mult_a <= mult_out(135 downto 68); + f := f - mult_out(135 downto 68); + end if; + -- report "temp138=$" & to_hstring(temp138); + mult_b <= f; + -- Perform number of required steps, or abort early if we can + if steps_remaining /= 0 and dd /= x"FFFFFFFFFFFFFFFFF" then + steps_remaining <= steps_remaining - 1; + state <= step_1; + else + state <= output; + end if; + when output => + report "mult_a=$" & to_hstring(mult_a) & ", mult_b=$" & to_hstring(mult_b) & ", mult_out=$" & to_hstring(mult_out); + -- No idea why we need to add one, but we do to stop things like 4/2 + -- giving a result of 1.999999999 + if mult_out(67) = '1' then + temp64(67 downto 0) := mult_out(135 downto 68) + 1; + else + temp64(67 downto 0) := mult_out(135 downto 68); + end if; + -- temp64(67 downto 0) := nn; + temp64(73 downto 68) := (others => '0'); + temp64 := temp64 + 8; + report "temp64=$" & to_hstring(temp64); + busy <= '0'; + q <= temp64(67 downto 4); + state <= idle; + end case; + end if; + + if start_over='1' and b /= to_unsigned(0,32) then + report "Calculating $" & to_hstring(a) & " / $" & to_hstring(b); + leading_zeros := count_leading_zeros(b); + padded_d := b & X"00000000"; + new_dd := (others => '0'); + new_dd(35 downto 4) := padded_d(63-leading_zeros downto 32-leading_zeros); + new_nn := (others => '0'); + new_nn(35+leading_zeros downto 4+leading_zeros) := a; + report "Normalised to $" & to_hstring(new_nn(67 downto 36)) & "." & + to_hstring(new_nn(35 downto 4)) & "." & to_hstring(new_nn(3 downto 0)) + & " / $" & to_hstring(new_dd(35 downto 4)) & "." & to_hstring(new_dd(3 downto 0)); + dd <= new_dd & X"00000000"; + nn <= new_nn; + state <= start_1; + steps_remaining <= 5; + busy <= '1'; + -- calculate multiplication + mult_a(35 downto 0) <= (others => '0'); + mult_a(67 downto 36) <= a; + mult_b(37 downto 0) <= (others => '0'); + mult_b(69 downto 38) <= b; + mult_signed <= '1'; + elsif start_over='1' then + -- define divide by zero as zero + report "Ignoring divide by zero"; + state <= idle; + busy <= '0'; + q <= (others => '0'); + -- zero product of a * b, since we know b = 0 + p <= (others => '0'); + end if; + end if; + end process; + process(clock) is begin if rising_edge(clock) then @@ -68,42 +244,51 @@ begin if input_value_number = input_a then -- report "MATH: Unit #" & integer'image(unit) -- & ": Setting a=$" & to_hstring(input_value); - a <= signed(input_value); + a <= input_value; + if a /= input_value or busy = '0' then + start_over <= '1'; + end if; end if; if input_value_number = input_b then -- report "MATH: Unit #" & integer'image(unit) -- & ": Setting b=$" & to_hstring(input_value); - b <= signed(input_value); + if invert_b = '1' then + b <= unsigned(-signed(input_value)); + if b /= unsigned(-signed(input_value)) or busy = '0' then + start_over <= '1'; + end if; + else + b <= input_value; + if b /= input_value or busy = '0' then + start_over <= '1'; + end if; + end if; end if; - -- Calculate the result - p1 <= a*b; - p2 <= p1; - p3 <= p2; - p4 <= p3; - p <= p4; - -- Even units do addition, odd ones do subtraction - if (unit mod 2) = 0 then - s <= to_unsigned(to_integer(a)+to_integer(b),33); - else - s <= to_unsigned(to_integer(a)-to_integer(b),33); + if start_over = '1' then + start_over <= '0'; end if; - -- Display output value when requested, and tri-state outputs otherwise - if output_select = unit then - if do_add='1' then - -- Output sign-extended 33 bit addition result - output_value(63 downto 33) <= (others => s(32)); - output_value(32 downto 0) <= s; - report "MATH: Unit #" & integer'image(unit) - & " outputting addition sum $" & to_hstring(s); - else - output_value <= unsigned(p); - report "MATH: Unit #" & integer'image(unit) - & " outputting multiplication product $" & to_hstring(unsigned(p)); - end if; + -- Compute sum of inputs + s <= unsigned((a(31) & a) + (b(31) & b)); + + -- Output result, stored in output register on the CPU side + if do_add='1' then + -- Output sign-extended 33 bit addition result + output_value(63 downto 33) <= (others => s(32)); + output_value(32 downto 0) <= s; + report "MATH: Unit #" & integer'image(unit) + & " outputting addition sum $" & to_hstring(s); + elsif do_mult = '1' then + -- Output product shifted by multiplication shift + output_value <= shift_right(p, to_integer(mult_shift & "000")); + report "MATH: Unit #" & integer'image(unit) + & " outputting multiplication product $" & to_hstring(p); else - output_value <= (others => 'Z'); + -- Output quotient and fractional part + output_value <= q; + report "MATH: Unit #" & integer'image(unit) + & " outputting division quotient $" & to_hstring(q); end if; end if; end process; diff --git a/src/vhdl/gs4510.vhdl b/src/vhdl/gs4510.vhdl index 77d89b3e2..c2adadcb6 100755 --- a/src/vhdl/gs4510.vhdl +++ b/src/vhdl/gs4510.vhdl @@ -37,7 +37,7 @@ use work.victypes.all; entity gs4510 is generic( - math_unit_enable : boolean := false; + math_unit_enable : boolean := true; chipram_1mb : std_logic := '0'; cpufrequency : integer := 40; @@ -1462,30 +1462,60 @@ architecture Behavioural of gs4510 is constant math_unit_count : integer := 16; type math_reg_array is array(0 to 15) of unsigned(31 downto 0); type math_config_array is array(0 to math_unit_count - 1) of math_unit_config; + type math_output_array is array(0 to math_unit_count - 1) of unsigned(63 downto 0); + type math_latch_array is array(0 to math_unit_count - 1) of integer range 0 to 15; signal reg_math_regs : math_reg_array := (others => to_unsigned(0,32)); signal reg_math_config : math_config_array := (others => math_unit_config_v); signal reg_math_config_drive : math_config_array := (others => math_unit_config_v); - signal reg_math_latch_counter : unsigned(7 downto 0) := x"00"; - signal reg_math_latch_interval : unsigned(7 downto 0) := x"00"; + -- signal reg_math_latch_counter : unsigned(7 downto 0) := x"00"; + -- signal reg_math_latch_interval : unsigned(7 downto 0) := x"00"; + signal math_latch_value : integer range 0 to 15; -- Latch value to write + signal math_latch_address : integer range 0 to 15; -- Which unit to write latch value to + signal math_latch_write_toggle : std_logic := '0'; + signal last_math_latch_write_toggle : std_logic := '0'; + signal math_latch_reset_toggle : std_logic := '0'; + signal last_math_latch_reset_toggle : std_logic := '0'; + signal reg_math_latch_counters : math_latch_array := (others => 0); + signal reg_math_latch_fired : std_logic_vector(15 downto 0) := (others => '0'); + signal reg_math_latch_intervals : math_latch_array := (others => 0); + -- Unit 15 needs to write to the last cycle status instead of current cycle, since + -- the register is copied over at the same time. + signal math_was_latched_current_cycle : std_logic_vector(14 downto 0) := (others => '0'); + signal math_was_latched_last_cycle : std_logic_vector(15 downto 0) := (others => '0'); -- We have the output counter out of phase with the input counter, so that we -- have time to catch an output, and store it, ready for presenting as an input -- very soon after. - signal math_input_counter : integer range 0 to 15 := 0; - signal math_output_counter : integer range 0 to 15 := 3; - signal prev_math_output_counter : integer range 0 to 15 := 2; + -- note: for whatever reason the way this was phased meant that the math cycle would + -- count up by 1 before math unit 1 had actually output anything + constant math_input_counter_init : integer range 0 to 15 := 0; + signal math_input_counter : integer range 0 to 15 := math_input_counter_init; + constant math_output_counter_init : integer range 0 to 15 := 0; + signal math_output_counter : integer range 0 to 15 := math_output_counter_init; -- originally 3 signal math_input_number : integer range 0 to 15 := 0; signal math_input_value : unsigned(31 downto 0) := (others => '0'); - signal math_output_value_low : unsigned(31 downto 0) := (others => '0'); - signal math_output_value_high : unsigned(31 downto 0) := (others => '0'); + signal math_output_values : math_output_array := (others => (others => '0')); + -- signal math_output_values_alt : math_alt_output_array := (others => (others => '0')); + -- signal math_output_value_low : unsigned(31 downto 0) := (others => 'Z'); + -- signal math_output_value_high : unsigned(31 downto 0) := (others => 'Z'); -- Start with input and outputting enabled - signal math_unit_flags : unsigned(7 downto 0) := x"03"; + signal math_unit_flags : unsigned(7 downto 0) := x"01"; + signal math_unit_mult_out_shift : unsigned(2 downto 0) := "000"; + signal math_unit_less_than : std_logic := '0'; + signal math_unit_greater_than : std_logic := '0'; + signal math_unit_equal_to : std_logic := '0'; + signal math_unit_invert_b : std_logic_vector(15 downto 0) := (others => '0'); + -- halt math unit when math_unit_halted /= last_math_unit_halted + signal math_unit_halted : std_logic := '0'; + signal last_math_unit_halted : std_logic := '0'; -- Each write to the math registers is passed to the math unit to handle -- (this is to avoid ISE doing really weird things in synthesis, thinking -- that each bit of each register was a clock or something similarly odd.) - signal reg_math_write : std_logic := '0'; + -- The reset and write systems need to directly read the toggle state, since + -- any intermediate adds enough latency to cause writes on every cycle to fail. + -- This is particularly bad with math register writing, since it breaks STQ. signal reg_math_write_toggle : std_logic := '0'; signal last_reg_math_write_toggle : std_logic := '0'; signal reg_math_regnum : integer range 0 to 15 := 0; @@ -1494,6 +1524,9 @@ architecture Behavioural of gs4510 is -- Count # of math cycles since cycle latch last written to signal reg_math_cycle_counter : unsigned(31 downto 0) := to_unsigned(0,32); signal reg_math_cycle_counter_plus_one : unsigned(31 downto 0) := to_unsigned(0,32); + -- Reset math cycle counters + signal reg_math_cycle_counter_reset_toggle : std_logic := '0'; + signal last_reg_math_cycle_counter_reset_toggle : std_logic := '0'; -- # of math cycles to trigger end of job / math interrupt signal reg_math_cycle_compare : unsigned(31 downto 0) := to_unsigned(0,32); @@ -1601,47 +1634,59 @@ begin multipliers: for unit in 0 to 7 generate - mult_unit : entity work.multiply32 port map ( + mult_unit : entity work.multiply32 generic map ( + unit => unit + ) port map ( clock => mathclock, - unit => unit, do_add => reg_math_config_drive(unit).do_add, + invert_b => math_unit_invert_b(unit), input_a => reg_math_config_drive(unit).source_a, input_b => reg_math_config_drive(unit).source_b, input_value_number => math_input_number, input_value => math_input_value, - output_select => math_output_counter, - output_value(31 downto 0) => math_output_value_low, - output_value(63 downto 32) => math_output_value_high + output_shift => math_unit_mult_out_shift, + output_value => math_output_values(unit) + -- output_select => math_output_counter, + -- output_value(31 downto 0) => math_output_value_low, + -- output_value(63 downto 32) => math_output_value_high ); end generate; shifters: for unit in 8 to 11 generate - mult_unit : entity work.shifter32 port map ( + shift_unit : entity work.shifter32 generic map ( + unit => unit + ) port map ( clock => mathclock, - unit => unit, do_add => reg_math_config_drive(unit).do_add, + invert_b => math_unit_invert_b(unit), input_a => reg_math_config_drive(unit).source_a, input_b => reg_math_config_drive(unit).source_b, input_value_number => math_input_number, input_value => math_input_value, - output_select => math_output_counter, - output_value(31 downto 0) => math_output_value_low, - output_value(63 downto 32) => math_output_value_high + output_value => math_output_values(unit) + -- output_select => math_output_counter, + -- output_value(31 downto 0) => math_output_value_low, + -- output_value(63 downto 32) => math_output_value_high ); end generate; dividerrs: for unit in 12 to 15 generate - mult_unit : entity work.divider32 port map ( + div_unit : entity work.divider32 generic map ( + unit => unit + ) port map ( clock => mathclock, - unit => unit, do_add => reg_math_config_drive(unit).do_add, + do_mult => math_unit_flags(2), + invert_b => math_unit_invert_b(unit), input_a => reg_math_config_drive(unit).source_a, input_b => reg_math_config_drive(unit).source_b, input_value_number => math_input_number, input_value => math_input_value, - output_select => math_output_counter, - output_value(31 downto 0) => math_output_value_low, - output_value(63 downto 32) => math_output_value_high + mult_shift => math_unit_mult_out_shift, + output_value => math_output_values(unit) + -- output_select => math_output_counter, + -- output_value(31 downto 0) => math_output_value_low, + -- output_value(63 downto 32) => math_output_value_high ); end generate; @@ -1669,6 +1714,7 @@ begin ); process (mathclock) + variable math_current_unit_has_latched : std_logic := '0'; begin if rising_edge(mathclock) and math_unit_enable then -- For the plumbed math units, we want to avoid having two huge 16x32x32 @@ -1687,62 +1733,126 @@ begin -- counters at the CPU speed. -- Present input value to all math units - if math_input_counter /= 15 then - math_input_counter <= math_input_counter + 1; + -- reset the counter if bit 0 is set (write enabled) + if math_unit_flags(0) = '0' and math_unit_halted = last_math_unit_halted then + if math_input_counter /= 15 then + math_input_counter <= math_input_counter + 1; + else + math_input_counter <= 0; + end if; + -- only update the input value and reg when the counter is running + -- to prevent register updates while the inputs are offline from messing with + -- the math unit's internal registers + math_input_number <= math_input_counter; + math_input_value <= reg_math_regs(math_input_counter); + report "MATH: Presenting math reg #" & integer'image(math_input_counter) + &" = $" & to_hstring(reg_math_regs(math_input_counter)); else - math_input_counter <= 0; + math_input_counter <= math_input_counter_init; end if; - math_input_number <= math_input_counter; - math_input_value <= reg_math_regs(math_input_counter); - report "MATH: Presenting math reg #" & integer'image(math_input_counter) - &" = $" & to_hstring(reg_math_regs(math_input_counter)); -- Update output counter being shown to math units - if math_output_counter /= 15 then - math_output_counter <= math_output_counter + 1; + -- reset counters when bit 0 is set (write enabled) + if math_unit_flags(0) = '0' and math_unit_halted = last_math_unit_halted then + if math_output_counter /= 15 then + math_output_counter <= math_output_counter + 1; + else + math_output_counter <= 0; + end if; else - math_output_counter <= 0; + math_output_counter <= math_output_counter_init; end if; - prev_math_output_counter <= math_output_counter; + -- Based on the configuration for the previously selected unit, -- stash the results in the appropriate place if true then - report "MATH: output flags for unit #" & integer'image(prev_math_output_counter) + report "MATH: output flags for unit #" & integer'image(math_output_counter) & " = " - & std_logic'image(reg_math_config(prev_math_output_counter).output_low) & ", " - & std_logic'image(reg_math_config(prev_math_output_counter).output_high) & ", " - & integer'image(reg_math_config(prev_math_output_counter).output) & ", " - & std_logic'image(reg_math_config(prev_math_output_counter).latched) & "."; + & std_logic'image(reg_math_config(math_output_counter).output_low) & ", " + & std_logic'image(reg_math_config(math_output_counter).output_high) & ", " + & integer'image(reg_math_config(math_output_counter).output) & ", " + & std_logic'image(reg_math_config(math_output_counter).latched) & "."; end if; - if math_unit_flags(1) = '1' then - if (reg_math_config_drive(prev_math_output_counter).latched='0') or (reg_math_latch_counter = x"00") then - if reg_math_config_drive(prev_math_output_counter).output_high = '0' then - if reg_math_config_drive(prev_math_output_counter).output_low = '0' then + -- Make sure output counter is running before starting to stash outputs, to avoid constantly writing a register + -- Math config latch bit indicates whether to treat its latch interval as a counter (unset) or as a unit index (set). + if math_unit_flags(1) = '1' and math_unit_flags(0) = '0' and math_unit_halted = last_math_unit_halted then + math_current_unit_has_latched := '0'; + if reg_math_config_drive(math_output_counter).latched = '0' then + -- Latched bit unset, use latch interval and counter to determine when to latch. + if math_latch_reset_toggle = last_math_latch_reset_toggle then + -- Math latches are not resetting, proceed with checks. + if reg_math_latch_counters(math_output_counter) = 0 then + math_current_unit_has_latched := '1'; + reg_math_latch_counters(math_output_counter) <= reg_math_latch_intervals(math_output_counter); + elsif reg_math_latch_counters(math_output_counter) = 8 then + reg_math_latch_fired(math_output_counter) <= '1'; + if reg_math_latch_fired(math_output_counter) = '0' then + math_current_unit_has_latched := '1'; + end if; + else + reg_math_latch_counters(math_output_counter) <= reg_math_latch_counters(math_output_counter) - 1; + end if; + else + -- Math latches are resetting, so only latch if the interval to latch on is zero cycles. + if reg_math_latch_intervals(math_output_counter) = 0 then + math_current_unit_has_latched := '1'; + reg_math_latch_fired(math_output_counter) <= '0'; + reg_math_latch_counters(math_output_counter) <= reg_math_latch_intervals(math_output_counter); + elsif reg_math_latch_intervals(math_output_counter) = 8 then + math_current_unit_has_latched := '1'; + reg_math_latch_fired(math_output_counter) <= '1'; + reg_math_latch_counters(math_output_counter) <= reg_math_latch_intervals(math_output_counter); + else + reg_math_latch_fired(math_output_counter) <= '0'; + reg_math_latch_counters(math_output_counter) <= reg_math_latch_intervals(math_output_counter) - 1; + end if; + end if; + else + -- Latched bit set, use a math unit's previous latch state to determine when to latch. + -- When resetting, assume no units were latched last cycle. + if math_latch_reset_toggle = last_math_latch_reset_toggle then + if math_was_latched_last_cycle(reg_math_latch_intervals(math_output_counter)) = '1' then + math_current_unit_has_latched := '1'; + end if; + end if; + end if; + + if math_output_counter = 15 then + -- Since this is the last unit, no intermediate is required. + math_was_latched_last_cycle(15) <= math_current_unit_has_latched; + else + math_was_latched_current_cycle(math_output_counter) <= math_current_unit_has_latched; + end if; + + -- Process output if current unit has latched + if math_current_unit_has_latched = '1' then + if reg_math_config_drive(math_output_counter).output_high = '0' then + if reg_math_config_drive(math_output_counter).output_low = '0' then -- No output being kept, so nothing to do. null; else -- Only low output being kept - report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output) - & ") from output of math unit #" & integer'image(prev_math_output_counter) - & " ( = $" & to_hstring(math_output_value_low) & ")"; - reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_value_low; + report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output) + & ") from output of math unit #" & integer'image(math_output_counter) + & " ( = $" & to_hstring(math_output_values(math_output_counter)(31 downto 0)) & ")"; + reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0); end if; else - if reg_math_config_drive(prev_math_output_counter).output_low = '0' then + if reg_math_config_drive(math_output_counter).output_low = '0' then -- Only high half of output is being kept, so stash it - report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output) - & ") from output of math unit #" & integer'image(prev_math_output_counter); - reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_value_high; + report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output) + & ") from output of math unit #" & integer'image(math_output_counter); + reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(63 downto 32); else -- Both are being stashed, so store in consecutive slots - report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(prev_math_output_counter).output) - & ") (and next) from output of math unit #" & integer'image(prev_math_output_counter); - reg_math_regs(reg_math_config(prev_math_output_counter).output) <= math_output_value_low; - if reg_math_config_drive(prev_math_output_counter).output /= 15 then - reg_math_regs(reg_math_config_drive(prev_math_output_counter).output + 1) <= math_output_value_high; + report "MATH: Setting reg_math_regs(" & integer'image(reg_math_config(math_output_counter).output) + & ") (and next) from output of math unit #" & integer'image(math_output_counter); + reg_math_regs(reg_math_config(math_output_counter).output) <= math_output_values(math_output_counter)(31 downto 0); + if reg_math_config_drive(math_output_counter).output /= 15 then + reg_math_regs(reg_math_config(math_output_counter).output + 1) <= math_output_values(math_output_counter)(63 downto 32); else - reg_math_regs(0) <= math_output_value_high; + reg_math_regs(0) <= math_output_values(math_output_counter)(63 downto 32); end if; end if; end if; @@ -1754,11 +1864,19 @@ begin -- Implement writing to math registers if reg_math_write_toggle /= last_reg_math_write_toggle then last_reg_math_write_toggle <= reg_math_write_toggle; - reg_math_write <= '1'; end if; - reg_math_write <= '0'; - if math_unit_flags(0) = '1' then - if reg_math_write = '1' then + + if math_latch_write_toggle /= last_math_latch_write_toggle then + last_math_latch_write_toggle <= math_latch_write_toggle; + end if; + + if reg_math_cycle_counter_reset_toggle /= last_reg_math_cycle_counter_reset_toggle then + last_reg_math_cycle_counter_reset_toggle <= reg_math_cycle_counter_reset_toggle; + end if; + + -- when math unit has been halted by the comparator, behave as if math_unit_flags(1 downto 0) = "01" + if math_unit_flags(0) = '1' or math_unit_halted /= last_math_unit_halted then + if reg_math_write_toggle /= last_reg_math_write_toggle then case reg_math_regbyte is when 0 => reg_math_regs(reg_math_regnum)(7 downto 0) <= reg_math_write_value; when 1 => reg_math_regs(reg_math_regnum)(15 downto 8) <= reg_math_write_value; @@ -1767,25 +1885,66 @@ begin when others => end case; end if; + if math_latch_write_toggle /= last_math_latch_write_toggle then + reg_math_latch_intervals(math_latch_address) <= math_latch_value; + reg_math_latch_counters(math_latch_address) <= math_latch_value; + end if; end if; -- Latch counter counts "math cycles", which is the time it takes for an -- output to appear on the inputs again, i.e., once per lap of the input -- and output propagation. -- TODO: implement reg_math_cycle_counter_reset signal, see D7E1 - reg_math_cycle_counter_plus_one <= reg_math_cycle_counter + 1; - if math_output_counter = 1 then - -- Decrement latch counter - if reg_math_latch_counter = x"00" then - reg_math_latch_counter <= reg_math_latch_interval; - -- And update math cycle counter, if math unit is active - if math_unit_flags(1) = '1' then - reg_math_cycle_counter <= reg_math_cycle_counter_plus_one; + if math_output_counter = (15 + math_output_counter_init) mod 16 then + -- If a bit is set in math_was_latched_current_cycle, then that unit reset its latch counter and wrote an output. + -- For sequential latching, a unit needs to know the latch status from the previous cycle, + -- so the FPU needs to store which units latched on this cycle. + -- Unit 15 is an exception, since when this code runs, it still hasn't finished processing. + -- In order to avoid weird glitchiness with 15, it will write to the last cycle reg directly. + math_was_latched_last_cycle(14 downto 0) <= math_was_latched_current_cycle; + -- All units have been cycled through, so no more resetting to do. + if math_latch_reset_toggle /= last_math_latch_reset_toggle then + last_math_latch_reset_toggle <= math_latch_reset_toggle; + end if; + -- Update math cycle counter, if math unit is active + -- include a case for the reset, to avoid a possible edge case resulting in a double-drive + if math_unit_flags(1) = '1' and reg_math_cycle_counter_reset_toggle = last_reg_math_cycle_counter_reset_toggle and math_unit_halted = last_math_unit_halted then + if reg_math_cycle_counter_plus_one = reg_math_cycle_compare then + math_unit_halted <= not last_math_unit_halted; -- disable calculation, enable writing to regs from CPU (disables counters) end if; - else - reg_math_latch_counter <= reg_math_latch_counter - 1; + reg_math_cycle_counter <= reg_math_cycle_counter_plus_one; end if; end if; + + -- handle resetting the cycle counter, as well as updating reg_math_cycle_counter_plus_one, to avoid a multiple drive situation + if reg_math_cycle_counter_reset_toggle /= last_reg_math_cycle_counter_reset_toggle then + reg_math_cycle_counter <= (others => '0'); + -- reg_math_cycle_counter_plus_one <= x"00000001"; + end if; + reg_math_cycle_counter_plus_one <= reg_math_cycle_counter + 1; + + -- We also provide some flags (which will later trigger interrupts) based + -- on the equality of math registers 14 and 15 + math_unit_flags(6) <= math_unit_equal_to; + math_unit_flags(5) <= math_unit_less_than; + math_unit_flags(4) <= math_unit_greater_than; + if reg_math_regs(14) = reg_math_regs(15) then + math_unit_equal_to <= '1'; + else + math_unit_equal_to <= '0'; + end if; + if reg_math_regs(14) < reg_math_regs(15) then + math_unit_less_than <= '1'; + else + math_unit_less_than <= '0'; + end if; + if reg_math_regs(14) > reg_math_regs(15) then + math_unit_greater_than <= '1'; + else + math_unit_greater_than <= '0'; + end if; + -- temp, maybe use $D7E1.7 as an interrupt indicate later? + math_unit_flags(7) <= '0'; end if; end process; @@ -2988,10 +3147,21 @@ begin &to_unsigned(reg_math_config(to_integer(the_read_address(3 downto 0))).output,4); -- @IO:GS $D7E0 MATH:LATCHINT Latch interval for latched outputs (in CPU cycles) -- $D7E1 is documented higher up - when x"E0" => return reg_math_latch_interval; - when x"E1" => return math_unit_flags; + when x"E0" => return to_unsigned(math_latch_address,4) & to_unsigned(reg_math_latch_intervals(math_latch_address),4); + when x"E1" => + if math_unit_flags(3) = '0' then + if math_unit_halted = last_math_unit_halted then + return math_unit_flags; + else + return math_unit_flags(7 downto 2) & "01"; + end if; + else + return math_unit_flags(7 downto 3) & math_unit_mult_out_shift(2 downto 0); + end if; -- @IO:GS $D7E2 MATH:RESERVED Reserved -- @IO:GS $D7E3 MATH:RESERVED Reserved + when x"E2" => return unsigned(math_unit_invert_b(7 downto 0)); + when x"E3" => return unsigned(math_unit_invert_b(15 downto 8)); --@IO:GS $D7E4 MATH:ITERCNT Iteration Counter (32 bit) --@IO:GS $D7E5 MATH:ITERCNT Iteration Counter (32 bit) --@IO:GS $D7E6 MATH:ITERCNT Iteration Counter (32 bit) @@ -3575,13 +3745,32 @@ begin reg_math_config(to_integer(long_address(3 downto 0))).output <= to_integer(value(3 downto 0)); elsif long_address(7 downto 0) = x"E0" then -- @IO:GS $D7E0 - Math unit latch interval (only update output of math function units every this many cycles, if they have the latch output flag set) - reg_math_latch_interval <= value; + math_latch_address <= to_integer(value(7 downto 4)); + math_latch_value <= to_integer(value(3 downto 0)); + math_latch_write_toggle <= not math_latch_write_toggle; elsif long_address(7 downto 0) = x"E1" then -- @IO:GS $D7E1 - Math unit general settings (writing also clears math cycle counter) -- @IO:GS $D7E1.0 MATH:WREN Enable setting of math registers (must normally be set) -- @IO:GS $D7E1.1 MATH:CALCEN Enable committing of output values from math units back to math registers (clearing effectively pauses iterative formulae) - math_unit_flags <= value; + math_unit_flags(3) <= value(3); + if value(3) = '1' then + if math_unit_flags(0) = '1' then + math_unit_mult_out_shift <= value(2 downto 0); + elsif math_unit_halted /= last_math_unit_halted then + math_unit_mult_out_shift <= value(2 downto 0); + math_unit_flags(1 downto 0) <= "01"; -- reset flags to halted state, since halted state is cleared. + end if; + else + math_unit_flags(2 downto 0) <= value(2 downto 0); + end if; -- reg_math_cycle_counter <= to_unsigned(0,32); -- TODO: Should generate a reg_math_cycle_counter_reset signal + reg_math_cycle_counter_reset_toggle <= not reg_math_cycle_counter_reset_toggle; + math_latch_reset_toggle <= not last_math_latch_reset_toggle; + last_math_unit_halted <= math_unit_halted; + elsif long_address(7 downto 0) = x"E2" then + math_unit_invert_b(7 downto 0) <= std_logic_vector(value); + elsif long_address(7 downto 0) = x"E3" then + math_unit_invert_b(15 downto 8) <= std_logic_vector(value); elsif long_address(7 downto 0) = x"E8" then reg_math_cycle_compare(7 downto 0) <= value; elsif long_address(7 downto 0) = x"E9" then @@ -4243,39 +4432,6 @@ begin else chipselect_enables <= x"EF"; end if; - - if math_unit_enable then - -- We also provide some flags (which will later trigger interrupts) based - -- on the equality of math registers 14 and 15 - if reg_math_regs(14) = reg_math_regs(15) then - math_unit_flags(6) <= '1'; - if math_unit_flags(3 downto 2) = "00" then - math_unit_flags(7) <= '1' ; - end if; - else - math_unit_flags(6) <= '0'; - if math_unit_flags(3 downto 2) = "11" then - math_unit_flags(7) <= '1' ; - end if; - end if; - if reg_math_regs(14) < reg_math_regs(15) then - math_unit_flags(5) <= '1'; - if math_unit_flags(3 downto 2) = "10" then - math_unit_flags(7) <= '1' ; - end if; - else - math_unit_flags(5) <= '0'; - end if; - if reg_math_regs(14) > reg_math_regs(15) then - math_unit_flags(4) <= '1'; - if math_unit_flags(3 downto 2) = "01" then - math_unit_flags(7) <= '1' ; - end if; - else - math_unit_flags(4) <= '0'; - end if; - end if; - end if; -- BEGINNING OF MAIN PROCESS FOR CPU diff --git a/src/vhdl/multiply32.vhdl b/src/vhdl/multiply32.vhdl index 58a612941..5221dcebf 100644 --- a/src/vhdl/multiply32.vhdl +++ b/src/vhdl/multiply32.vhdl @@ -28,16 +28,20 @@ use Std.TextIO.all; use work.debugtools.all; entity multiply32 is + generic ( + unit : integer range 0 to 15 + ); port ( clock : in std_logic; - unit : in integer range 0 to 15; do_add : in std_logic; + invert_b : in std_logic; input_a : in integer range 0 to 15; input_b : in integer range 0 to 15; input_value_number : in integer range 0 to 15; - input_value : unsigned(31 downto 0); - output_select : in integer range 0 to 15; - output_value : out unsigned(63 downto 0) + input_value : in unsigned(31 downto 0); + -- output_select : in integer range 0 to 15; + output_shift : in unsigned(2 downto 0); + output_value : out unsigned(63 downto 0) := (others => '0') ); end entity; @@ -73,7 +77,11 @@ begin if input_value_number = input_b then -- report "MATH: Unit #" & integer'image(unit) -- & ": Setting b=$" & to_hstring(input_value); - b <= signed(input_value); + if invert_b = '1' then + b <= -signed(input_value); + else + b <= signed(input_value); + end if; end if; -- Calculate the result @@ -82,28 +90,22 @@ begin p3 <= p2; p4 <= p3; p <= p4; - -- Even units do addition, odd ones do subtraction - if (unit mod 2) = 0 then - s <= to_unsigned(to_integer(a)+to_integer(b),33); - else - s <= to_unsigned(to_integer(a)-to_integer(b),33); - end if; - -- Display output value when requested, and tri-state outputs otherwise - if output_select = unit then - if do_add='1' then - -- Output sign-extended 33 bit addition result - output_value(63 downto 33) <= (others => s(32)); - output_value(32 downto 0) <= s; - report "MATH: Unit #" & integer'image(unit) - & " outputting addition sum $" & to_hstring(s); - else - output_value <= unsigned(p); --- report "MATH: Unit #" & integer'image(unit) --- & " outputting multiplication product $" & to_hstring(unsigned(p)); - end if; + -- Calculate sum of inputs + s <= unsigned((a(31) & a)+(b(31) & b)); + + -- Output result, stored in output register on the CPU side + if do_add='1' then + -- Output sign-extended 33 bit addition result + output_value(63 downto 33) <= (others => s(32)); + output_value(32 downto 0) <= s; + -- report "MATH: Unit #" & integer'image(unit) + -- & " outputting addition sum $" & to_hstring(s); else - output_value <= (others => 'Z'); + -- Output product shifted by the output shift + output_value <= shift_right(unsigned(p), to_integer(output_shift & "000")); + -- report "MATH: Unit #" & integer'image(unit) + -- & " outputting multiplication product $" & to_hstring(unsigned(p)); end if; end if; end process; diff --git a/src/vhdl/shifter32.vhdl b/src/vhdl/shifter32.vhdl index dace057d8..efc4ad0cc 100644 --- a/src/vhdl/shifter32.vhdl +++ b/src/vhdl/shifter32.vhdl @@ -28,16 +28,19 @@ use Std.TextIO.all; use work.debugtools.all; entity shifter32 is + generic ( + unit : integer range 0 to 15 + ); port ( clock : in std_logic; - unit : in integer range 0 to 15; do_add : in std_logic; + invert_b : in std_logic; input_a : in integer range 0 to 15; input_b : in integer range 0 to 15; input_value_number : in integer range 0 to 15; input_value : unsigned(31 downto 0); - output_select : in integer range 0 to 15; - output_value : out unsigned(63 downto 0) + -- output_select : in integer range 0 to 15; + output_value : out unsigned(63 downto 0) := (others => '0') ); end entity; @@ -59,16 +62,15 @@ begin a <= input_value; end if; if input_value_number = input_b then - b <= input_value; + if invert_b = '1' then + b <= unsigned(-signed(input_value)); + else + b <= input_value; + end if; end if; - -- Calculate the result - -- Even units do addition, odd ones do subtraction - if (unit mod 2) = 0 then - s <= to_unsigned(to_integer(a)+to_integer(b),33); - else - s <= to_unsigned(to_integer(a)-to_integer(b),33); - end if; + -- Calculate sum of inputs + s <= unsigned((a(31) & a)+(b(31) & b)); if b(7 downto 0) = x"00" then p(63 downto 32) <= (others => '0'); @@ -86,17 +88,14 @@ begin end if; end if; - -- Display output value when requested, and tri-state outputs otherwise - if output_select = unit then - if do_add='1' then - -- Output sign-extended 33 bit addition result - output_value(63 downto 33) <= (others => s(32)); - output_value(32 downto 0) <= s; - else - output_value <= p; - end if; + -- Output result, stored in output register on the CPU side + if do_add='1' then + -- Output sign-extended 33 bit addition result + output_value(63 downto 33) <= (others => s(32)); + output_value(32 downto 0) <= s; else - output_value <= (others => 'Z'); + -- Output shifted result + output_value <= p; end if; end if; end process;