Add Marc4 decoder

asander-ccsw · travisgoodspeed · commit 4efe3ada80cb · 2026-01-30T18:11:07.000-05:00
* Add Affine decoder, a class that can concisely implement many different decoding orders. A high-level introduction to the affine decoder is in `affinedecoder.h`, while an example of it being used is in `gatodecodermarc4.cpp`. * Implement Marc4 decoder using Affine decoder, resolving #67.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -90,6 +90,8 @@ set(GATOROM_SOURCES
     gatodecodertlcsfont.h gatodecodertlcsfont.cpp # TMP47C434N Font ROM
     gatodecoderz86x1.h gatodecoderz86x1.cpp # Zilog Z8 Z86x1
     gatodecodercolsdownlswap.h gatodecodercolsdownlswap.cpp # Used in NEC uCOM4 Micros
+    gatodecodermarc4.h gatodecodermarc4.cpp
+    affinedecoder.h # generic affine decoder
     # Decoder named after Zorrom strategies.
     gatodecodercolsdownr.h gatodecodercolsdownr.cpp   # Top-to-bottom then left to right, 8-bits.
     gatodecodercolsdownl.h gatodecodercolsdownl.cpp
diff --git a/affinedecoder.h b/affinedecoder.h
@@ -0,0 +1,182 @@
+#ifndef AFFINEDECODER_H
+#define AFFINEDECODER_H
+
+#include "gatorom.h"
+#include <array>
+/* This is a general purpose "affine decoder". It aims to
+make a template for a good variety of decoder bit patterns
+with only small code changes. There are 3 arrays used to construct it:
+- counts: each entry indicates how many times the stride should be repeated
+- col_strides: how much to increment col by each time this stride is taken
+- row_strides: how much to increment row by each time this stride is taken
+
+The strides act like an affine map, the column increment and row increment are
+multiplied by an index vector to produce a row offset and column offset.
+The index vector increments based on the counts, incrementing idx_vec[0]
+until it reaches counts[0], at which point it resets to 0 and "carries" over
+to increment idx_vec[1]. As an example, suppose counts = [2, 3, 4]. The index
+vector would step through: [0,0,0] [1,0,0] [0,1,0] [1,1,0] [0,2,0] [1,2,0] [0,0,1] ...
+
+Combining this with the row and column strides, it's possible to represent many
+interleaved and non-interleaved bit-read patterns.
+
+A simple layout:
+1234
+5678
+
+would look like
+counts: [4, 2]
+col_strides: [1, 0]
+row_strides: [0, 1]
+
+and would iterate as
+idx vector: col, row
+00: (0*1)+(0*0), (0*0)+(0*1)
+...
+30: (3*1)+(0*0), (3*0)+(0*1)
+01: (0*1)+(1*0), (0*0)+(1*1)
+...
+31: (3*1)+(1*0), (3*0)+(1*1)
+
+
+colsdownl can be represented with:
+initial col offset: numcols-numcols/wordsize
+initial row offset: 0
+counts     : [wordsize        , numrows, numcols/wordsize]
+col_strides: [-numcols/wordsize, 0      , 1]
+row_strides: [0               , 1      , 0]
+
+so for example with wordsize=2, numrows=4, numcols=4:
+1908
+3b2a
+5d4c
+7f6e
+
+so:
+counts: [2,4,2]
+col_strides: [-2,0,1]
+row_strides: [0,1,0]
+idx: col, row
+000: 2, 0
+100: 2-2+0+0, 0+0+0
+010: 2+0+0+0, 0+1+0
+110: 2-2+0+0, 0+1+0
+020: 2+0+0+0, 0+2+0
+120: 0,2
+030: 2,3
+130: 0,3
+001: 2+0+0+1, 0+0+0
+101: 2-2+0+1, 0+0+0
+...
+
+*/
+template <size_t N> class AffineDecoder : public GatoDecoder {
+private:
+  std::array<unsigned int, N> counts; // steps of each index digit before it carries
+  std::array<int, N> row_strides;     // row delta per step of each index digit
+  std::array<int, N> col_strides;     // column delta per step of each index digit
+  int row_start;                      // row when every index digit is 0
+  int col_start;                      // column when every index digit is 0
+
+public:
+  AffineDecoder(std::array<unsigned int, N> counts,
+                std::array<int, N> row_strides, std::array<int, N> col_strides,
+                int row_start, int col_start); // note: row arguments precede column ones
+  void decode(GatoROM *gr); // fills gr->decoded and gr->decodedDamage
+  void inc_idx_counts(std::array<unsigned int, N> &idx_counts); // one step, with carry
+  int get_row_offset(std::array<unsigned int, N> &idx_counts);  // row for idx_counts
+  int get_col_offset(std::array<unsigned int, N> &idx_counts);  // column for idx_counts
+};
+
+// Stores the affine map: per-digit repeat counts, per-digit row/column strides,
+// and the (row, column) produced by the all-zero index vector.
+template <size_t N>
+AffineDecoder<N>::AffineDecoder(std::array<unsigned int, N> counts,
+                                std::array<int, N> row_strides,
+                                std::array<int, N> col_strides, int row_start,
+                                int col_start)
+    : counts(counts), row_strides(row_strides), col_strides(col_strides),
+      row_start(row_start), col_start(col_start) {
+  // `name` is inherited rather than declared in this class, so it cannot
+  // appear in the initializer list above.
+  this->name = "Raw Affine Decoder";
+}
+
+template <size_t N> // column = col_start + sum over i of idx_counts[i] * col_strides[i]
+int AffineDecoder<N>::get_col_offset(std::array<unsigned int, N> &idx_counts) {
+  int col_offset = this->col_start;
+  // Multiply as signed: a negative stride times an unsigned digit would wrap.
+  for (size_t i = 0; i < N; i++)
+    col_offset += static_cast<int>(idx_counts[i]) * this->col_strides[i];
+  return col_offset;
+}
+
+template <size_t N> // row = row_start + sum over i of idx_counts[i] * row_strides[i]
+int AffineDecoder<N>::get_row_offset(std::array<unsigned int, N> &idx_counts) {
+  int row_offset = this->row_start;
+  // Multiply as signed: a negative stride times an unsigned digit would wrap.
+  for (size_t i = 0; i < N; i++)
+    row_offset += static_cast<int>(idx_counts[i]) * this->row_strides[i];
+  return row_offset;
+}
+
+// Advances idx_counts by one step. Digit 0 is incremented; every digit that
+// reaches its entry in `counts` is reset to 0 and carries into the next digit.
+// The last digit never wraps, so counts[N - 1] is not consulted here.
+template <size_t N>
+void AffineDecoder<N>::inc_idx_counts(std::array<unsigned int, N> &idx_counts) {
+  idx_counts[0]++;
+  // `d` is the digit that may overflow; `d + 1` receives the carry.
+  for (size_t d = 0; d + 1 < N && idx_counts[d] == this->counts[d]; d++) {
+    idx_counts[d] = 0;
+    idx_counts[d + 1]++;
+  }
+}
+
+// Reads bits in the order given by the affine index map and packs each group
+// of gr->wordsize bits (first bit read is the MSB) into gr->decoded.
+template <size_t N> void AffineDecoder<N>::decode(GatoROM *gr) {
+  uint32_t adr = 0;   // byte offset in the output of the word being built
+  QByteArray ba, bad; // data and damage
+
+  const int wordsize = gr->wordsize;
+  std::array<unsigned int, N> idx_counts = {0};
+  gr->eval();
+  const unsigned int num_bits = gr->outputrows * gr->outputcols;
+  // w and wd hold 32 bits, so a word size outside 1..32 would divide by zero
+  // or shift out of range; bail out as for any other unusable geometry.
+  if (wordsize <= 0 || wordsize > 32 || num_bits % wordsize != 0) {
+    return;
+  }
+  // we'll go bit-by-bit, grouping by words
+  const unsigned int num_words = num_bits / wordsize;
+  for (unsigned int word = 0; word < num_words; word++) {
+    uint32_t w = 0, wd = 0; // value bits and ambiguous ("damage") bits
+    for (int bit = wordsize - 1; bit >= 0; bit--) {
+      unsigned int row = this->get_row_offset(idx_counts);
+      unsigned int col = this->get_col_offset(idx_counts);
+      this->inc_idx_counts(idx_counts);
+      GatoBit *B = gr->outputbit(row, col);
+      assert(B);
+
+      B->adr = adr;
+      B->mask = 1u << bit; // unsigned literal: bit can be 31
+      if (B->getVal())
+        w |= B->mask;
+      if (B->ambiguous)
+        wd |= B->mask;
+    }
+
+    // Emit the word least-significant byte first (little endian).
+    for (int bitcount = wordsize; bitcount > 0; bitcount -= 8) {
+      ba.append(w & 0xFF);
+      w = w >> 8;
+      bad.append(wd & 0xff);
+      wd = wd >> 8;
+      adr++;
+    }
+  }
+  gr->decoded = ba;
+  gr->decodedDamage = bad;
+}
+#endif // AFFINEDECODER_H
diff --git a/gatodecodermarc4.cpp b/gatodecodermarc4.cpp
@@ -0,0 +1,45 @@
+#include "gatodecodermarc4.h"
+#include "affinedecoder.h"
+#include <array>
+
+/* This decoder is based on the layout of MARC4 mask ROMS
+ * MARC4 ROMS are split up into "meta columns" where each
+ * meta column contains 2 interleaved columns of words.
+ * Specifically, on MARC4, each meta column consists of
+ * 16 columns and 2 words. A word consists of bits from
+ * every-other column in a meta column, with the next word
+ * being on the row below the current word. Once the last row
+ * has been read, the next word is at the top of the next column,
+ * or the top of the next meta-column, if this is the last
+ * column in the current meta-column. For example, with a
+ * word size of 2 bits and 2 meta columns, and 4 rows, the bit ordering
+ * would look like:
+ * 0 4 0 4 8 c 8 c
+ * 1 5 1 5 9 d 9 d
+ * 2 6 2 6 a e a e
+ * 3 7 3 7 b f b f
+ * In the above chart, number indicates which word that bit is a member of.
+ * The ROM dimensions and word size in the chart above are smaller than is
+ * realistic in order to keep the chart small. With a word size of 8 and 4
+ * rows, the leftmost meta-column would have word indices:
+ * 0 4 0 4 0 4 0 4 0 4 0 4 0 4 0 4
+ * 1 5 1 5 1 5 1 5 1 5 1 5 1 5 1 5
+ * 2 6 2 6 2 6 2 6 2 6 2 6 2 6 2 6
+ * 3 7 3 7 3 7 3 7 3 7 3 7 3 7 3 7
+ */
+GatoDecoderMarc4::GatoDecoderMarc4() { this->name = "marc4"; } // decoder name
+
+void GatoDecoderMarc4::decode(GatoROM *gr) {
+  // NOTE(review): read before AffineDecoder::decode() calls gr->eval(); confirm.
+  const unsigned int nrows = gr->outputrows;
+  const unsigned int ncols = gr->outputcols;
+  const unsigned int wordsize = gr->wordsize;
+  const unsigned int meta_width = 2 * wordsize; // columns per meta column
+  if (meta_width == 0 || ncols % meta_width != 0)
+    return; // zero word size, or columns do not fill whole meta columns
+  std::array<unsigned int, 4> counts = {wordsize, nrows, 2, ncols / meta_width};
+  std::array<int, 4> col_strides = {2, 0, 1, static_cast<int>(meta_width)};
+  std::array<int, 4> row_strides = {0, 1, 0, 0};
+  AffineDecoder<4> root_decode(counts, row_strides, col_strides, 0, 0);
+  root_decode.decode(gr);
+}
diff --git a/gatodecodermarc4.h b/gatodecodermarc4.h
@@ -0,0 +1,13 @@
+#ifndef GATODECODERMARC4_H
+#define GATODECODERMARC4_H
+
+#include "affinedecoder.h"
+#include "gatorom.h"
+
+class GatoDecoderMarc4 : public GatoDecoder { // decoder for the MARC4 mask ROM layout
+public:
+  GatoDecoderMarc4();       // sets the decoder name to "marc4"
+  void decode(GatoROM *gr); // builds an AffineDecoder<4> for gr and runs it
+};
+
+#endif // GATODECODERMARC4_H
diff --git a/gatomain.cpp b/gatomain.cpp
@@ -19,6 +19,7 @@
 #include "gatodecodertlcsfont.h"
 #include "gatodecoderz86x1.h"
 #include "gatodecodercolsdownlswap.h" // NEC uCOM4
+#include "gatodecodermarc4.h" //MARC4
 
 //Zorrom compatibility.
 #include "gatodecodercolsdownr.h"
@@ -186,6 +187,11 @@ int main(int argc, char *argv[]) {
                                        );
     parser.addOption(zorromOption);
 
+    QCommandLineOption marc4Option(QStringList()<<"decode-marc4",
+                                       "Decodes column-major, 2-bit column interleave as for MARC4 ROM"
+                                       );
+    parser.addOption(marc4Option);
+
     //banking
     QCommandLineOption leftbankOption(QStringList()<<"leftbank",
                                       "Only the left half of the bits."
@@ -389,6 +395,8 @@ int main(int argc, char *argv[]) {
             gr->decoder=new GatoDecoderColsRight();
         else if(parser.isSet(squeezelrOption))
             gr->decoder=new GatoDecoderSqueezeLR();
+        else if (parser.isSet(marc4Option))
+            gr->decoder = new GatoDecoderMarc4();
 
 
 
diff --git a/gatorom.cpp b/gatorom.cpp
@@ -10,6 +10,7 @@
 #include "gatodecodertlcsfont.h"
 #include "gatodecoderz86x1.h"
 #include "gatodecodercolsdownlswap.h" // NEC uCOM4
+#include "gatodecodermarc4.h" //MARC4
 //Zorrom compatibility.
 #include "gatodecodercolsdownr.h"
 #include "gatodecodercolsdownl.h"
@@ -184,6 +185,8 @@ void GatoROM::setDecoderByName(QString name){
         decoder=new GatoDecoderColsRight();
     else if(name=="squeeze-lr")
         decoder=new GatoDecoderSqueezeLR();
+    else if(name=="marc4")
+        decoder=new GatoDecoderMarc4();
     else
         qDebug()<<"Unknown decoder"<<name;
 }
diff --git a/gatosolver.cpp b/gatosolver.cpp
@@ -11,7 +11,7 @@
 #include "gatodecodertlcsfont.h"
 #include "gatodecodercolsdownlswap.h"  //NEC uCOM4
 #include "gatodecoderz86x1.h"
-
+#include "gatodecodermarc4.h" // marc4
 
 // Default solver is a dummy state that covers everything.
 GatoGraderAll::GatoGraderAll(){}
@@ -41,8 +41,8 @@ GatoSolver::GatoSolver(GatoROM *rom, GatoGrader *grader){
     decoders[5]=new GatoDecoderTLCSFont();
     decoders[6]=new GatoDecoderZ86x1();
     decoders[7]=new GatoDecoderColsDownLSwap(); //NEC uCOM4
+    decoders[8]=new GatoDecoderMarc4();
     //Remainder of table must be null.
-    decoders[8]=0;
     decoders[9]=0;
     decoders[10]=0;
     decoders[11]=0;
diff --git a/gatotests/Makefile b/gatotests/Makefile
@@ -5,11 +5,13 @@ all:
 	make -C gameboy all
 	make -C seanriddle all
 	make -C z8 all
+	make -C marc4 all
 clean:
 	make -C zorrom clean
 	make -C arm6 clean
 	make -C tlcs clean
 	make -C gameboy clean
 	make -C seanriddle clean
 	make -C z8 clean
+	make -C marc4 clean
 
diff --git a/gatotests/marc4/Makefile b/gatotests/marc4/Makefile
@@ -0,0 +1,10 @@
+all: test
+# Rebuild the decoded image whenever the bit dump changes.
+marc4-topleft-bits.bin: marc4-topleft-bits.txt
+	gatorom --decode-marc4 marc4-topleft-bits.txt -o marc4-topleft-bits.bin
+
+test: marc4-topleft-bits.bin
+	md5sum -c md5.txt
+
+clean:
+	rm -f *.bin
diff --git a/gatotests/marc4/marc4-topleft-bits.txt b/gatotests/marc4/marc4-topleft-bits.txt
@@ -0,0 +1,47 @@
+0001010101000111101001110101111000100111111111100000111111111100
+0111011101000010010001110101011010100101010101100000111111111100
+0100101111111100101001011101010000001111111111000000111111111100
+0101111111111000101001111101110000001111111111000000111111111100
+0101101010101000101011010101010000001111111111000000111111111100
+0101101111101001111110010100010000001111111111000000111111111100
+0101111110111000100000100010101000001111111111000000111111111100
+0101101010101000111000110100100100001111111111000000111111111100
+1111001110001100010010111010100000001111111111000000111111111100
+0000000000010001000011111111100000001111111111000000111111111100
+1101001101101000010010111110110100001111111111000000111111111100
+1101001101000000010010111010100000001111111111000000111111111100
+0101111011110011010010111010100000001111111111000000111111111100
+1100011111111011000010101010100000001111111111000000111111111100
+1000001010101010111000111101100000001111111111000000111111111100
+1110001101000011111010010100000000001111111111000000111111111100
+1110001101000110101011010101010000001111111111000000111111111100
+1010011101010010101011010101010000001111111111000000111111111100
+0110000111101101101001111101010000001111111111000000111111111100
+1110100100101100101001111101011000001111111111000000111111111100
+0100000101100110100001110111010000001111111111000000111111111100
+0000111011001001111101100000111000001111111111000000111111111100
+0100010111110111111101110000101100001111111111000000111111111100
+0101000101000100110000100110000000001111111111000000111111111100
+1000011010111001000010111110100100001111111111000000111111111100
+1100001100000101111010010101100100001111111111000000111111111100
+1111011110001101111110110001001100011111111111000000111111111100
+1010110111100000100100011100000100001111111111000000111111111100
+1100001101110010010101101001011000001111111111000000111111111100
+0100100110000110110000101000010100001111111111000000111111111100
+1101011011100110110101001011000100001111111111000000111111111100
+0000000100000001111010110101100100001111111111000000111111111100
+1101001001101000010010111111110100001111111111000000111111111100
+1101001110000000010110111010100100001111111111000000111111111100
+0001111010010010000110111111110000001111111111000000111111111100
+1101011010000111111101111100100000001111111111000000111111111100
+1100001010001111010000010100011000001111111111000000111111111100
+0101000101010001100100110110100100001111111111000000111111111100
+1100001100110111110101100001011000001111111111000000111111111100
+1110100000000101010010001000011100001111111111000000111111111100
+1101011000100011110101101010001100001111111111000000111111111100
+0100000101010101010000010001010100001111111111000000111111111100
+1100001101101101110000110111110100011111111111000000111111111100
+1000001001000001110100111000000100001111111111000000111111111100
+1110001101000110000111101001001000001111111111000000111111111111
+0111001100000011010101010100000000001111111111100000111111111100
+1111101110000001110000110111001000001111111111000000111111111100
diff --git a/gatotests/marc4/md5.txt b/gatotests/marc4/md5.txt
@@ -0,0 +1 @@
+65827eb3a559ac08d86c4a7540f5671d  marc4-topleft-bits.bin
diff --git a/romdecodedialog.ui b/romdecodedialog.ui