ablab
diff --git a/src/assignment_aggregator.py b/src/assignment_aggregator.py
Lines changed: 11 additions & 10 deletions
diff --git a/src/dataset_processor.py b/src/dataset_processor.py
Lines changed: 9 additions & 1 deletion
diff --git a/src/graph_based_model_construction.py b/src/graph_based_model_construction.py
Lines changed: 8 additions & 8 deletions
@@ -30,9 +30,9 @@
 
 
 class ReadAssignmentAggregator:
-    def __init__(self, args, sample, read_groups, gffutils_db=None, chr_id=None, gzipped=False, grouping_strategy_names=None):
+    def __init__(self, args, sample, string_pools, gffutils_db=None, chr_id=None, gzipped=False, grouping_strategy_names=None):
         self.args = args
-        self.read_groups = read_groups
+        self.string_pools = string_pools
         self.grouping_strategy_names = grouping_strategy_names if grouping_strategy_names else ["default"]
         self.common_header = "# Command line: " + args._cmd_line + "\n# IsoQuant version: " + args._version + "\n"
         self.io_support = IOSupport(self.args)
@@ -95,8 +95,9 @@ def __init__(self, args, sample, read_groups, gffutils_db=None, chr_id=None, gzi
         if self.args.count_exons and self.args.genedb:
             exon_counts_path = sample.get_exon_counts_file(chr_id) if chr_id else sample.out_exon_counts_tsv
             intron_counts_path = sample.get_intron_counts_file(chr_id) if chr_id else sample.out_intron_counts_tsv
-            self.exon_counter = ExonCounter(exon_counts_path, ignore_read_groups=True)
-            self.intron_counter = IntronCounter(intron_counts_path, ignore_read_groups=True)
+            # string_pools is not passed here: leaving it as None means ungrouped counting
+            self.exon_counter = ExonCounter(exon_counts_path)
+            self.intron_counter = IntronCounter(intron_counts_path)
             self.global_counter.add_counters([self.exon_counter, self.intron_counter])
 
         if self.args.read_group and self.args.genedb:
@@ -112,12 +113,12 @@ def __init__(self, args, sample, read_groups, gffutils_db=None, chr_id=None, gzi
                 gene_counter = create_gene_counter(gene_out_file,
                                                    self.args.gene_quantification,
                                                    complete_feature_list=self.gene_set,
-                                                   read_groups=self.read_groups[group_idx],
+                                                   string_pools=self.string_pools,
                                                    group_index=group_idx)
                 transcript_counter = create_transcript_counter(transcript_out_file,
                                                               self.args.transcript_quantification,
                                                               complete_feature_list=self.transcript_set,
-                                                              read_groups=self.read_groups[group_idx],
+                                                              string_pools=self.string_pools,
                                                               group_index=group_idx)
 
                 self.global_counter.add_counters([gene_counter, transcript_counter])
@@ -129,8 +130,8 @@ def __init__(self, args, sample, read_groups, gffutils_db=None, chr_id=None, gzi
                     else:
                         exon_out_file = f"{sample.out_exon_grouped_counts_tsv}_{strategy_name}"
                         intron_out_file = f"{sample.out_intron_grouped_counts_tsv}_{strategy_name}"
-                    exon_counter = ExonCounter(exon_out_file, group_index=group_idx)
-                    intron_counter = IntronCounter(intron_out_file, group_index=group_idx)
+                    exon_counter = ExonCounter(exon_out_file, string_pools=self.string_pools, group_index=group_idx)
+                    intron_counter = IntronCounter(intron_out_file, string_pools=self.string_pools, group_index=group_idx)
                     self.global_counter.add_counters([exon_counter, intron_counter])
 
         if self.args.read_group and not self.args.no_model_construction:
@@ -145,12 +146,12 @@ def __init__(self, args, sample, read_groups, gffutils_db=None, chr_id=None, gzi
                 transcript_model_counter = create_transcript_counter(
                     transcript_model_out_file,
                     self.args.transcript_quantification,
-                    read_groups=self.read_groups[group_idx],
+                    string_pools=self.string_pools,
                     group_index=group_idx)
                 gene_model_counter = create_gene_counter(
                     gene_model_out_file,
                     self.args.gene_quantification,
-                    read_groups=self.read_groups[group_idx],
+                    string_pools=self.string_pools,
                     group_index=group_idx)
 
                 self.transcript_model_global_counter.add_counter(transcript_model_counter)
 
@@ -38,6 +38,7 @@
 from .barcode_calling.umi_filtering import create_transcript_info_dict
 from .table_splitter import split_read_table_parallel
 from .assignment_aggregator import ReadAssignmentAggregator
+from .string_pools import setup_string_pools
 from .parallel_workers import (
     collect_reads_in_parallel,
     construct_models_in_parallel,
@@ -368,8 +369,15 @@ def process_assigned_reads(self, sample, dump_filename):
         logger.info("Transcript models construction is turned %s" %
                     ("off" if self.args.no_model_construction else "on"))
 
+        # Build string pools for the merge phase (to convert group names <-> IDs)
+        # This must match the pools used by parallel workers
+        string_pools = None
+        if self.args.read_group:
+            string_pools = setup_string_pools(self.args, sample, chr_ids, chr_id=None,
+                                              load_barcode_pool=False, load_tsv_pools=False)
+
         # set up aggregators and outputs
-        aggregator = ReadAssignmentAggregator(self.args, sample, self.all_read_groups, gzipped=self.args.gzipped,
+        aggregator = ReadAssignmentAggregator(self.args, sample, string_pools, gzipped=self.args.gzipped,
                                              grouping_strategy_names=self.grouping_strategy_names)
         transcript_stat_counter = EnumStats()
 
 
@@ -162,22 +162,22 @@ def forward_counts(self, read_assignments):
             for read_assignment in self.transcript_read_ids[transcript_id]:
                 read_id = read_assignment.read_id
                 if self.read_assignment_counts[read_id] == 1:
-                    # Add to ungrouped counters
-                    self.transcript_counter.add_read_info_raw(read_id, [transcript_id], read_assignment.read_group)
-                    self.gene_counter.add_read_info_raw(read_id, [gene_id], read_assignment.read_group)
+                    # Add to ungrouped counters - use read_group_ids (integers) directly
+                    self.transcript_counter.add_read_info_raw(read_id, [transcript_id], read_assignment.read_group_ids)
+                    self.gene_counter.add_read_info_raw(read_id, [gene_id], read_assignment.read_group_ids)
                     continue
 
                 if read_id not in ambiguous_assignments:
-                    ambiguous_assignments[read_id] = [read_assignment.read_group]
+                    ambiguous_assignments[read_id] = [read_assignment.read_group_ids]
                 ambiguous_assignments[read_id].append(transcript_id)
 
         for read_id in ambiguous_assignments.keys():
-            read_groups = ambiguous_assignments[read_id][0]
+            read_group_ids = ambiguous_assignments[read_id][0]
             transcript_ids = ambiguous_assignments[read_id][1:]
             gene_ids = [transcript2gene[transcript_id] for transcript_id in transcript_ids]
-            # Add to ungrouped counters
-            self.transcript_counter.add_read_info_raw(read_id, transcript_ids, read_groups)
-            self.gene_counter.add_read_info_raw(read_id, gene_ids, read_groups)
+            # Add to ungrouped counters - use read_group_ids (integers) directly
+            self.transcript_counter.add_read_info_raw(read_id, transcript_ids, read_group_ids)
+            self.gene_counter.add_read_info_raw(read_id, gene_ids, read_group_ids)
 
         for r in read_assignments:
             if self.read_assignment_counts[r.read_id] > 0: continue