|
9 | 9 | import re |
10 | 10 | from array import array |
11 | 11 |
|
| 12 | + |
class _ChromosomeMemmapWriter:
    """Stream sample matrices into byte-aligned files for later memmap usage.

    Every row is appended as ``n_cols`` one-byte values (0 or 1, written via
    ``array('b')``), so the resulting file is a flat sequence of
    ``rows * n_cols`` bytes.

    The writer can be used directly (call ``finalize()`` when done) or as a
    context manager, in which case the ``.done`` flag is only created when
    the ``with`` body completes without raising.
    """

    # Raw TSV tokens that are stored as 0; any other value is stored as 1.
    _ABSENT_TOKENS = ('', '0', '0.0', '.', 'False')

    def __init__(self, filename, n_cols):
        """
        Args:
            filename (str): Destination path for the raw binary matrix.
            n_cols (int): Number of sample columns stored per row.

        Raises:
            OSError: If ``filename`` cannot be opened for writing.
        """
        self.filename = filename
        self.n_cols = n_cols
        self.rows = 0
        # Set before open() so __del__ stays safe if open() raises.
        self.handle = None
        self.handle = open(filename, 'wb')

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Finalize on success; on error just close, leaving no .done flag."""
        if exc_type is None:
            self.finalize()
        else:
            self.handle.close()
        # Never suppress the caller's exception.
        return False

    def add_row(self, values):
        """
        Append a single row of sample indicators to the binary file.

        Args:
            values (Iterable[str]): Raw string values from the TSV columns.

        Raises:
            ValueError: If the number of values differs from ``n_cols``.
                Nothing is written for the offending row.
        """
        absent = self._ABSENT_TOKENS
        row_array = array('b', (0 if value in absent else 1 for value in values))
        # Validate before writing so a bad row never corrupts the matrix.
        if len(row_array) != self.n_cols:
            raise ValueError(f'mismatched memmap width: expected {self.n_cols}, got {len(row_array)}')
        row_array.tofile(self.handle)
        self.rows += 1

    def finalize(self):
        """Close file handle and create the companion .done flag."""
        self.handle.close()
        # Empty marker file; created only after the data file is closed.
        with open(self.filename + '.done', 'w'):
            pass

    def __del__(self):
        """Ensure file handle closes if finalize is not called explicitly."""
        # getattr guards against instances whose __init__ never ran or
        # failed before the handle was assigned.
        handle = getattr(self, 'handle', None)
        if handle is not None and not handle.closed:
            handle.close()
| 52 | + |
12 | 53 | def get_k_new(k, chromosomes_genome, chromosomes_genome_description): |
13 | 54 | '''k is chromosome name. return chromosome name based on chromosomes_genome, chromosomes_genome_description |
14 | 55 | deal with RefSeq IDs |
@@ -459,46 +500,6 @@ def splitMutationByChromosomeLarge(self, chromosomes_genome_description=None, ch |
459 | 500 | open(file_splitMutationByChromosomeLarge_done,'w').write('\n'.join(chromosomes_mutation)) |
460 | 501 | return chromosomes_mutation |
461 | 502 |
|
class _ChromosomeMemmapWriter:
    """Stream sample matrices into byte-aligned files for later memmap usage.

    Every row is appended as ``n_cols`` one-byte values (0 or 1, written via
    ``array('b')``), so the resulting file is a flat sequence of
    ``rows * n_cols`` bytes.

    The writer can be used directly (call ``finalize()`` when done) or as a
    context manager, in which case the ``.done`` flag is only created when
    the ``with`` body completes without raising.
    """

    # Raw TSV tokens that are stored as 0; any other value is stored as 1.
    _ABSENT_TOKENS = ('', '0', '0.0', '.', 'False')

    def __init__(self, filename, n_cols):
        """
        Args:
            filename (str): Destination path for the raw binary matrix.
            n_cols (int): Number of sample columns stored per row.

        Raises:
            OSError: If ``filename`` cannot be opened for writing.
        """
        self.filename = filename
        self.n_cols = n_cols
        self.rows = 0
        # Set before open() so __del__ stays safe if open() raises.
        self.handle = None
        self.handle = open(filename, 'wb')

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Finalize on success; on error just close, leaving no .done flag."""
        if exc_type is None:
            self.finalize()
        else:
            self.handle.close()
        # Never suppress the caller's exception.
        return False

    def add_row(self, values):
        """
        Append a single row of sample indicators to the binary file.

        Args:
            values (Iterable[str]): Raw string values from the TSV columns.

        Raises:
            ValueError: If the number of values differs from ``n_cols``.
                Nothing is written for the offending row.
        """
        absent = self._ABSENT_TOKENS
        row_array = array('b', (0 if value in absent else 1 for value in values))
        # Validate before writing so a bad row never corrupts the matrix.
        if len(row_array) != self.n_cols:
            raise ValueError(f'mismatched memmap width: expected {self.n_cols}, got {len(row_array)}')
        row_array.tofile(self.handle)
        self.rows += 1

    def finalize(self):
        """Close file handle and create the companion .done flag."""
        self.handle.close()
        # Empty marker file; created only after the data file is closed.
        with open(self.filename + '.done', 'w'):
            pass

    def __del__(self):
        """Ensure file handle closes if finalize is not called explicitly."""
        # getattr guards against instances whose __init__ never ran or
        # failed before the handle was assigned.
        handle = getattr(self, 'handle', None)
        if handle is not None and not handle.closed:
            handle.close()
501 | | - |
502 | 503 | def splitGtfByChromosomes(self,dc_protein2chr): |
503 | 504 | '''split gtf file based on chromosome. only keep proteins in file_protein |
504 | 505 | ''' |
|
0 commit comments