|
| 1 | +import os |
| 2 | + |
def load_reference_file(filename):
    """Read *filename* as UTF-8 and return its distinct non-blank lines.

    Every line has its surrounding whitespace stripped; lines that are
    empty after stripping are skipped. The result is a set, so repeated
    entries collapse into one.
    """
    tags = set()
    with open(filename, 'r', encoding='utf-8') as handle:
        for raw in handle:
            cleaned = raw.strip()
            if cleaned:
                tags.add(cleaned)
    return tags
| 7 | + |
def main():
    """Re-label ``[UNCATEGORIZED]`` entries of final.txt into output.txt.

    Loads the artist, character, copyright and meta reference files, then
    copies final.txt to output.txt line by line. Blank lines are dropped
    and every written line is whitespace-stripped. A line that starts with
    ``[UNCATEGORIZED]`` has its tag looked up in the reference sets in the
    order artist, character, copyright, meta; on the first hit the line is
    rewritten with that category's prefix. Lines with no hit, and lines
    without the marker, are written unchanged.
    """
    marker = '[UNCATEGORIZED]'

    # Order matters: the first reference set containing the tag wins.
    lookup = (
        ('[artist]', load_reference_file('artist.txt')),
        ('[character]', load_reference_file('character.txt')),
        ('[copyright]', load_reference_file('copyright.txt')),
        ('[meta]', load_reference_file('meta.txt')),
    )

    with open('final.txt', 'r', encoding='utf-8') as src:
        with open('output.txt', 'w', encoding='utf-8') as dst:
            for raw in src:
                entry = raw.strip()
                if not entry:
                    continue

                if entry.startswith(marker):
                    # Drop the marker and any whitespace that follows it.
                    tag = entry[len(marker):].strip()
                    prefix = next(
                        (label for label, known in lookup if tag in known),
                        None,
                    )
                    if prefix is not None:
                        entry = f'{prefix} {tag}'
                    # Otherwise the entry stays as the original stripped line.

                dst.write(entry + '\n')
| 45 | + |
# Run the recategorization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
0 commit comments