11#!/usr/bin/env python
22"""Parses an i18n Java properties file and returns the data as a dictionary.
33
4- The benefit of this method over using configparser is that the whitespace in
4+ If called with remove_backslashes=False, then the whitespace in
55multiline values is preserved.
66
7+ If called with remove_backslashes=True, then configparser is used
8+ and the whitespace and backslashes in multiline values are removed.
9+
710Note that this method does not work properly for multiline translations
811with an "=" character in them.
9-
10- See related question: https://stackoverflow.com/questions/76047202
1112"""
1213
1314import argparse
15+ import configparser
16+ import os
1417from pathlib import Path
1518
1619
17- def parse_i18n_file (file_path ):
20+ def parse_i18n_file (file_path , remove_backslashes = False ):
1821 """Parses an i18n Java properties file and returns the data as a dictionary.
1922
2023 Note that this method does not work properly for multiline translations
2124 with an "=" character in them.
2225
2326 Keyword arguments:
2427 file_path -- filepath of the i18n Java properties file to parse
28+ remove_backslashes -- when true, the data returned will not have the
29+ backslashes used in multiline values.
30+ Multiline values will be transformed into single line values.
2531 """
2632 if not Path (file_path ).exists ():
2733 raise FileNotFoundError (f"File { file_path } does not exist" )
@@ -60,6 +66,77 @@ def parse_i18n_file(file_path):
6066 f"It has at least one duplicate key: { duplicate_keys } "
6167 )
6268
69+ if remove_backslashes :
70+ # Now that we've ensured the file has no duplicate properties, return
71+ # the data as a dictionary with multiline values transformed into
72+ # single line values.
73+ return parse_i18n_file_without_backslashes (file_path )
74+
75+ return data
76+
77+
def convert_properties_to_ini(input_path, ini_path):
    """Copy a properties file to an .ini file that configparser can read.

    A ``[DEFAULT]`` section header is written first, followed by the
    contents of the properties file, unchanged.

    Keyword arguments:
    input_path -- filepath of the i18n Java properties file to convert
    ini_path -- filepath of the output .ini file (overwritten if it exists)

    Raises:
    OSError -- if input_path cannot be read or ini_path cannot be written
    """
    with open(input_path, "r", encoding="utf-8") as infile, open(
        ini_path, "w", encoding="utf-8"
    ) as outfile:
        # configparser rejects options that appear before any section
        # header, so prepend a dummy one.
        outfile.write("[DEFAULT]\n")
        # Stream the file object directly instead of materialising every
        # line in memory with readlines().
        outfile.writelines(infile)
92+
93+
def merge_multiline_string(multiline_string):
    """Collapse a multiline value into a single-line string.

    Each line is trimmed of surrounding whitespace and of the trailing
    backslash(es) used as line-continuation markers; the remaining
    fragments are joined with single spaces.

    Keyword arguments:
    multiline_string -- the (possibly multiline) value to merge

    Returns the merged single-line string. An empty input returns "".
    """
    fragments = []
    for line in multiline_string.splitlines():
        # Trim whitespace first so that a backslash followed by trailing
        # spaces is still found at the end of the line and removed.
        fragment = line.strip().rstrip("\\").strip()
        # Skip lines that held nothing but a backslash or whitespace;
        # keeping them would leave doubled spaces in the merged value.
        if fragment:
            fragments.append(fragment)
    return " ".join(fragments)
105+
106+
def parse_i18n_file_without_backslashes(file_path):
    """Parses an i18n Java properties file and returns the data as a dictionary.
    Multiline values will be transformed into single line values with the
    backslashes removed.

    Note that this method does not work properly for multiline translations
    with an "=" character in them.

    Keyword arguments:
    file_path -- filepath of the i18n Java properties file to parse

    Returns a dict mapping each property key (case preserved) to its value
    as a single-line string.

    Raises:
    FileNotFoundError -- if file_path does not exist
    configparser.Error -- if configparser cannot parse the file contents
    """
    properties_path = Path(file_path)
    if not properties_path.exists():
        # Pass a single message argument: with two positional arguments
        # OSError treats them as (errno, strerror) and garbles the message.
        raise FileNotFoundError(f"File {file_path} does not exist")

    # Use RawConfigParser to avoid any interpolation or automatic conversions
    config = configparser.RawConfigParser(empty_lines_in_values=False)
    # Override the optionxform method to prevent lowercase conversion of the keys
    config.optionxform = str  # type: ignore

    # configparser requires a section header, so prepend a dummy [DEFAULT]
    # one and parse in memory. This avoids writing a fixed-name temporary
    # file into the current working directory, which could clobber an
    # existing file and was left behind whenever parsing failed.
    properties_text = properties_path.read_text(encoding="utf-8")
    config.read_string("[DEFAULT]\n" + properties_text, source=str(file_path))

    return {
        key: merge_multiline_string(value)
        for key, value in config["DEFAULT"].items()
    }
64141
65142
@@ -80,8 +157,17 @@ def main():
80157 "Can be specified as a relative or absolute file path."
81158 ),
82159 )
160+ parser .add_argument (
161+ "-r" ,
162+ "--remove_backslashes" ,
163+ action = "store_true" ,
164+ help = (
165+ "the data returned will not have the "
166+ "backslashes used in multiline values."
167+ ),
168+ )
83169 args = parser .parse_args ()
84- result = parse_i18n_file (args .input_file )
170+ result = parse_i18n_file (args .input_file , args . remove_backslashes )
85171 for key , value in result .items ():
86172 print ("key" , key )
87173 print ("value" , value )
0 commit comments