diff --git a/src/aiida_quantumespresso/parsers/matdyn.py b/src/aiida_quantumespresso/parsers/matdyn.py index 5ca624fad..563711a34 100644 --- a/src/aiida_quantumespresso/parsers/matdyn.py +++ b/src/aiida_quantumespresso/parsers/matdyn.py @@ -87,7 +87,7 @@ def parse(self, **kwargs): return self.exit(logs=logs) -def parse_raw_matdyn_phonon_file(phonon_frequencies): +def parse_raw_matdyn_phonon_file(phonon_frequencies: str) -> dict: """Parses the phonon frequencies file. :param phonon_frequencies: phonon frequencies file from the matdyn calculation @@ -98,57 +98,78 @@ def parse_raw_matdyn_phonon_file(phonon_frequencies): * phonon_bands: BandsData object with the bands for each kpoint """ import re - import numpy as np parsed_data = {} parsed_data['warnings'] = [] - # extract numbere of bands and kpoints - try: - num_bands = int(phonon_frequencies.split('=')[1].split(',')[0]) - num_kpoints = int(phonon_frequencies.split('=')[2].split('/')[0]) - parsed_data['num_kpoints'] = num_kpoints - except (ValueError, IndexError): + lines = phonon_frequencies.splitlines() + + # extract number of bands and kpoints from the header + # example header line: " &plot nbnd= 6, nks= 1 /" + header_pattern = re.compile(r'\s*&plot\s+nbnd=\s*(\d+),\s+nks=\s*(\d+)\s*/') + header_match = re.match(header_pattern, lines.pop(0)) + if not header_match: parsed_data['warnings'].append('Number of bands or kpoints unreadable in phonon frequencies file') return parsed_data + num_bands = int(header_match.group(1)) + num_kpoints = int(header_match.group(2)) + parsed_data['num_kpoints'] = num_kpoints # initialize array of frequencies freq_matrix = np.zeros((num_kpoints, num_bands)) - split_data = phonon_frequencies.split() - # discard the header of the file - raw_data = split_data[split_data.index('/') + 1 :] - - # try to improve matdyn deficiencies - corrected_data = [] - for b in raw_data: - try: - corrected_data.append(float(b)) - except ValueError: - # case in which there are two frequencies attached like -1204.1234-1020.536 - if '-' in b: - c = re.split('(-)', b) - d = [i for i in c if i != ''] - for i in range(0, len(d), 2): # d should have an even number of elements - corrected_data.append(float(d[i] + d[i + 1])) - else: - # I don't know what to do - parsed_data['warnings'].append('Bad formatting of frequencies') + # In the file, each kpoint block consists of: + # 1 line with kpoint coordinates (optionally followed by weight) + # one or more lines with frequencies + # (maybe up to 6 frequencies per line but it can vary so won't assume that) + + # The blocks will be processed in a loop over the number of kpoints + # and frequencies will be gradually extracted until the expected number of bands is reached. + + # regex patterns + # kpoint line ex: " 0.000000 0.000000 0.000000" + # or with weight: " 0.500000 0.288675 0.000000 0.000000" + kpoint_pattern = re.compile(r'\s+([-+]?\d+\.\d+)\s+([-+]?\d+\.\d+)\s+([-+]?\d+\.\d+)(?:\s+([-+]?\d+\.\d+))?') + # frequency line ex: " -148.6347 -124.2795 46.3694 100.8722 110.9098 132.1670" + # or with attached signs: " -148.70828-124.2696 46.2846 100.8707 110.9253 132.1867" + frequency_pattern = re.compile(r'\s*([-+]?\d+\.\d+)') + + for kpt_index in range(num_kpoints): + if not lines: + parsed_data['warnings'].append('Unexpected end of file while reading kpoints') + return parsed_data + + kpt_line = lines.pop(0) + if not re.match(kpoint_pattern, kpt_line): + parsed_data['warnings'].append(f'Invalid kpoint line format: "{kpt_line}"') + return parsed_data + + freq_count = 0 + while freq_count < num_bands: + if not lines: + parsed_data['warnings'].append('Unexpected end of file while reading frequencies') return parsed_data - counter = 3 - for i in range(num_kpoints): - for j in range(num_bands): - try: - freq_matrix[i, j] = corrected_data[counter] * CONSTANTS.invcm_to_THz # from cm-1 to THz - except ValueError: - parsed_data['warnings'].append('Error while parsing the frequencies') - except IndexError: - parsed_data['warnings'].append('Error while parsing the frequencies, dimension exceeded') + freq_line = lines.pop(0) + freq_matches = re.findall(frequency_pattern, freq_line) + if not freq_matches: + parsed_data['warnings'].append(f'Invalid frequency line format: "{freq_line}"') return parsed_data - counter += 1 - counter += 3 # move past the kpoint coordinates + + for freq_str in freq_matches: + if freq_count < num_bands: + try: + freq_matrix[kpt_index, freq_count] = ( + float(freq_str) * CONSTANTS.invcm_to_THz + ) # from cm-1 to THz + except ValueError: + parsed_data['warnings'].append('Error while parsing the frequencies') + return parsed_data + freq_count += 1 + else: + parsed_data['warnings'].append('More frequencies than expected for a kpoint') + break parsed_data['phonon_bands'] = freq_matrix diff --git a/tests/parsers/fixtures/matdyn/k_weights/aiida.out b/tests/parsers/fixtures/matdyn/k_weights/aiida.out new file mode 100644 index 000000000..f7abb1cdb --- /dev/null +++ b/tests/parsers/fixtures/matdyn/k_weights/aiida.out @@ -0,0 +1,22 @@ + + Program MATDYN v.6.1 (svn rev. 13369) starts on 9May2019 at 11:46:17 + + This program is part of the open-source Quantum ESPRESSO suite + for quantum simulation of materials; please cite + "P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009); + URL http://www.quantum-espresso.org", + in publications or presentations arising from this work. More details at + http://www.quantum-espresso.org/quote + + Parallel version (MPI), running on 1 processors + mass for atomic type 1 not given; uses mass from file real_space_force_constants.dat + A direction for q was not specified:TO-LO splitting will be absent + + MATDYN : 0.00s CPU 0.00s WALL + + + This run was terminated on: 11:46:17 9May2019 + +=------------------------------------------------------------------------------= + JOB DONE. +=------------------------------------------------------------------------------= diff --git a/tests/parsers/fixtures/matdyn/k_weights/phonon_frequencies.dat b/tests/parsers/fixtures/matdyn/k_weights/phonon_frequencies.dat new file mode 100644 index 000000000..e93514da2 --- /dev/null +++ b/tests/parsers/fixtures/matdyn/k_weights/phonon_frequencies.dat @@ -0,0 +1,13 @@ + &plot nbnd= 9, nks= 4 / + 0.500000 0.288675 0.000000 0.000000 + -148.6347 -124.2795 46.3694 100.8722 110.9098 132.1670 + 147.0127 153.5999 212.1980 + 0.490000 0.282902 0.000000 0.000000 + -148.7082 -124.2696 46.2846 100.8707 110.9253 132.1867 + 146.9338 153.6419 212.3053 + 0.480000 0.277128 0.000000 0.000000 + -148.9257 -124.2379 46.0307 100.8666 110.9709 132.2444 + 146.7046 153.7616 212.6272 + 0.470000 0.271355 0.000000 0.000000 + -149.2792 -124.1789 45.6089 100.8613 111.0444 132.3354 + 146.3437 153.9424 213.1631 diff --git a/tests/parsers/fixtures/matdyn/k_weights_attached/aiida.out b/tests/parsers/fixtures/matdyn/k_weights_attached/aiida.out new file mode 100644 index 000000000..f7abb1cdb --- /dev/null +++ b/tests/parsers/fixtures/matdyn/k_weights_attached/aiida.out @@ -0,0 +1,22 @@ + + Program MATDYN v.6.1 (svn rev. 13369) starts on 9May2019 at 11:46:17 + + This program is part of the open-source Quantum ESPRESSO suite + for quantum simulation of materials; please cite + "P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009); + URL http://www.quantum-espresso.org", + in publications or presentations arising from this work. More details at + http://www.quantum-espresso.org/quote + + Parallel version (MPI), running on 1 processors + mass for atomic type 1 not given; uses mass from file real_space_force_constants.dat + A direction for q was not specified:TO-LO splitting will be absent + + MATDYN : 0.00s CPU 0.00s WALL + + + This run was terminated on: 11:46:17 9May2019 + +=------------------------------------------------------------------------------= + JOB DONE. +=------------------------------------------------------------------------------= diff --git a/tests/parsers/fixtures/matdyn/k_weights_attached/phonon_frequencies.dat b/tests/parsers/fixtures/matdyn/k_weights_attached/phonon_frequencies.dat new file mode 100644 index 000000000..1c7b9355a --- /dev/null +++ b/tests/parsers/fixtures/matdyn/k_weights_attached/phonon_frequencies.dat @@ -0,0 +1,13 @@ + &plot nbnd= 9, nks= 4 / + 0.500000 0.288675 0.000000 0.000000 + -148.6347 -124.2795 46.3694 100.8722 110.9098 132.1670 + 147.0127 153.5999 212.1980 + 0.490000 0.282902 0.000000 0.000000 + -148.70828-124.2696 46.2846 100.8707 110.9253 132.1867 + 146.9338 153.6419 212.3053 + 0.480000 0.277128 0.000000 0.000000 + -148.9257 -124.2379 46.0307 100.8666 110.9709 132.2444 + 146.7046 153.7616 212.6272 + 0.470000 0.271355 0.000000 0.000000 + -149.2792 -124.1789 45.6089 100.8613 111.0444 132.3354 + 146.3437 153.9424 213.1631 diff --git a/tests/parsers/test_matdyn.py b/tests/parsers/test_matdyn.py index 5b1ba6c8d..a2b939c4f 100644 --- a/tests/parsers/test_matdyn.py +++ b/tests/parsers/test_matdyn.py @@ -4,10 +4,10 @@ from aiida.common import AttributeDict -def generate_inputs(): +def generate_inputs(n=1): """Return only those inputs that the parser will expect to be there.""" kpoints = orm.KpointsData() - kpoints.set_kpoints_mesh([1, 1, 1]) + kpoints.set_kpoints_mesh([n, n, 1]) return AttributeDict( { @@ -34,6 +34,56 @@ def test_matdyn_default(fixture_localhost, generate_calc_job_node, generate_pars { 'output_parameters': results['output_parameters'].get_dict(), 'output_phonon_bands': results['output_phonon_bands'].base.attributes.all, + 'frequencies': results['output_phonon_bands'].get_array('bands').tolist(), + 'kpoints': results['output_phonon_bands'].get_array('kpoints').tolist(), + } + ) + + +def test_matdyn_k_weights(fixture_localhost, generate_calc_job_node, generate_parser, data_regression): + """Test a default `matdyn.x` calculation which also outputs k-weights in phonon_frequencies.""" + entry_point_calc_job = 'quantumespresso.matdyn' + entry_point_parser = 'quantumespresso.matdyn' + + node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'k_weights', generate_inputs(2)) + parser = generate_parser(entry_point_parser) + results, calcfunction = parser.parse_from_node(node, store_provenance=False) + + assert calcfunction.is_finished, calcfunction.exception + assert calcfunction.is_finished_ok, calcfunction.exit_message + assert not orm.Log.collection.get_logs_for(node) + assert 'output_parameters' in results + assert 'output_phonon_bands' in results + data_regression.check( + { + 'output_parameters': results['output_parameters'].get_dict(), + 'output_phonon_bands': results['output_phonon_bands'].base.attributes.all, + 'frequencies': results['output_phonon_bands'].get_array('bands').tolist(), + 'kpoints': results['output_phonon_bands'].get_array('kpoints').tolist(), + } + ) + + +def test_matdyn_k_weights_attached(fixture_localhost, generate_calc_job_node, generate_parser, data_regression): + """Test a default `matdyn.x` calculation which also outputs k-weights in phonon_frequencies and has attached frequencies through a minus sign (bad formatting).""" + entry_point_calc_job = 'quantumespresso.matdyn' + entry_point_parser = 'quantumespresso.matdyn' + + node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'k_weights_attached', generate_inputs(2)) + parser = generate_parser(entry_point_parser) + results, calcfunction = parser.parse_from_node(node, store_provenance=False) + + assert calcfunction.is_finished, calcfunction.exception + assert calcfunction.is_finished_ok, calcfunction.exit_message + assert not orm.Log.collection.get_logs_for(node) + assert 'output_parameters' in results + assert 'output_phonon_bands' in results + data_regression.check( + { + 'output_parameters': results['output_parameters'].get_dict(), + 'output_phonon_bands': results['output_phonon_bands'].base.attributes.all, + 'frequencies': results['output_phonon_bands'].get_array('bands').tolist(), + 'kpoints': results['output_phonon_bands'].get_array('kpoints').tolist(), } ) diff --git a/tests/parsers/test_matdyn/test_matdyn_default.yml b/tests/parsers/test_matdyn/test_matdyn_default.yml index 378982250..75c78f951 100644 --- a/tests/parsers/test_matdyn/test_matdyn_default.yml +++ b/tests/parsers/test_matdyn/test_matdyn_default.yml @@ -1,3 +1,14 @@ +frequencies: +- - -0.63865386912656 + - -0.63865386912656 + - -0.63865386912656 + - 17.55545361236002 + - 17.55545361236002 + - 17.55545361236002 +kpoints: +- - 0.0 + - 0.0 + - 0.0 output_parameters: code_version: '6.1' wall_time_seconds: 0.0 diff --git a/tests/parsers/test_matdyn/test_matdyn_k_weights.yml b/tests/parsers/test_matdyn/test_matdyn_k_weights.yml new file mode 100644 index 000000000..ddc415c36 --- /dev/null +++ b/tests/parsers/test_matdyn/test_matdyn_k_weights.yml @@ -0,0 +1,63 @@ +frequencies: +- - -4.45595620570926 + - -3.7258056784010996 + - 1.3901196401985199 + - 3.02407247818676 + - 3.32499215582884 + - 3.9622669796485996 + - 4.40732986902166 + - 4.60480915695542 + - 6.3615360002684 +- - -4.45815968027556 + - -3.7255088838676795 + - 1.38757740015468 + - 3.02402750931806 + - 3.32545683413874 + - 3.96285757079086 + - 4.40496450652804 + - 4.60606828527902 + - 6.364752773342739 +- - -4.46468016623706 + - -3.72455854177582 + - 1.37996566964606 + - 3.02390459441028 + - 3.32682388774722 + - 3.9645873732735204 + - 4.398093263390679 + - 4.60965680100128 + - 6.3744030925657595 +- - -4.47527782962736 + - -3.72278976627362 + - 1.36732042376762 + - 3.0237457044075398 + - 3.3290273623135196 + - 3.9673154846413197 + - 4.38727375358146 + - 4.615077048641919 + - 6.390468970389979 +kpoints: +- - 0.0 + - 0.0 + - 0.0 +- - 0.0 + - 0.5 + - 0.0 +- - 0.5 + - 0.0 + - 0.0 +- - 0.5 + - 0.5 + - 0.0 +output_parameters: + code_version: '6.1' + wall_time_seconds: 0.0 +output_phonon_bands: + array|bands: + - 4 + - 9 + array|kpoints: + - 4 + - 3 + label_numbers: [] + labels: [] + units: THz diff --git a/tests/parsers/test_matdyn/test_matdyn_k_weights_attached.yml b/tests/parsers/test_matdyn/test_matdyn_k_weights_attached.yml new file mode 100644 index 000000000..700688aa9 --- /dev/null +++ b/tests/parsers/test_matdyn/test_matdyn_k_weights_attached.yml @@ -0,0 +1,63 @@ +frequencies: +- - -4.45595620570926 + - -3.7258056784010996 + - 1.3901196401985199 + - 3.02407247818676 + - 3.32499215582884 + - 3.9622669796485996 + - 4.40732986902166 + - 4.60480915695542 + - 6.3615360002684 +- - -4.458162078615223 + - -3.7255088838676795 + - 1.38757740015468 + - 3.02402750931806 + - 3.32545683413874 + - 3.96285757079086 + - 4.40496450652804 + - 4.60606828527902 + - 6.364752773342739 +- - -4.46468016623706 + - -3.72455854177582 + - 1.37996566964606 + - 3.02390459441028 + - 3.32682388774722 + - 3.9645873732735204 + - 4.398093263390679 + - 4.60965680100128 + - 6.3744030925657595 +- - -4.47527782962736 + - -3.72278976627362 + - 1.36732042376762 + - 3.0237457044075398 + - 3.3290273623135196 + - 3.9673154846413197 + - 4.38727375358146 + - 4.615077048641919 + - 6.390468970389979 +kpoints: +- - 0.0 + - 0.0 + - 0.0 +- - 0.0 + - 0.5 + - 0.0 +- - 0.5 + - 0.0 + - 0.0 +- - 0.5 + - 0.5 + - 0.0 +output_parameters: + code_version: '6.1' + wall_time_seconds: 0.0 +output_phonon_bands: + array|bands: + - 4 + - 9 + array|kpoints: + - 4 + - 3 + label_numbers: [] + labels: [] + units: THz