diff --git a/electronicparsers/__init__.py b/electronicparsers/__init__.py index 9c4ac803..0610bfd8 100644 --- a/electronicparsers/__init__.py +++ b/electronicparsers/__init__.py @@ -41,7 +41,7 @@ class EntryPoint(ParserEntryPoint): metadata: Optional[dict] = Field( None, description=""" - Metadata passed to the UI. Deprecated. """ + Metadata passed to the UI. Deprecated. """, ) def load(self): diff --git a/electronicparsers/abacus/parser.py b/electronicparsers/abacus/parser.py index faeec920..cf3f576c 100644 --- a/electronicparsers/abacus/parser.py +++ b/electronicparsers/abacus/parser.py @@ -315,7 +315,7 @@ def extract_data(val_in, nks): data = [] for i in range(nks): kx, ky, kz, npws = re.search( - rf'{i+1}/{nks} kpoint \(Cartesian\)\s*=\s*({re_float})\s*({re_float})\s*({re_float})\s*\((\d+)\s*pws\)', + rf'{i + 1}/{nks} kpoint \(Cartesian\)\s*=\s*({re_float})\s*({re_float})\s*({re_float})\s*\((\d+)\s*pws\)', val_in, ).groups() # TODO pylinit error, unbalanced-tuple-unpacking @@ -324,7 +324,7 @@ def extract_data(val_in, nks): map( lambda x: x.strip().split(), re.search( - rf'{i+1}/{nks} kpoint \(Cartesian\)\s*=.*\n([\s\S]+?)\n\n', + rf'{i + 1}/{nks} kpoint \(Cartesian\)\s*=.*\n([\s\S]+?)\n\n', val_in, ) .group(1) @@ -368,7 +368,7 @@ def extract_data(val_in, nks): data = [] for i in range(nks): kx, ky, kz = re.search( - rf'k\-points{i+1}\(\d+\):\s*({re_float})\s*({re_float})\s*({re_float})', + rf'k\-points{i + 1}\(\d+\):\s*({re_float})\s*({re_float})\s*({re_float})', val_in, ).groups() res = np.array( @@ -376,7 +376,8 @@ def extract_data(val_in, nks): map( lambda x: x.strip().split(), re.search( - rf'k\-points{i+1}\(\d+\):.*\n([\s\S]+?)\n\n', val_in + rf'k\-points{i + 1}\(\d+\):.*\n([\s\S]+?)\n\n', + val_in, ) .group(1) .split('\n'), diff --git a/electronicparsers/edmft/parser.py b/electronicparsers/edmft/parser.py index 61c777da..4ef326a5 100644 --- a/electronicparsers/edmft/parser.py +++ b/electronicparsers/edmft/parser.py @@ -848,8 +848,8 @@ def parse(self, 
filepath: str, archive: EntryArchive, logger): if params_files: if len(params_files) > 1: self.logger.warning( - 'Multiple *params.dat files found; we will parse the last one.', - data={'files': params_files}, + f'Multiple *params.dat files found: {params_files};', + 'we will parse the last one.', ) self.params_parser.mainfile = params_files[-1] @@ -875,8 +875,8 @@ def parse(self, filepath: str, archive: EntryArchive, logger): if indmfl_files: if len(indmfl_files) > 1: self.logger.warning( - 'Multiple *.indmfl files found; we will parse the last one.', - data={'files': indmfl_files}, + f'Multiple *.indmfl files found ({indmfl_files});', + 'we will parse the last one.', ) self.indmfl_parser.mainfile = indmfl_files[-1] if self.general_parameters and self.impurity_parameters: diff --git a/electronicparsers/exciting/parser.py b/electronicparsers/exciting/parser.py index b7fa3365..91312439 100644 --- a/electronicparsers/exciting/parser.py +++ b/electronicparsers/exciting/parser.py @@ -3254,7 +3254,7 @@ def parse_configuration(section): volume_index = 1 while True: info_volume = get_files( - f"run_dir{str(volume_index).rjust(2, '0')}/INFO.OUT", + f'run_dir{str(volume_index).rjust(2, "0")}/INFO.OUT', self.filepath, 'INFO.OUT', ) diff --git a/electronicparsers/magres/parser.py b/electronicparsers/magres/parser.py index 06902c84..cc700337 100644 --- a/electronicparsers/magres/parser.py +++ b/electronicparsers/magres/parser.py @@ -105,7 +105,7 @@ def init_quantities(self): ), Quantity( 'kpoint_mp_offset', - rf'calc\_kpoint\_mp\_offset({re_float*3})$', + rf'calc\_kpoint\_mp\_offset({re_float * 3})$', ), ] ), @@ -115,11 +115,11 @@ def init_quantities(self): r'([\[\<]*atoms[\>\]]*[\s\S]+?)(?:[\[\<]*\/atoms[\>\]]*)', sub_parser=TextParser( quantities=[ - Quantity('lattice', rf'lattice({re_float*9})'), + Quantity('lattice', rf'lattice({re_float * 9})'), Quantity('symmetry', r'symmetry *([\w\-\+\,]+)', repeats=True), Quantity( 'atom', - rf'atom *([a-zA-Z]+) *[a-zA-Z\d]* *([\d]+) 
*({re_float*3})', + rf'atom *([a-zA-Z]+) *[a-zA-Z\d]* *([\d]+) *({re_float * 3})', repeats=True, ), ] @@ -131,47 +131,47 @@ def init_quantities(self): sub_parser=TextParser( quantities=[ Quantity( - 'ms', rf'ms *(\w+) *(\d+)({re_float*9})', repeats=True + 'ms', rf'ms *(\w+) *(\d+)({re_float * 9})', repeats=True ), Quantity( - 'efg', rf'efg *(\w+) *(\d+)({re_float*9})', repeats=True + 'efg', rf'efg *(\w+) *(\d+)({re_float * 9})', repeats=True ), Quantity( 'efg_local', - rf'efg_local *(\w+) *(\d+)({re_float*9})', + rf'efg_local *(\w+) *(\d+)({re_float * 9})', repeats=True, ), Quantity( 'efg_nonlocal', - rf'efg_nonlocal *(\w+) *(\d+)({re_float*9})', + rf'efg_nonlocal *(\w+) *(\d+)({re_float * 9})', repeats=True, ), Quantity( 'isc', - rf'isc *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + rf'isc *(\w+) *(\d+) *(\w+) *(\d+)({re_float * 9})', repeats=True, ), Quantity( 'isc_fc', - rf'isc_fc *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + rf'isc_fc *(\w+) *(\d+) *(\w+) *(\d+)({re_float * 9})', repeats=True, ), Quantity( 'isc_orbital_p', - rf'isc_orbital_p *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + rf'isc_orbital_p *(\w+) *(\d+) *(\w+) *(\d+)({re_float * 9})', repeats=True, ), Quantity( 'isc_orbital_d', - rf'isc_orbital_d *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + rf'isc_orbital_d *(\w+) *(\d+) *(\w+) *(\d+)({re_float * 9})', repeats=True, ), Quantity( 'isc_spin', - rf'isc_spin *(\w+) *(\d+) *(\w+) *(\d+)({re_float*9})', + rf'isc_spin *(\w+) *(\d+) *(\w+) *(\d+)({re_float * 9})', repeats=True, ), - Quantity('sus', rf'sus *({re_float*9})', repeats=True), + Quantity('sus', rf'sus *({re_float * 9})', repeats=True), ] ), ), @@ -443,8 +443,7 @@ def parse(self, filepath, archive, logger): program_name = calculation_params.get('code', '') if program_name != 'CASTEP': self.logger.error( - 'Only CASTEP-based NMR simulations are supported by the ' - 'magres parser.' + 'Only CASTEP-based NMR simulations are supported by the magres parser.' 
) return sec_run.program = Program( diff --git a/electronicparsers/quantumespresso/parser.py b/electronicparsers/quantumespresso/parser.py index c973e3d2..0af5edcc 100644 --- a/electronicparsers/quantumespresso/parser.py +++ b/electronicparsers/quantumespresso/parser.py @@ -16,6 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import glob import logging import numpy as np import re @@ -48,10 +49,12 @@ Stress, StressEntry, Thermodynamics, - BandEnergies, ScfIteration, Dos, DosValues, + BandEnergies, + BandGapDeprecated, + BandStructure, ) from simulationworkflowschema import ( SinglePoint, @@ -914,7 +917,7 @@ ], 'xc_section_method': { 'x_qe_xc_igcx_name': 'x3lp', - 'x_qe_xc_igcx_comment': 'X3LYP (Becke88*0.542 ' ' + Perdew-Wang91*0.167)', + 'x_qe_xc_igcx_comment': 'X3LYP (Becke88*0.542 + Perdew-Wang91*0.167)', 'x_qe_xc_igcx': 28, }, }, @@ -1631,8 +1634,7 @@ ], 'xc_section_method': { 'x_qe_xc_inlc_name': 'vdwx', - 'x_qe_xc_inlc_comment': 'vdW-DF-x (reserved Thonhauser,' - ' not implemented)', + 'x_qe_xc_inlc_comment': 'vdW-DF-x (reserved Thonhauser, not implemented)', 'x_qe_xc_inlc': 4, }, }, @@ -1644,8 +1646,7 @@ ], 'xc_section_method': { 'x_qe_xc_inlc_name': 'vdwy', - 'x_qe_xc_inlc_comment': 'vdW-DF-y (reserved Thonhauser,' - ' not implemented)', + 'x_qe_xc_inlc_comment': 'vdW-DF-y (reserved Thonhauser, not implemented)', 'x_qe_xc_inlc': 5, }, }, @@ -1657,8 +1658,7 @@ ], 'xc_section_method': { 'x_qe_xc_inlc_name': 'vdwz', - 'x_qe_xc_inlc_comment': 'vdW-DF-z (reserved Thonhauser,' - ' not implemented)', + 'x_qe_xc_inlc_comment': 'vdW-DF-z (reserved Thonhauser, not implemented)', 'x_qe_xc_inlc': 6, }, }, @@ -2441,7 +2441,7 @@ def str_to_sticks(val_in): ), Quantity( 'fermi_energy', - rf'(?:the Fermi energy is|the spin up/dw Fermi energies are)\s*([\-\d\. ]+)', + r'(?:the Fermi energy is|the spin up\/dw Fermi energies are)\s*([\-\d\. 
class QuantumEspressoBandParser(TextParser):
    """Text parser for the k-point/band listing written by the BANDS program.

    Besides the regex quantities (``kpoint``, ``symmetry``, ``band``), the
    class bundles a few static helpers used to locate candidate ``*.out``
    files, recognise the BANDS header, split the k-point list into path
    segments and expand degenerate band energies.
    """

    def init_quantities(self):
        """Register the quantities extracted from the band output file."""
        self._quantities = [
            Quantity(
                'kpoint',
                rf'xk=\(\s*({RE_FLOAT}),\s*({RE_FLOAT}),\s*({RE_FLOAT})\s*\)',
                repeats=True,
                dtype=float,
            ),  # ? nested arrays
            Quantity(
                'symmetry',
                r'Band symmetry, ([\w_]+)\s*\(.*\)\s+point group:',
                repeats=True,
                dtype=str,
            ),
            Quantity(
                'band',
                r'point group:([\s\S]+?)\n\n',
                repeats=True,
                sub_parser=TextParser(
                    quantities=[
                        Quantity(
                            'energy',
                            r'e\(\s*\d+ -\s*\d+\) =\s*([\-\d\.]+)\s+eV',
                            repeats=True,
                            dtype=float,  # unit (eV) should be specified later
                        ),
                        Quantity(
                            'mult',
                            r'eV\s*(\d+)\s*-->',
                            repeats=True,
                            dtype=int,
                        ),
                    ],
                ),
            ),
        ]

    @staticmethod
    def scan_out_files(directory: str) -> list[str]:
        """Return the paths of all ``*.out`` files directly inside `directory`.

        The directory part is escaped so that glob metacharacters in the path
        (``[``, ``*``, ``?``) are taken literally, and the result is sorted so
        that callers iterating over it behave reproducibly.
        """
        pattern = os.path.join(glob.escape(directory), '*.out')
        return sorted(glob.glob(pattern))

    @staticmethod
    def read_header(filepath: str) -> str:
        """Return the second line of `filepath` (including its line ending).

        The first line is read and discarded. An empty string is returned when
        the file has fewer than two lines.
        """
        with open(filepath, 'r') as file:
            file.readline()  # skip the first line
            return file.readline()

    @staticmethod
    def match_header(line: str) -> bool:
        """Tell whether `line` is the start-up banner of the BANDS program.

        The pattern accepts everything the previous, stricter pattern did and
        additionally tolerates
        - version strings with more than two components or a trailing suffix,
        - space-padded day and hour fields,
        - minutes/seconds written with or without a padding space.
        """
        pattern = (
            r'Program BANDS v\.\d+(?:\.\d+)+[^\n]*? starts on\s+\d+\w+\d+'
            r' at\s+\d+:\s*\d+:\s*\d+'
        )
        return re.search(pattern, line) is not None

    @staticmethod
    def points_to_segments(kpoints: list, symmetries: list) -> list[list[list[float]]]:
        """Split the kpoints by segment based on differing symmetry group.

        `kpoints` and `symmetries` are walked in lockstep (extra entries of the
        longer one are ignored). A window holding the symmetry labels of the
        last three points is maintained; whenever the window is completely
        filled and its middle label differs from both neighbours, the previous
        point is treated as a segment boundary: a new segment is opened that
        starts with that previous point, so consecutive segments share their
        end point.

        Window cases (``None`` marks a slot not filled yet):
        - ``(None, None, X)`` / ``(None, X, Y)``: add to the initial segment
        - ``(X, X, Y)`` / ``(X, Y, Y)``: add to the latest segment
        - ``(X, Y, X)`` / ``(X, Y, Z)``: open a new segment with the previous
          and the current point

        Returns:
            The list of segments; ``[[]]`` when there is nothing to iterate.
        """
        segments = [[]]
        previous_point = None
        window = (None, None, None)

        for point, symmetry in zip(kpoints, symmetries):
            window = window[1:] + (symmetry,)
            boundary_reached = (
                None not in window
                and window[0] != window[1]
                and window[1] != window[2]
            )
            if boundary_reached:
                segments.append([previous_point])
            segments[-1].append(point)
            previous_point = point
        return segments

    @staticmethod
    def apply_multiplicity(
        energies: list[list[float]], multiplicity: list[int]
    ) -> list[list[float]]:
        """Expand degenerate energies according to their multiplicities.

        For every pair of rows taken from `energies` and `multiplicity`, each
        energy value is repeated as many times as the multiplicity found at
        the same position. Rows and entries are paired with ``zip``, so
        surplus items on either side are dropped.
        """
        expanded = []
        for energy_row, mult_row in zip(energies, multiplicity):
            row = []
            for value, count in zip(energy_row, mult_row):
                row.extend(value for _ in range(count))
            expanded.append(row)
        return expanded
deprecate + sec_system.atoms.lattice_vectors_reciprocal = reciprocal_cell starting_magnetization = calculation.get( 'starting_magnetization', run.get_header('starting_magnetization') @@ -3266,6 +3365,85 @@ def parse_configuration(calculation): sec_dos_total.value = dos[spin] / ureg.eV sec_dos_total.value_integrated = integrated[spin] + # band structure + if (bs_context := run.get('bandstructure')) is not None: + out_files = self.band_parser.scan_out_files( + os.path.dirname(self.out_parser.mainfile) + ) # ! move to a separate class + out_headers = [self.band_parser.read_header(f) for f in out_files] + + for out_header, out_file in zip(out_headers, out_files): + if self.band_parser.match_header(out_header): + self.band_parser.mainfile = out_file + self.band_parser.parse() + + if self.band_parser.results: + kpoints, symmetries, bands = ( + self.band_parser.get('kpoint', []), + self.band_parser.get('symmetry', []), + self.band_parser.get('band', []), + ) + if len(kpoints) and len(symmetries) and len(bands): + sec_run.calculation[-1].band_structure_electronic = [] + bandstructure = [] + for kpath in self.band_parser.points_to_segments( + kpoints, symmetries + ): + band_split = len(kpath) + band_selection, bands = ( + bands[: band_split], + bands[band_split - 1 :], + ) + desymm_energies = self.band_parser.apply_multiplicity( + [b.get('energy', []) * ureg.eV for b in band_selection], + [b.get('mult', []) for b in band_selection], + ) + band_energy = BandEnergies( + kpoints=kpath, + energies=[desymm_energies], + ) + # this is never executed + if energy_highest_occupied := self.out_parser.get('run', [{}])[0].get('bandstructure', {}).get('fermi_energy'): + band_energy.band_gap = [BandGapDeprecated(energy_highest_occupied)] # TODO: for-loop over spin channels + bandstructure.append(band_energy) + + sec_run.calculation[-1].band_structure_electronic.append( + BandStructure( + segment=bandstructure, + reciprocal_cell=sec_run.system[-1].atoms.lattice_vectors_reciprocal, + ) # 
TODO add safety checks + ) + + # under testing + filepath_stripped = self.filepath.split('raw/')[-1] + from nomad.search import search + from nomad.app.v1.models import MetadataRequired + + upload_id = self.archive.metadata.upload_id + search_ids = search( + owner='visible', + user_id=self.archive.metadata.main_author.user_id, + query={'upload_id': upload_id}, + required=MetadataRequired(include=['entry_id', 'mainfile']), + ).data + metadata = [[sid['entry_id'], sid['mainfile']] for sid in search_ids] + if len(metadata) > 1: + for entry_id, mainfile in metadata: + if (mainfile == filepath_stripped): + continue # skip the current mainfile + entry_archive = self.archive.m_context.load_archive( + entry_id, upload_id, None + ) + for bs_elec in sec_run.calculation[-1].band_structure_electronic: + entry_calc = entry_archive.run[-1].calculation[-1] + if (fermi_energy := entry_calc.energy.fermi) is not None: + bs_elec.fermi = fermi_energy + bs_elec.band_gap = [BandGapDeprecated(energy_highest_occupied = fermi_energy)] + elif (highest_occ := entry_calc.energy.highest_occupied) is not None: + bs_elec.fermi = fermi_energy + bs_elec.band_gap = [BandGapDeprecated(energy_highest_occupied = highest_occ)] + + def parse_method(self, run): sec_method = Method() self.archive.run[-1].method.append(sec_method) diff --git a/electronicparsers/soliddmft/parser.py b/electronicparsers/soliddmft/parser.py index 7b2eac1b..0fa59f9c 100644 --- a/electronicparsers/soliddmft/parser.py +++ b/electronicparsers/soliddmft/parser.py @@ -129,7 +129,10 @@ def extract_groups_datasets( if not isinstance(value, h5py.Dataset) or value.shape or not value: continue val = value[()].decode() if isinstance(value[()], bytes) else value[()] - params[key] = numpy_type_to_json_serializable(val) + if isinstance(val, (np.bool_, np.int32, np.int64, np.float64)): + params[key] = numpy_type_to_json_serializable(val) + else: + params[key] = val # ? 
is this appropriate return params else: return default diff --git a/electronicparsers/w2dynamics/parser.py b/electronicparsers/w2dynamics/parser.py index 75b54d18..ab699f5b 100644 --- a/electronicparsers/w2dynamics/parser.py +++ b/electronicparsers/w2dynamics/parser.py @@ -274,7 +274,7 @@ def parse_input_model(self, data: h5py.Group, wannier90_name: str): for key in self._hubbard_kanamori_map.keys(): parameters = ( data.attrs.get( - f'atoms.{n+1}.{key}{angular_momentum}{angular_momentum}', None + f'atoms.{n + 1}.{key}{angular_momentum}{angular_momentum}', None ) * ureg.eV ) @@ -284,9 +284,9 @@ def parse_input_model(self, data: h5py.Group, wannier90_name: str): parameters, ) - if data.attrs.get(f'atoms.{n+1}.hamiltonian') == 'Density': + if data.attrs.get(f'atoms.{n + 1}.hamiltonian') == 'Density': sec_hubbard_kanamori_model.j = 0.0 - elif data.attrs.get(f'atoms.{n+1}.hamiltonian') == 'Kanamori': + elif data.attrs.get(f'atoms.{n + 1}.hamiltonian') == 'Kanamori': sec_hubbard_kanamori_model.j = sec_hubbard_kanamori_model.jh def parse_method(self, data: h5py.Group): @@ -329,7 +329,7 @@ def parse_config(keys): sec_config_subsection = x_w2dynamics_config_atoms_parameters() sec_config.x_w2dynamics_config_atoms.append(sec_config_subsection) for key in data.attrs.keys(): - if key.startswith(f'atoms.{i+1}'): + if key.startswith(f'atoms.{i + 1}'): keys_mod = (key.replace('-', '_')).split('.') parameters = data.attrs.get(key) setattr( diff --git a/pyproject.toml b/pyproject.toml index ee44ccf6..3f35a530 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ homepage = "https://github.com/nomad-coe/electronic-parsers" [project.optional-dependencies] dev = [ 'astroid==2.11.7', - 'mypy==1.0.1', + 'mypy>=1.15', 'pytest>= 5.3.0, <8', 'pytest-timeout>=1.4.2', 'pytest-cov>=2.7.1',