diff --git a/.gitignore b/.gitignore
index e883d2b7..e71eeaf1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,3 +99,6 @@ test.ipynb
 
 # uv
 uv.lock
+
+# jupyter
+*.ipynb
diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index 0430a104..4d2e3f33 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -8,6 +8,20 @@ Upcoming Version
 
 * Drop support for Python 3.9, add support for Python 3.13. Minimum required
   Python version is now 3.10.
+* Added the `GeoNuclearData <https://github.com/cristianst85/GeoNuclearData>`__ dataset as `pm.data.GND()`.
+* Added the `European Energy Storage Inventory <https://ses.jrc.ec.europa.eu/storage-inventory-maps>`__ dataset as `pm.data.EESI()`.
+* Added the `GloHydroRES <https://zenodo.org/records/14526360>`__ dataset as `pm.data.GHR()`.
+* Updated the ENTSOE, BEYONDCOAL, JRC, IRENASTAT and Global Energy Monitor datasets to their latest versions.
+* Fixed the distinction of hydro technologies and of offshore versus onshore wind in `pm.data.MASTR()`; storage technologies are now also read in.
+* Improved recognition of CHP power plants.
+* In the Global Energy Monitor datasets, also read entries below the capacity threshold.
+* In `pm.data.GCPT()`, added an estimate of coal plant efficiency.
+* Included mothballed gas, oil and coal power plants.
+* Include the unit/block name in the power plant name before matching.
+* Added an option to retain blocks for subsets of fuel types (e.g. `clean_name: fueltypes_with_blocks: ['Nuclear']`).
+* For fully included datasets, added an option to aggregate only those units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`).
+* Added an option for multiprocessing when aggregating units of non-matched power plants (e.g. `threads_extend_by_non_matched: 16`).
+* Updated the matching logic configuration.
 
 `v0.7.1 `__ (30th January 2024)
 =================================================================================================
diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 98a73477..3acf4306 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -76,24 +76,78 @@ def clean_name(df, config=None):
 
     name = df.Name.astype(str).copy().apply(unidecode.unidecode)
 
+    roman_to_arabic = {
+        "I": "1",
+        "II": "2",
+        "III": "3",
+        "IV": "4",
+        "V": "5",
+        "VI": "6",
+        "VII": "7",
+        "VIII": "8",
+        "IX": "9",
+        "X": "10",
+        "XI": "11",
+    }
+    for roman, arabic in roman_to_arabic.items():
+        name = name.str.replace(rf"\b{roman}\b", arabic, regex=True)
+
     replace = config["clean_name"]["replace"]
     replace.setdefault("", [])
 
+    keep_blocks = config["clean_name"].get("fueltypes_with_blocks", [])
+    if len(keep_blocks) > 0:
+        mask = df.Fueltype.isin(keep_blocks)
+
     for key, pattern in replace.items():
         if config["clean_name"]["remove_common_words"] and (key == ""):
             common_words = pd.Series(sum(name.str.split(), [])).value_counts()
             common_words = list(common_words[common_words >= 20].index)
             pattern += common_words
-        if isinstance(pattern, list):
-            # if pattern is a list, concat all entries in a case-insensitive regex
+
+        pattern = np.atleast_1d(pattern)
+
+        # do not remove block numbers for fuel types with blocks; the regular
+        # regex [^a-zA-Z] removes non-alphabetical characters; for fueltypes to
+        # keep, the regex [^a-zA-Z0-9] is used which only removes
+        # non-alphanumerical characters
+        if len(keep_blocks) > 0 and key == " " and "[^a-zA-Z]" in pattern:
+            base = [rf"\b{p}\b" for p in pattern if p != "[^a-zA-Z]"]
+            pattern_keep = r"(?i)" + "|".join(base + [r"[^a-zA-Z0-9]"])
+            pattern_default = r"(?i)" +
"|".join(base + [r"[^a-zA-Z]"]) + name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True) + name.loc[~mask] = name.loc[~mask].str.replace( + pattern_default, key, regex=True + ) + + # do not remove block letters for fuel types with blocks; the regular + # regex \w would remove standalone letters, this one is skipped for + # fueltypes in mask + elif key == "" and "\w" in pattern: + pattern_keep = r"(?i)" + "|".join( + [rf"\b{p}\b" for p in pattern if p != "\w"] + ) + pattern_default = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern]) + name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True) + name.loc[~mask] = name.loc[~mask].str.replace( + pattern_default, key, regex=True + ) + + else: pattern = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern]) - elif not isinstance(pattern, str): - raise ValueError(f"Pattern must be string or list, not {type(pattern)}") - name = name.str.replace(pattern, key, regex=True) + name = name.str.replace(pattern, key, regex=True) + # remove duplicated words; second pass necessary for edge cases if config["clean_name"]["remove_duplicated_words"]: - name = name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False) - name = name.str.strip().str.title().str.replace(r" +", " ", regex=True) + name = ( + name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False) + .str.strip() + .str.replace(r" +", " ", regex=True) + .str.title() + .str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False) + ) + else: + name = name.str.strip().str.title().str.replace(r" +", " ", regex=True) return df.assign(Name=name).sort_values("Name") @@ -329,7 +383,16 @@ def clean_technology(df, generalize_hydros=False): .str.split(", ") .apply(lambda x: ", ".join(i.strip() for i in np.unique(x))) ) - tech = tech.replace({"Ccgt": "CCGT", "Ocgt": "OCGT"}, regex=True) + ABBREVIATIONS = { + "Ccgt": "CCGT", + "Ocgt": "OCGT", + "Pv": "PV", + "Nas": "NaS", + "Nicd": "NiCd", + "Nanicl": "NaNiCl", + "Caes": "CAES", + } + tech = tech.replace(ABBREVIATIONS, regex=False) return df.assign(Technology=tech) @@ -367,6 +430,7 @@ def aggregate_units( pre_clean_name=False, country_wise=True, config=None, + threads=1, **kwargs, ): """ @@ -385,6 +449,8 @@ def aggregate_units( Whether to clean the 'Name'-column before aggregating. country_wise : Boolean, default True Whether to aggregate only entries with a identical country. 
+ threads : int, default 1 + Number of threads to use """ deprecated_args = {"use_saved_aggregation", "save_aggregation"} used_deprecated_args = deprecated_args.intersection(kwargs) @@ -422,12 +488,27 @@ def aggregate_units( df = clean_name(df) logger.info(f"Aggregating blocks in data source '{ds_name}'.") + agg_query = None + if ds_name in config.get("aggregate_only_matching_sources", []): + for source in config["matching_sources"]: + if isinstance(source, dict) and ds_name in source: + agg_query = source[ds_name] + break + + block_query = None + if with_blocks := config["clean_name"].get("fuel_type_with_blocks", []): # noqa + block_query = "Fueltype in @with_blocks" if country_wise: countries = df.Country.unique() - duplicates = pd.concat([duke(df.query("Country == @c")) for c in countries]) + country_query = "Country == @c" + query = " and ".join(filter(None, [agg_query, block_query, country_query])) + duplicates = pd.concat( + [duke(df.query(query), threads=threads) for c in countries] + ) else: - duplicates = duke(df) + query = " and ".join(filter(None, [agg_query, block_query])) + duplicates = duke(df.query(query) if query else df, threads=threads) df = cliques(df, duplicates) df = df.groupby("grouped").agg(props_for_groups) @@ -445,4 +526,9 @@ def aggregate_units( .reindex(columns=cols) .pipe(set_column_name, ds_name) ) + + # Remove zero values from summed non-weighted numeric columns + numeric_cols = df.select_dtypes(include="number").columns + df[numeric_cols] = df[numeric_cols].where(lambda df: df != 0) + return df diff --git a/powerplantmatching/collection.py b/powerplantmatching/collection.py index 2405f9fa..8169affa 100644 --- a/powerplantmatching/collection.py +++ b/powerplantmatching/collection.py @@ -60,6 +60,10 @@ def df_by_name(name): get_df = getattr(data, name) df = get_df(config=config) + for source in config["matching_sources"]: + if isinstance(source, dict) and next(iter(source)) == name: + df = df.query(source[name]) + if not conf.get("aggregated_units", False): return aggregate_units(df, dataset_name=name, config=config) else: diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 5cdaacb5..c7b0d5c2 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -6,6 +6,7 @@ Collection of power plant data bases and statistical data """ +import json import logging import os from zipfile import ZipFile @@ -24,7 +25,7 @@ gather_specifications, ) from .core import _package_data, get_config -from .heuristics import scale_to_net_capacities +from .heuristics import PLZ_to_LatLon_map, scale_to_net_capacities from .utils import ( config_filter, convert_to_short_name, @@ -57,46 +58,67 @@ def BEYONDCOAL(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("BEYONDCOAL", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Plant", header=[0, 1, 2], skiprows=[3]) + df = pd.read_excel( + fn, sheet_name="Unit", header=0, skiprows=[0, 2, 3], na_values=["unknown"] + ) + + df_plant = pd.read_excel( + fn, + sheet_name="Plant", + header=0, + skiprows=[0, 2, 3], + usecols=["BFF plant ID", "Latitude", "Longitude"], + ).set_index("BFF plant ID") + + df["lat"] = df["BFF plant ID"].map(df_plant.Latitude) + df["lon"] = df["BFF plant ID"].map(df_plant.Longitude) if raw: return df + status_list = config["BEYONDCOAL"].get("status", ["operational"]) # noqa + RENAME_COLUMNS = { - "Plant name": "Name", + "Unit name": "Name", "Fuel type": "Fueltype", - "Latitude": "lat", - "Longitude": "lon", - 
"Commissioning year of first unit": "DateIn", - "(Announced) Retirement year of last unit": "DateOut", - "Coal capacity open": "Capacity", - "Plant status\n(gross)": "status", - "EBC plant ID": "projectID", + "Commissioning year": "DateIn", + "Unit status\n(detailed)": "status", + "BFF unit ID": "projectID", } - phaseout_col = "Covered by country phase-out? [if yes: country phase-out year]" + SET_MAP = { + "chp": "CHP", + "conventional": "PP", + "industrial": "CHP", + "heat": "CHP", + } - df = ( - df["Plant Data"] - .droplevel(1, axis=1) - .rename(columns=RENAME_COLUMNS) - .query('status != "Cancelled"') + with pd.option_context("future.no_silent_downcasting", True): + phaseout_col = "Covered by country phase-out? [if yes: country phase-out year]" + date_out = ( + df["(Announced) Retirement year"] + .replace({"After 2030": np.nan, "By 2030": 2030}) + .astype(float) + .combine_first(pd.to_numeric(df[phaseout_col], errors="coerce")) + ) + + df_final = ( + df.rename(columns=RENAME_COLUMNS) + .query("status in @status_list") .assign( - DateOut=lambda df: df.DateOut.fillna(df[phaseout_col]).where( - lambda ds: ds <= 8000 - ), + DateOut=date_out, projectID=lambda df: "BEYOND-" + df.projectID, - Fueltype=lambda df: df.Fueltype.str.title().replace("Unknown", "Other"), - Set="PP", + Fueltype=lambda df: df.Fueltype.str.title(), + Set=lambda df: df["Unit type"].map(SET_MAP), Technology=np.nan, ) - .pipe(scale_to_net_capacities) .pipe(clean_name) .pipe(convert_to_short_name) .pipe(set_column_name, "BEYONDCOAL") .pipe(config_filter, config) ) - return df + + return df_final def OPSD( @@ -214,6 +236,10 @@ def OPSD( ) +# @deprecated( +# deprecated_in="0.8.0", +# details="Deprecated since data is not maintained. Use GEM instead.", +# ) def GEO(raw=False, update=False, config=None): """ Importer for the GEO database. @@ -286,6 +312,9 @@ def to_year(ds): res = units.join(ppl.set_index("projectID"), "projectID", rsuffix="_ppl") res["DateIn"] = res.DateIn.fillna(res.DateIn_ppl) + res["Name"] = res.Name + res["Unit_Nbr"].fillna("").apply( + lambda x: f" {x}" if x else "" + ) not_included_ppl = ppl.query("projectID not in @res.projectID") res = pd.concat([res, not_included_ppl]).pipe(set_column_name, "GEO") res = scale_to_net_capacities(res) @@ -432,6 +461,11 @@ def set_large_spanish_stores_to_reservoirs(df): .assign( Set=lambda df: np.where(df.Technology == "Run-Of-River", "PP", "Store"), Fueltype="Hydro", + Duration=lambda df: df.Duration.where(df.Duration > 0), + StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where( + df.StorageCapacity_MWh > 0 + ), + Volume_Mm3=lambda df: df.Volume_Mm3.where(df.Volume_Mm3 > 0), ) .drop(columns=["pypsa_id", "GEO"]) .powerplant.convert_alpha2_to_country() @@ -585,6 +619,10 @@ def GPD(raw=False, update=False, config=None, filter_other_dbs=True): ) +# @deprecated( +# deprecated_in="0.8.0", +# details="Removed since data is not maintained. Use GNPT instead.", +# ) def WIKIPEDIA(raw=False, update=False, config=None): """ Importer for the WIKIPEDIA nuclear power plant database. 
@@ -618,7 +656,6 @@ def WIKIPEDIA(raw=False, update=False, config=None): df = ( df.rename(columns=RENAME_COLUMNS) - .pipe(clean_name) .pipe(convert_to_short_name) .assign( Fueltype="Nuclear", @@ -627,6 +664,7 @@ def WIKIPEDIA(raw=False, update=False, config=None): # plants which are not yet built are set to 2027 DateIn=lambda df: df.DateIn.where(~df.Status.str.contains("In Bau"), 2027), ) + .pipe(clean_name) .pipe(set_column_name, "WIKIPEDIA") .pipe(config_filter, config) ) @@ -1114,7 +1152,7 @@ def WEPP(raw=False, config=None): @deprecated( deprecated_in="0.5.0", - details="This function is not maintained anymore.", + details="This function is not maintained anymore. Use MASTR instead.", ) def UBA( raw=False, @@ -1237,7 +1275,7 @@ def UBA( @deprecated( deprecated_in="0.5.0", - details="This function is not maintained anymore.", + details="This function is not maintained anymore. Use MASTR instead.", ) def BNETZA( raw=False, @@ -1398,6 +1436,10 @@ def BNETZA( ) +# @deprecated( +# deprecated_in="0.8.0", +# details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.", +# ) def OPSD_VRE(raw=False, update=False, config=None): """ Importer for the OPSD (Open Power Systems Data) renewables (VRE) @@ -1447,6 +1489,10 @@ def OPSD_VRE(raw=False, update=False, config=None): ) +# @deprecated( +# deprecated_in="0.8.0", +# details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.", +# ) def OPSD_VRE_country(country, raw=False, update=False, config=None): """ Get country specific data from OPSD for renewables, if available. @@ -1596,45 +1642,51 @@ def GBPT(raw=False, update=False, config=None): """ config = get_config() if config is None else config fn = get_raw_file("GBPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Data") + large = pd.read_excel(fn, sheet_name="Data") + small = pd.read_excel(fn, sheet_name="Below Threshold") + df = pd.concat([large, small], ignore_index=True) if raw: return df RENAME_COLUMNS = { - "Project name": "Name", + "Project Name": "Name", "Capacity (MW)": "Capacity", - "Fuel 1": "Fueltype", - "Operating status": "Status", + "Fuel": "Fueltype", "Latitude": "lat", "Longitude": "lon", - "Unit start year": "DateIn", - "Retired year": "DateOut", + "Start Year": "DateIn", + "Retired Year": "DateOut", + "Country/Area": "Country", "GEM phase ID": "projectID", } + fueltype_dict = { - "bioenergy - agricultural waste (solids)": "Solid Biomass", - "bioenergy - refuse (municipal and industrial wastes)": "Solid Biomass", - "bioenergy - refuse (syngas)": "Solid Biomass", - "bioenergy - agricultural waste (biogas)": "Biogas", - "bioenergy - wood & other biomass (solids)": "Solid Biomass", - "bioenergy - ethanol": "Solid Biomass", - "bioenergy - paper mill wastes": "Solid Biomass", - "bioenergy - biodiesel": "Solid Biomass", - "bioenergy - unknown": "Solid Biomass", - "bioenergy - wastewater and sewage sludge (solids or biogas)": "Solid Biomass", - "bioenergy - refuse (landfill gas)": "Biogas", - "bioenergy - agricultural waste (unknown)": "Solid Biomass", - "bioenergy - agricultural waste (syngas)": "Solid Biomass", - "bioenergy - wood & other biomass (biocoal)": "Solid Biomass", + # solid biomass + "bioenergy: agricultural waste (solids)": "Solid Biomass", + "bioenergy: agricultural waste (unknown)": "Solid Biomass", + "bioenergy: paper mill wastes": "Solid Biomass", + "bioenergy: unknown": "Solid Biomass", + "bioenergy: wood & other biomass (biocoal)": "Solid Biomass", + "bioenergy: wood & other biomass (solids)": "Solid 
Biomass", + "bioenergy: agricultural waste (syngas)": "Solid Biomass", + # biogas + "bioenergy: agricultural waste (biogas)": "Biogas", + "bioenergy: refuse (landfill gas)": "Biogas", + "bioenergy: wastewater and sewage sludge (solids or biogas)": "Biogas", + # oil + "bioenergy: ethanol": "Oil", + "bioenergy: biodiesel": "Oil", + # waste + "bioenergy: refuse (municipal and industrial wastes)": "Waste", + "bioenergy: refuse (syngas)": "Solid Biomass", } status_list = config["GBPT"].get("status", ["operating"]) # noqa: F841 df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GBPT") + df.pipe(set_column_name, "GBPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -1642,12 +1694,15 @@ def GBPT(raw=False, update=False, config=None): DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), + Fueltype=df["Fueltype"].apply( + lambda v: fueltype_dict[v.split(",")[0].strip()] + ), ) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) - .pipe(lambda x: x.replace({"Fueltype": fueltype_dict})) - .assign(Technology="Steam Turbine") - .assign(Set="PP") + .assign(Technology=np.nan) + .assign(Set=np.nan) + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -1670,7 +1725,7 @@ def GNPT(raw=False, update=False, config=None): """ config = get_config() if config is None else config fn = get_raw_file("GNPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Data") + df = pd.read_excel(fn, sheet_name="Data", na_values=["--"]) if raw: return df @@ -1690,11 +1745,12 @@ def GNPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GNPT") + df.pipe(set_column_name, "GNPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( + Name=lambda df: df["Name"] + + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""), DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), @@ -1705,6 +1761,7 @@ def GNPT(raw=False, update=False, config=None): .assign(Fueltype="Nuclear") .assign(Technology="Steam Turbine") .assign(Set="PP") + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -1728,7 +1785,7 @@ def GCPT(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("GCPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Units") + df = pd.read_excel(fn, sheet_name="Units", na_values=["not found", "-"]) if raw: return df @@ -1746,40 +1803,63 @@ def GCPT(raw=False, update=False, config=None): "GEM unit/phase ID": "projectID", } fueltype_dict = { + "anthracite": "Hard Coal", "bituminous": "Hard Coal", + "bituminous with CCS": "Hard Coal", "lignite": "Lignite", - "unknown": "Hard Coal", - "subbituminous": "Hard Coal", - "waste coal": "Hard Coal", - "anthracite": "Hard Coal", "lignite with CCS": "Lignite", - "bituminous with CCS": "Hard Coal", + "subbituminous": "Hard Coal", "subbituminous with CCS": "Hard Coal", + "unknown": "Hard Coal", "unknown with CCS": "Hard Coal", + "waste coal": "Hard Coal", + } + technology_dict = { + "IGCC": "CCGT", + "subcritical": "Steam Turbine", + "unknown": np.nan, + "supercritical": "Steam Turbine", + "ultra-supercritical": "Steam 
Turbine", } planned_retirement = df["Planned retirement"].apply(pd.to_numeric, errors="coerce") + # conservative assumption that mothballed plants (without fixed retirement + # date) went out of operation in 2024 + mothballed_retirement = df["Status"].apply( + lambda x: 2024 if x == "mothballed" else np.nan + ) + status_list = config["GCPT"].get("status", ["operating"]) # noqa: F841 + BTU_PER_KWH = 3412.14 + df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GCPT") + df.pipe(set_column_name, "GCPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( + Name=lambda df: df["Name"] + + df["Unit name"].fillna("").apply(lambda x: f" {x}" if x else ""), DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), - DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), + DateOut=df["DateOut"] + .apply(pd.to_numeric, errors="coerce") + .combine_first(planned_retirement) + .combine_first(mothballed_retirement), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), + Set=df["CHP"].replace({"yes": "CHP", "no": "PP"}), + Efficiency=BTU_PER_KWH / df["Heat rate (Btu per kWh)"], ) - .assign(DateOut=lambda x: x["DateOut"].combine_first(planned_retirement)) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) - .pipe(lambda x: x.replace({"Fueltype": fueltype_dict})) - .pipe(lambda x: x.assign(Technology="Steam Turbine")) - .pipe(lambda x: x.assign(Set="PP")) + .pipe( + lambda x: x.replace( + {"Fueltype": fueltype_dict, "Technology": technology_dict} + ) + ) + .pipe(clean_name) .pipe(config_filter, config) ) @@ -1810,11 +1890,11 @@ def GGTPT(raw=False, update=False, config=None): RENAME_COLUMNS = { "Project Name": "Name", - "Capacity (MW)": "Capacity", + "Unit Capacity (MW)": "Capacity", "Latitude": "lat", "Longitude": "lon", - "Start year": "DateIn", - "Retired year": "DateOut", + "Start Year": "DateIn", + "Retired Year": "DateOut", "Country/Area": "Country", "GEM unit ID": "projectID", } @@ -1823,8 +1903,7 @@ def GGTPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GGTPT") + df.pipe(set_column_name, "GGTPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -1838,6 +1917,7 @@ def GGTPT(raw=False, update=False, config=None): .assign(Fueltype="Geothermal") .assign(Technology="Steam Turbine") .assign(Set="PP") + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -1861,6 +1941,11 @@ def GWPT(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("GWPT", update=update, config=config) df = pd.read_excel(fn, sheet_name="Data") + df_small = pd.read_excel(fn, sheet_name="Below Threshold") + df = pd.concat([df, df_small], ignore_index=True) + + if raw: + return df RENAME_COLUMNS = { "Project Name": "Name", @@ -1885,8 +1970,7 @@ def GWPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GWPT") + df.pipe(set_column_name, "GWPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -1900,6 +1984,7 @@ def GWPT(raw=False, update=False, config=None): .pipe(lambda x: x.replace({"Technology": technology_dict})) .assign(Fueltype="Wind") .assign(Set="PP") + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -1952,8 +2037,7 @@ def GSPT(raw=False, update=False, 
config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GSPT") + df.pipe(set_column_name, "GSPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -1967,6 +2051,7 @@ def GSPT(raw=False, update=False, config=None): .pipe(lambda x: x.replace({"Technology": technology_dict})) .assign(Fueltype="Solar") .assign(Set="PP") + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -1989,69 +2074,85 @@ def GGPT(raw=False, update=False, config=None): """ config = get_config() if config is None else config fn = get_raw_file("GGPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Gas & Oil Units") + df = pd.read_excel(fn, sheet_name="Gas & Oil Units", na_values=["not found"]) + df_small = pd.read_excel( + fn, sheet_name="sub-threshold units", na_values=["not found"] + ) + df = pd.concat([df, df_small], ignore_index=True) if raw: return df RENAME_COLUMNS = { "Plant name": "Name", - "Fuel": "Fueltype", "Capacity (MW)": "Capacity", "Latitude": "lat", "Longitude": "lon", "Start year": "DateIn", "Retired year": "DateOut", "CHP": "Set", - "GEM location ID": "projectID", + "Fuel": "Fueltype", + "GEM unit ID": "projectID", + "Country/Area": "Country", + "Turbine/Engine Technology": "Technology", } + def classify_fuel(s): + if s["Fuel classification?"] in ["Gas only", "LNG only"]: + return "Natural Gas" + elif s["Fuel classification?"] == "Oil only": + return "Oil" + elif s["Fueltype"].startswith("fossil liquids"): + return "Oil" + else: + return "Natural Gas" + technology_dict = { - "GT": "Steam Turbine", - "IC": "Steam Turbine", - "CC": "CCGT", - "GT/IC": "Steam Turbine", + "gas turbine": "Steam Turbine", + "internal combustion": "Steam Turbine", + "combined cycle": "CCGT", "ICCC": "CCGT", "ISCC": "CCGT", - "ST": "Steam Turbine", + "steam turbine": "Steam Turbine", "AFC": "CCGT", + "unknown": np.nan, } set_dict = { - "Y": "CHP", - "N": "PP", - "not found": "PP", + "yes": "CHP", + "no": "PP", } status_list = config["GGPT"].get("status", ["operating"]) # noqa: F841 - gas_fuels = ["NG", "LNG", "BU", "LFG", "BG", "BFG", "COG", "CM", "H", "OG"] df = df.rename(columns=RENAME_COLUMNS) + + # conservative assumption that mothballed plants (without fixed retirement + # date) went out of operation in 2024 + mothballed_retirement = df["Status"].apply( + lambda x: 2024 if x == "mothballed" else np.nan + ) + df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GGPT") + df.pipe(set_column_name, "GGPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") - .pipe(lambda x: x.query("Capacity != 'not found'")) .assign( DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), - DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), + DateOut=df["DateOut"] + .apply(pd.to_numeric, errors="coerce") + .combine_first(df["Planned retire"]) + .combine_first(mothballed_retirement), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), - Capacity=lambda df: pd.to_numeric(df.Capacity, "coerce"), - Fueltype=df["Fueltype"].apply( - lambda s: ( - "Natural Gas" - if any(sub in gas_fuels for sub in s.split("/")) - else "Oil" - ) - ), + Capacity=df["Capacity"].apply(pd.to_numeric, errors="coerce"), + Fueltype=df.apply(classify_fuel, axis=1), ) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) .pipe(lambda x: x.replace({"Technology": technology_dict})) - .pipe(lambda x: x.replace({"Set": 
set_dict}).fillna({"Set": "PP"})) - .assign(Fueltype="Natural Gas") + .pipe(lambda x: x.replace({"Set": set_dict})) + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -2074,7 +2175,9 @@ def GHPT(raw=False, update=False, config=None): """ config = get_config() if config is None else config fn = get_raw_file("GHPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Data") + large = pd.read_excel(fn, sheet_name="Data") + small = pd.read_excel(fn, sheet_name="Below Threshold") + df = pd.concat([large, small], ignore_index=True) if raw: return df @@ -2087,7 +2190,7 @@ def GHPT(raw=False, update=False, config=None): "Start Year": "DateIn", "Retired Year": "DateOut", "GEM unit ID": "projectID", - "Country 1": "Country", + "Country/Area 1": "Country", "Technology Type": "Technology", } technology_dict = { @@ -2095,13 +2198,13 @@ def GHPT(raw=False, update=False, config=None): "pumped storage": "Pumped Storage", "run-of-river": "Run-Of-River", "conventional and pumped storage": "Pumped Storage", - "conventional and run-of-river": "Run-Of-River", + "conventional and run-of-river": "Reservoir", + "unknown": "Run-Of-River", } status_list = config["GHPT"].get("status", ["operating"]) # noqa: F841 df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GHPT") + df.pipe(set_column_name, "GHPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -2115,6 +2218,7 @@ def GHPT(raw=False, update=False, config=None): .pipe(lambda x: x.replace({"Technology": technology_dict})) .assign(Fueltype="Hydro") .assign(Set="PP") + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -2162,8 +2266,11 @@ def MASTR( defaults to powerplantmatching.config.get_config() """ + config = get_config() if config is None else config + THRESHOLD_KW = 100 # noqa: F841 + RENAME_COLUMNS = { "EinheitMastrNummer": "projectID", "NameKraftwerk": "Name", @@ -2174,6 +2281,7 @@ def MASTR( "EinheitBetriebsstatus": "Status", "Laengengrad": "lon", "Breitengrad": "lat", + "WEIC": "EIC", } COUNTRY_MAP = { "Deutschland": "Germany", @@ -2187,6 +2295,9 @@ def MASTR( "Energietraeger", "Hauptbrennstoff", "NameStromerzeugungseinheit", + "NameKraftwerksblock", + "NameWindpark", + "Technologie", ] fn = get_raw_file("MASTR", update=update, config=config) @@ -2197,6 +2308,7 @@ def MASTR( "Hydro": "hydro_raw.csv", "Wind": "wind_raw.csv", "Solar": "solar_raw.csv", + "Storage": "bnetza_mastr_storage_raw.csv", } data_frames = [] with ZipFile(fn, "r") as file: @@ -2208,39 +2320,88 @@ def MASTR( "GeplantesInbetriebnahmedatum", "ThermischeNutzleistung", "KwkMastrNummer", + "Batterietechnologie", + "DatumBeginnVoruebergehendeStilllegung", + "DatumWiederaufnahmeBetrieb", + "Postleitzahl", + "Ort", + "Gemeinde", + "Landkreis", + "Lage", ] target_columns = ( target_columns + PARSE_COLUMNS + list(RENAME_COLUMNS.keys()) ) usecols = available_columns.intersection(target_columns) - df = pd.read_csv(file.open(name), usecols=usecols).assign( - Filesuffix=fueltype + df = ( + pd.read_csv(file.open(name), usecols=usecols) + .assign(Filesuffix=fueltype) + .query("Nettonennleistung >= @THRESHOLD_KW") ) data_frames.append(df) break df = pd.concat(data_frames).reset_index(drop=True) + cols = ["NutzbareSpeicherkapazitaet", "VerknuepfteEinheit"] + with ZipFile(fn, "r") as file: + fn_storage_units = ( + "bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv" + ) + storage_units = pd.read_csv(file.open(fn_storage_units), usecols=cols) + + storage_mwh = ( + 
storage_units.assign( + VerknuepfteEinheit=lambda x: x.VerknuepfteEinheit.str.split(", ") + ) + .assign(n=lambda x: x.VerknuepfteEinheit.str.len()) + .explode("VerknuepfteEinheit") + .assign(NutzbareSpeicherkapazitaet=lambda x: x.NutzbareSpeicherkapazitaet / x.n) + .set_index("VerknuepfteEinheit")["NutzbareSpeicherkapazitaet"] + ) + + df["StorageCapacity_MWh"] = ( + df["EinheitMastrNummer"].map(storage_mwh) / 1000 + ) # kWh to MWh + if raw: return df status_list = config["MASTR"].get("status", ["In Betrieb"]) # noqa: F841 - capacity_threshold_kw = 1000 - df = ( + PLZ_map = PLZ_to_LatLon_map() + df.Postleitzahl = ( + df.Postleitzahl.astype(str).str.replace(r"[^0-9]", "0", regex=True).astype(int) + ) + df["PLZ_lat"] = df.Postleitzahl.map(PLZ_map.lat) + df["PLZ_lon"] = df.Postleitzahl.map(PLZ_map.lon) + + df_processed = ( df.rename(columns=RENAME_COLUMNS) .query("Status in @status_list") - .loc[lambda df: df.Capacity > capacity_threshold_kw] .assign( projectID=lambda df: "MASTR-" + df.projectID, + Name=lambda df: df.Name.combine_first(df.NameWindpark).combine_first( + df.NameStromerzeugungseinheit + ), Country=lambda df: df.Country.map(COUNTRY_MAP), Capacity=lambda df: df.Capacity / 1e3, # kW to MW - DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year, - DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year, - ) - .assign( - DateIn=lambda df: df["DateIn"].combine_first( + DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year.combine_first( pd.to_datetime(df["GeplantesInbetriebnahmedatum"]).dt.year ), + DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year.where( + df.Status != "Vorübergehend stillgelegt", + pd.to_datetime( + df["DatumBeginnVoruebergehendeStilllegung"] + ).dt.year.where( + df["DatumWiederaufnahmeBetrieb"].isna(), + pd.to_datetime(df.DateOut).dt.year, + ), + ), + lat=lambda df: df.lat.combine_first(df.PLZ_lat), + lon=lambda df: df.lon.combine_first(df.PLZ_lon), + Duration=lambda df: df.StorageCapacity_MWh.div( + df.Capacity, fill_value=np.nan + ), ) .pipe( gather_specifications, @@ -2252,12 +2413,96 @@ def MASTR( df["KwkMastrNummer"].isna() & df["ThermischeNutzleistung"].isna(), "CHP" ), ) - .pipe(clean_name) + ) + + psw = df_processed.query( + "Energietraeger == 'Speicher' and Technologie == 'Pumpspeicher'" + ).index + df_processed.loc[psw, ["Fueltype", "Technology"]] = ["Hydro", "Pumped Storage"] + + bat = df_processed.query( + "Energietraeger == 'Speicher' and Technologie == 'Batterie'" + ).index + df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"] + + BATTERY_MAPPING = { + "Blei-Batterie": "Pb", + "Lithium-Batterie": "Li", + "Sonstige Batterie": np.nan, + "Hochtemperaturbatterie": "NaS", + "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "NiCd", + } + df_processed.loc[bat, "Technology"] = df_processed.loc[ + bat, "Batterietechnologie" + ].map(BATTERY_MAPPING) + + WIND_MAPPING = { + "Windkraft auf See": "Offshore", + "Windkraft an Land": "Onshore", + } + wind = df_processed.query("Energietraeger == 'Wind'").index + df_processed.loc[wind, "Technology"] = df_processed.loc[wind, "Lage"].map( + WIND_MAPPING + ) + + sel = df_processed.query( + "Fueltype == 'Natural Gas' and Filesuffix == 'Bioenergy'" + ).index + df_processed.loc[sel, "Fueltype"] = "Biogas" + + # one biogas unit has 'Wind' in name + sel = df_processed.query("Fueltype == 'Wind' and Filesuffix == 'Biomass'").index + df_processed.loc[sel, "Fueltype"] = "Biogas" + + # some combi-units are named wind-solar + sel = df_processed.query( + "Fueltype in ['Wind', 'Waste'] and Filesuffix == 'Solar'" + ).index + 
df_processed.loc[sel, ["Fueltype", "Technology"]] = ["Solar", "PV"] + + # some technologies are wrongly allocated + sel = df_processed.query("Fueltype == 'Biogas' and Technology == 'PV'").index + df_processed.loc[sel, "Technology"] = "Combustion Engine" + sel = df_processed.query( + "Fueltype == 'Hydro' and Technology == 'Steam Turbine'" + ).index + df_processed.loc[sel, "Technology"] = "Run-Of-River" + sel = df_processed.query("Fueltype == 'Solar' and Technology == 'CCGT'").index + df_processed.loc[sel, "Technology"] = "PV" + sel = df_processed.query( + "Fueltype == 'Solar' and Technology == 'OCGT' and Filesuffix == 'Combustion'" + ).index + df_processed.loc[sel, "Fueltype"] = "Natural Gas" + sel = df_processed.query( + "Fueltype == 'Wind' and Technology == 'PV' and Filesuffix == 'Solar'" + ).index + df_processed.loc[sel, "Fueltype"] = "Solar" + sel = df_processed.query( + "Fueltype == 'Wind' and Technology == 'Combustion Engine' and Filesuffix == 'Bioenergy'" + ).index + df_processed.loc[sel, "Fueltype"] = "Biogas" + + mask = df_processed.query( + "Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set in ['Store', 'CHP']" + ).index + df_processed.loc[mask, "Set"] = "PP" + + df_processed["Name"] = df_processed.apply( + lambda x: f"{x.Name} {x.NameKraftwerksblock.replace(x.Name, '').strip()}" + if x.NameKraftwerksblock + and x.NameKraftwerksblock != x.Name + and x.Fueltype in config["clean_name"]["fueltypes_with_blocks"] + else x.Name, + axis=1, + ) + + df_final = ( + df_processed.pipe(clean_name) .pipe(set_column_name, "MASTR") .pipe(config_filter, config) ) - return df + return df_final # deprecated alias for GGPT @@ -2269,6 +2514,247 @@ def GEM_GGPT(*args, **kwargs): return GGPT(*args, **kwargs) +def EESI( + raw=False, + update=False, + config=None, +): + """ + Get the European Energy Storage Inventory (EESI) dataset. + + Provided by the European Commission's Joint Research Centre. Contains + chemical, electrochemical, thermal and mechanical energy storage + technologies in Europe. + + https://ses.jrc.ec.europa.eu/storage-inventory-maps + + https://ses.jrc.ec.europa.eu/storage-inventory-tool/api/projects + + Parameters + ---------- + raw : Boolean, default False + Whether to return the original dataset + update: bool, default False + Whether to update the data from the url. + config : dict, default None + Add custom specific configuration, e.g. 
+ powerplantmatching.config.get_config(target_countries='Italy'), defaults + to powerplantmatching.config.get_config() + """ + + config = get_config() if config is None else config + + fn = get_raw_file("EESI", update=update, config=config) + + with open(fn) as f: + data = json.load(f) + + df = pd.json_normalize(data["projects"], sep="_") + float_cols = ["power", "capacity", "facility_latitude", "facility_longitude"] + df[float_cols] = df[float_cols].astype(float) + + if raw: + return df + + status_list = config["EESI"].get("status", ["Operational"]) # noqa: F841 + + RENAME_COLUMNS = { + "title": "Name", + "power": "Capacity", + "capacity": "StorageCapacity_MWh", + "facility_latitude": "lat", + "facility_longitude": "lon", + "facility_country": "Country", + "id": "projectID", + "technology_name": "Technology", + "status": "Status", + } + + df_processed = ( + df.rename(columns=RENAME_COLUMNS) + .query("Status in @status_list") + .assign( + projectID=lambda df: "EESI-" + df.projectID.astype(str), + StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where( + df.StorageCapacity_MWh > 0 + ), + Capacity=lambda df: df.Capacity.where(df.Capacity > 0), + Set="Store", + ) + ) + + sel = df_processed.query("technology_parentName == 'ElectroChemical'").index + df_processed.loc[sel, "Fueltype"] = "Battery" + + sel = df_processed.query("technology_parentName == 'Thermal'").index + df_processed.loc[sel, "Fueltype"] = "Heat Storage" + + sel = df_processed.query("technology_parentName == 'Mechanical'").index + df_processed.loc[sel, "Fueltype"] = "Mechanical Storage" + + sel = df_processed.query("Technology == 'Power to Gas (H2)'").index + df_processed.loc[sel, "Fueltype"] = "Hydrogen Storage" + + sel = df_processed.query("Technology == 'Pumped Hydro Storage (PHS)'").index + df_processed.loc[sel, "Fueltype"] = "Hydro" + + TECHNOLOGY_MAPPING = { + "Power to Gas (H2)": np.nan, + "Lithium-ion batteries": "Li", + "Lead Acid batteries": "Pb", + "Sodium Sulphur batteries": "NaS", + "Redox flow batteries Vanadium": "V", + "Sodium Nickel Chloride batteries": "NaNiCl", + "Lithium-titanate battery (LTO)": "Li", + "Pumped Hydro Storage (PHS)": "Pumped Storage", + "Unespecified Storage - mechanical": np.nan, + "Compressed Air Energy Storage (CAES)": "CAES", + "Flywheel Energy Storage": "Flywheel", + "Unspecific Thermal Storage": np.nan, + "Molten salts (Sensible Thermal Energy Storage (STES))": "Molten Salt", + } + df_processed.Technology = df_processed.Technology.map(TECHNOLOGY_MAPPING) + + df_final = ( + df_processed.pipe(clean_name) + .pipe(set_column_name, "EESI") + .pipe(config_filter, config) + ) + + return df_final + + +def GND( + raw=False, + update=False, + config=None, +): + """ + Get the GeoNuclearData (GND) dataset. + + https://github.com/cristianst85/GeoNuclearData + + Parameters + ---------- + raw : Boolean, default False + Whether to return the original dataset + update: bool, default False + Whether to update the data from the url. + config : dict, default None + Add custom specific configuration, e.g. 
+ powerplantmatching.config.get_config(target_countries='Italy'), defaults + to powerplantmatching.config.get_config() + """ + + config = get_config() if config is None else config + + fn = get_raw_file("GND", update=update, config=config) + + df = pd.read_csv(fn) + + if raw: + return df + + status_list = config["GND"].get("status", ["Operational"]) # noqa: F841 + + RENAME_COLUMNS = { + "Id": "projectID", + "Latitude": "lat", + "Longitude": "lon", + "OperationalFrom": "DateIn", + "OperationalTo": "DateOut", + } + + df_final = ( + df.rename(columns=RENAME_COLUMNS) + .query("Status in @status_list") + .assign( + projectID=lambda df: "GND-" + df.projectID.astype(str), + Capacity=lambda df: df.Capacity.where(df.Capacity > 0), + DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year, + DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year, + Set="PP", + Fueltype="Nuclear", + ) + .pipe(clean_name) + .pipe(set_column_name, "GND") + .pipe(config_filter, config) + ) + + return df_final + + +def GHR( + raw=False, + update=False, + config=None, +): + """ + Get the GloHydroRes (GHR) dataset. + + https://www.nature.com/articles/s41597-025-04975-0 + + https://zenodo.org/records/14526360 + + Parameters + ---------- + raw : Boolean, default False + Whether to return the original dataset + update: bool, default False + Whether to update the data from the url. + config : dict, default None + Add custom specific configuration, e.g. + powerplantmatching.config.get_config(target_countries='Italy'), defaults + to powerplantmatching.config.get_config() + """ + + config = get_config() if config is None else config + + fn = get_raw_file("GHR", update=update, config=config) + + df = pd.read_csv(fn) + + if raw: + return df + + RENAME_COLUMNS = { + "ID": "projectID", + "name": "Name", + "country": "Country", + "Latitude": "plant_lat", + "Longitude": "plant_lon", + "plant_type": "Technology", + "dam_height_m": "DamHeight_m", + "year": "DateIn", + } + TECHNOLOGY_MAP = { + "STO": "Reservoir", + "RTO": "Run-Of-River", + "PHS": "Pumped Hydro", + "canal": np.nan, + } + + df_final = ( + df.rename(columns=RENAME_COLUMNS) + .assign( + projectID=lambda df: "GHR-" + df.projectID.astype(str), + Name=lambda df: df.Name.str.split(" - ").str[0].combine_first(df.dam_name), + DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year, + Technology=lambda df: df.Technology.map(TECHNOLOGY_MAP), + Volume_Mm3=lambda df: df.res_vol_km3 * 1e3, + # StorageCapacity_MWh=lambda df: 9.81 * df.dam_height_m * df.Volume_Mm3 * 0.9 / 3.6, + # Duration=lambda df: df.StorageCapacity_MWh / df.Capacity, + Set="PP", + Fueltype="Hydro", + ) + .pipe(clean_name) + .pipe(set_column_name, "GHR") + .pipe(config_filter, config) + ) + + return df_final + + def EXTERNAL_DATABASE(raw=False, update=True, config=None): """ Importer for external custom databases. 
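# --- Worked example, not part of the patch ---------------------------------------------------
# The efficiency estimate added to GCPT above divides the energy content of one kWh
# (3412.14 Btu) by the reported heat rate in Btu per kWh. The heat-rate value below is
# a hypothetical figure used only for illustration.
BTU_PER_KWH = 3412.14

heat_rate_btu_per_kwh = 9500.0  # hypothetical coal unit
efficiency = BTU_PER_KWH / heat_rate_btu_per_kwh

print(round(efficiency, 3))  # 0.359 -> roughly 36 % net efficiency
# ---------------------------------------------------------------------------------------------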
diff --git a/powerplantmatching/duke.py b/powerplantmatching/duke.py index 3f093690..4c6fff7a 100644 --- a/powerplantmatching/duke.py +++ b/powerplantmatching/duke.py @@ -40,6 +40,7 @@ def duke( showmatches=False, keepfiles=False, showoutput=False, + threads=1, ): """ Run duke in different modes (Deduplication or Record Linkage Mode) to @@ -107,6 +108,7 @@ def duke( "-Dfile.encoding=UTF-8", "no.priv.garshol.duke.Duke", "--linkfile=linkfile.txt", + f"--threads={threads}", ] if singlematch: args.append("--singlematch") diff --git a/powerplantmatching/heuristics.py b/powerplantmatching/heuristics.py index 7a4eca05..08bf2b1f 100644 --- a/powerplantmatching/heuristics.py +++ b/powerplantmatching/heuristics.py @@ -55,6 +55,8 @@ def extend_by_non_matched( if config is None: config = get_config() + threads = config.get("threads_extend_by_non_matched", 1) + if isinstance(extend_by, str): label = extend_by extend_by = getattr(data, extend_by)(config=config) @@ -71,7 +73,7 @@ def extend_by_non_matched( if aggregate_added_data and not extend_by.empty: extend_by = aggregate_units( - extend_by, dataset_name=label, config=config, **aggkwargs + extend_by, dataset_name=label, config=config, threads=threads, **aggkwargs ) extend_by["projectID"] = extend_by.projectID.map(lambda x: {label: x}) else: diff --git a/powerplantmatching/package_data/PLZ_Coords_map.csv b/powerplantmatching/package_data/PLZ_Coords_map.csv index 90203fec..f6fc38cb 100644 --- a/powerplantmatching/package_data/PLZ_Coords_map.csv +++ b/powerplantmatching/package_data/PLZ_Coords_map.csv @@ -8197,3 +8197,11 @@ PLZ,lon,lat 65527,8.29686030496,50.1698531547 32760,8.89250849998,51.9103401848 65529,8.34783843133,50.256587295 +39628,11.6901777,52.6269331 +23769,11.1340848,54.4687375 +64760,8.9928567,49.540722 +78089,8.3637278,48.0748482 +99331,10.8270088,50.7108384 +98694,10.9888104,50.6365371 +19055,11.4375455,53.655925 +81248,11.4023582,48.1497765 diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index f9b88ebe..6b4f3bf7 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -1,7 +1,7 @@ # ---------------------------------------------------------------------------- # # IO Config # # ---------------------------------------------------------------------------- # -entsoe_token: +entsoe_token: "" google_api_key: # ---------------------------------------------------------------------------- # @@ -16,31 +16,45 @@ main_query: "Name != '' and (lat >= 30 or lat != lat)" matching_sources: # Make individual queries for the datasets as done in `fully_included_sources` # Queries are combined with `main_query` with an `and` operator - - ENTSOE: Fueltype != 'Solar' - - GEO: Fueltype != 'Solar' - - GPD: Fueltype != 'Solar' - - JRC: Fueltype != 'Solar' - - OPSD: Country != "Spain" and Fueltype != 'Hard Coal' and Fueltype != 'Solar' - - BEYONDCOAL: Fueltype != 'Solar' - - WIKIPEDIA: Fueltype != 'Solar' - - GEM - - MASTR + # capacity filters avoid matching of too small units (which is too time-consuming) + # wind is per turbine rather than park in MASTR and unsuitable for matching + - ENTSOE: not (Country == 'Germany' and Fueltype == 'Wind') + # wind in germany is provided by MASTR, other filters are due to large deviations to other datasets + - GEO: Capacity >= 1 and not (Country == 'Germany' and Fueltype == 'Wind') and Fueltype not in ['Oil', 'Nuclear'] and not (Country in ['Bulgaria', 'Slovakia'] and Fueltype == 'Hard Coal') + # wind in germany is 
provided by MASTR, nuclear is not block-wise, other filters are due to large deviations to other datasets + - GPD: Capacity >= 1 and not (Country == 'Germany' and Fueltype == 'Wind') and not (Country in ['Czechia', 'Bulgaria', 'Romania'] and Fueltype == 'Hard Coal') and Fueltype != 'Nuclear' + - JRC: Capacity >= 1 + # wind in germany is provided by MASTR, other filters are due to large deviations to other datasets + - OPSD: not (Country == 'Germany' and Fueltype == 'Wind') and ((Capacity >= 1 and Fueltype != 'Solar') or Capacity >= 3) and not (Country == 'Spain' and Fueltype == 'Hard Coal') and not (Country == 'Italy' and Fueltype == 'Natural Gas') + - BEYONDCOAL + # wind in germany is provided by MASTR + - GEM: Capacity >= 3 and not (Country == 'Germany' and Fueltype == 'Wind') + # do not match units below 1 MW (2 MW for biogas, 3 MW for solar), exclude wind in Germany from any matching + - MASTR: (Fueltype != 'Wind') and ((Fueltype == 'Solar' and Capacity >= 3) or (Fueltype == 'Biogas' and Capacity >= 2) or (Fueltype not in ['Solar', 'Biogas'] and Capacity >= 1)) + - EESI + - GHR -# fully_included_sources, these sources are included even without match to the final dataset +# # fully_included_sources, these sources are included even without match to the final dataset fully_included_sources: # Make individual queries for the datasets - - ENTSOE: (Country not in ['Switzerland', 'Ireland', 'Albania', 'Greece', 'Czech Republic', 'Bulgaria', 'United Kingdom', 'Italy', 'Serbia'] and not (Country == 'Spain' and Fueltype == 'Hydro')) or (Fueltype == 'Geothermal') - - GEO: (Country == 'Spain' and Fueltype == 'Natural Gas') - - GPD: Country in ['Finland', 'Spain'] - - JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway'] - - OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria'] + # wind and solar in Germany is covered by MASTR + - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) + # battery in Germany is covered by MASTR + - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') + # exclude units smaller than 100 kW (low total capacity) and take nuclear from other datasets (good matching) + - MASTR: Capacity >= 0.1 and Fueltype != 'Nuclear' + # take small hydro outside Germany from OPSD (highest coverage) + - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro' - BEYONDCOAL - - GEM: Country != 'Germany' or Fueltype == 'Solar' - - MASTR + # include this selection of countries as they have poorer coverage in all other datasets + - JRC: Country in ['Italy', 'Croatia', 'Serbia', 'Slovakia'] +# these sources skip unit aggregation for fully_included_sources not covered in matching_sources +aggregate_only_matching_sources: + - MASTR # the matching process of very small units is not efficient -parallel_duke_processes: false -process_limit: 4 +parallel_duke_processes: 16 +threads_extend_by_non_matched: 16 matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{tag}/powerplants.csv # ---------------------------------------------------------------------------- # @@ -50,20 +64,22 @@ matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{ta opsd_vres_base_year: 2020 BNETZA: - reliability_score: 2 + reliability_score: 1 fn: Kraftwerksliste_2017_2.xlsx url: 
https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/Versorgungssicherheit/Erzeugungskapazitaeten/Kraftwerksliste/Kraftwerksliste_2019_1.xlsx;jsessionid=17E419F28D025C7DD9FC6E2BEB3D088F?__blob=publicationFile&v=2 BEYONDCOAL: net_capacity: false - aggregated_units: true - reliability_score: 6 - fn: Europe_Beyond_Coal-European_Coal_Database_hc5n.xlsx - url: https://beyond-coal.eu/wp-content/uploads/2021/07/2021-04-20_Europe_Beyond_Coal-European_Coal_Database_hc5n.xlsx + aggregated_units: false + reliability_score: 4 + status: ["construction", "operational", "no longer coal", "retired", "standby", "deactivated", "retrofitting"] + fn: 2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx + url: https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx IRENA: net_capacity: true aggregated_units: true - fn: IRENASTAT_capacities_2000-2023.csv - url: https://zenodo.org/records/10952917/files/IRENASTAT_capacities_2000-2023.csv + fn: IRENASTAT_capacities_2000-2024.csv + # compiled from https://pxweb.irena.org/pxweb/en/IRENASTAT/IRENASTAT__Power%20Capacity%20and%20Generation/Country_ELECSTAT_2025_H2_PX.px/ + url: https://tubcloud.tu-berlin.de/s/p2D5E9MLWE8HPHE/download/IRENASTAT_capacities_2000-2024.csv CARMA: net_capacity: false reliability_score: 1 @@ -71,23 +87,23 @@ CARMA: fn: Full_CARMA_2009_Dataset_1.csv ENTSOE: reliability_score: 5 - url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/entsoe_powerplants.csv - fn: entsoe_powerplants.csv + url: https://tubcloud.tu-berlin.de/s/QaHLH38J4A7ZF5m/download/entsoe_transparency_platform_20250820.csv + fn: entsoe_transparency_platform_20250820.csv ENTSOE-EIC: - url: https://eepublicdownloads.entsoe.eu/eic-codes-csv/W_eiccodes.csv - fn: entsoe_eic_codes.csv + url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv + fn: W_eicCodes.csv JRC: - reliability_score: 4 + reliability_score: 5 fn: jrc-hydro-power-plant-database.csv - url: https://raw.githubusercontent.com/energy-modelling-toolkit/hydro-power-database/fd7535c/data/jrc-hydro-power-plant-database.csv + url: https://raw.githubusercontent.com/energy-modelling-toolkit/hydro-power-database/27e80f/data/jrc-hydro-power-plant-database.csv GEO: net_capacity: false - reliability_score: 3 + reliability_score: 2 url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/global_energy_observatory_power_plants.csv fn: global_energy_observatory_power_plants.csv GEO_units: net_capacity: false - reliability_score: 3 + reliability_score: 2 url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/global_energy_observatory_ppl_units.csv fn: global_energy_observatory_ppl_units.csv GPD: @@ -96,19 +112,19 @@ GPD: #if outdated, look at http://datasets.wri.org/dataset/globalpowerplantdatabase url: https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip WIKIPEDIA: - reliability_score: 4 + reliability_score: 2 url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/nuclear_plants_from_wikipedia.csv fn: nuclear_plants_from_wikipedia.csv IWPDCY: aggregated_units: true - reliability_score: 3 + reliability_score: 2 fn: IWPDCY.csv OPSD_DE: - reliability_score: 4 + reliability_score: 3 fn: conventional_power_plants_DE.csv url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/conventional_power_plants_DE.csv OPSD_EU: - reliability_score: 4 + 
reliability_score: 3 fn: conventional_power_plants_EU.csv url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/conventional_power_plants_EU.csv OPSD_VRE: @@ -139,78 +155,94 @@ OPSD_VRE_GB: url: https://data.open-power-system-data.org/renewable_power_plants/2020-08-25/renewable_power_plants_UK.csv fn: renewable_power_plants_UK.csv OPSD: - reliability_score: 4 + reliability_score: 3 Capacity_stats: url: https://data.open-power-system-data.org/national_generation_capacity/2020-10-01/national_generation_capacity_stacked.csv fn: national_generation_capacity_stacked.csv UBA: net_capacity: false - reliability_score: 4 + reliability_score: 1 fn: kraftwerke-de-ab-100-mw.xls url: https://www.umweltbundesamt.de/sites/default/files/medien/372/dokumente/kraftwerke_de_ab_100_mw_0.xls WEPP: net_capacity: false - reliability_score: 3 + reliability_score: 1 fn: platts_wepp.csv GGPT: net_capacity: false - reliability_score: 5 + reliability_score: 6 status: ["operating", "retired", "construction"] - fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-February-2024-v4.xlsx - url: https://tubcloud.tu-berlin.de/s/Be5arQgT9Z9g8Kp/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-February-2024-v4.xlsx + fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/aKrt7dyNgazmgAm/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx GEM: # combined data set of all GEM trackers net_capacity: true - reliability_score: 5 + reliability_score: 6 GCPT: net_capacity: false - reliability_score: 4 - status: ["operating", "retired", "construction"] - fn: Global-Coal-Plant-Tracker-July-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/FdyKMZtr2ddRJEd/download/Global-Coal-Plant-Tracker-July-2024.xlsx + reliability_score: 6 + status: ["operating", "retired", "construction", "mothballed"] + fn: Global-Coal-Plant-Tracker-July-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx GGTPT: net_capacity: false - reliability_score: 4 - status: ["operating", "retired", "construction"] - fn: Geothermal-Power-Tracker-May-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/Hz3ZD7YcKnZTs9t/download/Geothermal-Power-Tracker-May-2024.xlsx + reliability_score: 6 + aggregated_units: false + status: ["operating", "retired", "construction", "mothballed"] + fn: Geothermal-Power-Tracker-March-2025-Final.xlsx + url: https://tubcloud.tu-berlin.de/s/dNoEsLeGtCWDkoc/download/Geothermal-Power-Tracker-March-2025-Final.xlsx GWPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "construction"] - fn: Global-Wind-Power-Tracker-June-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/Z9b3WkAJmSnsrHD/download/Global-Wind-Power-Tracker-June-2024.xlsx + fn: Global-Wind-Power-Tracker-February-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/8NSXSjPmJPXpg4W/download/Global-Wind-Power-Tracker-February-2025.xlsx GSPT: net_capacity: false - reliability_score: 4 - status: ["operating", "construction"] - fn: Global-Solar-Power-Tracker-June-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/tJ5K5rA2e5XaNjM/download/Global-Solar-Power-Tracker-June-2024.xlsx + reliability_score: 6 + status: ["operating", "retired", "construction"] + fn: Global-Solar-Power-Tracker-February-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/7eo4dZXMp6eB3mz/download/Global-Solar-Power-Tracker-February-2025.xlsx GBPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "construction"] 
- fn: Global-Bioenergy-Power-Tracker-GBPT-V1.xlsx - url: https://tubcloud.tu-berlin.de/s/F34bbwcxYHL9ZR4/download/Global-Bioenergy-Power-Tracker-GBPT-V1.xlsx + fn: Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx + url: https://tubcloud.tu-berlin.de/s/CzMBKe2rAcsoq7c/download/Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx GNPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "mothballed", "construction"] fn: Global-Nuclear-Power-Tracker-July-2024.xlsx url: https://tubcloud.tu-berlin.de/s/gXFim9EciRHrjeQ/download/Global-Nuclear-Power-Tracker-July-2024.xlsx GHPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "construction"] - fn: Global-Hydropower-Tracker-April-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/sEztyBLdJS5sNHY/download/Global-Hydropower-Tracker-April-2024.xlsx - + fn: Global-Hydropower-Tracker-April-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/2xqxRmfP4FKTrLf/download/Global-Hydropower-Tracker-April-2025.xlsx MASTR: net_capacity: true - reliability_score: 8 - status: ["In Betrieb", "In Planung", "Endgültig stillgelegt"] + reliability_score: 7 + status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"] fn: bnetza_open_mastr_2025-02-09.zip url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip +EESI: + net_capacity: true + reliability_score: 5 + status: ["Operational"] # since no start years given + fn: european-energy-storage-inventory-20250817-2245.json + url: https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json +GND: + net_capacity: true + reliability_score: 5 + status: ["Shutdown", "Operational", "Under Construction", "Decommissioning Completed"] + url: https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv + fn: nuclear_power_plants.csv +GHR: + reliability_score: 4 + fn: GloHydroRes_vs1.csv + url: https://zenodo.org/records/14526360/files/GloHydroRes_vs1.csv # ---------------------------------------------------------------------------- # # Data Structure Config # @@ -279,42 +311,136 @@ target_fueltypes: # given by the list. An empty string results in a regex expression containing only the key. # Parsed of representatives at the top may be overwritten by representatives further below. 
@@ -279,42 +311,136 @@ target_fueltypes:
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
   Other: ".*"
-  Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse]
-  Biogas: [biogas, biomethan, gasförmige biomasse]
-  Nuclear: [nuclear]
+  Solid Biomass:
+    - biological
+    - bioenergy
+    - agricultural
+    - biomass
+    - feste biomasse
+    - biomasa
+    - biomassa
+    - feste biogene stoffe
+    - pellets
+    - stroh
+    - straw
+  Biogas:
+    - biogas
+    - biogaz
+    - biomethan
+    - gasförmige biomasse
+  Nuclear:
+    - nuclear
+    - kernkraft
+    - atomkraft
+    - nucléaire
+    - atomowa
+    - jądrowa
+    - kjernekraft
+    - atoom
   Natural Gas:
-    [
-      ccgt,
-      gas,
-      natural gas,
-      ocgt,
-      lng,
-      combined cycle,
-      fossil gas,
-      mixed fossil fuels,
-      erdgas,
-      andere gase,
-    ]
+    - ccgt
+    - gas
+    - natural gas
+    - ocgt
+    - lng
+    - combined cycle
+    - fossil gas
+    - mixed fossil fuels
+    - erdgas
+    - andere gase
+    - gaz
+    - gaz naturel
+    - gas natural
+    - naturgass
+    - gaz ziemny
+    - gass
+    - aardgas
+    - flüssiggas
   Hydro:
-    [
-      run-off,
-      run off,
-      run of river,
-      run-of-river,
-      ror,
-      hydro,
-      hydroelectric,
-      wasserkraft,
-      wasser,
-    ]
-  Hard Coal: [coal, coke, steinkohle]
-  Lignite: [brown coal, lignite, peat, braunkohle]
-  Oil: [oil, diesel, mineralölprodukte]
+    - run-off
+    - run off
+    - run of river
+    - run-of-river
+    - ror
+    - hydro
+    - hidro
+    - hydraulique
+    - hydroelectric
+    - wasserkraft
+    - waterkracht
+    - wasser
+    - vannkraft
+    - vattenkraft
+    - wodna
+    - idroelettrica
+    - idraulica
+  Hard Coal:
+    - coal
+    - coke
+    - steinkohle
+    - houille
+    - charbon dur
+    - hulla
+    - carbón duro
+    - carbone duro
+    - antracite
+    - steinkul
+    - węgiel kamienny
+    - steenkool
+  Lignite:
+    - brown coal
+    - lignite
+    - peat
+    - braunkohle
+    - ligni.*
+    - brunatny
+    - brunkul
+    - bruinkool
+  Oil:
+    - oil
+    - diesel
+    - biodiesel
+    - methanol
+    - heizöl
+    - ethanol
+    - mineralölprodukte
+    - öl
+    - fioul
+    - mazout
+    - petrol
+    - olio
+    - olej
+    - carburante
+    - olie
   Geothermal: ""
   Solar: ""
-  Waste: ["abfall.*", "waste"]
+  Waste:
+    - "abfall.*"
+    - waste
+    - mva
+    - müll
+    - afval
+    - affald
+    - energy recovery
+    - incineration
+    - reststoffe
+    - refuse
+    - déchets
+    - ordures
+    - residuos
+    - basura
+    - rifiuti
+    - scarti
+    - odpady
+    - śmieci
+    - abfälle
   Wind: ""
-  Battery: [Electro-chemical, battery]
+  Battery:
+    - Electro-chemical
+    - battery
+  Mechanical Storage: ""
+  Heat Storage: ""
+  Hydrogen Storage: ""
 target_sets:
@@ -322,89 +448,207 @@ target_sets:
   # Provide a mapping of the keys to a list or a regex expression which are used for parsing.
   # A list will be converted to a regex expression matching all words (case-insensitive)
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
   PP: ".*"
   CHP:
-    [
-      heizkraftwerk,
-      hkw,
-      kwk,
-      fhkw,
-      gud,
-      hp,
-      bhkw,
-      cogeneration,
-      power and heat,
-      heat and power,
-      chp,
-    ]
-  Store: [battery, storage, store]
+    - heizkraftwerk
+    - hkw
+    - kwk
+    - fhkw
+    - gud
+    - hp
+    - bhkw
+    - cogeneration
+    - power and heat
+    - heat and power
+    - chp
+    - cogen
+    - heat & power
+    - power & heat
+    - cogeneración
+    - cogenerazione
+    - kogeneracja
+    - combinada calor y electricidad
+    - kraftvarmeverk
+    - kraftvarmeværk
+    - samproduktion
+    - samproduksjon
+    - kvv
+    - wkk
+    - warmtekrachtkoppeling
+    - warmte-krachtcentrale
+  Storage:
+    - battery
+    - storage
+    - store
+    - speicher
+    - pumped
 target_technologies:
   # Provide a mapping of the keys to a list or a regex expression which are used for parsing.
   # A list will be converted to a regex expression matching all words (case-insensitive)
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
-  CCGT: [ccgt, gas, natural gas, gasturbinen mit abhitzekessel]
-  OCGT: [ocgt, gasturbinen ohne abhitzekessel]
-  Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor]
-  Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor]
-  Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage]
-  Pumped Storage: [pumped hydro, pumped, speicherwasseranlage]
-  Reservoir: ""
+  CCGT:
+    - ccgt
+    - gas
+    - natural gas
+    - gasturbinen mit abhitzekessel
+  OCGT:
+    - ocgt
+    - gasturbinen ohne abhitzekessel
+  Steam Turbine:
+    - steam
+    - turbine
+    - kondensationsmaschine
+    - gegendruckmaschine
+    - dampfmotor
+  Combustion Engine:
+    - combustion engine
+    - verbrennungsmotor
+    - stirlingmotor
+  Run-Of-River:
+    - run-off
+    - run off
+    - run of river
+    - run-of-river
+    - ror
+    - laufwasseranlage
+    - laufwasser
+    - abwasserkraft
+    - trinkwassersystem
+    - brauchwassersystem
+    - pasada
+    - przepływowa
+    - fluente
+    - elvekraft
+    - doorstroom
+    - älvkraft
+  Reservoir:
+    - reservoir
+    - réservoir
+    - impoundment
+    - talsperre
+    - stausee
+    - speicherwasseranlage
+    - speicherwasser
+    - barrage
+    - embalse
+    - bacino
+    - zbiornik
+    - magasinverk
+    - damkraftverk
+    - reguleringsmagasin
+  Pumped Storage:
+    - pumped hydro
+    - pumped
+    - kavernen
+    - bombeo
+    - reversible
+    - reversibel
+    - oberbecken
+    - unterbecken
+    - pompage
+    - pompaggio
+    - pompowa
+    - pumpekraftverk
   Marine: ""
-  Onshore: ""
-  Offshore: ""
-  PV: [pv, photo-voltaic, photo voltaic]
+  PV:
+    - pv
+    - photo-voltaic
+    - photo voltaic
   CSP: ""
+  Onshore:
+    - onshore
+    - an land
+    - terrestre
+    - landvind
+    - på land
+    - op land
+    - lądowy
+    - su terra
+    - en tierra
+    - à terre
+  Offshore:
+    - offshore
+    - nearshore
+    - auf see
+    - en mer
+    - marino
+    - en mar
+    - in mare
+    - morski
+    - havvind
+    - til havs
+    - på havet
+    - op zee
+    - zeewind
 clean_name:
+  fueltypes_with_blocks:
+    - Nuclear
   remove_common_words: false # remove words which appear more that 20 times in all entries
   remove_duplicated_words: true
   replace:
     " ": "[^a-zA-Z]" # non-alphabetical symbols
     "": # This should be a list, if remove_common_words is true.
-      [
-        I,
-        II,
-        III,
-        IV,
-        V,
-        VI,
-        VII,
-        VIII,
-        IX,
-        X,
-        XI,
-        parque,
-        grupo,
-        station,
-        power,
-        plant,
-        unit,
-        kraftwerk,
-        kw,
-        hkw,
-        nuclear,
-        thermal,
-        heizkraftwerk,
-        eolico,
-        project,
-        hydroelectric,
-        pumped,
-        storage,
-        france,
-        austria,
-        sweden,
-        serbia,
-        ukraine,
-        switzerland,
-        slovakia,
-        croatia,
-        poland,
-        slovenia,
-        portugal,
-        bosnia,
-        and,
-        herzegovina,
-        \w, #remove single letters
-      ]
+      - I
+      - II
+      - III
+      - IV
+      - V
+      - VI
+      - VII
+      - VIII
+      - IX
+      - X
+      - XI
+      - parque
+      - grupo
+      - station
+      - power
+      - plant
+      - unit
+      - block
+      - kraftwerk
+      - kernkraftwerk
+      - wehrkraftwerk
+      - rheinkraftwerk
+      - gemeinschaftskernkraftwerk
+      - kernkw
+      - kw
+      - hkw
+      - nuclear
+      - hydro
+      - thermal
+      - heizkraftwerk
+      - eolico
+      - project
+      - hydroelectric
+      - hydropower
+      - hydroelectrique
+      - hydraulique
+      - embassament
+      - pumped
+      - storage
+      - france
+      - austria
+      - sweden
+      - serbia
+      - ukraine
+      - switzerland
+      - slovakia
+      - croatia
+      - poland
+      - slovenia
+      - portugal
+      - bosnia
+      - and
+      - herzegovina
+      - bulgaria
+      - generating
+      - romania
+      - macedonia
+      - latvia
+      - lithuania
+      - hungary
+      - \w #remove single letters
     "ss": "ß"

 # ---------------------------------------------------------------------------- #
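The `target_*` and `clean_name` hunks above rely on the convention stated in their comments: a list of representatives is compiled into a single case-insensitive, whole-word regex, while an empty string matches only the key itself. A hedged sketch of that conversion, using the Lignite representatives from above; the variable names are illustrative, and entries such as `ligni.*` are themselves regex fragments, so they are deliberately not escaped:

    import re

    # Illustrative only: join the representatives into one case-insensitive,
    # whole-word pattern, as described in the config comments above.
    representatives = ["brown coal", "lignite", "peat", "braunkohle", "ligni.*"]
    pattern = r"(?i)" + "|".join(rf"\b{p}\b" for p in representatives)

    assert re.search(pattern, "Braunkohle Kraftwerk Ost")
    assert re.search(pattern, "Lignitos del Norte")  # matched via "ligni.*"
    assert not re.search(pattern, "Hard coal unit 3")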
@@ -455,5 +699,6 @@ fuel_to_color:
   Geothermal: darkgoldenrod
   Battery: purple
   Hydrogen Storage: teal
-  Electro-mechanical: teal
+  Mechanical Storage: darkslategray
+  Heat Storage: darkorange
   Total: gold
diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py
index cab6e18a..ee966212 100644
--- a/powerplantmatching/utils.py
+++ b/powerplantmatching/utils.py
@@ -113,16 +113,7 @@ def config_filter(df, config):

     main_query = config.get("main_query", "")

-    # individual filter from config.yaml
-    queries = {}
-    for source in config["matching_sources"]:
-        if isinstance(source, dict):
-            queries.update(source)
-        else:
-            queries[source] = ""
-    ds_query = queries.get(name, "")
-
-    query = " and ".join([q for q in [target_query, main_query, ds_query] if q])
+    query = " and ".join([q for q in [target_query, main_query] if q])

     df = correct_manually(df, name, config=config)

@@ -185,7 +176,6 @@ def set_uncommon_fueltypes_to_other(df, fillna_other=True, config=None, **kwargs
     default = [
         "Mixed fuel types",
         "Electro-mechanical",
-        "Hydrogen Storage",
     ]
     fueltypes = kwargs.get("fueltypes", default)
     df.loc[df.Fueltype.isin(fueltypes), "Fueltype"] = "Other"
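In the `config_filter` hunk above, the per-source queries are dropped and only the target and main queries are combined before filtering. A small sketch of that composition pattern; the column names and values are made up, only the join-and-`DataFrame.query` idea is taken from the changed line:

    import pandas as pd

    target_query = "Country in ['Germany', 'France']"  # hypothetical
    main_query = "Capacity >= 1"                       # hypothetical

    # Empty sub-queries are dropped, the rest are joined with "and".
    query = " and ".join(q for q in [target_query, main_query] if q)

    df = pd.DataFrame({"Country": ["Germany", "Spain"], "Capacity": [120.0, 50.0]})
    print(df.query(query))  # keeps only the German 120 MW row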
@@ -342,7 +332,7 @@ def fun(f, q_in, q_out):
         q_out.put((i, f(x)))


-def parmap(f, arg_list, config=None):
+def parmap(f, arg_list, config=None, threads=None):
     """
     Parallel mapping function. Use this function to parallelly map function
     f onto arguments in arg_list. The maximum number of parallel threads is
@@ -355,11 +345,21 @@
         python function with one argument
     arg_list : list
         list of arguments mapped to f
+    config : dict, default None
+        configuration dictionary
+    threads : int, default None
+        number of parallel threads
     """
     if config is None:
         config = get_config()
-    if config["parallel_duke_processes"]:
-        nprocs = min(multiprocessing.cpu_count(), config["process_limit"])
+
+    if threads is None:
+        threads = config["parallel_duke_processes"]
+        if isinstance(threads, bool):
+            threads = config.get("process_limit", 1)
+
+    if threads > 1:
+        nprocs = min(multiprocessing.cpu_count(), threads)
         logger.info(f"Run process with {nprocs} parallel threads.")
         q_in = multiprocessing.Queue(1)
         q_out = multiprocessing.Queue()
diff --git a/test/test_cleaning.py b/test/test_cleaning.py
index dab6c5c0..b4411075 100644
--- a/test/test_cleaning.py
+++ b/test/test_cleaning.py
@@ -83,7 +83,7 @@ def test_gather_specifications(data):
 def test_clean_name(data):
     res = clean_name(data)
     assert res.Name[0] == "Powerplant"
-    assert res.Name[1] == "An Hydro Powerplant"
+    assert res.Name[1] == "An Powerplant"
     assert res.Name[2] == "Another Powerplant With Whitespaces"
     assert res.Name[3] == "Coalition"
     assert res.Name[4] == "Besonders Chp"
diff --git a/test/test_data.py b/test/test_data.py
index 07d32843..933d0710 100755
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -54,4 +54,7 @@ def test_url_retrieval():


 def test_reduced_retrieval():
-    pm.powerplants(reduced=False)
+    config = pm.get_config()
+    config["matching_sources"] = ["GEO", "GPD"]
+    config["fully_included_sources"] = []
+    pm.powerplants(reduced=False, config=config)
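Finally, a hedged sketch of how the extended `parmap` above might be called. The resolution order (explicit `threads`, otherwise `parallel_duke_processes`, falling back to `process_limit`) follows the added lines; the worker function and the serial fallback for `threads <= 1` are assumptions, not part of the diff:

    import powerplantmatching as pm
    from powerplantmatching.utils import parmap

    def double(x):
        # stand-in worker; any picklable single-argument function works
        return 2 * x

    if __name__ == "__main__":
        config = pm.get_config()

        # explicit thread count takes precedence over the config flags
        print(parmap(double, [1, 2, 3], config=config, threads=2))

        # with threads left at None, parallel_duke_processes / process_limit decide
        print(parmap(double, [1, 2, 3], config=config))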