@dataclass
class LFQOutputConfig:
    """Describe one label-free quantification (LFQ) output level.

    One instance is created per quantification level (e.g. precursor,
    peptide, protein group) and is consumed when the LFQ tables are built
    and merged back into the precursor table.
    """

    # Name of the column that identifies an entity on this level; it is used
    # as the id column of the LFQ table (e.g. "mod_seq_charge_hash",
    # "mod_seq_hash", "pg").
    quant_level: str

    # Short label of the level (e.g. "precursor", "peptide", "pg"); used in
    # log messages and to look up the level's result table.
    level_name: str

    # Filled from the "save_fragment_quant_matrix" search-output setting for
    # the precursor level; presumably toggles writing the fragment quant
    # matrix -- NOTE(review): confirm against the code that consumes it.
    save_fragments: bool = False
def _merge_quant_levels_to_psm(
    self,
    psm_df: pd.DataFrame,
    lfq_results: dict[str, pd.DataFrame],
    quantlevel_configs: list["LFQOutputConfig"],
) -> pd.DataFrame:
    """Merge quantification results from all levels back to the precursor table.

    Each level's wide LFQ table (one id column plus one column per run) is
    melted to long format and left-joined onto ``psm_df`` using the level's
    id column and ``run`` as keys.

    Parameters
    ----------
    psm_df : pd.DataFrame
        Precursor table to merge quantification data into. It must contain
        a ``run`` column and the id column of every level that has results.
        The input frame itself is not modified.
    lfq_results : dict[str, pd.DataFrame]
        Dictionary containing quantification results for each level, keyed
        by ``level_name``. Missing or empty entries are skipped.
    quantlevel_configs : list[LFQOutputConfig]
        Objects exposing ``quant_level`` (id column name) and ``level_name``
        for every quantification level to merge.

    Returns
    -------
    pd.DataFrame
        Updated precursor table with one intensity column per merged level:
        ``precursor_intensity``, ``peptide_intensity``, ``intensity`` for the
        protein group level and ``<level_name>_intensity`` for any other
        level.

    Notes
    -----
    If ``psm_df`` already carries a column with the target intensity name
    (e.g. the table went through this merge before), that column is
    replaced. Without this, pandas would emit ``*_x`` / ``*_y`` suffixed
    columns and the expected column name would be missing from the result.
    """
    # Output column name per level; the protein group level uses the plain
    # "intensity" column name.
    intensity_column_mapping = {
        "precursor": "precursor_intensity",
        "peptide": "peptide_intensity",
        "pg": "intensity",
    }

    for config in quantlevel_configs:
        lfq_df = lfq_results.get(config.level_name)

        # Nothing to merge for levels that were not quantified or are empty.
        if lfq_df is None or len(lfq_df) == 0:
            continue

        intensity_column = intensity_column_mapping.get(
            config.level_name, f"{config.level_name}_intensity"
        )

        # wide (id column + one column per run) -> long (id, run, intensity)
        melted_df = lfq_df.melt(
            id_vars=config.quant_level, var_name="run", value_name=intensity_column
        )

        # Drop a stale column of the same name so the merge stays idempotent
        # and never produces "_x"/"_y" suffixed duplicates.
        if intensity_column in psm_df.columns:
            psm_df = psm_df.drop(columns=intensity_column)

        psm_df = psm_df.merge(melted_df, on=[config.quant_level, "run"], how="left")

    return psm_df
def test_merge_quant_levels_to_psm_merges_precursor_level():
    """Precursor-level LFQ values end up in the `precursor_intensity` column."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    # given: one precursor observed in one run and its wide LFQ table
    precursor_table = pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run": ["run1"]})
    precursor_lfq = pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run1": [100.0]})
    levels = [LFQOutputConfig("mod_seq_charge_hash", "precursor")]
    output = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")

    # when
    merged = output._merge_quant_levels_to_psm(
        precursor_table, {"precursor": precursor_lfq}, levels
    )

    # then
    assert "precursor_intensity" in merged.columns
    assert merged["precursor_intensity"].iloc[0] == 100.0


def test_merge_quant_levels_to_psm_merges_peptide_level():
    """Peptide-level LFQ values end up in the `peptide_intensity` column."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    # given: one peptide observed in one run and its wide LFQ table
    precursor_table = pd.DataFrame({"mod_seq_hash": ["A"], "run": ["run1"]})
    peptide_lfq = pd.DataFrame({"mod_seq_hash": ["A"], "run1": [400.0]})
    levels = [LFQOutputConfig("mod_seq_hash", "peptide")]
    output = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")

    # when
    merged = output._merge_quant_levels_to_psm(
        precursor_table, {"peptide": peptide_lfq}, levels
    )

    # then
    assert "peptide_intensity" in merged.columns
    assert merged["peptide_intensity"].iloc[0] == 400.0


def test_merge_quant_levels_to_psm_merges_protein_group_level():
    """Protein-group-level LFQ values end up in the plain `intensity` column."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    # given: one protein group observed in one run and its wide LFQ table
    precursor_table = pd.DataFrame({"pg": ["PG1"], "run": ["run1"]})
    pg_lfq = pd.DataFrame({"pg": ["PG1"], "run1": [700.0]})
    levels = [LFQOutputConfig("pg", "pg")]
    output = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")

    # when
    merged = output._merge_quant_levels_to_psm(precursor_table, {"pg": pg_lfq}, levels)

    # then
    assert "intensity" in merged.columns
    assert merged["intensity"].iloc[0] == 700.0
def test_merge_quant_levels_to_psm_handles_empty_lfq_results():
    """An empty LFQ table leaves the precursor table without a new column."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    # given: a precursor table and a level whose LFQ result is empty
    precursor_table = pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run": ["run1"]})
    levels = [LFQOutputConfig("mod_seq_charge_hash", "precursor")]
    output = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")

    # when
    merged = output._merge_quant_levels_to_psm(
        precursor_table, {"precursor": pd.DataFrame()}, levels
    )

    # then: row count is unchanged and no intensity column was added
    assert len(merged) == 1
    assert "precursor_intensity" not in merged.columns


def test_merge_quant_levels_to_psm_merges_all_levels():
    """Precursor, peptide and protein group levels are merged by a single call."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    # given: one precursor row carrying the id columns of all three levels
    precursor_table = pd.DataFrame(
        {
            "mod_seq_charge_hash": ["A1"],
            "mod_seq_hash": ["A"],
            "pg": ["PG1"],
            "run": ["run1"],
        }
    )
    per_level_lfq = {
        "precursor": pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run1": [100.0]}),
        "peptide": pd.DataFrame({"mod_seq_hash": ["A"], "run1": [400.0]}),
        "pg": pd.DataFrame({"pg": ["PG1"], "run1": [700.0]}),
    }
    levels = [
        LFQOutputConfig("mod_seq_charge_hash", "precursor"),
        LFQOutputConfig("mod_seq_hash", "peptide"),
        LFQOutputConfig("pg", "pg"),
    ]
    output = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")

    # when
    merged = output._merge_quant_levels_to_psm(precursor_table, per_level_lfq, levels)

    # then: every level contributed its own intensity column
    expected_columns = {"precursor_intensity", "peptide_intensity", "intensity"}
    assert expected_columns.issubset(merged.columns)
    assert merged["precursor_intensity"].iloc[0] == 100.0
    assert merged["peptide_intensity"].iloc[0] == 400.0
    assert merged["intensity"].iloc[0] == 700.0