Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 56 additions & 21 deletions alphadia/outputtransform/search_plan_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@
logger = logging.getLogger()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In what way is this change breaking? Is it because it adds new columns to the output tables?



@dataclass
class LFQOutputConfig:
    """Describe one level at which label-free quantification is performed.

    Attributes
    ----------
    quant_level : str
        Name of the identifier column for this level (e.g. ``"pg"``). It is
        used as the ``id_vars`` column when melting the quantification table
        and as a merge key when joining results back to the precursor table.
    level_name : str
        Short name of the level (e.g. ``"precursor"``, ``"peptide"``,
        ``"pg"``). It keys the per-level results dictionary and appears in
        progress messages.
    save_fragments : bool
        Defaults to ``False``.
        NOTE(review): presumably controls writing of the fragment quant
        matrix for this level -- confirm against ``_build_lfq_tables``.
    """

    quant_level: str
    level_name: str
    save_fragments: bool = False


class SearchPlanOutput:
PSM_INPUT = "psm"
PRECURSOR_OUTPUT = "precursors"
Expand Down Expand Up @@ -512,27 +519,17 @@ def _build_lfq_tables(

intensity_df, quality_df = qb.accumulate_frag_df_from_folders(folder_list)

@dataclass
class LFQOutputConfig:
should_process: bool
quant_level: str
level_name: str
save_fragments: bool = False

quantlevel_configs = [
LFQOutputConfig(
self.config["search_output"]["precursor_level_lfq"],
"mod_seq_charge_hash",
"precursor",
self.config["search_output"]["save_fragment_quant_matrix"],
),
LFQOutputConfig(
self.config["search_output"]["peptide_level_lfq"],
"mod_seq_hash",
"peptide",
),
LFQOutputConfig(
True, # always process protein group level
"pg",
"pg",
),
Expand All @@ -541,9 +538,6 @@ class LFQOutputConfig:
lfq_results = {}

for quantlevel_config in quantlevel_configs:
if not quantlevel_config.should_process:
continue

logger.progress(
f"Performing label free quantification on the {quantlevel_config.level_name} level"
)
Expand Down Expand Up @@ -599,14 +593,10 @@ class LFQOutputConfig:
file_format=self.config["search_output"]["file_format"],
)

# Use protein group (pg) results for merging with psm_df
pg_lfq_df = lfq_results.get("pg", pd.DataFrame())

if len(pg_lfq_df) > 0:
protein_df_melted = pg_lfq_df.melt(
id_vars="pg", var_name="run", value_name="intensity"
)
psm_df = psm_df.merge(protein_df_melted, on=["pg", "run"], how="left")
# Merge all quantification levels back to precursor table
psm_df = self._merge_quant_levels_to_psm(
psm_df, lfq_results, quantlevel_configs
)

if save:
logger.info("Writing psm output to disk")
Expand All @@ -618,6 +608,51 @@ class LFQOutputConfig:

return lfq_results

def _merge_quant_levels_to_psm(
self,
psm_df: pd.DataFrame,
lfq_results: dict[str, pd.DataFrame],
quantlevel_configs: list,
) -> pd.DataFrame:
"""Merge quantification results from all levels back to the precursor table.

Parameters
----------
psm_df : pd.DataFrame
Precursor table to merge quantification data into
lfq_results : dict[str, pd.DataFrame]
Dictionary containing quantification results for each level
quantlevel_configs : list
List of LFQOutputConfig objects defining quantification levels

Returns
-------
pd.DataFrame
Updated precursor table with merged quantification data
"""
intensity_column_mapping = {
"precursor": "precursor_intensity",
"peptide": "peptide_intensity",
"pg": "intensity",
}

for config in quantlevel_configs:
lfq_df = lfq_results.get(config.level_name, pd.DataFrame())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

            lfq_df = lfq_results.get(config.level_name)

            if lfq_df is None:
                continue


if len(lfq_df) == 0:
continue

intensity_column = intensity_column_mapping.get(
config.level_name, f"{config.level_name}_intensity"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the default required? you have e.g. "precursor": "precursor_intensity", explicitly defined

)

melted_df = lfq_df.melt(
id_vars=config.quant_level, var_name="run", value_name=intensity_column
)
psm_df = psm_df.merge(melted_df, on=[config.quant_level, "run"], how="left")

return psm_df

def _build_mbr_library(
self,
base_spec_lib: base.SpecLibBase,
Expand Down
112 changes: 112 additions & 0 deletions tests/unit_tests/outputtransform/test_search_plan_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,115 @@ def test_output_transform():
assert np.corrcoef(protein_df[i], protein_df[j])[0, 0] > 0.5

shutil.rmtree(temp_folder)


def test_merge_quant_levels_to_psm_merges_precursor_level():
    """Precursor-level intensities end up in ``precursor_intensity``."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    search_plan_output = SearchPlanOutput(
        {"general": {"save_figures": False}}, "/tmp"
    )
    psm_df = pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run": ["run1"]})
    lfq_results = {
        "precursor": pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run1": [100.0]})
    }
    configs = [LFQOutputConfig("mod_seq_charge_hash", "precursor")]

    result = search_plan_output._merge_quant_levels_to_psm(
        psm_df, lfq_results, configs
    )

    # Selecting the column fails if it is missing, so a single exact
    # comparison covers both presence and content.
    assert result["precursor_intensity"].tolist() == [100.0]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could be made stronger:

assert result["precursor_intensity"].values == [100.0]



def test_merge_quant_levels_to_psm_merges_peptide_level():
    """Peptide-level intensities end up in ``peptide_intensity``."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    search_plan_output = SearchPlanOutput(
        {"general": {"save_figures": False}}, "/tmp"
    )
    psm_df = pd.DataFrame({"mod_seq_hash": ["A"], "run": ["run1"]})
    lfq_results = {
        "peptide": pd.DataFrame({"mod_seq_hash": ["A"], "run1": [400.0]})
    }
    configs = [LFQOutputConfig("mod_seq_hash", "peptide")]

    result = search_plan_output._merge_quant_levels_to_psm(
        psm_df, lfq_results, configs
    )

    # Selecting the column fails if it is missing, so a single exact
    # comparison covers both presence and content.
    assert result["peptide_intensity"].tolist() == [400.0]


def test_merge_quant_levels_to_psm_merges_protein_group_level():
    """Protein-group intensities end up in the plain ``intensity`` column."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    search_plan_output = SearchPlanOutput(
        {"general": {"save_figures": False}}, "/tmp"
    )
    psm_df = pd.DataFrame({"pg": ["PG1"], "run": ["run1"]})
    lfq_results = {"pg": pd.DataFrame({"pg": ["PG1"], "run1": [700.0]})}
    configs = [LFQOutputConfig("pg", "pg")]

    result = search_plan_output._merge_quant_levels_to_psm(
        psm_df, lfq_results, configs
    )

    # Selecting the column fails if it is missing, so a single exact
    # comparison covers both presence and content.
    assert result["intensity"].tolist() == [700.0]
Comment on lines +225 to +242
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these tests really adding something? If it works for one level, it should work for all of them.
I would rather add a test with multiple runs, or some edge cases.



def test_merge_quant_levels_to_psm_handles_empty_lfq_results():
    """An empty result table leaves the precursor table untouched."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    search_plan_output = SearchPlanOutput(
        {"general": {"save_figures": False}}, "/tmp"
    )
    psm_df = pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run": ["run1"]})
    lfq_results = {"precursor": pd.DataFrame()}
    configs = [LFQOutputConfig("mod_seq_charge_hash", "precursor")]

    result = search_plan_output._merge_quant_levels_to_psm(
        psm_df, lfq_results, configs
    )

    # Comparing whole frames checks row count, columns and values at once;
    # in particular no "precursor_intensity" column may have been added.
    pd.testing.assert_frame_equal(result, psm_df)


def test_merge_quant_levels_to_psm_merges_all_levels():
    """All levels are merged in one call, with values matched per run."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    search_plan_output = SearchPlanOutput(
        {"general": {"save_figures": False}}, "/tmp"
    )
    # Two runs of the same precursor, so a merge that ignored the "run" key
    # would produce wrong or duplicated rows.
    psm_df = pd.DataFrame(
        {
            "mod_seq_charge_hash": ["A1", "A1"],
            "mod_seq_hash": ["A", "A"],
            "pg": ["PG1", "PG1"],
            "run": ["run1", "run2"],
        }
    )
    lfq_results = {
        "precursor": pd.DataFrame(
            {"mod_seq_charge_hash": ["A1"], "run1": [100.0], "run2": [110.0]}
        ),
        "peptide": pd.DataFrame(
            {"mod_seq_hash": ["A"], "run1": [400.0], "run2": [440.0]}
        ),
        "pg": pd.DataFrame({"pg": ["PG1"], "run1": [700.0], "run2": [770.0]}),
    }
    configs = [
        LFQOutputConfig("mod_seq_charge_hash", "precursor"),
        LFQOutputConfig("mod_seq_hash", "peptide"),
        LFQOutputConfig("pg", "pg"),
    ]

    result = search_plan_output._merge_quant_levels_to_psm(
        psm_df, lfq_results, configs
    )

    assert len(result) == len(psm_df)
    assert result["precursor_intensity"].tolist() == [100.0, 110.0]
    assert result["peptide_intensity"].tolist() == [400.0, 440.0]
    assert result["intensity"].tolist() == [700.0, 770.0]
Loading