-
Notifications
You must be signed in to change notification settings - Fork 5
Add intensity to precursor table #687
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,13 @@ | |
logger = logging.getLogger() | ||
|
||
|
||
@dataclass
class LFQOutputConfig:
    """Settings describing one label-free quantification level.

    Both string fields are positional and have the same type, so take care
    not to swap them: ``quant_level`` is a *column name*, ``level_name`` is a
    *label*.

    Attributes
    ----------
    quant_level : str
        Name of the key column for this level (e.g. ``"mod_seq_charge_hash"``,
        ``"mod_seq_hash"``, ``"pg"``). It is used as the identifier column when
        quantification tables are melted and as a join key when they are merged
        back into the precursor table.
    level_name : str
        Label of the level (e.g. ``"precursor"``, ``"peptide"``, ``"pg"``). It
        is used as the key into the LFQ results dictionary and in log messages.
    save_fragments : bool
        Optional flag, ``False`` by default. For the precursor level it is
        filled from the ``save_fragment_quant_matrix`` search-output setting.
        NOTE(review): presumably controls whether the fragment quant matrix is
        written -- confirm against the code that consumes it.
    """

    quant_level: str
    level_name: str
    save_fragments: bool = False
|
||
|
||
class SearchPlanOutput: | ||
PSM_INPUT = "psm" | ||
PRECURSOR_OUTPUT = "precursors" | ||
|
@@ -512,27 +519,17 @@ def _build_lfq_tables( | |
|
||
intensity_df, quality_df = qb.accumulate_frag_df_from_folders(folder_list) | ||
|
||
@dataclass | ||
class LFQOutputConfig: | ||
should_process: bool | ||
quant_level: str | ||
level_name: str | ||
save_fragments: bool = False | ||
|
||
quantlevel_configs = [ | ||
LFQOutputConfig( | ||
self.config["search_output"]["precursor_level_lfq"], | ||
"mod_seq_charge_hash", | ||
"precursor", | ||
self.config["search_output"]["save_fragment_quant_matrix"], | ||
), | ||
LFQOutputConfig( | ||
self.config["search_output"]["peptide_level_lfq"], | ||
"mod_seq_hash", | ||
"peptide", | ||
), | ||
LFQOutputConfig( | ||
True, # always process protein group level | ||
"pg", | ||
"pg", | ||
), | ||
|
@@ -541,9 +538,6 @@ class LFQOutputConfig: | |
lfq_results = {} | ||
|
||
for quantlevel_config in quantlevel_configs: | ||
if not quantlevel_config.should_process: | ||
continue | ||
|
||
logger.progress( | ||
f"Performing label free quantification on the {quantlevel_config.level_name} level" | ||
) | ||
|
@@ -599,14 +593,10 @@ class LFQOutputConfig: | |
file_format=self.config["search_output"]["file_format"], | ||
) | ||
|
||
# Use protein group (pg) results for merging with psm_df | ||
pg_lfq_df = lfq_results.get("pg", pd.DataFrame()) | ||
|
||
if len(pg_lfq_df) > 0: | ||
protein_df_melted = pg_lfq_df.melt( | ||
id_vars="pg", var_name="run", value_name="intensity" | ||
) | ||
psm_df = psm_df.merge(protein_df_melted, on=["pg", "run"], how="left") | ||
# Merge all quantification levels back to precursor table | ||
psm_df = self._merge_quant_levels_to_psm( | ||
psm_df, lfq_results, quantlevel_configs | ||
) | ||
|
||
if save: | ||
logger.info("Writing psm output to disk") | ||
|
@@ -618,6 +608,51 @@ class LFQOutputConfig: | |
|
||
return lfq_results | ||
|
||
def _merge_quant_levels_to_psm( | ||
self, | ||
psm_df: pd.DataFrame, | ||
lfq_results: dict[str, pd.DataFrame], | ||
quantlevel_configs: list, | ||
) -> pd.DataFrame: | ||
"""Merge quantification results from all levels back to the precursor table. | ||
|
||
Parameters | ||
---------- | ||
psm_df : pd.DataFrame | ||
Precursor table to merge quantification data into | ||
lfq_results : dict[str, pd.DataFrame] | ||
Dictionary containing quantification results for each level | ||
quantlevel_configs : list | ||
List of LFQOutputConfig objects defining quantification levels | ||
|
||
Returns | ||
------- | ||
pd.DataFrame | ||
Updated precursor table with merged quantification data | ||
""" | ||
intensity_column_mapping = { | ||
"precursor": "precursor_intensity", | ||
"peptide": "peptide_intensity", | ||
"pg": "intensity", | ||
} | ||
|
||
for config in quantlevel_configs: | ||
lfq_df = lfq_results.get(config.level_name, pd.DataFrame()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
if len(lfq_df) == 0: | ||
continue | ||
|
||
intensity_column = intensity_column_mapping.get( | ||
config.level_name, f"{config.level_name}_intensity" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is the default required? you have e.g. |
||
) | ||
|
||
melted_df = lfq_df.melt( | ||
id_vars=config.quant_level, var_name="run", value_name=intensity_column | ||
) | ||
psm_df = psm_df.merge(melted_df, on=[config.quant_level, "run"], how="left") | ||
|
||
return psm_df | ||
|
||
def _build_mbr_library( | ||
self, | ||
base_spec_lib: base.SpecLibBase, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -184,3 +184,115 @@ def test_output_transform(): | |
assert np.corrcoef(protein_df[i], protein_df[j])[0, 0] > 0.5 | ||
|
||
shutil.rmtree(temp_folder) | ||
|
||
|
||
def test_merge_quant_levels_to_psm_merges_precursor_level():
    """Test that precursor level quantification is merged correctly."""
    # NOTE(review): a reviewer asked for this import to be moved to the top of
    # the module.
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    spo = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")

    psm_df = pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run": ["run1"]})
    lfq_results = {
        "precursor": pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run1": [100.0]})
    }
    configs = [LFQOutputConfig("mod_seq_charge_hash", "precursor")]

    result = spo._merge_quant_levels_to_psm(psm_df, lfq_results, configs)

    # Comparing the whole frame checks the new column, its value, the untouched
    # key columns and the row count in one go.
    expected = psm_df.assign(precursor_intensity=[100.0])
    pd.testing.assert_frame_equal(result, expected)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. could be made stronger:
|
||
|
||
|
||
def test_merge_quant_levels_to_psm_merges_peptide_level():
    """Test that peptide level quantification is merged correctly."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    spo = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")
    psm_df = pd.DataFrame({"mod_seq_hash": ["A"], "run": ["run1"]})
    lfq_results = {"peptide": pd.DataFrame({"mod_seq_hash": ["A"], "run1": [400.0]})}
    configs = [LFQOutputConfig("mod_seq_hash", "peptide")]

    result = spo._merge_quant_levels_to_psm(psm_df, lfq_results, configs)

    # Whole-frame comparison: also guards key columns and row count.
    expected = psm_df.assign(peptide_intensity=[400.0])
    pd.testing.assert_frame_equal(result, expected)
|
||
|
||
def test_merge_quant_levels_to_psm_merges_protein_group_level():
    """Test that protein group level quantification is merged correctly."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    spo = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")
    psm_df = pd.DataFrame({"pg": ["PG1"], "run": ["run1"]})
    lfq_results = {"pg": pd.DataFrame({"pg": ["PG1"], "run1": [700.0]})}
    configs = [LFQOutputConfig("pg", "pg")]

    result = spo._merge_quant_levels_to_psm(psm_df, lfq_results, configs)

    # The protein group level is stored in the plain "intensity" column.
    expected = psm_df.assign(intensity=[700.0])
    pd.testing.assert_frame_equal(result, expected)
Comment on lines
+225
to
+242
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are these tests really adding something? If it works for one level, it should work for all.
||
|
||
|
||
def test_merge_quant_levels_to_psm_handles_empty_lfq_results():
    """Test that empty LFQ results are handled gracefully."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    spo = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")
    psm_df = pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run": ["run1"]})
    expected = psm_df.copy()
    lfq_results = {"precursor": pd.DataFrame()}
    configs = [LFQOutputConfig("mod_seq_charge_hash", "precursor")]

    result = spo._merge_quant_levels_to_psm(psm_df, lfq_results, configs)

    # An empty level must leave the table untouched: same rows, same columns,
    # and in particular no "precursor_intensity" column.
    pd.testing.assert_frame_equal(result, expected)
|
||
|
||
def test_merge_quant_levels_to_psm_merges_all_levels():
    """Test that all quantification levels are merged in one call."""
    from alphadia.outputtransform.search_plan_output import (
        LFQOutputConfig,
        SearchPlanOutput,
    )

    spo = SearchPlanOutput({"general": {"save_figures": False}}, "/tmp")
    psm_df = pd.DataFrame(
        {
            "mod_seq_charge_hash": ["A1"],
            "mod_seq_hash": ["A"],
            "pg": ["PG1"],
            "run": ["run1"],
        }
    )
    lfq_results = {
        "precursor": pd.DataFrame({"mod_seq_charge_hash": ["A1"], "run1": [100.0]}),
        "peptide": pd.DataFrame({"mod_seq_hash": ["A"], "run1": [400.0]}),
        "pg": pd.DataFrame({"pg": ["PG1"], "run1": [700.0]}),
    }
    configs = [
        LFQOutputConfig("mod_seq_charge_hash", "precursor"),
        LFQOutputConfig("mod_seq_hash", "peptide"),
        LFQOutputConfig("pg", "pg"),
    ]

    result = spo._merge_quant_levels_to_psm(psm_df, lfq_results, configs)

    # Columns are appended in the order of ``configs``; comparing the whole
    # frame also verifies that no key column was altered and no row duplicated.
    expected = psm_df.assign(
        precursor_intensity=[100.0],
        peptide_intensity=[400.0],
        intensity=[700.0],
    )
    pd.testing.assert_frame_equal(result, expected)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In what way is this change breaking? Is it because it adds new columns to the output tables?