Commit ff9d543

updated the report and test fixes
1 parent a50c9e2 commit ff9d543

5 files changed: 97 additions & 53 deletions


examples/how_to_questions/metrics/data_integrity/dataset_rouge_summary_metric.ipynb

Lines changed: 19 additions & 3 deletions

@@ -45,7 +45,7 @@
    "outputs": [],
    "source": [
     "report = Report(metrics=[\n",
-    "    ROUGESummaryMetric(column_name=\"summary\", rouge_n=1)\n",
+    "    ROUGESummaryMetric(column_name=\"summary\", rouge_n=2)\n",
     "])"
    ]
   },
@@ -75,11 +75,27 @@
    "source": [
     "report.as_dict()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report.as_dataframe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "evidently",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -97,5 +113,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
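
Taken together, the notebook now builds the report with bigram ROUGE (rouge_n=2) and reads the result back both as a dict and, via the newly added cell, as a dataframe. A minimal end-to-end sketch of those cells as one script (the two-row frames are borrowed from this commit's tests; every call used here appears elsewhere in this diff):

import pandas as pd

from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
from evidently.report.report import Report

# Example data borrowed from the tests in this commit.
current = pd.DataFrame({"summary": ["hello there", "general kenobi"]})
reference = pd.DataFrame({"summary": ["hello there", "no de"]})

# rouge_n=2 matches the updated notebook cell (bigram overlap).
report = Report(metrics=[ROUGESummaryMetric(column_name="summary", rouge_n=2)])
report.run(current_data=current, reference_data=reference)

report.as_dict()       # nested result: per_row_scores and summary_score
report.as_dataframe()  # flat tabular view, added by the new notebook cell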

src/evidently/metrics/_registry.py

Lines changed: 12 additions & 0 deletions

@@ -138,6 +138,13 @@
     "evidently.metrics.data_integrity.dataset_summary_metric.DatasetSummaryMetric",
     "evidently:metric:DatasetSummaryMetric",
 )
+
+register_type_alias(
+    Metric,
+    "evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetric",
+    "evidently:metric:ROUGESummaryMetric",
+)
+
 register_type_alias(
     Metric,
     "evidently.metrics.data_quality.column_category_metric.ColumnCategoryMetric",
@@ -570,6 +577,11 @@
     "evidently.metrics.data_integrity.dataset_summary_metric.DatasetSummaryMetricResult",
     "evidently:metric_result:DatasetSummaryMetricResult",
 )
+register_type_alias(
+    MetricResult,
+    "evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetricResult",
+    "evidently:metric_result:ROUGESummaryMetricResult",
+)
 register_type_alias(
     MetricResult,
     "evidently.metrics.data_quality.column_category_metric.CategoryStat",
src/evidently/metrics/data_integrity/rouge_summary_metric.py

Lines changed: 37 additions & 19 deletions

@@ -1,31 +1,37 @@
 from typing import List
-from typing import Union

 import evaluate
 import pandas as pd

-from evidently.base_metric import ColumnName
 from evidently.base_metric import InputData
 from evidently.base_metric import Metric
 from evidently.base_metric import MetricResult
 from evidently.core import IncludeTags
 from evidently.model.widget import BaseWidgetInfo
+from evidently.options.base import AnyOptions
 from evidently.renderers.base_renderer import MetricRenderer
 from evidently.renderers.base_renderer import default_renderer
 from evidently.renderers.html_widgets import header_text
 from evidently.renderers.html_widgets import table_data
+from evidently.renderers.html_widgets import text_widget


 class ROUGESummaryMetricResult(MetricResult):
     class Config:
         type_alias = "evidently:metric_result:ROUGESummaryMetricResult"
         field_tags = {
+            "current": {IncludeTags.Current},
+            "reference": {IncludeTags.Reference},
             "rouge_type": {IncludeTags.Parameter},
-            "value": {IncludeTags.Parameter},
+            "per_row_scores": {IncludeTags.Parameter},
+            "summary_score": {IncludeTags.Parameter},
         }

+    current: list
+    reference: list
     rouge_type: str
-    score: dict
+    per_row_scores: list
+    summary_score: float


 class ROUGESummaryMetric(Metric[ROUGESummaryMetricResult]):
@@ -36,50 +42,62 @@ class Config:
     column_name: str
     rouge_n: int

-    def __init__(self, column_name: Union[str, ColumnName], rouge_n: int):
+    def __init__(self, column_name: str, rouge_n: int, options: AnyOptions = None):
         self.column_name = column_name
         self.rouge_n = rouge_n
-        super().__init__()
+        super().__init__(options=options)

-    def _calculate_summary_rouge(self, current_data: pd.Series, reference_data: pd.Series):
+    def _calculate_summary_rouge(self, current: pd.Series, reference: pd.Series):
         rouge_evaluator = evaluate.load("rouge")

-        predictions = current_data.astype(str).tolist()
-        references = reference_data.astype(str).tolist()
+        current = current.astype(str).tolist()
+        reference = reference.astype(str).tolist()

         rouge_scores = rouge_evaluator.compute(
-            rouge_types=[f"rouge{self.rouge_n}"], predictions=predictions, references=references, use_aggregator=False
+            rouge_types=[f"rouge{self.rouge_n}"], predictions=current, references=reference, use_aggregator=False
         )

         per_row_rouge_scores = rouge_scores[f"rouge{self.rouge_n}"]

         summary_rouge_score = sum(per_row_rouge_scores) / len(per_row_rouge_scores)

-        return per_row_rouge_scores, summary_rouge_score
+        return per_row_rouge_scores, summary_rouge_score, current, reference

-    def calculate(self, data: InputData) -> MetricResult:
+    def calculate(self, data: InputData) -> ROUGESummaryMetricResult:
+        if data.current_data is None or data.reference_data is None:
+            raise ValueError("The current data or the reference data is None.")
         if len(data.current_data[self.column_name]) == 0 or len(data.reference_data[self.column_name]) == 0:
             raise ValueError("The current data or the reference data is empty.")

-        per_row_rouge_scores, summary_rouge_score = self._calculate_summary_rouge(
+        per_row_rouge_scores, summary_rouge_score, current, reference = self._calculate_summary_rouge(
             data.current_data[self.column_name], data.reference_data[self.column_name]
         )

         result = ROUGESummaryMetricResult(
             rouge_type=f"ROUGE-{self.rouge_n}",
-            score={"per_row_scores": per_row_rouge_scores, "summary_score": summary_rouge_score},
+            per_row_scores=per_row_rouge_scores,
+            summary_score=summary_rouge_score,
+            current=current,
+            reference=reference,
         )
         return result


 @default_renderer(wrap_type=ROUGESummaryMetric)
 class ROUGESummaryMetricRenderer(MetricRenderer):
     @staticmethod
-    def _get_table(metric, n: int = 2) -> BaseWidgetInfo:
-        column_names = ["Metric", "Value"]
-        rows = ([metric.rouge_type, metric.score],)
+    def _get_table(metric) -> BaseWidgetInfo:
+        column_names = ["Metric", "current", "reference", "score"]
+        rows = []
+        for i in range(len(metric.current)):
+            rows.append([metric.rouge_type, metric.current[i], metric.reference[i], metric.per_row_scores[i]])
+        # rows.append(["metric.rouge_type", 1, "metric.current[i]", "metric.reference[i]", 2.4])
         return table_data(title="", column_names=column_names, data=rows)

-    def render_html(self, obj: ROUGESummaryMetricResult) -> List[BaseWidgetInfo]:
+    def render_html(self, obj: ROUGESummaryMetric) -> List[BaseWidgetInfo]:
         metric = obj.get_result()
-        return [header_text(label="ROUGE Metric"), self._get_table(metric)]
+        return [
+            header_text(label="ROUGE Metric"),
+            self._get_table(metric),
+            text_widget(text=f"{metric.summary_score}", title="Overall ROUGE score"),
+        ]
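
The core of _calculate_summary_rouge is a single call into Hugging Face's evaluate library: with use_aggregator=False it returns one score per prediction/reference pair, and the metric averages them itself. The same computation can be sanity-checked standalone (assumes the evaluate package and its rouge_score backend are installed; the data is the two-row example from this commit's tests):

import evaluate

rouge = evaluate.load("rouge")  # backed by the rouge_score package

scores = rouge.compute(
    rouge_types=["rouge1"],
    predictions=["hello there", "general kenobi"],
    references=["hello there", "no de"],
    use_aggregator=False,  # per-row scores instead of a single aggregate
)

per_row = scores["rouge1"]             # [1.0, 0.0]
summary = sum(per_row) / len(per_row)  # 0.5, matching the test expectation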

tests/metrics/data_interity/test_dataset_rouge_summary_metric.py

Lines changed: 7 additions & 31 deletions

@@ -4,37 +4,7 @@
 import pytest

 from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
-from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetricResult
 from evidently.report.report import Report
-from tests.conftest import smart_assert_equal
-
-
-@pytest.mark.parametrize(
-    "current_df, reference_df, metric, expected_result",
-    (
-        (
-            pd.DataFrame(
-                {
-                    "summary": ["hello there", "general kenobi"],
-                }
-            ),
-            pd.DataFrame({"summary": ["hello there", "no de"]}),
-            ROUGESummaryMetric(column_name="summary", rouge_n=1),
-            ROUGESummaryMetricResult(rouge_type="ROUGE-1", score={"per_row_scores": [1.0, 0.0], "summary_score": 0.5}),
-        ),
-    ),
-)
-def test_rouge_summary_metric_success(
-    current_df: pd.DataFrame,
-    reference_df: pd.DataFrame,
-    metric,
-    expected_result: ROUGESummaryMetricResult,
-) -> None:
-    report = Report(metrics=[metric])
-
-    report.run(current_data=current_df, reference_data=reference_df)
-
-    smart_assert_equal(metric.get_result(), expected_result)


 @pytest.mark.parametrize(
@@ -48,7 +18,13 @@ def test_rouge_summary_metric_success(
             ),
             pd.DataFrame({"summary": ["hello there", "no de"]}),
             ROUGESummaryMetric(column_name="summary", rouge_n=1),
-            {"rouge_type": "ROUGE-1", "score": {"per_row_scores": [1.0, 0.0], "summary_score": 0.5}},
+            {
+                "current": ["hello there", "general kenobi"],
+                "reference": ["hello there", "no de"],
+                "rouge_type": "ROUGE-1",
+                "per_row_scores": [1.0, 0.0],
+                "summary_score": 0.5,
+            },
         ),
     ),
 )
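
The expected values follow directly from the ROUGE-1 definition (unigram overlap): "hello there" vs "hello there" matches exactly, giving a per-row score of 1.0; "general kenobi" vs "no de" share no unigrams, giving 0.0; and summary_score is their mean, (1.0 + 0.0) / 2 = 0.5.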

tests/multitest/metrics/data_integrity.py

Lines changed: 22 additions & 0 deletions

@@ -16,6 +16,7 @@
 from evidently.metrics.data_integrity.column_summary_metric import NumericCharacteristics
 from evidently.metrics.data_integrity.dataset_missing_values_metric import DatasetMissingValuesMetric
 from evidently.metrics.data_integrity.dataset_summary_metric import DatasetSummaryMetric
+from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
 from tests.multitest.conftest import AssertExpectedResult
 from tests.multitest.conftest import Error
 from tests.multitest.conftest import NoopOutcome
@@ -206,6 +207,27 @@ def dataset_summary_metric():
     )


+@metric
+def rouge_summary_metric():
+    return TestMetric(
+        name="rouge_summary_metric",
+        metric=ROUGESummaryMetric(column_name="summary", rouge_n=1),
+        fingerprint="bfc616f760b973d2cbfbf0540c7b2c71",
+        outcomes=NoopOutcome(),
+        datasets=[
+            TestDataset(
+                "rouge_summary_metric_data",
+                current=pd.DataFrame(
+                    {
+                        "summary": ["hello there", "general kenobi"],
+                    }
+                ),
+                reference=pd.DataFrame({"summary": ["hello there", "no de"]}),
+            ),
+        ],
+    )
+
+
 @metric
 def column_reg_exp_metric():
     return TestMetric(
