Commit a50c9e2

rouge metric integration
1 parent 4bd1c13 commit a50c9e2

7 files changed: +261 -2 lines changed

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Evidently Dataset ROUGE Summary Metric"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from evidently.report import Report\n",
+    "from evidently.metrics import ROUGESummaryMetric"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "current_data = {\n",
+    "    \"summary\": [\"hello there\", \"general kenobi\"],\n",
+    "}\n",
+    "\n",
+    "current_df = pd.DataFrame(current_data)\n",
+    "\n",
+    "reference_data = {\n",
+    "    \"summary\": [\"hello there\", \"no de\"]\n",
+    "}\n",
+    "\n",
+    "current_df = pd.DataFrame(current_data)\n",
+    "reference_df = pd.DataFrame(reference_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report = Report(metrics=[\n",
+    "    ROUGESummaryMetric(column_name=\"summary\", rouge_n=1)\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report.run(current_data=current_df, reference_data=reference_df)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "report.as_dict()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "evidently",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.19"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
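
The notebook ends with report.as_dict(). For the sample rows above, the metric's entry in that dictionary carries the two fields of ROUGESummaryMetricResult; a rough sketch of the expected shape, inferred from the values asserted in the tests added by this commit (surrounding report keys abbreviated, and the "metric" key layout is an assumption):

# Approximate shape of the ROUGESummaryMetric entry in report.as_dict():
# "hello there" matches its reference exactly (ROUGE-1 = 1.0), while
# "general kenobi" shares no unigrams with "no de" (ROUGE-1 = 0.0),
# so the summary score is the average, 0.5.
{
    "metrics": [
        {
            "metric": "ROUGESummaryMetric",  # assumed key layout
            "result": {
                "rouge_type": "ROUGE-1",
                "score": {"per_row_scores": [1.0, 0.0], "summary_score": 0.5},
            },
        }
    ]
}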

examples/how_to_questions/metrics/data_integrity/dataset_summary_metric.ipynb

Lines changed: 1 addition & 1 deletion
@@ -116,7 +116,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.13"
+   "version": "3.8.19"
   }
  },
  "nbformat": 4,

requirements.min.txt

Lines changed: 1 addition & 0 deletions
@@ -31,4 +31,5 @@ openai==1.16.2
 evaluate==0.4.1
 transformers[torch]==4.39.3
 sentence-transformers==2.7.0
+rouge-score==0.1.2
 chromadb==0.4.0

setup.py

Lines changed: 2 additions & 1 deletion
@@ -92,7 +92,7 @@
         "types-python-dateutil==2.8.19",
         "types-ujson>=5.4.0",
         "pillow==10.3.0",
-        "httpx==0.27.0",
+        "httpx==0.24.1",
         "ruff==0.3.7",
         "pre-commit==3.5.0",
         "pytest-asyncio==0.23.7",
@@ -102,6 +102,7 @@
         "evaluate>=0.4.1",
         "transformers[torch]>=4.39.3",
         "sentence-transformers>=2.7.0",
+        "rouge-score>=0.1.2",
         "chromadb>=0.4.0",
     ],
     "spark": ["pyspark>=3.4.0"],

src/evidently/metrics/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -32,6 +32,7 @@
 from .data_integrity.column_summary_metric import ColumnSummaryMetric
 from .data_integrity.dataset_missing_values_metric import DatasetMissingValuesMetric
 from .data_integrity.dataset_summary_metric import DatasetSummaryMetric
+from .data_integrity.rouge_summary_metric import ROUGESummaryMetric
 from .data_quality.column_category_metric import ColumnCategoryMetric
 from .data_quality.column_correlations_metric import ColumnCorrelationsMetric
 from .data_quality.column_distribution_metric import ColumnDistributionMetric
@@ -99,6 +100,7 @@
     "ColumnSummaryMetric",
     "DatasetMissingValuesMetric",
     "DatasetSummaryMetric",
+    "ROUGESummaryMetric",
     "ColumnCategoryMetric",
     "ColumnCorrelationsMetric",
     "ColumnDistributionMetric",
src/evidently/metrics/data_integrity/rouge_summary_metric.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+from typing import List
+from typing import Union
+
+import evaluate
+import pandas as pd
+
+from evidently.base_metric import ColumnName
+from evidently.base_metric import InputData
+from evidently.base_metric import Metric
+from evidently.base_metric import MetricResult
+from evidently.core import IncludeTags
+from evidently.model.widget import BaseWidgetInfo
+from evidently.renderers.base_renderer import MetricRenderer
+from evidently.renderers.base_renderer import default_renderer
+from evidently.renderers.html_widgets import header_text
+from evidently.renderers.html_widgets import table_data
+
+
+class ROUGESummaryMetricResult(MetricResult):
+    class Config:
+        type_alias = "evidently:metric_result:ROUGESummaryMetricResult"
+        field_tags = {
+            "rouge_type": {IncludeTags.Parameter},
+            "value": {IncludeTags.Parameter},
+        }
+
+    rouge_type: str
+    score: dict
+
+
+class ROUGESummaryMetric(Metric[ROUGESummaryMetricResult]):
+    class Config:
+        type_alias = "evidently:metric:ROUGESummaryMetric"
+        arbitrary_types_allowed = True
+
+    column_name: str
+    rouge_n: int
+
+    def __init__(self, column_name: Union[str, ColumnName], rouge_n: int):
+        self.column_name = column_name
+        self.rouge_n = rouge_n
+        super().__init__()
+
+    def _calculate_summary_rouge(self, current_data: pd.Series, reference_data: pd.Series):
+        rouge_evaluator = evaluate.load("rouge")
+
+        predictions = current_data.astype(str).tolist()
+        references = reference_data.astype(str).tolist()
+
+        rouge_scores = rouge_evaluator.compute(
+            rouge_types=[f"rouge{self.rouge_n}"], predictions=predictions, references=references, use_aggregator=False
+        )
+
+        per_row_rouge_scores = rouge_scores[f"rouge{self.rouge_n}"]
+
+        summary_rouge_score = sum(per_row_rouge_scores) / len(per_row_rouge_scores)
+
+        return per_row_rouge_scores, summary_rouge_score
+
+    def calculate(self, data: InputData) -> MetricResult:
+        if len(data.current_data[self.column_name]) == 0 or len(data.reference_data[self.column_name]) == 0:
+            raise ValueError("The current data or the reference data is empty.")
+
+        per_row_rouge_scores, summary_rouge_score = self._calculate_summary_rouge(
+            data.current_data[self.column_name], data.reference_data[self.column_name]
+        )
+
+        result = ROUGESummaryMetricResult(
+            rouge_type=f"ROUGE-{self.rouge_n}",
+            score={"per_row_scores": per_row_rouge_scores, "summary_score": summary_rouge_score},
+        )
+        return result
+
+
+@default_renderer(wrap_type=ROUGESummaryMetric)
+class ROUGESummaryMetricRenderer(MetricRenderer):
+    @staticmethod
+    def _get_table(metric, n: int = 2) -> BaseWidgetInfo:
+        column_names = ["Metric", "Value"]
+        rows = ([metric.rouge_type, metric.score],)
+        return table_data(title="", column_names=column_names, data=rows)
+
+    def render_html(self, obj: ROUGESummaryMetricResult) -> List[BaseWidgetInfo]:
+        metric = obj.get_result()
+        return [header_text(label="ROUGE Metric"), self._get_table(metric)]
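
Internally, _calculate_summary_rouge delegates to Hugging Face evaluate. A standalone sketch of that call on the example rows from the notebook and tests, assuming evaluate 0.4.x, where the rouge metric returns per-example F-measure floats when use_aggregator=False:

import evaluate

# Per-row ROUGE-1 for the sample pairs, then the plain average the metric reports.
rouge_evaluator = evaluate.load("rouge")
scores = rouge_evaluator.compute(
    rouge_types=["rouge1"],
    predictions=["hello there", "general kenobi"],  # current data
    references=["hello there", "no de"],  # reference data
    use_aggregator=False,
)
per_row = scores["rouge1"]  # [1.0, 0.0]: exact match, then no shared unigrams
summary = sum(per_row) / len(per_row)  # 0.5
print(per_row, summary)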
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+import json
+
+import pandas as pd
+import pytest
+
+from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
+from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetricResult
+from evidently.report.report import Report
+from tests.conftest import smart_assert_equal
+
+
+@pytest.mark.parametrize(
+    "current_df, reference_df, metric, expected_result",
+    (
+        (
+            pd.DataFrame(
+                {
+                    "summary": ["hello there", "general kenobi"],
+                }
+            ),
+            pd.DataFrame({"summary": ["hello there", "no de"]}),
+            ROUGESummaryMetric(column_name="summary", rouge_n=1),
+            ROUGESummaryMetricResult(rouge_type="ROUGE-1", score={"per_row_scores": [1.0, 0.0], "summary_score": 0.5}),
+        ),
+    ),
+)
+def test_rouge_summary_metric_success(
+    current_df: pd.DataFrame,
+    reference_df: pd.DataFrame,
+    metric,
+    expected_result: ROUGESummaryMetricResult,
+) -> None:
+    report = Report(metrics=[metric])
+
+    report.run(current_data=current_df, reference_data=reference_df)
+
+    smart_assert_equal(metric.get_result(), expected_result)
+
+
+@pytest.mark.parametrize(
+    "current_df, reference_df, metric, expected_json",
+    (
+        (
+            pd.DataFrame(
+                {
+                    "summary": ["hello there", "general kenobi"],
+                }
+            ),
+            pd.DataFrame({"summary": ["hello there", "no de"]}),
+            ROUGESummaryMetric(column_name="summary", rouge_n=1),
+            {"rouge_type": "ROUGE-1", "score": {"per_row_scores": [1.0, 0.0], "summary_score": 0.5}},
+        ),
+    ),
+)
+def test_rouge_summary_metric_with_report(
+    current_df: pd.DataFrame,
+    reference_df: pd.DataFrame,
+    metric,
+    expected_json: dict,
+) -> None:
+    report = Report(metrics=[metric])
+
+    report.run(current_data=current_df, reference_data=reference_df)
+
+    assert report.show()
+    json_result = report.json()
+    assert len(json_result) > 0
+    result = json.loads(json_result)
+    assert result["metrics"][0]["result"] == expected_json
