-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathragas_eval.py
68 lines (50 loc) · 1.3 KB
/
ragas_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""Ragas evaluation scaffold for FeTaQA full-table model outputs.

Loads generated answers from ``outputs/fetaqa_fulltable_C_Ragas.jsonl`` and
sets up the four standard Ragas metrics (answer relevancy, faithfulness,
context recall, context precision). The actual ``evaluate(...)`` invocations
are kept below as commented reference examples.
"""
import os

# SECURITY NOTE(review): never hard-code a real API key in source control.
# Using setdefault instead of plain assignment means a key already exported
# in the environment (export OPENAI_API_KEY=...) is NOT clobbered by the
# empty placeholder below — the original assignment overwrote it with "".
os.environ.setdefault("OPENAI_API_KEY", "")  ## API Key
# OPENAI_API_KEY = "" ## API KEY
from datasets import load_dataset
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)

# Model outputs to score: one JSON record per line. Ragas expects the
# features 'question', 'contexts', 'answer', 'ground_truths' (see the
# schema sketch in the comments further down).
data = load_dataset("json", data_files="outputs/fetaqa_fulltable_C_Ragas.jsonl")
print(data)

from ragas import evaluate
from datasets import Dataset

# --- Reference examples (intentionally disabled) -------------------------
# from datasets import load_dataset
#
# fiqa_eval = load_dataset("explodinggradients/fiqa", "ragas_eval")
#
# print(fiqa_eval)
# from ragas import evaluate
# result = evaluate(
#     fiqa_eval["baseline"].select(range(3)), # selecting only 3
#     metrics=[
#         context_precision,
#         faithfulness,
#         answer_relevancy,
#         context_recall,
#     ],
# )
#
# result = evaluate(
#     data["train"].select(range(1000, 1050)), # selecting only 3
#     metrics=[
#         context_precision,
#         faithfulness,
#         answer_relevancy,
#         context_recall,
#     ],
# )
#
# print(result)
# prepare your huggingface dataset in the format
# Dataset({
#     features: ['question', 'contexts', 'answer', 'ground_truths'],
#     num_rows: 25
# })
# dataset: Dataset
# results = evaluate(data)
# {'ragas_score': 0.860, 'context_precision': 0.817,
# 'faithfulness': 0.892, 'answer_relevancy': 0.874}