diff --git "a/8\341\204\214\341\205\256\341\204\216\341\205\241 \341\204\211\341\205\246\341\204\211\341\205\247\341\206\253 - GPT.pdf" "b/8\341\204\214\341\205\256\341\204\216\341\205\241 \341\204\211\341\205\246\341\204\211\341\205\247\341\206\253 - GPT.pdf" new file mode 100644 index 0000000..3faa7bf Binary files /dev/null and "b/8\341\204\214\341\205\256\341\204\216\341\205\241 \341\204\211\341\205\246\341\204\211\341\205\247\341\206\253 - GPT.pdf" differ diff --git a/GAN.pdf b/GAN.pdf new file mode 100644 index 0000000..d133fe7 Binary files /dev/null and b/GAN.pdf differ diff --git a/README.md b/README.md index fa849fa..0fad825 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@ BOAZ 25기 분석 Base 세션 자료 및 과제 제출 repo |branch|날짜|세션 내용| |------|-------|-------| -|Week0|25.07.10|전통 ML: SVM, 로지스틱 회귀, 선형/비선형 회귀, KNN, PCA| |Week1|25.07.17|딥러닝 기초 (ANN, DNN, CNN)| |Week2|25.07.24|순환신경망 (RNN, LSTM, GRU) → Seq2Seq 구조| |Week3|25.07.31|GPU 성능 향상 기법: Gradient Accumulation 등| @@ -24,4 +23,4 @@ BOAZ 25기 분석 Base 세션 자료 및 과제 제출 repo |Week12|25.10.02|VISION 논문 리뷰(미정)| |Week13|25.10.09|VISION 논문 리뷰(미정)| *** -Github 사용 안내 : [실습 시 Git Hub 사용 가이드라인](https://oval-alligator-fbf.notion.site/Git-Hub-15c6710199ab8171bc44c081bb22aa00?pvs=4) +Github 사용 안내 : [실습 시 Git Hub 사용 가이드라인](https://observant-wax-f96.notion.site/Git-Hub-229c031af4b980479de4cb6d7e73bf90?source=copy_link) diff --git a/SAM.pdf b/SAM.pdf new file mode 100644 index 0000000..4e9daf6 Binary files /dev/null and b/SAM.pdf differ diff --git a/nDCG.ipynb b/nDCG.ipynb new file mode 100644 index 0000000..f9e98e9 --- /dev/null +++ b/nDCG.ipynb @@ -0,0 +1,538 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "authorship_tag": "ABX9TyMDW8NWKECDoKYu9qzu1e6r", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install pyarrow" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MHuWeerlr8hg", + "outputId": "e9f51348-0016-4401-817e-d3923130b08a" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: pyarrow in /usr/local/lib/python3.12/dist-packages (18.1.0)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install fastparquet" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "szpX1TkWvpJj", + "outputId": "179559ef-0d65-4191-c73b-d5abc0eb6a12" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting fastparquet\n", + " Downloading fastparquet-2024.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)\n", + "Requirement already satisfied: pandas>=1.5.0 in /usr/local/lib/python3.12/dist-packages (from fastparquet) (2.2.2)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from fastparquet) (2.0.2)\n", + "Requirement already satisfied: cramjam>=2.3 in /usr/local/lib/python3.12/dist-packages (from fastparquet) (2.11.0)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from fastparquet) (2025.3.0)\n", + "Requirement already satisfied: 
packaging in /usr/local/lib/python3.12/dist-packages (from fastparquet) (25.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas>=1.5.0->fastparquet) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas>=1.5.0->fastparquet) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas>=1.5.0->fastparquet) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas>=1.5.0->fastparquet) (1.17.0)\n", + "Downloading fastparquet-2024.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: fastparquet\n", + "Successfully installed fastparquet-2024.11.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import unicodedata, os\n", + "\n", + "def normalize_filenames_in_content():\n", + " for fname in os.listdir(\"/content\"):\n", + " fixed = unicodedata.normalize(\"NFC\", fname)\n", + " if fixed != fname:\n", + " os.rename(os.path.join(\"/content\", fname),\n", + " os.path.join(\"/content\", fixed))\n", + " print(f\"🔄 {fname} -> {fixed}\")\n", + "\n", + "normalize_filenames_in_content()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ttBbIyGtruyH", + "outputId": "85ff8631-80ef-4cd2-cea8-f62d8df6b221" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "🔄 예시쿼리5.csv -> 예시쿼리5.csv\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import warnings\n", + "import os\n", + "from sklearn.metrics import ndcg_score\n", + "\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "# ==== 하이퍼파라미터 / 설정 (예시쿼리 관련) ====\n", + "CONFIG = {\n", + " \"QUERY_FILES\": [\n", + " \"예시쿼리1_final.csv\",\n", + " \"예시쿼리2.csv\",\n", + " \"예시쿼리3.csv\",\n", + " \"예시쿼리4.csv\",\n", + " \"예시쿼리5.csv\",\n", + " ],\n", + " \"QUERY_TEXTS\": {\n", + " 0: \"크기 1200에 컴퓨터랑 독서대를 같이 둘 수 있는, 화이트톤에 잘 어울리는 가성비 학생 책상을 추천해줘\",\n", + " 1: \"미드센츄리 무드의 바닥에 안 긁히고 공간을 차지하지 않는 내구성 좋은 아크릴 소재 의자를 추천해줘.\",\n", + " 2: \"4명이 앉을 수 있는 베이지 톤 소파 중에 내구성 좋고 얼룩 관리 쉬운 소파를 추천해줘\",\n", + " 3: \"원룸에서 사용할 충전단자와 조명이 있고, 수납과 조립이 편한 우드톤의 가성비 좋은 침대 프레임을 추천해줘\",\n", + " 4: \"전자레인지 밥솥이 올라갈 수 있는, 선이 안보이게 정리되고, 공간 절약형 깔끔한 수납장 추천해줘\",\n", + " },\n", + " \"MODELS\": [\n", + " (\"koE5\", \"product\", \"desk_results - desk_results.csv\"),\n", + " (\"koE5\", \"product\", \"chair_results - chair_results.csv\"),\n", + " (\"koE5\", \"product\", \"sofa_results - sofa_results.csv\"),\n", + " (\"koE5\", \"product\", \"bed_results - bed_results.csv\"),\n", + " (\"koE5\", \"product\", \"cabinet_results - cabinet_results.csv\"),\n", + " (\"koE5\", \"chunk\", \"desk_chunk - desk_chunk.csv\"),\n", + " (\"koE5\", \"chunk\", \"chair_chunk - chair_chunk.csv\"),\n", + " (\"koE5\", \"chunk\", \"sofa_chunk - sofa_chunk.csv\"),\n", + " (\"koE5\", \"chunk\", \"bed_chunk - bed_chunk.csv\"),\n", + " (\"koE5\", \"chunk\", \"cabinet_chunk - cabinet_chunk.csv\"),\n", + " ],\n", + "}\n", + "CONFIG[\"QUERY_FILE_BY_IDX\"] = dict(enumerate(CONFIG[\"QUERY_FILES\"]))\n", + "\n", + "# ==== 데이터 로드/문서 만들기 ====\n", + "def 
load_data(data_csv=\"오늘의집_reviews.csv\"):\n", + " df = pd.read_csv(data_csv)\n", + " cols = [\"상품명\", \"옵션\", \"원가\", \"할인가\"] + [f\"리뷰{i}\" for i in range(1, 11)]\n", + " for c in cols:\n", + " if c not in df.columns:\n", + " df[c] = \"\"\n", + " return df\n", + "\n", + "# ==== GT/키 생성 ====\n", + "OPTION_ALIASES = [\"색상\", \"옵션명\", \"옵션\"]\n", + "\n", + "def _norm(x):\n", + " if x is None:\n", + " return \"\"\n", + " try:\n", + " if isinstance(x, float) and np.isnan(x):\n", + " return \"\"\n", + " except:\n", + " pass\n", + " return str(x).strip().lower()\n", + "\n", + "def _get_option_value(row):\n", + " for name in OPTION_ALIASES:\n", + " if name in row and pd.notna(row[name]):\n", + " v = _norm(row[name])\n", + " if v:\n", + " return v\n", + " return \"\"\n", + "\n", + "def build_key_from_row(row):\n", + " product_name = _norm(row.get(\"상품명\", \"\"))\n", + " option = _get_option_value(row)\n", + " return (product_name, option)\n", + "\n", + "def load_gt_map_by_key(query_file):\n", + " qdf = pd.read_csv(query_file)\n", + " qdf.columns = [str(c).strip() for c in qdf.columns]\n", + "\n", + " score_col = None\n", + " if \"최종점수\" in qdf.columns:\n", + " score_col = \"최종점수\"\n", + " elif \"점수\" in qdf.columns:\n", + " score_col = \"점수\"\n", + " else:\n", + " qdf[\"_score_\"] = 1.0\n", + " score_col = \"_score_\"\n", + "\n", + " qdf[score_col] = pd.to_numeric(qdf[score_col], errors=\"coerce\").fillna(0.0)\n", + " qdf[\"_key_\"] = qdf.apply(build_key_from_row, axis=1)\n", + "\n", + " gt_map = qdf.groupby(\"_key_\")[score_col].max().to_dict()\n", + " return gt_map\n", + "\n", + "# ==== 키 매칭 개선 ====\n", + "def find_best_match_key(search_key, gt_map, threshold=0.7):\n", + " search_product, search_option = search_key\n", + " best_match = None\n", + " best_score = 0.0\n", + " best_similarity = 0.0\n", + "\n", + " for gt_key, gt_score in gt_map.items():\n", + " gt_product, gt_option = gt_key\n", + "\n", + " if search_key == gt_key:\n", + " return gt_key, gt_score, 1.0\n", + "\n", + " product_similarity = calculate_similarity(search_product, gt_product)\n", + " option_similarity = calculate_similarity(search_option, gt_option)\n", + "\n", + " total_similarity = product_similarity * 0.7 + option_similarity * 0.3\n", + "\n", + " if total_similarity > best_similarity and total_similarity >= threshold:\n", + " best_similarity = total_similarity\n", + " best_match = gt_key\n", + " best_score = gt_score\n", + "\n", + " return best_match, best_score, best_similarity\n", + "\n", + "def calculate_similarity(str1, str2):\n", + " if not str1 or not str2:\n", + " return 0.0\n", + "\n", + " str1 = str1.lower().replace(' ', '')\n", + " str2 = str2.lower().replace(' ', '')\n", + "\n", + " if str1 == str2:\n", + " return 1.0\n", + "\n", + " if str1 in str2 or str2 in str1:\n", + " return 0.8\n", + "\n", + " common_chars = set(str1) & set(str2)\n", + " total_chars = set(str1) | set(str2)\n", + "\n", + " if not total_chars:\n", + " return 0.0\n", + "\n", + " return len(common_chars) / len(total_chars)\n", + "\n", + "# ==== 쿼리별 GT 파일 매핑 수정 ====\n", + "def get_correct_gt_file(query_idx, query_text):\n", + " if query_text.startswith(\"크기 1200\"):\n", + " return CONFIG[\"QUERY_FILES\"][0]\n", + " elif query_text.startswith(\"미드센츄리\"):\n", + " return CONFIG[\"QUERY_FILES\"][1]\n", + " elif query_text.startswith(\"4명이 앉을 수\"):\n", + " return CONFIG[\"QUERY_FILES\"][2]\n", + " elif query_text.startswith(\"원룸에서 사용할\"):\n", + " return CONFIG[\"QUERY_FILES\"][3]\n", + " elif query_text.startswith(\"전자레인지\"):\n", + " return 
CONFIG[\"QUERY_FILES\"][4]\n", + " return None\n", + "\n", + "# ==== Hybrid 모델 결과 로드 (경로 수정) ====\n", + "def load_hybrid_results(hybrid_file):\n", + " try:\n", + " if hybrid_file.endswith('.csv'):\n", + " hybrid_results = pd.read_csv(hybrid_file)\n", + " return hybrid_results\n", + " else:\n", + " print(f\"지원하지 않는 파일 형식: {hybrid_file}\")\n", + " return None\n", + " except Exception as e:\n", + " print(f\"Hybrid 결과 로드 실패: {e}\")\n", + " return None\n", + "\n", + "# ==== output CSV 파일들에 쿼리 정보 추가 (기존 로직 삭제) ====\n", + "# 코랩 환경에서는 output 디렉토리가 없으므로 이 함수는 필요하지 않습니다.\n", + "# 대신, 직접 파일을 로드하고 평가하는 로직을 사용합니다.\n", + "\n", + "# ==== 단순화된 Hybrid 모델 평가 ====\n", + "def evaluate_hybrid_model_simple(hybrid_results, df, df_doc_keys, model_name=\"hybrid\"):\n", + " rows = []\n", + " key_to_index = {k: i for i, k in enumerate(df_doc_keys)}\n", + "\n", + " def select_prediction_score(row: pd.Series, model_name: str):\n", + " search_type = None\n", + " for name, st, csv_file in CONFIG[\"MODELS\"]:\n", + " if f\"{name}_{st}\" == model_name:\n", + " search_type = st\n", + " break\n", + "\n", + " if search_type is None:\n", + " if '_product' in model_name:\n", + " search_type = 'product'\n", + " elif '_chunk' in model_name:\n", + " search_type = 'chunk'\n", + "\n", + " if 'rrf_score' in row.index and pd.notna(row['rrf_score']):\n", + " return float(row['rrf_score'])\n", + " elif 'fused_score' in row.index and pd.notna(row['fused_score']):\n", + " return float(row['fused_score'])\n", + " else:\n", + " return 0.0\n", + "\n", + " for idx, row in hybrid_results.iterrows():\n", + " product_name = _norm(row.get(\"상품명\", \"\"))\n", + " option = _norm(row.get(\"옵션\", \"\"))\n", + " key = (product_name, option)\n", + " doc_idx = row.get(\"product_row_id\")\n", + "\n", + " if doc_idx is None or (isinstance(doc_idx, float) and pd.isna(doc_idx)):\n", + " doc_idx = key_to_index.get(key)\n", + "\n", + " if doc_idx is None:\n", + " continue\n", + "\n", + " score = select_prediction_score(row, model_name)\n", + "\n", + " rows.append({\n", + " \"model\": model_name,\n", + " \"query_file\": row.get(\"query_file\", \"\"),\n", + " \"query_idx\": row.get(\"query_idx\", 0),\n", + " \"query_text\": row.get(\"query_text\", \"\"),\n", + " \"rank\": row.get(\"rank\", idx + 1),\n", + " \"doc_idx\": int(doc_idx),\n", + " \"score\": score,\n", + " \"key\": key\n", + " })\n", + " return pd.DataFrame(rows)\n", + "\n", + "# ==== NDCG 계산 ====\n", + "def compute_ndcg_for_group(group, df_doc_keys, gt_map, k=10):\n", + " g = group.sort_values(\"rank\").head(k)\n", + " ranked_true = []\n", + " ranked_pred = []\n", + "\n", + " for _, row in g.iterrows():\n", + " doc_idx = int(row[\"doc_idx\"])\n", + " pred_s = float(row[\"score\"])\n", + " key = df_doc_keys[doc_idx]\n", + " true_score = gt_map.get(key, 0.0)\n", + " if true_score == 0.0:\n", + " matched_key, matched_score, similarity = find_best_match_key(key, gt_map, threshold=0.6)\n", + " if matched_key is not None and matched_score > 0.0:\n", + " true_score = matched_score\n", + " ranked_true.append(true_score)\n", + " ranked_pred.append(pred_s)\n", + "\n", + " if not ranked_true:\n", + " return 0.0\n", + "\n", + " if all(score == 0.0 for score in ranked_true):\n", + " return 0.0\n", + "\n", + " if all(score == 0.0 for score in ranked_pred):\n", + " return 0.0\n", + "\n", + " gt_coverage = sum(1 for score in ranked_true if score > 0.0) / len(ranked_true)\n", + " gt_ranked = sorted(ranked_true, reverse=True)\n", + " pred_ranked = sorted(ranked_pred, reverse=True)\n", + "\n", + " top3_gt = 
gt_ranked[:3]\n", + " top3_pred = [ranked_pred[i] for i in range(min(3, len(ranked_pred)))]\n", + " gt_top3_in_pred = sum(1 for gt_score in top3_gt if gt_score in top3_pred)\n", + " order_accuracy = gt_top3_in_pred / min(3, len(top3_gt))\n", + "\n", + " y_true = np.array([ranked_true], dtype=float)\n", + " y_score = np.array([ranked_pred], dtype=float)\n", + "\n", + " try:\n", + " ndcg_result = ndcg_score(y_true, y_score, k=k)\n", + "\n", + " penalty_factor = 1.0\n", + " if gt_coverage < 0.8:\n", + " coverage_penalty = (0.8 - gt_coverage) * 0.3\n", + " penalty_factor *= (1.0 - coverage_penalty)\n", + " if order_accuracy < 0.66:\n", + " order_penalty = (0.66 - order_accuracy) * 0.2\n", + " penalty_factor *= (1.0 - order_penalty)\n", + "\n", + " final_ndcg = ndcg_result * penalty_factor\n", + " return final_ndcg\n", + " except Exception as e:\n", + " return 0.0\n", + "\n", + "# ==== 실제 준비 및 실행 ====\n", + "if __name__ == '__main__':\n", + " df = load_data()\n", + " df_doc_keys = [build_key_from_row(df.iloc[i]) for i in range(len(df))]\n", + " print(f\"✅ 문서 키 생성 완료: {len(df_doc_keys)}개\")\n", + "\n", + " hybrid_results_list = []\n", + "\n", + " # 이미지에 보이는 파일명들을 사용하도록 수정\n", + " for model_name, search_type, result_file in CONFIG[\"MODELS\"]:\n", + " print(f\"\\n[Hybrid] {model_name} ({search_type}) 결과 로드 중... ({result_file})\")\n", + " hybrid_results = load_hybrid_results(result_file)\n", + "\n", + " if hybrid_results is not None:\n", + " hybrid_df = evaluate_hybrid_model_simple(\n", + " hybrid_results, df, df_doc_keys, f\"{model_name}_{search_type}\"\n", + " )\n", + "\n", + " if not hybrid_df.empty:\n", + " hybrid_df['search_type'] = search_type\n", + " hybrid_results_list.append(hybrid_df)\n", + " print(f\" - {model_name} ({search_type}): {len(hybrid_df)}개 결과 로드\")\n", + "\n", + " if hybrid_results_list:\n", + " retrieval_df = pd.concat(hybrid_results_list, ignore_index=True)\n", + " retrieval_path = \"retrieval_results.parquet\"\n", + " retrieval_df.to_parquet(retrieval_path, index=False)\n", + " print(f\"\\n✅ CSV 결과 저장 완료: {retrieval_path} (rows={len(retrieval_df)})\\n\")\n", + " else:\n", + " print(\"⚠️ 로드된 CSV 결과가 없습니다.\")\n", + " retrieval_df = pd.DataFrame()\n", + "\n", + " results = []\n", + " if not retrieval_df.empty:\n", + " for qf, sub in retrieval_df.groupby(\"query_file\"):\n", + " print(f\"--- 쿼리 파일: {qf} ---\")\n", + " if \"query_text\" in sub.columns:\n", + " for (model, qidx), g in sub.groupby([\"model\", \"query_idx\"]):\n", + " query_text = g.iloc[0][\"query_text\"]\n", + " if not query_text or query_text.strip() == \"\":\n", + " continue\n", + "\n", + " actual_gt_file = get_correct_gt_file(qidx, query_text)\n", + " if actual_gt_file is None:\n", + " continue\n", + "\n", + " try:\n", + " gt_map = load_gt_map_by_key(actual_gt_file)\n", + " ndcg = compute_ndcg_for_group(g, df_doc_keys, gt_map, k=10)\n", + "\n", + " model_parts = model.split('_')\n", + " if len(model_parts) >= 2:\n", + " base_model = '_'.join(model_parts[:-1])\n", + " search_type = model_parts[-1]\n", + " else:\n", + " base_model = model\n", + " search_type = \"unknown\"\n", + "\n", + " results.append({\n", + " \"query_file\": actual_gt_file,\n", + " \"base_model\": base_model,\n", + " \"search_type\": search_type,\n", + " \"model\": model,\n", + " \"query_idx\": qidx,\n", + " \"ndcg@10\": ndcg\n", + " })\n", + " except Exception as e:\n", + " continue\n", + "\n", + " ndcg_df = pd.DataFrame(results)\n", + " if not ndcg_df.empty:\n", + " ndcg_df.to_csv(\"ndcg_per_query.csv\", index=False)\n", + " print(\"\\n✅ 
NDCG 저장 완료: ndcg_per_query.csv (전체 상세 결과)\")\n", + " else:\n", + " print(\"⚠️ NDCG 계산 결과가 없습니다.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WLB_rcy8vu4D", + "outputId": "54e58cf5-ae49-4468-af88-f0926eaaaa5c" + }, + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "✅ 문서 키 생성 완료: 3309개\n", + "\n", + "[Hybrid] koE5 (product) 결과 로드 중... (desk_results - desk_results.csv)\n", + " - koE5 (product): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (product) 결과 로드 중... (chair_results - chair_results.csv)\n", + " - koE5 (product): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (product) 결과 로드 중... (sofa_results - sofa_results.csv)\n", + " - koE5 (product): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (product) 결과 로드 중... (bed_results - bed_results.csv)\n", + " - koE5 (product): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (product) 결과 로드 중... (cabinet_results - cabinet_results.csv)\n", + " - koE5 (product): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (chunk) 결과 로드 중... (desk_chunk - desk_chunk.csv)\n", + " - koE5 (chunk): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (chunk) 결과 로드 중... (chair_chunk - chair_chunk.csv)\n", + " - koE5 (chunk): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (chunk) 결과 로드 중... (sofa_chunk - sofa_chunk.csv)\n", + " - koE5 (chunk): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (chunk) 결과 로드 중... (bed_chunk - bed_chunk.csv)\n", + " - koE5 (chunk): 5개 결과 로드\n", + "\n", + "[Hybrid] koE5 (chunk) 결과 로드 중... (cabinet_chunk - cabinet_chunk.csv)\n", + " - koE5 (chunk): 5개 결과 로드\n", + "\n", + "✅ CSV 결과 저장 완료: retrieval_results.parquet (rows=50)\n", + "\n", + "--- 쿼리 파일: query/예시쿼리1_final.csv ---\n", + "--- 쿼리 파일: query/예시쿼리2.csv ---\n", + "--- 쿼리 파일: query/예시쿼리3.csv ---\n", + "--- 쿼리 파일: query/예시쿼리4.csv ---\n", + "--- 쿼리 파일: query/예시쿼리5.csv ---\n", + "\n", + "✅ NDCG 저장 완료: ndcg_per_query.csv (전체 상세 결과)\n" + ] + } + ] + } + ] +} \ No newline at end of file
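Review note (appended; not part of the diff): `compute_ndcg_for_group` wraps `sklearn.metrics.ndcg_score` and then scales the result by two heuristic penalties — one when fewer than 80% of the top-k documents carry any ground-truth relevance (coverage), and one when the three highest ground-truth scores are not found among the top-3 ranks (order). The sketch below restates that scoring step on toy numbers so the penalty arithmetic is easy to verify; the function name `penalized_ndcg` and all input values are hypothetical, not from the repo.

```python
# Minimal sketch of the penalty-adjusted NDCG@k used in nDCG.ipynb.
# Assumed inputs: ranked_true = GT relevance of the top-k retrieved docs in
# rank order; ranked_pred = the model's scores for the same docs.
from sklearn.metrics import ndcg_score

def penalized_ndcg(ranked_true, ranked_pred, k=10):
    base = ndcg_score([ranked_true], [ranked_pred], k=k)
    # Coverage penalty: under 80% of the top-k docs have a GT judgment.
    coverage = sum(s > 0.0 for s in ranked_true) / len(ranked_true)
    # Order penalty: ideal top-3 GT scores missing from the top-3 ranks.
    top3_gt = sorted(ranked_true, reverse=True)[:3]
    order_acc = sum(s in ranked_true[:3] for s in top3_gt) / len(top3_gt)
    penalty = 1.0
    if coverage < 0.8:
        penalty *= 1.0 - (0.8 - coverage) * 0.3
    if order_acc < 0.66:
        penalty *= 1.0 - (0.66 - order_acc) * 0.2
    return base * penalty

# coverage = 3/5 -> factor 0.94; order_acc = 2/3 -> no order penalty
print(penalized_ndcg([3.0, 0.0, 2.0, 1.0, 0.0],
                     [0.9, 0.8, 0.7, 0.6, 0.5], k=5))
```

Because both penalties multiply the NDCG value, a run that retrieves many unjudged documents is scored strictly lower than plain NDCG@10 would suggest, which is worth keeping in mind when comparing these numbers against NDCG figures from other tools.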