Skip to content
1 change: 1 addition & 0 deletions week1/[MLNovice]김문희_week1.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions week2/[MLNovice]김문희_week2.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions week3/[MLNovice]김문희_week3.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions week4/[MLNovice]김문희_week4.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions week5/[MLNovice]김문희_week5.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions week6/[MLNovice]김문희_week6.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions week7/[MLNovice]김문희_week7.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions week8/[MLNovice]김문희_week8.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions week9/[MLNovice]김문희_week9.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": [],
   "authorship_tag": "ABX9TyM7wYoX7EGWftCHtjZhK9Lg"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Week 9: text classification with TF-IDF and logistic regression\n",
    "\n",
    "Clean the `text` column, split the labelled rows into training and validation sets, vectorize with TF-IDF (fitted on the training split only), fit a logistic-regression classifier, and predict `target` for the test set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from IPython.display import display\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Tunable settings, collected in one place.\n",
    "TRAIN_PATH = Path('train.csv')\n",
    "TEST_PATH = Path('test.csv')\n",
    "SEED = 42            # fixes the train/validation split\n",
    "VAL_FRACTION = 0.2   # share of labelled rows held out for validation\n",
    "MAX_FEATURES = 5000  # cap on the TF-IDF vocabulary size"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WQVpe78Yfxwc"
   },
   "outputs": [],
   "source": [
    "# Prompt for an upload only when a data file is missing, so that a\n",
    "# Restart-and-Run-All does not stop for a manual step on every run.\n",
    "if not (TRAIN_PATH.exists() and TEST_PATH.exists()):\n",
    "    # Imported lazily: google.colab is only importable inside a Colab runtime.\n",
    "    from google.colab import files\n",
    "\n",
    "    uploaded = files.upload()  # choose train.csv and test.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "hnwQSEcbgMQz"
   },
   "outputs": [],
   "source": [
    "train_data = pd.read_csv(TRAIN_PATH)\n",
    "test_data = pd.read_csv(TEST_PATH)\n",
    "\n",
    "# Rich display instead of print(): wide frames are not wrapped across lines.\n",
    "display(train_data.head(), test_data.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clean text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0rWffUKugeGc"
   },
   "outputs": [],
   "source": [
    "def clean_text(text):\n",
    "    \"\"\"Normalize raw text before vectorization.\n",
    "\n",
    "    Removes URLs, @mentions and every character that is not an ASCII\n",
    "    letter or whitespace, then lower-cases and strips the result.\n",
    "\n",
    "    Args:\n",
    "        text: The raw string. Any non-string value (for example the\n",
    "            float NaN pandas uses for a missing cell) is accepted.\n",
    "\n",
    "    Returns:\n",
    "        The cleaned string, or '' when ``text`` is not a string.\n",
    "    \"\"\"\n",
    "    # re.sub raises TypeError on NaN/None, so treat those as empty text.\n",
    "    if not isinstance(text, str):\n",
    "        return ''\n",
    "    text = re.sub(r'http\\S+', '', text)      # drop URLs\n",
    "    text = re.sub(r'@\\w+', '', text)         # drop @mentions\n",
    "    text = re.sub(r'[^a-zA-Z\\s]', '', text)  # drop digits, punctuation, non-ASCII\n",
    "    return text.lower().strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the result in a new column; the raw `text` column is left untouched,\n",
    "# so this cell can be re-run safely.\n",
    "train_data['cleaned_text'] = train_data['text'].apply(clean_text)\n",
    "test_data['cleaned_text'] = test_data['text'].apply(clean_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split, then vectorize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-YkexoY_g6fw"
   },
   "outputs": [],
   "source": [
    "# Split the cleaned text BEFORE vectorizing so validation rows stay unseen.\n",
    "text_train, text_val, y_train, y_val = train_test_split(\n",
    "    train_data['cleaned_text'],\n",
    "    train_data['target'],\n",
    "    test_size=VAL_FRACTION,\n",
    "    random_state=SEED,\n",
    ")\n",
    "\n",
    "# Learn the vocabulary and IDF weights from the training split only.\n",
    "# Fitting on the full frame would leak validation statistics into the\n",
    "# features and make the validation score optimistic.\n",
    "vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)\n",
    "X_train = vectorizer.fit_transform(text_train)\n",
    "X_val = vectorizer.transform(text_val)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train and validate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "duTVJs2ag_Ub"
   },
   "outputs": [],
   "source": [
    "# Train the classifier on the training split\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train, y_train)\n",
    "\n",
    "# Predict on the held-out validation split\n",
    "val_predictions = model.predict(X_val)\n",
    "\n",
    "# Report overall accuracy and per-class metrics\n",
    "print('Validation Accuracy:', accuracy_score(y_val, val_predictions))\n",
    "print(classification_report(y_val, val_predictions))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predict on the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WFBgUjzwhCns"
   },
   "outputs": [],
   "source": [
    "# Vectorize the test text with the already-fitted vocabulary, then predict\n",
    "X_test = vectorizer.transform(test_data['cleaned_text'])\n",
    "test_predictions = model.predict(X_test)"
   ]
  }
 ]
}