diff --git a/MACHINE-LEARNING-BASIC-PROJECTS/COVID_19_FAKE_NEWS_DETECTOR.ipynb b/MACHINE-LEARNING-BASIC-PROJECTS/COVID_19_FAKE_NEWS_DETECTOR.ipynb new file mode 100644 index 0000000..8fa280b --- /dev/null +++ b/MACHINE-LEARNING-BASIC-PROJECTS/COVID_19_FAKE_NEWS_DETECTOR.ipynb @@ -0,0 +1,574 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "COVID-19 FAKE NEWS DETECTOR.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyOBP9JpPkU1EdFyvLF5IStZ", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NAOLKJNM10oK", + "outputId": "a7741de8-7153-4ec2-cf9c-521b0607ddd3" + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xvhjols02cgP" + }, + "source": [ + "import numpy as np #for numpy arrays\n", + "import pandas as pd #storing data in dataframe\n", + "import re #regularexpression - useful for searching text in docs\n", + "from nltk.corpus import stopwords #nltk - natural lang toolkit | corpus - main body of docs \n", + " #stopwords - words that don't add much value to the text\n", + "from nltk.stem.porter import PorterStemmer #gives root word \n", + "from sklearn.feature_extraction.text import TfidfVectorizer #convert text to feature vectors i.e numbers\n", + "from sklearn.model_selection import train_test_split #split into traaining and test data\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g7Cbsryr2pFC", + "outputId": "b7b1dd52-c6d0-4427-ea15-b8333999d607" + }, + "source": [ + "import nltk\n", + "nltk.download('stopwords')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/stopwords.zip.\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aJq-cWWp2rIr", + "outputId": "bc429110-95a7-49ec-c3c8-b46270c01c9d" + }, + "source": [ + "print(stopwords.words('english')) #during steeming remove these stopwords" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 
'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "TL5lRHhH2wio", + "outputId": "7105dbf5-f5f9-4f04-d776-627c0b432dc4" + }, + "source": [ + "#pandas represents data in a more structured tabular form\n", + "news_dataset = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/fake_new_dataset.csv')\n", + "news_dataset.shape\n", + "news_dataset.head()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0titletextsubcategorylabel
00FACEBOOK DELETES MICHIGAN ANTI-LOCKDOWN GROUP ...Facebook has shuttered a popular group for Mic...false news0
11Other Viewpoints: COVID-19 is worse than the fluWe can now officially put to rest all comparis...true1
22Bermuda's COVID-19 cases surpass 100The Ministry of Health in Bermuda has confirme...true1
33Purdue University says students face 'close to...Purdue University President Mitch Daniels, the...partially false0
44THE HIGH COST OF LOCKING DOWN AMERICA: “WE’VE ...Locking down much of the country may have help...false news0
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 ... label\n", + "0 0 ... 0\n", + "1 1 ... 1\n", + "2 2 ... 1\n", + "3 3 ... 0\n", + "4 4 ... 0\n", + "\n", + "[5 rows x 5 columns]" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Kzr_tGLQ3i7x", + "outputId": "0f67988d-3cb2-43bd-cb26-7a278b13a390" + }, + "source": [ + "news_dataset.isnull().sum() #counting no. of missing data in each column" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Unnamed: 0 0\n", + "title 1\n", + "text 0\n", + "subcategory 0\n", + "label 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ttsg5BwI3nT3" + }, + "source": [ + "#replacing empty colums with null\n", + "news_dataset = news_dataset.fillna('')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Iv10FYbdo5Qp" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "o9QE7Xh43ry-", + "outputId": "1fd92857-b05a-472a-93ab-33737d4c9ffc" + }, + "source": [ + "#combine title , text\n", + "news_dataset['content'] = news_dataset['title'] + ' ' + news_dataset['text']\n", + "print(news_dataset['content'])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0 FACEBOOK DELETES MICHIGAN ANTI-LOCKDOWN GROUP ...\n", + "1 Other Viewpoints: COVID-19 is worse than the f...\n", + "2 Bermuda's COVID-19 cases surpass 100 The Minis...\n", + "3 Purdue University says students face 'close to...\n", + "4 THE HIGH COST OF LOCKING DOWN AMERICA: “WE’VE ...\n", + " ... \n", + "3114 2019-nCoV: Health dept. 
on full alert Quaranti...\n", + "3115 Screening machine for corona virus launched at...\n", + "3116 Coronavirus (2019-nCoV) Coronavirus (2019-nCoV...\n", + "3117 Characterizing Patients Hospitalized With COVI...\n", + "3118 Corona Virus WARNING Just something to be prep...\n", + "Name: content, Length: 3119, dtype: object\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ka4Vsvi53uHF" + }, + "source": [ + "#separating the label column\n", + "\n", + "x = news_dataset.drop('label',axis = 1) #axis = 1 for column, axis = 0 for label\n", + "y = news_dataset['label']\n", + "#\n", + "#print(y)\n", + "#print(x)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "n7FvdkUF30-o" + }, + "source": [ + "port_stem = PorterStemmer()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ADZEyVcf34Eb" + }, + "source": [ + "#stemming\n", + "\n", + "def stemming(content):\n", + " stemmed_content = re.sub('[^a-zA-Z]',' ',content) #sub will substitute everything that is not alphabet with null\n", + " stemmed_content = stemmed_content.lower()\n", + " stemmed_content = stemmed_content.split()\n", + " stemmed_content = [port_stem.stem(word) for word in stemmed_content if not word in stopwords.words('english')]\n", + " stemmed_content = ' '.join(stemmed_content)\n", + " return stemmed_content" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "hHklhXpO4KZM", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 573 + }, + "outputId": "c661df0e-fe35-49df-d452-101b5a625768" + }, + "source": [ + "news_dataset['content'] = news_dataset['content'].apply(stemming) #taking only the root words" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "error", + "ename": "KeyError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2897\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2898\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'content'", + "\nThe above exception was the direct 
cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnews_dataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnews_dataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstemming\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#taking only the root words\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2904\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2905\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2906\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2907\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2908\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2898\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2900\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2902\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'content'" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 573 + }, + "id": "ePmdAmQv9HN2", + "outputId": 
"ea02aa12-5e63-442a-d0dd-6a9ecfa2b08f" + }, + "source": [ + "# converting data to numbers for feeding in the ML model\n", + "x = news_dataset['content'].values\n", + "y = news_dataset['label'].values\n", + "\n", + "#tf - term frequency\n", + "#idf - inwords document frequency\n", + "\n", + "vectorizer = TfidfVectorizer()\n", + "vectorizer.fit(x)\n", + "\n", + "x = vectorizer.transform(x)\n", + "\n", + "print(x)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "error", + "ename": "KeyError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2897\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2898\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'content'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# converting data to numbers for feeding in the ML model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnews_dataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnews_dataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'label'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m#tf - term frequency\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2904\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2905\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2906\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2907\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2908\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2898\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2900\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2902\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'content'" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "55UMjFlPBcwY" + }, + "source": [ + "DIVIDING THE VECTORIZED DATASET INTO TRAINING & TEST MODEL" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kzu6-CDmBpGf" + }, + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, stratify=y, random_state=2)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hlM1ruGNE_Gu" + }, + "source": [ + "LOGISTIC REGRESSION (as it is binary classification we will use logistic regression)" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3tnvMA0eFCdB", + "outputId": "5d9ce1cb-8e1d-4e0e-e9fd-0c0e5b5219c8" + }, + "source": [ + "#ploting the sigmoid function using Logistic Regression function\n", + "model = LogisticRegression()\n", + "model.fit(x_train, y_train)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + 
"text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", + " multi_class='auto', n_jobs=None, penalty='l2',\n", + " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", + " warm_start=False)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4TlbKirmGsaH", + "outputId": "49adf15e-9235-4483-db90-b1dab800b478" + }, + "source": [ + "#evaluating the model using accuracy score\n", + "x_train_pred = model.predict(x_train)\n", + "train_accuracy = accuracy_score(x_train_pred, y_train)\n", + "\n", + "print('Accuracy of the training data : ', train_accuracy)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy of the training data : 0.9798677884615384\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "G5g8o5coc8JV" + }, + "source": [ + "x_new = x_test[7]\n", + "\n", + "prediction = model.predict(x_new)\n", + "print(prediction)\n", + "#print(news_dataset[1])\n", + "\n", + "if (prediction[0]==0):\n", + " print('The news is Real')\n", + "else:\n", + " print('The news is Fake')" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/MACHINE-LEARNING-BASIC-PROJECTS/README.md b/MACHINE-LEARNING-BASIC-PROJECTS/README.md new file mode 100644 index 0000000..a0272fe --- /dev/null +++ b/MACHINE-LEARNING-BASIC-PROJECTS/README.md @@ -0,0 +1,3 @@ +# MACHINE LEARNING BASIC PROJECTS +1. **COVID-19 FAKE NEWS DETECTOR** - A LOGISTIC REGRESSION MODEL TO PREDICT FAKE AND REAL COVID-19 NEWS FROM A DATASET | SUPERVISED LEARNING +2. 
**ROCKS vs MINES** - A LOGISTIC REGRESSION MODEL TO PREDICT WHETHER A SONAR RETURN COMES FROM A MINE OR A ROCK | SUPERVISED LEARNING diff --git a/MACHINE-LEARNING-BASIC-PROJECTS/ROCK_vs_SUBMARINE_PREDICTION.ipynb b/MACHINE-LEARNING-BASIC-PROJECTS/ROCK_vs_SUBMARINE_PREDICTION.ipynb new file mode 100644 index 0000000..1ec9603 --- /dev/null +++ b/MACHINE-LEARNING-BASIC-PROJECTS/ROCK_vs_SUBMARINE_PREDICTION.ipynb @@ -0,0 +1,1327 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "ROCK vs SUBMARINE - PREDICTION.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyNaCvEwNSgO685poUwfrwar", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dHyh_thlbgQ1" + }, + "source": [ + "#importing all libraries and dependencies\n", + "import pandas as pd\n", + "import numpy as np #NumPy arrays\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score\n" + ], + "execution_count": 26, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 226 + }, + "id": "L-0cPQ3-fM_n", + "outputId": "e7309791-4be1-4b6d-fa64-a4b7e68d2cdb" + }, + "source": [ + "#reading the csv file\n", + "dataset = pd.read_csv('/content/Copy of sonar data.csv' , header=None)\n", + "dataset.head()" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
00.02000.03710.04280.02070.09540.09860.15390.16010.31090.21110.16090.15820.22380.06450.06600.22730.31000.29990.50780.47970.57830.50710.43280.55500.67110.64150.71040.80800.67910.38570.13070.26040.51210.75470.85370.85070.66920.60970.49430.27440.05100.28340.28250.42560.26410.13860.10510.13430.03830.03240.02320.00270.00650.01590.00720.01670.01800.00840.00900.0032R
10.04530.05230.08430.06890.11830.25830.21560.34810.33370.28720.49180.65520.69190.77970.74640.94441.00000.88740.80240.78180.52120.40520.39570.39140.32500.32000.32710.27670.44230.20280.37880.29470.19840.23410.13060.41820.38350.10570.18400.19700.16740.05830.14010.16280.06210.02030.05300.07420.04090.00610.01250.00840.00890.00480.00940.01910.01400.00490.00520.0044R
20.02620.05820.10990.10830.09740.22800.24310.37710.55980.61940.63330.70600.55440.53200.64790.69310.67590.75510.89290.86190.79740.67370.42930.36480.53310.24130.50700.85330.60360.85140.85120.50450.18620.27090.42320.30430.61160.67560.53750.47190.46470.25870.21290.22220.21110.01760.13480.07440.01300.01060.00330.02320.01660.00950.01800.02440.03160.01640.00950.0078R
30.01000.01710.06230.02050.02050.03680.10980.12760.05980.12640.08810.19920.01840.22610.17290.21310.06930.22810.40600.39730.27410.36900.55560.48460.31400.53340.52560.25200.20900.35590.62600.73400.61200.34970.39530.30120.54080.88140.98570.91670.61210.50060.32100.32020.42950.36540.26550.15760.06810.02940.02410.01210.00360.01500.00850.00730.00500.00440.00400.0117R
40.07620.06660.04810.03940.05900.06490.12090.24670.35640.44590.41520.39520.42560.41350.45280.53260.73060.61930.20320.46360.41480.42920.57300.53990.31610.22850.69951.00000.72620.47240.51030.54590.28810.09810.19510.41810.46040.32170.28280.24300.19790.24440.18470.08410.06920.05280.03570.00850.02300.00460.01560.00310.00540.01050.01100.00150.00720.00480.01070.0094R
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 ... 56 57 58 59 60\n", + "0 0.0200 0.0371 0.0428 0.0207 0.0954 ... 0.0180 0.0084 0.0090 0.0032 R\n", + "1 0.0453 0.0523 0.0843 0.0689 0.1183 ... 0.0140 0.0049 0.0052 0.0044 R\n", + "2 0.0262 0.0582 0.1099 0.1083 0.0974 ... 0.0316 0.0164 0.0095 0.0078 R\n", + "3 0.0100 0.0171 0.0623 0.0205 0.0205 ... 0.0050 0.0044 0.0040 0.0117 R\n", + "4 0.0762 0.0666 0.0481 0.0394 0.0590 ... 0.0072 0.0048 0.0107 0.0094 R\n", + "\n", + "[5 rows x 61 columns]" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jQrRiywvg5pb", + "outputId": "07d3a80e-c986-4720-8f4a-c6e223276967" + }, + "source": [ + "#finding number of rows and columns\n", + "dataset.shape" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(208, 61)" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 320 + }, + "id": "kVPTbxJWhtv7", + "outputId": "5e6fbacc-575b-4748-f912-848bf80c68e9" + }, + "source": [ + "#some statistical data like mean, standard deviation for our data\n", + "dataset.describe()" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
count208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000208.000000
mean0.0291640.0384370.0438320.0538920.0752020.1045700.1217470.1347990.1780030.2082590.2360130.2502210.2733050.2965680.3202010.3784870.4159830.4523180.5048120.5630470.6090600.6242750.6469750.6726540.6754240.6998660.7021550.6940240.6420740.5809280.5044750.4390400.4172200.4032330.3925710.3848480.3638070.3396570.3258000.3112070.2892520.2782930.2465420.2140750.1972320.1606310.1224530.0914240.0519290.0204240.0160690.0134200.0107090.0109410.0092900.0082220.0078200.0079490.0079410.006507
std0.0229910.0329600.0384280.0465280.0555520.0591050.0617880.0851520.1183870.1344160.1327050.1400720.1409620.1644740.2054270.2326500.2636770.2615290.2579880.2626530.2578180.2558830.2501750.2391160.2449260.2372280.2456570.2371890.2402500.2207490.2139920.2132370.2065130.2312420.2591320.2641210.2399120.2129730.1990750.1786620.1711110.1687280.1389930.1332910.1516280.1339380.0869530.0624170.0359540.0136650.0120080.0096340.0070600.0073010.0070880.0057360.0057850.0064700.0061810.005031
min0.0015000.0006000.0015000.0058000.0067000.0102000.0033000.0055000.0075000.0113000.0289000.0236000.0184000.0273000.0031000.0162000.0349000.0375000.0494000.0656000.0512000.0219000.0563000.0239000.0240000.0921000.0481000.0284000.0144000.0613000.0482000.0404000.0477000.0212000.0223000.0080000.0351000.0383000.0371000.0117000.0360000.0056000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0008000.0005000.0010000.0006000.0004000.0003000.0003000.0001000.000600
25%0.0133500.0164500.0189500.0243750.0380500.0670250.0809000.0804250.0970250.1112750.1292500.1334750.1661250.1751750.1646250.1963000.2058500.2420750.2990750.3506250.3997250.4069250.4502250.5407250.5258000.5441750.5319000.5347750.4637000.4114000.3455500.2814000.2578750.2175750.1793750.1543500.1601000.1742750.1739750.1864500.1631000.1589000.1552000.1268750.0944750.0685500.0642500.0451250.0263500.0115500.0084250.0072750.0050750.0053750.0041500.0044000.0037000.0036000.0036750.003100
50%0.0228000.0308000.0343000.0440500.0625000.0921500.1069500.1121000.1522500.1824000.2248000.2490500.2639500.2811000.2817000.3047000.3084000.3683000.4349500.5425000.6177000.6649000.6997000.6985000.7211000.7545000.7456000.7319000.6808000.6071500.4903500.4296000.3912000.3510500.3127500.3211500.3063000.3127000.2835000.2780500.2595000.2451000.2225500.1777000.1480000.1213500.1016500.0781000.0447000.0179000.0139000.0114000.0095500.0093000.0075000.0068500.0059500.0058000.0064000.005300
75%0.0355500.0479500.0579500.0645000.1002750.1341250.1540000.1696000.2334250.2687000.3016500.3312500.3512500.3861750.4529250.5357250.6594250.6790500.7314000.8093250.8169750.8319750.8485750.8721750.8737250.8938000.9171000.9002750.8521250.7351750.6419500.5803000.5561250.5961250.5933500.5565250.5189000.4405500.4349000.4243500.3875250.3842500.3245250.2717500.2315500.2003750.1544250.1201000.0685250.0252750.0208250.0167250.0149000.0145000.0121000.0105750.0104250.0103500.0103250.008525
max0.1371000.2339000.3059000.4264000.4010000.3823000.3729000.4590000.6828000.7106000.7342000.7060000.7131000.9970001.0000000.9988001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000000.9657000.9306001.0000000.9647001.0000001.0000000.9497001.0000000.9857000.9297000.8995000.8246000.7733000.7762000.7034000.7292000.5522000.3339000.1981000.0825000.1004000.0709000.0390000.0352000.0447000.0394000.0355000.0440000.0364000.043900
\n", + "
" + ], + "text/plain": [ + " 0 1 2 ... 57 58 59\n", + "count 208.000000 208.000000 208.000000 ... 208.000000 208.000000 208.000000\n", + "mean 0.029164 0.038437 0.043832 ... 0.007949 0.007941 0.006507\n", + "std 0.022991 0.032960 0.038428 ... 0.006470 0.006181 0.005031\n", + "min 0.001500 0.000600 0.001500 ... 0.000300 0.000100 0.000600\n", + "25% 0.013350 0.016450 0.018950 ... 0.003600 0.003675 0.003100\n", + "50% 0.022800 0.030800 0.034300 ... 0.005800 0.006400 0.005300\n", + "75% 0.035550 0.047950 0.057950 ... 0.010350 0.010325 0.008525\n", + "max 0.137100 0.233900 0.305900 ... 0.044000 0.036400 0.043900\n", + "\n", + "[8 rows x 60 columns]" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uOD59QvKiF5W", + "outputId": "12950db8-6d91-47df-9564-47dd520363f7" + }, + "source": [ + "#counts the number of each type of entry in column with index 60\n", + "dataset[60].value_counts()" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "M 111\n", + "R 97\n", + "Name: 60, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0gVXl7UIimJ8" + }, + "source": [ + "#extracting the data only and separating the label\n", + "x = dataset.drop(columns=60, axis=1)\n", + "y = dataset[60]" + ], + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uAjoi8G_jg5u", + "outputId": "ed9fb33c-6064-4c44-fa49-7f9dd44abc9f" + }, + "source": [ + "#dividing data for taining and testing\n", + "x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.05, stratify = y, random_state = 1)\n", + "print(x.shape, x_test.shape, x_train.shape)" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(208, 60) (11, 60) (197, 60)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "R-LOlPfZklZL", + "outputId": "8aaecb99-1300-4417-9c27-636a9b66eeee" + }, + "source": [ + "#logistic Regression Model\n", + "model = LogisticRegression()\n", + "model.fit(x_train,y_train)\n" + ], + "execution_count": 28, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", + " multi_class='auto', n_jobs=None, penalty='l2',\n", + " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", + " warm_start=False)" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ha5_aCEtmfDE", + "outputId": "d3340114-785d-4e94-a1cb-d3225ed86de5" + }, + "source": [ + "#accuracy check of training data\n", + "accuracy_train = accuracy_score(model.predict(x_train), y_train)\n", + "print(\"Accuracy of the model(train) = \", accuracy_train*100,\"%\")" + ], + "execution_count": 36, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy of the model(train) = 83.75634517766497 %\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a-Heoq5LpGuE", + "outputId": 
"9be2c12a-6bec-41e4-9fac-bcafd6f25d54" + }, + "source": [ + "#accuracy check of test data\n", + "accuracy_test = accuracy_score(model.predict(x_test),y_test)\n", + "print(\"Accuracy of the model(test) = \", accuracy_test*100,\"%\")" + ], + "execution_count": 37, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy of the model(test) = 72.72727272727273 %\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "I0OIdH6pxFfN", + "outputId": "fa163ef5-563a-41a3-ffcb-a401a81aa642" + }, + "source": [ + "#testing and making prediction\n", + "input_data = (0.0307,0.0523,0.0653,0.0521,0.0611,0.0577,0.0665,0.0664,0.1460,0.2792,0.3877,0.4992,0.4981,0.4972,0.5607,0.7339,0.8230,0.9173,0.9975,0.9911,0.8240,0.6498,0.5980,0.4862,0.3150,0.1543,0.0989,0.0284,0.1008,0.2636,0.2694,0.2930,0.2925,0.3998,0.3660,0.3172,0.4609,0.4374,0.1820,0.3376,0.6202,0.4448,0.1863,0.1420,0.0589,0.0576,0.0672,0.0269,0.0245,0.0190,0.0063,0.0321,0.0189,0.0137,0.0277,0.0152,0.0052,0.0121,0.0124,0.0055)\n", + " \n", + " #converting to numpy array as it is faster\n", + "input_np = np.asarray(input_data)\n", + " #reshaping\n", + "input_np_reshape = input_np.reshape(1,-1)\n", + "\n", + "#printing our result\n", + "\n", + "print(model.predict(input_np_reshape)) # 'M' - Mines and 'R' - Rocks" + ], + "execution_count": 40, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['M']\n" + ] + } + ] + } + ] +} \ No newline at end of file