diff --git a/MACHINE-LEARNING-BASIC-PROJECTS/COVID_19_FAKE_NEWS_DETECTOR.ipynb b/MACHINE-LEARNING-BASIC-PROJECTS/COVID_19_FAKE_NEWS_DETECTOR.ipynb
new file mode 100644
index 0000000..8fa280b
--- /dev/null
+++ b/MACHINE-LEARNING-BASIC-PROJECTS/COVID_19_FAKE_NEWS_DETECTOR.ipynb
@@ -0,0 +1,574 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "COVID-19 FAKE NEWS DETECTOR.ipynb",
+ "provenance": [],
+ "authorship_tag": "ABX9TyOBP9JpPkU1EdFyvLF5IStZ",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NAOLKJNM10oK",
+ "outputId": "a7741de8-7153-4ec2-cf9c-521b0607ddd3"
+ },
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/content/drive')"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Mounted at /content/drive\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "xvhjols02cgP"
+ },
+ "source": [
+ "import numpy as np #for numpy arrays\n",
+ "import pandas as pd #storing data in dataframe\n",
+ "import re #regularexpression - useful for searching text in docs\n",
+ "from nltk.corpus import stopwords #nltk - natural lang toolkit | corpus - main body of docs \n",
+ " #stopwords - words that don't add much value to the text\n",
+ "from nltk.stem.porter import PorterStemmer #gives root word \n",
+ "from sklearn.feature_extraction.text import TfidfVectorizer #convert text to feature vectors i.e numbers\n",
+ "from sklearn.model_selection import train_test_split #split into traaining and test data\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import accuracy_score"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "g7Cbsryr2pFC",
+ "outputId": "b7b1dd52-c6d0-4427-ea15-b8333999d607"
+ },
+ "source": [
+ "import nltk\n",
+ "nltk.download('stopwords')"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+ "[nltk_data] Unzipping corpora/stopwords.zip.\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "aJq-cWWp2rIr",
+ "outputId": "bc429110-95a7-49ec-c3c8-b46270c01c9d"
+ },
+ "source": [
+ "print(stopwords.words('english')) #during steeming remove these stopwords"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "TL5lRHhH2wio",
+ "outputId": "7105dbf5-f5f9-4f04-d776-627c0b432dc4"
+ },
+ "source": [
+ "#pandas represents data in a more structured tabular form\n",
+ "news_dataset = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/fake_new_dataset.csv')\n",
+ "news_dataset.shape\n",
+ "news_dataset.head()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " title | \n",
+ " text | \n",
+ " subcategory | \n",
+ " label | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " FACEBOOK DELETES MICHIGAN ANTI-LOCKDOWN GROUP ... | \n",
+ " Facebook has shuttered a popular group for Mic... | \n",
+ " false news | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Other Viewpoints: COVID-19 is worse than the flu | \n",
+ " We can now officially put to rest all comparis... | \n",
+ " true | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " Bermuda's COVID-19 cases surpass 100 | \n",
+ " The Ministry of Health in Bermuda has confirme... | \n",
+ " true | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " Purdue University says students face 'close to... | \n",
+ " Purdue University President Mitch Daniels, the... | \n",
+ " partially false | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " THE HIGH COST OF LOCKING DOWN AMERICA: “WE’VE ... | \n",
+ " Locking down much of the country may have help... | \n",
+ " false news | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 ... label\n",
+ "0 0 ... 0\n",
+ "1 1 ... 1\n",
+ "2 2 ... 1\n",
+ "3 3 ... 0\n",
+ "4 4 ... 0\n",
+ "\n",
+ "[5 rows x 5 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 5
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Kzr_tGLQ3i7x",
+ "outputId": "0f67988d-3cb2-43bd-cb26-7a278b13a390"
+ },
+ "source": [
+ "news_dataset.isnull().sum() #counting no. of missing data in each column"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Unnamed: 0 0\n",
+ "title 1\n",
+ "text 0\n",
+ "subcategory 0\n",
+ "label 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 35
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Ttsg5BwI3nT3"
+ },
+ "source": [
+ "#replacing empty colums with null\n",
+ "news_dataset = news_dataset.fillna('')"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Iv10FYbdo5Qp"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "o9QE7Xh43ry-",
+ "outputId": "1fd92857-b05a-472a-93ab-33737d4c9ffc"
+ },
+ "source": [
+ "#combine title , text\n",
+ "news_dataset['content'] = news_dataset['title'] + ' ' + news_dataset['text']\n",
+ "print(news_dataset['content'])"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 FACEBOOK DELETES MICHIGAN ANTI-LOCKDOWN GROUP ...\n",
+ "1 Other Viewpoints: COVID-19 is worse than the f...\n",
+ "2 Bermuda's COVID-19 cases surpass 100 The Minis...\n",
+ "3 Purdue University says students face 'close to...\n",
+ "4 THE HIGH COST OF LOCKING DOWN AMERICA: “WE’VE ...\n",
+ " ... \n",
+ "3114 2019-nCoV: Health dept. on full alert Quaranti...\n",
+ "3115 Screening machine for corona virus launched at...\n",
+ "3116 Coronavirus (2019-nCoV) Coronavirus (2019-nCoV...\n",
+ "3117 Characterizing Patients Hospitalized With COVI...\n",
+ "3118 Corona Virus WARNING Just something to be prep...\n",
+ "Name: content, Length: 3119, dtype: object\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "ka4Vsvi53uHF"
+ },
+ "source": [
+ "#separating the label column\n",
+ "\n",
+ "x = news_dataset.drop('label',axis = 1) #axis = 1 for column, axis = 0 for label\n",
+ "y = news_dataset['label']\n",
+ "#\n",
+ "#print(y)\n",
+ "#print(x)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "n7FvdkUF30-o"
+ },
+ "source": [
+ "port_stem = PorterStemmer()"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "ADZEyVcf34Eb"
+ },
+ "source": [
+ "#stemming\n",
+ "\n",
+ "def stemming(content):\n",
+ " stemmed_content = re.sub('[^a-zA-Z]',' ',content) #sub will substitute everything that is not alphabet with null\n",
+ " stemmed_content = stemmed_content.lower()\n",
+ " stemmed_content = stemmed_content.split()\n",
+ " stemmed_content = [port_stem.stem(word) for word in stemmed_content if not word in stopwords.words('english')]\n",
+ " stemmed_content = ' '.join(stemmed_content)\n",
+ " return stemmed_content"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "hHklhXpO4KZM",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 573
+ },
+ "outputId": "c661df0e-fe35-49df-d452-101b5a625768"
+ },
+ "source": [
+ "news_dataset['content'] = news_dataset['content'].apply(stemming) #taking only the root words"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "KeyError",
+ "evalue": "ignored",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2897\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2898\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'content'",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnews_dataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnews_dataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstemming\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#taking only the root words\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2904\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2905\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2906\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2907\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2908\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2898\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2900\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2902\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'content'"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 573
+ },
+ "id": "ePmdAmQv9HN2",
+ "outputId": "ea02aa12-5e63-442a-d0dd-6a9ecfa2b08f"
+ },
+ "source": [
+ "# converting data to numbers for feeding in the ML model\n",
+ "x = news_dataset['content'].values\n",
+ "y = news_dataset['label'].values\n",
+ "\n",
+ "#tf - term frequency\n",
+ "#idf - inwords document frequency\n",
+ "\n",
+ "vectorizer = TfidfVectorizer()\n",
+ "vectorizer.fit(x)\n",
+ "\n",
+ "x = vectorizer.transform(x)\n",
+ "\n",
+ "print(x)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "KeyError",
+ "evalue": "ignored",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2897\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2898\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'content'",
+ "\nThe above exception was the direct cause of the following exception:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# converting data to numbers for feeding in the ML model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnews_dataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnews_dataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'label'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m#tf - term frequency\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2904\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2905\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2906\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2907\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2908\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2898\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2899\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2900\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2902\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'content'"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "55UMjFlPBcwY"
+ },
+ "source": [
+ "DIVIDING THE VECTORIZED DATASET INTO TRAINING & TEST MODEL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "kzu6-CDmBpGf"
+ },
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, stratify=y, random_state=2)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hlM1ruGNE_Gu"
+ },
+ "source": [
+ "LOGISTIC REGRESSION (as it is binary classification we will use logistic regression)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "3tnvMA0eFCdB",
+ "outputId": "5d9ce1cb-8e1d-4e0e-e9fd-0c0e5b5219c8"
+ },
+ "source": [
+ "#ploting the sigmoid function using Logistic Regression function\n",
+ "model = LogisticRegression()\n",
+ "model.fit(x_train, y_train)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+ " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
+ " multi_class='auto', n_jobs=None, penalty='l2',\n",
+ " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
+ " warm_start=False)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 23
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4TlbKirmGsaH",
+ "outputId": "49adf15e-9235-4483-db90-b1dab800b478"
+ },
+ "source": [
+ "#evaluating the model using accuracy score\n",
+ "x_train_pred = model.predict(x_train)\n",
+ "train_accuracy = accuracy_score(x_train_pred, y_train)\n",
+ "\n",
+ "print('Accuracy of the training data : ', train_accuracy)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Accuracy of the training data : 0.9798677884615384\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "G5g8o5coc8JV"
+ },
+ "source": [
+ "x_new = x_test[7]\n",
+ "\n",
+ "prediction = model.predict(x_new)\n",
+ "print(prediction)\n",
+ "#print(news_dataset[1])\n",
+ "\n",
+ "if (prediction[0]==0):\n",
+ " print('The news is Real')\n",
+ "else:\n",
+ " print('The news is Fake')"
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/MACHINE-LEARNING-BASIC-PROJECTS/README.md b/MACHINE-LEARNING-BASIC-PROJECTS/README.md
new file mode 100644
index 0000000..a0272fe
--- /dev/null
+++ b/MACHINE-LEARNING-BASIC-PROJECTS/README.md
@@ -0,0 +1,3 @@
+# MACHINE LEARNING BASIC PROJECTS
+1. **COVID-19 FAKE NEWS DETECTOR** - A LOGISTIC REGRESSION MODEL TO PREDICT FAKE AND REAL COVID-19 NEWS FROM A DATASET | SUPERVISED LEARNING
+2. **ROCKS vs MINES** - A LOGISTIC REGRESSION MODEL TO PREDICT IF THERE IS A MINE OR ROCKS AT A GIVEN LOCATION | SUPERVISED LEARNING
diff --git a/MACHINE-LEARNING-BASIC-PROJECTS/ROCK_vs_SUBMARINE_PREDICTION.ipynb b/MACHINE-LEARNING-BASIC-PROJECTS/ROCK_vs_SUBMARINE_PREDICTION.ipynb
new file mode 100644
index 0000000..1ec9603
--- /dev/null
+++ b/MACHINE-LEARNING-BASIC-PROJECTS/ROCK_vs_SUBMARINE_PREDICTION.ipynb
@@ -0,0 +1,1327 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "ROCK vs SUBMARINE - PREDICTION.ipynb",
+ "provenance": [],
+ "authorship_tag": "ABX9TyNaCvEwNSgO685poUwfrwar",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dHyh_thlbgQ1"
+ },
+ "source": [
+ "#importing all libraries and dependencies\n",
+ "import pandas as pd\n",
+ "import numpy as np #Numpy array\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import accuracy_score\n"
+ ],
+ "execution_count": 26,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 226
+ },
+ "id": "L-0cPQ3-fM_n",
+ "outputId": "e7309791-4be1-4b6d-fa64-a4b7e68d2cdb"
+ },
+ "source": [
+ "#reading the csv file\n",
+ "dataset = pd.read_csv('/content/Copy of sonar data.csv' , header=None)\n",
+ "dataset.head()"
+ ],
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " 10 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 13 | \n",
+ " 14 | \n",
+ " 15 | \n",
+ " 16 | \n",
+ " 17 | \n",
+ " 18 | \n",
+ " 19 | \n",
+ " 20 | \n",
+ " 21 | \n",
+ " 22 | \n",
+ " 23 | \n",
+ " 24 | \n",
+ " 25 | \n",
+ " 26 | \n",
+ " 27 | \n",
+ " 28 | \n",
+ " 29 | \n",
+ " 30 | \n",
+ " 31 | \n",
+ " 32 | \n",
+ " 33 | \n",
+ " 34 | \n",
+ " 35 | \n",
+ " 36 | \n",
+ " 37 | \n",
+ " 38 | \n",
+ " 39 | \n",
+ " 40 | \n",
+ " 41 | \n",
+ " 42 | \n",
+ " 43 | \n",
+ " 44 | \n",
+ " 45 | \n",
+ " 46 | \n",
+ " 47 | \n",
+ " 48 | \n",
+ " 49 | \n",
+ " 50 | \n",
+ " 51 | \n",
+ " 52 | \n",
+ " 53 | \n",
+ " 54 | \n",
+ " 55 | \n",
+ " 56 | \n",
+ " 57 | \n",
+ " 58 | \n",
+ " 59 | \n",
+ " 60 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.0200 | \n",
+ " 0.0371 | \n",
+ " 0.0428 | \n",
+ " 0.0207 | \n",
+ " 0.0954 | \n",
+ " 0.0986 | \n",
+ " 0.1539 | \n",
+ " 0.1601 | \n",
+ " 0.3109 | \n",
+ " 0.2111 | \n",
+ " 0.1609 | \n",
+ " 0.1582 | \n",
+ " 0.2238 | \n",
+ " 0.0645 | \n",
+ " 0.0660 | \n",
+ " 0.2273 | \n",
+ " 0.3100 | \n",
+ " 0.2999 | \n",
+ " 0.5078 | \n",
+ " 0.4797 | \n",
+ " 0.5783 | \n",
+ " 0.5071 | \n",
+ " 0.4328 | \n",
+ " 0.5550 | \n",
+ " 0.6711 | \n",
+ " 0.6415 | \n",
+ " 0.7104 | \n",
+ " 0.8080 | \n",
+ " 0.6791 | \n",
+ " 0.3857 | \n",
+ " 0.1307 | \n",
+ " 0.2604 | \n",
+ " 0.5121 | \n",
+ " 0.7547 | \n",
+ " 0.8537 | \n",
+ " 0.8507 | \n",
+ " 0.6692 | \n",
+ " 0.6097 | \n",
+ " 0.4943 | \n",
+ " 0.2744 | \n",
+ " 0.0510 | \n",
+ " 0.2834 | \n",
+ " 0.2825 | \n",
+ " 0.4256 | \n",
+ " 0.2641 | \n",
+ " 0.1386 | \n",
+ " 0.1051 | \n",
+ " 0.1343 | \n",
+ " 0.0383 | \n",
+ " 0.0324 | \n",
+ " 0.0232 | \n",
+ " 0.0027 | \n",
+ " 0.0065 | \n",
+ " 0.0159 | \n",
+ " 0.0072 | \n",
+ " 0.0167 | \n",
+ " 0.0180 | \n",
+ " 0.0084 | \n",
+ " 0.0090 | \n",
+ " 0.0032 | \n",
+ " R | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.0453 | \n",
+ " 0.0523 | \n",
+ " 0.0843 | \n",
+ " 0.0689 | \n",
+ " 0.1183 | \n",
+ " 0.2583 | \n",
+ " 0.2156 | \n",
+ " 0.3481 | \n",
+ " 0.3337 | \n",
+ " 0.2872 | \n",
+ " 0.4918 | \n",
+ " 0.6552 | \n",
+ " 0.6919 | \n",
+ " 0.7797 | \n",
+ " 0.7464 | \n",
+ " 0.9444 | \n",
+ " 1.0000 | \n",
+ " 0.8874 | \n",
+ " 0.8024 | \n",
+ " 0.7818 | \n",
+ " 0.5212 | \n",
+ " 0.4052 | \n",
+ " 0.3957 | \n",
+ " 0.3914 | \n",
+ " 0.3250 | \n",
+ " 0.3200 | \n",
+ " 0.3271 | \n",
+ " 0.2767 | \n",
+ " 0.4423 | \n",
+ " 0.2028 | \n",
+ " 0.3788 | \n",
+ " 0.2947 | \n",
+ " 0.1984 | \n",
+ " 0.2341 | \n",
+ " 0.1306 | \n",
+ " 0.4182 | \n",
+ " 0.3835 | \n",
+ " 0.1057 | \n",
+ " 0.1840 | \n",
+ " 0.1970 | \n",
+ " 0.1674 | \n",
+ " 0.0583 | \n",
+ " 0.1401 | \n",
+ " 0.1628 | \n",
+ " 0.0621 | \n",
+ " 0.0203 | \n",
+ " 0.0530 | \n",
+ " 0.0742 | \n",
+ " 0.0409 | \n",
+ " 0.0061 | \n",
+ " 0.0125 | \n",
+ " 0.0084 | \n",
+ " 0.0089 | \n",
+ " 0.0048 | \n",
+ " 0.0094 | \n",
+ " 0.0191 | \n",
+ " 0.0140 | \n",
+ " 0.0049 | \n",
+ " 0.0052 | \n",
+ " 0.0044 | \n",
+ " R | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.0262 | \n",
+ " 0.0582 | \n",
+ " 0.1099 | \n",
+ " 0.1083 | \n",
+ " 0.0974 | \n",
+ " 0.2280 | \n",
+ " 0.2431 | \n",
+ " 0.3771 | \n",
+ " 0.5598 | \n",
+ " 0.6194 | \n",
+ " 0.6333 | \n",
+ " 0.7060 | \n",
+ " 0.5544 | \n",
+ " 0.5320 | \n",
+ " 0.6479 | \n",
+ " 0.6931 | \n",
+ " 0.6759 | \n",
+ " 0.7551 | \n",
+ " 0.8929 | \n",
+ " 0.8619 | \n",
+ " 0.7974 | \n",
+ " 0.6737 | \n",
+ " 0.4293 | \n",
+ " 0.3648 | \n",
+ " 0.5331 | \n",
+ " 0.2413 | \n",
+ " 0.5070 | \n",
+ " 0.8533 | \n",
+ " 0.6036 | \n",
+ " 0.8514 | \n",
+ " 0.8512 | \n",
+ " 0.5045 | \n",
+ " 0.1862 | \n",
+ " 0.2709 | \n",
+ " 0.4232 | \n",
+ " 0.3043 | \n",
+ " 0.6116 | \n",
+ " 0.6756 | \n",
+ " 0.5375 | \n",
+ " 0.4719 | \n",
+ " 0.4647 | \n",
+ " 0.2587 | \n",
+ " 0.2129 | \n",
+ " 0.2222 | \n",
+ " 0.2111 | \n",
+ " 0.0176 | \n",
+ " 0.1348 | \n",
+ " 0.0744 | \n",
+ " 0.0130 | \n",
+ " 0.0106 | \n",
+ " 0.0033 | \n",
+ " 0.0232 | \n",
+ " 0.0166 | \n",
+ " 0.0095 | \n",
+ " 0.0180 | \n",
+ " 0.0244 | \n",
+ " 0.0316 | \n",
+ " 0.0164 | \n",
+ " 0.0095 | \n",
+ " 0.0078 | \n",
+ " R | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.0100 | \n",
+ " 0.0171 | \n",
+ " 0.0623 | \n",
+ " 0.0205 | \n",
+ " 0.0205 | \n",
+ " 0.0368 | \n",
+ " 0.1098 | \n",
+ " 0.1276 | \n",
+ " 0.0598 | \n",
+ " 0.1264 | \n",
+ " 0.0881 | \n",
+ " 0.1992 | \n",
+ " 0.0184 | \n",
+ " 0.2261 | \n",
+ " 0.1729 | \n",
+ " 0.2131 | \n",
+ " 0.0693 | \n",
+ " 0.2281 | \n",
+ " 0.4060 | \n",
+ " 0.3973 | \n",
+ " 0.2741 | \n",
+ " 0.3690 | \n",
+ " 0.5556 | \n",
+ " 0.4846 | \n",
+ " 0.3140 | \n",
+ " 0.5334 | \n",
+ " 0.5256 | \n",
+ " 0.2520 | \n",
+ " 0.2090 | \n",
+ " 0.3559 | \n",
+ " 0.6260 | \n",
+ " 0.7340 | \n",
+ " 0.6120 | \n",
+ " 0.3497 | \n",
+ " 0.3953 | \n",
+ " 0.3012 | \n",
+ " 0.5408 | \n",
+ " 0.8814 | \n",
+ " 0.9857 | \n",
+ " 0.9167 | \n",
+ " 0.6121 | \n",
+ " 0.5006 | \n",
+ " 0.3210 | \n",
+ " 0.3202 | \n",
+ " 0.4295 | \n",
+ " 0.3654 | \n",
+ " 0.2655 | \n",
+ " 0.1576 | \n",
+ " 0.0681 | \n",
+ " 0.0294 | \n",
+ " 0.0241 | \n",
+ " 0.0121 | \n",
+ " 0.0036 | \n",
+ " 0.0150 | \n",
+ " 0.0085 | \n",
+ " 0.0073 | \n",
+ " 0.0050 | \n",
+ " 0.0044 | \n",
+ " 0.0040 | \n",
+ " 0.0117 | \n",
+ " R | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.0762 | \n",
+ " 0.0666 | \n",
+ " 0.0481 | \n",
+ " 0.0394 | \n",
+ " 0.0590 | \n",
+ " 0.0649 | \n",
+ " 0.1209 | \n",
+ " 0.2467 | \n",
+ " 0.3564 | \n",
+ " 0.4459 | \n",
+ " 0.4152 | \n",
+ " 0.3952 | \n",
+ " 0.4256 | \n",
+ " 0.4135 | \n",
+ " 0.4528 | \n",
+ " 0.5326 | \n",
+ " 0.7306 | \n",
+ " 0.6193 | \n",
+ " 0.2032 | \n",
+ " 0.4636 | \n",
+ " 0.4148 | \n",
+ " 0.4292 | \n",
+ " 0.5730 | \n",
+ " 0.5399 | \n",
+ " 0.3161 | \n",
+ " 0.2285 | \n",
+ " 0.6995 | \n",
+ " 1.0000 | \n",
+ " 0.7262 | \n",
+ " 0.4724 | \n",
+ " 0.5103 | \n",
+ " 0.5459 | \n",
+ " 0.2881 | \n",
+ " 0.0981 | \n",
+ " 0.1951 | \n",
+ " 0.4181 | \n",
+ " 0.4604 | \n",
+ " 0.3217 | \n",
+ " 0.2828 | \n",
+ " 0.2430 | \n",
+ " 0.1979 | \n",
+ " 0.2444 | \n",
+ " 0.1847 | \n",
+ " 0.0841 | \n",
+ " 0.0692 | \n",
+ " 0.0528 | \n",
+ " 0.0357 | \n",
+ " 0.0085 | \n",
+ " 0.0230 | \n",
+ " 0.0046 | \n",
+ " 0.0156 | \n",
+ " 0.0031 | \n",
+ " 0.0054 | \n",
+ " 0.0105 | \n",
+ " 0.0110 | \n",
+ " 0.0015 | \n",
+ " 0.0072 | \n",
+ " 0.0048 | \n",
+ " 0.0107 | \n",
+ " 0.0094 | \n",
+ " R | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4 ... 56 57 58 59 60\n",
+ "0 0.0200 0.0371 0.0428 0.0207 0.0954 ... 0.0180 0.0084 0.0090 0.0032 R\n",
+ "1 0.0453 0.0523 0.0843 0.0689 0.1183 ... 0.0140 0.0049 0.0052 0.0044 R\n",
+ "2 0.0262 0.0582 0.1099 0.1083 0.0974 ... 0.0316 0.0164 0.0095 0.0078 R\n",
+ "3 0.0100 0.0171 0.0623 0.0205 0.0205 ... 0.0050 0.0044 0.0040 0.0117 R\n",
+ "4 0.0762 0.0666 0.0481 0.0394 0.0590 ... 0.0072 0.0048 0.0107 0.0094 R\n",
+ "\n",
+ "[5 rows x 61 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "jQrRiywvg5pb",
+ "outputId": "07d3a80e-c986-4720-8f4a-c6e223276967"
+ },
+ "source": [
+ "#finding number of rows and columns\n",
+ "dataset.shape"
+ ],
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(208, 61)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 320
+ },
+ "id": "kVPTbxJWhtv7",
+ "outputId": "5e6fbacc-575b-4748-f912-848bf80c68e9"
+ },
+ "source": [
+ "#statistical summary (count, mean, standard deviation, min/max, quartiles) of the data\n",
+ "dataset.describe()"
+ ],
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " 10 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 13 | \n",
+ " 14 | \n",
+ " 15 | \n",
+ " 16 | \n",
+ " 17 | \n",
+ " 18 | \n",
+ " 19 | \n",
+ " 20 | \n",
+ " 21 | \n",
+ " 22 | \n",
+ " 23 | \n",
+ " 24 | \n",
+ " 25 | \n",
+ " 26 | \n",
+ " 27 | \n",
+ " 28 | \n",
+ " 29 | \n",
+ " 30 | \n",
+ " 31 | \n",
+ " 32 | \n",
+ " 33 | \n",
+ " 34 | \n",
+ " 35 | \n",
+ " 36 | \n",
+ " 37 | \n",
+ " 38 | \n",
+ " 39 | \n",
+ " 40 | \n",
+ " 41 | \n",
+ " 42 | \n",
+ " 43 | \n",
+ " 44 | \n",
+ " 45 | \n",
+ " 46 | \n",
+ " 47 | \n",
+ " 48 | \n",
+ " 49 | \n",
+ " 50 | \n",
+ " 51 | \n",
+ " 52 | \n",
+ " 53 | \n",
+ " 54 | \n",
+ " 55 | \n",
+ " 56 | \n",
+ " 57 | \n",
+ " 58 | \n",
+ " 59 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ " 208.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 0.029164 | \n",
+ " 0.038437 | \n",
+ " 0.043832 | \n",
+ " 0.053892 | \n",
+ " 0.075202 | \n",
+ " 0.104570 | \n",
+ " 0.121747 | \n",
+ " 0.134799 | \n",
+ " 0.178003 | \n",
+ " 0.208259 | \n",
+ " 0.236013 | \n",
+ " 0.250221 | \n",
+ " 0.273305 | \n",
+ " 0.296568 | \n",
+ " 0.320201 | \n",
+ " 0.378487 | \n",
+ " 0.415983 | \n",
+ " 0.452318 | \n",
+ " 0.504812 | \n",
+ " 0.563047 | \n",
+ " 0.609060 | \n",
+ " 0.624275 | \n",
+ " 0.646975 | \n",
+ " 0.672654 | \n",
+ " 0.675424 | \n",
+ " 0.699866 | \n",
+ " 0.702155 | \n",
+ " 0.694024 | \n",
+ " 0.642074 | \n",
+ " 0.580928 | \n",
+ " 0.504475 | \n",
+ " 0.439040 | \n",
+ " 0.417220 | \n",
+ " 0.403233 | \n",
+ " 0.392571 | \n",
+ " 0.384848 | \n",
+ " 0.363807 | \n",
+ " 0.339657 | \n",
+ " 0.325800 | \n",
+ " 0.311207 | \n",
+ " 0.289252 | \n",
+ " 0.278293 | \n",
+ " 0.246542 | \n",
+ " 0.214075 | \n",
+ " 0.197232 | \n",
+ " 0.160631 | \n",
+ " 0.122453 | \n",
+ " 0.091424 | \n",
+ " 0.051929 | \n",
+ " 0.020424 | \n",
+ " 0.016069 | \n",
+ " 0.013420 | \n",
+ " 0.010709 | \n",
+ " 0.010941 | \n",
+ " 0.009290 | \n",
+ " 0.008222 | \n",
+ " 0.007820 | \n",
+ " 0.007949 | \n",
+ " 0.007941 | \n",
+ " 0.006507 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 0.022991 | \n",
+ " 0.032960 | \n",
+ " 0.038428 | \n",
+ " 0.046528 | \n",
+ " 0.055552 | \n",
+ " 0.059105 | \n",
+ " 0.061788 | \n",
+ " 0.085152 | \n",
+ " 0.118387 | \n",
+ " 0.134416 | \n",
+ " 0.132705 | \n",
+ " 0.140072 | \n",
+ " 0.140962 | \n",
+ " 0.164474 | \n",
+ " 0.205427 | \n",
+ " 0.232650 | \n",
+ " 0.263677 | \n",
+ " 0.261529 | \n",
+ " 0.257988 | \n",
+ " 0.262653 | \n",
+ " 0.257818 | \n",
+ " 0.255883 | \n",
+ " 0.250175 | \n",
+ " 0.239116 | \n",
+ " 0.244926 | \n",
+ " 0.237228 | \n",
+ " 0.245657 | \n",
+ " 0.237189 | \n",
+ " 0.240250 | \n",
+ " 0.220749 | \n",
+ " 0.213992 | \n",
+ " 0.213237 | \n",
+ " 0.206513 | \n",
+ " 0.231242 | \n",
+ " 0.259132 | \n",
+ " 0.264121 | \n",
+ " 0.239912 | \n",
+ " 0.212973 | \n",
+ " 0.199075 | \n",
+ " 0.178662 | \n",
+ " 0.171111 | \n",
+ " 0.168728 | \n",
+ " 0.138993 | \n",
+ " 0.133291 | \n",
+ " 0.151628 | \n",
+ " 0.133938 | \n",
+ " 0.086953 | \n",
+ " 0.062417 | \n",
+ " 0.035954 | \n",
+ " 0.013665 | \n",
+ " 0.012008 | \n",
+ " 0.009634 | \n",
+ " 0.007060 | \n",
+ " 0.007301 | \n",
+ " 0.007088 | \n",
+ " 0.005736 | \n",
+ " 0.005785 | \n",
+ " 0.006470 | \n",
+ " 0.006181 | \n",
+ " 0.005031 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 0.001500 | \n",
+ " 0.000600 | \n",
+ " 0.001500 | \n",
+ " 0.005800 | \n",
+ " 0.006700 | \n",
+ " 0.010200 | \n",
+ " 0.003300 | \n",
+ " 0.005500 | \n",
+ " 0.007500 | \n",
+ " 0.011300 | \n",
+ " 0.028900 | \n",
+ " 0.023600 | \n",
+ " 0.018400 | \n",
+ " 0.027300 | \n",
+ " 0.003100 | \n",
+ " 0.016200 | \n",
+ " 0.034900 | \n",
+ " 0.037500 | \n",
+ " 0.049400 | \n",
+ " 0.065600 | \n",
+ " 0.051200 | \n",
+ " 0.021900 | \n",
+ " 0.056300 | \n",
+ " 0.023900 | \n",
+ " 0.024000 | \n",
+ " 0.092100 | \n",
+ " 0.048100 | \n",
+ " 0.028400 | \n",
+ " 0.014400 | \n",
+ " 0.061300 | \n",
+ " 0.048200 | \n",
+ " 0.040400 | \n",
+ " 0.047700 | \n",
+ " 0.021200 | \n",
+ " 0.022300 | \n",
+ " 0.008000 | \n",
+ " 0.035100 | \n",
+ " 0.038300 | \n",
+ " 0.037100 | \n",
+ " 0.011700 | \n",
+ " 0.036000 | \n",
+ " 0.005600 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000800 | \n",
+ " 0.000500 | \n",
+ " 0.001000 | \n",
+ " 0.000600 | \n",
+ " 0.000400 | \n",
+ " 0.000300 | \n",
+ " 0.000300 | \n",
+ " 0.000100 | \n",
+ " 0.000600 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 0.013350 | \n",
+ " 0.016450 | \n",
+ " 0.018950 | \n",
+ " 0.024375 | \n",
+ " 0.038050 | \n",
+ " 0.067025 | \n",
+ " 0.080900 | \n",
+ " 0.080425 | \n",
+ " 0.097025 | \n",
+ " 0.111275 | \n",
+ " 0.129250 | \n",
+ " 0.133475 | \n",
+ " 0.166125 | \n",
+ " 0.175175 | \n",
+ " 0.164625 | \n",
+ " 0.196300 | \n",
+ " 0.205850 | \n",
+ " 0.242075 | \n",
+ " 0.299075 | \n",
+ " 0.350625 | \n",
+ " 0.399725 | \n",
+ " 0.406925 | \n",
+ " 0.450225 | \n",
+ " 0.540725 | \n",
+ " 0.525800 | \n",
+ " 0.544175 | \n",
+ " 0.531900 | \n",
+ " 0.534775 | \n",
+ " 0.463700 | \n",
+ " 0.411400 | \n",
+ " 0.345550 | \n",
+ " 0.281400 | \n",
+ " 0.257875 | \n",
+ " 0.217575 | \n",
+ " 0.179375 | \n",
+ " 0.154350 | \n",
+ " 0.160100 | \n",
+ " 0.174275 | \n",
+ " 0.173975 | \n",
+ " 0.186450 | \n",
+ " 0.163100 | \n",
+ " 0.158900 | \n",
+ " 0.155200 | \n",
+ " 0.126875 | \n",
+ " 0.094475 | \n",
+ " 0.068550 | \n",
+ " 0.064250 | \n",
+ " 0.045125 | \n",
+ " 0.026350 | \n",
+ " 0.011550 | \n",
+ " 0.008425 | \n",
+ " 0.007275 | \n",
+ " 0.005075 | \n",
+ " 0.005375 | \n",
+ " 0.004150 | \n",
+ " 0.004400 | \n",
+ " 0.003700 | \n",
+ " 0.003600 | \n",
+ " 0.003675 | \n",
+ " 0.003100 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 0.022800 | \n",
+ " 0.030800 | \n",
+ " 0.034300 | \n",
+ " 0.044050 | \n",
+ " 0.062500 | \n",
+ " 0.092150 | \n",
+ " 0.106950 | \n",
+ " 0.112100 | \n",
+ " 0.152250 | \n",
+ " 0.182400 | \n",
+ " 0.224800 | \n",
+ " 0.249050 | \n",
+ " 0.263950 | \n",
+ " 0.281100 | \n",
+ " 0.281700 | \n",
+ " 0.304700 | \n",
+ " 0.308400 | \n",
+ " 0.368300 | \n",
+ " 0.434950 | \n",
+ " 0.542500 | \n",
+ " 0.617700 | \n",
+ " 0.664900 | \n",
+ " 0.699700 | \n",
+ " 0.698500 | \n",
+ " 0.721100 | \n",
+ " 0.754500 | \n",
+ " 0.745600 | \n",
+ " 0.731900 | \n",
+ " 0.680800 | \n",
+ " 0.607150 | \n",
+ " 0.490350 | \n",
+ " 0.429600 | \n",
+ " 0.391200 | \n",
+ " 0.351050 | \n",
+ " 0.312750 | \n",
+ " 0.321150 | \n",
+ " 0.306300 | \n",
+ " 0.312700 | \n",
+ " 0.283500 | \n",
+ " 0.278050 | \n",
+ " 0.259500 | \n",
+ " 0.245100 | \n",
+ " 0.222550 | \n",
+ " 0.177700 | \n",
+ " 0.148000 | \n",
+ " 0.121350 | \n",
+ " 0.101650 | \n",
+ " 0.078100 | \n",
+ " 0.044700 | \n",
+ " 0.017900 | \n",
+ " 0.013900 | \n",
+ " 0.011400 | \n",
+ " 0.009550 | \n",
+ " 0.009300 | \n",
+ " 0.007500 | \n",
+ " 0.006850 | \n",
+ " 0.005950 | \n",
+ " 0.005800 | \n",
+ " 0.006400 | \n",
+ " 0.005300 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 0.035550 | \n",
+ " 0.047950 | \n",
+ " 0.057950 | \n",
+ " 0.064500 | \n",
+ " 0.100275 | \n",
+ " 0.134125 | \n",
+ " 0.154000 | \n",
+ " 0.169600 | \n",
+ " 0.233425 | \n",
+ " 0.268700 | \n",
+ " 0.301650 | \n",
+ " 0.331250 | \n",
+ " 0.351250 | \n",
+ " 0.386175 | \n",
+ " 0.452925 | \n",
+ " 0.535725 | \n",
+ " 0.659425 | \n",
+ " 0.679050 | \n",
+ " 0.731400 | \n",
+ " 0.809325 | \n",
+ " 0.816975 | \n",
+ " 0.831975 | \n",
+ " 0.848575 | \n",
+ " 0.872175 | \n",
+ " 0.873725 | \n",
+ " 0.893800 | \n",
+ " 0.917100 | \n",
+ " 0.900275 | \n",
+ " 0.852125 | \n",
+ " 0.735175 | \n",
+ " 0.641950 | \n",
+ " 0.580300 | \n",
+ " 0.556125 | \n",
+ " 0.596125 | \n",
+ " 0.593350 | \n",
+ " 0.556525 | \n",
+ " 0.518900 | \n",
+ " 0.440550 | \n",
+ " 0.434900 | \n",
+ " 0.424350 | \n",
+ " 0.387525 | \n",
+ " 0.384250 | \n",
+ " 0.324525 | \n",
+ " 0.271750 | \n",
+ " 0.231550 | \n",
+ " 0.200375 | \n",
+ " 0.154425 | \n",
+ " 0.120100 | \n",
+ " 0.068525 | \n",
+ " 0.025275 | \n",
+ " 0.020825 | \n",
+ " 0.016725 | \n",
+ " 0.014900 | \n",
+ " 0.014500 | \n",
+ " 0.012100 | \n",
+ " 0.010575 | \n",
+ " 0.010425 | \n",
+ " 0.010350 | \n",
+ " 0.010325 | \n",
+ " 0.008525 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 0.137100 | \n",
+ " 0.233900 | \n",
+ " 0.305900 | \n",
+ " 0.426400 | \n",
+ " 0.401000 | \n",
+ " 0.382300 | \n",
+ " 0.372900 | \n",
+ " 0.459000 | \n",
+ " 0.682800 | \n",
+ " 0.710600 | \n",
+ " 0.734200 | \n",
+ " 0.706000 | \n",
+ " 0.713100 | \n",
+ " 0.997000 | \n",
+ " 1.000000 | \n",
+ " 0.998800 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.965700 | \n",
+ " 0.930600 | \n",
+ " 1.000000 | \n",
+ " 0.964700 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.949700 | \n",
+ " 1.000000 | \n",
+ " 0.985700 | \n",
+ " 0.929700 | \n",
+ " 0.899500 | \n",
+ " 0.824600 | \n",
+ " 0.773300 | \n",
+ " 0.776200 | \n",
+ " 0.703400 | \n",
+ " 0.729200 | \n",
+ " 0.552200 | \n",
+ " 0.333900 | \n",
+ " 0.198100 | \n",
+ " 0.082500 | \n",
+ " 0.100400 | \n",
+ " 0.070900 | \n",
+ " 0.039000 | \n",
+ " 0.035200 | \n",
+ " 0.044700 | \n",
+ " 0.039400 | \n",
+ " 0.035500 | \n",
+ " 0.044000 | \n",
+ " 0.036400 | \n",
+ " 0.043900 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 ... 57 58 59\n",
+ "count 208.000000 208.000000 208.000000 ... 208.000000 208.000000 208.000000\n",
+ "mean 0.029164 0.038437 0.043832 ... 0.007949 0.007941 0.006507\n",
+ "std 0.022991 0.032960 0.038428 ... 0.006470 0.006181 0.005031\n",
+ "min 0.001500 0.000600 0.001500 ... 0.000300 0.000100 0.000600\n",
+ "25% 0.013350 0.016450 0.018950 ... 0.003600 0.003675 0.003100\n",
+ "50% 0.022800 0.030800 0.034300 ... 0.005800 0.006400 0.005300\n",
+ "75% 0.035550 0.047950 0.057950 ... 0.010350 0.010325 0.008525\n",
+ "max 0.137100 0.233900 0.305900 ... 0.044000 0.036400 0.043900\n",
+ "\n",
+ "[8 rows x 60 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "uOD59QvKiF5W",
+ "outputId": "12950db8-6d91-47df-9564-47dd520363f7"
+ },
+ "source": [
+ "#counts the number of each type of entry in column with index 60\n",
+ "dataset[60].value_counts()"
+ ],
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "M 111\n",
+ "R 97\n",
+ "Name: 60, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 15
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "0gVXl7UIimJ8"
+ },
+ "source": [
+ "#separating the features (x) from the label column at index 60 (y)\n",
+ "x = dataset.drop(columns=60, axis=1)\n",
+ "y = dataset[60]"
+ ],
+ "execution_count": 16,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "uAjoi8G_jg5u",
+ "outputId": "ed9fb33c-6064-4c44-fa49-7f9dd44abc9f"
+ },
+ "source": [
+ "#dividing data for training and testing\n",
+ "x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.05, stratify = y, random_state = 1)\n",
+ "print(x.shape, x_test.shape, x_train.shape)"
+ ],
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(208, 60) (11, 60) (197, 60)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "R-LOlPfZklZL",
+ "outputId": "8aaecb99-1300-4417-9c27-636a9b66eeee"
+ },
+ "source": [
+ "#logistic Regression Model\n",
+ "model = LogisticRegression()\n",
+ "model.fit(x_train,y_train)\n"
+ ],
+ "execution_count": 28,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+ " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
+ " multi_class='auto', n_jobs=None, penalty='l2',\n",
+ " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
+ " warm_start=False)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 28
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ha5_aCEtmfDE",
+ "outputId": "d3340114-785d-4e94-a1cb-d3225ed86de5"
+ },
+ "source": [
+ "#accuracy check of training data\n",
+ "accuracy_train = accuracy_score(model.predict(x_train), y_train)\n",
+ "print(\"Accuracy of the model(train) = \", accuracy_train*100,\"%\")"
+ ],
+ "execution_count": 36,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Accuracy of the model(train) = 83.75634517766497 %\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "a-Heoq5LpGuE",
+ "outputId": "9be2c12a-6bec-41e4-9fac-bcafd6f25d54"
+ },
+ "source": [
+ "#accuracy check of test data\n",
+ "accuracy_test = accuracy_score(model.predict(x_test),y_test)\n",
+ "print(\"Accuracy of the model(test) = \", accuracy_test*100,\"%\")"
+ ],
+ "execution_count": 37,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Accuracy of the model(test) = 72.72727272727273 %\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "I0OIdH6pxFfN",
+ "outputId": "fa163ef5-563a-41a3-ffcb-a401a81aa642"
+ },
+ "source": [
+ "#testing and making prediction\n",
+ "input_data = (0.0307,0.0523,0.0653,0.0521,0.0611,0.0577,0.0665,0.0664,0.1460,0.2792,0.3877,0.4992,0.4981,0.4972,0.5607,0.7339,0.8230,0.9173,0.9975,0.9911,0.8240,0.6498,0.5980,0.4862,0.3150,0.1543,0.0989,0.0284,0.1008,0.2636,0.2694,0.2930,0.2925,0.3998,0.3660,0.3172,0.4609,0.4374,0.1820,0.3376,0.6202,0.4448,0.1863,0.1420,0.0589,0.0576,0.0672,0.0269,0.0245,0.0190,0.0063,0.0321,0.0189,0.0137,0.0277,0.0152,0.0052,0.0121,0.0124,0.0055)\n",
+ "\n",
+ "#converting to a numpy array as it is faster\n",
+ "input_np = np.asarray(input_data)\n",
+ "#reshaping to a 2D array (1 sample, 60 features) as model.predict expects\n",
+ "input_np_reshape = input_np.reshape(1,-1)\n",
+ "\n",
+ "#printing our result\n",
+ "\n",
+ "print(model.predict(input_np_reshape)) # 'M' - Mines and 'R' - Rocks"
+ ],
+ "execution_count": 40,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "['M']\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file