Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7,839 changes: 7,063 additions & 776 deletions lecture_1_intro_knn/homework/KNN.ipynb

Large diffs are not rendered by default.

179 changes: 45 additions & 134 deletions lecture_1_intro_knn/homework/knn.py
Original file line number Diff line number Diff line change
@@ -1,143 +1,54 @@
import numpy as np


class KNNClassifier:
    """K-nearest-neighbor classifier using the L1 (Manhattan) distance.

    KNN is a lazy learner: ``fit`` only memorizes the training data and all
    the work happens at prediction time.
    """

    def __init__(self, k=1):
        # k - number of nearest neighbors that vote on each prediction
        self.k = k

    def fit(self, X, y):
        """Memorize the training samples and their labels.

        Arguments:
        X, np array (num_samples, num_features) - training samples
        y, np array (num_samples) - integer class labels
        """
        # Keep the feature dtype as given (casting to int would corrupt
        # float features); labels must be ints for np.bincount at predict time.
        self.train_X = np.asarray(X)
        self.train_y = np.asarray(y, dtype=int)

    def predict(self, X, n_loops=0):
        """Uses the KNN model to predict classes for the data samples provided.

        Arguments:
        X, np array (num_samples, num_features) - samples to run
            through the model
        n_loops, int - which distance implementation to use (0, 1 or 2)

        Returns:
        predictions, np array of ints (num_samples) - predicted class
            for each sample
        """
        if n_loops == 0:
            distances = self.compute_distances_no_loops(X)
        elif n_loops == 1:
            # Fixed: previously called non-existent compute_distances_one_loops.
            distances = self.compute_distances_one_loop(X)
        else:
            distances = self.compute_distances_two_loops(X)

        # Exactly two distinct labels -> binary task, otherwise multi-class.
        if len(np.unique(self.train_y)) == 2:
            return self.predict_labels_binary(distances)
        return self.predict_labels_multiclass(distances)

    def compute_distances_two_loops(self, X):
        """L1 distance from every sample of X to every training sample.

        Simplest implementation with 2 Python loops.

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
        """
        X = np.asarray(X)
        num_test = X.shape[0]
        num_train = self.train_X.shape[0]
        distances = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                distances[i, j] = np.sum(np.abs(X[i] - self.train_X[j]))
        # No .astype(int): truncation would corrupt float distances.
        return distances

    def compute_distances_one_loop(self, X):
        """L1 distance, vectorized over the training set (1 Python loop).

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
        """
        X = np.asarray(X)
        num_test = X.shape[0]
        num_train = self.train_X.shape[0]
        distances = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting: (num_train, num_features) - (num_features,)
            distances[i, :] = np.sum(np.abs(self.train_X - X[i]), axis=1)
        return distances

    def compute_distances_no_loops(self, X):
        """L1 distance, fully vectorized using numpy broadcasting.

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
        """
        X = np.asarray(X)
        # (num_test, 1, F) - (num_train, F) broadcasts to
        # (num_test, num_train, F); summing over axis 2 gives the L1 matrix.
        return np.abs(X[:, np.newaxis, :] - self.train_X).sum(axis=2)

    def predict_labels_binary(self, distances):
        """Majority vote among the k nearest neighbors (binary case).

        Arguments:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample

        Returns:
        pred, np array of int (num_test_samples) - predicted label
            for every test sample
        """
        num_test = distances.shape[0]
        prediction = np.zeros(num_test, dtype=int)
        for i in range(num_test):
            # Labels of the k closest training samples.
            closest_y = self.train_y[np.argsort(distances[i])[:self.k]]
            prediction[i] = np.argmax(np.bincount(closest_y))
        return prediction

    def predict_labels_multiclass(self, distances):
        """Majority vote among the k nearest neighbors (multi-class case).

        Arguments:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample

        Returns:
        pred, np array of int (num_test_samples) - predicted class index
            for every test sample
        """
        num_test = distances.shape[0]
        prediction = np.zeros(num_test, dtype=int)
        for i in range(num_test):
            closest_y = self.train_y[np.argsort(distances[i, :])[:self.k]]
            # Fixed: the diff filled y_pred but returned the untouched
            # `prediction` array of zeros.
            prediction[i] = np.argmax(np.bincount(closest_y))
        return prediction
89 changes: 19 additions & 70 deletions lecture_1_intro_knn/homework/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,86 +2,35 @@


def binary_classification_metrics(y_pred, y_true):
    """
    Computes metrics for binary classification
    Arguments:
    y_pred, np array (num_samples) - model predictions
    y_true, np array (num_samples) - true labels
    Returns:
    precision, recall, f1, accuracy - classification metrics
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)

    true_positive = np.sum((y_pred == 1) & (y_true == 1))
    false_positive = np.sum((y_pred == 1) & (y_true == 0))
    false_negative = np.sum((y_pred == 0) & (y_true == 1))
    true_negative = np.sum((y_pred == 0) & (y_true == 0))

    # Guard the undefined cases (no predicted / no actual positives) instead
    # of emitting nan with a RuntimeWarning; 0.0 is the common convention.
    predicted_positive = true_positive + false_positive
    actual_positive = true_positive + false_negative
    precision = true_positive / predicted_positive if predicted_positive > 0 else 0.0
    recall = true_positive / actual_positive if actual_positive > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    total = true_positive + false_positive + false_negative + true_negative
    accuracy = (true_positive + true_negative) / total if total > 0 else 0.0

    return precision, recall, f1, accuracy

def multiclass_accuracy(y_pred, y_true):
    """
    Computes metrics for multiclass classification
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true labels
    Returns:
    accuracy - ratio of accurate predictions to total samples
    """
    # np.asarray guards plain-list input: `list == list` would compare the
    # lists as whole objects and silently yield 0.0 or 1.0.
    return np.mean(np.asarray(y_pred) == np.asarray(y_true))


def r_squared(y_pred, y_true):
    """
    Computes r-squared (coefficient of determination) for regression
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true values
    Returns:
    r2 - r-squared value
    """
    # R^2 = 1 - SS_res / SS_tot
    residual_ss = np.sum(np.square(y_true - y_pred))
    total_ss = np.sum(np.square(y_true - np.mean(y_true)))
    return 1 - residual_ss / total_ss


def mse(y_pred, y_true):
    """
    Computes mean squared error
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true values
    Returns:
    mse - mean squared error
    """
    squared_errors = np.square(y_pred - y_true)
    return np.mean(squared_errors)


def mae(y_pred, y_true):
    """
    Computes mean absolute error
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true values
    Returns:
    mae - mean absolute error
    """
    absolute_errors = np.abs(y_pred - y_true)
    return np.mean(absolute_errors)