Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7,839 changes: 7,063 additions & 776 deletions lecture_1_intro_knn/homework/KNN.ipynb

Large diffs are not rendered by default.

179 changes: 45 additions & 134 deletions lecture_1_intro_knn/homework/knn.py
Original file line number Diff line number Diff line change
@@ -1,143 +1,54 @@
import numpy as np


class KNNClassifier:
    """K-nearest-neighbor classifier using the L1 (Manhattan) distance.

    KNN is a lazy learner: ``fit`` only memorizes the training data and all
    the work happens at prediction time.
    """

    def __init__(self, k=1):
        # k - number of nearest neighbors that vote on each prediction
        self.k = k

    def fit(self, X, y):
        """Memorize the training samples and their labels.

        Arguments:
        X, np array (num_samples, num_features) - training samples
        y, np array (num_samples) - integer class labels
        """
        # Keep the feature dtype as given (casting to int would corrupt
        # float features); labels must be ints for np.bincount at predict time.
        self.train_X = np.asarray(X)
        self.train_y = np.asarray(y, dtype=int)

    def predict(self, X, n_loops=0):
        """Uses the KNN model to predict classes for the data samples provided.

        Arguments:
        X, np array (num_samples, num_features) - samples to run
            through the model
        n_loops, int - which distance implementation to use (0, 1 or 2)

        Returns:
        predictions, np array of ints (num_samples) - predicted class
            for each sample
        """
        if n_loops == 0:
            distances = self.compute_distances_no_loops(X)
        elif n_loops == 1:
            # Fixed: previously called non-existent compute_distances_one_loops.
            distances = self.compute_distances_one_loop(X)
        else:
            distances = self.compute_distances_two_loops(X)

        # Exactly two distinct labels -> binary task, otherwise multi-class.
        if len(np.unique(self.train_y)) == 2:
            return self.predict_labels_binary(distances)
        return self.predict_labels_multiclass(distances)

    def compute_distances_two_loops(self, X):
        """L1 distance from every sample of X to every training sample.

        Simplest implementation with 2 Python loops.

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
        """
        X = np.asarray(X)
        num_test = X.shape[0]
        num_train = self.train_X.shape[0]
        distances = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                distances[i, j] = np.sum(np.abs(X[i] - self.train_X[j]))
        # No .astype(int): truncation would corrupt float distances.
        return distances

    def compute_distances_one_loop(self, X):
        """L1 distance, vectorized over the training set (1 Python loop).

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
        """
        X = np.asarray(X)
        num_test = X.shape[0]
        num_train = self.train_X.shape[0]
        distances = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting: (num_train, num_features) - (num_features,)
            distances[i, :] = np.sum(np.abs(self.train_X - X[i]), axis=1)
        return distances

    def compute_distances_no_loops(self, X):
        """L1 distance, fully vectorized using numpy broadcasting.

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
        """
        X = np.asarray(X)
        # (num_test, 1, F) - (num_train, F) broadcasts to
        # (num_test, num_train, F); summing over axis 2 gives the L1 matrix.
        return np.abs(X[:, np.newaxis, :] - self.train_X).sum(axis=2)

    def predict_labels_binary(self, distances):
        """Majority vote among the k nearest neighbors (binary case).

        Arguments:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample

        Returns:
        pred, np array of int (num_test_samples) - predicted label
            for every test sample
        """
        num_test = distances.shape[0]
        prediction = np.zeros(num_test, dtype=int)
        for i in range(num_test):
            # Labels of the k closest training samples.
            closest_y = self.train_y[np.argsort(distances[i])[:self.k]]
            prediction[i] = np.argmax(np.bincount(closest_y))
        return prediction

    def predict_labels_multiclass(self, distances):
        """Majority vote among the k nearest neighbors (multi-class case).

        Arguments:
        distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample

        Returns:
        pred, np array of int (num_test_samples) - predicted class index
            for every test sample
        """
        num_test = distances.shape[0]
        prediction = np.zeros(num_test, dtype=int)
        for i in range(num_test):
            closest_y = self.train_y[np.argsort(distances[i, :])[:self.k]]
            # Fixed: the diff filled y_pred but returned the untouched
            # `prediction` array of zeros.
            prediction[i] = np.argmax(np.bincount(closest_y))
        return prediction
89 changes: 19 additions & 70 deletions lecture_1_intro_knn/homework/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,86 +2,35 @@


def binary_classification_metrics(y_pred, y_true):
    """
    Computes metrics for binary classification
    Arguments:
    y_pred, np array (num_samples) - model predictions
    y_true, np array (num_samples) - true labels
    Returns:
    precision, recall, f1, accuracy - classification metrics
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)

    true_positive = np.sum((y_pred == 1) & (y_true == 1))
    false_positive = np.sum((y_pred == 1) & (y_true == 0))
    false_negative = np.sum((y_pred == 0) & (y_true == 1))
    true_negative = np.sum((y_pred == 0) & (y_true == 0))

    # Guard the undefined cases (no predicted / no actual positives) instead
    # of emitting nan with a RuntimeWarning; 0.0 is the common convention.
    predicted_positive = true_positive + false_positive
    actual_positive = true_positive + false_negative
    precision = true_positive / predicted_positive if predicted_positive > 0 else 0.0
    recall = true_positive / actual_positive if actual_positive > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    total = true_positive + false_positive + false_negative + true_negative
    accuracy = (true_positive + true_negative) / total if total > 0 else 0.0

    return precision, recall, f1, accuracy

def multiclass_accuracy(y_pred, y_true):
    """
    Computes metrics for multiclass classification
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true labels
    Returns:
    accuracy - ratio of accurate predictions to total samples
    """
    # np.asarray guards plain-list input: `list == list` would compare the
    # lists as whole objects and silently yield 0.0 or 1.0.
    return np.mean(np.asarray(y_pred) == np.asarray(y_true))


def r_squared(y_pred, y_true):
    """
    Computes r-squared (coefficient of determination) for regression
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true values
    Returns:
    r2 - r-squared value
    """
    # R^2 = 1 - SS_res / SS_tot
    residual_ss = np.sum(np.square(y_true - y_pred))
    total_ss = np.sum(np.square(y_true - np.mean(y_true)))
    return 1 - residual_ss / total_ss


def mse(y_pred, y_true):
    """
    Computes mean squared error
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true values
    Returns:
    mse - mean squared error
    """
    squared_errors = np.square(y_pred - y_true)
    return np.mean(squared_errors)


def mae(y_pred, y_true):
    """
    Computes mean absolute error
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true values
    Returns:
    mae - mean absolute error
    """
    absolute_errors = np.abs(y_pred - y_true)
    return np.mean(absolute_errors)