-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbestK.py
93 lines (71 loc) · 2.45 KB
/
bestK.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#Author: Zarreen Naowal Reza
#Email: [email protected]
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
# Dataset selection: exactly one read_csv line is active at a time; move the
# comment marker to evaluate one of the other dataset files instead.
#data = pd.read_csv('twogaussians.csv',header=None)
data = pd.read_csv('twospirals.csv',header=None)
#data = pd.read_csv('halfkernel.csv',header=None)
#data = pd.read_csv('clusterincluster.csv',header=None)
print("classifier: k-Nearest Neighbor\n")
# The file is read without a header row, so the columns are named here:
# 'a' and 'b' are used as features and 'class' as the label further below.
data.columns = ['a','b','class']
def clf_eval(Y_test, prediction):
    """Count the confusion-matrix cells for a two-class problem labelled 1 and 2.

    Label 1 is the positive class and label 2 is the negative class. A pair
    in which either label is something other than 1 or 2 is not counted in
    any cell.

    Args:
        Y_test: Indexable sequence of true labels.
        prediction: Indexable sequence of predicted labels. ``prediction[i]``
            is read for every index ``i`` of ``Y_test``, so it must be at
            least as long as ``Y_test``.

    Returns:
        A tuple ``(TP, TN, FP, FN)`` of integer counts.
    """
    TP = TN = FP = FN = 0
    # One pass over the data; each pair falls into at most one cell.
    for i, actual in enumerate(Y_test):
        predicted = prediction[i]
        if actual == 1:
            if predicted == 1:
                TP += 1
            elif predicted == 2:
                FN += 1
        elif actual == 2:
            if predicted == 2:
                TN += 1
            elif predicted == 1:
                FP += 1
    return TP, TN, FP, FN
# Feature matrix: every column except the label. drop() is called with the
# `columns` keyword because pandas 2.0+ no longer accepts the axis as a
# positional argument.
X = np.array(data.drop(columns=['class']))
Y = np.array(data['class'])
# Standardize the features before they reach the distance-based classifier.
X = preprocessing.scale(X)
#1 = positive, 2=negative


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    # A fold can end up with an empty denominator (for example, no sample
    # predicted as positive); score it as 0.0 instead of raising
    # ZeroDivisionError and aborting the whole search.
    if denominator == 0:
        return 0.0
    return numerator / denominator


# Each entry is [mean of the metric over the folds, k].
all_ppv = []
all_npv = []
all_specificity = []
all_sensitivity = []
all_accuracy = []
# Candidate neighbour counts run from 1 up to floor(sqrt(number of rows)).
k_range = int(np.sqrt(len(data))) + 1
for n in range(1, k_range):
    # Fresh per-fold lists for every k, so the mean recorded for this k is
    # not mixed with the folds evaluated for smaller values of k.
    ppv = []
    npv = []
    specificity = []
    sensitivity = []
    accuracy = []
    kf = KFold(n_splits=10, shuffle=True)
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        classifier = KNeighborsClassifier(n_neighbors=n, p=2, n_jobs=-1)
        classifier.fit(X_train, Y_train)
        prediction = classifier.predict(X_test)
        TP, TN, FP, FN = clf_eval(Y_test, prediction)
        ppv.append(_safe_ratio(TP, TP + FP))
        npv.append(_safe_ratio(TN, TN + FN))
        specificity.append(_safe_ratio(TN, TN + FP))
        sensitivity.append(_safe_ratio(TP, TP + FN))
        accuracy.append((TP + TN) / len(Y_test))
    all_ppv.append([np.mean(ppv), n])
    all_npv.append([np.mean(npv), n])
    all_specificity.append([np.mean(specificity), n])
    all_sensitivity.append([np.mean(sensitivity), n])
    all_accuracy.append([np.mean(accuracy), n])
# max() compares the [mean, k] pairs element by element: the highest mean
# wins, and equal means are resolved in favour of the larger k.
print("max ppv: ",max(all_ppv))
print("max npv: ",max(all_npv))
print("max specificity: ",max(all_specificity))
print("max sensitivity: ",max(all_sensitivity))
print("max accuracy: ",max(all_accuracy))