-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
68 lines (52 loc) · 2.17 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
class WageRegression:
    """Fit and evaluate hourly-wage regressors on a train/test pair of CSVs.

    Both CSV files must contain the columns ``MonthlyEarnings`` and
    ``AveWeeklyHours``. The regression target is the hourly rate derived
    from them; every other column is used as a feature.
    """

    # Weeks per month used to turn monthly earnings into an hourly rate.
    WEEKS_PER_MONTH = 4

    def __init__(self):
        """Create an empty wrapper; call ``load_data`` before fitting."""
        self.X = None       # training features (DataFrame)
        self.y = None       # training target: hourly rate (Series)
        self.X_test = None  # test features (DataFrame)
        self.y_test = None  # test target: hourly rate (Series)

    def _split_features_target(self, frame):
        """Return ``(features, hourly_rate)`` for one raw data frame.

        Rows whose hourly rate cannot be computed (missing earnings, or
        missing/zero weekly hours) are dropped: filling them with 0 would
        either fabricate a wage of 0 or divide by zero and feed inf/NaN
        targets to the estimators. Remaining gaps in feature columns are
        filled with 0.
        """
        earnings = frame['MonthlyEarnings']
        hours = frame['AveWeeklyHours']
        usable = earnings.notna() & hours.notna() & (hours != 0)
        frame = frame.loc[usable]

        target = frame['MonthlyEarnings'] / (
            self.WEEKS_PER_MONTH * frame['AveWeeklyHours']
        )
        # 'HourlyRate' is dropped too (if present) so a pre-computed target
        # column in the file can never leak into the features.
        features = frame.drop(
            columns=['MonthlyEarnings', 'AveWeeklyHours', 'HourlyRate'],
            errors='ignore',
        ).fillna(0)
        return features, target.rename('HourlyRate')

    @staticmethod
    def _sse(actual, predicted):
        """Return the sum of squared differences between two sequences."""
        return ((actual - predicted) ** 2).sum()

    def load_data(self, train_file, test_file):
        """Read the train and test CSVs and populate X, y, X_test, y_test.

        Args:
            train_file: Path or file-like object accepted by ``pd.read_csv``.
            test_file: Path or file-like object accepted by ``pd.read_csv``.

        Raises:
            KeyError: If a file lacks ``MonthlyEarnings`` or
                ``AveWeeklyHours``.
        """
        # Both splits go through the same preprocessing so the estimators
        # see identically prepared features at fit and predict time.
        self.X, self.y = self._split_features_target(pd.read_csv(train_file))
        self.X_test, self.y_test = self._split_features_target(
            pd.read_csv(test_file)
        )

    def linear_regression(self):
        """Fit ordinary least squares and print SSE and coefficients."""
        lin_reg = LinearRegression()
        lin_reg.fit(self.X, self.y)

        # Compute training error
        print("Training Error (SSE):", self._sse(self.y, lin_reg.predict(self.X)))

        # Print one coefficient per feature column
        print("Coefficients:")
        for column, coef in zip(self.X.columns, lin_reg.coef_):
            print(column, ":", coef)

        # Compute test error
        print("Test Error (SSE):", self._sse(self.y_test, lin_reg.predict(self.X_test)))

    def svr(self):
        """Fit SVR with polynomial and RBF kernels and print their SSEs.

        Features are passed to the estimators as loaded, without scaling.
        """
        # SVR with polynomial kernel
        svr_poly = SVR(C=100, kernel='poly')
        svr_poly.fit(self.X, self.y)
        print("SVR Poly Training SSE:", self._sse(svr_poly.predict(self.X), self.y))
        print("SVR Poly Testing SSE:", self._sse(svr_poly.predict(self.X_test), self.y_test))

        # SVR with RBF kernel
        svr_rbf = SVR(C=100, kernel='rbf')
        svr_rbf.fit(self.X, self.y)
        print("SVR RBF Training SSE:", self._sse(svr_rbf.predict(self.X), self.y))
        print("SVR RBF Testing SSE:", self._sse(svr_rbf.predict(self.X_test), self.y_test))
# Script entry: build the wrapper, load the train/test CSV splits, then print
# the error report of each regressor in turn.
regressor = WageRegression()
regressor.load_data(
    train_file='wages-train.csv',
    test_file='wages-test.csv',
)
regressor.linear_regression()
regressor.svr()