-
Notifications
You must be signed in to change notification settings - Fork 9
Scatterplot branch #30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
0ee7bd9
1c5a240
79d7bd7
625cb0c
4b4d9d6
16b9166
287434b
e4f5052
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
|
||
**/.DS_Store | ||
**/.DS_Store |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
/***************************************** | ||
Data Transforms | ||
(C) 2018 Jonathan Reus | ||
|
||
Tools for analyzing and transforming datasets: scaling, normalization, standardization, PCA, etc. | ||
|
||
|
||
******************************************/ | ||
|
||
// TO NORMALIZE: | ||
// find min / max of features | ||
// normalized_value = (val - min) / (max - min) | ||
|
||
|
||
/*
TNormalizer (T for Transform)

Min/max normalization:
	1. find the min and max of each feature
	2. normalized_value = (val - min) / (max - min)

Two kinds of dataset are handled:
	- a Matrix whose rows are feature vectors: min and max become Arrays with
	  one entry per column, and every column is scaled against its own bounds
	- a flat collection of numbers: min and max are single numbers
*/
TNormalizer {
	var <min, <max, <originalData, <normalizedData, dim;

	/*
	@param dataset a Matrix of rows as feature vectors, or a flat collection of numbers
	@return a TNormalizer with min, max, originalData and normalizedData filled in
	*/
	*new { |dataset|
		^super.new.init(dataset);
	}

	/*
	Computes min / max of the dataset and stores its normalized copy.
	Throws an Error when dataset is not a SequenceableCollection.
	No explicit return: the method answers the receiver, which *new relies on.
	*/
	init { |dataset|
		if(dataset.isKindOf(SequenceableCollection).not) {
			// throw a catchable Error instead of halting the whole interpreter
			Error("Dataset must be a Matrix or a flat collection of numbers").throw;
		};

		if(dataset.at(0).isKindOf(SequenceableCollection)) {
			// rows are feature vectors: keep one min / max per column
			dim = dataset.cols;
			min = Array.newClear(dim);
			max = Array.newClear(dim);
			normalizedData = Matrix.newClear(dataset.rows, dataset.cols);
			dim.do { |i|
				var col = dataset.getCol(i);
				var lo = col.minItem, hi = col.maxItem;
				min[i] = lo;
				max[i] = hi;
				// Scale the column with its own bounds. The min / max arrays are
				// indexed by feature (and still partly nil here), so they must
				// not be applied element-wise to a column.
				normalizedData.putCol(i, (col - lo) / this.prSafeRange(lo, hi));
			};
		} {
			// flat collection: a single feature with scalar bounds
			min = dataset.minItem;
			max = dataset.maxItem;
			normalizedData = dataset.collect { |item| this.normalizeSample(item) };
		};
		originalData = dataset;
	}

	/*
	Normalize a single sample: a number for a flat dataset, or one feature
	vector (row) for a Matrix dataset.
	@return (samp - min) / (max - min); a feature whose max equals its min maps to 0
	*/
	normalizeSample { |samp|
		// the explicit ^ matters: without it a method answers the receiver
		^(samp - min) / this.prSafeRange(min, max)
	}

	/*
	Inverse of normalizeSample.
	@return (samp * (max - min)) + min
	*/
	denormalizeSample { |samp|
		^(samp * (max - min)) + min
	}

	// normalize an entire dataset
	// Not implemented yet: the body is empty, so calling it answers the receiver.
	normalizeData {}

	/*
	Denormalize a 2-dimensional line given as [slope, intercept] in normalized
	space. Needs min and max to be 2-element Arrays ([x, y] bounds), because
	both helper points are denormalized as [x, y] samples.
	@param line [slope, intercept] in normalized space
	@return [slope, intercept] in the original data space
	*/
	denormalizeLine { |line|
		var slope = line[0], intercept = line[1];
		var left, right, newSlope, newIntercept;
		// take two points on the normalized line (x = -1 and x = 1) and denormalize them
		left = this.denormalizeSample([-1, (-1 * slope) + intercept]);
		right = this.denormalizeSample([1, (1 * slope) + intercept]);
		// refit the line through the two denormalized points
		newSlope = (right[1] - left[1]) / (right[0] - left[0]); // slope
		newIntercept = left[1] - (newSlope * left[0]); // y-intercept
		^[newSlope, newIntercept];
	}

	// Private: the range hi - lo (number or Array) with every zero replaced
	// by 1, so normalizing a constant feature gives 0 rather than dividing by zero.
	prSafeRange { |lo, hi|
		var range = hi - lo;
		^if(range.isKindOf(SequenceableCollection)) {
			range.collect { |r| if(r == 0) { 1 } { r } }
		} {
			if(range == 0) { 1 } { range }
		};
	}
}
|
||
/*
TStandardizer (T is for Transform)

To standardize a dataset:
	1. calculate the mean and standard deviation of each feature
	2. subtract the mean from each feature
	3. divide the features by the standard deviation

So far the class only declares the two readable fields below;
it has no method that fills them in.
*/
TStandardizer {
	var <mean;
	var <stddev;
}
|
||
/*
TPCA (T for Transform)

Placeholder: the class body is empty, so for now TPCA adds nothing to what it
inherits from Object.
NOTE(review): presumably meant to hold the PCA transform named in the file
header - confirm before relying on it.
*/
TPCA {}
|
||
|
||
|
||
/* | ||
( // feature scaling of reduced dataset | ||
~dReduced = Matrix.newFrom(~dReduced); | ||
dim = ~dReduced.cols; | ||
mean = Array.newClear(dim); | ||
stdev = Array.newClear(dim); | ||
|
||
~standardizeSample = {|v,mean,stddev| (v-mean) / stddev }; | ||
~destandardizeSample = {|v,mean,stddev| (v*stddev) + mean }; | ||
|
||
~dStandard = Matrix.newClear(~dReduced.rows, dim); | ||
dim.do {|i| | ||
var col = ~dReduced.getCol(i); | ||
mean[i] = col.mean; | ||
stdev[i] = col.stdDev(mean[i]); | ||
~dStandard.putCol(i, ~standardizeSample.(col, mean[i], stdev[i])); | ||
}; | ||
"MEAN: % STDDEV: %".format(mean, stdev).postln; | ||
s1 = [-0.09, 1.45]; | ||
s2 = ~standardizeSample.(s1, mean, stdev); | ||
s3 = ~destandardizeSample.(s2, mean, stdev); | ||
"NEW SAMPLE: % STANDARDIZED: % DESTANDARDIZED: %".format(s1,s2,s3).postln; | ||
|
||
); // END SCALING & STANDARDIZATION OF FEATURES | ||
|
||
*/ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Often, the need to explain is an indication that a name could be improved. If this class is not used in dense repetition, like classes such as
Array
or Event,
you could just as well write it out: TransformNormalizer.
This would be better because
T
may mean a lot of different things (think of "task" in Tdef),
and Normalizer
is a UGen. Also possible is to make the verb the name of a class:
NormalizeTransform
I find both fine.