Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@

**/.DS_Store
**/.DS_Store
122 changes: 122 additions & 0 deletions classes/Datasets/Transforms.sc
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*****************************************
Data Transforms
(C) 2018 Jonathan Reus

Tools for analyzing and transforming datasets: scaling, normalization, standardization, PCA, etc.


******************************************/

// TO NORMALIZE:
// find min / max of features
// normalized_value = (val - min) / (max - min)


/*
T for Transform
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Often, the need to explain is an indication that a name could be improved. If this class is not used in dense repetition, the way classes like Array or Event are, you could just as well write it out.

TransformNormalizer

This would be better because T may mean a lot of different things (think of Task in Tdef), and Normalizer is a UGen.

Also possible is to make the verb the name of a class:

NormalizeTransform

Both I find fine.

Normalization:
1. find min/max of each feature
2. normalized_value = (val - min) / (max - min)
*/
// Min-max normalizer: init records the extrema of a dataset and keeps both
// the original data and a normalized copy.
TNormalizer {
// min, max:        extrema found by init -- one Array entry per column for
//                  2-D input, or a single value each for a flat collection.
// originalData:    the dataset object handed to init (stored by reference).
// normalizedData:  the normalized result built by init.
// dim:             column count of the dataset; only assigned for 2-D input
//                  and not exposed through a getter.
var <min, <max, <originalData, <normalizedData, dim;

// Create a normalizer and hand the dataset straight to init.
// @param dataset  a Matrix of rows as feature vectors
*new { arg dataset;
	var instance = super.new;
	^instance.init(dataset);
}

// Fit the normalizer to a dataset: record the extrema and store a normalized copy.
// @param dataset  a Matrix (rows are feature vectors) or a flat collection of values
// @throws Error   when dataset is not a SequenceableCollection
init {|dataset|
	// Raise a real, catchable exception rather than posting a string and
	// halting the interpreter, so callers can wrap construction in try { }.
	// NOTE(review): this check only rejects non-sequenceable objects; it does
	// not prove the argument is actually a Matrix -- confirm whether a
	// stricter test is wanted.
	if(dataset.isKindOf(SequenceableCollection).not) { Error("Dataset must be a Matrix").throw };
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better write:

Error("Dataset must be a Matrix").throw

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw. the test .isKindOf(SequenceableCollection) doesn't exclude things like a Set. So if you pass a Set it would go through.

// 2-D case: the first element is itself a collection, so the dataset is
// treated as a Matrix whose columns are features (it must respond to
// cols, rows and getCol).
if(dataset.at(0).isKindOf(SequenceableCollection)) {
	dim = dataset.cols;
	min = Array.newClear(dim);
	max = Array.newClear(dim);
	normalizedData = Matrix.newClear(dataset.rows, dataset.cols);
	dim.do {|i|
		var col = dataset.getCol(i);
		min[i] = col.minItem;
		max[i] = col.maxItem;
		// Scale the column against ITS OWN extrema. Subtracting the whole
		// min Array here would pair row values with other features' minima
		// (and with still-unfilled nil slots), which is not a per-feature
		// normalization.
		// assumes getCol answers a collection that supports elementwise
		// arithmetic -- TODO confirm against the Matrix implementation
		normalizedData.putCol(i, (col - min[i]) / (max[i] - min[i]));
	};
} {
	// 1-D case: a flat collection of scalars shares one min/max pair.
	min = dataset.minItem;
	max = dataset.maxItem;
	normalizedData = dataset.collect {|val| (val - min) / (max - min) };
};
// Keep a reference to the input (not a copy).
originalData = dataset;
}

// normalize / denormalize a single sample
// Rescale a sample using the stored extrema: a value equal to min maps to 0
// and a value equal to max maps to 1.
// @param samp  a number or a collection of feature values; when min/max are
//              Arrays the arithmetic is applied elementwise
// @return      (samp - min) / (max - min)
// The explicit ^ matters: without it a method answers the receiver, not the
// computed value.
normalizeSample {|samp| ^((samp - min) / (max - min)) }
// Map a normalized sample back to original units using the stored extrema.
// @param samp  a number or a collection of normalized feature values
// @return      (samp * (max - min)) + min
// The explicit ^ matters: without it a method answers the receiver, not the
// computed value.
denormalizeSample {|samp| ^((samp * (max - min)) + min) }


// Placeholder for normalizing an entire dataset: not implemented yet.
// It performs no work and answers the receiver.
normalizeData { ^this }

// denormalize a slope-intercept form 2-dimensional line
// of the form [slope, intercept]
denormalizeLine {|line|
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you check the formatting of whitespace? Please use tabs not spaces, and auto-formatting.

// Slope and intercept of the line as expressed in normalized space.
var slope = line[0], intercept = line[1];
var yAt, lo, hi, newSlope, newIntercept;
// Evaluate the normalized line at a given x.
yAt = {|x| (x * slope) + intercept };
// Sample the line at x = -1 and x = 1, then map both points back to
// original units.
lo = this.denormalizeSample([-1, yAt.(-1)]);
hi = this.denormalizeSample([1, yAt.(1)]);
// Rebuild the line through the two denormalized points.
newSlope = (hi[1] - lo[1]) / (hi[0] - lo[0]);
newIntercept = lo[1] - (newSlope * lo[0]);
^[newSlope, newIntercept];
}

}

/*
TO STANDARDIZE:
1. calculate mean and standard deviation of each feature
2. subtract mean from each feature
3. divide features by standard deviation

T is for Transform
*/
TStandardizer {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TransformStandardizer or StandardizeTransform?

// Statistics exposed through read-only getters. No method in this class
// assigns them yet, so both remain nil on a new instance.
var <mean, <stddev;

}

/*
T for Transform

Placeholder class, presumably for the PCA transform named in the file
header; it currently declares no variables or methods.
*/
TPCA {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm? This class is just an object?


}



/*
( // feature scaling of reduced dataset
~dReduced = Matrix.newFrom(~dReduced);
dim = ~dReduced.cols;
mean = Array.newClear(dim);
stdev = Array.newClear(dim);

~standardizeSample = {|v,mean,stddev| (v-mean) / stddev };
~destandardizeSample = {|v,mean,stddev| (v*stddev) + mean };

~dStandard = Matrix.newClear(~dReduced.rows, dim);
dim.do {|i|
var col = ~dReduced.getCol(i);
mean[i] = col.mean;
stdev[i] = col.stdDev(mean[i]);
~dStandard.putCol(i, ~standardizeSample.(col, mean[i], stdev[i]));
};
"MEAN: % STDDEV: %".format(mean, stdev).postln;
s1 = [-0.09, 1.45];
s2 = ~standardizeSample.(s1, mean, stdev);
s3 = ~destandardizeSample.(s2, mean, stdev);
"NEW SAMPLE: % STANDARDIZED: % DESTANDARDIZED: %".format(s1,s2,s3).postln;

); // END SCALING & STANDARDIZATION OF FEATURES

*/
Loading