diff --git a/doc/PolynomialRegressor.rst b/doc/PolynomialRegressor.rst new file mode 100644 index 0000000..fcf098f --- /dev/null +++ b/doc/PolynomialRegressor.rst @@ -0,0 +1,67 @@ +:digest: Polynomial regression on data sets. +:species: data +:sc-categories: Machine Learning +:sc-related: +:see-also: KNNRegressor, DataSet, MLPClassifier +:description: + + Perform regression between :fluid-obj:`DataSet`\s using N parallel 1-to-1 polynomial regressors in one object. + +:discussion: + + A polynomial regressor is a very simple algorithm that, given a set of input-output pairs - ``x`` to ``y``, for example - will find the line of best fit for that data. Linear regression is a special case when the ``degree`` of the polynomial is ``1``, meaning the line of best fit will be straight. + + Essentially, each element of each input is mapped to the corresponding element of the same output, hence it needs to be an N-to-N corpus, which is one limitation of this algorithm. + Tikhonov regularisation is an improvement of this algorithm, which compensates for noisy data and reduces overfitting in certain situations; a good explanation of how this works can be found on Wikipedia (https://en.wikipedia.org/wiki/Ridge_regression#Tikhonov_regularization). + +:control degree: + + An integer that specifies the degree \(highest power of x\) that the fit polynomial will have; e.g. a degree of 2 means that the polynomial will have the form ``y = \alpha + \beta x + \gamma x^2``. Therefore the higher the degree, the closer the output will get to the original data (until it begins overfitting). 
+ +:control tikhonov: + + A floating-point value that describes the strength of the Tikhonov filter, namely how much the algorithm is penalised for overfitting to the data. + +:message fit: + + :arg sourceDataSet: Source data + + :arg targetDataSet: Target data + + Fit the polynomial to map between a source and target :fluid-obj:`DataSet`. + +:message predict: + + :arg sourceDataSet: Input :fluid-obj:`DataSet` + + :arg targetDataSet: Output :fluid-obj:`DataSet` + + Apply the regressed mapping to a :fluid-obj:`DataSet` and predict the output value for each point. + +:message predictPoint: + + :arg sourceBuffer: Input point + + :arg targetBuffer: Output point + + Apply the regressed mapping to a single data point in a |buffer|. + +:message clear: + + This will erase all the learned polynomials. + +:message print: + + Print object information to the console. + +:message read: + + :arg fileName: file to read from (optional, will prompt if not present) + + Load regressed polynomials from a JSON file on disk. + +:message write: + + :arg fileName: file to write to (optional, will prompt if not present) + + Write the current regression to a JSON file on disk. diff --git a/example-code/sc/PolynomialRegressor.scd b/example-code/sc/PolynomialRegressor.scd new file mode 100644 index 0000000..a9fb3c4 --- /dev/null +++ b/example-code/sc/PolynomialRegressor.scd @@ -0,0 +1,31 @@ +code:: +s.boot; + +// 10 random points and their square +~somepoints = 10.collect{var x = 1.0.rand; [x, x**2]}.flop + +// load the ins and the outs in 1D datasets +~in = FluidDataSet(s).load(Dictionary.newFrom(["cols", 1, "data", Dictionary.newFrom(~somepoints[0].collect{|j,i|[i.asSymbol, j]}.flat)])) + +~out = FluidDataSet(s).load(Dictionary.newFrom(["cols", 1, "data", Dictionary.newFrom(~somepoints[1].collect{|j,i|[i.asSymbol, j]}.flat)])) + +~in.print;~out.print + +~polyreg = FluidPolynomialRegressor(s); + +~polyreg.fit(~in, ~out, {\done.postln}); + +// 100 points to draw the function + +~question = 
FluidDataSet(s).load(Dictionary.newFrom(["cols", 1, "data", Dictionary.newFrom(100.collect{|i|[i,i/100]}.flat)])) +~answer = FluidDataSet(s) + +~polyreg.predict(~question, ~answer, {\done.postln}); + +~arrayedanswer = Array.fill(100,0) +~answer.dump{|x|x["data"].keysValuesDo{|k,v|~arrayedanswer[k.asInteger]=v}} +~arrayedanswer.flat.plot + +//compare with the real function +100.collect{|x|(x/100)**2}.plot +:: \ No newline at end of file