bangerth · MantautasRimkus · Nov 12, 2019 · Nov 12, 2019 · bangerth · Nov 14, 2019
diff --git a/include/sampleflow/consumers/average_cosinus.h b/include/sampleflow/consumers/average_cosinus.h
@@ -7,6 +7,8 @@
 #include <sampleflow/types.h>
 #include <list>
 #include <mutex>
+#include <deque>
+
 
 #include <boost/numeric/ublas/matrix.hpp>
 
@@ -19,18 +21,16 @@ namespace SampleFlow
      * This is a Consumer class that implements computing the running average cosine of
      * angles between vectors value. This calculation, similarly to autocovariance, compares samples, that are
      * apart by specific index. In this sense, we get function where domain is bounded unsigned int and for
-     * each value we calculate cosine average.
-     * Let's say unsigned int is equal to l and n_samples=n and n is bigger than l. Then the formula can be written as:
+     * each value we calculate average cosine of angles.
+     *
+     * Let's say unsigned int is equal to l, n_samples=n, n is bigger than l and \hat{cos(\theta_n)(l) is estimated average
+     * cosinus of angles between vectors that are apart by l. Then the formula can be written as:
      * $\hat{cos(\theta_n)(l)}=\frac{1}{n-l}\sum_{t=1}^{n-l}{(x_{t+l}^T\bar{x})(\|x_{t}\| \; \|\bar{x}\|)}$.
      *
      * This code for every new sample updates $\hat{cos(\theta_n)(l)}, l=1,2,3,\ldots,L$. The value of
      * $L$ is provided to the constructor of this class.
      *
      * Notice, that for each new sample $x_n$, we need to take a sample, that was $l$ earlier than new sample ($x_{n-l}$).
-     * Working this way, the updating algorithm becomes very similar to one used in the MeanValue class.
-     *
-     * Every time, for updating we calculate corresponding fractions. There numerator is the dot product, while denominator -
-     * multiplied those two vector norms.
      *
      * ### Threading model ###
      *
@@ -41,81 +41,95 @@ namespace SampleFlow
     template <typename InputType>
     class AverageCosineBetweenSuccessiveSamples: public Consumer<InputType>
     {
-      public:
-        /**
-         * The data type of the elements of the input type. This type is used to define type of members, used in
-         * calculations.
-         */
-        using scalar_type = typename InputType::value_type;
-
-        /**
-         * Constructor.
-         */
-        AverageCosineBetweenSuccessiveSamples(const unsigned int length);
-
-        /**
-         * Process one sample by updating the previously computed average cosine
-         * function using this one sample.
-         *
-         * @param[in] sample The sample to process.
-         * @param[in] aux_data Auxiliary data about this sample. The current
-         *   class does not know what to do with any such data and consequently
-         *   simply ignores it.
-         */
-        virtual
-        void
-        consume (InputType     sample,
-                 AuxiliaryData aux_data) override;
-
-        /**
-         * A function that returns the average cosine vector computed from the
-         * samples seen so far. If no samples have been processed so far, then
-         * a default-constructed object of type std::vector<scalar_type> will be returned.
-         *
-         * @return The computed average cosine vector.
-         */
-        std::vector<scalar_type> get() const;
-
-      private:
-        /**
-         * A mutex used to lock access to all member variables when running
-         * on multiple threads.
-         */
-        mutable std::mutex mutex;
-
-        /**
-         * The data type, where we save multiple set of previous samples.
-         */
-        using matrix_type = boost::numeric::ublas::matrix<scalar_type>;
-
-        /**
-         * Current average cosine. Description of this is given above
-         */
-        std::vector<scalar_type> current_avg_cosine;
-
-        /**
-         * Save previous sample value
-         */
-        matrix_type previous_sample;
-        matrix_type previous_sample_replace;
-
-        /**
-         * The number of samples processed so far.
-         */
-        types::sample_index n_samples;
-
-        /**
-         * Describes how many values of average cosine function we calculate.
-         */
-        unsigned int history_length;
+    public:
+      /**
+       * The data type of the elements of the input type. This type is used to define type of members, used in
+       * calculations.
+       */
+      using scalar_type = types::ScalarType<InputType>;
+
+      /**
+       * The data type, where we save multiple set of previous samples.
+       */
+      using value_type = std::vector<scalar_type>;
+
+      /**
+       * Constructor.
+       *
+       * @param[in] lag_length A number that indicates how many average_cosinus
+       *   values we want to calculate, i.e., how far back in the past we
+       *   want to check how correlated (how big cosinus value) each sample is.
+       */
+      AverageCosineBetweenSuccessiveSamples(const unsigned int length);
+
+      /**
+       * Process one sample by updating the previously computed average cosine
+       * function using this one sample.
+       *
+       * @param[in] sample The sample to process.
+       * @param[in] aux_data Auxiliary data about this sample. The current
+       *   class does not know what to do with any such data and consequently
+       *   simply ignores it.
+       */
+      virtual
+      void
+      consume (InputType     sample,
+	       AuxiliaryData aux_data) override;
+
+      /**
+       * A function that returns the average cosine vector computed from the
+       * samples seen so far. If no samples have been processed so far, then
+       * a default-constructed object of value_type will be returned.
-       * a default-constructed object of value_type will be returned.
+       * a default-constructed object of `value_type` will be returned.
-       * a default-constructed object of value_type will be returned.
+       * a default-constructed object of `value_type` will be returned.
+       *
+       * @return The computed average cosine vector.
+       */
+      value_type get() const;
+
+    private:
+      /**
+       * A mutex used to lock access to all member variables when running
+       * on multiple threads.
+       */
+      mutable std::mutex mutex;
+
+      /**
+       * Current average cosine. Description of this is given above
+       */
+      value_type current_avg_cosine;
+
+
+      /**
+       * A data type used to store the past few samples.
+       */
+      using PreviousSamples = std::deque<InputType>;
+
+      /**
+       * Save previous samples needed to do calculations when a new sample
+       * comes in.
+       *
+       * These samples are stored in a double-ended queue (`std::deque`) so
+       * that it is efficient to push a new sample to the front of the list
+       * as well as to remove one from the end of the list.
+       */
+      PreviousSamples previous_samples;
+
+      /**
+       * The number of samples processed so far.
+       */
+      types::sample_index n_samples;
+
+      /**
+       * Describes how many values of average cosine function we calculate.
+       */
+      unsigned int history_length;
     };
 
     template <typename InputType>
     AverageCosineBetweenSuccessiveSamples<InputType>::
     AverageCosineBetweenSuccessiveSamples (const unsigned int length)
-      :
-      history_length(length),
-      n_samples (0)
+    :
+    history_length(length),
+    n_samples (0)
 
     {}
 
@@ -133,63 +147,51 @@ namespace SampleFlow
 
 
       if (n_samples == 0)
-        {
-          current_avg_cosine.resize(history_length);
-          n_samples = 1;
-
-          for (unsigned int i=0; i<history_length; ++i)
-            {
-              current_avg_cosine[i] = 0;
-            }
-
-          previous_sample.resize(history_length,sample.size());
-          previous_sample_replace.resize(history_length,sample.size());
+	{
+	  current_avg_cosine.resize(history_length);
+	  n_samples = 1;
 
-          for (unsigned int i=0; i<sample.size(); ++i) previous_sample(0,i) = sample[i];
+	  for (unsigned int i=0; i<history_length; ++i)
+	    {
+	      current_avg_cosine[i] = 0;
+	    }
 
-        }
+	  // Push the first sample to the front of the list of samples:
+	  previous_samples.push_front (sample);
 
+	}
       else
-        {
-          unsigned int length1=std::min(static_cast<unsigned int>(n_samples),history_length);
-          unsigned int length2=std::min(static_cast<unsigned int>(n_samples),history_length-1);
-
-          ++n_samples;
-          for (unsigned int i=0; i<length1; ++i)
-            {
-
-              //Update first dot product (alpha)
-              double update=0,norm1=0, norm2=0;
-              for (unsigned int j=0; j<sample.size(); ++j)
-                {
-                  update += sample[j]*previous_sample(i,j);
-                  norm1 += sample[j]*sample[j];
-                  norm2 += previous_sample(i,j)*previous_sample(i,j);
-                }
-              update = update/(std::sqrt(norm1*norm2));
-              update -= current_avg_cosine[i];
-              update /= static_cast<double>(n_samples-i-1);
-              current_avg_cosine[i] += update;
-            }
-
-          //Save needed previous values
-          for (unsigned int i=0; i<length2; ++i)
-            {
-              for (unsigned int j=0; j<sample.size(); ++j)
-                {
-                  previous_sample_replace(i+1,j)=previous_sample(i,j);
-                }
-            }
-
-          for (unsigned int j=0; j<sample.size(); ++j) previous_sample_replace(0,j) = sample[j];
-          previous_sample = previous_sample_replace;
-
-        }
+	{
+	  ++n_samples;
+	  for (unsigned int l=0; l<previous_samples.size(); ++l)
+	    {
+	      //Calculate each member of fractions separately
+	      double update=0,norm1=0, norm2=0;
+	      for (unsigned int j=0; j<sample.size(); ++j)
+		{
+		  update += sample[j]*previous_samples[l][j];
+		  norm1 += sample[j]*sample[j];
+		  norm2 += previous_samples[l][j]*previous_samples[l][j];
+		}
+	      //Put calculations together
+	      update = update/(std::sqrt(norm1*norm2));
+	      update -= current_avg_cosine[l];
+	      update /= static_cast<double>(n_samples-l-1);
+	      current_avg_cosine[l] += update;
+	    }
+
+	  // Now save the sample. If the list is becoming longer than the lag
+	  // length, drop the oldest sample.
+	  previous_samples.push_front (sample);
+	  if (previous_samples.size() > history_length)
+	    previous_samples.pop_back ();
+
+	}
     }
 
-//return value
+    //return value
     template <typename InputType>
-    std::vector<typename InputType::value_type>
+    std::vector<typename types::ScalarType<InputType>>
     AverageCosineBetweenSuccessiveSamples<InputType>::
     get () const
     {

diff --git a/include/sampleflow/consumers/spurious_autocovariance.h b/include/sampleflow/consumers/spurious_autocovariance.h
@@ -32,16 +32,17 @@ namespace SampleFlow
      * @note This class only calculates a "spurious autocovariance" since the actual
      *   definition of "autocovariance" is more complex than what we do here (except
      *   if we work scalar samples types). That said, below we will use the word
-     *   "autocovariance" even when refering to this spurious autocovariance.
+     *   "autocovariance" even when refering to this spurious autocovariance. It can be looked as trace of
-     *   "autocovariance" even when refering to this spurious autocovariance. It can be looked as trace of
+     *   "autocovariance" even when refering to this spurious autocovariance. It can be looked as the trace of
-     *   "autocovariance" even when refering to this spurious autocovariance. It can be looked as trace of
+     *   "autocovariance" even when refering to this spurious autocovariance. It can be looked as the trace of
+     *   cross-covariance matrices.
      *
      * This is a Consumer class that implements computing the running sample autocovariance function:
      * @f{align*}{
      *   \hat\gamma(l)
      *   =
-     *   \frac{1}{n} \sum_{t=1}^{n-l}{(x_{t+l}-\bar{x})(x_{t}-\bar{x})}.
+     *   \frac{1}{n-l} \sum_{t=1}^{n-l}{(x_{t+l}-\bar{x})(x_{t}-\bar{x})}.
      * @f}
      * In other words, it calculates the covariance of samples $x_{t+l}$ and $x_t$
-     * with a lag between zero and $k$
+     * with a lag l between zero and $k$
-     * with a lag l between zero and $k$
+     * with a lag $l$ between zero and $k$
-     * with a lag l between zero and $k$
+     * with a lag $l$ between zero and $k$
      *
      * This class updates $\hat\gamma(l), l=0,1,2,3,\ldots,k$ for each new, incoming
      * sample. The value of $k$ is set in the constructor.
@@ -56,16 +57,16 @@ namespace SampleFlow
      * @f{align*}{
      *   \hat\gamma(l)
      *   &=
-     *   \frac{1}{n}\sum_{t=1}^{n-l}{(x_{t+l}-\bar{x}_n)^T(x_{t}-\bar{x}_n)}
+     *   \frac{1}{n-l}\sum_{t=1}^{n-l}{(x_{t+l}-\bar{x}_n)^T(x_{t}-\bar{x}_n)}
      * \\&=
-     *   \underbrace{\frac{1}{n}\sum_{t=1}^{n-l}{x_{t+l}^T x_{t}}}_{\alpha_n(l)}
+     *   \underbrace{\frac{1}{n-l}\sum_{t=1}^{n-l}{x_{t+l}^T x_{t}}}_{\alpha_n(l)}
      *   -
      *   \bar{x}_n^T
-     *   \underbrace{\left[ \frac{1}{n}\sum_{t=1}^{n-l}(x_{t+l}+x_{t}) \right]}_{\beta_n(l)}
+     *   \underbrace{\left[ \frac{1}{n-l}\sum_{t=1}^{n-l}(x_{t+l}+x_{t}) \right]}_{\beta_n(l)}
      *   +
-     *   \frac{n-l}{n}\bar{x}_n^T \bar{x}_n
+     *   \frac{n-l}{n-l}\bar{x}_n^T \bar{x}_n
      * \\&=
-     *   \alpha_n(l)-\bar{x}_n^T \beta_n(l)+\frac{n-l}{n} \bar{x}_n^T \bar{x}_n.
+     *   \alpha_n(l)-\bar{x}_n^T \beta_n(l)+ \bar{x}_n^T \bar{x}_n.
      * @f}
      *
      * For each new sample, we then need to update the scalars $\alpha_{n+1}(l)$,
@@ -104,7 +105,7 @@ namespace SampleFlow
         /**
          * Constructor.
          *
-         * @param[in] lag_length A number that indicates how many autocovariance
+         * @param[in] lag_length A number that indicates how many spurious autocovariance
          *   values we want to calculate, i.e., how far back in the past we
          *   want to check how correlated each sample is.
          */
@@ -158,10 +159,10 @@ namespace SampleFlow
         using PreviousSamples = std::deque<InputType>;
 
         /**
-         * Parts for running autocovariation calculations.
+         * Parts for running spurious autocovariance calculations.
          * Description of these parts is given at this class description above.
-         * We should notice, that alpha and current_autocovariation is vectors, while beta is matrix.
-         * That happens, because if we want to update beta for each new sample, we need to know mean
+         * We should notice, that alpha and current_spur_autocovariance is vectors consisted of scalar_type,
+         * while beta is matrix. That happens, because if we want to update beta for each new sample, we need to know mean
          * summed vector values (sum numbers depends from lag parameter). There might be other ways how
          * to update this member, but this one appears the most stable.
          */
@@ -285,24 +286,24 @@ namespace SampleFlow
     {
       std::lock_guard<std::mutex> lock(mutex);
 
-      std::vector<scalar_type> current_autocovariation(autocovariance_length,
+      value_type current_spur_autocovariance(autocovariance_length,
                                                        scalar_type(0));
 
       if (n_samples !=0 )
         {
           for (int i=0; i<previous_samples.size(); ++i)
             {
-              current_autocovariation[i] = alpha[i];
+              current_spur_autocovariance[i] = alpha[i];
 
               for (unsigned int j=0; j<current_mean.size(); ++j)
-                current_autocovariation[i] -= current_mean[j] * beta[i][j];
+                current_spur_autocovariance[i] -= current_mean[j] * beta[i][j];
 
               for (unsigned int j=0; j<current_mean.size(); ++j)
-                current_autocovariation[i] += current_mean[j]*current_mean[j];
+                current_spur_autocovariance[i] += current_mean[j]*current_mean[j];
             }
         }
 
-      return current_autocovariation;
+      return current_spur_autocovariance;
     }
   }
 }