Skip to content
This repository was archived by the owner on Mar 3, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions docs/framework/operators/tensor/tensor.mel_weight_matrix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# TensorTrait::mel_weight_matrix

```rust
fn mel_weight_matrix(num_mel_bins: usize, dft_length: usize, sample_rate: usize, lower_edge_hertz: T, upper_edge_hertz: T) -> Tensor<T>;
```

Generates a mel weight matrix that can be used to re-weight a tensor containing linearly sampled frequency spectra (from a DFT or STFT) into `num_mel_bins` frequency bands, based on the `[lower_edge_hertz, upper_edge_hertz]` range on the mel scale.
This function defines the mel scale in terms of a frequency in hertz according to the following formula:
```
mel(f) = 2595 * log10(1 + f/700)
```
In the returned matrix, all the triangles (filterbanks) have a peak value of 1.0.
The returned MelWeightMatrix can be used to right-multiply a spectrogram S of shape [frames, num_spectrogram_bins] of linear scale spectrum values (e.g. STFT magnitudes) to generate a “mel spectrogram” M of shape [frames, num_mel_bins].
## Args

* `num_mel_bins`(`usize`) - The number of bands in the mel spectrum.
* `dft_length`(`usize`) - The size of the original DFT. It is used to infer the size of the one-sided DFT, which is understood to be floor(dft_length/2) + 1, i.e. the spectrogram only contains the non-redundant DFT bins.
* `sample_rate`(`usize`) - Samples per second of the input signal used to create the spectrogram. Used to figure out the frequencies corresponding to each spectrogram bin, which dictates how they are mapped into the mel scale.
* `lower_edge_hertz`(`T`) - Lower bound on the frequencies to be included in the mel spectrum. This corresponds to the lower edge of the lowest triangular band.
* `upper_edge_hertz`(`T`) - The desired top edge of the highest frequency band.

## Returns

* A `Tensor<T>` containing the mel weight matrix. The output has the shape: [floor(dft_length/2) + 1][num_mel_bins].

## Examples

```rust
use orion::operators::tensor::{FP16x16Tensor, FP16x16TensorAdd};
use core::array::{ArrayTrait, SpanTrait};
use orion::operators::tensor::FP16x16TensorPartialEq;
use orion::utils::{assert_eq, assert_seq_eq};
use orion::operators::tensor::{TensorTrait, Tensor};
use orion::numbers::{FixedTrait, FP16x16};


fn example() -> Tensor<FP16x16> {
return TensorTrait::mel_weight_matrix(8, 16, 8192, FP16x16 { mag: 0, sign: false }, FP16x16 { mag: 268435456, sign: false });
}
>>> [
[65536, 65536, 0, 0, 0, 0, 0, 0],
[0, 0, 65536, 65536, 0, 0, 0, 0],
[0, 0, 0, 0, 65536, 0, 0, 0],
[0, 0, 0, 0, 0, 65536, 0, 0],
[0, 0, 0, 0, 0, 0, 65536, 0],
[0, 0, 0, 0, 0, 0, 0, 65536],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
]
```
97 changes: 97 additions & 0 deletions nodegen/node/mel_weight_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import numpy as np
from nodegen.node import RunAll
from ..helpers import make_test, to_fp, Tensor, Dtype, FixedImpl, Trait, get_data_statement

def mel_weight_matrix(num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz) -> np.ndarray: # type: ignore
    """Build a triangular mel filterbank as a dense weight matrix.

    The mel scale used is ``mel(f) = 2595 * log10(1 + f / 700)``.

    Args:
        num_mel_bins: Number of mel bands (columns of the result).
        dft_length: Size of the original DFT; the result has
            ``dft_length // 2 + 1`` rows.
        sample_rate: Samples per second of the analysed signal.
        lower_edge_hertz: Lower edge, in hertz, of the lowest band.
        upper_edge_hertz: Upper edge, in hertz, of the highest band.

    Returns:
        A float array of shape ``(dft_length // 2 + 1, num_mel_bins)``.
    """

    def hz_to_mel(hertz):
        return 2595 * np.log10(1 + hertz / 700)

    num_spectrogram_bins = dft_length // 2 + 1
    edge_positions = np.arange(0, num_mel_bins + 2)

    mel_low = hz_to_mel(lower_edge_hertz)
    mel_high = hz_to_mel(upper_edge_hertz)
    # The step divides by the number of edge points (num_mel_bins + 2).
    mel_step = (mel_high - mel_low) / edge_positions.shape[0]

    # Evenly spaced points on the mel scale, mapped back to hertz and then to
    # integer spectrogram row indices.
    edges_mel = edge_positions * mel_step + mel_low
    edges_hz = 700 * (np.power(10, (edges_mel / 2595)) - 1)
    edges = (((dft_length + 1) * edges_hz) // sample_rate).astype(int)

    weights = np.zeros((num_spectrogram_bins, num_mel_bins))

    # Each band is described by three consecutive edges: left, peak, right.
    for band, (left, peak, right) in enumerate(zip(edges, edges[1:], edges[2:])):
        rise = peak - left
        if rise == 0:
            # Degenerate rising slope: only the peak row is set.
            weights[peak, band] = 1
        else:
            for row in range(left, peak + 1):
                weights[row, band] = float(row - left) / float(rise)

        fall = right - peak
        if fall > 0:
            # The falling slope starts at the peak row itself (value 1.0).
            for row in range(peak, right):
                weights[row, band] = float(right - row) / float(fall)

    return weights


class Mel_weight_matrix(RunAll):
    """Generates the Cairo test data for `TensorTrait::mel_weight_matrix`."""

    # NOTE: only the fp16x16 case is generated. An earlier u32 draft of this
    # test was left disabled and is not reproduced here.

    @staticmethod
    def fp16x16():
        """Emit the `mel_weight_matrix_fp16x16` test case."""
        # Operator arguments for the generated test.
        num_mel_bins = np.int32(8)
        dft_length = np.int32(16)
        sample_rate = np.int32(8192)
        lower_edge_hertz = np.float32(0)
        upper_edge_hertz = np.float32(8192 / 2)

        # The two edge frequencies are passed to Cairo as FP16x16 literals.
        edges = np.array([lower_edge_hertz, upper_edge_hertz]).flatten()
        args_str = get_data_statement(to_fp(edges, FixedImpl.FP16x16), Dtype.FP16x16)

        # Expected result, computed in floating point by the reference function.
        expected = mel_weight_matrix(
            num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz
        )
        print(expected)

        # Convert the float values to fixed point with `to_fp`.
        expected_tensor = Tensor(
            Dtype.FP16x16, expected.shape, to_fp(expected.flatten(), FixedImpl.FP16x16)
        )

        # Name of the generated folder.
        folder_name = "mel_weight_matrix_fp16x16"
        # The Cairo call under test.
        signature = f"TensorTrait::mel_weight_matrix({f'{num_mel_bins}, {dft_length}, {sample_rate}, '+', '.join(args_str)})"

        # No input tensors: the operator only takes scalar arguments.
        make_test([], expected_tensor, signature, folder_name)
56 changes: 56 additions & 0 deletions src/operators/tensor/core.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ impl TensorSerde<T, impl TSerde: Serde<T>, impl TDrop: Drop<T>> of Serde<Tensor<
/// dynamic_quantize_linear - Computes the Scale, Zero Point and FP32->8Bit conversion of FP32 Input data.
/// scatter_nd - The output of the operation is produced by creating a copy of the input data, and then updating its value to values specified by updates at specific index positions specified by indices. Its output shape is the same as the shape of data
/// label_encoder - Maps each element in the input tensor to another value.
/// mel_weight_matrix - Generates a mel weight matrix that can be used to re-weight a tensor containing linearly sampled frequency spectra (from a DFT or STFT) into num_mel_bins frequency bands, based on the [lower_edge_hertz, upper_edge_hertz] range on the mel scale.
trait TensorTrait<T> {
/// # tensor.new
///
Expand Down Expand Up @@ -5850,6 +5851,61 @@ trait TensorTrait<T> {
values: Option<Span<T>>,
values_tensor: Option<Tensor<T>>
) -> Tensor<T>;
/// # TensorTrait::mel_weight_matrix
///
/// ```rust
/// fn mel_weight_matrix(num_mel_bins: usize, dft_length: usize, sample_rate: usize, lower_edge_hertz: T, upper_edge_hertz: T) -> Tensor<T>;
/// ```
///
/// Generates a mel weight matrix that can be used to re-weight a tensor containing linearly sampled frequency spectra (from a DFT or STFT) into `num_mel_bins` frequency bands, based on the `[lower_edge_hertz, upper_edge_hertz]` range on the mel scale.
///
/// The mel scale is defined in terms of a frequency `f` in hertz according to the following formula:
/// ```
/// mel(f) = 2595 * log10(1 + f/700)
/// ```
/// In the returned matrix, all the triangles (filterbanks) have a peak value of 1.0.
///
/// The returned matrix can be used to right-multiply a spectrogram S of shape [frames, num_spectrogram_bins] of linear scale spectrum values (e.g. STFT magnitudes) to generate a “mel spectrogram” M of shape [frames, num_mel_bins].
///
/// ## Args
///
/// * `num_mel_bins`(`usize`) - The number of bands in the mel spectrum.
/// * `dft_length`(`usize`) - The size of the original DFT. It is used to infer the size of the one-sided DFT, which is understood to be floor(dft_length/2) + 1, i.e. the spectrogram only contains the non-redundant DFT bins.
/// * `sample_rate`(`usize`) - Samples per second of the input signal used to create the spectrogram. Used to figure out the frequencies corresponding to each spectrogram bin, which dictates how they are mapped into the mel scale.
/// * `lower_edge_hertz`(`T`) - Lower bound on the frequencies to be included in the mel spectrum. This corresponds to the lower edge of the lowest triangular band.
/// * `upper_edge_hertz`(`T`) - The desired top edge of the highest frequency band.
///
/// ## Returns
///
/// * A `Tensor<T>` containing the mel weight matrix. The output has the shape: [floor(dft_length/2) + 1][num_mel_bins].
///
/// ## Examples
///
/// ```rust
/// use orion::operators::tensor::{FP16x16Tensor, FP16x16TensorAdd};
/// use core::array::{ArrayTrait, SpanTrait};
/// use orion::operators::tensor::FP16x16TensorPartialEq;
/// use orion::utils::{assert_eq, assert_seq_eq};
/// use orion::operators::tensor::{TensorTrait, Tensor};
/// use orion::numbers::{FixedTrait, FP16x16};
///
///
/// fn example() -> Tensor<FP16x16> {
///     // upper_edge_hertz: mag 268435456 = 4096 * 2^16, i.e. 4096 Hz in FP16x16.
///     return TensorTrait::mel_weight_matrix(8, 16, 8192, FP16x16 { mag: 0, sign: false }, FP16x16 { mag: 268435456, sign: false });
/// }
/// // Values below are FP16x16 magnitudes: 65536 = 2^16 represents 1.0.
/// >>> [
///     [65536, 65536, 0, 0, 0, 0, 0, 0],
///     [0, 0, 65536, 65536, 0, 0, 0, 0],
///     [0, 0, 0, 0, 65536, 0, 0, 0],
///     [0, 0, 0, 0, 0, 65536, 0, 0],
///     [0, 0, 0, 0, 0, 0, 65536, 0],
///     [0, 0, 0, 0, 0, 0, 0, 65536],
///     [0, 0, 0, 0, 0, 0, 0, 0],
///     [0, 0, 0, 0, 0, 0, 0, 0],
///     [0, 0, 0, 0, 0, 0, 0, 0],
/// ]
/// ```
///
fn mel_weight_matrix(
num_mel_bins: usize, dft_length: usize, sample_rate: usize, lower_edge_hertz: T, upper_edge_hertz: T
) -> Tensor<T>;
}

/// Cf: TensorTrait::new docstring
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_bool.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,12 @@ impl BoolTensor of TensorTrait<bool> {
) -> Tensor<bool> {
panic(array!['not supported!'])
}

// `mel_weight_matrix` is not available for `bool` tensors: this always panics
// with 'not supported!'.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: bool,
    upper_edge_hertz: bool
) -> Tensor<bool> {
    panic(array!['not supported!'])
}
}

/// Implements partial equal for two `Tensor<bool>` using the `PartialEq` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_complex64.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,12 @@ impl Complex64Tensor of TensorTrait<complex64> {
) -> Tensor<complex64> {
panic(array!['not supported!'])
}

// `mel_weight_matrix` is not available for `complex64` tensors: this always
// panics with 'not supported!'.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: complex64,
    upper_edge_hertz: complex64
) -> Tensor<complex64> {
    panic(array!['not supported!'])
}
}

/// Implements addition for `Tensor<complex64>` using the `Add` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_fp16x16.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,12 @@ impl FP16x16Tensor of TensorTrait<FP16x16> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// Forwards all arguments unchanged to the shared implementation in
// `math::mel_weight_matrix`.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: FP16x16,
    upper_edge_hertz: FP16x16
) -> Tensor<FP16x16> {
    math::mel_weight_matrix::mel_weight_matrix(
        num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz
    )
}
}

/// Implements addition for `Tensor<FP16x16>` using the `Add` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_fp16x16wide.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,12 @@ impl FP16x16WTensor of TensorTrait<FP16x16W> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// Forwards all arguments unchanged to the shared implementation in
// `math::mel_weight_matrix`.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: FP16x16W,
    upper_edge_hertz: FP16x16W
) -> Tensor<FP16x16W> {
    math::mel_weight_matrix::mel_weight_matrix(
        num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz
    )
}
}

/// Implements addition for `Tensor<FP16x16W>` using the `Add` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_fp32x32.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,12 @@ impl FP32x32Tensor of TensorTrait<FP32x32> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// Forwards all arguments unchanged to the shared implementation in
// `math::mel_weight_matrix`.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: FP32x32,
    upper_edge_hertz: FP32x32
) -> Tensor<FP32x32> {
    math::mel_weight_matrix::mel_weight_matrix(
        num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz
    )
}
}

/// Implements addition for `Tensor<FP32x32>` using the `Add` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_fp64x64.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,12 @@ impl FP64x64Tensor of TensorTrait<FP64x64> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// Forwards all arguments unchanged to the shared implementation in
// `math::mel_weight_matrix`.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: FP64x64,
    upper_edge_hertz: FP64x64
) -> Tensor<FP64x64> {
    math::mel_weight_matrix::mel_weight_matrix(
        num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz
    )
}
}

/// Implements addition for `Tensor<FP64x64>` using the `Add` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_fp8x23.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,12 @@ impl FP8x23Tensor of TensorTrait<FP8x23> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// `mel_weight_matrix` is not available for `FP8x23` tensors: this always
// panics with 'not supported!'.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: FP8x23,
    upper_edge_hertz: FP8x23
) -> Tensor<FP8x23> {
    panic(array!['not supported!'])
}
}

/// Implements addition for `Tensor<FP8x23>` using the `Add` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_fp8x23wide.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,12 @@ impl FP8x23WTensor of TensorTrait<FP8x23W> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// `mel_weight_matrix` is not available for `FP8x23W` tensors: this always
// panics with 'not supported!'.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: FP8x23W,
    upper_edge_hertz: FP8x23W
) -> Tensor<FP8x23W> {
    panic(array!['not supported!'])
}
}

/// Implements addition for `Tensor<FP8x23W>` using the `Add` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_i32.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,12 @@ impl I32Tensor of TensorTrait<i32> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// `mel_weight_matrix` is not available for `i32` tensors: this always panics
// with 'not supported!'.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: i32,
    upper_edge_hertz: i32
) -> Tensor<i32> {
    panic(array!['not supported!'])
}
}

/// Implements addition for `Tensor<i32>` using the `Add` trait.
Expand Down
6 changes: 6 additions & 0 deletions src/operators/tensor/implementations/tensor_i8.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,12 @@ impl I8Tensor of TensorTrait<i8> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// `mel_weight_matrix` is not available for `i8` tensors: this always panics
// with 'not supported!'.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: i8,
    upper_edge_hertz: i8
) -> Tensor<i8> {
    panic(array!['not supported!'])
}
}

/// Implements addition for `Tensor<i8>` using the `Add` trait.
Expand Down
7 changes: 7 additions & 0 deletions src/operators/tensor/implementations/tensor_u32.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,13 @@ impl U32Tensor of TensorTrait<u32> {
self, default_list, default_tensor, keys, keys_tensor, values, values_tensor
)
}

// `mel_weight_matrix` is not available for `u32` tensors: this always panics
// with 'not supported!'.
fn mel_weight_matrix(
    num_mel_bins: usize,
    dft_length: usize,
    sample_rate: usize,
    lower_edge_hertz: u32,
    upper_edge_hertz: u32
) -> Tensor<u32> {
    // Blocked on the `log10` operator.
    panic(array!['not supported!'])
}
}

/// Implements addition for `Tensor<u32>` using the `Add` trait.
Expand Down
1 change: 1 addition & 0 deletions src/operators/tensor/math.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,4 @@ mod hann_window;
mod hamming_window;
mod blackman_window;
mod scatter_nd;
mod mel_weight_matrix;
Loading