diff --git a/pyAudioAnalysis/MidTermFeatures.py b/pyAudioAnalysis/MidTermFeatures.py index 543fca0f6..eb55535f0 100644 --- a/pyAudioAnalysis/MidTermFeatures.py +++ b/pyAudioAnalysis/MidTermFeatures.py @@ -134,7 +134,7 @@ def mid_feature_extraction(signal, sampling_rate, mid_window, mid_step, def directory_feature_extraction(folder_path, mid_window, mid_step, short_window, short_step, - compute_beat=True): + compute_beat=True, max_files=1000): """ This function extracts the mid-term features of the WAVE files of a particular folder. @@ -157,7 +157,8 @@ def directory_feature_extraction(folder_path, mid_window, mid_step, for files in types: wav_file_list.extend(glob.glob(os.path.join(folder_path, files))) - wav_file_list = sorted(wav_file_list) + wav_file_list = sorted(wav_file_list)[0:max_files] + np.random.shuffle(wav_file_list) wav_file_list2, mid_feature_names = [], [] for i, file_path in enumerate(wav_file_list): print("Analyzing file {0:d} of {1:d}: {2:s}".format(i + 1, @@ -217,7 +218,7 @@ def directory_feature_extraction(folder_path, mid_window, mid_step, def multiple_directory_feature_extraction(path_list, mid_window, mid_step, short_window, short_step, - compute_beat=False): + compute_beat=False, max_files = 1000): """ Same as dirWavFeatureExtraction, but instead of a single dir it takes a list of paths as input and returns a list of feature matrices. @@ -242,7 +243,7 @@ def multiple_directory_feature_extraction(path_list, mid_window, mid_step, f, fn, feature_names = \ directory_feature_extraction(d, mid_window, mid_step, short_window, short_step, - compute_beat=compute_beat) + compute_beat=compute_beat, max_files=max_files) if f.shape[0] > 0: # if at least one audio file has been found in the provided folder: features.append(f) diff --git a/pyAudioAnalysis/audioTrainTest.py b/pyAudioAnalysis/audioTrainTest.py index 524b7e184..ca421ec58 100644 --- a/pyAudioAnalysis/audioTrainTest.py +++ b/pyAudioAnalysis/audioTrainTest.py @@ -282,7 +282,7 @@ def train_random_forest_regression(features, labels, n_estimators): def extract_features_and_train(paths, mid_window, mid_step, short_window, short_step, classifier_type, model_name, - compute_beat=False, train_percentage=0.90): + compute_beat=False, train_percentage=0.90, max_files = 1000): """ This function is used as a wrapper to segment-based audio feature extraction and classifier training. @@ -304,7 +304,7 @@ def extract_features_and_train(paths, mid_window, mid_step, short_window, features, class_names, _ = \ aF.multiple_directory_feature_extraction(paths, mid_window, mid_step, short_window, short_step, - compute_beat=compute_beat) + compute_beat=compute_beat, max_files=max_files) if len(features) == 0: print("trainSVM_feature ERROR: No data found in any input folder!")