tyiannak · KobaKhit · Sep 9, 2020 · Sep 9, 2020 · Sep 10, 2020 · Sep 10, 2020
diff --git a/pyAudioAnalysis/MidTermFeatures.py b/pyAudioAnalysis/MidTermFeatures.py
@@ -134,7 +134,7 @@ def mid_feature_extraction(signal, sampling_rate, mid_window, mid_step,
 
 def directory_feature_extraction(folder_path, mid_window, mid_step,
                                  short_window, short_step,
-                                 compute_beat=True):
+                                 compute_beat=True, max_files=1000):
     """
     This function extracts the mid-term features of the WAVE files of a 
     particular folder.
@@ -157,7 +157,8 @@ def directory_feature_extraction(folder_path, mid_window, mid_step,
     for files in types:
         wav_file_list.extend(glob.glob(os.path.join(folder_path, files)))
 
-    wav_file_list = sorted(wav_file_list)    
+    wav_file_list = sorted(wav_file_list)[0:max_files] 
+    np.random.shuffle(wav_file_list)
     wav_file_list2, mid_feature_names = [], []
     for i, file_path in enumerate(wav_file_list):
         print("Analyzing file {0:d} of {1:d}: {2:s}".format(i + 1,
@@ -217,7 +218,7 @@ def directory_feature_extraction(folder_path, mid_window, mid_step,
 
 def multiple_directory_feature_extraction(path_list, mid_window, mid_step,
                                           short_window, short_step,
-                                          compute_beat=False):
+                                          compute_beat=False, max_files = 1000):
     """
     Same as dirWavFeatureExtraction, but instead of a single dir it
     takes a list of paths as input and returns a list of feature matrices.
@@ -242,7 +243,7 @@ def multiple_directory_feature_extraction(path_list, mid_window, mid_step,
         f, fn, feature_names = \
             directory_feature_extraction(d, mid_window, mid_step,
                                          short_window, short_step,
-                                         compute_beat=compute_beat)
+                                         compute_beat=compute_beat, max_files=max_files)
         if f.shape[0] > 0:
             # if at least one audio file has been found in the provided folder:
             features.append(f)

diff --git a/pyAudioAnalysis/audioTrainTest.py b/pyAudioAnalysis/audioTrainTest.py
@@ -282,7 +282,7 @@ def train_random_forest_regression(features, labels, n_estimators):
 
 def extract_features_and_train(paths, mid_window, mid_step, short_window,
                                short_step, classifier_type, model_name,
-                               compute_beat=False, train_percentage=0.90):
+                               compute_beat=False, train_percentage=0.90, max_files = 1000):
     """
     This function is used as a wrapper to segment-based audio feature extraction
     and classifier training.
@@ -304,7 +304,7 @@ def extract_features_and_train(paths, mid_window, mid_step, short_window,
     features, class_names, _ = \
         aF.multiple_directory_feature_extraction(paths, mid_window, mid_step,
                                                  short_window, short_step,
-                                                 compute_beat=compute_beat)
+                                                 compute_beat=compute_beat, max_files=max_files)
 
     if len(features) == 0:
         print("trainSVM_feature ERROR: No data found in any input folder!")