diff --git a/bwi_elev_detection/.gitignore b/bwi_elev_detection/.gitignore new file mode 100644 index 0000000..34c53d2 --- /dev/null +++ b/bwi_elev_detection/.gitignore @@ -0,0 +1,42 @@ +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +build/* +Project +CMakeFiles/* +src/CMakeFiles/* +src/audio_sandbox/CMakeFiles/* +src/audio_sandbox/Makefile +**.cmake +CMakeCache.txt +Makefile +src/Makefile + +#Audacity files +audacity/ diff --git a/bwi_elev_detection/CMakeLists.txt b/bwi_elev_detection/CMakeLists.txt new file mode 100644 index 0000000..5a40f01 --- /dev/null +++ b/bwi_elev_detection/CMakeLists.txt @@ -0,0 +1,173 @@ +cmake_minimum_required(VERSION 2.8.3) +project(elevators) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin REQUIRED COMPONENTS roscpp std_msgs genmsg cv_bridge) +find_package(OpenCV REQUIRED) +find_package(message_generation REQUIRED) +find_package(std_msgs REQUIRED) +find_package(cv_bridge REQUIRED) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + + +## Uncomment this if the package has a setup.py. This macro ensures +## modules and global scripts declared therein get installed +## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html +# catkin_python_setup() + +################################################ +## Declare ROS messages, services and actions ## +################################################ + +## To declare and build messages, services or actions from within this +## package, follow these steps: +## * Let MSG_DEP_SET be the set of packages whose message types you use in +## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). +## * In the file package.xml: +## * add a build_depend and a run_depend tag for each package in MSG_DEP_SET +## * If MSG_DEP_SET isn't empty the following dependencies might have been +## pulled in transitively but can be declared for certainty nonetheless: +## * add a build_depend tag for "message_generation" +## * add a run_depend tag for "message_runtime" +## * In this file (CMakeLists.txt): +## * add "message_generation" and every package in MSG_DEP_SET to +## find_package(catkin REQUIRED COMPONENTS ...) +## * add "message_runtime" and every package in MSG_DEP_SET to +## catkin_package(CATKIN_DEPENDS ...) +## * uncomment the add_*_files sections below as needed +## and list every .msg/.srv/.action file to be processed +## * uncomment the generate_messages entry below +## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) + +## Generate messages in the 'msg' folder + add_message_files( + FILES + fft_col.msg + ) + +## Generate services in the 'srv' folder +# add_service_files( +# FILES +# Service1.srv +# Service2.srv +# ) + +## Generate actions in the 'action' folder +# add_action_files( +# FILES +# Action1.action +# Action2.action +# ) + +## Generate added messages and services with any dependencies listed here + generate_messages( + DEPENDENCIES + std_msgs # Or other packages containing msgs + ) + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if you package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( + INCLUDE_DIRS src + LIBRARIES audio_sandbox +# CATKIN_DEPENDS other_catkin_pkg +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +# include_directories(include) + +## Declare a cpp library +# add_library(elevators +# src/${PROJECT_NAME}/elevators.cpp +# ) + +include(src/audio_sandbox/CMakeLists.txt) +add_subdirectory(src) + + +## Declare a cpp executable +add_executable(elevator_node ${exec_SRC}) +add_executable(fft_node src/audio_sandbox/src/ROSInterface.cpp) + +## Add cmake target dependencies of the executable/library +## as an example, message headers may need to be generated before nodes + add_dependencies(elevator_node elevators_generate_messages_cpp) + add_dependencies(fft_node elevators_generate_messages_cpp) + +## Specify libraries to link a library or executable target against + target_link_libraries(elevator_node + ${catkin_LIBRARIES} + ${OpenCV_LIBS} + ) + + target_link_libraries(fft_node + ${catkin_LIBRARIES} + ${ALSA_LIBRARY} + ${FFTW_LIBRARIES} + ${OpenCV_LIBS} + ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) for installation +## in contrast to setup.py, you can choose the destination +# install(PROGRAMS +# scripts/my_python_script +# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark executables and/or libraries for installation +# install(TARGETS elevators elevators_node +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) +# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_elevators.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) diff --git a/bwi_elev_detection/data/config/demo.cfg b/bwi_elev_detection/data/config/demo.cfg new file mode 100644 index 0000000..b385c75 --- /dev/null +++ b/bwi_elev_detection/data/config/demo.cfg @@ -0,0 +1,7 @@ +image_topic=camera/image_raw +ideal=8 175 175 +range=10 75 85 +convert_hsv=false +area_threshold=50 200 +strictness=10 +flip_dir=true diff --git a/bwi_elev_detection/data/config/elevator_settings.cfg b/bwi_elev_detection/data/config/elevator_settings.cfg new file mode 100644 index 0000000..f597696 --- /dev/null +++ b/bwi_elev_detection/data/config/elevator_settings.cfg @@ -0,0 +1,3 @@ +image_topic=usb_cam/image_raw +range=100 100 100 +ideal=255 255 255 \ No newline at end of file diff --git a/bwi_elev_detection/data/config/paper_settings.cfg b/bwi_elev_detection/data/config/paper_settings.cfg new file mode 100644 index 0000000..441059f --- /dev/null +++ b/bwi_elev_detection/data/config/paper_settings.cfg @@ -0,0 +1,4 @@ +image_topic=usb_cam/image_raw +ideal=133 48 189 +range=100 100 100 +convert_hsv=false \ No newline at end of file diff --git a/bwi_elev_detection/data/config/paper_settings_cam.cfg b/bwi_elev_detection/data/config/paper_settings_cam.cfg new file mode 100644 index 0000000..b385c75 --- /dev/null +++ b/bwi_elev_detection/data/config/paper_settings_cam.cfg @@ -0,0 +1,7 @@ +image_topic=camera/image_raw +ideal=8 175 175 +range=10 75 85 +convert_hsv=false +area_threshold=50 200 +strictness=10 +flip_dir=true diff --git a/bwi_elev_detection/launch/camera.launch b/bwi_elev_detection/launch/camera.launch new file mode 100644 index 0000000..3dd18a5 --- /dev/null +++ b/bwi_elev_detection/launch/camera.launch @@ -0,0 +1,4 @@ + + + + diff --git a/bwi_elev_detection/launch/fft_launch.launch b/bwi_elev_detection/launch/fft_launch.launch new file mode 100644 index 0000000..3d65de5 --- /dev/null +++ b/bwi_elev_detection/launch/fft_launch.launch @@ -0,0 +1,9 @@ + + + + \ No newline at end of file diff --git a/bwi_elev_detection/launch/sensors.launch b/bwi_elev_detection/launch/sensors.launch new file mode 100644 index 0000000..92b6632 --- /dev/null +++ b/bwi_elev_detection/launch/sensors.launch @@ -0,0 +1,10 @@ + + + + + diff --git a/bwi_elev_detection/launch/usb_cam/.rosinstall b/bwi_elev_detection/launch/usb_cam/.rosinstall new file mode 100644 index 0000000..6f4d5d0 --- /dev/null +++ b/bwi_elev_detection/launch/usb_cam/.rosinstall @@ -0,0 +1,5 @@ +# IT IS UNLIKELY YOU WANT TO EDIT THIS FILE BY HAND, +# UNLESS FOR REMOVING ENTRIES. +# IF YOU WANT TO CHANGE THE ROS ENVIRONMENT VARIABLES +# USE THE rosinstall TOOL INSTEAD. +# IF YOU CHANGE IT, USE rosinstall FOR THE CHANGES TO TAKE EFFECT diff --git a/bwi_elev_detection/launch/webcam.launch b/bwi_elev_detection/launch/webcam.launch new file mode 100644 index 0000000..9ebe364 --- /dev/null +++ b/bwi_elev_detection/launch/webcam.launch @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/bwi_elev_detection/msg/fft_col.msg b/bwi_elev_detection/msg/fft_col.msg new file mode 100644 index 0000000..6627cb8 --- /dev/null +++ b/bwi_elev_detection/msg/fft_col.msg @@ -0,0 +1 @@ +float64[] data \ No newline at end of file diff --git a/bwi_elev_detection/package.xml b/bwi_elev_detection/package.xml new file mode 100644 index 0000000..c2fe9eb --- /dev/null +++ b/bwi_elev_detection/package.xml @@ -0,0 +1,56 @@ + + + elevators + 0.0.0 + The elevators package + + + + + Brian Wang + + + + + + TODO + + + + + + + + + + + + + + + + + + + + + + + + + + catkin + cv_bridge + + cv_bridge + + + + + + + + + + + \ No newline at end of file diff --git a/bwi_elev_detection/src/CMakeLists.txt b/bwi_elev_detection/src/CMakeLists.txt new file mode 100644 index 0000000..a7b14fb --- /dev/null +++ b/bwi_elev_detection/src/CMakeLists.txt @@ -0,0 +1,6 @@ +SET( exec_SRC ${exec_SRC} + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/SoundInterface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/CameraInterface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/CameraController.cpp +PARENT_SCOPE) diff --git a/bwi_elev_detection/src/CameraController.cpp b/bwi_elev_detection/src/CameraController.cpp new file mode 100644 index 0000000..5ea8d7b --- /dev/null +++ b/bwi_elev_detection/src/CameraController.cpp @@ -0,0 +1,29 @@ +#include "lib/CameraController.h" + +void CameraController::moveTo(float f) { + std_msgs::Float32 msg; + msg.data = f; + + publisher->publish(msg); +} + +void CameraController::wait(int ms) { + usleep(1000 * ms); +} + +void CameraController::alternate() { + if (currentPosition >= 0) + moveTo(range[0]); + else + moveTo(range[1]); +} + +void CameraController::status_callback(const std_msgs::Float32::ConstPtr &msg) { + currentPosition = msg->data; + if (currentPosition <= range[0] + 0.05 || currentPosition >= range[1] - 0.05) + counter++; + if (counter >= 60) { + counter = 0; + alternate(); + } +} diff --git a/bwi_elev_detection/src/CameraInterface.cpp b/bwi_elev_detection/src/CameraInterface.cpp new file mode 100644 index 0000000..f6e9b82 --- /dev/null +++ b/bwi_elev_detection/src/CameraInterface.cpp @@ -0,0 +1,195 @@ +#include "lib/CameraInterface.h" +#include + +#define SCALE 2 + +//Finds the area of the triangle. +double area(vector *); +//Returns which double the first is closest to. +int isCloserTo(double, double, double); +//Returns whether the triangle is reasonably isosceles. +bool isReasonablyIsosceles(vector *); +//Distance formula. +double distance(double, double, double, double); +//Whether the double is in range of the set defined by the other two. +bool inRangeValues(double, double, double); + +void CameraInterface::image_callback( + const sensor_msgs::Image::ConstPtr &msg) { + CameraImage *ptr = lastImage; + lastImage = new CameraImage(msg); + process(); + if (ptr != NULL) + delete ptr; +} + +void CameraInterface::process() { + + if (abs(controller->getCurrentPosition()) < 0.40) + return; + + int cols = lastImage->width; + int rows = lastImage->height; + + Mat hsv_frame(cols, rows, CV_8UC4); + Mat thresholded(cols, rows, CV_8UC1); + + Mat image = lastImage -> image; + + cvtColor(image, hsv_frame, CV_RGB2HSV); + inRange(hsv_frame, hsv_min, hsv_max, thresholded); + + vector > contours, approximation; + vector hierarchy; + findContours(thresholded, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE); + + approximation.resize(contours.size()); + for (int k = 0; k < contours.size(); k++) { + approxPolyDP(Mat(contours[k]), approximation[k], sigma, true); + } + for ( int i = 0; i < approximation.size(); i++ ) { + if (approximation[i].size() == 3) { + verifyTriangle(&approximation[i]); + Scalar color = Scalar(255, 255, 255); + drawContours( thresholded, approximation, i, color, 2, 8, hierarchy, 0, Point() ); + } + } + namedWindow("contours", 1); + imshow("contours", thresholded); + waitKey(1); +} + +void CameraInterface::generateMinMax(Scalar ideal, Scalar range, enum hsv_type type) { + double h = max((int)(ideal.val[H] - range.val[H]), 0); + double s = max((int)(ideal.val[S] - range.val[S]), 0); + double v = max((int)(ideal.val[V] - range.val[V]), 0); + + hsv_min = Scalar(h, s, v); + + h = min((int)(ideal.val[H] + range.val[H]), 255); + s = min((int)(ideal.val[S] + range.val[S]), 255); + v = min((int)(ideal.val[V] + range.val[V]), 255); + + hsv_max = Scalar(h, s, v); +} + +enum Direction CameraInterface::getDirection(vector *vertices) { + double A[3] = { + (*vertices)[0].y, + (*vertices)[1].y, + (*vertices)[2].y + }; + + double minimum = min(A[0], min(A[1], A[2])); + double maximum = max(A[0], max(A[1], A[2])); + + int closeToMin = 0; + + closeToMin += isCloserTo(A[0], minimum, maximum) == 1 ? 1 : 0; + closeToMin += isCloserTo(A[1], minimum, maximum) == 1 ? 1 : 0; + closeToMin += isCloserTo(A[2], minimum, maximum) == 1 ? 1 : 0; + + if (closeToMin == 1) + return flip ? DIR_DOWN : DIR_UP; + + return flip ? DIR_UP : DIR_DOWN; +} + +double area(vector *vertices) { + if (vertices->size() != 3) + return -1; + double A[2] = {(*vertices)[0].x, (*vertices)[0].y}; + double B[2] = {(*vertices)[1].x, (*vertices)[1].y}; + double C[2] = {(*vertices)[2].x, (*vertices)[2].y}; + + return abs(A[0] * (B[1] - C[1]) + B[0] * (C[1] - A[1]) + C[0] * (A[1] - B[1])) / 2.0; +} + +int isCloserTo(double subject, double one, double two) { + double diffOne = abs(one - subject); + double diffTwo = abs(two - subject); + return diffTwo < diffOne ? 2 : 1; +} + +bool isReasonablyIsosceles(vector *vertices) { + double A[2] = {(*vertices)[0].x, (*vertices)[0].y}; + double B[2] = {(*vertices)[1].x, (*vertices)[1].y}; + double C[2] = {(*vertices)[2].x, (*vertices)[2].y}; + + double sides[3] = { + distance(A[0], A[1], B[0], B[1]), + distance(B[0], B[1], A[0], A[1]), + distance(B[0], B[1], C[0], C[1]) + }; + + sides[1] /= sides[0]; + sides[2] /= sides[0]; + sides[0] = 1; + + return (abs(sides[1] - sides[2]) <= 0.1 || abs(sides[1] - sides[0]) <= 0.1 || abs(sides[2] - sides[0]) <= 0.1); +} + +double distance(double x1, double y1, double x2, double y2) { + return sqrt(pow(y2 - y1, 2) + pow(x2 - x1, 2)); +} + +bool CameraInterface::verifyTriangle(vector *vertices) { + double areaNum = area(vertices); + if (tri[0] <= areaNum && areaNum <= tri[1] && inRegion(vertices)) { // && isReasonablyIsosceles(vertices)) { + + if (verbose) { + cout << *vertices << endl; + cout << "Found triangle of area: " << areaNum << endl; + } + + if (controller->getCurrentPosition() > 0) + cout << "Elevator on the left has arrived" << endl; + else + cout << "Elevator on the right has arrived" << endl; + + enum Direction dir = getDirection(vertices); + cout << "Direction: "; + switch (dir) { + case DIR_UP : cout << "Down" << endl; + break; + case DIR_DOWN : cout << "Up" << endl; + break; + case DIR_UNKNOWN : cout << "Not enough info!" << endl; + } + if (found++ >= 1) { + cout << "Stopping scan" << endl; + exit(0); + } + return true; + } + return false; +} + +bool CameraInterface::inRegion(vector *vertices) { + + double X[3] = { + (*vertices)[0].x, + (*vertices)[1].x, + (*vertices)[2].x + }; + double Y[3] = { + (*vertices)[0].y, + (*vertices)[1].y, + (*vertices)[2].y + }; + + for (int k = 0; k < 3; k++) { + if ((controller->getCurrentPosition() > 0 && !inRangeValues(X[k], 400, 600)) || + (controller->getCurrentPosition() < 0 && !inRangeValues(X[k], 0, 200)) || Y[k] < 300) { + if (verbose) + cout << "Rejecting triangle " << *vertices << endl; + return false; + } + } + return true; + +} + +bool inRangeValues(double a, double s, double e) { + return (s < a && a < e); +} diff --git a/bwi_elev_detection/src/SoundInterface.cpp b/bwi_elev_detection/src/SoundInterface.cpp new file mode 100644 index 0000000..203e660 --- /dev/null +++ b/bwi_elev_detection/src/SoundInterface.cpp @@ -0,0 +1,69 @@ +#include "lib/SoundInterface.h" + +double max(double, double); + +void SoundInterface::fft_callback(const elevators::fft_col::ConstPtr &msg) { + vector data = msg->data; + process_audio(data); + +} + +void SoundInterface::process_audio(vector data) { + if (peaks == NULL) + peaks = (int *)malloc(data.size() * sizeof(int)); + + int k = 0; + for (; k < data.size(); k++) { + peaks[k] = peak(k, &data); + // cout << peaks[k] << " "; + } + // cout << endl; +} + +int SoundInterface::peak(int index, vector *dataptr) { + vector data = *dataptr; + const int dsize = data.size() - 1; + if (index == 0) + return (int)max((data[1] - data[0]) / PEAK_MIN, 0); + if (index == dsize) + return (int)max((data[dsize] - data[dsize - 1]) / PEAK_MIN, 0); + + return (int)max(max((data[index] - data[index - 1]) / PEAK_MIN, + (data[index + 1] - data[index]) / PEAK_MIN), 0); +} + +void SoundInterface::load_ref_data() { + // vector *v = new vector (); + + // DIR *dir; + // dirent *ent; + // string line; + // if((dir = opendir("")) != NULL){ + // while((ent = readdir(dir)) != NULL){ + // ifstream file; + // file.open(ent->d_name); + + // vector processed; + + // getline(file, line); + + // if(line != "#elevators/data") + // continue; + + // while(getline (file, line)){ + + // } + + // file.close(); + // } + // } else { + // ROS_ERROR("Could not open directory!"); + // return; + // } + + // sound_db.push_back(v); +} + +double max(double a, double b) { + return a < b ? b : a; +} diff --git a/bwi_elev_detection/src/audio_sandbox/CMakeLists.txt b/bwi_elev_detection/src/audio_sandbox/CMakeLists.txt new file mode 100644 index 0000000..3757774 --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 2.8.7 FATAL_ERROR) +project(MY_GRAND_PROJECT) + +include(src/audio_sandbox/FindPackageHandleStandardArgs.cmake) + +include(src/audio_sandbox/FindALSA.cmake) + +#OpenCV +find_package( OpenCV 2.0 REQUIRED ) +include_directories(${OpenCV_INCLUDE_DIRS}) + + +#fftw - "libfftw3-3" and "libfftw3-dev" +find_library(FFTW_LIBRARY + NAMES fftw3 fftw) +set(FFTW_LIBRARIES "${FFTW_LIBRARY}") +if(UNIX AND NOT WIN32) + find_library(FFTW_libm_LIBRARY + NAMES m) + list(APPEND FFTW_LIBRARIES "${FFTW_libm_LIBRARY}") +endif() + + +#port audio - libportaudio19 +include(src/audio_sandbox/FindPortaudio.cmake) +include_directories(${PORTAUDIO_INCLUDE_DIRS}) +link_directories(${PORTAUDIO_LIBRARIES}) +add_definitions(${PORTAUDIO_DEFINITIONS}) + +#alsa -- should already exist on new ubuntu machines +include(src/audio_sandbox/FindALSA.cmake) +include_directories(${ALSA_INCLUDE_DIR}) +link_directories(${ALSA_LIBRARY}) + + +# add_executable(mik_test src/mik_test.cpp) +# target_link_libraries(mik_test ${PORTAUDIO_LIBRARIES}) + + +# add_executable(alsa_capture src/alsa_capture.cpp) +# target_link_libraries(alsa_capture ${ALSA_LIBRARY} ${FFTW_LIBRARIES} ${OpenCV_LIBS} ) + +# add_executable(alsa_sound_detector_demo src/alsa_sound_detector_demo.cpp) +# target_link_libraries(alsa_sound_detector_demo ${ALSA_LIBRARY} ${FFTW_LIBRARIES} ${OpenCV_LIBS} ) diff --git a/bwi_elev_detection/src/audio_sandbox/octave-core b/bwi_elev_detection/src/audio_sandbox/octave-core new file mode 100644 index 0000000..74671c0 Binary files /dev/null and b/bwi_elev_detection/src/audio_sandbox/octave-core differ diff --git a/bwi_elev_detection/src/audio_sandbox/src/BackgroundModelFFT.h b/bwi_elev_detection/src/audio_sandbox/src/BackgroundModelFFT.h new file mode 100644 index 0000000..2fdd0f0 --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/BackgroundModelFFT.h @@ -0,0 +1,254 @@ +#ifndef _background_model_fft_ +#define _background_model_fft_ + +//some standard C++ +#include +#include +#include +#include +#include +#include + +//my math utils +#include "math_utils.h" +#include "CircularBuffer.h" + +#define FRAC_BINS_THRESH 0.3 +#define STDEV_DIFF_THRES 2.5 + +using std::vector; + +class BackgroundModelFFT { + public: + + BackgroundModelFFT(){ + last_answer = false; //assume we start in background + num_time_steps_in_state=1; + }; + + + //estimates the mean and standard deviation + void estimateModel(vector< vector > fft){ + + num_fft_bins = fft[0].size(); + + bin_means = vector(num_fft_bins); + bin_stdevs = vector(num_fft_bins); + + + for (int i = 0; i < num_fft_bins; i++){ + vector bin_values = vector(fft.size()); + + for (int j = 0; j < (int)fft.size(); j++){ + bin_values[j]=fft[j][i]; + } + + //compute mean and standard deviation of values for that bin + double mean_i = get_vector_mean(bin_values); + double stdev_i = get_vector_stdev(bin_values, mean_i); + + bin_means[i]=mean_i; + bin_stdevs[i]=stdev_i; + std::cout << "Bin #"< vol_values; + for (unsigned int i = 0; i < fft.size(); i++){ + vol_values.push_back(get_vector_sum(fft[i])); + } + + vol_mean = get_vector_mean(vol_values); + vol_stdev = get_vector_stdev(vol_values, vol_mean); + printf("Vol. mean and stdev: %f\t+/-\t%f\n",vol_mean,vol_stdev); + }; + + + + //classifies a single FFT sample as background (false) or foreground (true) + bool classify(vector fft_column){ + int frac_bins = (int)floor( FRAC_BINS_THRESH * (double) num_fft_bins ); //this many bins have to exceed threshold + classify_answer = label_fft_column_by_bin_deviations(fft_column,frac_bins,STDEV_DIFF_THRES); + + //state transition + if (classify_answer != last_answer){ + num_time_steps_in_state=1; + last_answer=classify_answer; + } else { + num_time_steps_in_state++; + } + + + return classify_answer; + } + + + + //labels the FFT column/msg as either background or foreground using only volume information + //if the total intensity of the column is greater by more than k standard deviations from the mean + //volume of the background, then it is labeled as non-background + bool label_fft_column_by_volume(vector fft_column, double k_stdev_coefficient){ + double column_intensity = get_vector_mean(fft_column); + if (column_intensity > vol_mean + k_stdev_coefficient*vol_stdev) + return true; + else return false; + }; + + + //if frac_bins*num_fft_bins bins deviate from the mean by more than dev_theta, then the column + //is labelled as non-background + bool label_fft_column_by_bin_deviations(vector fft_column, int frac_bins, double dev_theta){ + vector dev_column = compute_normalized_deviations(fft_column,bin_means,bin_stdevs); + count_dev = 0; + amount_dev=0.0; + pct_dev_bins = 0.0; + mean_dev_bins_amount_deviation=0.0; + + for (unsigned int i = 0; i < dev_column.size(); ++i){ + amount_dev += dev_column[i]; + + if (dev_column[i] > dev_theta){ + count_dev++; + mean_dev_bins_amount_deviation+=dev_column[i]; + } + } + amount_dev=amount_dev/(double)dev_column.size(); + + if (count_dev!=0){ + mean_dev_bins_amount_deviation=mean_dev_bins_amount_deviation/(double)count_dev; + pct_dev_bins = ((double)count_dev)/(double)dev_column.size(); + } + + //add to filters + amount_dev_f.push(amount_dev); + pct_dev_bins_f.push(pct_dev_bins); + mean_dev_bins_amount_deviation_f.push(mean_dev_bins_amount_deviation); + + if (count_dev > frac_bins) + return true; //not background + else return false; //background + }; + + //returns the number of frequency bins that deviate by more than the threshold in the last measurement + int get_num_deviating_bins(){ + return count_dev; + }; + + //returns the total amount of deviation from the last measurement, summer over all bins, in standard deviations + double get_deviation_amount(){ + //return amount_dev; + return amount_dev_f.mean(); + }; + + double get_pct_deviating_bins(){ + return pct_dev_bins_f.mean(); + }; + + double get_deviating_bins_deviation_amount(){ + //return mean_dev_bins_amount_deviation; + return mean_dev_bins_amount_deviation_f.mean(); + }; + + unsigned long get_num_time_steps_in_state(){ + return num_time_steps_in_state; + }; + + //for each frequency bin, look at how much the value in the input deviates from the expected mean + //measured in terms of standard deviations + vector compute_combined_binned_deviation(vector fft_column){ + vector dev_column = vector(num_fft_bins); + for (unsigned int j = 0; j < fft_column.size(); j++) + dev_column[j]=abs((fft_column[j]-bin_means[j])/bin_stdevs[j]); + return dev_column; + }; + + vector< vector > compute_deviation_from_background_matrix(vector< vector > fft_in){ + vector< vector > fft = fft_in; + vector< vector > dev; + + for (unsigned int i = 0; i < fft.size(); i++){ + vector dev_column = vector(num_fft_bins); + vector fft_column = fft[i]; + + for (unsigned int j = 0; j < dev_column.size(); j++) + dev_column[j]= abs((fft_column[j]-bin_means[j])/bin_stdevs[j]); + + dev.push_back(dev_column); + + } + + return dev; + } + + //returns a matrix of zeros and ones, where zero at (i,j) indicates that + //that at time j, the dB level for bin i was above the threshold standard deviations + vector< vector > zero_one_filter(vector< vector > fft_in, double threshold){ + vector< vector > dev_matrix = compute_deviation_from_background_matrix(fft_in); + + for (unsigned int i = 0; i < dev_matrix.size(); i++){ + for (unsigned int j = 0; j < dev_matrix[i].size(); j++){ + if (dev_matrix[i][j]>threshold) + dev_matrix[i][j]=1; + else dev_matrix[i][j]=0; + } + } + + return dev_matrix; + } + + //labels each time samples in the input FFT as either above (1) or below (0) the expected volume + vector label_by_volume(vector< vector > fft_in, double threshold){ + vector< vector > fft = fft_in; + + vector labels = vector(fft.size()); + + for (unsigned int i = 0; i < labels.size(); i++){ + double volume_i = get_vector_sum(fft[i]); + + double num_devs_diff = abs(volume_i-vol_mean)/vol_stdev; + + if (num_devs_diff > threshold){ + labels[i]=1; + } + else labels[i]=0; + } + return labels; + } + + + + private: + int num_fft_bins; + vector bin_means; + vector bin_stdevs; + + double threshold; + + double vol_mean, vol_stdev; + + //how many frequency bins diverge from background model + int count_dev; + + //by how much the values in all frequency bins diverge from background model (in terms of standard deviations) + double amount_dev; + CircularBuffer amount_dev_f; + + //by how much the values in frequency bins labeled as deviating diverge from background model (in terms of standard deviations) + double mean_dev_bins_amount_deviation; + CircularBuffer mean_dev_bins_amount_deviation_f; + + //pct (0-1) bins that deviate from background + double pct_dev_bins; + CircularBuffer pct_dev_bins_f; + + //stores the answer to the classify function + bool classify_answer; + bool last_answer; //keeps track of previous answer in order to detect transitions + + //stores for how many time steps the background model has been in the currnet state + unsigned long num_time_steps_in_state; + + +}; + +#endif diff --git a/bwi_elev_detection/src/audio_sandbox/src/CircularBuffer.h b/bwi_elev_detection/src/audio_sandbox/src/CircularBuffer.h new file mode 100644 index 0000000..6edf8fa --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/CircularBuffer.h @@ -0,0 +1,72 @@ +#pragma once + +#include + + + + class CircularBuffer { + static const int LIMIT = 5; + double data[LIMIT]; + int next; + + public: + + CircularBuffer(){ + for(int x=0; x= LIMIT ) + next=0; + } + + double mean()const{ + double MEAN=0.0; + for( int x=1; x MAX ) + MAX = data[x]; + } + int belowMax = 0; + do{ + double max = 0; + for( int x=0; x max ) + max = data[x]; + } + MAX = max; + belowMax = 0; + for( int x=0; x LIMIT/2 ); + + + return MAX; + } + std::string toString(){ + std::stringstream ss; + ss << "["; + for( int x=0; x 1 && (!strcmp("true", argv[1]) || atoi(argv[1])); + + NodeHandle node; + + Publisher press = node.advertise("fft_topic", 100); + ROSInterface interface(&press, viz); + + Subscriber feed = node.subscribe("/audio", 100, &ROSInterface::audio_callback, &interface); + + spin(); + + return 0; +} + + +void ROSInterface::audio_callback( + const audio_common_msgs::AudioData::ConstPtr &msg) { + vector data = msg->data; + perform_fft(data); +} + +void ROSInterface::perform_fft(vector data) { + int k = 0; + for (; k < data.size(); k++) { + float sample = ((float) data[k]) / MAX_AMPLITUDE; + if (calculator->add_sample_RT(sample)) + publish_fft_col(); + } +} + +void ROSInterface::publish_fft_col() { + vector fft_col = calculator->get_last_fft_column(); + elevators::fft_col msg; + msg.data = fft_col; + + if (visualizer != NULL){ + visualizer->update(fft_col); + ROS_INFO("update viz\n"); + } + out->publish(msg); +} \ No newline at end of file diff --git a/bwi_elev_detection/src/audio_sandbox/src/ROSInterface.h b/bwi_elev_detection/src/audio_sandbox/src/ROSInterface.h new file mode 100644 index 0000000..ef52790 --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/ROSInterface.h @@ -0,0 +1,44 @@ +#ifndef AUDIO_INTERFACE_H +#define AUDIO_INTERFACE_H + +#include +#include +#include +#include "fft_calculator_dq.h" +#include "fft_visualizer.h" +#include + +//Maximum amplitude of samples +#define MAX_AMPLITUDE 255 +//Graph window, in FFT columns (for visualization) +#define GRAPH_WINDOW 1024 + +using namespace std; +using namespace ros; + +class ROSInterface { + Publisher *out; + FFT_calculator *calculator; + FFT_visualizer *visualizer; +public: + ROSInterface(Publisher *p, bool viz) { + out = p; + calculator = new FFT_calculator(); + visualizer = viz ? + new FFT_visualizer(calculator->get_num_fft(), GRAPH_WINDOW) : + NULL; + calculator->init_RT_mode(); + } + ~ROSInterface() { + delete calculator; + if (visualizer != NULL) + delete visualizer; + } + void audio_callback( + const audio_common_msgs::AudioData::ConstPtr &msg); +private: + void perform_fft(vector data); + void publish_fft_col(); +}; + +#endif //AUDIO_INTERFACE_H \ No newline at end of file diff --git a/bwi_elev_detection/src/audio_sandbox/src/alsa_capture.cpp b/bwi_elev_detection/src/audio_sandbox/src/alsa_capture.cpp new file mode 100644 index 0000000..035faa3 --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/alsa_capture.cpp @@ -0,0 +1,78 @@ +#include +#include +#include + +#include "alsa_utils.h" + +#include "fft_calculator_dq.h" +#include "fft_visualizer.h" + + +const static int SAMPLE_RATE = 44100; +const static int BUFF_SIZE = 128; + +#define NFFT 128 //number of frequency bins in FFT, must be a power of 2 +#define FFT_WIN 128 //length of window used to compute FFT (in terms of number of raw signal samples) +#define NOVERLAP 64 //by how much the windows should overlap - typically, half of FFT_WIN + +bool g_caught_sigint = false; + +snd_pcm_t *capture_handle; + + +void sig_handler(int sig) +{ + g_caught_sigint = true; + snd_pcm_close (capture_handle); + exit (0); +}; + +main (int argc, char *argv[]) + { + //open a handle to the microphone + capture_handle = open_mic_capture("hw:2,0",SAMPLE_RATE); + + //visualizer for FFT + FFT_visualizer *FFT_viz; + int viz_range = 640; //show last 640 samples of the FFT + bool visualize = true; + + /* INITIALIZE FFT */ + FFT_calculator* FFT = new FFT_calculator(NFFT,FFT_WIN,NOVERLAP); + FFT->init_RT_mode(); + + //read from the microphone + int i,j; + int err; + short buf[128]; + + for (i = 0; i < 1000; ++i) { + //while (true){ + + if ((err = snd_pcm_readi (capture_handle, buf, BUFF_SIZE)) != BUFF_SIZE) { + fprintf (stderr, "read from audio interface failed (%s)\n",snd_strerror (err)); + exit (1); + } + else { + for (j = 0; j < BUFF_SIZE; j++){ + //FFT->add_sample_RT((float)buf[j]); + + float sample_j = (float)buf[j]/(float)32768.0; + + //printf("%i\n",buf[j]); + if (FFT->add_sample_RT(sample_j)){ + //get last sample + vector next_fft = FFT->get_last_fft_column(); + + for (int k =0; k < next_fft.size(); k++){ + printf("%f,",next_fft.at(k)); + } + printf("\n"); + } + } + } + } + + snd_pcm_close (capture_handle); + exit (0); + } diff --git a/bwi_elev_detection/src/audio_sandbox/src/alsa_sound_detector_demo.cpp b/bwi_elev_detection/src/audio_sandbox/src/alsa_sound_detector_demo.cpp new file mode 100644 index 0000000..04dae61 --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/alsa_sound_detector_demo.cpp @@ -0,0 +1,150 @@ +#include +#include +#include +#include + +#include "alsa_utils.h" + +#include "fft_calculator_dq.h" +#include "fft_visualizer.h" + +#include "BackgroundModelFFT.h" + + +const static int SAMPLE_RATE = 44100; +const static int BUFF_SIZE = 128; + +#define NFFT 128 //number of frequency bins in FFT, must be a power of 2 +#define FFT_WIN 128 //length of window used to compute FFT (in terms of number of raw signal samples) +#define NOVERLAP 64 //by how much the windows should overlap - typically, half of FFT_WIN + +bool g_caught_sigint = false; + +snd_pcm_t *capture_handle; + +int i,j; +int err; +short buf[128]; + +BackgroundModelFFT* BGM; + +void sig_handler(int sig) +{ + g_caught_sigint = true; + snd_pcm_close (capture_handle); + exit (0); +}; + +vector< vector > record_fft_columns(double duration, FFT_calculator* fft_calc){ + int num_samples_to_record = (int)((double)SAMPLE_RATE*duration); + int c = 0; + int num_fft_columns = 0; + int err; + + vector< vector > result; + + while (true){ + if ((err = snd_pcm_readi (capture_handle, buf, BUFF_SIZE)) != BUFF_SIZE) { + fprintf (stderr, "read from audio interface failed (%s)\n",snd_strerror (err)); + exit (1); + } + else { + for (j = 0; j < BUFF_SIZE; j++){ + float sample_j = (float)buf[j]/(float)32768.0; + + if (fft_calc->add_sample_RT(sample_j)){ + vector next_fft = fft_calc->get_last_fft_column(); + result.push_back(next_fft); + num_fft_columns++; + } + + c++; + + } + } + + if (c > num_samples_to_record) + break; + } + + printf("Listened for %i samples and %i fft columns...\n",c,num_fft_columns); + return result; +} + + +void learn_sound_model(vector< vector > fft_columns){ + //step 1: assume the first 0.5 seconds are background and use that to learn background model + int t = (int)((double)(SAMPLE_RATE / 2)/(double)NOVERLAP); + + vector< vector > background_fft_columns ; + + for (i = 0; i < t; i ++){ + background_fft_columns.push_back(fft_columns.at(i)); + } + + BGM = new BackgroundModelFFT(); + BGM->estimateModel(background_fft_columns); + +} + +main (int argc, char *argv[]) + { + //open a handle to the microphone + capture_handle = open_mic_capture("hw:2,0",SAMPLE_RATE); + + //visualizer for FFT + FFT_visualizer *FFT_viz; + int viz_range = 640; //show last 640 samples of the FFT + bool visualize = true; + + /* INITIALIZE FFT */ + FFT_calculator* FFT = new FFT_calculator(NFFT,FFT_WIN,NOVERLAP); + FFT->init_RT_mode(); + + //read half a second from the mic, this is needed upon start + vector< vector > temp = record_fft_columns(0.5,FFT); + printf("Read %i fft columns...\n",(int)temp.size()); + temp.clear(); + + + if (argc > 1){ + printf("Training mode. Enter the duration to listen for and press 'Enter'\n"); + double d; + std::cin >> d; + sleep(2); + temp = record_fft_columns(d,FFT); + + learn_sound_model(temp); + } + + + //read from the microphone + + + + /*for (i = 0; i < 1000; ++i) { + //while (true){ + + if ((err = snd_pcm_readi (capture_handle, buf, BUFF_SIZE)) != BUFF_SIZE) { + fprintf (stderr, "read from audio interface failed (%s)\n",snd_strerror (err)); + exit (1); + } + else { + for (j = 0; j < BUFF_SIZE; j++){ + //FFT->add_sample_RT((float)buf[j]); + + float sample_j = (float)buf[j]/(float)32768.0; + + //printf("%i\n",buf[j]); + if (FFT->add_sample_RT(sample_j)){ + //get last sample + vector next_fft = FFT->get_last_fft_column(); + + } + } + } + }*/ + + snd_pcm_close (capture_handle); + exit (0); + } diff --git a/bwi_elev_detection/src/audio_sandbox/src/alsa_utils.h b/bwi_elev_detection/src/audio_sandbox/src/alsa_utils.h new file mode 100644 index 0000000..a0296ee --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/alsa_utils.h @@ -0,0 +1,77 @@ +#include +#include +#include +#include + + + + +snd_pcm_t* open_mic_capture(std::string dev_name, int sample_rate){ + + int i; + int err; + + snd_pcm_t *capture_handle; + snd_pcm_hw_params_t *hw_params; + + if ((err = snd_pcm_open (&capture_handle, dev_name.c_str(), SND_PCM_STREAM_CAPTURE, 0)) < 0) { + fprintf (stderr, "cannot open audio device %s (%s)\n", + dev_name.c_str(), + snd_strerror (err)); + exit (1); + } + + if ((err = snd_pcm_hw_params_malloc (&hw_params)) < 0) { + fprintf (stderr, "cannot allocate hardware parameter structure (%s)\n", + snd_strerror (err)); + exit (1); + } + + if ((err = snd_pcm_hw_params_any (capture_handle, hw_params)) < 0) { + fprintf (stderr, "cannot initialize hardware parameter structure (%s)\n", + snd_strerror (err)); + exit (1); + } + + if ((err = snd_pcm_hw_params_set_access (capture_handle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0) { + fprintf (stderr, "cannot set access type (%s)\n", + snd_strerror (err)); + exit (1); + } + + if ((err = snd_pcm_hw_params_set_format (capture_handle, hw_params, SND_PCM_FORMAT_S16_LE)) < 0) { + fprintf (stderr, "cannot set sample format (%s)\n", + snd_strerror (err)); + exit (1); + } + + + if ((err = snd_pcm_hw_params_set_rate_resample (capture_handle, hw_params, sample_rate)) < 0) { + fprintf (stderr, "cannot set sample rate (%s)\n",snd_strerror (err)); + exit (1); + } + + + + if ((err = snd_pcm_hw_params_set_channels (capture_handle, hw_params, 1)) < 0) { + fprintf (stderr, "cannot set channel count (%s)\n", + snd_strerror (err)); + exit (1); + } + + if ((err = snd_pcm_hw_params (capture_handle, hw_params)) < 0) { + fprintf (stderr, "cannot set parameters (%s)\n", + snd_strerror (err)); + exit (1); + } + + snd_pcm_hw_params_free (hw_params); + + if ((err = snd_pcm_prepare (capture_handle)) < 0) { + fprintf (stderr, "cannot prepare audio interface for use (%s)\n", + snd_strerror (err)); + exit (1); + } + + return capture_handle; +} diff --git a/bwi_elev_detection/src/audio_sandbox/src/fft_calculator_dq.h b/bwi_elev_detection/src/audio_sandbox/src/fft_calculator_dq.h new file mode 100644 index 0000000..d81985e --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/fft_calculator_dq.h @@ -0,0 +1,252 @@ +// +// Class that can compute fast fourier transform of a signal +// +// Author: Jivko Sinapov +// Date: 5/25/2011 +// Place: HRL Labs + + +#ifndef _fft_calc_dq_ +#define _fft_calc_dq_ + +#define PI 3.14159 + + + +//some standard C++ +#include +#include +#include +#include +#include +#include + + + + +//some audio and boost libs +#include + + +//for computing fft +#include //need to install the following: "libfftw3-3" and "libfftw3-dev" + +//how many FFT column vectors to keep in memory, used in real time mode +#define MAX_NUM_FFT_COLUMNS 640 + +using std::deque; +using std::vector; +using std::string; + +class FFT_calculator { +public: + FFT_calculator() { + set_params(256, 256, 128); //default params + }; + + FFT_calculator(int n, int w, int o) { + set_params(n, w, o); + }; + + void set_params(int n, int w, int o) { + NFFT = n; + FFT_WIN = w; + NOVERLAP = o; + + win = (double *)malloc(FFT_WIN * sizeof(double)); + + //init variables for fftw + fft_in = (double *)fftw_malloc(sizeof(double) * NFFT); + fft_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * (NFFT / 2 + 1)); + plan = fftw_plan_dft_r2c_1d(NFFT, fft_in, fft_out, FFTW_ESTIMATE); + + + }; + + int get_num_fft() { + return NFFT; + }; + + int get_num_fft_samples() { + return (int)fft_data.size(); + } + + vector get_last_fft_column() { + return fft_data[(int)fft_data.size() - 1]; + }; + + //initializes real time mode + void init_RT_mode() { + win = (double *)malloc(FFT_WIN * sizeof(double)); + sample_counter = 0; + } + + //process next audio sample - returns true if new FFT column was generated + bool add_sample_RT(float sample_i) { + + bool new_column = false; + + //check if it is time to compute the fft of the current window + if (sample_counter == FFT_WIN) { + /* process the next window of length FFT_WIN */ + process_window(win, FFT_WIN, fft_in, fft_out, &plan); + + /* move the last noverlap samples in the window to its front */ + memmove(win, &win[FFT_WIN - NOVERLAP], sizeof(double)*NOVERLAP); + sample_counter = NOVERLAP; + + //check if the vector of columns is at the max limit + if (fft_data.size() > MAX_NUM_FFT_COLUMNS) + fft_data.pop_front(); + + new_column = true; + } + + //add sample to the next position in current window + win[sample_counter++] = (double)sample_i; + + return new_column; + } + + + /* computes the fft of the input vector */ + void compute_fft( vector clip_samples) { + int winsamps = 0; + + fft_data = deque< vector >(); + + for (int i = 0; i < (int)clip_samples.size(); i++) { + if (winsamps == FFT_WIN) { + /* process the next window of length FFT_WIN */ + process_window(win, winsamps, fft_in, fft_out, &plan); + + /* move the last noverlap samples in the window to its front */ + memmove(win, &win[FFT_WIN - NOVERLAP], sizeof(double)*NOVERLAP); + winsamps = NOVERLAP; + + } + win[winsamps++] = (double)clip_samples[i]; + } + }; + + /* computes the FFT of a single window of length FFT_WIN; to be used in case FFT is needed in real time */ + vector compute_single_fft( double *single_window) { + + /* process the data */ + return compute_fft_column(win, FFT_WIN, fft_in, fft_out, &plan); + + /* return the single column vector */ + //return fft_data[(int)fft_data.size()-1]; + } + + + /* returns the FFT matrix; first dimension is time, second is frequency bin */ + deque< vector > getFFT() { + return fft_data; + }; + + void save_fft(char *filename) { + FILE *fp = fopen(filename, "w"); + fprintf(fp, "%d\t%d\n", NFFT, (int)fft_data.size()); + + for (unsigned int i = 0; i < fft_data.size(); i++) { + vector next_fft = fft_data[i]; + for (int j = 0; j < NFFT / 2 + 1; j++) + //fprintf(fp,"%.4f%s",next_fft[j],j == (NFFT/2+1-1) ? "\n":"\t"); + + fprintf(fp, "%.4f%s", fft_data[i][j], j == (NFFT / 2 + 1 - 1) ? "\n" : "\t"); + + + } + fclose(fp); + }; + + + + void save_as_matrix(char *filename) { + FILE *fp = fopen(filename, "w"); + for (unsigned int i = 0; i < fft_data.size(); i++) { + vector next_fft = fft_data[i]; + for (int j = 0; j < NFFT / 2 + 1; j++) + //fprintf(fp,"%.4f%s",next_fft[j],j == (NFFT/2+1-1) ? "\n":"\t"); + + fprintf(fp, "%.4f%s", fft_data[i][j], j == (NFFT / 2 + 1 - 1) ? "\n" : "\t"); + + + } + fclose(fp); + }; + + + +private: + + //parameters for FFT + int NFFT; // the number of frequency bins in the FFT will be (NFFT/2)+1 + int FFT_WIN; // the length of the fft window + int NOVERLAP; // the overlap - by default, it will be half the length of the fft window + + //variables used by fftw + double *win; + double *fft_in; + fftw_complex *fft_out; + fftw_plan plan; + + //where the fft is stored + deque< vector > fft_data; + + + int sample_counter;//used in real time mode + + + /* Computes the Hamming window function for a window of size N */ + static inline double hamming(unsigned int n, const unsigned int N) { + return 0.54 - 0.46 * cos(2 * M_PI * n / (N - 1)); + } + + static inline double tophat(unsigned int n, const unsigned int N) { + return 0.215578 - 0.41663 * cos(2 * M_PI * n / N) + 0.277263 * cos(4 * M_PI * n / N) - 0.08358 * cos(6 * M_PI * n / N) + 0.006947 * cos(8 * M_PI * n / N); + } + + /* computes the FFT spectrum of a single window of size FFT_WIN */ + vector compute_fft_column(double *win, int winsamps, double *fft_in, fftw_complex *fft_out, fftw_plan *plan) { + vector column = vector(NFFT / 2 + 1); + + int i; + for (i = 0; i < winsamps; ++i) { + fft_in[i % NFFT] = tophat(i, FFT_WIN) * win[i]; + if (!((i + 1) % NFFT)) { + fftw_execute(*plan); + + + for (int j = 0; j < NFFT / 2 + 1; ++j) { + double v = (sqrt( ((double)fft_out[j][0]) * (double)fft_out[j][0]) + ((double)fft_out[j][1] * ((double)fft_out[j][1]))); + // if (j<7) + // printf("%f ",v); + column[j] = v; + + } + } + } + return column; + } + + + /* computes the FFT spectrum of a single window of size FFT_WIN */ + void process_window(double *win, int winsamps, double *fft_in, fftw_complex *fft_out, fftw_plan *plan) { + int i; + for (i = 0; i < winsamps; ++i) { + fft_in[i % NFFT] = tophat(i, FFT_WIN) * win[i]; + if (!((i + 1) % NFFT)) { + fftw_execute(*plan); + + vector column; + for (int j = 0; j < NFFT / 2 + 1; ++j) + column.push_back(sqrt( ((double)fft_out[j][0]) * (double)fft_out[j][0]) + ((double)fft_out[j][1] * ((double)fft_out[j][1]))); + fft_data.push_back(column); + } + } + } +}; + +#endif diff --git a/bwi_elev_detection/src/audio_sandbox/src/fft_visualizer.h b/bwi_elev_detection/src/audio_sandbox/src/fft_visualizer.h new file mode 100644 index 0000000..a108e3a --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/fft_visualizer.h @@ -0,0 +1,130 @@ +/* +* Class for visualizing a Fast-Fourrir Transform of a signal using OpenCV +* +* Author: Jivko Sinapov +* +* +*/ + + + +#ifndef _fft_viz_ +#define _fft_viz_ + +//some standard C++ +#include +#include +#include +#include +#include +#include +#include + +//open cv +#include +#include + +#define MAX_BIN_VALUE 0.5 + +const double weights[6] = {0.0, 0.12, 0.4, 0.6, 0.88, 1.0}; +const double colors[6][3] = { {0.56, 0, 0}, {1.0, 0.0, 0.0}, {1.0, 1.0, 0.0}, {0.0, 1.0, 1.0}, {0.0, 0.0, 1.0}, {0.21, 0.39, 0.0}}; +double w_temp[6]; +double sum; +const int num_colors = 6; + + + +void trackbar_callback( int i, void *t ); + + + + +class FFT_visualizer { + friend void trackbar_callback(int i, void *); +public: + FFT_visualizer(int n_fft, int n_time) { + N_FFT = n_fft / 2 + 1; + N_TIME = n_time; + t_counter = 0; + max_bin_value = 0.25 ; //to do: should be 0.5 for 64 N_FFT, 1.0 for 32 N_FFT, etc... + + //horizontal axis of image is time, vertical is frequency bins + fft_img = cvCreateImage(cvSize(N_TIME, N_FFT), IPL_DEPTH_32F, 3); + + for (int i = 0; i < fft_img->width; i ++) { + for (int j = 0; j < fft_img->height; j++) { + + s.val[0] = 0; + s.val[1] = 0; + s.val[2] = 255; + cvSet2D(fft_img, j, i, s); + + } + } + cv::waitKey(10); + cv::namedWindow("fft"); + cv::imshow("fft", cv::Mat(fft_img)); + + bar_value = 20; + cv::createTrackbar("threshold", "fft", &bar_value, 100, trackbar_callback, this); + + cv::waitKey(10); + }; + + + void show_fft() { + cv::imshow("fft", cv::Mat(fft_img)); + cv::waitKey(10); + }; + + + + + + void update(std::vector next_column, double blue_max = 1.0, double green_max = 1.0, double red_max = 1.0) { + for (int j = 0; j < fft_img->height; j++) { + double v = (double)next_column[j]; + if (v > max_bin_value) { + v = max_bin_value; + } + + s.val[0] = (double)(blue_max * ((v - 0.0) / (max_bin_value))); + s.val[1] = (double)(green_max * ((v - 0.0) / (max_bin_value))); + s.val[2] = (double)(red_max * ((v - 0.0) / (max_bin_value))); + cvSet2D(fft_img, fft_img->height - 1 - j, t_counter, s); + + } + + //ROS_INFO("max = %f",max_bin_value); + + t_counter++; + if (t_counter >= N_TIME) + t_counter = 0; + + cv::waitKey(10); + + + }; + + +private: + int N_FFT, N_TIME; + IplImage *fft_img; + + int t_counter; + CvScalar s; + + double max_bin_value; + + int bar_value; + + + +}; + +void trackbar_callback( int i, void *t ) { + ((FFT_visualizer *)t)->max_bin_value = ((double)i / (50)) * 0.25; +}; + + +#endif diff --git a/bwi_elev_detection/src/audio_sandbox/src/import_sound.cpp b/bwi_elev_detection/src/audio_sandbox/src/import_sound.cpp new file mode 100644 index 0000000..b69f561 --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/import_sound.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include "fft_calculator_dq.h" +#include "fft_visualizer.h" + +#define NOISE 0.01 + +int main(int argc, char **argv) { + if (argc < 3) { + cout << "Usage: import_sound " << endl; + return; + } + + ifstream infile; + ofstream outfile; + string line; + FFT_calculator calc; + fft_visualizer viz; + + infile.open(argv[1]); + if (!infile.is_open()) { + cout << "Unable to open infile!" << endl; + return EXIT_FAILURE; + } + + outfile.open(argv[2]) + if (!outfile.is_open()) { + cout << "Unable to open outfile!" << endl; + return EXIT_FAILURE; + } + + vector samples; + double least = 1.1; + + while (getline(file, line)) { + double ldata = atof(line); + samples.push_back((float)ldata); + least = ldata < least ? ldata : least; + } + + //TODO: normalize all values, and save FFT + + infile.close(); + + double avg = 0; + int k = 0; + for (; k < samples.size(); k++) { + if (samples[k] <= least + NOISE) + samples[k] = 0; + else + avg += samples[k]; + } + + for (k = 0; k < samples.size(); k++){ + samples[k] += -avg + abs(least); + if(samples[k] > 1) + samples[k] = 1; + } + calc.compute_fft(samples); + + deque> out_data = calc.get_data(); + for(k = 0; k < out_data.size(); k++){ + vector out_line = out_data[k]; + int j = 0; + for(; j < out_line.size(); j++) + outfile << j << " "; + outfile << endl; + } + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/bwi_elev_detection/src/audio_sandbox/src/math_utils.h b/bwi_elev_detection/src/audio_sandbox/src/math_utils.h new file mode 100644 index 0000000..d46a4f1 --- /dev/null +++ b/bwi_elev_detection/src/audio_sandbox/src/math_utils.h @@ -0,0 +1,43 @@ +#ifndef _math_utils_ +#define _math_utils_ + +vector compute_normalized_deviations(vector values, vector means, vector stdevs){ + vector devs = vector(values.size()); + for (unsigned int j = 0; j < values.size(); j++) + devs[j]=abs((values[j]-means[j])/stdevs[j]); + return devs; +}; + +double get_vector_sum(vector values){ + double sum=0; + for (unsigned int i = 0; i < values.size(); i++) + sum+= values[i]; + + + return sum; +}; + +/* returns the mean of the input vector of values */ +double get_vector_mean(vector values){ + double mean=0; + for (unsigned int i = 0; i < values.size(); i++) + mean+= values[i]; + + return mean/values.size(); +}; + +/* returns the standard deviation given a vector of values, and their mean */ +double get_vector_stdev(vector values, double mean){ + + std::vector squared_diffs = std::vector((int)values.size()); + + for (unsigned int i = 0; i < values.size(); i++){ + squared_diffs[i]= (values[i]-mean)*(values[i]-mean) ; + } + + double expected_sq_diff = get_vector_mean(squared_diffs); + return sqrt(expected_sq_diff); +}; + + +#endif diff --git a/bwi_elev_detection/src/main.cpp b/bwi_elev_detection/src/main.cpp new file mode 100644 index 0000000..b6e209e --- /dev/null +++ b/bwi_elev_detection/src/main.cpp @@ -0,0 +1,132 @@ +#include +#include +#include +#include +#include +#include "lib/CameraInterface.h" +#include "lib/SoundInterface.h" +#include "lib/CameraController.h" +#include + +using namespace std; +using namespace ros; + +static int current_floor = 3; + +/** + * Configuration struct. Currently, the following have no effect: + * + * area + * flip + * strictness + */ +struct config { + //Ideal HSV values. All values must be out of 255. + int ideal[3]; + //Range of HSV values. All values must be out of 255. + int range[3]; + //Min and max area thresholds for triangles found. + int area[2]; + //Topic to subscribe to. + string topic; + //Whether or not to flip the directions each elevator is going. + bool flip; + //How strict to be with triangle matching. 0 is the most strict. + int strictness; +}; + +//Parse a config file. +void parse(ifstream *, struct config *); + +int main(int argc, char **argv) { + if (argc < 2) { + cout << "Usage: " << endl; + return EXIT_FAILURE; + } + + ifstream config_file (argv[1]); + if (!config_file.is_open()) { + cout << "Config file not found!" << endl; + return EXIT_FAILURE; + } + + struct config conf; + parse(&config_file, &conf); + + init(argc, argv, "elevator_listener"); + + NodeHandle node; + + Scalar ideal = Scalar(conf.ideal[0], conf.ideal[1], conf.ideal[2]); + Scalar range = Scalar(conf.range[0], conf.range[1], conf.range[2]); + + //Servo and controller initialization. + Publisher cpublisherh = node.advertise("/servo0_cmd", 1000); + + CameraController camera(&cpublisherh); + Subscriber csubscriberh = node.subscribe("/servo0_status", 100, &CameraController::status_callback, &camera); + + //Set area threshold. Parsing problems can't be solved at the moment. + conf.area[0] = 0; + conf.area[1] = 1000; + + //Initialize sound interface. + SoundInterface sinter; + + //Initialize camera interface. + CameraInterface cinter(ideal, range, &camera, conf.area, + conf.flip, conf.strictness, HSV_STANDARD); + + //Print out minimum and maximum Scalar for thresholding. + cout << cinter.getMin() << endl; + cout << cinter.getMax() << endl; + + //Subscribe to topics. + Subscriber listener = node.subscribe("/fft_topic", 100, &SoundInterface::fft_callback, &sinter); + Subscriber image = node.subscribe(conf.topic, 100, &CameraInterface::image_callback, &cinter); + + //Callbacks. + spin(); + + return 0; +} + +void parse(ifstream *file, struct config *configuration) { + string line; + bool done = false; + while (getline(*file, line, '=')) { + if (line == "image_topic") + getline(*file, configuration->topic); + else if (line == "ideal") { + for (int k = 0; k < 2; k++) { + getline(*file, line, ' '); + configuration->ideal[k] = atoi(line.c_str()); + } + getline(*file, line); + configuration->ideal[2] = atoi(line.c_str()); + } else if (line == "range") { + for (int k = 0; k < 2; k++) { + getline(*file, line, ' '); + configuration->range[k] = atoi(line.c_str()); + } + getline(*file, line); + configuration->range[2] = atoi(line.c_str()); + } else if (line == "area_threshold") { + getline(*file, line, ' '); + configuration->area[0] = atoi(line.c_str()); + getline(*file, line); + configuration->area[1] = atoi(line.c_str()); + if (configuration->area[1] < configuration->area[0]) { + int t = configuration->area[0]; + configuration->area[0] = configuration->area[1]; + configuration->area[1] = t; + } + } else if (line == "strictness") { + getline(*file, line); + configuration->strictness = atoi(line.c_str()); + } else if (line == "flip_dir") { + getline(*file, line); + configuration->flip = (line == "true"); + } + } +} diff --git a/bwi_elev_detection/src/required_libraries.txt b/bwi_elev_detection/src/required_libraries.txt new file mode 100644 index 0000000..1b5ea1a --- /dev/null +++ b/bwi_elev_detection/src/required_libraries.txt @@ -0,0 +1,2 @@ +libportaudio-dev +fftw-dev diff --git a/bwi_elev_detection/utils/FileIndexer.java b/bwi_elev_detection/utils/FileIndexer.java new file mode 100644 index 0000000..81f19b4 --- /dev/null +++ b/bwi_elev_detection/utils/FileIndexer.java @@ -0,0 +1,47 @@ +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.util.Collections; +import java.util.HashSet; + +/** + * Created by brian on 4/17/14. + */ +public class FileIndexer { + static PrintStream out; + static HashSet acceptedExtensions; + + public static void main(String... args) throws IOException { + out = new PrintStream(args.length >= 2 ? args[1] : "FileIndex.txt"); + File file = new File(args.length >= 1 ? args[0] : "."); + + String[] ext = {"jpg", "png", "jpeg", "bmp"}; + + acceptedExtensions = new HashSet(); + Collections.addAll(acceptedExtensions, ext); + + explore(file); + System.out.println("Indexing finished"); + } + + public static void explore(File root) throws IOException { + if (root == null || root.listFiles() == null) + return; + + System.out.println("Indexing directory " + root.getPath()); + for (File f : root.listFiles()) { + if (f == null) + continue; + if (f.isDirectory()) + explore(f); + else if (acceptedExtensions.contains(getExtension(f.getName()))){ + // System.out.println(f.getPath()); + out.println(f.getPath().substring(f.getPath().indexOf("/") + 1)); + } + } + } + + private static String getExtension(String name) { + return name.substring(name.lastIndexOf(".") + 1).toLowerCase(); + } +} \ No newline at end of file