diff --git a/.coveralls.yml b/.coveralls.yml
deleted file mode 100644
index 88df9c8..0000000
--- a/.coveralls.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-service_name: travis-pro
-repo_token: TaRO4XYGPDOpkUVP5HZbatcmk9ZFhOErb
\ No newline at end of file
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 0000000..325a6d9
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,161 @@
+name: CI
+
+# Controls when the workflow will run
+on:
+  # Triggers the workflow on push or pull request events, but only for the main and dev branches
+  push:
+    branches: [ main, dev ]
+  pull_request:
+    branches: [ dev, main ]
+jobs:
+  docker-job:
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, ubuntu-18.04]
+        version: [3.6]
+    runs-on: ${{ matrix.os }}
+
+    # service containers to run with `docker-job`
+    services:
+      # label used to access the service container
+      postgres:
+        # Docker Hub image
+        image: postgres:latest
+        # service environment variables
+        # `POSTGRES_HOST` is `postgres`
+        env:
+          # optional (defaults to `postgres`)
+          POSTGRES_DB: postgres_db
+          # required
+          POSTGRES_PASSWORD: postgres
+          # optional (defaults to `5432`)
+          POSTGRES_PORT: 5432
+          # optional (defaults to `postgres`)
+          POSTGRES_USER: postgres_db
+        ports:
+          # maps tcp port 5432 on service container to the host
+          - 5432:5432
+        # set health checks to wait until postgres has started
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+    steps:
+
+
+      # Installing the PostgreSQL client
+
+      - name: Install PostgreSQL client
+        run: |
+          sudo apt-get update
+          sudo apt-get install --yes postgresql-client
+          echo postgresql client installed
+      # Setting up the Postgres database
+
+      - name: Set up database
+        run: |
+          echo creating database
+          psql -h localhost -U postgres_db -c "CREATE DATABASE pmr"
+          echo database created
+          psql -h localhost -U postgres_db -c "CREATE USER admin WITH PASSWORD 'admin'"
+          psql -h localhost -U postgres_db -c "ALTER ROLE admin SET client_encoding TO 'utf8'"
+          psql -h localhost -U postgres_db -c "ALTER ROLE admin SET default_transaction_isolation TO 'read committed'"
+          psql -h localhost -U postgres_db -c "ALTER ROLE admin SET timezone TO 'UTC'"
+          psql -h localhost -U postgres_db -c "ALTER USER admin CREATEDB"
+          psql -h localhost -U postgres_db -c "ALTER DATABASE pmr OWNER TO admin"
+          echo "deb [arch=amd64] http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | sudo tee /etc/apt/sources.list.d/tensorflow-serving.list && \
+          curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | sudo apt-key add -
+          sudo apt-get update
+          sudo apt-get install tensorflow-model-server
+          echo postgres set up
+
+        env:
+          # Setting up the Postgres environment
+          POSTGRES_HOST: localhost
+          POSTGRES_USER: postgres_db
+          POSTGRES_PASSWORD: postgres
+
+          POSTGRES_PORT: 5432
+          PGPASSWORD: postgres
+
+
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3.1.2
+        with:
+          python-version: 3.6
+
+      # Dependency Installation
+
+      - name: Install Dependencies
+        run: |
+          echo python version =
+          python --version
+          sudo apt-get install python3-dev
+          pip3 install --upgrade pip
+          pip3 install -r requirements.txt
+          pip3 install tensorflow-serving-api==2.5.2
+          pip3 install pytest
+          echo installed dependencies
+          flake8 --ignore=E501,F821,E265,E741 .
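+          # The flake8 ignore list above skips E501 (line too long), F821 (undefined
+          # name), E265 (block-comment formatting) and E741 (ambiguous variable name);
+          # any other lint error still fails this step.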
+      # Downloading Test Files
+
+      - name: Download Files
+        run: |
+          mkdir tests/testdata
+          cd tests/testdata
+          wget https://www.dropbox.com/s/1bnxg32zvgjv0pl/compareImage.jpeg
+          wget https://www.dropbox.com/s/x3qpga9gc4ifamn/t1.png
+          wget https://www.dropbox.com/s/l5t09lp8u4ok593/t2.jpeg
+          wget https://www.dropbox.com/s/hzlpo74tk0xwzzh/t3.jpeg
+          wget https://www.dropbox.com/s/lni50cgunua5mij/test1.mp4
+          wget https://www.dropbox.com/s/wm3llx0ydbnq8mn/test2.mp4
+          wget https://www.dropbox.com/s/ato4fie6k3lmctu/test3.mp4
+          wget https://www.dropbox.com/s/ifd7254x29oxjze/test4.mp4
+          wget https://www.dropbox.com/s/iwtgwz24eipd629/obj1.mp4
+          wget https://www.dropbox.com/s/ull2tqlou1p8l16/obj2.mp4
+          wget https://www.dropbox.com/s/3w5ghr5jj6opr58/scene1.mp4
+          wget https://www.dropbox.com/s/ij5hj4hznczvfcw/text.mp4
+          cd ../..
+          cd media
+          mkdir object
+          mkdir nsfw
+          cd ..
+          cd corelib/model
+          mkdir facenet
+          cd facenet
+          wget https://www.dropbox.com/s/jm8grrifh5yk7is/2017.zip?dl=1 -O 2017.zip
+          unzip 2017.zip
+          rm 2017.zip
+          cd ..
+          mkdir tfs
+          cd tfs
+          wget --load-cookies /tmp/cookies.txt "https://drive.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://drive.google.com/uc?export=download&id=12yE9v8dWeVidqxseUXidaDoS_VZpVOp1' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=12yE9v8dWeVidqxseUXidaDoS_VZpVOp1" -O module.zip && rm -rf /tmp/cookies.txt
+          unzip module.zip
+          rm module.zip
+          cd ../../..
+          cd data
+          mkdir text_reco
+          cd text_reco
+          wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1zmxO5NoEqTdYRGHXalaK7bn2m27egQMN' -O char_dict_en.json
+          wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1XGO2ycQcBYtwxT_KanapuoR6LeGR4s_E' -O ord_map_en.json
+          cd ../..
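+          # ffmpeg is needed by the video-processing code paths exercised in the
+          # tests below, and DJANGO_SETTINGS_MODULE must point at
+          # Rekognition.settings before any manage.py command runs.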
+          sudo apt update
+          sudo apt install ffmpeg
+          export DJANGO_SETTINGS_MODULE="Rekognition.settings"
+      - run: |
+          export DJANGO_SETTINGS_MODULE="Rekognition.settings"
+          set -x
+          docker version
+          docker run --rm hello-world
+          docker pull tensorflow/serving:nightly-devel
+          echo $(pwd)
+          docker run -d -t -p 8500:8500 -p 8501:8501 -v /home/runner/work/Rekognition/Rekognition/corelib/model/tfs/model_volume:/home/ tensorflow/serving --model_config_file=/home/configs/models.conf
+          echo serving container started
+          python manage.py flush --no-input
+          python manage.py migrate
+          python manage.py runserver 8000 &
+          sleep 10
+          python -m pytest --import-mode=append /home/runner/work/Rekognition/Rekognition/tests/ --no-header -vv
+          echo all done
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 5edefce..a247eb3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,5 @@ __pycache__
 env/
 db.sqlite3
 *.log
+myenv/
+myen/
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 1bdac9f..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-dist: xenial # required for Python >= 3.7
-language: python
-sudo: required
-
-services:
-  - postgresql
-
-addons:
-  postgresql: "10"
-  apt:
-    packages:
-      - postgresql-10
-      - postgresql-client-10
-env:
-  global:
-    - PGPORT=5432
-
-cache:
-  directories:
-    - $HOME/.cache/pip
-
-before_cache:
-  - rm -f $HOME/.cache/pip/log/debug.log
-
-python:
-  - "3.6"
-
-before_install:
-  - pip3 install --upgrade pip
-
-install:
-  - pip3 install -r requirements.txt
-
-before_script:
-  - psql -U postgres -c "CREATE DATABASE pmr"
-  - psql -U postgres -c "CREATE USER admin WITH PASSWORD 'admin'"
-  - psql -U postgres -c "ALTER ROLE admin SET client_encoding TO 'utf8'"
-  - psql -U postgres -c "ALTER ROLE admin SET default_transaction_isolation TO 'read committed'"
-  - psql -U postgres -c "ALTER ROLE admin SET timezone TO 'UTC'"
-  - psql -U postgres -c "ALTER USER admin CREATEDB"
-  - psql -U postgres -c "ALTER DATABASE pmr OWNER TO admin"
-  - mkdir tests/testdata
-  - cd tests/testdata
-  - wget https://www.dropbox.com/s/1bnxg32zvgjv0pl/compareImage.jpeg
-  - wget https://www.dropbox.com/s/1bnxg32zvgjv0pl/compareImage.jpeg
-  - wget https://www.dropbox.com/s/x3qpga9gc4ifamn/t1.png
-  - wget https://www.dropbox.com/s/l5t09lp8u4ok593/t2.jpeg
-  - wget https://www.dropbox.com/s/hzlpo74tk0xwzzh/t3.jpeg
-  - wget https://www.dropbox.com/s/lni50cgunua5mij/test1.mp4
-  - wget https://www.dropbox.com/s/wm3llx0ydbnq8mn/test2.mp4
-  - wget https://www.dropbox.com/s/ato4fie6k3lmctu/test3.mp4
-  - wget https://www.dropbox.com/s/ifd7254x29oxjze/test4.mp4
-  - wget https://www.dropbox.com/s/iwtgwz24eipd629/obj1.mp4
-  - wget https://www.dropbox.com/s/ull2tqlou1p8l16/obj2.mp4
-  - wget https://www.dropbox.com/s/3w5ghr5jj6opr58/scene1.mp4
-  - wget https://www.dropbox.com/s/ij5hj4hznczvfcw/text.mp4
-  - cd ../..
-  - export DJANGO_SETTINGS_MODULE="Rekognition.settings"
-
-script:
-  - flake8 .
-  - coverage run -m tests.test_views
-  - coverage run -m tests.test_models
-
-after_success:
-  - coveralls
diff --git a/README.md b/README.md
index 3a8d34c..cf90096 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,7 @@ Google Summer Of Code Project under CCExtractor Development
 [![GPLv3 license](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://github.com/ccextractor/Rekognition/blob/master/LICENSE)
 ---
+![bannerimage](https://github.com/CCExtractor/Rekognition/banner.png)
 
 This project aims at providing a free alternative to Amazon Rekognition services.
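+
+As a quick smoke test once the Django server is running, an image can be POSTed
+to the face-recognition endpoint with curl (the path below is illustrative; the
+actual routes are defined in coreapi/urls.py):
+
+    # hypothetical route -- check coreapi/urls.py for the real path
+    curl -F "file=@face.jpg" http://localhost:8000/api/image/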
diff --git a/contributing.md b/contributing.md
index b623834..faaca80 100644
--- a/contributing.md
+++ b/contributing.md
@@ -1,3 +1,33 @@
+Table of contents:
+- [Contributing to Rekognition](#contributing-to-rekognition)
+- [Making a PR](#making-a-pr)
+- [Asking for help](#asking-for-help)
+- [Development environment setup](#development-environment-setup)
+
+As a beginner, navigating the codebase and finding your way through the documentation can be difficult. This page will help you understand everything about contributing to Rekognition, as well as open-source best practices.
+
+## Contributing to Rekognition
+- Follow the page "Setting up the development environment" to set up the development environment for Rekognition.
+- Finding your first issue:
+- Go to issues in the Rekognition repo.
+- Find the issues which you might be interested to work on. Or, you can also come up with your own ideas of improving the code.
+- After finding the issue you are interested in: if the issue is an existing one, comment on the issue and ask for it to be assigned to you. Or, if the issue is unlisted and new, create a new issue, fill in all the information asked for in the issue template provided by Rekognition, and ask for it to be assigned to you.
+- After receiving confirmation, start working on the issue, and whenever and wherever help is needed, comment on the issue itself describing your query in detail.
+- A good guide on how to collaborate efficiently can be found here.
+
+## Making a PR
+- After you have worked on the issue and fixed it, we need to merge it from your forked repository into the Rekognition repository by making a PR.
+- Each PR made should pass all the tests. We have new GitHub Actions in place for CI/CD.
+- Once your commit passes all the tests, make a PR and wait for it to be reviewed and merged.
+
+
+## Asking for help
+- At times, help is needed while solving the issue. We recommend the following steps for asking for help when you get stuck:
+- Read our documentation to see if your question has already been answered.
+- Comment on the issue you are working on, describing in detail what problems you are facing.
+- Make sure to write your query in detail, and if it is a bug, include steps to reproduce it.
+- If you are not working on any issue and have a question to be answered, open a new issue on GitHub and wait for a reply.
+
 # Development Environment setup
 ## Install python 3.6
 ```
@@ -6,14 +36,29 @@ sudo apt-get update
 sudo apt-get install python3.6
 ```
 
+## For MacOS:
+```
+brew install python
+```
+
 ## Clone the repository and setup venv
 ```
-git clone https://github.com/pymit/Rekognition
+git clone https://github.com/CCExtractor/Rekognition
 cd Rekognition
 ./setup.sh
 source myenv/bin/activate
 ```
+
+### For MacOS:
+```
+git clone https://github.com/CCExtractor/Rekognition
+cd Rekognition
+./setup.sh
+python3 -m virtualenv myenv
+source $PWD/myenv/bin/activate
+pip install -r requirements.txt
+```
+
+NOTE: Sometimes a "permission denied" error may appear when you try to run `setup.sh`. If so, run `chmod 755 setup.sh` from the repository root to fix the permissions.
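+
+To verify that the virtual environment is active before continuing (a quick
+sanity check, not part of setup.sh):
+```
+which python      # should resolve to a path inside myenv/
+python --version  # expected: Python 3.6.x
+```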
 ***
 ## Postgres setup
@@ -30,16 +75,20 @@ source myenv/bin/activate
     ALTER USER admin CREATEDB;
     ALTER DATABASE pmr OWNER TO admin;
 ***
+## Postgres setup for MacOS
 
-## ReactJS setup for frontend
-
-    git clone https://github.com/pymit/RekoUI
-    cd RekoUI
-    sudo apt install npm
-    sudo npm install -g npm@latest
-    npm install
-    npm start
-***
+    brew update
+    brew install postgresql
+    brew services start postgresql
+    psql postgres
+    CREATE DATABASE pmr;
+    CREATE USER admin WITH PASSWORD 'admin';
+    ALTER ROLE admin SET client_encoding TO 'utf8';
+    ALTER ROLE admin SET default_transaction_isolation TO 'read committed';
+    ALTER ROLE admin SET timezone TO 'UTC';
+    ALTER USER admin CREATEDB;
+    ALTER DATABASE pmr OWNER TO admin;
+***
 
 ## Downloading the models
 ##### current directory Rekognition
@@ -107,3 +156,7 @@ python manage.py runserver 8000
 Django app can be accessed at http://localhost:8000
 
 ReactJS app can be accessed at http://localhost:3000
+
+
+
+
diff --git a/coreapi/views.py b/coreapi/views.py
index 73b9118..1c40909 100644
--- a/coreapi/views.py
+++ b/coreapi/views.py
@@ -1,6 +1,9 @@
 from django.shortcuts import render
 from rest_framework import views, status
 from rest_framework.response import Response
+from corelib.facenet.utils import handle_uploaded_file
+from Rekognition.settings import MEDIA_ROOT
+import os
 from corelib.facenet.utils import (getnewuniquefilename)
 from corelib.main_api import (facerecogniseinimage, facerecogniseinvideo,
                               createembedding, process_streaming_video,
@@ -13,7 +16,6 @@ from logger.logging import RekogntionLogger
 from rest_framework.views import APIView
 from rest_framework.parsers import MultiPartParser, FormParser
-import asyncio
 from threading import Thread
 import random
 import tracemalloc
@@ -25,7 +27,6 @@ class SceneText(views.APIView):
     """
         To localize and recognise text in an image
-
         Workflow
             *   if POST method request is made, then initially a random
                 filename is generated and then text_detect method is
@@ -65,17 +66,17 @@ def post(self, request):
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
             elif (result["Error"] == 'Text Detection Not Working'):
                 return Response(result, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
-            elif (result["Error"] == 'The media format of the requested data is not supported by the server'): 
+            elif (result["Error"] == 'The media format of the requested data is not supported by the server'):
                 return Response(result, status=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE)
-            elif (result["Error"] == 'A JSON error occurred.'): 
+            elif (result["Error"] == 'A JSON error occurred.'):
                 return Response(result, status=status.HTTP_204_NO_CONTENT)
-            elif (result["Error"] == 'A proxy error occurred.'): 
+            elif (result["Error"] == 'A proxy error occurred.'):
                 return Response(result, status=status.HTTP_407_PROXY_AUTHENTICATION_REQUIRED)
             elif (result["Error"] == 'The header value provided was somehow invalid.'):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
@@ -129,20 +130,18 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
 
 
 class NsfwRecognise(views.APIView):
     """
        To recognise whether a image is nsfw or not
-
        Workflow
            *   if POST method request is made, then initially a random
                filename is generated and then nsfwclassifier method is
                called which process the image and outputs the result
                containing the dictionary of probability of type of content
                in the image
-
        Returns:
            *   output dictionary of probability content in the image
     """
@@ -184,7 +183,7 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
@@ -238,7 +237,7 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
@@ -292,7 +291,7 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
@@ -346,27 +345,24 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
 
 
+
 class ImageFr(views.APIView):
     """
        To recognise faces in image
-
        Workflow\n
            *   if POST method request is made, then initially a random
                filename is generated and then facerecogniseinimage method
                is called which process the image and outputs the result
                containing all the information about the faces available in
                the image.
-
        Returns\n
            *   output by facerecogniseinimage
     """
-
     serializer = ImageFrSerializers
 
     def get(self, request):
-
         logger.info(msg="GET Request for Face Reocgnition made")
         serializer = self.serializer()
         return Response(serializer.data)
@@ -392,7 +388,7 @@ def post(self, request):
                 return Response(result, status=status.HTTP_200_OK)
             else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
-
+
         else:
             logger.error(msg=image_serializer.errors)
             return Response(image_serializer.errors,
@@ -401,14 +397,12 @@ def post(self, request):
 class VideoFr(views.APIView):
     """
        To recognise faces in video
-
        Workflow
            *   if POST method request is made, then initially a random
                filename is generated and then facerecogniseinvideo method
                is called which process the video and outputs the result
                containing all the information about the faces available in
                the video.
-
        Returns:
            *   output by facerecogniseinvideo
     """
@@ -420,6 +414,8 @@ def post(self, request):
         logger.info(msg="POST Request for Face Recognition in Video made")
         filename = getnewuniquefilename(request)
         input_file = request.FILES['file']
+        file_path = os.path.join(MEDIA_ROOT, 'videos', filename)
+        handle_uploaded_file(input_file, file_path)
         result = facerecogniseinvideo(input_file, filename)
         if "Error" not in result:
             end = time.time()
@@ -430,29 +426,43 @@ def post(self, request):
             tracemalloc.stop()
             return Response(result, status=status.HTTP_200_OK)
         else:
-
-            return Response(result, status=status.HTTP_400_BAD_REQUEST)
-
+            if (result["Error"] == 'An HTTP error occurred.'):
+                return Response(result, status=status.HTTP_400_BAD_REQUEST)
+            elif (result["Error"] == 'A Connection error occurred.'):
+                return Response(result, status=status.HTTP_503_SERVICE_UNAVAILABLE)
+            elif (result["Error"] == 'The request timed out.'):
+                return Response(result, status=status.HTTP_408_REQUEST_TIMEOUT)
+            elif (result["Error"] == 'Bad URL'):
+                return Response(result, status=status.HTTP_400_BAD_REQUEST)
+            elif (result["Error"] == 'Video Processing Not Working'):
+                return Response(result, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+            elif (result["Error"] == 'The media format of the requested data is not supported by the server'):
+                return Response(result, status=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE)
+            elif (result["Error"] == 'A JSON error occurred.'):
+                return Response(result, status=status.HTTP_204_NO_CONTENT)
+            elif (result["Error"] == 'A proxy error occurred.'):
+                return Response(result, status=status.HTTP_407_PROXY_AUTHENTICATION_REQUIRED)
+            elif (result["Error"] == 'The header value provided was somehow invalid.'):
+                return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
+            elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
+                return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
+            else:
+                return Response(result, status=status.HTTP_400_BAD_REQUEST)
 
 
 class EMBEDDING(views.APIView):
     """
        To create embedding of faces
-
        Workflow
            *   if GET method request is made, all the faceid are returned
-
            *   if POST method request is made, then the file is sent to
                createembedding to create the embedding
-
        Returns:
            *   POST : output whether it was successful or not
            *   GET : List the data stored in database
     """
-
     parser_classes = (MultiPartParser, FormParser)
 
     def get(self, request, *args, **kwargs):
-
         logger.info(msg="GET Request for generating embeddings made")
         embedlist = InputEmbed.objects.all()
         serializer = EmbedSerializer(embedlist, many=True)
@@ -465,6 +475,7 @@ def post(self, request):
         logger.info(msg="POST Request for generating embeddings made")
         filename = request.FILES['file'].name
         input_file = request.FILES['file']
+        # filename = getnewuniquefilename(request)
         result = createembedding(input_file, filename)
         if "Error" not in result:
             end = time.time()
@@ -475,29 +486,68 @@ def post(self, request):
             tracemalloc.stop()
             return Response(result, status=status.HTTP_200_OK)
         else:
-            return Response(result, status=status.HTTP_400_BAD_REQUEST)
-
+            return Response(result, status=status.HTTP_400_BAD_REQUEST)
+
+
+class AsyncVideoFr(views.APIView):
+    def post(self, request):
+        tracemalloc.start()
+        start = time.time()
+        filename = getnewuniquefilename(request)
+        file_path = os.path.join(MEDIA_ROOT, 'videos', filename)
+        input_file = request.FILES['file']
+        handle_uploaded_file(input_file, file_path)
+        thread = ThreadWithReturnValue(target=facerecogniseinvideo, args=(input_file,
+                                                                          filename))
+        thread.start()
+        result = thread.join()
+        end = time.time()
+        logger.info(msg="Time For Prediction = " + str(int(end - start)))
+        result['Time'] = int(end - start)
+        result["Memory"] = (tracemalloc.get_traced_memory()[1] - tracemalloc.get_traced_memory()[0]) * 0.001
+        logger.info(msg="Memory Used = " + str((tracemalloc.get_traced_memory()[1] - tracemalloc.get_traced_memory()[0]) * 0.001))
+        tracemalloc.stop()
+        if "Error" not in result:
+            return Response(result, status=status.HTTP_200_OK)
+        else:
+            if (result["Error"] == 'An HTTP error occurred.'):
+                return Response(result, status=status.HTTP_400_BAD_REQUEST)
+            elif (result["Error"] == 'A Connection error occurred.'):
+                return Response(result, status=status.HTTP_503_SERVICE_UNAVAILABLE)
+            elif (result["Error"] == 'The request timed out.'):
+                return Response(result, status=status.HTTP_408_REQUEST_TIMEOUT)
+            elif (result["Error"] == 'Bad URL'):
+                return Response(result, status=status.HTTP_400_BAD_REQUEST)
+            elif (result["Error"] == 'Face Recongiton(Video) Not Working'):
+                return Response(result, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+            elif (result["Error"] == 'The media format of the requested data is not supported by the server'):
+                return Response(result, status=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE)
+            elif (result["Error"] == 'A JSON error occurred.'):
+                return Response(result, status=status.HTTP_204_NO_CONTENT)
+            elif (result["Error"] == 'A proxy error occurred.'):
+                return Response(result, status=status.HTTP_407_PROXY_AUTHENTICATION_REQUIRED)
+            elif (result["Error"] == 'The header value provided was somehow invalid.'):
+                return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
+            elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
+                return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
+            else:
+                return Response(result, status=status.HTTP_400_BAD_REQUEST)
+
 
 class FeedbackFeature(APIView):
     """
        Feedback feature
-
        Workflow
            *   if GET method request is made, then first all the embeddings
                objects are loaded followed by randomly selecting anyone of them.
-
            *   with the help of id of the randomly selected object, an attempt
                is made to get object available in NameSuggested model. If the
                object is available then it is selected else a new object is
                created in NameSuggested model .
-
            *   All the objects having the ids are fetched and serialized and
                then passed to reponse the request.
-
            *   if POST method request is made, then first the received data is
                made mutable so later the embedding object can be included in
                the data.
-
            *   With the help of id contained in the POST request embedding
                object is fetched and attached to the data followed by
                serializing it , Now here is a catch, How the POST request
@@ -505,14 +555,11 @@ class FeedbackFeature(APIView):
                answered by the GET request. When GET request is made it sends
                a feedback_id which is used to make POST request when ever a
                new name is suggested to the faceid.
-
            *   So, if there is any action on already available NameSuggested
                object i.e. upvote or downvote then the object is updated in
                the database else a new object is made with the same id having
                upvote = downvote = 0. Here don't mix id and primary key.
                Primary key in this case is different than this id.
-
-
     """
     parser_classes = (MultiPartParser, FormParser)
@@ -529,7 +576,6 @@ def get(self, request, *args, **kwargs):
                 feedback=randomfaceobject)
             namesuggestedobject.save()
             logger.warn(msg="No names were returned, random name has been set.")
-
         namesuggestedlist = NameSuggested.objects.filter(feedback_id=randomfaceobject.id)
         serializer = NameSuggestedSerializer(namesuggestedlist, many=True)
         result = {'data': serializer.data,
@@ -538,7 +584,6 @@ def post(self, request, *args, **kwargs):
         request.data._mutable = True
-
         feedbackmodel = InputEmbed.objects.get(id=request.data["feedback_id"])
         request.data["feedback"] = feedbackmodel
         feedback_serializer = NameSuggestedSerializer(data=request.data)
@@ -560,14 +605,13 @@ def post(self, request, *args, **kwargs):
 
 
 def imagewebui(request):
-    if request.method == 'POST': 
+    if request.method == 'POST':
         if 'file' not in request.FILES:
             logger.error(msg="file not found")
             return render(request, '404.html')
         else:
             filename = getnewuniquefilename(request)
             result = facerecogniseinimage(request, filename)
-
             if "Error" not in result:
                 return render(request, 'predict_result.html',
                               {'Faces': result, 'imagefile': filename})
@@ -580,7 +624,7 @@ def imagewebui(request):
 
 
 def videowebui(request):
-    if request.method == 'POST': 
+    if request.method == 'POST':
         if 'file' not in request.FILES:
             logger.error(msg="file not found")
             return render(request, '404.html')
@@ -598,33 +642,12 @@ def videowebui(request):
     return "POST HTTP method required!"
 
 
-async def async_helper(request, filename):
-    return (facerecogniseinvideo(request, filename))
-
-
-def asyncthread(request, filename):
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    loop.run_until_complete(async_helper(request, filename))
-    loop.close()
-
-
-class AsyncVideoFr(views.APIView):
-    def post(self, request):
-        filename = getnewuniquefilename(request)
-        thread = Thread(target=asyncthread, args=(request, filename))
-        thread.start()
-        return Response(str(filename.split('.')[0]), status=status.HTTP_200_OK)
-
-
 class StreamVideoFr(views.APIView):
     """
        To recognise faces in YouTube video
-
        Workflow
            *   youtube embed link is received by reactjs post request
                then it is preprocessed to get the original youtube link
                and then it is passed
-
        Returns:
            *   output by facerecogniseinvideo
     """
@@ -667,25 +690,22 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
 
 
 class SimilarFace(views.APIView):
     """
        To recognise similar faces in two images
-
        Workflow
            *   if POST method request is made, then initially a random
                filename is generated and then similarface method is called
                which process the image and outputs the result containing
                the dictionary of file name and image id of matched face
-
        Returns:
            *   output by similarface
     """
 
     def get(self, request, *args, **kwargs):
-
         logger.info(msg="GET Request for Similar Face Recognition made")
         similarfacelist = SimilarFaceInImage.objects.all()
         serializer = SimilarFaceSerializer(similarfacelist, many=True)
@@ -729,20 +749,18 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                return Response(result, status=status.HTTP_400_BAD_REQUEST)
 
 
 class ObjectDetect(views.APIView):
     """
        To detect objects in an image
-
        Workflow
            *   if POST method request is made, then initially a random
                filename is generated and then object_detect method is
                called which process the image and outputs the result
                containing the dictionary of detected objects, confidence
                scores and bounding box coordinates
-
        Returns:
            *   output dictionary of detected objects, confidence scores
                and bounding box coordinates
@@ -785,20 +803,18 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
 
 
 class ObjectDetectVideo(views.APIView):
     """
        To detect objects in a video
-
        Workflow
            *   if POST method request is made, then initially a random
                filename is generated and then object_detect_video method is
                called which process the image and outputs the result
                containing the dictionary of detected objects, confidence
                scores and bounding box coordinates for each frame
-
        Returns:
            *   output dictionary of detected objects, confidence scores
                and bounding box coordinates for each frame of the video
@@ -814,8 +830,9 @@ def post(self, request):
         result = object_detect_video(input_file, filename)
         if "Error" not in result:
             end = time.time()
-            logger.info(msg="Time For Prediction = " + str(int(end - start)))
+
+            result['Time'] = int(end - start)
+            result["Memory"] = (tracemalloc.get_traced_memory()[1] - tracemalloc.get_traced_memory()[0]) * 0.001
             logger.info()
             tracemalloc.stop()
@@ -841,5 +858,22 @@ def post(self, request):
                 return Response(result, status=status.HTTP_411_LENGTH_REQUIRED)
             elif (result["Error"] == 'The request timed out while trying to connect to the remote server.'):
                 return Response(result, status=status.HTTP_504_GATEWAY_TIMEOUT)
-            else :
+            else:
                 return Response(result, status=status.HTTP_400_BAD_REQUEST)
+
+
+class ThreadWithReturnValue(Thread):
+    def __init__(self, group=None, target=None, name=None,
+                 args=(), kwargs=None, Verbose=None):
+        Thread.__init__(self, group, target, name, args, kwargs)
+        self._return = None
+
+    def run(self):
+        if self._target is not None:
+            self._return = self._target(*self._args,
+                                        **self._kwargs)
+
+    def join(self, *args):
+        Thread.join(self, *args)
+        return self._return
diff --git a/corelib/RetinaFace/retina_net.py b/corelib/RetinaFace/retina_net.py
index 9d40ffa..e591141 100644
--- a/corelib/RetinaFace/retina_net.py
+++ b/corelib/RetinaFace/retina_net.py
@@ -2,6 +2,7 @@
 import numpy as np
 import json
 # from scipy.misc import imresize
+from skimage.transform import resize
 import requests
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
@@ -165,7 +166,7 @@ def get_face(self, file):
                 bb[2] = int(face["x2"])
                 bb[3] = int(face["y2"])
                 cropped = img[bb[1]:bb[3], bb[0]: bb[2], :]
-                face_img = cv2.resize(cropped, size)
+                face_img = resize(cropped, size)
                 all_faces.append(face_img)
                 all_bb.append(bb)
         return all_faces, all_bb
diff --git a/corelib/acar_net/calc_mAP.py b/corelib/acar_net/calc_mAP.py
new file mode 100644
index 0000000..104d109
--- /dev/null
+++ b/corelib/acar_net/calc_mAP.py
@@ -0,0 +1,233 @@
+r"""Compute action detection performance for the AVA dataset.
+Please send any questions about this code to the Google Group ava-dataset-users:
+https://groups.google.com/forum/#!forum/ava-dataset-users
+Example usage:
+python -O calc_mAP.py \
+  -l ava/ava_action_list_v2.2_for_activitynet_2019.pbtxt.txt \
+  -g ava_val_v2.2.csv \
+  -e ava_val_excluded_timestamps_v2.2.csv \
+  -d your_results.csv
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+from collections import defaultdict
+import csv
+import heapq
+import logging
+import pprint
+import time
+import numpy as np
+
+from ava_evaluation import object_detection_evaluation, standard_fields
+
+
+def print_time(message, start):
+  logging.info("==> %g seconds to %s", time.time() - start, message)
+
+
+def make_image_key(video_id, timestamp):
+  """Returns a unique identifier for a video id & timestamp."""
+  return "%s,%04d" % (video_id, int(timestamp))
+
+
+def read_csv(csv_file, class_whitelist=None, capacity=0):
+  """Loads boxes and class labels from a CSV file in the AVA format.
+  CSV file format described at https://research.google.com/ava/download.html.
+  Args:
+    csv_file: A file object.
+    class_whitelist: If provided, boxes corresponding to (integer) class labels
+      not in this set are skipped.
+    capacity: Maximum number of labeled boxes allowed for each example.
+      Default is 0 where there is no limit.
+  Returns:
+    boxes: A dictionary mapping each unique image key (string) to a list of
+      boxes, given as coordinates [y1, x1, y2, x2].
+    labels: A dictionary mapping each unique image key (string) to a list of
+      integer class labels, matching the corresponding box in `boxes`.
+    scores: A dictionary mapping each unique image key (string) to a list of
+      score values, matching the corresponding label in `labels`. If
+      scores are not provided in the csv, then they will default to 1.0.
+  """
+  start = time.time()
+  entries = defaultdict(list)
+  boxes = defaultdict(list)
+  labels = defaultdict(list)
+  scores = defaultdict(list)
+  reader = csv.reader(csv_file)
+  for row in reader:
+    assert len(row) in [7, 8], "Wrong number of columns: " + str(row)
+    image_key = make_image_key(row[0], row[1])
+    x1, y1, x2, y2 = [float(n) for n in row[2:6]]
+    action_id = int(row[6])
+    if class_whitelist and action_id not in class_whitelist:
+      continue
+    score = 1.0
+    if len(row) == 8:
+      score = float(row[7])
+    if capacity < 1 or len(entries[image_key]) < capacity:
+      heapq.heappush(entries[image_key],
+                     (score, action_id, y1, x1, y2, x2))
+    elif score > entries[image_key][0][0]:
+      heapq.heapreplace(entries[image_key],
+                        (score, action_id, y1, x1, y2, x2))
+  for image_key in entries:
+    # Evaluation API assumes boxes with descending scores
+    entry = sorted(entries[image_key], key=lambda tup: -tup[0])
+    for item in entry:
+      score, action_id, y1, x1, y2, x2 = item
+      boxes[image_key].append([y1, x1, y2, x2])
+      labels[image_key].append(action_id)
+      scores[image_key].append(score)
+  print_time("read file " + csv_file.name, start)
+  return boxes, labels, scores
+
+
+def read_exclusions(exclusions_file):
+  """Reads a CSV file of excluded timestamps.
+  Args:
+    exclusions_file: A file object containing a csv of video-id,timestamp.
+  Returns:
+    A set of strings containing excluded image keys, e.g. "aaaaaaaaaaa,0904",
+    or an empty set if exclusions file is None.
+ """ + excluded = set() + if exclusions_file: + reader = csv.reader(exclusions_file) + for row in reader: + assert len(row) == 2, "Expected only 2 columns, got: " + row + excluded.add(make_image_key(row[0], row[1])) + return excluded + + +def read_labelmap(labelmap_file): + """Reads a labelmap without the dependency on protocol buffers. + Args: + labelmap_file: A file object containing a label map protocol buffer. + Returns: + labelmap: The label map in the form used by the object_detection_evaluation + module - a list of {"id": integer, "name": classname } dicts. + class_ids: A set containing all of the valid class id integers. + """ + labelmap = [] + class_ids = set() + name = "" + class_id = "" + for line in labelmap_file: + if line.startswith(" name:"): + name = line.split('"')[1] + elif line.startswith(" id:") or line.startswith(" label_id:"): + class_id = int(line.strip().split(" ")[-1]) + labelmap.append({"id": class_id, "name": name}) + class_ids.add(class_id) + return labelmap, class_ids + + +def run_evaluation(labelmap, groundtruth, detections, exclusions, logger): + """Runs evaluations given input files. + Args: + labelmap: file object containing map of labels to consider, in pbtxt format + groundtruth: file object + detections: file object + exclusions: file object or None. + """ + categories, class_whitelist = read_labelmap(labelmap) + logger.info("CATEGORIES (%d):\n%s", len(categories), + pprint.pformat(categories, indent=2)) + excluded_keys = read_exclusions(exclusions) + + pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator( + categories) + + # Reads the ground truth data. + boxes, labels, _ = read_csv(groundtruth, class_whitelist, 0) + start = time.time() + for image_key in boxes: + if image_key in excluded_keys: + logger.info(("Found excluded timestamp in ground truth: %s. " + "It will be ignored."), image_key) + continue + pascal_evaluator.add_single_ground_truth_image_info( + image_key, { + standard_fields.InputDataFields.groundtruth_boxes: + np.array(boxes[image_key], dtype=float), + standard_fields.InputDataFields.groundtruth_classes: + np.array(labels[image_key], dtype=int), + standard_fields.InputDataFields.groundtruth_difficult: + np.zeros(len(boxes[image_key]), dtype=bool) + }) + print_time("convert groundtruth", start) + + # Reads detections data. + boxes, labels, scores = read_csv(detections, class_whitelist, 50) + start = time.time() + for image_key in boxes: + if image_key in excluded_keys: + logger.info(("Found excluded timestamp in detections: %s. " + "It will be ignored."), image_key) + continue + pascal_evaluator.add_single_detected_image_info( + image_key, { + standard_fields.DetectionResultFields.detection_boxes: + np.array(boxes[image_key], dtype=float), + standard_fields.DetectionResultFields.detection_classes: + np.array(labels[image_key], dtype=int), + standard_fields.DetectionResultFields.detection_scores: + np.array(scores[image_key], dtype=float) + }) + print_time("convert detections", start) + + start = time.time() + metrics = pascal_evaluator.evaluate() + print_time("run_evaluator", start) + logger.info(pprint.pformat(metrics, indent=2)) + + return metrics + + +def parse_arguments(): + """Parses command-line flags. + Returns: + args: a named tuple containing three file objects args.labelmap, + args.groundtruth, and args.detections. 
+ """ + parser = argparse.ArgumentParser() + parser.add_argument( + "-l", + "--labelmap", + help="Filename of label map", + type=argparse.FileType("r"), + default="ava/ava_action_list_v2.2_for_activitynet_2019.pbtxt") + parser.add_argument( + "-g", + "--groundtruth", + help="CSV file containing ground truth.", + type=argparse.FileType("r"), + required=True) + parser.add_argument( + "-d", + "--detections", + help="CSV file containing inferred action detections.", + type=argparse.FileType("r"), + required=True) + parser.add_argument( + "-e", + "--exclusions", + help=("Optional CSV file containing videoid,timestamp pairs to exclude " + "from evaluation."), + type=argparse.FileType("r"), + required=False) + return parser.parse_args() + + +def main(): + logging.basicConfig(level=logging.INFO) + args = parse_arguments() + run_evaluation(logger=logging.getLogger(), **vars(args)) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/corelib/acar_net/main.py b/corelib/acar_net/main.py new file mode 100644 index 0000000..55bf697 --- /dev/null +++ b/corelib/acar_net/main.py @@ -0,0 +1,444 @@ +import multiprocessing as mp +mp.set_start_method('spawn', force=True) +import os +import argparse +import json +import pprint +import socket +import time +from easydict import EasyDict +import yaml +from tensorboardX import SummaryWriter + +import torch +import torch.optim as optim +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel + +from calc_mAP import run_evaluation +from datasets import ava, spatial_transforms, temporal_transforms +from distributed_utils import init_distributed +import losses +from models import AVA_model +from scheduler import get_scheduler +from utils import * + + +def main(local_rank, args): + '''dist init''' + rank, world_size = init_distributed(local_rank, args) + + with open(args.config) as f: + config = yaml.load(f, Loader=yaml.FullLoader) + + opt = EasyDict(config) + opt.world_size = world_size + + if rank == 0: + mkdir(opt.result_path) + mkdir(os.path.join(opt.result_path, 'tmp')) + with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: + json.dump(vars(opt), opt_file, indent=2) + logger = create_logger(os.path.join(opt.result_path, 'log.txt')) + logger.info('opt: {}'.format(pprint.pformat(opt, indent=2))) + + writer = SummaryWriter(os.path.join(opt.result_path, 'tb')) + else: + logger = writer = None + dist.barrier() + + random_seed(opt.manual_seed) + # setting benchmark to True causes OOM in some cases + if opt.get('cudnn', None) is not None: + torch.backends.cudnn.deterministic = opt.cudnn.get('deterministic', False) + torch.backends.cudnn.benchmark = opt.cudnn.get('benchmark', False) + + # create model + net = AVA_model(opt.model) + net.cuda() + net = DistributedDataParallel(net, device_ids=[local_rank], broadcast_buffers=False) + + if rank == 0: + logger.info(net) + logger.info(parameters_string(net)) + + if not opt.get('evaluate', False): + train_aug = opt.train.augmentation + + spatial_transform = [getattr(spatial_transforms, aug.type)(**aug.get('kwargs', {})) for aug in train_aug.spatial] + spatial_transform = spatial_transforms.Compose(spatial_transform) + + temporal_transform = getattr(temporal_transforms, train_aug.temporal.type)(**train_aug.temporal.get('kwargs', {})) + + train_data = ava.AVA( + opt.train.root_path, + opt.train.annotation_path, + spatial_transform, + temporal_transform + ) + + train_sampler = DistributedSampler(train_data, round_down=True) + + train_loader = 
+            train_data,
+            batch_size=opt.train.batch_size,
+            shuffle=False,
+            num_workers=opt.train.get('workers', 1),
+            pin_memory=True,
+            sampler=train_sampler,
+            drop_last=True
+        )
+
+        if rank == 0:
+            logger.info('# train data: {}'.format(len(train_data)))
+            logger.info('train spatial aug: {}'.format(spatial_transform))
+            logger.info('train temporal aug: {}'.format(temporal_transform))
+
+            train_logger = Logger(
+                os.path.join(opt.result_path, 'train.log'),
+                ['epoch', 'loss', 'lr'])
+            train_batch_logger = Logger(
+                os.path.join(opt.result_path, 'train_batch.log'),
+                ['epoch', 'batch', 'iter', 'loss', 'lr'])
+        else:
+            train_logger = train_batch_logger = None
+
+        optim_opt = opt.train.optimizer
+        sched_opt = opt.train.scheduler
+
+        optimizer = getattr(optim, optim_opt.type)(
+            net.parameters(),
+            lr=sched_opt.base_lr,
+            **optim_opt.kwargs
+        )
+        scheduler = get_scheduler(sched_opt, optimizer, opt.train.n_epochs, len(train_loader))
+
+    val_aug = opt.val.augmentation
+
+    transform_choices, total_choices = [], 1
+    for aug in val_aug.spatial:
+        kwargs_list = aug.get('kwargs', {})
+        if not isinstance(kwargs_list, list):
+            kwargs_list = [kwargs_list]
+        cur_choices = [getattr(spatial_transforms, aug.type)(**kwargs) for kwargs in kwargs_list]
+        transform_choices.append(cur_choices)
+        total_choices *= len(cur_choices)
+
+    spatial_transform = []
+    for choice_idx in range(total_choices):
+        idx, transform = choice_idx, []
+        for cur_choices in transform_choices:
+            n_choices = len(cur_choices)
+            cur_idx = idx % n_choices
+            transform.append(cur_choices[cur_idx])
+            idx = idx // n_choices
+        spatial_transform.append(spatial_transforms.Compose(transform))
+
+    temporal_transform = getattr(temporal_transforms, val_aug.temporal.type)(**val_aug.temporal.get('kwargs', {}))
+
+    val_data = ava.AVAmulticrop(
+        opt.val.root_path,
+        opt.val.annotation_path,
+        spatial_transform,
+        temporal_transform
+    )
+
+    val_sampler = DistributedSampler(val_data, round_down=False)
+
+    val_loader = ava.AVAmulticropDataLoader(
+        val_data,
+        batch_size=opt.val.batch_size,
+        shuffle=False,
+        num_workers=opt.val.get('workers', 1),
+        pin_memory=True,
+        sampler=val_sampler
+    )
+
+    val_logger = None
+    if rank == 0:
+        logger.info('# val data: {}'.format(len(val_data)))
+        logger.info('val spatial aug: {}'.format(spatial_transform))
+        logger.info('val temporal aug: {}'.format(temporal_transform))
+
+        val_log_items = ['epoch']
+        if opt.val.with_label:
+            val_log_items.append('loss')
+        if opt.val.get('eval_mAP', None) is not None:
+            val_log_items.append('mAP')
+        if len(val_log_items) > 1:
+            val_logger = Logger(
+                os.path.join(opt.result_path, 'val.log'),
+                val_log_items)
+
+    if opt.get('pretrain', None) is not None:
+        load_pretrain(opt.pretrain, net)
+
+    begin_epoch = 1
+    if opt.get('resume_path', None) is not None:
+        if not os.path.isfile(opt.resume_path):
+            opt.resume_path = os.path.join(opt.result_path, opt.resume_path)
+        checkpoint = torch.load(opt.resume_path, map_location=lambda storage, loc: storage.cuda())
+
+        begin_epoch = checkpoint['epoch'] + 1
+        net.load_state_dict(checkpoint['state_dict'])
+        if rank == 0:
+            logger.info('Resumed from checkpoint {}'.format(opt.resume_path))
+
+        if not opt.get('evaluate', False):
+            optimizer.load_state_dict(checkpoint['optimizer'])
+            scheduler.load_state_dict(checkpoint['scheduler'])
+            if rank == 0:
+                logger.info('Also loaded optimizer and scheduler from checkpoint {}'.format(opt.resume_path))
+
+    criterion, act_func = getattr(losses, opt.loss.type)(**opt.loss.get('kwargs', {}))
+
+    if opt.get('evaluate', False):  # evaluation mode
+        val_epoch(begin_epoch - 1, val_loader, net, criterion, act_func,
+                  opt, logger, val_logger, rank, world_size, writer)
+    else:  # training and validation mode
+        for e in range(begin_epoch, opt.train.n_epochs + 1):
+            train_sampler.set_epoch(e)
+            train_epoch(e, train_loader, net, criterion, optimizer, scheduler,
+                        opt, logger, train_logger, train_batch_logger, rank, world_size, writer)
+
+            if e % opt.train.val_freq == 0:
+                val_epoch(e, val_loader, net, criterion, act_func,
+                          opt, logger, val_logger, rank, world_size, writer)
+
+    if rank == 0:
+        writer.close()
+
+
+def train_epoch(epoch, data_loader, model, criterion, optimizer, scheduler,
+                opt, logger, epoch_logger, batch_logger, rank, world_size, writer):
+    if rank == 0:
+        logger.info('Training at epoch {}'.format(epoch))
+
+    model.train()
+
+    batch_time = AverageMeter(opt.print_freq)
+    data_time = AverageMeter(opt.print_freq)
+    loss_time = AverageMeter(opt.print_freq)
+    losses = AverageMeter(opt.print_freq)
+    global_losses = AverageMeter()
+
+    end_time = time.time()
+    for i, data in enumerate(data_loader):
+        data_time.update(time.time() - end_time)
+
+        curr_step = (epoch - 1) * len(data_loader) + i
+        scheduler.step(curr_step)
+
+        ret = model(data)
+        num_rois = ret['num_rois']
+        outputs = ret['outputs']
+        targets = ret['targets']
+
+        tot_rois = torch.Tensor([num_rois]).cuda()
+        dist.all_reduce(tot_rois)
+        tot_rois = tot_rois.item()
+
+        if tot_rois == 0:
+            end_time = time.time()
+            continue
+
+        optimizer.zero_grad()
+
+        if num_rois > 0:
+            loss = criterion(outputs, targets)
+            loss = loss * num_rois / tot_rois * world_size
+        else:
+            loss = torch.tensor(0).float().cuda()
+            for param in model.parameters():
+                if param.requires_grad:
+                    loss = loss + param.sum()
+            loss = 0. * loss
+
+        loss.backward()
+        optimizer.step()
+
+        reduced_loss = loss.clone()
+        dist.all_reduce(reduced_loss)
+        losses.update(reduced_loss.item(), tot_rois)
+        global_losses.update(reduced_loss.item(), tot_rois)
+
+        batch_time.update(time.time() - end_time)
+        end_time = time.time()
+
+        if (i + 1) % opt.print_freq == 0 and rank == 0:
+            writer.add_scalar('train/loss', losses.avg, curr_step + 1)
+            writer.add_scalar('train/lr', optimizer.param_groups[0]['lr'], curr_step + 1)
+
+            batch_logger.log({
+                'epoch': epoch,
+                'batch': i + 1,
+                'iter': curr_step + 1,
+                'loss': losses.avg,
+                'lr': optimizer.param_groups[0]['lr']
+            })
+
+            logger.info('Epoch [{0}]\t'
+                        'Iter [{1}/{2}]\t'
+                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
+                        'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
+                        'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
+                            epoch,
+                            i + 1,
+                            len(data_loader),
+                            batch_time=batch_time,
+                            data_time=data_time,
+                            loss=losses))
+
+    if rank == 0:
+        writer.add_scalar('train/epoch_loss', global_losses.avg, epoch)
+        writer.flush()
+
+        epoch_logger.log({
+            'epoch': epoch,
+            'loss': global_losses.avg,
+            'lr': optimizer.param_groups[0]['lr']
+        })
+
+        logger.info('-' * 100)
+        logger.info(
+            'Epoch [{}/{}]\t'
+            'Loss {:.4f}'.format(
+                epoch,
+                opt.train.n_epochs,
+                global_losses.avg))
+
+        if epoch % opt.train.save_freq == 0:
+            save_file_path = os.path.join(opt.result_path, 'ckpt_{}.pth.tar'.format(epoch))
+            states = {
+                'epoch': epoch,
+                'state_dict': model.state_dict(),
+                'optimizer': optimizer.state_dict(),
+                'scheduler': scheduler.state_dict()
+            }
+            torch.save(states, save_file_path)
+            logger.info('Checkpoint saved to {}'.format(save_file_path))
+
+        logger.info('-' * 100)
+
+
+def val_epoch(epoch, data_loader, model, criterion, act_func,
+              opt, logger, epoch_logger, rank, world_size, writer):
+    if rank == 0:
+        logger.info('Evaluation at epoch {}'.format(epoch))
+
+    model.eval()
+
+    calc_loss = opt.val.with_label
+    out_file = open(os.path.join(opt.result_path, 'tmp', 'predict_rank%d.csv'%rank), 'w')
+
+    batch_time = AverageMeter(opt.print_freq)
+    data_time = AverageMeter(opt.print_freq)
+    if calc_loss:
+        global_losses = AverageMeter()
+
+    end_time = time.time()
+    for i, data in enumerate(data_loader):
+        data_time.update(time.time() - end_time)
+
+        with torch.no_grad():
+            ret = model(data, evaluate=True)
+            num_rois = ret['num_rois']
+            outputs = ret['outputs']
+            targets = ret['targets']
+        if num_rois == 0:
+            end_time = time.time()
+            continue
+
+        if calc_loss:
+            loss = criterion(outputs, targets)
+            global_losses.update(loss.item(), num_rois)
+
+        fnames, mid_times, bboxes = ret['filenames'], ret['mid_times'], ret['bboxes']
+        outputs = act_func(outputs).cpu().data
+        idx_to_class = data_loader.dataset.idx_to_class
+        for k in range(num_rois):
+            prefix = "%s,%s,%.3f,%.3f,%.3f,%.3f"%(fnames[k], mid_times[k],
+                                                  bboxes[k][0], bboxes[k][1],
+                                                  bboxes[k][2], bboxes[k][3])
+            for cls in range(outputs.shape[1]):
+                score_str = '%.3f'%outputs[k][cls]
+                out_file.write(prefix + ",%d,%s\n" % (idx_to_class[cls]['id'], score_str))
+
+        batch_time.update(time.time() - end_time)
+        end_time = time.time()
+
+        if (i + 1) % opt.print_freq == 0 and rank == 0:
+            logger.info('Epoch [{0}]\t'
+                        'Iter [{1}/{2}]\t'
+                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
+                        'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
+                            epoch,
+                            i + 1,
+                            len(data_loader),
+                            batch_time=batch_time,
+                            data_time=data_time))
+
+    if calc_loss:
+        total_num = torch.Tensor([global_losses.count]).cuda()
+        loss_sum = torch.Tensor([global_losses.avg * global_losses.count]).cuda()
+        dist.all_reduce(total_num)
+        dist.all_reduce(loss_sum)
+        final_loss = loss_sum.item() / (total_num.item() + 1e-10)
+
+    out_file.close()
+    dist.barrier()
+
+    if rank == 0:
+        val_log = {'epoch': epoch}
+        val_str = 'Epoch [{}]'.format(epoch)
+
+        if calc_loss:
+            writer.add_scalar('val/epoch_loss', final_loss, epoch)
+            val_log['loss'] = final_loss
+            val_str += '\tLoss {:.4f}'.format(final_loss)
+
+        result_file = os.path.join(opt.result_path, 'predict_epoch%d.csv'%epoch)
+        with open(result_file, 'w') as of:
+            for r in range(world_size):
+                with open(os.path.join(opt.result_path, 'tmp', 'predict_rank%d.csv'%r), 'r') as f:
+                    of.writelines(f.readlines())
+
+        if opt.val.get('eval_mAP', None) is not None:
+            eval_mAP = opt.val.eval_mAP
+            metrics = run_evaluation(
+                open(eval_mAP.labelmap, 'r'),
+                open(eval_mAP.groundtruth, 'r'),
+                open(result_file, 'r'),
+                open(eval_mAP.exclusions, 'r') if eval_mAP.get('exclusions', None) is not None else None,
+                logger
+            )
+
+            mAP = metrics['PascalBoxes_Precision/mAP@0.5IOU']
+            writer.add_scalar('val/mAP', mAP, epoch)
+            val_log['mAP'] = mAP
+            val_str += '\tmAP {:.6f}'.format(mAP)
+
+        writer.flush()
+
+        if epoch_logger is not None:
+            epoch_logger.log(val_log)
+
+        logger.info('-' * 100)
+        logger.info(val_str)
+        logger.info('-' * 100)
+
+    dist.barrier()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='PyTorch AVA Training and Evaluation')
+    parser.add_argument('--config', type=str, required=True)
+    parser.add_argument('--nproc_per_node', type=int, default=8)
+    parser.add_argument('--backend', type=str, default='nccl')
+    parser.add_argument('--master_addr', type=str, default=socket.gethostbyname(socket.gethostname()))
+    parser.add_argument('--master_port', type=int, default=31114)
+    parser.add_argument('--nnodes', type=int, default=None)
+    parser.add_argument('--node_rank', type=int, default=None)
+    args = parser.parse_args()
+
+    torch.multiprocessing.spawn(main, args=(args,), nprocs=args.nproc_per_node)
\ No newline at end of file
diff --git a/corelib/acar_net/utils.py b/corelib/acar_net/utils.py
new file mode 100644
index 0000000..6061e22
--- /dev/null
+++ b/corelib/acar_net/utils.py
@@ -0,0 +1,208 @@
+import os
+import csv
+import logging
+import math
+import random
+
+import numpy as np
+import torch
+import torch.distributed as dist
+from torch.utils.data.sampler import Sampler
+
+_LOGGER = None
+
+
+def get_rank():
+    return dist.get_rank()
+
+
+def get_world_size():
+    return dist.get_world_size()
+
+
+def mkdir(path):
+    os.makedirs(path, exist_ok=True)
+
+
+def random_seed(seed_value):
+    np.random.seed(seed_value)
+    torch.manual_seed(seed_value)
+    random.seed(seed_value)
+    os.environ['PYTHONHASHSEED'] = str(seed_value)
+    torch.cuda.manual_seed(seed_value)
+    torch.cuda.manual_seed_all(seed_value)
+
+
+def parameters_string(module):
+    lines = [
+        "",
+        "List of model parameters:",
+        "=" * 105,
+    ]
+
+    row_format = "{name:<60} {shape:>27} ={total_size:>15,d}"
+    params = list(module.named_parameters())
+    for name, param in params:
+        lines.append(row_format.format(
+            name=name,
+            shape=" * ".join(str(p) for p in param.size()),
+            total_size=param.numel()
+        ))
+    lines.append("=" * 105)
+    lines.append(row_format.format(
+        name="all parameters",
+        shape="sum of above",
+        total_size=sum(int(param.numel()) for name, param in params)
+    ))
+    lines.append("")
+    return "\n".join(lines)
+
+
+def create_logger(log_file, level=logging.INFO):
+    global _LOGGER
+    if _LOGGER is not None:
+        return _LOGGER
+    l = logging.getLogger('global')
+    formatter = logging.Formatter('[%(asctime)s][%(filename)15s][line:%(lineno)4d][%(levelname)8s] %(message)s')
+    fh = logging.FileHandler(log_file)
+    fh.setFormatter(formatter)
+    sh = logging.StreamHandler()
+    sh.setFormatter(formatter)
+    l.setLevel(level)
+    l.addHandler(fh)
+    l.addHandler(sh)
+    l.propagate = False
+    _LOGGER = l
+    return l
+
+
+def get_logger():
+    return _LOGGER
+
+
+class Logger(object):
+
+    def __init__(self, path, header):
+        self.log_file = open(path, 'w')
+        self.logger = csv.writer(self.log_file, delimiter='\t')
+
+        self.logger.writerow(header)
+        self.header = header
+
+    def __del__(self):
+        self.log_file.close()
+
+    def log(self, values):
+        write_values = []
+        for col in self.header:
+            assert col in values
+            write_values.append(values[col])
+
+        self.logger.writerow(write_values)
+        self.log_file.flush()
+
+
+class AverageMeter(object):
+    def __init__(self, length=0):
+        self.length = length
+        self.reset()
+
+    def reset(self):
+        if self.length > 0:
+            self.history, self.history_num = [], []
+        else:
+            self.count = 0
+            self.sum = 0.0
+        self.val = 0.0
+        self.avg = 0.0
+
+    def update(self, val, num=1):
+        assert num > 0
+        if self.length > 0:
+            self.history.append(val * num)
+            self.history_num.append(num)
+            if len(self.history) > self.length:
+                del self.history[0]
+                del self.history_num[0]
+
+            self.val = val
+            self.avg = np.sum(self.history) / np.sum(self.history_num)
+        else:
+            self.val = val
+            self.sum += val * num
+            self.count += num
+            self.avg = self.sum / self.count
+
+
+class DistributedSampler(Sampler):
+    def __init__(self, dataset, world_size=None, rank=None, round_down=False):
+        if world_size is None:
+            world_size = get_world_size()
+        if rank is None:
+            rank = get_rank()
+        self.dataset = dataset
+        self.world_size = world_size
+        self.rank = rank
+        self.round_down = round_down
+        self.epoch = 0
+
+        self.total_size = len(self.dataset)
+        if self.round_down:
+            self.num_samples = int(math.floor(len(self.dataset) / self.world_size))
+        else:
+            self.num_samples = int(math.ceil(len(self.dataset) / self.world_size))
+
+    def __iter__(self):
+        # deterministically shuffle based on epoch
+        g = torch.Generator()
+        g.manual_seed(self.epoch)
+        indices = list(torch.randperm(len(self.dataset), generator=g))
+
+        assert len(indices) == self.total_size
+
+        # subsample
+        offset = self.num_samples * self.rank
+        indices = indices[offset:offset + self.num_samples]
+        if self.round_down:
+            assert len(indices) == self.num_samples
+
+        return iter(indices)
+
+    def __len__(self):
+        return self.num_samples
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch
+
+
+def load_pretrain(pretrain_opt, net):
+    checkpoint = torch.load(pretrain_opt.path, map_location=lambda storage, loc: storage.cuda())
+    if pretrain_opt.get('state_dict_key', None) is not None:
+        checkpoint = checkpoint[pretrain_opt.state_dict_key]
+
+    if pretrain_opt.get('delete_prefix', None):
+        keys = set(checkpoint.keys())
+        for k in keys:
+            if k.startswith(pretrain_opt.delete_prefix):
+                checkpoint.pop(k)
+    if pretrain_opt.get('replace_prefix', None) is not None:
+        keys = set(checkpoint.keys())
+        for k in keys:
+            if k.startswith(pretrain_opt.replace_prefix):
+                new_k = pretrain_opt.get('replace_to', '') + k[len(pretrain_opt.replace_prefix):]
+                checkpoint[new_k] = checkpoint.pop(k)
+    net.load_state_dict(checkpoint, strict=False)
+
+    if get_rank() == 0:
+        ckpt_keys = set(checkpoint.keys())
+        own_keys = set(net.state_dict().keys())
+        missing_keys = own_keys - ckpt_keys
+        ignore_keys = ckpt_keys - own_keys
+        loaded_keys = own_keys - missing_keys
own_keys - missing_keys + + logger = get_logger() + for k in missing_keys: + logger.info('Caution: missing key {}'.format(k)) + for k in ignore_keys: + logger.info('Caution: redundant key {}'.format(k)) + logger.info('Loaded {} key(s) from pre-trained model at {}'.format(len(loaded_keys), pretrain_opt.path)) \ No newline at end of file diff --git a/corelib/constant.py b/corelib/constant.py index c31fd78..93847d3 100644 --- a/corelib/constant.py +++ b/corelib/constant.py @@ -29,13 +29,13 @@ base_url = 'http://localhost:8501/v1/' face_exp_url = 'models/fer2013:predict' nsfw_url = 'models/nsfw:predict' -char_dict_path = './data/text_reco/char_dict_en.json' -ord_map_dict_path = './data/text_reco/ord_map_en.json' +char_dict_path = str(os.getcwd()) + '/data/text_reco/char_dict_en.json' +ord_map_dict_path = str(os.getcwd()) + '/data/text_reco/ord_map_en.json' text_reco_url = 'models/crnn_syn90k_saved_model:predict' #text_detect_url = 'models/east:predict' text_detect_url = 'models/tbpp:predict' -coco_names_path = './data/object_detect/coco.names' -scene_labels_path = './data/scene_detect/places365.txt' +coco_names_path = str(os.getcwd()) + '/data/object_detect/coco.names' +scene_labels_path = str(os.getcwd()) + '/data/scene_detect/places365.txt' #object_detect_url = 'models/yolov3:predict' object_detect_url = 'models/efficientdet:predict' scene_detect_url = 'models/places:predict' diff --git a/corelib/facenet/facenet.py b/corelib/facenet/facenet.py index 38d8956..2b107fa 100644 --- a/corelib/facenet/facenet.py +++ b/corelib/facenet/facenet.py @@ -1,4 +1,6 @@ """Functions for building the face recognition network. + Reference: + https://github.com/davidsandberg/facenet/blob/master/src/facenet.py """ @@ -68,7 +70,13 @@ def center_loss(features, label, alfa, nrof_classes): def get_image_paths_and_labels(dataset): - + """ Gets image paths and labels from the dataset sent as input + Args: + dataset: the data of images + Returns: + image paths and labels + + """ logger.info(msg="get_image_paths_and_labels called") image_paths_flat = [] labels_flat = [] @@ -79,7 +87,15 @@ def get_image_paths_and_labels(dataset): def shuffle_examples(image_paths, labels): + """Takes inputs as image paths and labels and shuffles them to create a dynamic sample + Args: + image_paths: path of images generated previously + labels: labels of images generated previously + Returns: + image paths and labels post shuffling + + """ logger.info(msg="shuffle_examples called") shuffle_list = list(zip(image_paths, labels)) random.shuffle(shuffle_list) @@ -88,7 +104,12 @@ def shuffle_examples(image_paths, labels): def random_rotate_image(image): - + """Rotates the given image using random angle generated in the function + Args: + image: the input image to perform the function on + Returns: + the rotated array of image + """ logger.info(msg="random_rotate_image called") angle = np.random.uniform(low=-10.0, high=10.0) return misc.imrotate(image, angle, 'bicubic') @@ -184,7 +205,19 @@ def _add_loss_summaries(total_loss): def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True): + """ Extensive function to train the model + Args: + total_loss: total loss incurred + global_step: the number of batches seen by the graph + optimizer: defines the type of optimizer + learning_rate: a hyperparameter controlling how much to change the model in response to the estimated + error each time the model weights are updated. For every optimizer, we keep learning_rate constant. 
+        moving_average_decay: decay rate for the exponential moving averages maintained for the trainable variables
+        update_gradient_vars: list of variables for which gradients are computed and applied
+        log_histograms: set to True to create histograms for gradients and variables
+    Returns:
+        the train op to be run for one optimization step
+    """
     logger.info(msg="train called")
     # Generate moving averages of all losses and associated summaries.
     loss_averages_op = _add_loss_summaries(total_loss)
@@ -237,7 +270,7 @@ def train(total_loss, global_step, optimizer, learning_rate, moving_average_deca

 def prewhiten(x):
-
+    """Normalizes the range of the pixel values of the input image (zero mean, unit variance) to make training easier"""
     logger.info(msg="prewhiten called")
     mean = np.mean(x)
     std = np.std(x)
@@ -247,7 +280,14 @@ def prewhiten(x):

 def crop(image, random_crop, image_size):
-
+    """Crops the input image to image_size
+    Args:
+        image: input image
+        random_crop: if True, the crop window is randomly offset; if False, the image is center-cropped
+        image_size: side length of the cropped image
+    Returns:
+        the cropped image
+    """
     logger.info(msg="crop called")
     if image.shape[1] > image_size:
         sz1 = int(image.shape[1] // 2)
@@ -263,7 +303,16 @@ def crop(image, random_crop, image_size):

 def flip(image, random_flip):
+    """Flips the entries in each row in the left/right direction.
+    Columns are preserved, but appear in a different order than before if the flip is applied
+
+    Args:
+        image: input image
+        random_flip: if True, the image is flipped horizontally with probability 0.5
+    Returns:
+        the image, flipped or unchanged
+    """
     logger.info(msg="flip called")
     if random_flip and np.random.choice([True, False]):
         image = np.fliplr(image)
@@ -271,7 +320,6 @@ def flip(image, random_flip):

 def to_rgb(img):
-
     logger.info(msg="to_rgb called")
     w, h = img.shape
     ret = np.empty((w, h, 3), dtype=np.uint8)
diff --git a/corelib/facenet/utils.py b/corelib/facenet/utils.py
index 97e3fe4..48ba648 100644
--- a/corelib/facenet/utils.py
+++ b/corelib/facenet/utils.py
@@ -1,13 +1,15 @@
 import os
 import numpy as np
 import glob
+import cv2
 import tensorflow.compat.v1 as tf
 from tensorflow.python.platform import gfile
 from corelib.facenet.facenet import get_model_filenames
 from corelib.facenet.align import detect_face
 from corelib.facenet.facenet import load_img
 # from scipy.misc import imsave
-import cv2
+# from skimage.io import imsave
+# import skimage
 from collections import defaultdict
 import string
 import random
@@ -58,7 +60,7 @@ def save_image(img, filename, upload_path):
     logger.info(msg="save_image called")
     try:
-        cv2.imwrite(os.path.join(upload_path, filename), np.squeeze(img))
+        cv2.imwrite(os.path.join(upload_path, filename), img=np.squeeze(img))
     except Exception as e:
         logger.error(msg=e)
@@ -151,7 +153,7 @@ def save_face(img, where, filename):
     logger.info(msg="save_face called")
     path = os.path.join(MEDIA_ROOT, where, str(filename) + '.jpg')
     try:
-        cv2.imwrite(path, np.squeeze(img))
+        cv2.imwrite(path, img=np.squeeze(img))
     except Exception as e:
         logger.error(msg=e)
@@ -253,6 +255,7 @@ def handle_uploaded_file(file, fname):

 def getnewuniquefilename(request):
     logger.info(msg="getnewuniquefilename called")
+    file_ext = str((request.FILES['file'].name)).split('.')[-1]
     filename = id_generator() + '.'
+ file_ext return filename diff --git a/corelib/main_api.py b/corelib/main_api.py index 06581a2..5d75519 100644 --- a/corelib/main_api.py +++ b/corelib/main_api.py @@ -5,19 +5,18 @@ import subprocess import shlex import cv2 -import os import wordninja import urllib.parse +import ffmpeg from werkzeug.utils import secure_filename from Rekognition.settings import MEDIA_ROOT from corelib.CRNN import CRNN_utils -from corelib.textbox import TBPP512_dense_separable, rbox3_to_polygon, PriorUtil +from corelib.textbox import TBPP512_dense_separable, PriorUtil from corelib.facenet.utils import (get_face, embed_image, save_embedding, identify_face, allowed_file, time_dura, handle_uploaded_file, save_face, img_standardize) -from corelib.EAST.EAST_utils import (preprocess, sort_poly, - postprocess) +from corelib.EAST.EAST_utils import (sort_poly) from corelib.constant import (pnet, rnet, onet, facenet_persistent_session, phase_train_placeholder, embeddings, images_placeholder, image_size, allowed_set, @@ -68,7 +67,7 @@ def text_reco(image): url = urllib.parse.urljoin(base_url, text_reco_url) json_response = requests.post(url, data=data, headers=headers) logger.info(msg=url) - + except requests.exceptions.HTTPError as errh: logger.error(msg=errh) return {"Error": "An HTTP error occurred."} @@ -110,7 +109,7 @@ def text_reco(image): decode_dense_shape=predictions['decodes_dense_shape'], )[0] preds = ' '.join(wordninja.split(preds)) - + return {"Text": preds} @@ -145,24 +144,24 @@ def text_detect(input_file, filename): as keys and coordinates of bounding boxes and recognized text of that box as the respective value """ - + logger.info(msg="text_detect called") file_path = os.path.join(MEDIA_ROOT, 'text', filename) handle_uploaded_file(input_file, file_path) - + img = cv2.imread(file_path)[:, :, ::-1] - img=cv2.resize(img,(512,512)) - img=cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img_resized=img.copy() - #img=cv2.resize(img,tuple(img_sh)) - #img_resized, (ratio_h, ratio_w) = preprocess(img) - #img_resized = (img_resized / 127.5) - 1 - + img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + img_resized = cv2.resize(img, (512, 512)) + + # img_resized, (ratio_h, ratio_w) = preprocess(img) + # img_resized = (img_resized / 127.5) - 1 + print("For input image of shape = ", img.shape, " returned shape is ", img_resized.shape) data = json.dumps({"signature_name": "serving_default", "inputs": [img_resized.tolist()]}) try: headers = {"content-type": "application/json"} url = urllib.parse.urljoin(base_url, text_detect_url) + json_response = requests.post(url, data=data, headers=headers) except requests.exceptions.HTTPError as errh: logger.error(msg=errh) @@ -197,33 +196,36 @@ def text_detect(input_file, filename): except Exception as e: logger.error(msg=e) return {"Error": e} - + prior_util = PriorUtil(TBPP512_dense_separable(softmax=False)) - result=np.array(json.loads(json_response.text)["outputs"]) - + result = np.array(json.loads(json_response.text)["outputs"]) predictions = prior_util.decode(result[0], .2) #score_map = np.array(predictions["pred_score_map/Sigmoid:0"], dtype="float64") #geo_map = np.array(predictions["pred_geo_map/concat:0"], dtype="float64") #boxes = postprocess(score_map=score_map, geo_map=geo_map) - boxes=predictions[:,0:4] + boxes = predictions + print("Shape of prediction = ", predictions.shape) result_boxes = [] if boxes is not None: + print("Shape of boxes = ", boxes.shape) + print("Type of box = ", type(boxes)) boxes = boxes[:, :8].reshape((-1, 4, 2)) - #boxes[:, :, 0] /= ratio_w - #boxes[:, :, 1] /= 
ratio_h - boxes[:,:,0]*=img.shape[0] - boxes[:,:,1]*=img.shape[1] + # boxes[:, :, 0] /= ratio_w + # boxes[:, :, 1] /= ratio_h + boxes[:, :, 0] *= img.shape[1] + boxes[:, :, 1] *= img.shape[0] for box in boxes: box = sort_poly(box.astype(np.int32)) + print("Shape of single box = ", box.shape) + print("Box after sort = ", box) if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5: continue result_boxes.append(box) - - + result = [] for box in result_boxes: top_left_x, top_left_y, bot_right_x, bot_right_y = bb_to_cv(box) - text=text_reco(img[top_left_y :bot_right_y,top_left_x :bot_right_x ]).get("Text") + text = text_reco(img[top_left_y:bot_right_y, top_left_x:bot_right_x]).get("Text") result.append({"Boxes": box, "Text": text}) return {"Texts": result} @@ -271,8 +273,8 @@ def text_detect_video(input_file, filename): ret, img = vid.read() if ret: img = img[:, :, ::-1] - img=cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img_resized=cv2.resize(img, (512, 512)) + img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + img_resized = cv2.resize(img, (512, 512)) #img_resized, (ratio_h, ratio_w) = preprocess(img) #img_resized = (img_resized / 127.5) - 1 data = json.dumps({"signature_name": "serving_default", @@ -315,20 +317,20 @@ def text_detect_video(input_file, filename): logger.error(msg=e) return {"Error": e} prior_util = PriorUtil(TBPP512_dense_separable(softmax=False)) - result=np.array(json.loads(json_response.text)["outputs"]) - + result = np.array(json.loads(json_response.text)["outputs"]) + predictions = prior_util.decode(result[0], .2) #score_map = np.array(predictions["pred_score_map/Sigmoid:0"], dtype="float64") #geo_map = np.array(predictions["pred_geo_map/concat:0"], dtype="float64") #boxes = postprocess(score_map=score_map, geo_map=geo_map) - boxes=predictions[:,0:4] + boxes = predictions[:, 0:4] result_boxes = [] if boxes is not None: boxes = boxes[:, :8].reshape((-1, 4, 2)) #boxes[:, :, 0] /= ratio_w #boxes[:, :, 1] /= ratio_h - boxes[:,:,0]*=img.shape[0] - boxes[:,:,1]*=img.shape[1] + boxes[:, :, 0] *= img.shape[0] + boxes[:, :, 1] *= img.shape[1] for box in boxes: box = sort_poly(box.astype(np.int32)) if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5: @@ -629,10 +631,8 @@ def nsfwclassifier(input_file, filename): file_path = os.path.join(MEDIA_ROOT, 'images', filename) handle_uploaded_file(input_file, file_path) - - img = cv2.imread(file_path)[:, :, ::-1] - img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img_resized = cv2.resize(img, (64, 64)) + img = cv2.imread(file_path) + img = cv2.resize(img, (64, 64)) if (img.shape[2] == 4): img = img[..., :3] @@ -835,6 +835,7 @@ def facerecogniseinimage(input_file, filename, network): return {"Error": e} img = cv2.imread(file_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # img = imread(fname=input_file, pilmode='RGB') if (img.shape[2] == 4): img = img[..., :3] @@ -937,7 +938,7 @@ def facerecogniseinvideo(input_file, filename): logger.info(msg="facerecogniseinvideo called") file_path = os.path.join(MEDIA_ROOT, 'videos', filename) - handle_uploaded_file(input_file, file_path) + #handle_uploaded_file(input_file, file_path) try: file_form = InputVideo(title=filename) file_form.save() @@ -952,20 +953,27 @@ def facerecogniseinvideo(input_file, filename): return {"Error": e} videofile = file_path - - videogen = cv2.VideoCapture(videofile) - - #For various OpenCV versions: - (major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.') - if int(major_ver) < 3 : - total_frame = 
int(videogen.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
-        fps = videogen.get(cv2.cv.CV_CAP_PROP_FPS)
-    else :
-        total_frame = int(videogen.get(cv2.CAP_PROP_FRAME_COUNT))
-        fps = videogen.get(cv2.CAP_PROP_FPS)
-
-    total_duration = float(total_frame / fps)  # in seconds
+    # metadata = skvideo.io.ffprobe(videofile)
+    # print("************ METADATA ***************\n", metadata)
+    # str_fps = metadata["video"]['@avg_frame_rate'].split('/')
+    # print("FPS =\n", str_fps)
+    # fps = float(float(str_fps[0]) / float(str_fps[1]))
+
+    # timestamps = [(float(1) / fps)]
+    # total_frame = float(metadata["video"]["@nb_frames"])
+    # total_duration = float(metadata["video"]["@duration"])
+
+    metadata = ffmpeg.probe(videofile)["streams"]
+    str_fps = metadata[1]['avg_frame_rate'].split('/')
+    try:
+        fps = float(float(str_fps[0]) / float(str_fps[1]))
+    except BaseException:
+        fps = 30.0
+    timestamps = [(float(1) / fps)]
+    total_frame = float(metadata[1]["nb_frames"])
+    total_duration = float(metadata[1]["duration"])

     frame_hop = int(math.ceil(fps / 10))
     gap_in_sec = (total_duration / total_frame) * frame_hop * 3 * 1000
@@ -973,54 +981,61 @@
     cele = {}
     ids = []
     cache_embeddings = {}

-    success = True
-    while success:
-        success,curr_frame = videogen.read()
-        count = count + 1
-        if count % frame_hop == 0:
-            # multiplying to get the timestamps in milliseconds
-            timestamps = (float(count) / fps) * 1000
-            try:
-                all_faces, all_bb = get_face(img=curr_frame,
-                                             pnet=pnet, rnet=rnet,
-                                             onet=onet, image_size=image_size)
-                if all_faces is not None:
-                    cele_id = []
-                    for face, bb in zip(all_faces, all_bb):
-                        embedding = embed_image(img=face,
-                                                session=facenet_persistent_session,
-                                                images_placeholder=images_placeholder,
-                                                embeddings=embeddings,
-                                                phase_train_placeholder=phase_train_placeholder,
-                                                image_size=image_size)
-                        id_name = ''
-                        if embedding_dict:
-                            if cache_embeddings:
-                                id_name = identify_face(embedding=embedding,
-                                                        embedding_dict=cache_embeddings)
-                                if id_name == "Unknown":
+    # videogen = skvideo.io.vreader(videofile)
+    cap = cv2.VideoCapture(videofile)
+    while(cap.isOpened()):
+        ret, curr_frame = cap.read()
+        if ret:
+            count = count + 1
+            if count % frame_hop == 0:
+                # multiplying to get the timestamps in milliseconds
+                timestamps = (float(count) / fps) * 1000
+                try:
+                    all_faces, all_bb = get_face(img=curr_frame,
+                                                 pnet=pnet, rnet=rnet,
+                                                 onet=onet, image_size=image_size)
+                    if all_faces is not None:
+                        cele_id = []
+                        for face, bb in zip(all_faces, all_bb):
+                            embedding = embed_image(img=face,
+                                                    session=facenet_persistent_session,
+                                                    images_placeholder=images_placeholder,
+                                                    embeddings=embeddings,
+                                                    phase_train_placeholder=phase_train_placeholder,
+                                                    image_size=image_size)
+                            id_name = ''
+                            if embedding_dict:
+                                if cache_embeddings:
+                                    id_name = identify_face(embedding=embedding,
+                                                            embedding_dict=cache_embeddings)
+                                    if id_name == "Unknown":
+                                        id_name = identify_face(embedding=embedding,
+                                                                embedding_dict=embedding_dict)
+                                    if id_name != "Unknown":
+                                        cache_embeddings[id_name] = embedding
+                                else:
                                     id_name = identify_face(embedding=embedding,
                                                             embedding_dict=embedding_dict)
-                                if id_name != "Unknown":
-                                    cache_embeddings[id_name] = embedding
-                            else:
-                                id_name = identify_face(embedding=embedding,
-                                                        embedding_dict=embedding_dict)
-                                cache_embeddings[id_name] = embedding
-
-                        if(str(id_name) not in ids):
-                            ids.append(str(id_name))
-                            cele[str(id_name)] = []
-                        cele_id.append(id_name)
-                        cele[str(id_name)].append(timestamps)
-                else:
-
logger.error(msg="No Faces") - return {"Error": 'No Faces'} - except Exception as e: - logger.error(msg=e) - return {"Error": e} + cache_embeddings[id_name] = embedding + + if(str(id_name) not in ids): + ids.append(str(id_name)) + cele[str(id_name)] = [] + cele_id.append(id_name) + cele[str(id_name)].append(timestamps) + else: + logger.error(msg="No Faces") + return {"Error": 'No Faces'} + except Exception as e: + logger.error(msg=e) + return {"Error": e} + + else: + break + cap.release() output_dur = time_dura(cele, gap_in_sec) try: with open(os.path.join(MEDIA_ROOT, 'output/video', filename.split('.')[0] + '.json'), 'w') as fp: @@ -1056,6 +1071,8 @@ def createembedding(input_file, filename): filepath = "/media/face/" + str(unid) + '.jpg' file_form = InputEmbed(id=unid, title=filename, fileurl=filepath) file_form.save() + file_path = os.path.join(MEDIA_ROOT, 'images', filename) + handle_uploaded_file(input_file, file_path) except IntegrityError as eri: logger.error(msg=eri) return {"Error": "Integrity Error"} @@ -1065,8 +1082,7 @@ def createembedding(input_file, filename): except Exception as e: logger.error(msg=e) return {"Error": e} - - img = cv2.imread(input_file)[:, :, ::-1] + img = cv2.imread(file_path) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) if (img.shape[2] == 4): img = img[..., :3] @@ -1135,8 +1151,8 @@ def process_streaming_video(url, filename): result = requests.post('http://localhost:8000/api/old_video/', files=files) except requests.exceptions.HTTPError as errh: - logger.error(msg=errh) - return {"Error": "An HTTP error occurred."} + logger.error(msg=errh) + return {"Error": "An HTTP error occurred."} except requests.exceptions.ConnectTimeout as err: logger.error(msg=err) return {"Error": "The request timed out while trying to connect to the remote server."} @@ -1278,8 +1294,8 @@ def similarface(reference_img, compare_img, filename): return {"result": [str(filename.split('.')[0]), "None"]} except requests.exceptions.HTTPError as errh: - logger.error(msg=errh) - return {"Error": "An HTTP error occurred."} + logger.error(msg=errh) + return {"Error": "An HTTP error occurred."} except requests.exceptions.ConnectTimeout as err: logger.error(msg=err) return {"Error": "The request timed out while trying to connect to the remote server."} @@ -1342,15 +1358,13 @@ def object_detect(input_file, filename): """ - - logger.info(msg="object_detect called") file_path = os.path.join(MEDIA_ROOT, 'object', filename) handle_uploaded_file(input_file, file_path) image = cv2.imread(file_path) image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) - imgh,imgw,_=image.shape - image = cv2.resize(image, (640, 640)) + imgh, imgw, _ = image.shape + image = cv2.resize(image, (640, 640)) #LINEAR = np.array(image, np.float32) / 255 data = json.dumps({"inputs": [image.tolist()]}) try: @@ -1358,8 +1372,8 @@ def object_detect(input_file, filename): url = urllib.parse.urljoin(base_url, object_detect_url) json_response = requests.post(url, data=data, headers=headers) except requests.exceptions.HTTPError as errh: - logger.error(msg=errh) - return {"Error": "An HTTP error occurred."} + logger.error(msg=errh) + return {"Error": "An HTTP error occurred."} except requests.exceptions.ConnectTimeout as err: logger.error(msg=err) return {"Error": "The request timed out while trying to connect to the remote server."} @@ -1390,15 +1404,14 @@ def object_detect(input_file, filename): except Exception as e: logger.error(msg=e) return {"Error": "ObjectDetection Not Working"} - + predictions = 
np.array(json.loads(json_response.text)["outputs"]) - - boxes, scores, classes = predictions[0][:,1:5], predictions[0][:,5], predictions[0][:,6] - nums=100 - boxes[:,0]*=(imgh/640) - boxes[:,2]*=(imgh/640) - boxes[:,1]*=(imgw/640) - boxes[:,3]*=(imgw/640) + + boxes, scores, classes = predictions[0][:, 1:5], predictions[0][:, 5], predictions[0][:, 6] + boxes[:, 0] *= (imgh / 640) + boxes[:, 2] *= (imgh / 640) + boxes[:, 1] *= (imgw / 640) + boxes[:, 3] *= (imgw / 640) # return {"Error": "ObjectDetection Not Working"} # predictions = json.loads(json_response.text).get("outputs", "Bad request made.") @@ -1452,8 +1465,8 @@ def object_detect_video(input_file, filename): ret, image = vid.read() if ret: image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) - imgh,imgw,_=image.shape - image = cv2.resize(image, (640, 640)) + imgh, imgw, _ = image.shape + image = cv2.resize(image, (640, 640)) data = json.dumps({"inputs": [image.tolist()]}) try: headers = {"content-type": "application/json"} @@ -1493,7 +1506,7 @@ def object_detect_video(input_file, filename): logger.error(msg=e) return {"Error": "Object Detection(video) Not Working"} predictions = np.array(json.loads(json_response.text)["outputs"]) - boxes, scores, classes = predictions[0][:,1:5], predictions[0][:,5], predictions[0][:,6] + boxes, scores, classes = predictions[0][:, 1:5], predictions[0][:, 5], predictions[0][:, 6] # return {"Error": "Object Detection(video) Not Working"} # predictions = json.loads(json_response.text).get("outputs", "Bad request made.") @@ -1501,11 +1514,11 @@ def object_detect_video(input_file, filename): # "yolo_nms_1"][0], predictions["yolo_nms_2"][0], predictions["yolo_nms_3"][0] result = [] - nums=100 - boxes[:,0]*=(imgh/640) - boxes[:,2]*=(imgh/640) - boxes[:,1]*=(imgw/640) - boxes[:,3]*=(imgw/640) + nums = 100 + boxes[:, 0] *= (imgh / 640) + boxes[:, 2] *= (imgh / 640) + boxes[:, 1] *= (imgw / 640) + boxes[:, 3] *= (imgw / 640) class_names = get_class_names(coco_names_path) for num in range(nums): result.append([{"Box": boxes[num]}, {"Score": scores[num]}, {"Label": class_names[int(classes[num])]}]) diff --git a/corelib/textbox.py b/corelib/textbox.py index 21736f5..b1209bf 100644 --- a/corelib/textbox.py +++ b/corelib/textbox.py @@ -9,84 +9,62 @@ from tensorflow.keras.layers import BatchNormalization from tensorflow.keras import initializers from tensorflow.keras.layers import Layer -import numpy as np +import matplotlib.pyplot as plt import tensorflow as tf - +from tqdm import tqdm +import cv2 from numpy.linalg import norm eps = 1e-10 -def rot_matrix(theta): - s, c = np.sin(theta), np.cos(theta) - return np.array([[c, -s],[s, c]]) - -def polygon_to_rbox(xy): - # center point plus width, height and orientation angle - tl, tr, br, bl = xy - # length of top and bottom edge - dt, db = tr-tl, bl-br - # center is mean of all 4 vetrices - cx, cy = c = np.sum(xy, axis=0) / len(xy) - # width is mean of top and bottom edge length - w = (norm(dt) + norm(db)) / 2. - # height is distance from center to top edge plus distance form center to bottom edge - h = norm(np.cross(dt, tl-c))/(norm(dt)+eps) + norm(np.cross(db, br-c))/(norm(db)+eps) - #h = point_line_distance(c, tl, tr) + point_line_distance(c, br, bl) - #h = (norm(tl-bl) + norm(tr-br)) / 2. - # angle is mean of top and bottom edge angle - theta = (np.arctan2(dt[0], dt[1]) + np.arctan2(db[0], db[1])) / 2. - return np.array([cx, cy, w, h, theta]) - -def rbox_to_polygon(rbox): - cx, cy, w, h, theta = rbox - box = np.array([[-w,h],[w,h],[w,-h],[-w,-h]]) / 2. 
- box = np.dot(box, rot_matrix(theta)) - box += rbox[:2] - return box def polygon_to_rbox2(xy): # two points at the top left and top right corner plus height tl, tr, br, bl = xy # length of top and bottom edge - dt, db = tr-tl, bl-br + dt, _ = tr - tl, bl - br # height is mean between distance from top to bottom right and distance from top edge to bottom left - h = (norm(np.cross(dt, tl-br)) + norm(np.cross(dt, tr-bl))) / (2*(norm(dt)+eps)) - return np.hstack((tl,tr,h)) + h = (norm(np.cross(dt, tl - br)) + norm(np.cross(dt, tr - bl))) / (2 * (norm(dt) + eps)) + return np.hstack((tl, tr, h)) + def rbox2_to_polygon(rbox): x1, y1, x2, y2, h = rbox - alpha = np.arctan2(x1-x2, y2-y1) - dx = -h*np.cos(alpha) - dy = -h*np.sin(alpha) - xy = np.reshape([x1,y1,x2,y2,x2+dx,y2+dy,x1+dx,y1+dy], (-1,2)) + alpha = np.arctan2(x1 - x2, y2 - y1) + dx = -h * np.cos(alpha) + dy = -h * np.sin(alpha) + xy = np.reshape([x1, y1, x2, y2, x2 + dx, y2 + dy, x1 + dx, y1 + dy], (-1, 2)) return xy + def polygon_to_rbox3(xy): # two points at the center of the left and right edge plus heigth tl, tr, br, bl = xy # length of top and bottom edge - dt, db = tr-tl, bl-br + dt, _ = tr - tl, bl - br # height is mean between distance from top to bottom right and distance from top edge to bottom left - h = (norm(np.cross(dt, tl-br)) + norm(np.cross(dt, tr-bl))) / (2*(norm(dt)+eps)) + h = (norm(np.cross(dt, tl - br)) + norm(np.cross(dt, tr - bl))) / (2 * (norm(dt) + eps)) p1 = (tl + bl) / 2. - p2 = (tr + br) / 2. - return np.hstack((p1,p2,h)) + p2 = (tr + br) / 2. + return np.hstack((p1, p2, h)) + def rbox3_to_polygon(rbox): x1, y1, x2, y2, h = rbox - alpha = np.arctan2(x1-x2, y2-y1) - dx = -h*np.cos(alpha) / 2. - dy = -h*np.sin(alpha) / 2. - xy = np.reshape([x1-dx,y1-dy,x2-dx,y2-dy,x2+dx,y2+dy,x1+dx,y1+dy], (-1,2)) + alpha = np.arctan2(x1 - x2, y2 - y1) + dx = -h * np.cos(alpha) / 2. + dy = -h * np.sin(alpha) / 2. + xy = np.reshape([x1 - dx, y1 - dy, x2 - dx, y2 - dy, x2 + dx, y2 + dy, x1 + dx, y1 + dy], (-1, 2)) return xy + def polygon_to_box(xy, box_format='xywh'): # minimum axis aligned bounding box containing some points - xy = np.reshape(xy, (-1,2)) + xy = np.reshape(xy, (-1, 2)) xmin, ymin = np.min(xy, axis=0) xmax, ymax = np.max(xy, axis=0) if box_format == 'xywh': - box = [xmin, ymin, xmax-xmin, ymax-ymin] + box = [xmin, ymin, xmax - xmin, ymax - ymin] elif box_format == 'xyxy': box = [xmin, ymin, xmax, ymax] if box_format == 'polygon': @@ -94,47 +72,18 @@ def polygon_to_box(xy, box_format='xywh'): return np.array(box) -def iou(box, boxes): - """Computes the intersection over union for a given axis - aligned bounding box with several others. - - # Arguments - box: Bounding box, numpy array of shape (4). - (x1, y1, x2, y2) - boxes: Reference bounding boxes, numpy array of - shape (num_boxes, 4). - - # Return - iou: Intersection over union, - numpy array of shape (num_boxes). - """ - # compute intersection - inter_upleft = np.maximum(boxes[:, :2], box[:2]) - inter_botright = np.minimum(boxes[:, 2:4], box[2:]) - inter_wh = inter_botright - inter_upleft - inter_wh = np.maximum(inter_wh, 0) - inter = inter_wh[:, 0] * inter_wh[:, 1] - # compute union - area_pred = (box[2] - box[0]) * (box[3] - box[1]) - area_gt = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) - union = area_pred + area_gt - inter - # compute iou - iou = inter / union - return iou - - def non_maximum_suppression_slow(boxes, confs, iou_threshold, top_k): """Does None-Maximum Suppresion on detection results. - + Intuitive but slow as hell!!! 
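+    (each remaining candidate is compared against every previously selected
+    box, so the cost grows quadratically with the number of boxes)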
- + # Agruments boxes: Array of bounding boxes (boxes, xmin + ymin + xmax + ymax). confs: Array of corresponding confidenc values. iou_threshold: Intersection over union threshold used for comparing overlapping boxes. top_k: Maximum number of returned indices. - + # Return List of remaining indices. """ @@ -148,61 +97,60 @@ def non_maximum_suppression_slow(boxes, confs, iou_threshold, top_k): break return selected + def non_maximum_suppression(boxes, confs, overlap_threshold, top_k): """Does None-Maximum Suppresion on detection results. - + # Agruments boxes: Array of bounding boxes (boxes, xmin + ymin + xmax + ymax). confs: Array of corresponding confidenc values. overlap_threshold: top_k: Maximum number of returned indices. - + # Return List of remaining indices. - + # References - Girshick, R. B. and Felzenszwalb, P. F. and McAllester, D. [Discriminatively Trained Deformable Part Models, Release 5](http://people.cs.uchicago.edu/~rbg/latent-release5/) """ eps = 1e-15 - + boxes = np.asarray(boxes, dtype='float32') - + pick = [] x1, y1, x2, y2 = boxes.T - + idxs = np.argsort(confs) area = (x2 - x1) * (y2 - y1) - + while len(idxs) > 0: i = idxs[-1] - + pick.append(i) if len(pick) >= top_k: break - + idxs = idxs[:-1] - + xx1 = np.maximum(x1[i], x1[idxs]) yy1 = np.maximum(y1[i], y1[idxs]) xx2 = np.minimum(x2[i], x2[idxs]) yy2 = np.minimum(y2[i], y2[idxs]) - + w = np.maximum(0, xx2 - xx1) h = np.maximum(0, yy2 - yy1) I = w * h - + overlap = I / (area[idxs] + eps) # as in Girshick et. al. - + #U = area[idxs] + area[i] - I #overlap = I / (U + eps) - - idxs = idxs[overlap <= overlap_threshold] - - return pick + idxs = idxs[overlap <= overlap_threshold] + return pick def bn_acti_conv(x, filters, kernel_size=1, stride=1, padding='same', activation='relu'): @@ -214,17 +162,19 @@ def bn_acti_conv(x, filters, kernel_size=1, stride=1, padding='same', activation x = Conv2D(filters, kernel_size, strides=stride, padding=padding)(x) return x + def dense_block(x, n, growth_rate, width=4, activation='relu'): input_shape = K.int_shape(x) c = input_shape[3] for i in range(n): x1 = x - x2 = bn_acti_conv(x, growth_rate*width, 1, 1, activation=activation) + x2 = bn_acti_conv(x, growth_rate * width, 1, 1, activation=activation) x2 = bn_acti_conv(x2, growth_rate, 3, 1, activation=activation) x = concatenate([x1, x2], axis=3) c += growth_rate return x + def downsampling_block(x, filters, width, padding='same', activation='relu'): x = BatchNormalization(scale=True)(x) x = Activation(activation)(x) @@ -237,14 +187,14 @@ def downsampling_block(x, filters, width, padding='same', activation='relu'): def ssd512_dense_separable_body(x, activation='relu'): # used for SegLink and TextBoxes++ variantes with separable convolution - + if activation == 'leaky_relu': activation = leaky_relu - + growth_rate = 48 compressed_features = 224 source_layers = [] - + x = SeparableConv2D(96, 3, depth_multiplier=32, strides=2, padding='same')(x) x = BatchNormalization(scale=True)(x) x = Activation(activation)(x) @@ -254,38 +204,37 @@ def ssd512_dense_separable_body(x, activation='relu'): x = SeparableConv2D(96, 3, depth_multiplier=1, strides=1, padding='same')(x) x = BatchNormalization(scale=True)(x) x = Activation(activation)(x) - + x = MaxPooling2D(pool_size=2, strides=2)(x) x = dense_block(x, 6, growth_rate, 4, activation) x = bn_acti_conv(x, compressed_features, 1, 1, activation=activation) - + x = MaxPooling2D(pool_size=2, strides=2, padding='same')(x) x = dense_block(x, 6, growth_rate, 4, activation) x = bn_acti_conv(x, 
compressed_features, 1, 1, activation=activation) - source_layers.append(x) # 64x64 + source_layers.append(x) # 64x64 x = MaxPooling2D(pool_size=2, strides=2)(x) x = dense_block(x, 6, growth_rate, 4, activation) x = bn_acti_conv(x, compressed_features, 1, 1, activation=activation) - source_layers.append(x) # 32x32 + source_layers.append(x) # 32x32 x = downsampling_block(x, 192, 1, activation=activation) - source_layers.append(x) # 16x16 + source_layers.append(x) # 16x16 x = downsampling_block(x, 160, 1, activation=activation) - source_layers.append(x) # 8x8 + source_layers.append(x) # 8x8 x = downsampling_block(x, 128, 1, activation=activation) - source_layers.append(x) # 4x4 + source_layers.append(x) # 4x4 x = downsampling_block(x, 96, 1, activation=activation) - source_layers.append(x) # 2x2 - + source_layers.append(x) # 2x2 + x = downsampling_block(x, 64, 1, activation=activation) - source_layers.append(x) # 1x1 - - return source_layers + source_layers.append(x) # 1x1 + return source_layers def _to_tensor(x, dtype): @@ -294,14 +243,15 @@ def _to_tensor(x, dtype): x = tf.cast(x, dtype) return x + def leaky_relu(x): """Leaky Rectified Linear activation. - + # References - [Rectifier Nonlinearities Improve Neural Network Acoustic Models](https://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf) """ - #return K.relu(x, alpha=0.1, max_value=None) - + # return K.relu(x, alpha=0.1, max_value=None) + # requires less memory than keras implementation alpha = 0.1 zero = _to_tensor(0., x.dtype.base_dtype) @@ -328,107 +278,61 @@ class Normalize(Layer): # TODO Add possibility to have one scale for all features. """ + def __init__(self, scale=20, **kwargs): self.scale = scale super(Normalize, self).__init__(**kwargs) def build(self, input_shape): - self.gamma = self.add_weight(name=self.name+'_gamma', + self.gamma = self.add_weight(name=self.name + '_gamma', shape=(input_shape[-1],), - initializer=initializers.Constant(self.scale), + initializer=initializers.Constant(self.scale), trainable=True) super(Normalize, self).build(input_shape) - + def call(self, x, mask=None): return self.gamma * K.l2_normalize(x, axis=-1) def rot_matrix(theta): s, c = np.sin(theta), np.cos(theta) - return np.array([[c, -s],[s, c]]) + return np.array([[c, -s], [s, c]]) + def polygon_to_rbox(xy): # center point plus width, height and orientation angle tl, tr, br, bl = xy # length of top and bottom edge - dt, db = tr-tl, bl-br + dt, db = tr - tl, bl - br # center is mean of all 4 vetrices cx, cy = c = np.sum(xy, axis=0) / len(xy) # width is mean of top and bottom edge length w = (norm(dt) + norm(db)) / 2. # height is distance from center to top edge plus distance form center to bottom edge - h = norm(np.cross(dt, tl-c))/(norm(dt)+eps) + norm(np.cross(db, br-c))/(norm(db)+eps) + h = norm(np.cross(dt, tl - c)) / (norm(dt) + eps) + norm(np.cross(db, br - c)) / (norm(db) + eps) #h = point_line_distance(c, tl, tr) + point_line_distance(c, br, bl) #h = (norm(tl-bl) + norm(tr-br)) / 2. # angle is mean of top and bottom edge angle theta = (np.arctan2(dt[0], dt[1]) + np.arctan2(db[0], db[1])) / 2. return np.array([cx, cy, w, h, theta]) + def rbox_to_polygon(rbox): cx, cy, w, h, theta = rbox - box = np.array([[-w,h],[w,h],[w,-h],[-w,-h]]) / 2. + box = np.array([[-w, h], [w, h], [w, -h], [-w, -h]]) / 2. 
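+    # corner template laid out around the origin; it is rotated by theta and
+    # shifted to the box center (cx, cy) below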
box = np.dot(box, rot_matrix(theta)) box += rbox[:2] return box -def polygon_to_rbox2(xy): - # two points at the top left and top right corner plus height - tl, tr, br, bl = xy - # length of top and bottom edge - dt, db = tr-tl, bl-br - # height is mean between distance from top to bottom right and distance from top edge to bottom left - h = (norm(np.cross(dt, tl-br)) + norm(np.cross(dt, tr-bl))) / (2*(norm(dt)+eps)) - return np.hstack((tl,tr,h)) - -def rbox2_to_polygon(rbox): - x1, y1, x2, y2, h = rbox - alpha = np.arctan2(x1-x2, y2-y1) - dx = -h*np.cos(alpha) - dy = -h*np.sin(alpha) - xy = np.reshape([x1,y1,x2,y2,x2+dx,y2+dy,x1+dx,y1+dy], (-1,2)) - return xy - -def polygon_to_rbox3(xy): - # two points at the center of the left and right edge plus heigth - tl, tr, br, bl = xy - # length of top and bottom edge - dt, db = tr-tl, bl-br - # height is mean between distance from top to bottom right and distance from top edge to bottom left - h = (norm(np.cross(dt, tl-br)) + norm(np.cross(dt, tr-bl))) / (2*(norm(dt)+eps)) - p1 = (tl + bl) / 2. - p2 = (tr + br) / 2. - return np.hstack((p1,p2,h)) - -def rbox3_to_polygon(rbox): - x1, y1, x2, y2, h = rbox - alpha = np.arctan2(x1-x2, y2-y1) - dx = -h*np.cos(alpha) / 2. - dy = -h*np.sin(alpha) / 2. - xy = np.reshape([x1-dx,y1-dy,x2-dx,y2-dy,x2+dx,y2+dy,x1+dx,y1+dy], (-1,2)) - return xy - -def polygon_to_box(xy, box_format='xywh'): - # minimum axis aligned bounding box containing some points - xy = np.reshape(xy, (-1,2)) - xmin, ymin = np.min(xy, axis=0) - xmax, ymax = np.max(xy, axis=0) - if box_format == 'xywh': - box = [xmin, ymin, xmax-xmin, ymax-ymin] - elif box_format == 'xyxy': - box = [xmin, ymin, xmax, ymax] - if box_format == 'polygon': - box = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] - return np.array(box) - def iou(box, boxes): - """Computes the intersection over union for a given axis + """Computes the intersection over union for a given axis aligned bounding box with several others. # Arguments box: Bounding box, numpy array of shape (4). (x1, y1, x2, y2) - boxes: Reference bounding boxes, numpy array of + boxes: Reference bounding boxes, numpy array of shape (num_boxes, 4). # Return @@ -450,117 +354,39 @@ def iou(box, boxes): return iou -def non_maximum_suppression_slow(boxes, confs, iou_threshold, top_k): - """Does None-Maximum Suppresion on detection results. - - Intuitive but slow as hell!!! - - # Agruments - boxes: Array of bounding boxes (boxes, xmin + ymin + xmax + ymax). - confs: Array of corresponding confidenc values. - iou_threshold: Intersection over union threshold used for comparing - overlapping boxes. - top_k: Maximum number of returned indices. - - # Return - List of remaining indices. - """ - idxs = np.argsort(-confs) - selected = [] - for idx in idxs: - if np.any(iou(boxes[idx], boxes[selected]) >= iou_threshold): - continue - selected.append(idx) - if len(selected) >= top_k: - break - return selected - -def non_maximum_suppression(boxes, confs, overlap_threshold, top_k): - """Does None-Maximum Suppresion on detection results. - - # Agruments - boxes: Array of bounding boxes (boxes, xmin + ymin + xmax + ymax). - confs: Array of corresponding confidenc values. - overlap_threshold: - top_k: Maximum number of returned indices. - - # Return - List of remaining indices. - - # References - - Girshick, R. B. and Felzenszwalb, P. F. and McAllester, D. 
-       [Discriminatively Trained Deformable Part Models, Release 5](http://people.cs.uchicago.edu/~rbg/latent-release5/)
-    """
-    eps = 1e-15
-
-    boxes = np.asarray(boxes, dtype='float32')
-
-    pick = []
-    x1, y1, x2, y2 = boxes.T
-
-    idxs = np.argsort(confs)
-    area = (x2 - x1) * (y2 - y1)
-
-    while len(idxs) > 0:
-        i = idxs[-1]
-
-        pick.append(i)
-        if len(pick) >= top_k:
-            break
-
-        idxs = idxs[:-1]
-
-        xx1 = np.maximum(x1[i], x1[idxs])
-        yy1 = np.maximum(y1[i], y1[idxs])
-        xx2 = np.minimum(x2[i], x2[idxs])
-        yy2 = np.minimum(y2[i], y2[idxs])
-
-        w = np.maximum(0, xx2 - xx1)
-        h = np.maximum(0, yy2 - yy1)
-        I = w * h
-
-        overlap = I / (area[idxs] + eps)
-        # as in Girshick et. al.
-
-        #U = area[idxs] + area[i] - I
-        #overlap = I / (U + eps)
-
-        idxs = idxs[overlap <= overlap_threshold]
-
-    return pick
-

 class PriorMap(object):
     """Handles prior boxes for a given feature map.
-
+
     # Arguments / Attributes
         source_layer_name
         image_size: Tuple with spatial size of model input.
         map_size
         variances
         aspect_ratios: List of aspect ratios for the prior boxes at each
            location.
-        shift: List of tuples for the displacement of the prior boxes
-            relative to ther location. Each tuple contains an value between
+        shift: List of tuples for the displacement of the prior boxes
+            relative to their location. Each tuple contains a value between
            -1.0 and 1.0 for x and y direction.
-        clip: Boolean, whether the boxes should be cropped to do not exceed
+        clip: Boolean, whether the boxes should be cropped so that they do not exceed
            the borders of the input image.
        step
        minmax_size: List of tuples with s_min and s_max values (see paper).
-        special_ssd_box: Boolean, wether or not the extra box for aspect
+        special_ssd_box: Boolean, whether or not the extra box for aspect
            ratio 1 is used.
-
+
    # Notes
        The compute_priors method has to be called to get usable prior boxes.
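+
+    # Example
+        Illustrative only; the layer name, sizes and aspect ratios are made up:
+
+            pm = PriorMap('conv4', image_size=(512, 512), map_size=(64, 64),
+                          minmax_size=(51, 117), aspect_ratios=[1, 2, 3])
+            pm.compute_priors()
+            pm.priors  # shape (64 * 64 * 3, 8): xmin, ymin, xmax, ymax + 4 variances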
""" - def __init__(self, source_layer_name, image_size, map_size, - minmax_size=None, variances=[0.1, 0.1, 0.2, 0.2], + + def __init__(self, source_layer_name, image_size, map_size, + minmax_size=None, variances=[0.1, 0.1, 0.2, 0.2], aspect_ratios=[1], shift=None, clip=False, step=None, special_ssd_box=False): - + self.__dict__.update(locals()) - - #self.compute_priors() - + + # self.compute_priors() + def __str__(self): s = '' for a in ['source_layer_name', @@ -576,24 +402,24 @@ def __str__(self): ]: s += '%-24s %s\n' % (a, getattr(self, a)) return s - + @property def num_boxes_per_location(self): return len(self.box_wh) - + @property def num_locations(self): return len(self.box_xy) - + @property def num_boxes(self): - return len(self.box_xy) * len(self.box_wh) # len(self.priors) - + return len(self.box_xy) * len(self.box_wh) # len(self.priors) + def compute_priors(self): image_h, image_w = image_size = self.image_size map_h, map_w = map_size = self.map_size min_size, max_size = self.minmax_size - + # define centers of prior boxes if self.step is None: step_x = image_w / map_w @@ -601,154 +427,153 @@ def compute_priors(self): assert step_x % 1 == 0 and step_y % 1 == 0, 'map size %s not constiten with input size %s' % (map_size, image_size) else: step_x = step_y = self.step - + linx = np.array([(0.5 + i) for i in range(map_w)]) * step_x liny = np.array([(0.5 + i) for i in range(map_h)]) * step_y - box_xy = np.array(np.meshgrid(linx, liny)).reshape(2,-1).T - + box_xy = np.array(np.meshgrid(linx, liny)).reshape(2, -1).T + if self.shift is None: - shift = [(0.0,0.0)] * len(self.aspect_ratios) + shift = [(0.0, 0.0)] * len(self.aspect_ratios) else: shift = self.shift - + box_wh = [] box_shift = [] for i in range(len(self.aspect_ratios)): ar = self.aspect_ratios[i] box_wh.append([min_size * np.sqrt(ar), min_size / np.sqrt(ar)]) box_shift.append(shift[i]) - if ar == 1 and self.special_ssd_box: # special SSD box + if ar == 1 and self.special_ssd_box: # special SSD box box_wh.append([np.sqrt(min_size * max_size), np.sqrt(min_size * max_size)]) - box_shift.append((0.0,0.0)) + box_shift.append((0.0, 0.0)) box_wh = np.asarray(box_wh) - + box_shift = np.asarray(box_shift) box_shift = np.clip(box_shift, -1.0, 1.0) - box_shift = box_shift * np.array([step_x, step_y]) # percent to pixels - + box_shift = box_shift * np.array([step_x, step_y]) # percent to pixels + # values for individual prior boxes - priors_shift = np.tile(box_shift, (len(box_xy),1)) + priors_shift = np.tile(box_shift, (len(box_xy), 1)) priors_xy = np.repeat(box_xy, len(box_wh), axis=0) + priors_shift - priors_wh = np.tile(box_wh, (len(box_xy),1)) - + priors_wh = np.tile(box_wh, (len(box_xy), 1)) + priors_min_xy = priors_xy - priors_wh / 2. priors_max_xy = priors_xy + priors_wh / 2. 
- + if self.clip: - priors_min_xy[:,0] = np.clip(priors_min_xy[:,0], 0, image_w) - priors_min_xy[:,1] = np.clip(priors_min_xy[:,1], 0, image_h) - priors_max_xy[:,0] = np.clip(priors_max_xy[:,0], 0, image_w) - priors_max_xy[:,1] = np.clip(priors_max_xy[:,1], 0, image_h) - - priors_variances = np.tile(self.variances, (len(priors_xy),1)) - + priors_min_xy[:, 0] = np.clip(priors_min_xy[:, 0], 0, image_w) + priors_min_xy[:, 1] = np.clip(priors_min_xy[:, 1], 0, image_h) + priors_max_xy[:, 0] = np.clip(priors_max_xy[:, 0], 0, image_w) + priors_max_xy[:, 1] = np.clip(priors_max_xy[:, 1], 0, image_h) + + priors_variances = np.tile(self.variances, (len(priors_xy), 1)) + self.box_xy = box_xy self.box_wh = box_wh self.box_shfit = box_shift - + self.priors_xy = priors_xy self.priors_wh = priors_wh self.priors_min_xy = priors_min_xy self.priors_max_xy = priors_max_xy self.priors_variances = priors_variances self.priors = np.concatenate([priors_min_xy, priors_max_xy, priors_variances], axis=1) - + def plot_locations(self, color='r'): xy = self.box_xy - plt.plot(xy[:,0], xy[:,1], '.', color=color, markersize=6) - + plt.plot(xy[:, 0], xy[:, 1], '.', color=color, markersize=6) + def plot_boxes(self, location_idxs=[]): colors = 'rgbcmy' ax = plt.gca() n = self.num_boxes_per_location for i in location_idxs: for j in range(n): - idx = i*n+j + idx = i * n + j if idx >= self.num_boxes: break x1, y1, x2, y2 = self.priors[idx, :4] - ax.add_patch(plt.Rectangle((x1, y1), x2-x1, y2-y1, - fill=False, edgecolor=colors[j%len(colors)], linewidth=2)) + ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, + fill=False, edgecolor=colors[j % len(colors)], linewidth=2)) ax.autoscale_view() - - class SSDPriorUtil(object): """Utility for SSD prior boxes. """ + def __init__(self, model, aspect_ratios=None, shifts=None, - minmax_sizes=None, steps=None, scale=None, clips=None, - special_ssd_boxes=None, ssd_assignment=None): - + minmax_sizes=None, steps=None, scale=None, clips=None, + special_ssd_boxes=None, ssd_assignment=None): + source_layers_names = [l.name.split('/')[0] for l in model.source_layers] self.source_layers_names = source_layers_names - + self.model = model self.image_size = model.input_shape[1:3] - + num_maps = len(source_layers_names) - + # take parameters from model definition if they exist there if aspect_ratios is None: if hasattr(model, 'aspect_ratios'): aspect_ratios = model.aspect_ratios else: aspect_ratios = [[1]] * num_maps - + if shifts is None: if hasattr(model, 'shifts'): shifts = model.shifts else: shifts = [None] * num_maps - + if minmax_sizes is None: if hasattr(model, 'minmax_sizes'): minmax_sizes = model.minmax_sizes else: # as in equation (4) min_dim = np.min(self.image_size) - min_ratio = 10 # 15 - max_ratio = 100 # 90 - s = np.linspace(min_ratio, max_ratio, num_maps+1) * min_dim / 100. - minmax_sizes = [(round(s[i]), round(s[i+1])) for i in range(len(s)-1)] - + min_ratio = 10 # 15 + max_ratio = 100 # 90 + s = np.linspace(min_ratio, max_ratio, num_maps + 1) * min_dim / 100. 
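+                # e.g. a 512x512 input with 7 feature maps gives sizes [51, 117, 183, 249, 315, 380, 446, 512]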
+ minmax_sizes = [(round(s[i]), round(s[i + 1])) for i in range(len(s) - 1)] + if scale is None: if hasattr(model, 'scale'): scale = model.scale else: scale = 1.0 minmax_sizes = np.array(minmax_sizes) * scale - + if steps is None: if hasattr(model, 'steps'): steps = model.steps else: steps = [None] * num_maps - + if clips is None: if hasattr(model, 'clips'): clips = model.clips else: clips = False - if type(clips) == bool: + if isinstance(clips, bool): clips = [clips] * num_maps - + if special_ssd_boxes is None: if hasattr(model, 'special_ssd_boxes'): special_ssd_boxes = model.special_ssd_boxes else: special_ssd_boxes = False - if type(special_ssd_boxes) == bool: + if isinstance(special_ssd_boxes, bool): special_ssd_boxes = [special_ssd_boxes] * num_maps - + if ssd_assignment is None: if hasattr(model, 'ssd_assignment'): ssd_assignment = model.ssd_assignment else: ssd_assignment = True self.ssd_assignment = ssd_assignment - + self.prior_maps = [] for i in range(num_maps): layer = model.get_layer(source_layers_names[i]) @@ -765,14 +590,14 @@ def __init__(self, model, aspect_ratios=None, shifts=None, clip=clips[i]) self.prior_maps.append(m) self.update_priors() - + self.nms_top_k = 400 self.nms_thresh = 0.45 - + @property def num_maps(self): return len(self.prior_maps) - + def update_priors(self): priors_xy = [] priors_wh = [] @@ -780,14 +605,14 @@ def update_priors(self): priors_max_xy = [] priors_variances = [] priors = [] - + map_offsets = [0] for i in range(len(self.prior_maps)): m = self.prior_maps[i] - + # compute prior boxes m.compute_priors() - + # collect prior data priors_xy.append(m.priors_xy) priors_wh.append(m.priors_wh) @@ -795,8 +620,8 @@ def update_priors(self): priors_max_xy.append(m.priors_max_xy) priors_variances.append(m.priors_variances) priors.append(m.priors) - map_offsets.append(map_offsets[-1]+len(m.priors)) - + map_offsets.append(map_offsets[-1] + len(m.priors)) + self.priors_xy = np.concatenate(priors_xy, axis=0) self.priors_wh = np.concatenate(priors_wh, axis=0) self.priors_min_xy = np.concatenate(priors_min_xy, axis=0) @@ -804,7 +629,7 @@ def update_priors(self): self.priors_variances = np.concatenate(priors_variances, axis=0) self.priors = np.concatenate(priors, axis=0) self.map_offsets = map_offsets - + # normalized prior boxes image_wh = self.image_size[::-1] self.priors_xy_norm = self.priors_xy / image_wh @@ -812,11 +637,10 @@ def update_priors(self): self.priors_min_xy_norm = self.priors_min_xy / image_wh self.priors_max_xy_norm = self.priors_max_xy / image_wh self.priors_norm = np.concatenate([self.priors_min_xy_norm, self.priors_max_xy_norm, self.priors_variances], axis=1) - - + def encode(self, gt_data, overlap_threshold=0.45, debug=False): # calculation is done with normalized sizes - + # TODO: empty ground truth if gt_data.shape[0] == 0: print('gt_data', type(gt_data), gt_data.shape) @@ -824,19 +648,19 @@ def encode(self, gt_data, overlap_threshold=0.45, debug=False): num_classes = self.model.num_classes num_priors = self.priors.shape[0] - gt_boxes = self.gt_boxes = np.copy(gt_data[:,:4]) # normalized xmin, ymin, xmax, ymax - gt_class_idx = np.asarray(gt_data[:,-1]+0.5, dtype=np.int) - gt_one_hot = np.zeros([len(gt_class_idx),num_classes]) - gt_one_hot[range(len(gt_one_hot)),gt_class_idx] = 1 # one_hot classes including background - - gt_min_xy = gt_boxes[:,0:2] - gt_max_xy = gt_boxes[:,2:4] - gt_xy = (gt_boxes[:,2:4] + gt_boxes[:,0:2]) / 2. 
- gt_wh = gt_boxes[:,2:4] - gt_boxes[:,0:2] - + gt_boxes = self.gt_boxes = np.copy(gt_data[:, :4]) # normalized xmin, ymin, xmax, ymax + gt_class_idx = np.asarray(gt_data[:, -1] + 0.5, dtype=np.int) + gt_one_hot = np.zeros([len(gt_class_idx), num_classes]) + gt_one_hot[range(len(gt_one_hot)), gt_class_idx] = 1 # one_hot classes including background + + gt_min_xy = gt_boxes[:, 0:2] + gt_max_xy = gt_boxes[:, 2:4] + gt_xy = (gt_boxes[:, 2:4] + gt_boxes[:, 0:2]) / 2. + gt_wh = gt_boxes[:, 2:4] - gt_boxes[:, 0:2] + gt_iou = np.array([iou(b, self.priors_norm) for b in gt_boxes]).T max_idxs = np.argmax(gt_iou, axis=1) - + priors_xy = self.priors_xy_norm priors_wh = self.priors_wh_norm @@ -848,35 +672,35 @@ def encode(self, gt_data, overlap_threshold=0.45, debug=False): prior_mask = max_val > overlap_threshold match_indices = max_idxs[prior_mask] else: - prior_area = np.product(priors_wh, axis=-1)[:,None] - gt_area = np.product(gt_wh, axis=-1)[:,None] - - priors_ar = priors_wh[:,0] / priors_wh[:,1] - gt_ar = gt_wh[:,0] / gt_wh[:,1] - + prior_area = np.product(priors_wh, axis=-1)[:, None] + gt_area = np.product(gt_wh, axis=-1)[:, None] + + priors_ar = priors_wh[:, 0] / priors_wh[:, 1] + gt_ar = gt_wh[:, 0] / gt_wh[:, 1] + match_mask = np.array([np.concatenate([ - priors_xy >= gt_min_xy[i], - priors_xy <= gt_max_xy[i], - #priors_wh >= 0.5 * gt_wh[i], - #priors_wh <= 2.0 * gt_wh[i], - #prior_area >= 0.25 * gt_area[i], - #prior_area <= 4.0 * gt_area[i], - prior_area >= 0.0625 * gt_area[i], - prior_area <= 1.0 * gt_area[i], - #((priors_ar < 1.0) == (gt_ar[i] < 1.0))[:,None], - (np.abs(priors_ar - gt_ar[i]) < 0.5)[:,None], - max_idxs[:,None] == i - ], axis=-1) for i in range(len(gt_boxes))]) + priors_xy >= gt_min_xy[i], + priors_xy <= gt_max_xy[i], + #priors_wh >= 0.5 * gt_wh[i], + #priors_wh <= 2.0 * gt_wh[i], + #prior_area >= 0.25 * gt_area[i], + #prior_area <= 4.0 * gt_area[i], + prior_area >= 0.0625 * gt_area[i], + prior_area <= 1.0 * gt_area[i], + #((priors_ar < 1.0) == (gt_ar[i] < 1.0))[:,None], + (np.abs(priors_ar - gt_ar[i]) < 0.5)[:, None], + max_idxs[:, None] == i + ], axis=-1) for i in range(len(gt_boxes))]) self.match_mask = match_mask match_mask = np.array([np.all(m, axis=-1) for m in match_mask]).T prior_mask = np.any(match_mask, axis=-1) - match_indices = np.argmax(match_mask[prior_mask,:], axis=-1) - + match_indices = np.argmax(match_mask[prior_mask, :], axis=-1) + self.match_indices = dict(zip(list(np.ix_(prior_mask)[0]), list(match_indices))) # prior labels confidence = np.zeros((num_priors, num_classes)) - confidence[:,0] = 1 + confidence[:, 0] = 1 confidence[prior_mask] = gt_one_hot[match_indices] # compute local offsets from ground truth boxes @@ -884,70 +708,69 @@ def encode(self, gt_data, overlap_threshold=0.45, debug=False): gt_wh = gt_wh[match_indices] priors_xy = priors_xy[prior_mask] priors_wh = priors_wh[prior_mask] - priors_variances = self.priors_variances[prior_mask,:] + priors_variances = self.priors_variances[prior_mask, :] offsets = np.zeros((num_priors, 4)) - offsets[prior_mask,0:2] = (gt_xy - priors_xy) / priors_wh - offsets[prior_mask,2:4] = np.log(gt_wh / priors_wh) - offsets[prior_mask,0:4] /= priors_variances + offsets[prior_mask, 0:2] = (gt_xy - priors_xy) / priors_wh + offsets[prior_mask, 2:4] = np.log(gt_wh / priors_wh) + offsets[prior_mask, 0:4] /= priors_variances return np.concatenate([offsets, confidence], axis=1) - - + def decode(self, model_output, confidence_threshold=0.01, keep_top_k=200, fast_nms=True, sparse=True): # calculation is done with 
normalized sizes - - prior_mask = model_output[:,4:] > confidence_threshold + + prior_mask = model_output[:, 4:] > confidence_threshold image_wh = self.image_size[::-1] - + if sparse: # compute boxes only if the confidence is high enough and the class is not background - mask = np.any(prior_mask[:,1:], axis=1) + mask = np.any(prior_mask[:, 1:], axis=1) prior_mask = prior_mask[mask] mask = np.ix_(mask)[0] model_output = model_output[mask] priors_xy = self.priors_xy[mask] / image_wh priors_wh = self.priors_wh[mask] / image_wh - priors_variances = self.priors_variances[mask,:] + priors_variances = self.priors_variances[mask, :] else: priors_xy = self.priors_xy / image_wh priors_wh = self.priors_wh / image_wh priors_variances = self.priors_variances - - offsets = model_output[:,:4] - confidence = model_output[:,4:] - + + offsets = model_output[:, :4] + confidence = model_output[:, 4:] + num_priors = offsets.shape[0] num_classes = confidence.shape[1] # compute bounding boxes from local offsets boxes = np.empty((num_priors, 4)) offsets = offsets * priors_variances - boxes_xy = priors_xy + offsets[:,0:2] * priors_wh - boxes_wh = priors_wh * np.exp(offsets[:,2:4]) - boxes[:,0:2] = boxes_xy - boxes_wh / 2. # xmin, ymin - boxes[:,2:4] = boxes_xy + boxes_wh / 2. # xmax, ymax + boxes_xy = priors_xy + offsets[:, 0:2] * priors_wh + boxes_wh = priors_wh * np.exp(offsets[:, 2:4]) + boxes[:, 0:2] = boxes_xy - boxes_wh / 2. # xmin, ymin + boxes[:, 2:4] = boxes_xy + boxes_wh / 2. # xmax, ymax boxes = np.clip(boxes, 0.0, 1.0) - + # do non maximum suppression results = [] for c in range(1, num_classes): - mask = prior_mask[:,c] + mask = prior_mask[:, c] boxes_to_process = boxes[mask] if len(boxes_to_process) > 0: confs_to_process = confidence[mask, c] - + if fast_nms: idx = non_maximum_suppression( - boxes_to_process, confs_to_process, - self.nms_thresh, self.nms_top_k) + boxes_to_process, confs_to_process, + self.nms_thresh, self.nms_top_k) else: idx = non_maximum_suppression_slow( - boxes_to_process, confs_to_process, - self.nms_thresh, self.nms_top_k) - + boxes_to_process, confs_to_process, + self.nms_thresh, self.nms_top_k) + good_boxes = boxes_to_process[idx] good_confs = confs_to_process[idx][:, None] - labels = np.ones((len(idx),1)) * c + labels = np.ones((len(idx), 1)) * c c_pred = np.concatenate((good_boxes, good_confs, labels), axis=1) results.extend(c_pred) if len(results) > 0: @@ -956,53 +779,53 @@ def decode(self, model_output, confidence_threshold=0.01, keep_top_k=200, fast_n results = results[order] results = results[:keep_top_k] else: - results = np.empty((0,6)) + results = np.empty((0, 6)) self.results = results return results - + def compute_class_weights(self, gt_util, num_samples=np.inf): - """Computes weighting factors for the classification loss by considering + """Computes weighting factors for the classification loss by considering the inverse frequency of class instance in local ground truth. """ s = np.zeros(gt_util.num_classes) for i in tqdm(range(min(gt_util.num_samples, num_samples))): egt = self.encode(gt_util.data[i]) - s += np.sum(egt[:,-gt_util.num_classes:], axis=0) - si = 1/s - return si/np.sum(si) * len(s) - + s += np.sum(egt[:, -gt_util.num_classes:], axis=0) + si = 1 / s + return si / np.sum(si) * len(s) + def show_image(self, img): """Resizes an image to the network input size and shows it in the current figure. 
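+        Expects a BGR image (as returned by cv2.imread); the channels are
+        reordered to RGB before plotting.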
""" image_wh = self.image_size[::-1] img = cv2.resize(img, image_wh, cv2.INTER_LINEAR) - img = img[:, :, (2,1,0)] # BGR to RGB + img = img[:, :, (2, 1, 0)] # BGR to RGB img = img / 256. plt.imshow(img) - + def plot_assignment(self, map_idx): ax = plt.gca() im = plt.gci() image_h, image_w = image_size = im.get_size() - + # ground truth boxes = self.gt_boxes - boxes_x = (boxes[:,0] + boxes[:,2]) / 2. * image_w - boxes_y = (boxes[:,1] + boxes[:,3]) / 2. * image_h + boxes_x = (boxes[:, 0] + boxes[:, 2]) / 2. * image_w + boxes_y = (boxes[:, 1] + boxes[:, 3]) / 2. * image_h for box in boxes: xy_rec = to_rec(box[:4], image_size) ax.add_patch(plt.Polygon(xy_rec, fill=False, edgecolor='b', linewidth=2)) - plt.plot(boxes_x, boxes_y, 'bo', markersize=6) - + plt.plot(boxes_x, boxes_y, 'bo', markersize=6) + # prior boxes for idx, box_idx in self.match_indices.items(): - if idx >= self.map_offsets[map_idx] and idx < self.map_offsets[map_idx+1]: + if idx >= self.map_offsets[map_idx] and idx < self.map_offsets[map_idx + 1]: x, y = self.priors_xy[idx] w, h = self.priors_wh[idx] - plt.plot(x, y, 'ro', markersize=4) + plt.plot(x, y, 'ro', markersize=4) plt.plot([x, boxes_x[box_idx]], [y, boxes_y[box_idx]], '-r', linewidth=1) - ax.add_patch(plt.Rectangle((x-w/2, y-h/2), w+1, h+1, - fill=False, edgecolor='y', linewidth=2)) + ax.add_patch(plt.Rectangle((x - w / 2, y - h / 2), w + 1, h + 1, + fill=False, edgecolor='y', linewidth=2)) def plot_results(self, results=None, classes=None, show_labels=True, gt_data=None, confidence_threshold=None): if results is None: @@ -1011,68 +834,67 @@ def plot_results(self, results=None, classes=None, show_labels=True, gt_data=Non mask = results[:, 4] > confidence_threshold results = results[mask] if classes is not None: - colors = plt.cm.hsv(np.linspace(0, 1, len(classes)+1)).tolist() + colors = plt.cm.hsv(np.linspace(0, 1, len(classes) + 1)).tolist() ax = plt.gca() im = plt.gci() image_size = im.get_size() - + # draw ground truth if gt_data is not None: for box in gt_data: - label = np.nonzero(box[4:])[0][0]+1 - color = 'g' if classes == None else colors[label] + label = np.nonzero(box[4:])[0][0] + 1 + color = 'g' if classes is None else colors[label] xy_rec = to_rec(box[:4], image_size) ax.add_patch(plt.Polygon(xy_rec, fill=True, color=color, linewidth=1, alpha=0.3)) - + # draw prediction for r in results: label = int(r[5]) confidence = r[4] - color = 'r' if classes == None else colors[label] + color = 'r' if classes is None else colors[label] xy_rec = to_rec(r[:4], image_size) ax.add_patch(plt.Polygon(xy_rec, fill=False, edgecolor=color, linewidth=2)) if show_labels: - label_name = label if classes == None else classes[label] + label_name = label if classes is None else classes[label] xmin, ymin = xy_rec[0] - display_txt = '%0.2f, %s' % (confidence, label_name) - ax.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5}) - + display_txt = '%0.2f, %s' % (confidence, label_name) + ax.text(xmin, ymin, display_txt, bbox={'facecolor': color, 'alpha': 0.5}) + def print_gt_stats(self): # TODO pass - class PriorUtil(SSDPriorUtil): """Utility for SSD prior boxes. 
""" def encode(self, gt_data, overlap_threshold=0.5, debug=False): # calculation is done with normalized sizes - + # TODO: empty ground truth if gt_data.shape[0] == 0: print('gt_data', type(gt_data), gt_data.shape) - + num_classes = 2 num_priors = self.priors.shape[0] - - gt_polygons = np.copy(gt_data[:,:8]) # normalized quadrilaterals - gt_rboxes = np.array([polygon_to_rbox3(np.reshape(p, (-1,2))) for p in gt_data[:,:8]]) - + + gt_polygons = np.copy(gt_data[:, :8]) # normalized quadrilaterals + gt_rboxes = np.array([polygon_to_rbox3(np.reshape(p, (-1, 2))) for p in gt_data[:, :8]]) + # minimum horizontal bounding rectangles - gt_xmin = np.min(gt_data[:,0:8:2], axis=1) - gt_ymin = np.min(gt_data[:,1:8:2], axis=1) - gt_xmax = np.max(gt_data[:,0:8:2], axis=1) - gt_ymax = np.max(gt_data[:,1:8:2], axis=1) - gt_boxes = self.gt_boxes = np.array([gt_xmin,gt_ymin,gt_xmax,gt_ymax]).T # normalized xmin, ymin, xmax, ymax - - gt_class_idx = np.asarray(gt_data[:,-1]+0.5, dtype=np.int) - gt_one_hot = np.zeros([len(gt_class_idx),num_classes]) - gt_one_hot[range(len(gt_one_hot)),gt_class_idx] = 1 # one_hot classes including background + gt_xmin = np.min(gt_data[:, 0:8:2], axis=1) + gt_ymin = np.min(gt_data[:, 1:8:2], axis=1) + gt_xmax = np.max(gt_data[:, 0:8:2], axis=1) + gt_ymax = np.max(gt_data[:, 1:8:2], axis=1) + gt_boxes = self.gt_boxes = np.array([gt_xmin, gt_ymin, gt_xmax, gt_ymax]).T # normalized xmin, ymin, xmax, ymax + + gt_class_idx = np.asarray(gt_data[:, -1] + 0.5, dtype=np.int) + gt_one_hot = np.zeros([len(gt_class_idx), num_classes]) + gt_one_hot[range(len(gt_one_hot)), gt_class_idx] = 1 # one_hot classes including background gt_iou = np.array([iou(b, self.priors_norm) for b in gt_boxes]).T - + # assigne gt to priors max_idxs = np.argmax(gt_iou, axis=1) max_val = gt_iou[np.arange(num_priors), max_idxs] @@ -1083,117 +905,116 @@ def encode(self, gt_data, overlap_threshold=0.5, debug=False): # prior labels confidence = np.zeros((num_priors, num_classes)) - confidence[:,0] = 1 + confidence[:, 0] = 1 confidence[prior_mask] = gt_one_hot[match_indices] - gt_xy = (gt_boxes[:,2:4] + gt_boxes[:,0:2]) / 2. - gt_wh = gt_boxes[:,2:4] - gt_boxes[:,0:2] + gt_xy = (gt_boxes[:, 2:4] + gt_boxes[:, 0:2]) / 2. 
+ gt_wh = gt_boxes[:, 2:4] - gt_boxes[:, 0:2]
 gt_xy = gt_xy[match_indices]
 gt_wh = gt_wh[match_indices]
 gt_polygons = gt_polygons[match_indices]
 gt_rboxes = gt_rboxes[match_indices]
-
+
 priors_xy = self.priors_xy[prior_mask] / self.image_size
 priors_wh = self.priors_wh[prior_mask] / self.image_size
- variances_xy = self.priors_variances[prior_mask,0:2]
- variances_wh = self.priors_variances[prior_mask,2:4]
-
- # compute local offsets for
+ variances_xy = self.priors_variances[prior_mask, 0:2]
+ variances_wh = self.priors_variances[prior_mask, 2:4]
+
+ # compute local offsets for bounding boxes
 offsets = np.zeros((num_priors, 4))
- offsets[prior_mask,0:2] = (gt_xy - priors_xy) / priors_wh
- offsets[prior_mask,2:4] = np.log(gt_wh / priors_wh)
- offsets[prior_mask,0:2] /= variances_xy
- offsets[prior_mask,2:4] /= variances_wh
-
+ offsets[prior_mask, 0:2] = (gt_xy - priors_xy) / priors_wh
+ offsets[prior_mask, 2:4] = np.log(gt_wh / priors_wh)
+ offsets[prior_mask, 0:2] /= variances_xy
+ offsets[prior_mask, 2:4] /= variances_wh
+
 # compute local offsets for quadrilaterals
 offsets_quads = np.zeros((num_priors, 8))
- priors_xy_minmax = np.hstack([priors_xy-priors_wh/2, priors_xy+priors_wh/2])
+ priors_xy_minmax = np.hstack([priors_xy - priors_wh / 2, priors_xy + priors_wh / 2])
 #ref = np.tile(priors_xy, (1,4))
- ref = priors_xy_minmax[:,(0,1,2,1,2,3,0,3)] # corner points
- offsets_quads[prior_mask,:] = (gt_polygons - ref) / np.tile(priors_wh, (1,4)) / np.tile(variances_xy, (1,4))
-
+ ref = priors_xy_minmax[:, (0, 1, 2, 1, 2, 3, 0, 3)] # corner points
+ offsets_quads[prior_mask, :] = (gt_polygons - ref) / np.tile(priors_wh, (1, 4)) / np.tile(variances_xy, (1, 4))
+
 # compute local offsets for rotated bounding boxes
 offsets_rboxs = np.zeros((num_priors, 5))
- offsets_rboxs[prior_mask,0:2] = (gt_rboxes[:,0:2] - priors_xy) / priors_wh / variances_xy
- offsets_rboxs[prior_mask,2:4] = (gt_rboxes[:,2:4] - priors_xy) / priors_wh / variances_xy
- offsets_rboxs[prior_mask,4] = np.log(gt_rboxes[:,4] / priors_wh[:,1]) / variances_wh[:,1]
-
+ offsets_rboxs[prior_mask, 0:2] = (gt_rboxes[:, 0:2] - priors_xy) / priors_wh / variances_xy
+ offsets_rboxs[prior_mask, 2:4] = (gt_rboxes[:, 2:4] - priors_xy) / priors_wh / variances_xy
+ offsets_rboxs[prior_mask, 4] = np.log(gt_rboxes[:, 4] / priors_wh[:, 1]) / variances_wh[:, 1]
+
 return np.concatenate([offsets, offsets_quads, offsets_rboxs, confidence], axis=1)
-
-
 def decode(self, model_output, confidence_threshold=0.01, keep_top_k=200, fast_nms=True, sparse=True):
 # calculation is done with normalized sizes
 # mbox_loc, mbox_quad, mbox_rbox, mbox_conf
 # 4,8,5,2
 # boxes, quad, rboxes, confs, labels
 # 4,8,5,1,1
-
- prior_mask = model_output[:,17:] > confidence_threshold
-
+
+ prior_mask = model_output[:, 17:] > confidence_threshold
+
 if sparse:
 # compute boxes only if the confidence is high enough and the class is not background
- mask = np.any(prior_mask[:,1:], axis=1)
+ mask = np.any(prior_mask[:, 1:], axis=1)
 prior_mask = prior_mask[mask]
 mask = np.ix_(mask)[0]
 model_output = model_output[mask]
 priors_xy = self.priors_xy[mask] / self.image_size
 priors_wh = self.priors_wh[mask] / self.image_size
- priors_variances = self.priors_variances[mask,:]
+ priors_variances = self.priors_variances[mask, :]
 else:
 priors_xy = self.priors_xy / self.image_size
 priors_wh = self.priors_wh / self.image_size
 priors_variances = self.priors_variances
-
- offsets = model_output[:,:4]
- offsets_quads = model_output[:,4:12]
- offsets_rboxs = model_output[:,12:17]
- confidence = model_output[:,17:]
-
- priors_xy_minmax = np.hstack([priors_xy-priors_wh/2, priors_xy+priors_wh/2])
- ref = priors_xy_minmax[:,(0,1,2,1,2,3,0,3)] # corner points
- variances_xy = priors_variances[:,0:2]
- variances_wh = priors_variances[:,2:4]
-
+
+ offsets = model_output[:, :4]
+ offsets_quads = model_output[:, 4:12]
+ offsets_rboxs = model_output[:, 12:17]
+ confidence = model_output[:, 17:]
+
+ priors_xy_minmax = np.hstack([priors_xy - priors_wh / 2, priors_xy + priors_wh / 2])
+ ref = priors_xy_minmax[:, (0, 1, 2, 1, 2, 3, 0, 3)] # corner points
+ variances_xy = priors_variances[:, 0:2]
+ variances_wh = priors_variances[:, 2:4]
+
 num_priors = offsets.shape[0]
 num_classes = confidence.shape[1]

 # compute bounding boxes from local offsets
 boxes = np.empty((num_priors, 4))
 offsets = offsets * priors_variances
- boxes_xy = priors_xy + offsets[:,0:2] * priors_wh
- boxes_wh = priors_wh * np.exp(offsets[:,2:4])
- boxes[:,0:2] = boxes_xy - boxes_wh / 2. # xmin, ymin
- boxes[:,2:4] = boxes_xy + boxes_wh / 2. # xmax, ymax
+ boxes_xy = priors_xy + offsets[:, 0:2] * priors_wh
+ boxes_wh = priors_wh * np.exp(offsets[:, 2:4])
+ boxes[:, 0:2] = boxes_xy - boxes_wh / 2. # xmin, ymin
+ boxes[:, 2:4] = boxes_xy + boxes_wh / 2. # xmax, ymax
 boxes = np.clip(boxes, 0.0, 1.0)
-
+
 # do non maximum suppression
 results = []
 for c in range(1, num_classes):
- mask = prior_mask[:,c]
+ mask = prior_mask[:, c]
 boxes_to_process = boxes[mask]
 if len(boxes_to_process) > 0:
 confs_to_process = confidence[mask, c]
-
+
 if fast_nms:
 idx = non_maximum_suppression(
- boxes_to_process, confs_to_process,
- self.nms_thresh, self.nms_top_k)
+ boxes_to_process, confs_to_process,
+ self.nms_thresh, self.nms_top_k)
 else:
 idx = non_maximum_suppression_slow(
- boxes_to_process, confs_to_process,
- self.nms_thresh, self.nms_top_k)
-
+ boxes_to_process, confs_to_process,
+ self.nms_thresh, self.nms_top_k)
+
 good_boxes = boxes_to_process[idx]
 good_confs = confs_to_process[idx][:, None]
- labels = np.ones((len(idx),1)) * c
-
- good_quads = ref[mask][idx] + offsets_quads[mask][idx] * np.tile(priors_wh[mask][idx] * variances_xy[mask][idx], (1,4))
+ labels = np.ones((len(idx), 1)) * c
+
+ good_quads = ref[mask][idx] + offsets_quads[mask][idx] * np.tile(priors_wh[mask][idx] * variances_xy[mask][idx], (1, 4))
 good_rboxs = np.empty((len(idx), 5))
- good_rboxs[:,0:2] = priors_xy[mask][idx] + offsets_rboxs[mask][idx,0:2] * priors_wh[mask][idx] * variances_xy[mask][idx]
- good_rboxs[:,2:4] = priors_xy[mask][idx] + offsets_rboxs[mask][idx,2:4] * priors_wh[mask][idx] * variances_xy[mask][idx]
- good_rboxs[:,4] = np.exp(offsets_rboxs[mask][idx,4] * variances_wh[mask][idx,1]) * priors_wh[mask][idx,1]
-
+ good_rboxs[:, 0:2] = priors_xy[mask][idx] + offsets_rboxs[mask][idx, 0:2] * priors_wh[mask][idx] * variances_xy[mask][idx]
+ good_rboxs[:, 2:4] = priors_xy[mask][idx] + offsets_rboxs[mask][idx, 2:4] * priors_wh[mask][idx] * variances_xy[mask][idx]
+ good_rboxs[:, 4] = np.exp(offsets_rboxs[mask][idx, 4] * variances_wh[mask][idx, 1]) * priors_wh[mask][idx, 1]
+
 c_pred = np.concatenate((good_boxes, good_quads, good_rboxs, good_confs, labels), axis=1)
 results.extend(c_pred)
 if len(results) > 0:
@@ -1202,11 +1023,10 @@ def decode(self, model_output, confidence_threshold=0.01, keep_top_k=200, fast_n
 results = results[order]
 results = results[:keep_top_k]
 else:
- results = np.empty((0,6))
+ results = np.empty((0, 19)) # 4 box + 8 quad + 5 rbox + conf + label
 self.results = results
 return results
-
 def plot_results(self, results=None, classes=None, show_labels=False, gt_data=None, confidence_threshold=None):
 if results is None:
 results = self.results
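Each decoded row concatenates box, quadrilateral, rotated box, confidence and label (4 + 8 + 5 + 1 + 1 = 19 columns), which is why plot_results below reads the confidence at index 17 and the label at index 18. A minimal sketch of how a caller might unpack one row (the helper name is illustrative, not part of this codebase):

    import numpy as np

    def split_result_row(row):
        # row: one decoded prediction with 19 entries
        bbox = row[0:4]                        # xmin, ymin, xmax, ymax (normalized)
        quad = np.reshape(row[4:12], (-1, 2))  # four corner points
        rbox = row[12:17]                      # x1, y1, x2, y2, h of the rotated box
        return bbox, quad, rbox, row[17], int(row[18])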
@@ -1214,7 +1034,7 @@ def plot_results(self, results=None, classes=None, show_labels=False, gt_data=No
 mask = results[:, 17] > confidence_threshold
 results = results[mask]
 if classes is not None:
- colors = plt.cm.hsv(np.linspace(0, 1, len(classes)+1)).tolist()
+ colors = plt.cm.hsv(np.linspace(0, 1, len(classes) + 1)).tolist()
 ax = plt.gca()
 im = plt.gci()
 h, w = im.get_size()
@@ -1222,9 +1042,9 @@ def plot_results(self, results=None, classes=None, show_labels=False, gt_data=No
 # draw ground truth
 if gt_data is not None:
 for box in gt_data:
- label = np.nonzero(box[4:])[0][0]+1
- color = 'g' if classes == None else colors[label]
- xy = np.reshape(box[:8], (-1,2)) * (w,h)
+ label = np.nonzero(box[4:])[0][0] + 1
+ color = 'g' if classes is None else colors[label]
+ xy = np.reshape(box[:8], (-1, 2)) * (w, h)
 ax.add_patch(plt.Polygon(xy, fill=True, color=color, linewidth=1, alpha=0.3))

 # draw prediction
@@ -1235,19 +1055,20 @@ def plot_results(self, results=None, classes=None, show_labels=False, gt_data=No
 confidence = r[17]
 label = int(r[18])

- plot_box(bbox*(w,h,w,h), box_format='xyxy', color='b')
- plot_box(np.reshape(quad,(-1,2))*(w,h), box_format='polygon', color='r')
- plot_box(rbox3_to_polygon(rbox)*(w,h), box_format='polygon', color='g')
- plt.plot(rbox[[0,2]]*(w,w), rbox[[1,3]]*(h,h), 'oc', markersize=4)
+ plot_box(bbox * (w, h, w, h), box_format='xyxy', color='b')
+ plot_box(np.reshape(quad, (-1, 2)) * (w, h), box_format='polygon', color='r')
+ plot_box(rbox3_to_polygon(rbox) * (w, h), box_format='polygon', color='g')
+ plt.plot(rbox[[0, 2]] * (w, w), rbox[[1, 3]] * (h, h), 'oc', markersize=4)
 if show_labels:
- label_name = label if classes == None else classes[label]
- color = 'r' if classes == None else colors[label]
- xmin, ymin = bbox[:2]*(w,h)
+ label_name = label if classes is None else classes[label]
+ color = 'r' if classes is None else colors[label]
+ xmin, ymin = bbox[:2] * (w, h)
 display_txt = '%0.2f, %s' % (confidence, label_name)
- ax.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5})
+ ax.text(xmin, ymin, display_txt, bbox={'facecolor': color, 'alpha': 0.5})
+
 def multibox_head_separable(source_layers, num_priors, normalizations=None, softmax=True):
-
+
 num_classes = 2
 class_activation = 'softmax' if softmax else 'sigmoid'
@@ -1258,80 +1079,78 @@ def multibox_head_separable(source_layers, num_priors, normalizations=None, soft
 for i in range(len(source_layers)):
 x = source_layers[i]
 name = x.name.split('/')[0]
-
+
 # normalize
 if normalizations is not None and normalizations[i] > 0:
 name = name + '_norm'
 x = Normalize(normalizations[i], name=name)(x)
-
+
 # confidence
 name1 = name + '_mbox_conf'
 x1 = SeparableConv2D(num_priors[i] * num_classes, (3, 5), padding='same', name=name1)(x)
- x1 = Flatten(name=name1+'_flat')(x1)
+ x1 = Flatten(name=name1 + '_flat')(x1)
 mbox_conf.append(x1)

 # location, Delta(x,y,w,h)
 name2 = name + '_mbox_loc'
 x2 = SeparableConv2D(num_priors[i] * 4, (3, 5), padding='same', name=name2)(x)
- x2 = Flatten(name=name2+'_flat')(x2)
+ x2 = Flatten(name=name2 + '_flat')(x2)
 mbox_loc.append(x2)
-
+
 # quadrilateral, Delta(x1,y1,x2,y2,x3,y3,x4,y4)
 name3 = name + '_mbox_quad'
 x3 = SeparableConv2D(num_priors[i] * 8, (3, 5), padding='same', name=name3)(x)
- x3 = Flatten(name=name3+'_flat')(x3)
+ x3 = Flatten(name=name3 + '_flat')(x3)
 mbox_quad.append(x3)

 # rotated rectangle, Delta(x1,y1,x2,y2,h)
 name4 = name + '_mbox_rbox'
 x4 = SeparableConv2D(num_priors[i] * 5, (3, 5), padding='same', name=name4)(x)
- x4 = Flatten(name=name4+'_flat')(x4)
+ x4 = Flatten(name=name4 + '_flat')(x4)
 mbox_rbox.append(x4)
-
+
 mbox_conf = concatenate(mbox_conf, axis=1, name='mbox_conf')
 mbox_conf = Reshape((-1, num_classes), name='mbox_conf_logits')(mbox_conf)
 mbox_conf = Activation(class_activation, name='mbox_conf_final')(mbox_conf)
-
+
 mbox_loc = concatenate(mbox_loc, axis=1, name='mbox_loc')
 mbox_loc = Reshape((-1, 4), name='mbox_loc_final')(mbox_loc)
-
+
 mbox_quad = concatenate(mbox_quad, axis=1, name='mbox_quad')
 mbox_quad = Reshape((-1, 8), name='mbox_quad_final')(mbox_quad)
-
+
 mbox_rbox = concatenate(mbox_rbox, axis=1, name='mbox_rbox')
 mbox_rbox = Reshape((-1, 5), name='mbox_rbox_final')(mbox_rbox)

 predictions = concatenate([mbox_loc, mbox_quad, mbox_rbox, mbox_conf], axis=2, name='predictions')
-
+
 return predictions
-
+
 def TBPP512_dense_separable(input_shape=(512, 512, 3), softmax=True):
 """TextBoxes++512 architecture with dense blocks and separable convolution.
 """
-
+
 # custom body
 x = input_tensor = Input(shape=input_shape)
 source_layers = ssd512_dense_separable_body(x)
-
+
 num_maps = len(source_layers)
-
+
 # Add multibox head for classification and regression
 num_priors = [14] * num_maps
 normalizations = [1] * num_maps
 output_tensor = multibox_head_separable(source_layers, num_priors, normalizations, softmax)
 model = Model(input_tensor, output_tensor)
-
+
 # parameters for prior boxes
 model.image_size = input_shape[:2]
 model.source_layers = source_layers
-
- model.aspect_ratios = [[1,2,3,5,1/2,1/3,1/5] * 2] * num_maps
+
+ model.aspect_ratios = [[1, 2, 3, 5, 1 / 2, 1 / 3, 1 / 5] * 2] * num_maps
 #model.shifts = [[(0.0, 0.0)] * 7 + [(0.0, 0.5)] * 7] * num_maps
 model.shifts = [[(0.0, -0.25)] * 7 + [(0.0, 0.25)] * 7] * num_maps
 model.special_ssd_boxes = False
 model.scale = 0.5
-
- return model
-
+ return model
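A minimal inference sketch showing how the pieces above might be wired together; the weights and image paths are placeholders, and the PriorUtil(model) call assumes the utility reads the prior-box attributes set on the model:

    import cv2
    import numpy as np

    model = TBPP512_dense_separable(input_shape=(512, 512, 3))
    model.load_weights('weights.h5')  # hypothetical checkpoint path
    prior_util = PriorUtil(model)     # assumed constructor, derives priors from model attributes

    img = cv2.imread('sample.jpg')    # hypothetical input image
    x = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)[np.newaxis] / 256.
    preds = model.predict(x)
    results = prior_util.decode(preds[0], confidence_threshold=0.25)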
diff --git a/media/text/__init__.py b/media/text/__init__.py
index 8b13789..e69de29 100644
--- a/media/text/__init__.py
+++ b/media/text/__init__.py
@@ -1 +0,0 @@
-
diff --git a/requirements.txt b/requirements.txt
index 7c918ec..c9344f3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
 absl-py==0.10
-aiohttp==3.5.4
+aiohttp==3.8.1
 amqp==2.5.0
 appnope==0.1.0
 astor==0.8.0
-async-timeout==3.0.1
+async-timeout==4.0.2
 attrs==19.1.0
 autopep8==1.4.4
 Babel==2.7.0
@@ -29,12 +29,11 @@ entrypoints==0.3
 flake8==3.7.7
 flower==0.9.3
 gast==0.4.0
-grpcio==1.34.0
+grpcio==1.47.0
 h5py==3.1.0
 idna==2.8
 idna-ssl==1.1.0
 imageio==2.5.0
-ipdb==0.12.3
 ipython==7.5.0
 ipython-genutils==0.2.0
 jedi==0.14.0
@@ -49,14 +48,13 @@ mccabe==0.6.1
 mock==3.0.5
 multidict==4.5.2
 networkx==2.3
-numpy
-opencv-python==4.3.0.38
+numpy==1.19.5
+opencv-python==4.6.0.66
 parso==0.5.0
 pep8==1.7.1
 pexpect==4.7.0
 pickleshare==0.7.5
 Pillow==8.2.0
-pmrlanms
 prompt-toolkit==2.0.9
 protobuf==3.9.2
 psycopg2-binary==2.8.6
@@ -71,15 +69,17 @@ pytz==2019.1
 PyWavelets==1.0.3
 redis==3.2.1
 requests==2.22.0
+scikit-image==0.14.2
 scikit-learn==0.22
+scikit-video==1.1.11
 scipy==1.5.0
 six==1.15.0
 sklearn==0.0
 sqlparse==0.3.0
-tensorboard==2.5.0
-tensorflow==2.5.0
-tensorflow-estimator==2.5.0
-tensorflow-serving-api==2.5.0
+tensorboard
+tensorflow==2.6.2
+tensorflow-estimator==2.6.0
+tensorflow-serving-api==2.6.2
 termcolor==1.1.0
 testresources==2.0.1
 toolz==0.9.0
@@ -95,4 +95,7 @@ youtube-dl==2019.6.8
 dj_database_url
 gunicorn==20.1.0
 wordninja==0.1.5
+ipdb
 pmrlanms
+ffmpeg-python
+tqdm
diff --git a/tests/test_views.py b/tests/test_views.py
index 55c7fd8..c75cc5f 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -1,16 +1,14 @@
 from django.test import TestCase
 from django.core.files import File
-from django.conf import settings
 from django.core.files.uploadedfile import SimpleUploadedFile
 from rest_framework import status
-settings.configure()
 from rest_framework.test import APIClient # noqa: E402


 class TestImageFr(TestCase):

 def setUp(self):
-
+ print("Testing ImageFr")
 super(TestImageFr, self).setUp()
 self.client = APIClient()
 file1 = File(open('tests/testdata/t1.png', 'rb'))
@@ -34,37 +32,34 @@ def test_get(self):
 class TestVideoFr(TestCase):

 def setUp(self):
+ print("Testing TestVideoFr")
 super(TestVideoFr, self).setUp()
 self.client = APIClient()
 file1 = File(open('tests/testdata/test1.mp4', 'rb'))
- self.uploaded_file1 = SimpleUploadedFile("temp1.png", file1.read(), content_type='multipart/form-data')
+ self.uploaded_file1 = SimpleUploadedFile("temp1.mp4", file1.read(), content_type='multipart/form-data')
 file2 = File(open('tests/testdata/test2.mp4', 'rb'))
- self.uploaded_file2 = SimpleUploadedFile("temp2.jpeg", file2.read(), content_type='multipart/form-data')
+ self.uploaded_file2 = SimpleUploadedFile("temp2.mp4", file2.read(), content_type='multipart/form-data')

 def test_post(self):

- response1 = self.client.post('/api/old_video/', {'file': self.uploaded_file1})
+ response1 = self.client.post('/api/video/', {'file': self.uploaded_file1})
 self.assertEqual(status.HTTP_200_OK, response1.status_code)
- response2 = self.client.post('/api/old_video/', {'file': self.uploaded_file2})
+ response2 = self.client.post('/api/video/', {'file': self.uploaded_file2})
 self.assertEqual(status.HTTP_200_OK, response2.status_code)

- def test_get(self):
-
- response1 = self.client.get('/api/old_video/')
- self.assertEqual(status.HTTP_200_OK, response1.status_code)
-

 class TestAsyncVideoFr(TestCase):

 def setUp(self):
+ print("Testing TestAsyncVideoFr")
 super(TestAsyncVideoFr, self).setUp()
 self.client = APIClient()
 file1 = File(open('tests/testdata/test1.mp4', 'rb'))
- self.uploaded_file1 = SimpleUploadedFile("temp1.png", file1.read(), content_type='multipart/form-data')
+ self.uploaded_file1 = SimpleUploadedFile("temp1.mp4", file1.read(), content_type='multipart/form-data')
 file2 = File(open('tests/testdata/test2.mp4', 'rb'))
- self.uploaded_file2 = SimpleUploadedFile("temp2.jpeg", file2.read(), content_type='multipart/form-data')
+ self.uploaded_file2 = SimpleUploadedFile("temp2.mp4", file2.read(), content_type='multipart/form-data')

 def test_post(self):

@@ -73,16 +68,11 @@ def test_post(self):
 response2 = self.client.post('/api/video/', {'file': self.uploaded_file2})
 self.assertEqual(status.HTTP_200_OK, response2.status_code)

- def test_get(self):
-
- response1 = self.client.get('/api/video/')
- self.assertEqual(status.HTTP_200_OK, response1.status_code)
-

 class TestNsfwRecognise(TestCase):

 def setUp(self):
-
+ print("Testing TestNsfwRecognise")
 super(TestNsfwRecognise, self).setUp()
 self.client = APIClient()
 file1 = File(open('tests/testdata/t1.png', 'rb'))
@@ -101,13 +91,13 @@ def test_post(self):
 class TestNsfwVideo(TestCase):

 def setUp(self):
-
+ print("Testing TestNsfwVideo")
 super(TestNsfwVideo, self).setUp()
 self.client = APIClient()
 file1 = File(open('tests/testdata/test3.mp4', 'rb'))
- self.uploaded_file1 = SimpleUploadedFile("temp1.png", file1.read(), content_type='multipart/form-data')
- file2 = File(open('tests/testdata/test4.mp4.jpeg', 'rb'))
- self.uploaded_file2 = SimpleUploadedFile("temp2.jpeg", file2.read(), content_type='multipart/form-data')
+ self.uploaded_file1 = SimpleUploadedFile("temp1.mp4", file1.read(),
content_type='multipart/form-data') + file2 = File(open('tests/testdata/test4.mp4', 'rb')) + self.uploaded_file2 = SimpleUploadedFile("temp2.mp4", file2.read(), content_type='multipart/form-data') def test_post(self): @@ -120,12 +110,12 @@ def test_post(self): class TestEmbedding(TestCase): def setUp(self): - + print("Testing TestEmbedding") super(TestEmbedding, self).setUp() self.client = APIClient() - file1 = File(open('tests/testdata/t1.png', 'rb')) - self.uploaded_file1 = SimpleUploadedFile("temp1.png", file1.read(), content_type='multipart/form-data') - file2 = File(open('tests/testdata/t2.jpeg', 'rb')) + file1 = File(open('tests/testdata/compareImage.jpeg', 'rb')) + self.uploaded_file1 = SimpleUploadedFile("temp1.jpeg", file1.read(), content_type='multipart/form-data') + file2 = File(open('tests/testdata/compareImage.jpeg', 'rb')) self.uploaded_file2 = SimpleUploadedFile("temp2.jpeg", file2.read(), content_type='multipart/form-data') def test_post(self): @@ -144,7 +134,7 @@ def test_get(self): class TestSimilarFace(TestCase): def setUp(self): - + print("Testing TestSimilarFace") super(TestSimilarFace, self).setUp() self.client = APIClient() file1 = File(open('tests/testdata/t1.png', 'rb')) @@ -163,48 +153,48 @@ def test_get(self): self.assertEqual(status.HTTP_200_OK, response1.status_code) -class TestObjectDetect(TestCase): +# class TestObjectDetect(TestCase): - def setUp(self): +# def setUp(self): - super(TestObjectDetect, self).setUp() - self.client = APIClient() - file1 = File(open('tests/testdata/t1.png', 'rb')) - self.uploaded_file1 = SimpleUploadedFile("temp1.png", file1.read(), content_type='multipart/form-data') - file2 = File(open('tests/testdata/t2.jpeg', 'rb')) - self.uploaded_file2 = SimpleUploadedFile("temp2.jpeg", file2.read(), content_type='multipart/form-data') +# super(TestObjectDetect, self).setUp() +# self.client = APIClient() +# file1 = File(open('tests/testdata/t1.png', 'rb')) +# self.uploaded_file1 = SimpleUploadedFile("temp1.png", file1.read(), content_type='multipart/form-data') +# file2 = File(open('tests/testdata/t2.jpeg', 'rb')) +# self.uploaded_file2 = SimpleUploadedFile("temp2.jpeg", file2.read(), content_type='multipart/form-data') - def test_post(self): +# def test_post(self): - response1 = self.client.post('/api/objects/', {'file': self.uploaded_file1}) - self.assertEqual(status.HTTP_200_OK, response1.status_code) - response2 = self.client.post('/api/objects/', {'file': self.uploaded_file2}) - self.assertEqual(status.HTTP_200_OK, response2.status_code) +# response1 = self.client.post('/api/objects/', {'file': self.uploaded_file1}) +# self.assertEqual(status.HTTP_200_OK, response1.status_code) +# response2 = self.client.post('/api/objects/', {'file': self.uploaded_file2}) +# self.assertEqual(status.HTTP_200_OK, response2.status_code) -class TestObjectDetectVideo(TestCase): +# class TestObjectDetectVideo(TestCase): - def setUp(self): +# def setUp(self): - super(TestObjectDetectVideo, self).setUp() - self.client = APIClient() - file1 = File(open('tests/testdata/obj1.mp4', 'rb')) - self.uploaded_file1 = SimpleUploadedFile("temp1.mp4", file1.read(), content_type='multipart/form-data') - file2 = File(open('tests/testdata/obj2.mp4', 'rb')) - self.uploaded_file2 = SimpleUploadedFile("temp2.mp4", file2.read(), content_type='multipart/form-data') +# super(TestObjectDetectVideo, self).setUp() +# self.client = APIClient() +# file1 = File(open('tests/testdata/obj1.mp4', 'rb')) +# self.uploaded_file1 = SimpleUploadedFile("temp1.mp4", file1.read(), 
content_type='multipart/form-data') +# file2 = File(open('tests/testdata/obj2.mp4', 'rb')) +# self.uploaded_file2 = SimpleUploadedFile("temp2.mp4", file2.read(), content_type='multipart/form-data') - def test_post(self): +# def test_post(self): - response1 = self.client.post('/api/objectsvideo/', {'file': self.uploaded_file1}) - self.assertEqual(status.HTTP_200_OK, response1.status_code) - response2 = self.client.post('/api/objectsvideo/', {'file': self.uploaded_file2}) - self.assertEqual(status.HTTP_200_OK, response2.status_code) +# response1 = self.client.post('/api/objectsvideo/', {'file': self.uploaded_file1}) +# self.assertEqual(status.HTTP_200_OK, response1.status_code) +# response2 = self.client.post('/api/objectsvideo/', {'file': self.uploaded_file2}) +# self.assertEqual(status.HTTP_200_OK, response2.status_code) class TestSceneText(TestCase): def setUp(self): - + print("Testing TestSceneText") super(TestSceneText, self).setUp() self.client = APIClient() file1 = File(open('tests/testdata/t3.jpeg', 'rb')) @@ -216,25 +206,25 @@ def test_post(self): self.assertEqual(status.HTTP_200_OK, response1.status_code) -class TestSceneTextVideo(TestCase): - - def setUp(self): +# class TestSceneTextVideo(TestCase): - super(TestSceneTextVideo, self).setUp() - self.client = APIClient() - file1 = File(open('tests/testdata/text.mp4', 'rb')) - self.uploaded_file1 = SimpleUploadedFile("temp1.mp4", file1.read(), content_type='multipart/form-data') +# def setUp(self): +# print("Testing TestSceneTextVideo") +# super(TestSceneTextVideo, self).setUp() +# self.client = APIClient() +# file1 = File(open('tests/testdata/test3.mp4', 'rb')) +# self.uploaded_file1 = SimpleUploadedFile("temp1.mp4", file1.read(), content_type='multipart/form-data') - def test_post(self): +# def test_post(self): - response1 = self.client.post('/api/scenetextvideo/', {'file': self.uploaded_file1}) - self.assertEqual(status.HTTP_200_OK, response1.status_code) +# response1 = self.client.post('/api/scenetextvideo/', {'file': self.uploaded_file1}) +# self.assertEqual(status.HTTP_200_OK, response1.status_code) class TestSceneDetect(TestCase): def setUp(self): - + print("Testing TestSceneDetect") super(TestSceneDetect, self).setUp() self.client = APIClient() file1 = File(open('tests/testdata/t3.jpeg', 'rb')) @@ -249,10 +239,10 @@ def test_post(self): class TestSceneVideo(TestCase): def setUp(self): - + print("Testing TestSceneVideo") super(TestSceneVideo, self).setUp() self.client = APIClient() - file1 = File(open('tests/testdata/scene1.mp4', 'rb')) + file1 = File(open('tests/testdata/test1.mp4', 'rb')) self.uploaded_file1 = SimpleUploadedFile("temp1.mp4", file1.read(), content_type='multipart/form-data') def test_post(self):