diff --git a/.all-contributorsrc b/.all-contributorsrc new file mode 100644 index 0000000..b9cf524 --- /dev/null +++ b/.all-contributorsrc @@ -0,0 +1,71 @@ +{ + "files": [ + "README.md" + ], + "imageSize": 100, + "commit": false, + "contributors": [ + { + "login": "ManosMagnus", + "name": "Manos Kirtas", + "avatar_url": "https://avatars.githubusercontent.com/u/10010230?v=4", + "profile": "http://eakirtas.webpages.auth.gr/", + "contributions": [ + "code" + ] + }, + { + "login": "tsampazk", + "name": "Kostas Tsampazis", + "avatar_url": "https://avatars.githubusercontent.com/u/27914645?v=4", + "profile": "https://github.com/tsampazk", + "contributions": [ + "code" + ] + }, + { + "login": "KelvinYang0320", + "name": "Jiun Kai Yang", + "avatar_url": "https://avatars.githubusercontent.com/u/49781698?v=4", + "profile": "https://www.linkedin.com/in/kelvin-yang-b7b508198/", + "contributions": [ + "code" + ] + }, + { + "login": "MentalGear", + "name": "MentalGear", + "avatar_url": "https://avatars.githubusercontent.com/u/2837147?v=4", + "profile": "https://github.com/MentalGear", + "contributions": [ + "ideas" + ] + }, + { + "login": "DreamtaleCore", + "name": "Dreamtale", + "avatar_url": "https://avatars.githubusercontent.com/u/12713528?v=4", + "profile": "https://github.com/DreamtaleCore", + "contributions": [ + "bug" + ] + }, + { + "login": "NickKok", + "name": "Nikolaos Kokkinis-Ntrenis", + "avatar_url": "https://avatars.githubusercontent.com/u/8222731?v=4", + "profile": "https://nickkok.github.io/my-website/", + "contributions": [ + "code", + "doc", + "ideas" + ] + } + ], + "contributorsPerLine": 7, + "projectName": "deepbots", + "projectOwner": "aidudezzz", + "repoType": "github", + "repoHost": "https://github.com", + "skipCi": true +} diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 0b8b7f9..81dfc55 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.3-dev1 +current_version = 0.1.3-dev3 commit = True tag = 
True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? @@ -20,3 +20,7 @@ values = [bumpversion:file:VERSION] [bumpversion:file:setup.py] + +[bumpversion:file:deepbots/__init__.py] +search = "{current_version}" +replace = "{new_version}" diff --git a/.github/workflows/python_build.yaml b/.github/workflows/python_build.yaml index 5946e5f..9618aca 100644 --- a/.github/workflows/python_build.yaml +++ b/.github/workflows/python_build.yaml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v1 diff --git a/.isort.cfg b/.isort.cfg index f3b067f..e9b9c5f 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,2 +1,2 @@ [settings] -known_third_party = controller,numpy,setuptools,tensorboardX +known_third_party = controller,cv2,gym,numpy,ray,setuptools,tensorboardX diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1bae66a..e59b980 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,7 @@ repos: hooks: - id: prettier args: [--prose-wrap=always, --print-width=79] + exclude: README.md - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.4.0 diff --git a/README.md b/README.md index c2ed0b4..0cf16e9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,12 @@ -# deepbots +

+ +

+ +[![Version](https://img.shields.io/pypi/v/deepbots?color=green)](https://pypi.org/project/deepbots/) +[![Dev Version](https://img.shields.io/github/v/tag/aidudezzz/deepbots?include_prereleases&label=test-pypi&color=green)](https://test.pypi.org/project/deepbots/) +[![Downloads](https://static.pepy.tech/personalized-badge/deepbots?period=total&units=international_system&left_color=grey&right_color=green&left_text=Downloads)](https://pepy.tech/project/deepbots) +[![License](https://img.shields.io/github/license/aidudezzz/deepbots?color=green)](https://github.com/aidudezzz/deepbots/blob/dev/LICENSE) +[![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-) Deepbots is a simple framework which is used as "middleware" between the free and open-source [Cyberbotics' Webots](https://cyberbotics.com/) robot simulator @@ -60,16 +68,12 @@ https://link.springer.com/chapter/10.1007/978-3-030-49186-4_6 and Tsampazis, K. and Passalis, N. and Tefas, A.", - editor="Maglogiannis, Ilias - and Iliadis, Lazaros - and Pimenidis, Elias", title="Deepbots: A Webots-Based Deep Reinforcement Learning Framework for Robotics", booktitle="Artificial Intelligence Applications and Innovations", year="2020", publisher="Springer International Publishing", address="Cham", pages="64--75", - abstract="Deep Reinforcement Learning (DRL) is increasingly used to train robots to perform complex and delicate tasks, while the development of realistic simulators contributes to the acceleration of research on DRL for robotics. However, it is still not straightforward to employ such simulators in the typical DRL pipeline, since their steep learning curve and the enormous amount of development required to interface with DRL methods significantly restrict their use by researchers. 
To overcome these limitations, in this work we present an open-source framework that combines an established interface used by DRL researchers, the OpenAI Gym interface, with the state-of-the-art Webots robot simulator in order to provide a standardized way to employ DRL in various robotics scenarios. Deepbots aims to enable researchers to easily develop DRL methods in Webots by handling all the low-level details and reducing the required development effort. The effectiveness of the proposed framework is demonstrated through code examples, as well as using three use cases of varying difficulty.", isbn="978-3-030-49186-4" } @@ -142,26 +146,27 @@ those who are familiar with the OpenAI gym environment. More specifically, algorithms and follows the OpenAI Gym environment logic. The Deepbots framework provides different levels of abstraction according to the user's needs. Moreover, a goal of the framework is to provide different wrappers for a wide -range of robots. +range of robots. Deepbots also provides a default implementation of the `reset()` method, -leveraging Webots' built-in simulation reset functions, removing the need -for the user to implement reset procedures for simpler use-cases. It is -always possible to override this method and implement any custom reset -procedure, as needed. +leveraging Webots' built-in simulation reset functions, removing the need for +the user to implement reset procedures for simpler use-cases. It is always +possible to override this method and implement any custom reset procedure, as +needed. #### Emitter - receiver scheme -Currently, the communication between the `Supervisor` and the -`Robot` is achieved via an `emitter` and a `receiver`. Separating the `Supervisor` -from the `Robot`, deepbots can fit a variety of use-cases, e.g. multiple -`Robots` collecting experience and a `Supervisor` controlling them with a single -agent. 
The way Webots implements `emitter`/`receiver` communication requires messages -to be packed and unpacked, which introduces an overhead that becomes prohibiting in -use-cases where the observations are high-dimensional or long, such as camera images. -Deepbots provides another partially abstract class that combines the `Supervisor` -and the `Robot` into one controller and circumvents that issue, while being less -flexible, which is discussed [later](#combined-robot-supervisor-scheme). +Currently, the communication between the `Supervisor` and the `Robot` is +achieved via an `emitter` and a `receiver`. Separating the `Supervisor` from +the `Robot`, deepbots can fit a variety of use-cases, e.g. multiple `Robots` +collecting experience and a `Supervisor` controlling them with a single agent. +The way Webots implements `emitter`/`receiver` communication requires messages +to be packed and unpacked, which introduces an overhead that becomes +prohibitive in use-cases where the observations are high-dimensional or long, +such as camera images. Deepbots provides another partially abstract class that +combines the `Supervisor` and the `Robot` into one controller and circumvents +that issue, while being less flexible, which is discussed +[later](#combined-robot-supervisor-scheme).

@@ -170,16 +175,20 @@ flexible, which is discussed [later](#combined-robot-supervisor-scheme). On one hand, the `emitter` is an entity which is provided by Webots, that broadcasts messages to the world. On the other hand, the `receiver` is an entity that is used to receive messages from the `World`. Consequently, the -agent-environment loop is transformed accordingly. Firstly, the `Robot` uses its -sensors to retrieve the observation from the `World` and in turn uses the `emitter` -component to broadcast this observation. Secondly, the `Supervisor` receives the -observation via the `receiver` component and in turn, the agent uses it to choose -an action. It should be noted that the observation the agent uses might be -extended from the `Supervisor`. For example, a model might use LiDAR sensors -installed on the `Robot`, but also the Euclidean distance between the `Robot` and -an object. As it is expected, the `Robot` does not know the Euclidean distance, -only the `Supervisor` can calculate it, because it has access to all entities in -the `World`. +agent-environment loop is transformed accordingly. Firstly, the `Robot` uses +its sensors to retrieve the observation from the `World` and in turn uses the +`emitter` component to broadcast this observation. Secondly, the `Supervisor` +receives the observation via the `receiver` component and in turn, the agent +uses it to choose an action. It should be noted that the observation the agent +uses might be extended from the `Supervisor`. For example, a model might use +LiDAR sensors installed on the `Robot`, but also the Euclidean distance between +the `Robot` and an object. As it is expected, the `Robot` does not know the +Euclidean distance, only the `Supervisor` can calculate it, because it has +access to all entities in the `World`. 
+ +You can follow the +[emitter-receiver scheme tutorial](https://github.com/aidudezzz/deepbots-tutorials/blob/master/emitterReceiverSchemeTutorial/README.md) +to get started and work your way up from there.

@@ -187,14 +196,19 @@ the `World`. #### Combined Robot-Supervisor scheme -As mentioned earlier, in use-cases where the observation transmitted between -the `Robot` and the `Supervisor` is high-dimensional or long, e.g. high resolution -images taken from a camera, a significant overhead is introduced. This is circumvented -by inheriting and implementing the partially abstract `RobotSupervisor` that combines -the `Robot controller` and the `Supervisor Controller` into one, forgoing all -`emitter`/`receiver` communication. This new controller runs on the `Robot`, but -requires `Supervisor` privileges and is limited to one `Robot`, one `Supervisor`. +As mentioned earlier, in use-cases where the observation transmitted between +the `Robot` and the `Supervisor` is high-dimensional or long, e.g. high +resolution images taken from a camera, a significant overhead is introduced. +This is circumvented by inheriting and implementing the partially abstract +`RobotSupervisor` that combines the `Robot controller` and the +`Supervisor Controller` into one, forgoing all `emitter`/`receiver` +communication. This new controller runs on the `Robot`, but requires +`Supervisor` privileges and is limited to one `Robot`, one `Supervisor`. +You can follow the +[robot-supervisor scheme tutorial](https://github.com/aidudezzz/deepbots-tutorials/tree/master/robotSupervisorSchemeTutorial) +to get started and work your way up from there. We recommend this +tutorial to get started with deepbots. ### Abstraction Levels @@ -203,15 +217,49 @@ aim of the framework is to enable people to use Reinforcement Learning in Webots. More specifically, we can consider deepbots as a wrapper of Webots exposing an OpenAI gym style interface. For this reason there are multiple levels of abstraction. For example, a user can choose if they want to use CSV -`emitter`/`receiver` or if they want to make an implementation from scratch. 
-In the top level of the abstraction hierarchy is the `SupervisorEnv` which is the +`emitter`/`receiver` or if they want to make an implementation from scratch. In +the top level of the abstraction hierarchy is the `SupervisorEnv` which is the OpenAI gym interface. Below that level there are partially implemented classes -with common functionality. These implementations aim to hide the communication +with common functionality. These implementations aim to hide the communication between the `Supervisor` and the `Robot`, as described in the two different -schemes ealier. Similarly, in the `emitter`/`receiver` scheme the `Robot` -also has different abstraction levels. According to their needs, users can choose -either to process the messages received from the `Supervisor` themselves or -use the existing implementations. - -### Acknowledgments -This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 871449 (OpenDR). This publication reflects the authors’ views only. The European Commission is not responsible for any use that may be made of the information it contains. +schemes earlier. Similarly, in the `emitter`/`receiver` scheme the `Robot` also +has different abstraction levels. According to their needs, users can choose +either to process the messages received from the `Supervisor` themselves or use +the existing implementations. + +### Acknowledgments + +This project has received funding from the European Union's Horizon 2020 +research and innovation programme under grant agreement No 871449 (OpenDR). +This publication reflects the authors’ views only. The European Commission is +not responsible for any use that may be made of the information it contains. + +## Contributors ✨ + +Thanks goes to these wonderful people +([emoji key](https://allcontributors.org/docs/en/emoji-key)): + + + + + + + + + + + + + +

Manos Kirtas

💻

Kostas Tsampazis

💻

Jiun Kai Yang

💻

MentalGear

🤔

Dreamtale

🐛

Nikolaos Kokkinis-Ntrenis

💻 📖 🤔
+ + + + + + +This project follows the +[all-contributors](https://github.com/all-contributors/all-contributors) +specification. Contributions of any kind welcome! + + Special thanks to Papanikolaou Evangelia for designing project's logo! diff --git a/VERSION b/VERSION index a87819e..e28037e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.3-dev1 +0.1.3-dev3 diff --git a/deepbots/__init__.py b/deepbots/__init__.py index 1672089..61fdb84 100644 --- a/deepbots/__init__.py +++ b/deepbots/__init__.py @@ -1 +1 @@ -__version__ = "0.0.0.11-pre" +__version__ = "0.1.3-dev3" diff --git a/deepbots/robots/controllers/robot_emitter_receiver.py b/deepbots/robots/controllers/robot_emitter_receiver.py index 3ab19a0..ebb029a 100644 --- a/deepbots/robots/controllers/robot_emitter_receiver.py +++ b/deepbots/robots/controllers/robot_emitter_receiver.py @@ -1,19 +1,19 @@ -from abc import ABC, abstractmethod - +from warnings import warn, simplefilter from controller import Robot -class RobotEmitterReceiver(ABC): +class RobotEmitterReceiver: """ This RobotEmitterReceiver implements only the most basic run method, that - steps the robot and calls the handleEmitter, handleReceiver methods that + steps the robot and calls the handle_emitter, handle_receiver methods that are needed for communication with the supervisor. This class must be inherited by all robot controllers created by the user - and the handleEmitter, handleReceiver, initialize_comms methods are all - abstract and need to be implemented, according to their docstrings. For a + and the handle_emitter, handle_receiver, initialize_comms methods are all + abstract and need to be implemented according to their docstrings. For a simpler RobotController that implements the methods in a basic form - inherit the RobotEmitterReceiver class. + inherit the RobotEmitterReceiverCSV subclass or other emitter-receiver + subclasses. 
""" def __init__(self, emitter_name="emitter", @@ -28,12 +28,12 @@ def __init__(self, Also initializes the emitter and the receiver used to communicate with the supervisor, using the initialize_comms() method which must be implemented by the user. The two methods handle_emitter() and - handle_receiver() must also be implemented by the user. + handle_receiver() are also implemented by the user. For the step argument see relevant Webots documentation: https://cyberbotics.com/doc/guide/controller-programming#the-step-and-wb_robot_step-functions - :param timestep: float, positive or None + :param timestep: int, positive or None """ self.robot = Robot() @@ -46,12 +46,34 @@ def __init__(self, emitter_name, receiver_name) def get_timestep(self): + # The filter is required so as to not ignore the Deprecation warning + simplefilter("once") + warn("get_timestep is deprecated, use .timestep instead", + DeprecationWarning) return self.timestep - @abstractmethod + @property + def timestep(self): + """ + Getter of _timestep field. Timestep is defined in milliseconds + + :return: The timestep of the controller in milliseconds + """ + return self._timestep + + @timestep.setter + def timestep(self, value): + """ + Setter of timestep field. Automatically converts to int as + required by Webots. + + :param value: The new controller timestep in milliseconds + """ + self._timestep = int(value) + def initialize_comms(self, emitter_name, receiver_name): """ - This method should initialize and the return emitter and receiver in a + This method should initialize and return the emitter and receiver in a tuple as expected by the constructor. A basic example implementation can be: @@ -63,18 +85,16 @@ def initialize_comms(self, emitter_name, receiver_name): :return: (emitter, receiver) tuple, as initialized """ - pass + raise NotImplementedError - @abstractmethod def handle_emitter(self): """ This method should take data from the robot, eg. 
sensor data, parse it into a message and use the robot's emitter to send it to the supervisor. This message will be used as the observation of the robot. """ - pass + raise NotImplementedError - @abstractmethod def handle_receiver(self): """ This method should take data through the receiver in the form of a @@ -83,7 +103,7 @@ def handle_receiver(self): For example the message might include a motor speed, which should be parsed and applied to the robot's motor. """ - pass + raise NotImplementedError def run(self): """ diff --git a/deepbots/robots/controllers/robot_emitter_receiver_csv.py b/deepbots/robots/controllers/robot_emitter_receiver_csv.py index cecfb62..516b08b 100644 --- a/deepbots/robots/controllers/robot_emitter_receiver_csv.py +++ b/deepbots/robots/controllers/robot_emitter_receiver_csv.py @@ -1,30 +1,46 @@ -from abc import abstractmethod from collections.abc import Iterable -from .robot_emitter_receiver import RobotEmitterReceiver +from deepbots.robots.controllers.robot_emitter_receiver import \ + RobotEmitterReceiver class RobotEmitterReceiverCSV(RobotEmitterReceiver): """ Basic implementation of a robot that can emit and receive messages to/from - the supervisor in string utf-8 form that are comma separated, i.e. a list. + the supervisor in string utf-8 form that are Comma Separated Values, + i.e. a list. """ def __init__(self, emitter_name="emitter", receiver_name="receiver", timestep=None): - super().__init__(timestep=timestep) + """ + The constructor just passes the arguments provided to the parent + class constructor. 
+ + :param emitter_name: The name of the emitter device on the + robot node, defaults to "emitter" + :param receiver_name: The name of the receiver device on the + robot node, defaults to "receiver" + :param timestep: The robot controller timestep, defaults to None + """ + super().__init__(emitter_name, receiver_name, + timestep) def initialize_comms(self, emitter_name, receiver_name): """ This method implements the basic emitter/receiver initialization that - assumes that an emitter and a receiver components are present on the + assumes that an emitter and a receiver component are present on the Webots robot with appropriate DEFs ("emitter"/"receiver"). - :return: emitter and receiver references + :param emitter_name: The name of the emitter device on the + robot node + :param receiver_name: The name of the receiver device on the + robot node + :return: The initialized emitter and receiver references """ - emitter = self.robot.getDevice("emitter") - receiver = self.robot.getDevice("receiver") + emitter = self.robot.getDevice(emitter_name) + receiver = self.robot.getDevice(receiver_name) receiver.enable(self.timestep) return emitter, receiver @@ -62,7 +78,10 @@ def handle_receiver(self): """ if self.receiver.getQueueLength() > 0: # Receive and decode message from supervisor - message = self.receiver.getData().decode("utf-8") + try: + message = self.receiver.getString() + except AttributeError: + message = self.receiver.getData().decode("utf-8") # Convert string message into a list message = message.split(",") @@ -70,7 +89,6 @@ def handle_receiver(self): self.receiver.nextPacket() - @abstractmethod def create_message(self): """ This method should be implemented to convert whatever data the robot @@ -79,13 +97,13 @@ def create_message(self): :return: a list or a comma-separated string containing all data """ - pass + raise NotImplementedError - @abstractmethod def use_message_data(self, message): """ This method should be implemented to apply whatever 
actions the message (received from the supervisor) contains. + :param message: list containing data received from the supervisor """ - pass + raise NotImplementedError diff --git a/deepbots/supervisor/controllers/GA.py b/deepbots/supervisor/controllers/GA.py new file mode 100644 index 0000000..dc4849c --- /dev/null +++ b/deepbots/supervisor/controllers/GA.py @@ -0,0 +1,192 @@ +import pygad + +class GA(pygad.GA): + def __init__(self, **kwargs): + super(GA, self).__init__(**kwargs) + + def cal_pop_fitness(self): + + """ + Calculating the fitness values of all solutions in the current population. + It returns: + -fitness: An array of the calculated fitness values. + """ + + if self.valid_parameters == False: + raise ValueError("ERROR calling the cal_pop_fitness() method: \nPlease check the parameters passed while creating an instance of the GA class.\n") + + pop_fitness = [] + # Calculating the fitness value of each solution in the current population. + for sol_idx, sol in enumerate(self.population): + + # Check if the parent's fitness value is already calculated. If so, use it instead of calling the fitness function. + if not (self.last_generation_parents is None) and len(numpy.where(numpy.all(self.last_generation_parents == sol, axis=1))[0] > 0): + # Index of the parent in the parents array (self.last_generation_parents). This is not its index within the population. + parent_idx = numpy.where(numpy.all(self.last_generation_parents == sol, axis=1))[0][0] + # Index of the parent in the population. + parent_idx = self.last_generation_parents_indices[parent_idx] + # Use the parent's index to return its pre-calculated fitness value. + fitness = self.last_generation_fitness[parent_idx] + else: + fitness = self.fitness_func(sol, sol_idx) + pop_fitness.append(fitness) + + pop_fitness = numpy.array(pop_fitness) + + return pop_fitness + + def run(self): + + """ + Runs the genetic algorithm. 
This is the main method in which the genetic algorithm is evolved through a number of generations. + """ + + if self.valid_parameters == False: + raise ValueError("Error calling the run() method: \nThe run() method cannot be executed with invalid parameters. Please check the parameters passed while creating an instance of the GA class.\n") + + # Reset the variables that store the solutions and their fitness after each generation. If not reset, then for each call to the run() method the new solutions and their fitness values will be appended to the old variables and their length double. Some errors arise if not reset. + # If, in the future, new variables are created that get appended after each generation, please consider resetting them here. + self.best_solutions = [] # Holds the best solution in each generation. + self.best_solutions_fitness = [] # A list holding the fitness value of the best solution for each generation. + self.solutions = [] # Holds the solutions in each generation. + self.solutions_fitness = [] # Holds the fitness of the solutions in each generation. + + if not (self.on_start is None): + self.on_start(self) + + stop_run = False + + # Measuring the fitness of each chromosome in the population. Save the fitness in the last_generation_fitness attribute. + self.last_generation_fitness = self.cal_pop_fitness() + + best_solution, best_solution_fitness, best_match_idx = self.best_solution(pop_fitness=self.last_generation_fitness) + + # Appending the best solution in the initial population to the best_solutions list. + if self.save_best_solutions: + self.best_solutions.append(best_solution) + + # Appending the solutions in the initial population to the solutions list. 
+ if self.save_solutions: + self.solutions.extend(self.population.copy()) + + for generation in range(self.num_generations): + if not (self.on_fitness is None): + self.on_fitness(self, self.last_generation_fitness) + + # Appending the fitness value of the best solution in the current generation to the best_solutions_fitness attribute. + self.best_solutions_fitness.append(best_solution_fitness) + + if self.save_solutions: + self.solutions_fitness.extend(self.last_generation_fitness) + + # Selecting the best parents in the population for mating. + if callable(self.parent_selection_type): + self.last_generation_parents, self.last_generation_parents_indices = self.select_parents(self.last_generation_fitness, self.num_parents_mating, self) + else: + self.last_generation_parents, self.last_generation_parents_indices = self.select_parents(self.last_generation_fitness, num_parents=self.num_parents_mating) + if not (self.on_parents is None): + self.on_parents(self, self.last_generation_parents) + + # If self.crossover_type=None, then no crossover is applied and thus no offspring will be created in the next generations. The next generation will use the solutions in the current population. + if self.crossover_type is None: + if self.num_offspring <= self.keep_parents: + self.last_generation_offspring_crossover = self.last_generation_parents[0:self.num_offspring] + else: + self.last_generation_offspring_crossover = numpy.concatenate((self.last_generation_parents, self.population[0:(self.num_offspring - self.last_generation_parents.shape[0])])) + else: + # Generating offspring using crossover. 
+ if callable(self.crossover_type): + self.last_generation_offspring_crossover = self.crossover(self.last_generation_parents, + (self.num_offspring, self.num_genes), + self) + else: + self.last_generation_offspring_crossover = self.crossover(self.last_generation_parents, + offspring_size=(self.num_offspring, self.num_genes)) + if not (self.on_crossover is None): + self.on_crossover(self, self.last_generation_offspring_crossover) + + # If self.mutation_type=None, then no mutation is applied and thus no changes are applied to the offspring created using the crossover operation. The offspring will be used unchanged in the next generation. + if self.mutation_type is None: + self.last_generation_offspring_mutation = self.last_generation_offspring_crossover + else: + # Adding some variations to the offspring using mutation. + if callable(self.mutation_type): + self.last_generation_offspring_mutation = self.mutation(self.last_generation_offspring_crossover, self) + else: + self.last_generation_offspring_mutation = self.mutation(self.last_generation_offspring_crossover) + if not (self.on_mutation is None): + self.on_mutation(self, self.last_generation_offspring_mutation) + + if (self.keep_parents == 0): + self.population = self.last_generation_offspring_mutation + elif (self.keep_parents == -1): + # Creating the new population based on the parents and offspring. 
+ self.population[0:self.last_generation_parents.shape[0], :] = self.last_generation_parents + self.population[self.last_generation_parents.shape[0]:, :] = self.last_generation_offspring_mutation + elif (self.keep_parents > 0): + parents_to_keep, _ = self.steady_state_selection(self.last_generation_fitness, num_parents=self.keep_parents) + self.population[0:parents_to_keep.shape[0], :] = parents_to_keep + self.population[parents_to_keep.shape[0]:, :] = self.last_generation_offspring_mutation + + self.generations_completed = generation + 1 # The generations_completed attribute holds the number of the last completed generation. + + # Measuring the fitness of each chromosome in the population. Save the fitness in the last_generation_fitness attribute. + self.last_generation_fitness = self.cal_pop_fitness() + + best_solution, best_solution_fitness, best_match_idx = self.best_solution(pop_fitness=self.last_generation_fitness) + + # Appending the best solution in the current generation to the best_solutions list. + if self.save_best_solutions: + self.best_solutions.append(best_solution) + + # Appending the solutions in the current generation to the solutions list. + if self.save_solutions: + self.solutions.extend(self.population.copy()) + + # If the callback_generation attribute is not None, then cal the callback function after the generation. + if not (self.on_generation is None): + r = self.on_generation(self) + if type(r) is str and r.lower() == "stop": + # Before aborting the loop, save the fitness value of the best solution. 
+ _, best_solution_fitness, _ = self.best_solution(self.last_generation_fitness) + self.best_solutions_fitness.append(best_solution_fitness) + break + + if not self.stop_criteria is None: + for criterion in self.stop_criteria: + if criterion[0] == "reach": + if max(self.last_generation_fitness) >= criterion[1]: + stop_run = True + break + elif criterion[0] == "saturate": + criterion[1] = int(criterion[1]) + if (self.generations_completed >= criterion[1]): + if (self.best_solutions_fitness[self.generations_completed - criterion[1]] - self.best_solutions_fitness[self.generations_completed - 1]) == 0: + stop_run = True + break + + if stop_run: + break + + time.sleep(self.delay_after_gen) + + # Save the fitness of the last generation. + if self.save_solutions: + self.solutions_fitness.extend(self.last_generation_fitness) + + # Save the fitness value of the best solution. + _, best_solution_fitness, _ = self.best_solution(pop_fitness=self.last_generation_fitness) + self.best_solutions_fitness.append(best_solution_fitness) + + self.best_solution_generation = numpy.where(numpy.array(self.best_solutions_fitness) == numpy.max(numpy.array(self.best_solutions_fitness)))[0][0] + # After the run() method completes, the run_completed flag is changed from False to True. + self.run_completed = True # Set to True only after the run() method completes gracefully. + + if not (self.on_stop is None): + self.on_stop(self, self.last_generation_fitness) + + # Converting the 'best_solutions' list into a NumPy array. + self.best_solutions = numpy.array(self.best_solutions) + + # Converting the 'solutions' list into a NumPy array. 
+ self.solutions = numpy.array(self.solutions) diff --git a/deepbots/supervisor/controllers/robot_supervisor.py b/deepbots/supervisor/controllers/robot_supervisor.py index 82bf104..9d0e902 100644 --- a/deepbots/supervisor/controllers/robot_supervisor.py +++ b/deepbots/supervisor/controllers/robot_supervisor.py @@ -1,16 +1,17 @@ -from abc import abstractmethod - +from warnings import warn, simplefilter from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv +from controller import Supervisor class RobotSupervisor(SupervisorEnv): """ The RobotSupervisor class implements both a robot controller and a - supervisor RL environment. This class can be used when there is no - need to separate the Robot from the Supervisor, or the observations of - the robot are too big to be packaged in messages, e.g. high resolution - images from a camera, that introduce a bottleneck and reduce - performance significantly. + supervisor RL environment, referred to as Robot-Supervisor scheme. + + This class can be used when there is no need to separate the Robot + from the Supervisor, or the observations of the robot are too big + to be packaged in messages, e.g. high resolution images from a camera, + that introduce a bottleneck and reduce performance significantly. Controllers that inherit this method *must* run on Robot nodes that have supervisor privileges. @@ -26,29 +27,56 @@ class RobotSupervisor(SupervisorEnv): action, e.g. motor speeds. Note that apply_action() is called during step(). 
""" - def __init__(self, time_step=None): + def __init__(self, timestep=None): super(RobotSupervisor, self).__init__() - if time_step is None: - self.timestep = int(self.supervisor.getBasicTimeStep()) + if timestep is None: + self.timestep = int(self.getBasicTimeStep()) else: - self.timestep = time_step + self.timestep = timestep def get_timestep(self): + # The filter is required so as to not ignore the Deprecation warning + simplefilter("once") + warn("get_timestep is deprecated, use .timestep instead", + DeprecationWarning) return self.timestep + @property + def timestep(self): + """ + Getter of _timestep field. Timestep is defined in milliseconds + + :return: The timestep of the controller in milliseconds + """ + return self._timestep + + @timestep.setter + def timestep(self, value): + """ + Setter of timestep field. Automatically converts to int as + required by Webots. + + :param value: The new controller timestep in milliseconds + """ + self._timestep = int(value) + def step(self, action): """ - Default step implementation that contains a Webots step conditional - for terminating properly. + The basic step method that steps the controller, + calls the method that applies the action on the robot + and returns the (observations, reward, done, info) object. - :param action: The agent's action - :return: tuple, (observation, reward, is_done, info) + :param action: Whatever the use-case uses as an action, e.g. 
+ an integer representing discrete actions + :type action: Defined by the implementation of handle_emitter + :return: tuple, (observations, reward, done, info) as provided by the + corresponding methods as implemented for the use-case """ - if self.supervisor.step(self.timestep) == -1: + self.apply_action(action) + if super(Supervisor, self).step(self.timestep) == -1: exit() - self.apply_action(action) return ( self.get_observations(), self.get_reward(action), @@ -56,7 +84,6 @@ def step(self, action): self.get_info(), ) - @abstractmethod def apply_action(self, action): """ This method should be implemented to apply whatever actions the @@ -71,4 +98,4 @@ def apply_action(self, action): :param action: list, containing action data """ - pass + raise NotImplementedError diff --git a/deepbots/supervisor/controllers/supervisor_emitter_receiver.py b/deepbots/supervisor/controllers/supervisor_emitter_receiver.py index 09dd60a..f0c5d6b 100644 --- a/deepbots/supervisor/controllers/supervisor_emitter_receiver.py +++ b/deepbots/supervisor/controllers/supervisor_emitter_receiver.py @@ -1,37 +1,72 @@ -from abc import abstractmethod from collections.abc import Iterable +from warnings import warn, simplefilter -from .supervisor_env import SupervisorEnv +from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv +from controller import Supervisor class SupervisorEmitterReceiver(SupervisorEnv): + """ + This is the base class for the emitter - receiver scheme. + + Subclasses implement a variety of communication formats such as CSV + messages. + """ def __init__(self, emitter_name="emitter", receiver_name="receiver", - time_step=None): - + timestep=None): + """ + The constructor sets up the timestep and calls the method that + initializes the emitter and receiver devices with the names provided. 
+ + :param emitter_name: The name of the emitter device on the + supervisor node + :param receiver_name: The name of the receiver device on the + supervisor node + :param timestep: The supervisor controller timestep + """ super(SupervisorEmitterReceiver, self).__init__() - if time_step is None: - self.timestep = int(self.supervisor.getBasicTimeStep()) + if timestep is None: + self.timestep = int(self.getBasicTimeStep()) else: - self.timestep = time_step + self.timestep = timestep - self.emitter = None - self.receiver = None - self.initialize_comms(emitter_name, receiver_name) + self.emitter, self.receiver = self.initialize_comms( + emitter_name, receiver_name) def initialize_comms(self, emitter_name, receiver_name): - self.emitter = self.supervisor.getDevice(emitter_name) - self.receiver = self.supervisor.getDevice(receiver_name) - self.receiver.enable(self.timestep) - return self.emitter, self.receiver + """ + Initializes the emitter and receiver devices with the names provided. + + :param emitter_name: The name of the emitter device on the + supervisor node + :param receiver_name: The name of the receiver device on the + supervisor node + :return: The initialized emitter and receiver references + """ + emitter = self.getDevice(emitter_name) + receiver = self.getDevice(receiver_name) + receiver.enable(self.timestep) + return emitter, receiver def step(self, action): - if self.supervisor.step(self.timestep) == -1: + """ + The basic step method that steps the controller, + calls the method that sends the action through the emitter + and returns the (observations, reward, done, info) object. + + :param action: Whatever the use-case uses as an action, e.g. 
+ an integer representing discrete actions + :type action: Defined by the implementation of handle_emitter + :return: (observations, reward, done, info) as provided by the + corresponding methods as implemented for the use-case + """ + self.handle_emitter(action) + if super(Supervisor, self).step(self.timestep) == -1: exit() - self.handle_emitter(action) return ( self.get_observations(), self.get_reward(action), @@ -39,27 +74,82 @@ def step(self, action): self.get_info(), ) - @abstractmethod def handle_emitter(self, action): - pass + """ + This method is implemented by subclasses depending on the + communication format used. + + :param action: The action that is sent through the emitter device + to the robot, e.g. an integer representing discrete actions + """ + raise NotImplementedError - @abstractmethod def handle_receiver(self): - pass + """ + This method is implemented by subclasses depending on the + communication format used. + """ + raise NotImplementedError def get_timestep(self): + # The filter is required so as to not ignore the Deprecation warning + simplefilter("once") + warn("get_timestep is deprecated, use .timestep instead", + DeprecationWarning) return self.timestep + @property + def timestep(self): + """ + Getter of _timestep field. Timestep is defined in milliseconds + + :return: The timestep of the controller in milliseconds + """ + return self._timestep + + @timestep.setter + def timestep(self, value): + """ + Setter of timestep field. Automatically converts to int as + required by Webots. + + :param value: The new controller timestep in milliseconds + """ + self._timestep = int(value) + class SupervisorCSV(SupervisorEmitterReceiver): + """ + This class implements the emitter-receiver scheme using Comma Separated + Values. + """ def __init__(self, emitter_name="emitter", receiver_name="receiver", - time_step=None): + timestep=None): + """ + The constructor just passes the arguments provided to the parent + class contructor. 
+ + :param emitter_name: The name of the emitter device on the + supervisor node + :param receiver_name: The name of the receiver device on the + supervisor node + :param timestep: The supervisor controller timestep + """ super(SupervisorCSV, self).__init__(emitter_name, receiver_name, - time_step) + timestep) def handle_emitter(self, action): + """ + Implementation of the handle_emitter method expecting an iterable + with Comma Separated Values (CSV). + + :param action: Whatever the use-case uses as an action, e.g. + an integer representing discrete actions + :type action: Iterable, for multiple values the CSV format is + required, e.g. [0, 1] for two actions + """ assert isinstance(action, Iterable), \ "The action object should be Iterable" @@ -67,8 +157,19 @@ def handle_emitter(self, action): self.emitter.send(message) def handle_receiver(self): + """ + Implementation of the handle_receiver method expecting an iterable + with Comma Separated Values (CSV). + + :return: Returns the message received from the robot, returns None + if no message is received + :rtype: List of string values + """ if self.receiver.getQueueLength() > 0: - string_message = self.receiver.getData().decode("utf-8") + try: + string_message = self.receiver.getString() + except AttributeError: + string_message = self.receiver.getData().decode("utf-8") self.receiver.nextPacket() return string_message.split(",") else: diff --git a/deepbots/supervisor/controllers/supervisor_env.py b/deepbots/supervisor/controllers/supervisor_env.py index e02259f..78a58f0 100644 --- a/deepbots/supervisor/controllers/supervisor_env.py +++ b/deepbots/supervisor/controllers/supervisor_env.py @@ -1,46 +1,28 @@ -from abc import ABC, abstractmethod - +import gym from controller import Supervisor -class SupervisorEnv(ABC): +class SupervisorEnv(Supervisor, gym.Env): """ - This class represents the basic template which contains the necessary - methods to train a reinforcement learning algorithm. 
The interface class - follows the gym interface which is standardized in many reinforcement - learning algorithms. The OpenAI gym environment can be described by the - following figure: - - +----------+ (action) +---------------+ - | |-------------------------------->| | - | Agent | | SupervisorEnv | - | |<--------------------------------| | - +----------+ (observation, reward) +---------------+ + This class is the highest class in deepbots class hierarchy, inheriting + both the Webots Supervisor controller and the basic gym.Env. + + Refer to gym.Env documentation on how to implement a custom gym.Env + for additional functionality. + + This class contains abstract methods that guide the development process + for users that want to implement a simple environment. This class is not intended for user usage, but to provide a common interface for all provided supervisor classes and make them compatible with reinforcement learning agents that work with the gym interface. Moreover, a problem-agnostic reset method is - provided. Please use any of the children supervisor classes to be - inherited by your own classes, such as the RobotSupervisor class. + provided. Please use any of the children supervisor classes to be + inherited by your own class, such as the RobotSupervisor class. Nevertheless, advanced users can inherit this class to create their own supervisor classes if they wish. """ - def __init__(self): - self.supervisor = Supervisor() - - @abstractmethod - def get_observations(self): - """ - Return the observations of the robot. For example, metrics from - sensors, a camera image, etc. - - :returns: An object of observations - """ - pass - - @abstractmethod def step(self, action): """ On each timestep, the agent chooses an action for the previous @@ -48,34 +30,18 @@ def step(self, action): observation, *state_t+1*, the reward and whether the episode is done or not. 
+ Each of the values returned is produced by implementations of + other abstract methods defined below. + observation: The next observation from the environment reward: The amount of reward awarded on this step is_done: Whether the episode is done - info: Diagnostic information mostly useful for debugging. + info: Diagnostic information mostly useful for debugging :param action: The agent's action :return: tuple, (observation, reward, is_done, info) """ - pass - - @abstractmethod - def get_reward(self, action): - """ - Calculates and returns the reward for this step. - - :param action: The agent's action - :return: The amount of reward awarded on this step - """ - pass - - @abstractmethod - def is_done(self): - """ - Used to inform the agent that the problem is solved. - - :return: bool, True if the episode is done - """ - pass + raise NotImplementedError def reset(self): """ @@ -93,8 +59,9 @@ def reset(self): :return: default observation provided by get_default_observation() """ - self.supervisor.simulationReset() - self.supervisor.simulationResetPhysics() + self.simulationReset() + self.simulationResetPhysics() + super(Supervisor, self).step(int(self.getBasicTimeStep())) return self.get_default_observation() def get_default_observation(self): @@ -105,12 +72,46 @@ def get_default_observation(self): :return: list-like, contains default agent observation """ - return NotImplementedError + raise NotImplementedError + + def get_observations(self): + """ + Return the observations of the robot. For example, metrics from + sensors, a camera image, etc. + + This method is use-case specific and needs to be implemented + by the user. + + :returns: An object of observations + """ + raise NotImplementedError + + def get_reward(self, action): + """ + Calculates and returns the reward for this step. + + This method is use-case specific and needs to be implemented + by the user. 
+ + :param action: The agent's action + :return: The amount of reward awarded on this step + """ + raise NotImplementedError + + def is_done(self): + """ + Used to inform the agent that the problem is solved. + + This method is use-case specific and needs to be implemented + by the user. + + :return: bool, True if the episode is done + """ + raise NotImplementedError - @abstractmethod def get_info(self): """ This method can be implemented to return any diagnostic information on each step, e.g. for debugging purposes. """ - pass + raise NotImplementedError diff --git a/deepbots/supervisor/controllers/supervisor_evolutionary.py b/deepbots/supervisor/controllers/supervisor_evolutionary.py new file mode 100644 index 0000000..0ee87dd --- /dev/null +++ b/deepbots/supervisor/controllers/supervisor_evolutionary.py @@ -0,0 +1,189 @@ +from deepbots.supervisor.controllers.supervisor_emitter_receiver import SupervisorCSV + +import pygad +import pygad.torchga as torchga +from pygad.torchga import TorchGA +import torch +import matplotlib.pyplot as plt +import wandb + + +class SupervisorEvolutionary(SupervisorCSV): + ''' + TODO: + - Change comm scheme and add support for multiple robots + ''' + + def __init__(self, model, device=None): + """ + The base class for implementing genetic algorithms using deepbots. + + :param model: The model to be trained and used in the genetic algorithm. + :type model: torch.nn.Module + :param device: The device to be used for training the model. + :type device: torch.device + """ + super(SupervisorEvolutionary, self).__init__() + self.fitness = [] + if device is None: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.model = model.to(device) + self.device = device + + def fitness_function(self, solution, solution_idx): + """ + The default fitness function for the genetic algorithm. The fitness is defined as the total reward accumulated by + the agent over one episoder. 
This function can be overridden with a custom fitness function for the genetic algorithm + by re-defining it in the inheriting class. + + :param solution: The solution vector of the model. + :type solution: dict(torch.Tensor) + :param solution_idx: The index of the solution vector in the population. + :type solution_idx: int + :return: The fitness value of the solution vector. + :rtype: float + """ + + model_weights = torchga.model_weights_as_dict(model=self.model, weights_vector=solution) + model_weights = {k: v.to(self.device) for k, v in model_weights.items()} + self.model.load_state_dict(model_weights) + + observation = self.reset() + total_reward = 0 + done = False + no_steps = 0 + + while not done: + observation = torch.tensor(observation).unsqueeze(0).float().to(self.device) + next_observation, reward, done, info = self.step(observation) + self.episodeScore += reward + observation = next_observation + no_steps += 1 + + return self.episodeScore + + def step(self, observation): + """ + Take one step in the environment by using the model to predict the action to take. + + :param observation: The observation received from the environment. + :type observation: torch.Tensor + :return: The next observation, reward, done flag and info. + :rtype: torch.Tensor, float, bool, dict + """ + action = self.get_action(observation) + next_observation, reward, done, info = super(SupervisorCSV, self).step(action) + + return next_observation, reward, done, info + + def get_action(self, observation): + """ + Get the action to be taken by the agent. + + :param observation: The observation received from the environment. + :type observation: torch.Tensor + """ + raise NotImplementedError + + def callback_generation(self, ga_solver, wandb_logging): + """ + Callback function for the genetic algorithm solver. This function is called after each generation and prints the fitness + of the best solution in the generation. + + :param ga_solver: The genetic algorithm solver object.
+ :type ga_solver: pygad.GA + """ + print(f"Generation: {ga_solver.generations_completed} | Fitness: {ga_solver.best_solution()[1]}") + if wandb_logging: + wandb.log({"Fitness": ga_solver.best_solution()[1]}) + self.fitness.append(ga_solver.best_solution()[1]) + + def train( + self, + wandb_logging=True, + num_generations=75, + num_parents_mating=5, + num_solutions=10, + parent_selection_type="sss", + crossover_type="single_point", + mutation_type="random", + mutation_percent_genes=10, + keep_parents=-1, + **kwargs, + ): + """ + Method to train the model using the genetic algorithm. + + :param wandb_logging: Flag to enable wandb logging. + :type wandb_logging: bool + :param num_generations: The number of generations to be trained for. + :type num_generations: int + :param num_parents_mating: The number of parents to be selected for mating. + :type num_parents_mating: int + :param num_solutions: The number of solutions to be selected for mating. + :type num_solutions: int + :param parent_selection_type: The type of parent selection to be used. + :type parent_selection_type: str + :param crossover_type: The type of crossover to be used. + :type crossover_type: str + :param mutation_type: The type of mutation to be used. + :type mutation_type: str + :param mutation_percent_genes: The percentage of genes to be mutated. + :type mutation_percent_genes: int + :param keep_parents: The number of parents to keep in the population. + :type keep_parents: int + :param kwargs: Other keyword arguments for the genetic algorithm solver. + :type kwargs: dict + + :return: The model loaded with the best solution, the fitness of the best solution and its index, the history of + the fitness. 
+ :rtype: torch.nn.Module, float, int, list + """ + initial_population = TorchGA(model=self.model, num_solutions=num_solutions).population_weights + #print(f"Initial population len: {initial_population[0].shape}") + fitness_func = lambda solution, solution_idx: self.fitness_function(solution=solution, solution_idx=solution_idx) + callback = lambda ga_solver: self.callback_generation(ga_solver=ga_solver, wandb_logging=wandb_logging) + #partial(fitness_func, model=self.model) + + self.ga_solver = pygad.GA( + num_generations=num_generations, + num_parents_mating=num_parents_mating, + initial_population=initial_population, + fitness_func=fitness_func, + parent_selection_type=parent_selection_type, + crossover_type=crossover_type, + mutation_type=mutation_type, + mutation_percent_genes=mutation_percent_genes, + keep_parents=keep_parents, + on_generation=callback, + **kwargs, + ) + + self.ga_solver.run() + solution, solution_fitness, solution_idx = self.ga_solver.best_solution() + trained_model_weights = torchga.model_weights_as_dict(model=self.model, weights_vector=solution) + trained_model_weights = {k: v.to(self.device) for k, v in trained_model_weights.items()} + self.model.load_state_dict(trained_model_weights) + print(f"Fitness value of best solution = {solution_fitness}") + self.plot_fitness() + + return self.model, solution_fitness, solution_idx, self.fitness + + def save_model(self, path): + """ + Function to save the model. + + :param path: The path to save the model. + :type path: str + """ + torch.save(self.model.state_dict(), path) + + def plot_fitness(self): + """ + Function to plot the fitness history. 
+ """ + plt.plot(self.fitness) + plt.xlabel("Generation") + plt.ylabel("Fitness") + plt.title("Fitness vs Generation Plot") + plt.show() diff --git a/deepbots/supervisor/controllers/supervisor_multi_evolutionary.py b/deepbots/supervisor/controllers/supervisor_multi_evolutionary.py new file mode 100644 index 0000000..11d8fa8 --- /dev/null +++ b/deepbots/supervisor/controllers/supervisor_multi_evolutionary.py @@ -0,0 +1,83 @@ +from deepbots.supervisor.controllers.supervisor_evolutionary import SupervisorEvolutionary + +import pygad +import pygad.torchga as torchga +from pygad.torchga import TorchGA +import torch +import matplotlib.pyplot as plt +import wandb + +class SupervisorMultiEvolutionary(SupervisorEvolutionary, pygad.GA): + def __init__(self, num_robots, model, device=None): + super().__init__(model, device) + self.num_robots = num_robots + + def fitness_function(self, solution, solution_idx): + """ + The default fitness function for the genetic algorithm. The fitness is defined as the total reward accumulated by + the agent over one episoder. This function can be overrided with a custom fitness function for the genetic algorithm + by re-defining in the inheriting class. + + :param solution: The solution vector of the model. + :type solution: dict(torch.Tensor) + :param solution_idx: The index of the solution vector in the population. + :type solution_idx: int + :return: The fitness value of the solution vector. 
+ :rtype: float + """ + + model_weights = torchga.model_weights_as_dict(model=self.model, weights_vector=solution) + model_weights = {k: v.to(self.device) for k, v in model_weights.items()} + self.model.load_state_dict(model_weights) + + observation = self.reset() + total_reward = 0 + done = False + no_steps = 0 + + while not done: + observation = torch.tensor(observation).unsqueeze(0).float().to(self.device) + next_observation, reward, done, info = self.step(observation) + self.episodeScore += reward + observation = next_observation + no_steps += 1 + + return self.episodeScore + + def step(self, observation): + """ + Take one step in the environment by using the model to predict the action to take. + + :param observation: The observation received from the environment. + :type observation: torch.Tensor + :return: The next observation, reward, done flag and info. + :rtype: torch.Tensor, float, bool, dict + """ + action = self.get_action(observation) + next_observation, reward, done, info = super(SupervisorCSV, self).step(action) + + return next_observation, reward, done, info + + def cal_pop_fitness(self): + if self.valid_parameters == False: + raise ValueError("ERROR calling the cal_pop_fitness() method: \nPlease check the parameters passed while creating an instance of the GA class.\n") + + pop_fitness = [] + # Calculating the fitness value of each solution in the current population. + for sol_idx, sol in enumerate(self.population): + + # Check if the parent's fitness value is already calculated. If so, use it instead of calling the fitness function. + if not (self.last_generation_parents is None) and len(numpy.where(numpy.all(self.last_generation_parents == sol, axis=1))[0] > 0): + # Index of the parent in the parents array (self.last_generation_parents). This is not its index within the population. + parent_idx = numpy.where(numpy.all(self.last_generation_parents == sol, axis=1))[0][0] + # Index of the parent in the population. 
+ parent_idx = self.last_generation_parents_indices[parent_idx] + # Use the parent's index to return its pre-calculated fitness value. + fitness = self.last_generation_fitness[parent_idx] + else: + fitness = self.fitness_func(sol, sol_idx) + pop_fitness.append(fitness) + + pop_fitness = numpy.array(pop_fitness) + + return pop_fitness \ No newline at end of file diff --git a/deepbots/supervisor/wrappers/keyboard_printer.py b/deepbots/supervisor/wrappers/keyboard_printer.py index 01c1696..3e43ac2 100644 --- a/deepbots/supervisor/wrappers/keyboard_printer.py +++ b/deepbots/supervisor/wrappers/keyboard_printer.py @@ -7,7 +7,7 @@ class KeyboardPrinter(SupervisorEnv): def __init__(self, controller): self.controller = controller self.keyboard = Keyboard() - self.keyboard.enable(self.controller.get_timestep()) + self.keyboard.enable(self.controller.timestep) def step(self, action): observation, reward, isDone, info = self.controller.step(action) diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..1584f1b --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,57 @@ +FROM nvidia/cudagl:11.0-devel-ubuntu20.04 +ARG DEBIAN_FRONTEND=noninteractive + +ARG PYTHON_VERSION=3.8 + +ARG branch + +# Install ubuntu libaries +RUN apt-get update && \ + apt-get install -y --no-install-recommends build-essential cmake pkg-config \ + libfreetype6-dev git nano wget curl vim ca-certificates unzip libjpeg-dev \ + libpng-dev libosmesa6-dev software-properties-common xvfb gpg-agent + +# Install miniconda +RUN curl -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + chmod +x ~/miniconda.sh && \ + ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh && \ + /opt/conda/bin/conda update -n base -c defaults conda && \ + /opt/conda/bin/conda install -y python=$PYTHON_VERSION setuptools patchelf && \ + /opt/conda/bin/conda clean -ya +ENV PATH /opt/conda/bin:$PATH + + +# Env vars for the nvidia-container-runtime. 
+ENV PATH /usr/local/cuda/bin/:$PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/lib:/usr/local/cuda/lib64 +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute,utility +LABEL com.nvidia.volumes.needed="nvidia_driver" + +# Install Webots +RUN wget -qO- https://cyberbotics.com/Cyberbotics.asc | apt-key add - +RUN apt-add-repository 'deb https://cyberbotics.com/debian/ binary-amd64/' && \ + apt-get update && apt-get install -y webots + +# Set the Webots environment variables +ENV WEBOTS_HOME /usr/local/webots +ENV LD_LIBRARY_PATH $WEBOTS_HOME/lib/controller:$LD_LIBRARY_PATH + + +ADD requirements.txt . +# Install python dependencies +RUN pip install -r requirements.txt + +RUN if [ $branch = "dev" ]; then pip install -i https://test.pypi.org/simple/ deepbots ; else pip install deepbots ; fi + +RUN pip install 'ray[tune]' 'ray[rllib]' + +# Fix the error of the custom environment on Ray +ADD preprocessors.py . +RUN cp -r preprocessors.py opt/conda/lib/python3.8/site-packages/ray/rllib/models/ +RUN rm preprocessors.py + + +WORKDIR /workspace +RUN chmod -R a+w /workspace \ No newline at end of file diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..cea8742 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,69 @@ +# Use the Docker of the deepbots + +## In case you would like to add other functionalities/libraries on the docker: + +* Edit the Dockerfile +* Build the docker image using the commands below +* The build argument ```branch``` specifies the ```dev``` or the ```master``` branch of deepbots. + +### Building and tagging a Docker image: +```bash +$ docker build -t yourusername/repository-name --build-arg branch=dev . +``` + +## Pull the existing image from DockerHub + +```bash +$ docker pull nickok/deepbots-dev +``` + +## For the use of Cuda on your docker container + +You should install NVIDIA Container Toolkit on your ```host``` machine. + +1) Set up the stable repository and the GPG key: +``` bash +$ distribution=$(.
/etc/os-release;echo $ID$VERSION_ID) \ + && curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \ + && curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list +``` +2) Install the nvidia-docker2 package (and dependencies) after updating the package listing: + +``` bash +$ sudo apt-get update +``` + +``` bash +$ sudo apt-get install -y nvidia-docker2 +``` + +Restart the Docker daemon to complete the installation after setting the default runtime: +``` bash +$ sudo systemctl restart docker +``` + + + +## Run docker + +### Use docker with ```cpu``` +Mount a Webots project and run it in an interactive Docker container: +```bash +$ docker run -it -v /absolute/path/to/webots/project:/workspace/name-of-project nickok/deepbots-dev +``` + +### Use docker with ```cuda``` (GPU) +``` bash +$ docker run --rm --gpus all -it -v /absolute/path/to/webots/project:/workspace/name-of-project nickok/deepbots +``` + +After starting the docker container you can start Webots headlessly using xvfb: +```bash +$ xvfb-run webots --stdout --stderr --batch --no-sandbox --mode=fast /path/to/your/world/file + +``` + +Start Webots headlessly using xvfb and save the output to out.txt: +```bash +$ xvfb-run webots --stdout --stderr --batch --no-sandbox --mode=fast /path/to/your/world/file &> out.txt & +``` \ No newline at end of file diff --git a/docker/preprocessors.py b/docker/preprocessors.py new file mode 100644 index 0000000..2a9d1c0 --- /dev/null +++ b/docker/preprocessors.py @@ -0,0 +1,361 @@ +# This file is modified to be used for custom environments +# Link of the original file before my modification +# Link: https://github.com/ray-project/ray/blob/releases/1.2.0/rllib/models/preprocessors.py + +from collections import OrderedDict +import cv2 +import logging +import numpy as np +import gym +from typing import Any, List + +from ray.rllib.utils.annotations import override, PublicAPI
+from ray.rllib.utils.spaces.repeated import Repeated
+from ray.rllib.utils.typing import TensorType
+
+ATARI_OBS_SHAPE = (210, 160, 3)
+ATARI_RAM_OBS_SHAPE = (128, )
+
+# Only validate env observations vs the observation space every n times in a
+# Preprocessor.
+OBS_VALIDATION_INTERVAL = 100
+
+logger = logging.getLogger(__name__)
+
+
+@PublicAPI
+class Preprocessor:
+    """Defines an abstract observation preprocessor function.
+
+    Subclasses must implement `_init_shape()` and `transform()`.
+
+    Attributes:
+        shape (List[int]): Shape of the preprocessed output.
+    """
+
+    @PublicAPI
+    def __init__(self, obs_space: gym.Space, options: dict = None):
+        """Stores space and options, then derives the output shape and size.
+
+        Args:
+            obs_space: The observation space this preprocessor handles.
+            options: Model options. When falsy, a copy of
+                `ray.rllib.models.catalog.MODEL_DEFAULTS` is used.
+        """
+        legacy_patch_shapes(obs_space)
+        self._obs_space = obs_space
+        if not options:
+            # Function-scope import kept from the original code (presumably
+            # to avoid a circular import with the catalog module).
+            from ray.rllib.models.catalog import MODEL_DEFAULTS
+            self._options = MODEL_DEFAULTS.copy()
+        else:
+            self._options = options
+        self.shape = self._init_shape(obs_space, self._options)
+        # `np.prod` instead of `np.product`: the latter is a deprecated alias
+        # that no longer exists in NumPy 2.x.
+        self._size = int(np.prod(self.shape))
+        # Number of `check_shape()` calls made so far.
+        self._i = 0
+
+    @PublicAPI
+    def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]:
+        """Returns the shape after preprocessing."""
+        raise NotImplementedError
+
+    @PublicAPI
+    def transform(self, observation: TensorType) -> np.ndarray:
+        """Returns the preprocessed observation."""
+        raise NotImplementedError
+
+    def write(self, observation: TensorType, array: np.ndarray,
+              offset: int) -> None:
+        """Alternative to transform for more efficient flattening.
+
+        Writes the transformed observation into
+        `array[offset:offset + size]` in place.
+        """
+        array[offset:offset + self._size] = self.transform(observation)
+
+    def check_shape(self, observation: Any) -> None:
+        """Counts the call; observation validation is currently disabled.
+
+        The containment check against the observation space (guarded by
+        `OBS_VALIDATION_INTERVAL`) is commented out in this file, so this
+        method never raises. The disabled code is kept below for reference.
+        """
+        # if self._i % OBS_VALIDATION_INTERVAL == 0:
+        #     if type(observation) is list and isinstance(
+        #             self._obs_space, gym.spaces.Box):
+        #         observation = np.array(observation)
+        #     try:
+        #         if not self._obs_space.contains(observation):
+        #             raise ValueError(
+        #                 "Observation ({}) outside given space ({})!",
+        #                 observation, self._obs_space)
+        #     except AttributeError:
+        #         raise ValueError(
+        #             "Observation for a Box/MultiBinary/MultiDiscrete space "
+        #             "should be an np.array, not a Python list.", observation)
+        self._i += 1
+
+    @property
+    @PublicAPI
+    def size(self) -> int:
+        """Number of elements in the flattened preprocessed output."""
+        return self._size
+
+    @property
+    @PublicAPI
+    def observation_space(self) -> gym.Space:
+        """A float32 Box in [-1, 1] with the preprocessed shape."""
+        obs_space = gym.spaces.Box(-1., 1., self.shape, dtype=np.float32)
+        # Stash the unwrapped space so that we can unwrap dict and tuple spaces
+        # automatically in modelv2.py
+        classes = (DictFlatteningPreprocessor, OneHotPreprocessor,
+                   RepeatedValuesPreprocessor, TupleFlatteningPreprocessor)
+        if isinstance(self, classes):
+            obs_space.original_space = self._obs_space
+        return obs_space
+
+
+class GenericPixelPreprocessor(Preprocessor):
+    """Generic image preprocessor.
+
+    Note: for Atari games, use config {"preprocessor_pref": "deepmind"}
+    instead for deepmind-style Atari preprocessing.
+    """
+
+    @override(Preprocessor)
+    def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]:
+        """Reads "grayscale", "zero_mean" and "dim" from `options`.
+
+        Returns:
+            `(dim, dim, 1)` when grayscale is enabled, else `(dim, dim, 3)`.
+        """
+        self._grayscale = options.get("grayscale")
+        self._zero_mean = options.get("zero_mean")
+        self._dim = options.get("dim")
+        channels = 1 if self._grayscale else 3
+        return (self._dim, self._dim, channels)
+
+    @override(Preprocessor)
+    def transform(self, observation: TensorType) -> np.ndarray:
+        """Downsamples images from (210, 160, 3) by the configured factor.
+
+        Crops 25 rows from the top and bottom, resizes to `dim x dim`,
+        optionally averages the channels, and rescales the values with
+        either `(x - 128) / 128` or `x / 255`.
+
+        Returns:
+            A float32 array of shape `self.shape`.
+        """
+        self.check_shape(observation)
+        scaled = observation[25:-25, :, :]
+        if self._dim < 84:
+            scaled = cv2.resize(scaled, (84, 84))
+        # OpenAI: Resize by half, then down to 42x42 (essentially mipmapping).
+        # If we resize directly we lose pixels that, when mapped to 42x42,
+        # aren't close enough to the pixel boundary.
+        scaled = cv2.resize(scaled, (self._dim, self._dim))
+        if self._grayscale:
+            scaled = scaled.mean(2)
+            # Reshape needed for maintaining 1 channel
+            scaled = np.reshape(scaled, [self._dim, self._dim, 1])
+        # Cast on BOTH paths (previously only for grayscale). On an
+        # integer-typed colour image, `scaled - 128` wraps around instead of
+        # going negative and the in-place `*=` below fails to cast its float
+        # result back into the integer array.
+        scaled = scaled.astype(np.float32)
+        if self._zero_mean:
+            scaled = (scaled - 128) / 128
+        else:
+            scaled *= 1.0 / 255.0
+        return scaled
+
+
+class AtariRamPreprocessor(Preprocessor):
+    """Rescales 128-element RAM observations with `(x - 128) / 128`."""
+
+    @override(Preprocessor)
+    def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]:
+        return (128, )
+
+    @override(Preprocessor)
+    def transform(self, observation: TensorType) -> np.ndarray:
+        self.check_shape(observation)
+        # Convert to float first: subtracting 128 from an unsigned integer
+        # array wraps around instead of producing negative values.
+        return (np.asarray(observation, dtype=np.float32) - 128) / 128
+
+
+class OneHotPreprocessor(Preprocessor):
+    """One-hot preprocessor for Discrete and MultiDiscrete spaces.
+
+    Examples:
+        >>> self.transform(Discrete(3).sample())
+        ... np.array([0.0, 1.0, 0.0])
+        >>> self.transform(MultiDiscrete([2, 3]).sample())
+        ... np.array([0.0, 1.0, 0.0, 0.0, 1.0])
+    """
+
+    @override(Preprocessor)
+    def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]:
+        """Returns `(n, )` for Discrete, else `(sum(nvec), )`."""
+        if isinstance(obs_space, gym.spaces.Discrete):
+            return (self._obs_space.n, )
+        else:
+            return (np.sum(self._obs_space.nvec), )
+
+    @override(Preprocessor)
+    def transform(self, observation: TensorType) -> np.ndarray:
+        """Returns a float32 vector with a 1 at each selected category."""
+        self.check_shape(observation)
+        # `self.shape` already holds the result of `_init_shape()`.
+        arr = np.zeros(self.shape, dtype=np.float32)
+        if isinstance(self._obs_space, gym.spaces.Discrete):
+            arr[observation] = 1
+        else:
+            nvec = self._obs_space.nvec
+            # Running start index of the i-th sub-space's one-hot segment
+            # (equals sum(nvec[:i]) without re-summing on every iteration).
+            start = 0
+            for i, o in enumerate(observation):
+                arr[start + o] = 1
+                start += nvec[i]
+        return arr
+
+    @override(Preprocessor)
+    def write(self, observation: TensorType, array: np.ndarray,
+              offset: int) -> None:
+        array[offset:offset + self.size] = self.transform(observation)
+
+
+class NoPreprocessor(Preprocessor):
+    """Pass-through preprocessor: observations are returned unchanged."""
+
+    @override(Preprocessor)
+    def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]:
+        return self._obs_space.shape
+
+    @override(Preprocessor)
+    def transform(self, observation: TensorType) -> np.ndarray:
+        self.check_shape(observation)
+        return observation
+
+    @override(Preprocessor)
+    def write(self, observation: TensorType, array: np.ndarray,
+              offset: int) -> None:
+        """Writes the flattened observation into `array` at `offset`."""
+        # `np.asarray` avoids a copy when possible; `np.array(copy=False)`
+        # raises under NumPy 2.x whenever a copy turns out to be required.
+        array[offset:offset + self._size] = np.asarray(observation).ravel()
+
+    @property
+    @override(Preprocessor)
+    def observation_space(self) -> gym.Space:
+        """The original observation space, unchanged."""
+        return self._obs_space
+
+
+class TupleFlatteningPreprocessor(Preprocessor):
+    """Preprocesses each tuple element, then flattens it all into a vector.
+
+    RLlib models will unpack the flattened output before _build_layers_v2().
+    """
+
+    @override(Preprocessor)
+    def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]:
+        """Builds one sub-preprocessor per element and sums their sizes."""
+        assert isinstance(self._obs_space, gym.spaces.Tuple)
+        size = 0
+        self.preprocessors = []
+        for space in self._obs_space.spaces:
+            logger.debug("Creating sub-preprocessor for %s", space)
+            preprocessor = get_preprocessor(space)(space, self._options)
+            self.preprocessors.append(preprocessor)
+            size += preprocessor.size
+        return (size, )
+
+    @override(Preprocessor)
+    def transform(self, observation: TensorType) -> np.ndarray:
+        self.check_shape(observation)
+        array = np.zeros(self.shape, dtype=np.float32)
+        self.write(observation, array, 0)
+        return array
+
+    @override(Preprocessor)
+    def write(self, observation: TensorType, array: np.ndarray,
+              offset: int) -> None:
+        """Writes each element back-to-back, starting at `offset`."""
+        assert len(observation) == len(self.preprocessors), observation
+        for o, p in zip(observation, self.preprocessors):
+            p.write(o, array, offset)
+            offset += p.size
+
+
+class DictFlatteningPreprocessor(Preprocessor):
+    """Preprocesses each dict value, then flattens it all into a vector.
+
+    RLlib models will unpack the flattened output before _build_layers_v2().
+    """
+
+    @override(Preprocessor)
+    def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]:
+        """Builds one sub-preprocessor per value and sums their sizes."""
+        assert isinstance(self._obs_space, gym.spaces.Dict)
+        size = 0
+        self.preprocessors = []
+        for space in self._obs_space.spaces.values():
+            logger.debug("Creating sub-preprocessor for %s", space)
+            preprocessor = get_preprocessor(space)(space, self._options)
+            self.preprocessors.append(preprocessor)
+            size += preprocessor.size
+        return (size, )
+
+    @override(Preprocessor)
+    def transform(self, observation: TensorType) -> np.ndarray:
+        self.check_shape(observation)
+        array = np.zeros(self.shape, dtype=np.float32)
+        self.write(observation, array, 0)
+        return array
+
+    @override(Preprocessor)
+    def write(self, observation: TensorType, array: np.ndarray,
+              offset: int) -> None:
+        """Writes each value back-to-back, starting at `offset`.
+
+        Observations that are not an OrderedDict are first sorted by key.
+        """
+        if not isinstance(observation, OrderedDict):
+            observation = OrderedDict(sorted(observation.items()))
+        assert len(observation) == len(self.preprocessors), \
+            (len(observation), len(self.preprocessors))
+        for o, p in zip(observation.values(), self.preprocessors):
+            p.write(o, array, offset)
+            offset += p.size
+
+
+class RepeatedValuesPreprocessor(Preprocessor):
+    """Pads and batches the variable-length list value."""
+
+    @override(Preprocessor)
+    def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]:
+        """Returns `(1 + child_size * max_len, )`."""
+        assert isinstance(self._obs_space, Repeated)
+        child_space = obs_space.child_space
+        self.child_preprocessor = get_preprocessor(child_space)(child_space,
+                                                                self._options)
+        # The first slot encodes the list length.
+        size = 1 + self.child_preprocessor.size * obs_space.max_len
+        return (size, )
+
+    @override(Preprocessor)
+    def transform(self, observation: TensorType) -> np.ndarray:
+        """Returns a zero-padded vector holding the length and the items."""
+        array = np.zeros(self.shape)
+        if isinstance(observation, list):
+            for elem in observation:
+                self.child_preprocessor.check_shape(elem)
+        # For non-list input, a ValueError is raised in write() below.
+        self.write(observation, array, 0)
+        return array
+
+    @override(Preprocessor)
+    def write(self, observation: TensorType, array: np.ndarray,
+              offset: int) -> None:
+        """Writes the list length, then each item, starting at `offset`.
+
+        Raises:
+            ValueError: If `observation` is not a list or is longer than
+                the space's `max_len`.
+        """
+        if not isinstance(observation, list):
+            raise ValueError("Input for {} must be list type, got {}".format(
+                self, observation))
+        elif len(observation) > self._obs_space.max_len:
+            raise ValueError("Input {} exceeds max len of space {}".format(
+                observation, self._obs_space.max_len))
+        # The first slot encodes the list length.
+        array[offset] = len(observation)
+        for i, elem in enumerate(observation):
+            offset_i = offset + 1 + i * self.child_preprocessor.size
+            self.child_preprocessor.write(elem, array, offset_i)
+
+
+@PublicAPI
+def get_preprocessor(space: gym.Space) -> type:
+    """Returns an appropriate preprocessor class for the given space.
+
+    The checks run in order, so the Discrete/MultiDiscrete test and the two
+    Atari shape tests take precedence over the Tuple/Dict/Repeated tests.
+    """
+
+    legacy_patch_shapes(space)
+    obs_shape = space.shape
+
+    if isinstance(space, (gym.spaces.Discrete, gym.spaces.MultiDiscrete)):
+        preprocessor = OneHotPreprocessor
+    elif obs_shape == ATARI_OBS_SHAPE:
+        preprocessor = GenericPixelPreprocessor
+    elif obs_shape == ATARI_RAM_OBS_SHAPE:
+        preprocessor = AtariRamPreprocessor
+    elif isinstance(space, gym.spaces.Tuple):
+        preprocessor = TupleFlatteningPreprocessor
+    elif isinstance(space, gym.spaces.Dict):
+        preprocessor = DictFlatteningPreprocessor
+    elif isinstance(space, Repeated):
+        preprocessor = RepeatedValuesPreprocessor
+    else:
+        preprocessor = NoPreprocessor
+
+    return preprocessor
+
+
+def legacy_patch_shapes(space: gym.Space) -> List[int]:
+    """Assigns shapes to spaces that don't have shapes.
+
+    This is only needed for older gym versions that don't set shapes properly
+    for Tuple and Discrete spaces.
+
+    Returns:
+        The (possibly newly assigned) `space.shape`. A space that has no
+        `shape` attribute and is neither Discrete nor Tuple is left untouched,
+        so the final attribute access raises AttributeError for it.
+    """
+
+    if not hasattr(space, "shape"):
+        if isinstance(space, gym.spaces.Discrete):
+            space.shape = ()
+        elif isinstance(space, gym.spaces.Tuple):
+            # Recurse so that nested sub-spaces get patched as well.
+            space.shape = tuple(
+                legacy_patch_shapes(s) for s in space.spaces)
+
+    return space.shape
\ No newline at end of file
diff --git a/docker/requirements.txt b/docker/requirements.txt
new file mode 100644
index 0000000..38c8558
--- /dev/null
+++ b/docker/requirements.txt
@@ -0,0 +1,7 @@
+gym==0.17.2
+numpy==1.22.0
+tensorflow==2.9.3
+stable-baselines3
+ray==1.2.0
+torch==1.8.1
+wandb
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index d297686..1e6c2dd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1 @@
-seed-isort-config
-pre-commit
+gym
diff --git a/setup.py b/setup.py
index 81a1ccd..695dfe5 100644
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,10 @@
 DESCRIPTION = "A wrapper framework for Reinforcement Learning in Webots \
     simulator"
 
+
+with open("requirements.txt") as requirements_file:
+    requirements = requirements_file.read().splitlines()
+
 with open("README.md", "r", encoding="utf-8") as readme_file:
     readme = readme_file.read()
 
@@ -10,9 +14,10 @@
     name="deepbots",
     author="aidudezzz",
     author_email="deepbots@protonmail.com",
-    version="0.1.3-dev1",
+    version="0.1.3-dev3",
     description=DESCRIPTION,
     long_description=readme,
     long_description_content_type="text/markdown",
     packages=find_packages(),
+    install_requires=requirements,
 )