From 88b4da1f65e3091cde0e835b49f912e530fe9999 Mon Sep 17 00:00:00 2001 From: Zach Sailer Date: Mon, 29 Aug 2022 11:54:53 -0700 Subject: [PATCH 1/4] emit events from the contents manager --- jupyter_server/__init__.py | 3 + .../event_schemas/contents_service/v1.yaml | 80 +++++++++++++++++++ jupyter_server/serverapp.py | 15 ++++ .../services/contents/filemanager.py | 6 +- jupyter_server/services/contents/manager.py | 26 ++++++ jupyter_server/utils.py | 2 + 6 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 jupyter_server/event_schemas/contents_service/v1.yaml diff --git a/jupyter_server/__init__.py b/jupyter_server/__init__.py index d5b97f0c90..25d09ee20e 100644 --- a/jupyter_server/__init__.py +++ b/jupyter_server/__init__.py @@ -1,5 +1,6 @@ """The Jupyter Server""" import os +import pathlib import subprocess import sys @@ -10,6 +11,8 @@ ] DEFAULT_JUPYTER_SERVER_PORT = 8888 +JUPYTER_SERVER_EVENTS_URI = "https://events.jupyter.org/jupyter_server" +DEFAULT_EVENTS_SCHEMA_PATH = pathlib.Path(__file__).parent / "event_schemas" del os diff --git a/jupyter_server/event_schemas/contents_service/v1.yaml b/jupyter_server/event_schemas/contents_service/v1.yaml new file mode 100644 index 0000000000..595c21cc11 --- /dev/null +++ b/jupyter_server/event_schemas/contents_service/v1.yaml @@ -0,0 +1,80 @@ +"$id": https://events.jupyter.org/jupyter_server/contents_service/v1 +version: 1 +title: Contents Manager activities +personal-data: true +description: | + Record actions on files via the ContentsManager. + + The notebook ContentsManager REST API is used by all frontends to retrieve, + save, list, delete and perform other actions on notebooks, directories, + and other files through the UI. This is pluggable - the default acts on + the file system, but can be replaced with a different ContentsManager + implementation - to work on S3, Postgres, other object stores, etc. + The events get recorded regardless of the ContentsManager implementation + being used. 
+ + Limitations: + + 1. This does not record all filesystem access, just the ones that happen + explicitly via the notebook server's REST API. Users can (and often do) + trivially access the filesystem in many other ways (such as `open()` calls + in their code), so this is usually never a complete record. + 2. As with all events recorded by the notebook server, users most likely + have the ability to modify the code of the notebook server. Unless other + security measures are in place, these events should be treated as user + controlled and not used in high security areas. + 3. Events are only recorded when an action succeeds. +type: object +required: + - action + - path +properties: + action: + enum: + - get + - create + - save + - upload + - rename + - copy + - delete + description: | + Action performed by the ContentsManager API. + + This is a required field. + + Possible values: + + 1. get + Get contents of a particular file, or list contents of a directory. + + 2. create + Create a new directory or file at 'path'. Currently, name of the + file or directory is auto generated by the ContentsManager implementation. + + 3. save + Save a file at path with contents from the client + + 4. upload + Upload a file at given path with contents from the client + + 5. rename + Rename a file or directory from value in source_path to + value in path. + + 5. copy + Copy a file or directory from value in source_path to + value in path. + + 6. delete + Delete a file or empty directory at given path + path: + type: string + description: | + Logical path on which the operation was performed. + + This is a required field. 
+ source_path: + type: string + description: | + Source path of an operation when action is 'copy' or 'rename' diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 4e9b5a46a2..8dcd8ec95f 100644 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -83,9 +83,11 @@ from traitlets.config.application import boolean_flag, catch_config_error from jupyter_server import ( + DEFAULT_EVENTS_SCHEMA_PATH, DEFAULT_JUPYTER_SERVER_PORT, DEFAULT_STATIC_FILES_PATH, DEFAULT_TEMPLATE_PATH_LIST, + JUPYTER_SERVER_EVENTS_URI, __version__, ) from jupyter_server._sysinfo import get_sys_info @@ -1951,6 +1953,19 @@ def init_logging(self): def init_event_logger(self): """Initialize the Event Bus.""" self.event_logger = EventLogger(parent=self) + # Load the core Jupyter Server event schemas + # All event schemas must start with Jupyter Server's + # events URI, `JUPYTER_SERVER_EVENTS_URI`. + schema_ids = [ + "https://events.jupyter.org/jupyter_server/contents_service/v1", + ] + for schema_id in schema_ids: + # Drop the URI prefix from the schema ID (lstrip() would strip a char set). 
+ rel_schema_path = schema_id.replace(f"{JUPYTER_SERVER_EVENTS_URI}/", "", 1) + ".yaml" + schema_path = DEFAULT_EVENTS_SCHEMA_PATH / rel_schema_path + # Use this pathlib object to register the schema + # with the event logger created above. + self.event_logger.register_event_schema(schema_path) def init_webapp(self): """initialize tornado webapp""" diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index e84c2d36c6..2e28c5d2dc 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -395,6 +395,7 @@ def get(self, path, content=True, type=None, format=None): if type == "directory": raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type") model = self._file_model(path, content=content, format=format) + self.emit(data={"action": "get", "path": path}) return model def _save_directory(self, os_path, model, path=""): @@ -459,7 +460,7 @@ def save(self, model, path=""): model["message"] = validation_message self.run_post_save_hooks(model=model, os_path=os_path) - + self.emit(data={"action": "save", "path": path}) return model def delete_file(self, path): @@ -735,6 +736,7 @@ async def get(self, path, content=True, type=None, format=None): if type == "directory": raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type") model = await self._file_model(path, content=content, format=format) + self.emit(data={"action": "get", "path": path}) return model async def _save_directory(self, os_path, model, path=""): @@ -795,7 +797,7 @@ async def save(self, model, path=""): model["message"] = validation_message self.run_post_save_hooks(model=model, os_path=os_path) - + self.emit(data={"action": "save", "path": path}) return model async def delete_file(self, path): diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 7bd6450803..fcd9de33b0 100644 --- 
a/jupyter_server/services/contents/manager.py +++ 
b/jupyter_server/services/contents/manager.py @@ -3,10 +3,12 @@ # Distributed under the terms of the Modified BSD License. import itertools import json +import pathlib import re import warnings from fnmatch import fnmatch +from jupyter_events import EventLogger from nbformat import ValidationError, sign from nbformat import validate as validate_nb from nbformat.v4 import new_notebook @@ -25,6 +27,7 @@ ) from traitlets.config.configurable import LoggingConfigurable +from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI from jupyter_server.transutils import _i18n from jupyter_server.utils import ensure_async, import_item @@ -53,6 +56,23 @@ class ContentsManager(LoggingConfigurable): """ + event_schema_id = JUPYTER_SERVER_EVENTS_URI + "/contents_service/v1" + event_logger = Instance(EventLogger).tag(config=True) + + @default("event_logger") + def _default_event_logger(self): + if self.parent and hasattr(self.parent, "event_logger"): + return self.parent.event_logger + else: + # If parent does not have an event logger, create one. 
+ logger = EventLogger() + schema_path = DEFAULT_EVENTS_SCHEMA_PATH / "contents_service" / "v1.yaml" + logger.register_event_schema(schema_path) + + def emit(self, data): + """Emit event using the core event schema from Jupyter Server's Contents Manager.""" + self.event_logger.emit(schema_id=self.event_schema_id, data=data) + root_dir = Unicode("/", config=True) allow_hidden = Bool(False, config=True, help="Allow access to hidden files") @@ -416,11 +436,13 @@ def delete(self, path): raise HTTPError(400, "Can't delete root") self.delete_file(path) self.checkpoints.delete_all_checkpoints(path) + self.emit(data={"action": "delete", "path": path}) def rename(self, old_path, new_path): """Rename a file and any checkpoints associated with that file.""" self.rename_file(old_path, new_path) self.checkpoints.rename_all_checkpoints(old_path, new_path) + self.emit(data={"action": "rename", "path": new_path, "source_path": old_path}) def update(self, model, path): """Update the file's path @@ -616,6 +638,7 @@ def copy(self, from_path, to_path=None): raise HTTPError(404, "No such directory: %s" % to_path) model = self.save(model, to_path) + self.emit(data={"action": "copy", "path": to_path, "source_path": from_path}) return model def log_info(self): @@ -819,11 +842,13 @@ async def delete(self, path): await self.delete_file(path) await self.checkpoints.delete_all_checkpoints(path) + self.emit(data={"action": "delete", "path": path}) async def rename(self, old_path, new_path): """Rename a file and any checkpoints associated with that file.""" await self.rename_file(old_path, new_path) await self.checkpoints.rename_all_checkpoints(old_path, new_path) + self.emit(data={"action": "rename", "path": new_path, "source_path": old_path}) async def update(self, model, path): """Update the file's path @@ -985,6 +1010,7 @@ async def copy(self, from_path, to_path=None): raise HTTPError(404, "No such directory: %s" % to_path) model = await self.save(model, to_path) + self.emit(data={"action": 
"copy", "path": to_path, "source_path": from_path}) return model async def trust_notebook(self, path): diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 0c772d38e9..aac97660d5 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -18,6 +18,8 @@ from tornado.httpclient import AsyncHTTPClient, HTTPClient, HTTPRequest from tornado.netutil import Resolver +from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI + def url_path_join(*pieces): """Join components of url into a relative url From 607a0528614529a25b91c875fcad95bbdae37481 Mon Sep 17 00:00:00 2001 From: Zach Sailer Date: Mon, 29 Aug 2022 12:16:22 -0700 Subject: [PATCH 2/4] remove unused imports --- jupyter_server/services/contents/manager.py | 1 - jupyter_server/utils.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index fcd9de33b0..e90474894a 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -3,7 +3,6 @@ # Distributed under the terms of the Modified BSD License. 
import itertools import json -import pathlib import re import warnings from fnmatch import fnmatch diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index aac97660d5..0c772d38e9 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -18,8 +18,6 @@ from tornado.httpclient import AsyncHTTPClient, HTTPClient, HTTPRequest from tornado.netutil import Resolver -from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI - def url_path_join(*pieces): """Join components of url into a relative url From f0ba8dbf6b1ea487a7a96b56130ed453194186e0 Mon Sep 17 00:00:00 2001 From: Zach Sailer Date: Mon, 29 Aug 2022 12:24:07 -0700 Subject: [PATCH 3/4] return default event_logger in contents manager --- jupyter_server/services/contents/manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index e90474894a..089a71fc65 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -67,6 +67,7 @@ def _default_event_logger(self): logger = EventLogger() schema_path = DEFAULT_EVENTS_SCHEMA_PATH / "contents_service" / "v1.yaml" logger.register_event_schema(schema_path) + return logger def emit(self, data): """Emit event using the core event schema from Jupyter Server's Contents Manager.""" From 88907172cb016af38cb8ef59ee8264cae720d839 Mon Sep 17 00:00:00 2001 From: Zach Sailer Date: Mon, 29 Aug 2022 16:59:04 -0700 Subject: [PATCH 4/4] remove unused actions --- .../event_schemas/contents_service/v1.yaml | 15 ++++----------- .../services/contents/largefilemanager.py | 3 +++ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/jupyter_server/event_schemas/contents_service/v1.yaml b/jupyter_server/event_schemas/contents_service/v1.yaml index 595c21cc11..2c574f7b93 100644 --- a/jupyter_server/event_schemas/contents_service/v1.yaml +++ b/jupyter_server/event_schemas/contents_service/v1.yaml @@ 
-48,25 +48,18 @@ properties: 1. get Get contents of a particular file, or list contents of a directory. - 2. create - Create a new directory or file at 'path'. Currently, name of the - file or directory is auto generated by the ContentsManager implementation. - - 3. save + 2. save Save a file at path with contents from the client - 4. upload - Upload a file at given path with contents from the client - - 5. rename + 3. rename Rename a file or directory from value in source_path to value in path. - 5. copy + 4. copy Copy a file or directory from value in source_path to value in path. - 6. delete + 5. delete Delete a file or empty directory at given path path: type: string diff --git a/jupyter_server/services/contents/largefilemanager.py b/jupyter_server/services/contents/largefilemanager.py index f2a6c072fd..bb66b57758 100644 --- a/jupyter_server/services/contents/largefilemanager.py +++ b/jupyter_server/services/contents/largefilemanager.py @@ -54,6 +54,7 @@ def save(self, model, path=""): # Last chunk if chunk == -1: self.run_post_save_hooks(model=model, os_path=os_path) + self.emit(data={"action": "save", "path": path}) return model else: return super().save(model, path) @@ -125,6 +126,8 @@ async def save(self, model, path=""): # Last chunk if chunk == -1: self.run_post_save_hooks(model=model, os_path=os_path) + + self.emit(data={"action": "save", "path": path}) return model else: return await super().save(model, path)