From 344bb57abc08db9a2dbe01d4fb3a53ca2ef7056a Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:00:23 -0700 Subject: [PATCH 01/68] Add initial eventlog hook --- MANIFEST.in | 3 ++ jupyter_server/base/handlers.py | 4 ++ .../contentsmanager-actions.json | 30 +++++++++++++++ jupyter_server/serverapp.py | 20 +++++++++- jupyter_server/services/contents/handlers.py | 37 ++++++++++++++++++- 5 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 jupyter_server/event-schemas/contentsmanager-actions.json diff --git a/MANIFEST.in b/MANIFEST.in index 9d4060fc69..b81a6d5536 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,9 @@ include setupbase.py include Dockerfile graft tools +# Event Schemas +graft jupyter_server/event-schemas + # Documentation graft docs exclude docs/\#* diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index 75467718c8..5185365c4d 100755 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -204,6 +204,10 @@ def jinja_template_vars(self): """User-supplied values to supply to jinja templates.""" return self.settings.get('jinja_template_vars', {}) + @property + def eventlog(self): + return self.settings.get('eventlog') + #--------------------------------------------------------------- # URLs #--------------------------------------------------------------- diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json new file mode 100644 index 0000000000..242111722e --- /dev/null +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -0,0 +1,30 @@ +{ + "$id": "jupyter.org/contentsmanager-actions", + "version": 1, + "title": "Contents Manager activities", + "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "type": "object", + "required": ["action", "path"], + "properties": { + "action": { + "enum": [ + "get", + "create", + "save", + "upload", + 
"rename", + "create", + "copy" + ], + "description": "Action performed by contents manager" + }, + "path": { + "type": "string", + "description": "Logical path the action was performed in" + }, + "source_path": { + "type": "string", + "description": "If action is 'copy', this specifies the source path" + } + } +} \ No newline at end of file diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 21a4e68ca9..e10bc80967 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -33,6 +33,7 @@ import warnings import webbrowser import urllib +from glob import glob from types import ModuleType from base64 import encodebytes @@ -99,6 +100,8 @@ ) from ipython_genutils import py3compat from jupyter_core.paths import jupyter_runtime_dir, jupyter_path +from jupyter_telemetry.eventlog import EventLog + from jupyter_server._sysinfo import get_sys_info from ._tz import utcnow, utcfromtimestamp @@ -279,7 +282,8 @@ def init_settings(self, jupyter_app, kernel_manager, contents_manager, server_root_dir=root_dir, jinja2_env=env, terminals_available=False, # Set later if terminals are available - serverapp=self + serverapp=self, + eventlog=jupyter_app.eventlog ) # allow custom overrides for the tornado web app. 
@@ -1758,6 +1762,18 @@ def _init_asyncio_patch(): # WindowsProactorEventLoopPolicy is not compatible with tornado 6 # fallback to the pre-3.8 default of Selector asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy()) + def init_eventlog(self): + self.eventlog = EventLog(parent=self) + + schemas_glob = os.path.join( + os.path.dirname(__file__), + 'event-schemas', + '*.json' + ) + + for schema_file in glob(schemas_glob): + with open(schema_file) as f: + self.eventlog.register_schema(json.load(f)) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): @@ -1788,10 +1804,12 @@ def initialize(self, argv=None, find_extensions=True, new_httpserver=True): self.init_server_extensions() # Initialize all components of the ServerApp. self.init_logging() + self.init_eventlog() if self._dispatching: return self.init_configurables() self.init_components() + self.init_eventlog() self.init_webapp() if new_httpserver: self.init_httpserver() diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 53aff09078..7bdf369f11 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -112,6 +112,10 @@ async def get(self, path=''): )) validate_model(model, expect_content=content) self._finish_model(model, location=False) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'get', 'path': model['path'] } + ) @web.authenticated async def patch(self, path=''): @@ -120,10 +124,19 @@ async def patch(self, path=''): model = self.get_json_body() if model is None: raise web.HTTPError(400, u'JSON body missing') - model = cm.update(model, path) + self.log.info(model) + model = yield maybe_future(cm.update(model, path)) validate_model(model, expect_content=False) self._finish_model(model) + self.log.info(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + # FIXME: 'path' always has a 
leading slash, while model['path'] does not. + # What to do here for source_path? path munge manually? Eww + { 'action': 'rename', 'path': model['path'], 'source_path': path } + ) + @gen.coroutine async def _copy(self, copy_from, copy_to=None): """Copy a file, optionally specifying a target directory.""" self.log.info(u"Copying {copy_from} to {copy_to}".format( @@ -134,6 +147,10 @@ async def _copy(self, copy_from, copy_to=None): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } + ) async def _upload(self, model, path): """Handle upload of a new file to path""" @@ -142,6 +159,10 @@ async def _upload(self, model, path): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'upload', 'path': model['path'] } + ) async def _new_untitled(self, path, type='', ext=''): """Create a new, empty untitled entity""" @@ -150,6 +171,11 @@ async def _new_untitled(self, path, type='', ext=''): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + # Set path to path of created object, not directory it was created in + { 'action': 'create', 'path': model['path'] } + ) async def _save(self, model, path): """Save an existing file.""" @@ -160,6 +186,11 @@ async def _save(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'save', 'path': model['path'] } + ) + @web.authenticated async def post(self, path=''): """Create a new file in the specified path. 
@@ -228,6 +259,10 @@ async def delete(self, path=''): cm.delete(path) self.set_status(204) self.finish() + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'delete', 'path': path } + ) class CheckpointsHandler(APIHandler): From a0f40eab99a2c407ddd13ea5e6043830946977d2 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:31:11 -0700 Subject: [PATCH 02/68] Install jupyter_telemetry from source --- .travis.yml | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..6346a3d78d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,105 @@ +# http://travis-ci.org/#!/ipython/ipython +language: python + +cache: + directories: + - $HOME/.cache/bower + - $HOME/.cache/pip +python: + - 3.6 + + +env: + global: + - PATH=$TRAVIS_BUILD_DIR/pandoc:$PATH + matrix: + - GROUP=js/notebook + - GROUP=python + - GROUP=js/base + - GROUP=js/services + +before_install: + - pip install --upgrade pip + # Manually install jupyter_telemetry, as of https://github.com/jupyter/telemetry/pull/10 + - pip install git+https://github.com/yuvipanda/telemetry@5789321 + - pip install --upgrade setuptools wheel nose coverage codecov + - nvm install 6.9.2 + - nvm use 6.9.2 + - node --version + - npm --version + - npm upgrade -g npm + - npm install + - | + if [[ $GROUP == js* ]]; then + npm install -g casperjs@1.1.3 phantomjs-prebuilt@2.1.7 + fi + - | + if [[ $GROUP == docs ]]; then + pip install -r docs/doc-requirements.txt + pip install --upgrade pytest + fi + - | + if [[ $GROUP == selenium ]]; then + pip install --upgrade selenium pytest + # Install Webdriver backend for Firefox: + wget https://github.com/mozilla/geckodriver/releases/download/v0.19.1/geckodriver-v0.19.1-linux64.tar.gz + mkdir geckodriver + tar -xzf geckodriver-v0.19.1-linux64.tar.gz -C geckodriver + export PATH=$PATH:$PWD/geckodriver + fi + - pip install 
"attrs>=17.4.0" + +install: + - pip install --pre .[test] $EXTRA_PIP + - pip freeze + - wget https://github.com/jgm/pandoc/releases/download/1.19.1/pandoc-1.19.1-1-amd64.deb && sudo dpkg -i pandoc-1.19.1-1-amd64.deb + + +script: + - jupyter kernelspec list + - | + symlinks=$(find . -type l| grep -v './node_modules/' | grep -v './git-hooks') + if [[ $(echo $symlinks) ]]; then + echo "Repository contains symlinks which won't work on windows:" + echo $symlinks + echo "" + false + else + true + fi + - 'if [[ $GROUP == js* ]]; then travis_retry python -m notebook.jstest ${GROUP:3}; fi' + - 'if [[ $GROUP == python ]]; then nosetests -v --exclude-dir notebook/tests/selenium --with-coverage --cover-package=notebook notebook; fi' + - 'if [[ $GROUP == selenium ]]; then py.test -sv notebook/tests/selenium; fi' + - | + if [[ $GROUP == docs ]]; then + EXIT_STATUS=0 + make -C docs/ html || EXIT_STATUS=$? + + if [[ $TRAVIS_EVENT_TYPE == cron ]]; then + make -C docs/ linkcheck || EXIT_STATUS=$?; + fi + + pytest --nbval --current-env docs || EXIT_STATUS=$? 
+ exit $EXIT_STATUS + fi + + +matrix: + include: + - python: 3.6 + env: + - GROUP=selenium + - JUPYTER_TEST_BROWSER=firefox + - MOZ_HEADLESS=1 + addons: + firefox: 57.0 + - python: 3.5 + env: GROUP=python + - python: 3.7 + dist: xenial + env: GROUP=python + - python: 3.6 + env: GROUP=docs + +after_success: + - codecov From 96bf2f03f7ba7dc828ef64bb7ebf9016a5f82538 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:57:01 -0700 Subject: [PATCH 03/68] Set up an eventlog API endpoint Bump telemetry extension commit as well --- .travis.yml | 105 ------------------- jupyter_server/services/eventlog/__init__.py | 0 jupyter_server/services/eventlog/handlers.py | 42 ++++++++ 3 files changed, 42 insertions(+), 105 deletions(-) delete mode 100644 .travis.yml create mode 100644 jupyter_server/services/eventlog/__init__.py create mode 100644 jupyter_server/services/eventlog/handlers.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 6346a3d78d..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,105 +0,0 @@ -# http://travis-ci.org/#!/ipython/ipython -language: python - -cache: - directories: - - $HOME/.cache/bower - - $HOME/.cache/pip -python: - - 3.6 - - -env: - global: - - PATH=$TRAVIS_BUILD_DIR/pandoc:$PATH - matrix: - - GROUP=js/notebook - - GROUP=python - - GROUP=js/base - - GROUP=js/services - -before_install: - - pip install --upgrade pip - # Manually install jupyter_telemetry, as of https://github.com/jupyter/telemetry/pull/10 - - pip install git+https://github.com/yuvipanda/telemetry@5789321 - - pip install --upgrade setuptools wheel nose coverage codecov - - nvm install 6.9.2 - - nvm use 6.9.2 - - node --version - - npm --version - - npm upgrade -g npm - - npm install - - | - if [[ $GROUP == js* ]]; then - npm install -g casperjs@1.1.3 phantomjs-prebuilt@2.1.7 - fi - - | - if [[ $GROUP == docs ]]; then - pip install -r docs/doc-requirements.txt - pip install --upgrade pytest - fi - - | - if [[ $GROUP == selenium ]]; then - pip install 
--upgrade selenium pytest - # Install Webdriver backend for Firefox: - wget https://github.com/mozilla/geckodriver/releases/download/v0.19.1/geckodriver-v0.19.1-linux64.tar.gz - mkdir geckodriver - tar -xzf geckodriver-v0.19.1-linux64.tar.gz -C geckodriver - export PATH=$PATH:$PWD/geckodriver - fi - - pip install "attrs>=17.4.0" - -install: - - pip install --pre .[test] $EXTRA_PIP - - pip freeze - - wget https://github.com/jgm/pandoc/releases/download/1.19.1/pandoc-1.19.1-1-amd64.deb && sudo dpkg -i pandoc-1.19.1-1-amd64.deb - - -script: - - jupyter kernelspec list - - | - symlinks=$(find . -type l| grep -v './node_modules/' | grep -v './git-hooks') - if [[ $(echo $symlinks) ]]; then - echo "Repository contains symlinks which won't work on windows:" - echo $symlinks - echo "" - false - else - true - fi - - 'if [[ $GROUP == js* ]]; then travis_retry python -m notebook.jstest ${GROUP:3}; fi' - - 'if [[ $GROUP == python ]]; then nosetests -v --exclude-dir notebook/tests/selenium --with-coverage --cover-package=notebook notebook; fi' - - 'if [[ $GROUP == selenium ]]; then py.test -sv notebook/tests/selenium; fi' - - | - if [[ $GROUP == docs ]]; then - EXIT_STATUS=0 - make -C docs/ html || EXIT_STATUS=$? - - if [[ $TRAVIS_EVENT_TYPE == cron ]]; then - make -C docs/ linkcheck || EXIT_STATUS=$?; - fi - - pytest --nbval --current-env docs || EXIT_STATUS=$? 
- exit $EXIT_STATUS - fi - - -matrix: - include: - - python: 3.6 - env: - - GROUP=selenium - - JUPYTER_TEST_BROWSER=firefox - - MOZ_HEADLESS=1 - addons: - firefox: 57.0 - - python: 3.5 - env: GROUP=python - - python: 3.7 - dist: xenial - env: GROUP=python - - python: 3.6 - env: GROUP=docs - -after_success: - - codecov diff --git a/jupyter_server/services/eventlog/__init__.py b/jupyter_server/services/eventlog/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py new file mode 100644 index 0000000000..687b2591cc --- /dev/null +++ b/jupyter_server/services/eventlog/handlers.py @@ -0,0 +1,42 @@ +import json + +from tornado import web + +from notebook.utils import url_path_join +from notebook.base.handlers import APIHandler, json_errors +from jupyter_telemetry.eventlog import EventLog + + +class EventLoggingHandler(APIHandler): + """ + A handler that receives and stores telemetry data from the client. 
+ """ + @json_errors + @web.authenticated + def post(self, *args, **kwargs): + try: + # Parse the data from the request body + raw_event = json.loads(self.request.body.strip().decode()) + except Exception as e: + raise web.HTTPError(400, str(e)) + + required_fields = {'schema', 'version', 'event'} + for rf in required_fields: + if rf not in raw_event: + raise web.HTTPError(400, f'{rf} is a required field') + + schema_name = raw_event['schema'] + version = raw_event['version'] + event = raw_event['event'] + + # Profile, and move to a background thread if this is problematic + # FIXME: Return a more appropriate error response if validation fails + self.eventlog.record_event(schema_name, version, event) + + self.set_status(204) + self.finish() + + +default_handlers = [ + (r"/api/eventlog", EventLoggingHandler), +] \ No newline at end of file From 06b91e0c34050bfab6072b829584a63e15fff60b Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 13:00:59 -0700 Subject: [PATCH 04/68] Use different naming convention & add test for it Experiments here informed the schema naming recommendations in https://github.com/jupyter/telemetry/pull/11 --- .../contentsmanager-actions.json | 2 +- .../contentsmanager-actions/v1.json | 30 +++++++++++++++++++ jupyter_server/serverapp.py | 17 +++++------ jupyter_server/services/contents/handlers.py | 22 ++++++++------ jupyter_server/utils.py | 9 ++++++ 5 files changed, 61 insertions(+), 19 deletions(-) create mode 100644 jupyter_server/event-schemas/contentsmanager-actions/v1.json diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json index 242111722e..5da6d68b88 100644 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -1,5 +1,5 @@ { - "$id": "jupyter.org/contentsmanager-actions", + "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", "version": 1, "title": 
"Contents Manager activities", "description": "Notebook Server emits this event whenever a contentsmanager action happens", diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.json b/jupyter_server/event-schemas/contentsmanager-actions/v1.json new file mode 100644 index 0000000000..5da6d68b88 --- /dev/null +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.json @@ -0,0 +1,30 @@ +{ + "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", + "version": 1, + "title": "Contents Manager activities", + "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "type": "object", + "required": ["action", "path"], + "properties": { + "action": { + "enum": [ + "get", + "create", + "save", + "upload", + "rename", + "create", + "copy" + ], + "description": "Action performed by contents manager" + }, + "path": { + "type": "string", + "description": "Logical path the action was performed in" + }, + "source_path": { + "type": "string", + "description": "If action is 'copy', this specifies the source path" + } + } +} \ No newline at end of file diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index e10bc80967..3e7e561b49 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -1765,15 +1765,14 @@ def _init_asyncio_patch(): def init_eventlog(self): self.eventlog = EventLog(parent=self) - schemas_glob = os.path.join( - os.path.dirname(__file__), - 'event-schemas', - '*.json' - ) - - for schema_file in glob(schemas_glob): - with open(schema_file) as f: - self.eventlog.register_schema(json.load(f)) + event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') + # Recursively register all .json files under event-schemas + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.json'): + file_path = os.path.join(dirname, file) + with open(file_path) as f: + self.eventlog.register_schema(json.load(f)) 
@catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 7bdf369f11..d80ba9b768 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -9,10 +9,14 @@ import json from tornado import web - -from jupyter_server.utils import url_path_join, url_escape, ensure_async from jupyter_client.jsonutil import date_default +from jupyter_server.utils import ( + url_path_join, + url_escape, + ensure_async, + eventlogging_schema_fqn +) from jupyter_server.base.handlers import ( JupyterHandler, APIHandler, path_regex, ) @@ -113,7 +117,7 @@ async def get(self, path=''): validate_model(model, expect_content=content) self._finish_model(model, location=False) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'get', 'path': model['path'] } ) @@ -130,7 +134,7 @@ async def patch(self, path=''): self._finish_model(model) self.log.info(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, # FIXME: 'path' always has a leading slash, while model['path'] does not. # What to do here for source_path? path munge manually? 
Eww { 'action': 'rename', 'path': model['path'], 'source_path': path } @@ -148,7 +152,7 @@ async def _copy(self, copy_from, copy_to=None): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } ) @@ -160,7 +164,7 @@ async def _upload(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'upload', 'path': model['path'] } ) @@ -172,7 +176,7 @@ async def _new_untitled(self, path, type='', ext=''): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, # Set path to path of created object, not directory it was created in { 'action': 'create', 'path': model['path'] } ) @@ -187,7 +191,7 @@ async def _save(self, model, path): self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'save', 'path': model['path'] } ) @@ -260,7 +264,7 @@ async def delete(self, path=''): self.set_status(204) self.finish() self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'delete', 'path': path } ) diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 42a6ae9278..54e112f97b 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -440,3 +440,12 @@ def wrapped(): result = asyncio.ensure_future(maybe_async) return result return wrapped() + + +def eventlogging_schema_fqn(name): + """ + Return fully qualified event schema name + + Matches 
convention for this particular repo + """ + return 'eventlogging.jupyter.org/notebook/{}'.format(name) From 716ff1b3ef6a4f7da446ff191a89e1e93047a615 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 15:07:45 -0700 Subject: [PATCH 05/68] Don't use f-strings python 3.5 is still supported --- jupyter_server/services/eventlog/handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index 687b2591cc..4665e43e8b 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -23,7 +23,7 @@ def post(self, *args, **kwargs): required_fields = {'schema', 'version', 'event'} for rf in required_fields: if rf not in raw_event: - raise web.HTTPError(400, f'{rf} is a required field') + raise web.HTTPError(400, '{} is a required field'.format(rf)) schema_name = raw_event['schema'] version = raw_event['version'] From 8e122fcaaa4d348a4edc9c1ce780742bff788d7f Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 08:08:04 -0700 Subject: [PATCH 06/68] Derive JSON Schema files from YAML files This lets us add detailed documentation & description to our schemas, which is very hard to do in JSON. 
We also add a lot of documentation to the one JSON schema we have --- jupyter_server/event-schemas/README.md | 19 +++++ .../contentsmanager-actions.json | 17 ++-- jupyter_server/event-schemas/generate-json.py | 39 +++++++++ jupyter_server/event-schemas/v1.yaml | 79 +++++++++++++++++++ 4 files changed, 147 insertions(+), 7 deletions(-) create mode 100644 jupyter_server/event-schemas/README.md create mode 100755 jupyter_server/event-schemas/generate-json.py create mode 100644 jupyter_server/event-schemas/v1.yaml diff --git a/jupyter_server/event-schemas/README.md b/jupyter_server/event-schemas/README.md new file mode 100644 index 0000000000..541a9b0398 --- /dev/null +++ b/jupyter_server/event-schemas/README.md @@ -0,0 +1,19 @@ +# Event Schemas + +## Generating .json files + +Event Schemas are written in a human readable `.yaml` format. +This is primarily to get multi-line strings in our descriptions, +as documentation is very important. + +Every time you modify a `.yaml` file, you should run the following +commands. + +```bash +./generate-json.py +``` + +This needs the `ruamel.yaml` python package installed. + +Hopefully, this is extremely temporary, and we can just use YAML +with jupyter_telemetry. 
\ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json index 5da6d68b88..065f1d5c2f 100644 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -2,9 +2,12 @@ "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", "version": 1, "title": "Contents Manager activities", - "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "description": "Record actions on files via the ContentsManager REST API.\n\nThe notebook ContentsManager REST API is used by all frontends to retreive,\nsave, list, delete and perform other actions on notebooks, directories,\nand other files through the UI. This is pluggable - the default acts on\nthe file system, but can be replaced with a different ContentsManager\nimplementation - to work on S3, Postgres, other object stores, etc.\nThe events get recorded regardless of the ContentsManager implementation\nbeing used.\n\nLimitations:\n\n1. This does not record all filesystem access, just the ones that happen\n explicitly via the notebook server's REST API. Users can (and often do)\n trivially access the filesystem in many other ways (such as `open()` calls\n in their code), so this is usually never a complete record.\n2. As with all events recorded by the notebook server, users most likely\n have the ability to modify the code of the notebook server. Unless other\n security measures are in place, these events should be treated as user\n controlled and not used in high security areas.\n3. 
Events are only recorded when an action succeeds.\n", "type": "object", - "required": ["action", "path"], + "required": [ + "action", + "path" + ], "properties": { "action": { "enum": [ @@ -13,18 +16,18 @@ "save", "upload", "rename", - "create", - "copy" + "copy", + "delete" ], - "description": "Action performed by contents manager" + "description": "Action performed by the ContentsManager API.\n\nThis is a required field.\n\nPossible values:\n\n1. get\n Get contents of a particular file, or list contents of a directory.\n\n2. create\n Create a new directory or file at 'path'. Currently, name of the\n file or directory is auto generated by the ContentsManager implementation.\n\n3. save\n Save a file at path with contents from the client\n\n4. upload\n Upload a file at given path with contents from the client\n\n5. rename\n Rename a file or directory from value in source_path to\n value in path.\n\n5. copy\n Copy a file or directory from value in source_path to\n value in path.\n\n6. delete\n Delete a file or empty directory at given path\n" }, "path": { "type": "string", - "description": "Logical path the action was performed in" + "description": "Logical path on which the operation was performed.\n\nThis is a required field.\n" }, "source_path": { "type": "string", - "description": "If action is 'copy', this specifies the source path" + "description": "Source path of an operation when action is 'copy' or 'rename'" } } } \ No newline at end of file diff --git a/jupyter_server/event-schemas/generate-json.py b/jupyter_server/event-schemas/generate-json.py new file mode 100755 index 0000000000..a39fa0610b --- /dev/null +++ b/jupyter_server/event-schemas/generate-json.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +import argparse +import json +import os +import jsonschema +from ruamel.yaml import YAML + +from jupyter_telemetry.eventlog import EventLog + +yaml = YAML(typ='safe') + +def main(): + argparser = argparse.ArgumentParser() + argparser.add_argument( + 
'directory', + help='Directory with Schema .yaml files' + ) + + args = argparser.parse_args() + + el = EventLog() + for dirname, _, files in os.walk(args.directory): + for file in files: + if not file.endswith('.yaml'): + continue + yaml_path = os.path.join(dirname, file) + print('Processing', yaml_path) + with open(yaml_path) as f: + schema = yaml.load(f) + + # validate schema + el.register_schema(schema) + + json_path = os.path.join(dirname, os.path.splitext(file)[0] + '.json') + with open(json_path, 'w') as f: + json.dump(schema, f, indent=4) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/jupyter_server/event-schemas/v1.yaml b/jupyter_server/event-schemas/v1.yaml new file mode 100644 index 0000000000..3d7e8f2fe9 --- /dev/null +++ b/jupyter_server/event-schemas/v1.yaml @@ -0,0 +1,79 @@ +"$id": eventlogging.jupyter.org/notebook/contentsmanager-actions +version: 1 +title: Contents Manager activities +description: | + Record actions on files via the ContentsManager REST API. + + The notebook ContentsManager REST API is used by all frontends to retreive, + save, list, delete and perform other actions on notebooks, directories, + and other files through the UI. This is pluggable - the default acts on + the file system, but can be replaced with a different ContentsManager + implementation - to work on S3, Postgres, other object stores, etc. + The events get recorded regardless of the ContentsManager implementation + being used. + + Limitations: + + 1. This does not record all filesystem access, just the ones that happen + explicitly via the notebook server's REST API. Users can (and often do) + trivially access the filesystem in many other ways (such as `open()` calls + in their code), so this is usually never a complete record. + 2. As with all events recorded by the notebook server, users most likely + have the ability to modify the code of the notebook server. 
Unless other + security measures are in place, these events should be treated as user + controlled and not used in high security areas. + 3. Events are only recorded when an action succeeds. +type: object +required: +- action +- path +properties: + action: + enum: + - get + - create + - save + - upload + - rename + - copy + - delete + description: | + Action performed by the ContentsManager API. + + This is a required field. + + Possible values: + + 1. get + Get contents of a particular file, or list contents of a directory. + + 2. create + Create a new directory or file at 'path'. Currently, name of the + file or directory is auto generated by the ContentsManager implementation. + + 3. save + Save a file at path with contents from the client + + 4. upload + Upload a file at given path with contents from the client + + 5. rename + Rename a file or directory from value in source_path to + value in path. + + 5. copy + Copy a file or directory from value in source_path to + value in path. + + 6. delete + Delete a file or empty directory at given path + path: + type: string + description: | + Logical path on which the operation was performed. + + This is a required field. + source_path: + type: string + description: | + Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file From f9a0dfb6c3ad69b541de65e053b5354b17d21d1f Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 11:10:07 -0700 Subject: [PATCH 07/68] Keep event schemas in YAML Primary advantage over JSON is that we can do multi-line strings for more detailed documentation. We also expect humans to read & write these, so YAML is a much better format there. All JSON is also valid YAML, so that helps. 
Depends on https://github.com/jupyter/telemetry/pull/13 --- jupyter_server/serverapp.py | 7 ++-- notebook/tests/test_eventlog.py | 57 +++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 notebook/tests/test_eventlog.py diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 3e7e561b49..76896b03d3 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -33,6 +33,7 @@ import warnings import webbrowser import urllib +from ruamel.yaml import YAML from glob import glob from types import ModuleType @@ -1765,14 +1766,14 @@ def _init_asyncio_patch(): def init_eventlog(self): self.eventlog = EventLog(parent=self) + yaml = YAML(typ='safe') event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') # Recursively register all .json files under event-schemas for dirname, _, files in os.walk(event_schemas_dir): for file in files: - if file.endswith('.json'): + if file.endswith('.yaml'): file_path = os.path.join(dirname, file) - with open(file_path) as f: - self.eventlog.register_schema(json.load(f)) + self.eventlog.register_schema_file(file_path) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py new file mode 100644 index 0000000000..c2f74a59c9 --- /dev/null +++ b/notebook/tests/test_eventlog.py @@ -0,0 +1,57 @@ +import os +import re +import jsonschema +from ruamel.yaml import YAML +from notebook.notebookapp import NotebookApp +from notebook.utils import eventlogging_schema_fqn +from unittest import TestCase + +yaml = YAML(typ='safe') + +class RegisteredSchemasTestCase(TestCase): + def schema_files(self): + event_schemas_dir = os.path.realpath( + os.path.join(os.path.dirname(__file__), '..', 'event-schemas') + ) + schemas = [] + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.yaml'): + yield 
os.path.join(dirname, file) + + def test_eventlogging_schema_fqn(self): + self.assertEqual( + eventlogging_schema_fqn('test'), + 'eventlogging.jupyter.org/notebook/test' + ) + def test_valid_schemas(self): + """ + All schemas must be valid json schemas + """ + for schema_file in self.schema_files(): + with open(schema_file) as f: + jsonschema.Draft7Validator.check_schema(yaml.load(f)) + + def test_schema_conventions(self): + """ + Test schema naming convention for this repo. + + 1. All schemas should be under event-schamas/{name}/v{version}.yaml + 2. Schema id should be eventlogging.jupyter.org/notebook/{name} + 3. Schema version should match version in file + """ + for schema_file in self.schema_files(): + filename = os.path.basename(schema_file) + match = re.match('v(\d+)\.yaml', filename) + # All schema locations must match the following pattern + # schema-name/v(version).yaml + self.assertIsNotNone(match) + + with open(schema_file) as f: + schema = yaml.load(f) + + self.assertEqual(schema['$id'], eventlogging_schema_fqn( + os.path.basename(os.path.dirname(schema_file)) + )) + self.assertEqual(schema['version'], int(match.groups()[0])) + \ No newline at end of file From c7428e8aa778b8e2352b88af3c98fbb424cfac2e Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 16:34:36 -0700 Subject: [PATCH 08/68] Depend on the jupyter_telemetry package We made a v0.0.1 release! 
--- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2697fad271..e402e71669 100755 --- a/setup.py +++ b/setup.py @@ -94,7 +94,8 @@ 'Send2Trash', 'terminado>=0.8.3', 'prometheus_client', - "pywin32>=1.0 ; sys_platform == 'win32'" + "pywin32>=1.0 ; sys_platform == 'win32'", + 'jupyter_telemetry' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters', From 9437e88353d515b1b39a210321c92cb34667dc8c Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 12:02:13 -0700 Subject: [PATCH 09/68] read schemas from new utils function --- jupyter_server/serverapp.py | 21 +++++++++++---------- jupyter_server/utils.py | 14 ++++++++++++++ notebook/tests/test_eventlog.py | 20 ++++++-------------- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 76896b03d3..18f28967f0 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -106,7 +106,14 @@ from jupyter_server._sysinfo import get_sys_info from ._tz import utcnow, utcfromtimestamp -from .utils import url_path_join, check_pid, url_escape, urljoin, pathname2url +from .utils import ( + url_path_join, + check_pid, + url_escape, + urljoin, + pathname2url, + get_schema_files +) from jupyter_server.extension.serverextension import ( ServerExtensionApp, @@ -1765,15 +1772,9 @@ def _init_asyncio_patch(): asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy()) def init_eventlog(self): self.eventlog = EventLog(parent=self) - - yaml = YAML(typ='safe') - event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') - # Recursively register all .json files under event-schemas - for dirname, _, files in os.walk(event_schemas_dir): - for file in files: - if file.endswith('.yaml'): - file_path = os.path.join(dirname, file) - self.eventlog.register_schema_file(file_path) + # Register schemas for notebook services. 
+ for file_path in get_schema_files(): + self.eventlog.register_schema_file(file_path) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 54e112f97b..55389f037a 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -449,3 +449,17 @@ def eventlogging_schema_fqn(name): Matches convention for this particular repo """ return 'eventlogging.jupyter.org/notebook/{}'.format(name) + + +def get_schema_files(): + """Yield a sequence of event schemas for jupyter services.""" + # Hardcode path to event schemas directory. + event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') + schema_files = [] + # Recursively register all .json files under event-schemas + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.yaml'): + file_path = os.path.join(dirname, file) + schema_files.append(file_path) + yield schema_files diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py index c2f74a59c9..994181b73e 100644 --- a/notebook/tests/test_eventlog.py +++ b/notebook/tests/test_eventlog.py @@ -3,32 +3,25 @@ import jsonschema from ruamel.yaml import YAML from notebook.notebookapp import NotebookApp -from notebook.utils import eventlogging_schema_fqn +from notebook.utils import eventlogging_schema_fqn, get_schema_files from unittest import TestCase yaml = YAML(typ='safe') + class RegisteredSchemasTestCase(TestCase): - def schema_files(self): - event_schemas_dir = os.path.realpath( - os.path.join(os.path.dirname(__file__), '..', 'event-schemas') - ) - schemas = [] - for dirname, _, files in os.walk(event_schemas_dir): - for file in files: - if file.endswith('.yaml'): - yield os.path.join(dirname, file) def test_eventlogging_schema_fqn(self): self.assertEqual( eventlogging_schema_fqn('test'), 'eventlogging.jupyter.org/notebook/test' ) + def test_valid_schemas(self): """ All schemas 
must be valid json schemas """ - for schema_file in self.schema_files(): + for schema_file in get_schema_files(): with open(schema_file) as f: jsonschema.Draft7Validator.check_schema(yaml.load(f)) @@ -40,7 +33,7 @@ def test_schema_conventions(self): 2. Schema id should be eventlogging.jupyter.org/notebook/{name} 3. Schema version should match version in file """ - for schema_file in self.schema_files(): + for schema_file in get_schema_files(): filename = os.path.basename(schema_file) match = re.match('v(\d+)\.yaml', filename) # All schema locations must match the following pattern @@ -53,5 +46,4 @@ def test_schema_conventions(self): self.assertEqual(schema['$id'], eventlogging_schema_fqn( os.path.basename(os.path.dirname(schema_file)) )) - self.assertEqual(schema['version'], int(match.groups()[0])) - \ No newline at end of file + self.assertEqual(schema['version'], int(match.groups()[0])) \ No newline at end of file From 6e3c80c622352fb007c89315fa5063e5e71b9241 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 14:56:24 -0700 Subject: [PATCH 10/68] Add fix for tables in RTD theme sphinx docs. 
Solution came from https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html --- docs/source/_static/theme_overrides.css | 13 +++ docs/source/conf.py | 127 +++++++++++++++++++++++- jupyter_server/utils.py | 5 +- 3 files changed, 140 insertions(+), 5 deletions(-) create mode 100644 docs/source/_static/theme_overrides.css diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css new file mode 100644 index 0000000000..63ee6cc74c --- /dev/null +++ b/docs/source/_static/theme_overrides.css @@ -0,0 +1,13 @@ +/* override table width restrictions */ +@media screen and (min-width: 767px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from overriding + this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } +} diff --git a/docs/source/conf.py b/docs/source/conf.py index e105e82d40..fc538a1613 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,8 +70,7 @@ 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib_github_alt', - 'sphinxcontrib.openapi', - 'sphinxemoji.sphinxemoji' + 'sphinx-jsonschema' ] # Add any paths that contain templates here, relative to this directory. @@ -208,6 +207,12 @@ # since it is needed to properly generate _static in the build directory html_static_path = ['_static'] +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # override wide tables in RTD theme + ], + } + # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. 
@@ -370,4 +375,122 @@ spelling_word_list_filename='spelling_wordlist.txt' # import before any doc is built, so _ is guaranteed to be injected +<<<<<<< HEAD import jupyter_server.transutils +======= +import notebook.transutils + +# -- Autogenerate documentation for event schemas ------------------ + +from notebook.utils import get_schema_files + +# Build a dictionary that describes the event schema table of contents. +# toc = { +# schema_name : { +# src: # file path to schema +# dst: # file path to documentation +# ver: # latest version of schema +# } +# } +toc = {} + +# Iterate over schema directories and generate documentation. +# Generates documentation for the latest version of each schema. +for file_path in get_schema_files(): + # Make path relative. + file_path = os.path.relpath(file_path) + # Break apart path to its pieces + pieces = file_path.split(os.path.sep) + # Schema version. Outputs as a string that looks like "v#" + schema_ver = os.path.splitext(pieces[-1])[0] + # Strip "v" and make version an integer. + schema_int = int(schema_ver[1:]) + # Schema name. + schema_name = pieces[-2] + + # Add this version file to schema_dir + src = '../' + file_path + dst = os.path.join('events', os.path.join(schema_name + '.rst')) + + if schema_name in toc: + # If this is a later version, replace the old version. + if schema_int > toc[schema_name]['ver']: + toc[schema_name] = { + 'src': src, + 'dst': dst, + 'ver': schema_int + } + else: + toc[schema_name] = { + 'src': src, + 'dst': dst, + 'ver': schema_int + } + +# Write schema documentation +for schema_name, x in toc.items(): + with open(dst, 'w') as f: + f.write('.. jsonschema:: {}'.format(src)) + +# Write table of contents +events_index = """ +.. 
toctree:: + :maxdepth: 1 + :glob: + +""" + +with open(os.path.join('events', 'index.rst'), 'w') as f: + f.write(events_index) + for item in toc.keys(): + f.write(' {}'.format(item)) + + + + + + + + + + +# # create a directory for this schema if it doesn't exist: +# schema_dir = os.path.join('events', schema_name) +# if not os.path.exists(schema_dir): +# os.makedirs(schema_dir) + + +# toc[schema_name] + + + +# with open(dst, 'w') as f: +# f.write('.. jsonschema:: {}'.format(src)) + + + + + + + +# toc.append(schema_name) + + +# events_index = """ +# .. toctree:: +# :maxdepth: 1 +# :glob: + +# """ + + +# with open(os.path.join('events', 'index.rst'), 'w') as f: +# f.write(events_index) +# for item in set(toc): +# f.write(' {}/*'.format(item)) + + + + + +>>>>>>> 4fb0a0443... Add fix for tables in RTD theme sphinx docs. diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 55389f037a..8fc6e89479 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -455,11 +455,10 @@ def get_schema_files(): """Yield a sequence of event schemas for jupyter services.""" # Hardcode path to event schemas directory. 
event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') - schema_files = [] + #schema_files = [] # Recursively register all .json files under event-schemas for dirname, _, files in os.walk(event_schemas_dir): for file in files: if file.endswith('.yaml'): file_path = os.path.join(dirname, file) - schema_files.append(file_path) - yield schema_files + yield file_path From 4035fd557e99a7549e1aa53e5b7874f83a2587e3 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:02:22 -0700 Subject: [PATCH 11/68] add event schema auto-documentation to jupyter notebook docs --- .gitignore | 1 + docs/environment.yml | 3 +- docs/source/conf.py | 118 --------------------------------------- docs/source/eventlog.rst | 47 ++++++++++++++++ 4 files changed, 50 insertions(+), 119 deletions(-) create mode 100644 docs/source/eventlog.rst diff --git a/.gitignore b/.gitignore index d9fb5e0c6c..a69d2eeee6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ docs/man/*.gz docs/source/api/generated docs/source/config.rst docs/gh-pages +docs/source/events notebook/i18n/*/LC_MESSAGES/*.mo notebook/i18n/*/LC_MESSAGES/nbjs.json notebook/static/components diff --git a/docs/environment.yml b/docs/environment.yml index 5d77bc7bb4..1d9c9d3eb8 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -13,4 +13,5 @@ dependencies: - sphinxcontrib_github_alt - sphinxcontrib-openapi - sphinxemoji - - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file + - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master + - sphinx-jsonschema diff --git a/docs/source/conf.py b/docs/source/conf.py index fc538a1613..41b089cc07 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -375,122 +375,4 @@ spelling_word_list_filename='spelling_wordlist.txt' # import before any doc is built, so _ is guaranteed to be injected -<<<<<<< HEAD import jupyter_server.transutils -======= -import notebook.transutils - -# -- Autogenerate 
documentation for event schemas ------------------ - -from notebook.utils import get_schema_files - -# Build a dictionary that describes the event schema table of contents. -# toc = { -# schema_name : { -# src: # file path to schema -# dst: # file path to documentation -# ver: # latest version of schema -# } -# } -toc = {} - -# Iterate over schema directories and generate documentation. -# Generates documentation for the latest version of each schema. -for file_path in get_schema_files(): - # Make path relative. - file_path = os.path.relpath(file_path) - # Break apart path to its pieces - pieces = file_path.split(os.path.sep) - # Schema version. Outputs as a string that looks like "v#" - schema_ver = os.path.splitext(pieces[-1])[0] - # Strip "v" and make version an integer. - schema_int = int(schema_ver[1:]) - # Schema name. - schema_name = pieces[-2] - - # Add this version file to schema_dir - src = '../' + file_path - dst = os.path.join('events', os.path.join(schema_name + '.rst')) - - if schema_name in toc: - # If this is a later version, replace the old version. - if schema_int > toc[schema_name]['ver']: - toc[schema_name] = { - 'src': src, - 'dst': dst, - 'ver': schema_int - } - else: - toc[schema_name] = { - 'src': src, - 'dst': dst, - 'ver': schema_int - } - -# Write schema documentation -for schema_name, x in toc.items(): - with open(dst, 'w') as f: - f.write('.. jsonschema:: {}'.format(src)) - -# Write table of contents -events_index = """ -.. toctree:: - :maxdepth: 1 - :glob: - -""" - -with open(os.path.join('events', 'index.rst'), 'w') as f: - f.write(events_index) - for item in toc.keys(): - f.write(' {}'.format(item)) - - - - - - - - - - -# # create a directory for this schema if it doesn't exist: -# schema_dir = os.path.join('events', schema_name) -# if not os.path.exists(schema_dir): -# os.makedirs(schema_dir) - - -# toc[schema_name] - - - -# with open(dst, 'w') as f: -# f.write('.. 
jsonschema:: {}'.format(src)) - - - - - - - -# toc.append(schema_name) - - -# events_index = """ -# .. toctree:: -# :maxdepth: 1 -# :glob: - -# """ - - -# with open(os.path.join('events', 'index.rst'), 'w') as f: -# f.write(events_index) -# for item in set(toc): -# f.write(' {}/*'.format(item)) - - - - - ->>>>>>> 4fb0a0443... Add fix for tables in RTD theme sphinx docs. diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst new file mode 100644 index 0000000000..fd77a1b9c8 --- /dev/null +++ b/docs/source/eventlog.rst @@ -0,0 +1,47 @@ +Eventlogging and Telemetry +========================== + +The Notebook Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that the Notebook Server emits are defined by `JSON schemas`_ listed below_ emitted as JSON data, defined and validated by the JSON schemas listed below. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +How to emit events +------------------ + +Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data. + + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to + 2. ``allows_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. code-block:: + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'hub.jupyter.org/server-action' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + + +.. 
_below: + +Event schemas +------------- + +.. toctree:: + :maxdepth: 2 + + events/index From 23d50a38b16512503acbeee1204d2e245c0af0ac Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:14:04 -0700 Subject: [PATCH 12/68] format paths in recorded events --- jupyter_server/services/contents/handlers.py | 30 +++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index d80ba9b768..85065b21cb 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -117,7 +117,8 @@ async def get(self, path=''): validate_model(model, expect_content=content) self._finish_model(model, location=False) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'get', 'path': model['path'] } ) @@ -134,10 +135,13 @@ async def patch(self, path=''): self._finish_model(model) self.log.info(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, - # FIXME: 'path' always has a leading slash, while model['path'] does not. - # What to do here for source_path? path munge manually? 
Eww - { 'action': 'rename', 'path': model['path'], 'source_path': path } + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'rename', + 'path': model['path'], + 'source_path': path.lstrip(os.path.sep) + } ) @gen.coroutine @@ -152,8 +156,13 @@ async def _copy(self, copy_from, copy_to=None): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, - { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'copy', + 'path': model['path'], + 'source_path': copy_from.lstrip(os.path.sep) + } ) async def _upload(self, model, path): @@ -164,7 +173,8 @@ async def _upload(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'upload', 'path': model['path'] } ) @@ -189,9 +199,9 @@ async def _save(self, model, path): model = self.contents_manager.save(model, path) validate_model(model, expect_content=False) self._finish_model(model) - self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'save', 'path': model['path'] } ) From 3c94970d5be68fd5b8f6bab0f5c2ae3311843f00 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:37:03 -0700 Subject: [PATCH 13/68] add documentation for eventlog endpoint --- docs/source/eventlog.rst | 24 ++++++++++++++++---- jupyter_server/services/eventlog/handlers.py | 11 +++++---- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst index fd77a1b9c8..df5c153fb7 100644 --- a/docs/source/eventlog.rst +++ b/docs/source/eventlog.rst @@ -8,12 +8,11 @@ The Notebook Server can be configured to 
record structured events from a running .. _`Telemetry System`_: https://github.com/jupyter/telemetry .. _`JSON schemas`_: https://json-schema.org/ -How to emit events ------------------ +Emitting Server Events +---------------------- Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data. To begin recording events, you'll need to set two configurations: 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to 2. ``allows_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. @@ -35,11 +34,26 @@ Here's a basic example for emitting events from the `contents` service: The output is a file, ``"event.log"``, with events recorded as JSON data. +`eventlog` endpoint +------------------- + +The Notebook Server provides a public REST endpoint for external applications to validate and log events +through the Server's Event Log. + +To log events, send a `POST` request to the `/api/eventlog` endpoint. The body of the request should be a +JSON blob and is required to have the following keys: + + 1. `'schema'` : the event's schema ID. + 2. `'version'` : the version of the event's schema. + 3. `'event'` : the event data in JSON format. + +Events that are validated by this endpoint must have their schema listed in the `allowed_schemas` trait listed above. .. _below: 


Server Event schemas -------=======------ .. 
toctree:: :maxdepth: 2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index 4665e43e8b..b27dd87304 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -29,14 +29,15 @@ def post(self, *args, **kwargs): version = raw_event['version'] event = raw_event['event'] - # Profile, and move to a background thread if this is problematic - # FIXME: Return a more appropriate error response if validation fails - self.eventlog.record_event(schema_name, version, event) - + # Profile, may need to move to a background thread if this is problematic + try: + self.eventlog.record_event(schema_name, version, event) + except: + raise web.HTTPError(500, "Event could not be validated.") + self.set_status(204) self.finish() - default_handlers = [ (r"/api/eventlog", EventLoggingHandler), ] \ No newline at end of file From e76c91b3ac264b20bf2aa6d7d468a1d8c3999fc2 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:46:40 -0700 Subject: [PATCH 14/68] return exception as 400 error in eventlog endpoint --- docs/source/eventlog.rst | 2 +- jupyter_server/services/eventlog/handlers.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst index df5c153fb7..7229717f69 100644 --- a/docs/source/eventlog.rst +++ b/docs/source/eventlog.rst @@ -53,7 +53,7 @@ Events that are validated by this endpoint must have their schema listed in the Server Event schemas --------=======------ +-------------------- .. 
toctree:: :maxdepth: 2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index b27dd87304..0c9b69815f 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -6,7 +6,6 @@ from notebook.base.handlers import APIHandler, json_errors from jupyter_telemetry.eventlog import EventLog - class EventLoggingHandler(APIHandler): """ A handler that receives and stores telemetry data from the client. @@ -32,8 +31,8 @@ def post(self, *args, **kwargs): # Profile, may need to move to a background thread if this is problematic try: self.eventlog.record_event(schema_name, version, event) - except: - raise web.HTTPError(500, "Event could not be validated.") + except Exception as e: + raise web.HTTPError(400, e) self.set_status(204) self.finish() From 2ce7c54efa056604c52c1bf725464ead6504b4ed Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:55:54 -0700 Subject: [PATCH 15/68] normalize path in emitted event --- jupyter_server/services/contents/handlers.py | 2 +- notebook/tests/test_eventlog.py | 49 -------------------- 2 files changed, 1 insertion(+), 50 deletions(-) delete mode 100644 notebook/tests/test_eventlog.py diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 85065b21cb..c2ba749c85 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -275,7 +275,7 @@ async def delete(self, path=''): self.finish() self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, - { 'action': 'delete', 'path': path } + { 'action': 'delete', 'path': path.lstrip(os.path.sep) } ) diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py deleted file mode 100644 index 994181b73e..0000000000 --- a/notebook/tests/test_eventlog.py +++ /dev/null @@ -1,49 +0,0 @@ -import os -import re -import jsonschema -from ruamel.yaml import 
YAML -from notebook.notebookapp import NotebookApp -from notebook.utils import eventlogging_schema_fqn, get_schema_files -from unittest import TestCase - -yaml = YAML(typ='safe') - - -class RegisteredSchemasTestCase(TestCase): - - def test_eventlogging_schema_fqn(self): - self.assertEqual( - eventlogging_schema_fqn('test'), - 'eventlogging.jupyter.org/notebook/test' - ) - - def test_valid_schemas(self): - """ - All schemas must be valid json schemas - """ - for schema_file in get_schema_files(): - with open(schema_file) as f: - jsonschema.Draft7Validator.check_schema(yaml.load(f)) - - def test_schema_conventions(self): - """ - Test schema naming convention for this repo. - - 1. All schemas should be under event-schamas/{name}/v{version}.yaml - 2. Schema id should be eventlogging.jupyter.org/notebook/{name} - 3. Schema version should match version in file - """ - for schema_file in get_schema_files(): - filename = os.path.basename(schema_file) - match = re.match('v(\d+)\.yaml', filename) - # All schema locations must match the following pattern - # schema-name/v(version).yaml - self.assertIsNotNone(match) - - with open(schema_file) as f: - schema = yaml.load(f) - - self.assertEqual(schema['$id'], eventlogging_schema_fqn( - os.path.basename(os.path.dirname(schema_file)) - )) - self.assertEqual(schema['version'], int(match.groups()[0])) \ No newline at end of file From 5794d31efb62a1b1a1c5ed1a6e3816a25d223849 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 15:31:03 -0700 Subject: [PATCH 16/68] initial tests --- jupyter_server/event-schemas/README.md | 19 --------- .../contentsmanager-actions.json | 33 ---------------- .../contentsmanager-actions/v1.json | 30 -------------- .../{ => contentsmanager-actions}/v1.yaml | 8 +++- jupyter_server/event-schemas/generate-json.py | 39 ------------------- jupyter_server/services/contents/handlers.py | 36 ++++++++--------- setup.py | 3 +- tests/test_eventlog.py | 4 ++ 8 files changed, 29 insertions(+), 143 
deletions(-) delete mode 100644 jupyter_server/event-schemas/README.md delete mode 100644 jupyter_server/event-schemas/contentsmanager-actions.json delete mode 100644 jupyter_server/event-schemas/contentsmanager-actions/v1.json rename jupyter_server/event-schemas/{ => contentsmanager-actions}/v1.yaml (94%) delete mode 100755 jupyter_server/event-schemas/generate-json.py create mode 100644 tests/test_eventlog.py diff --git a/jupyter_server/event-schemas/README.md b/jupyter_server/event-schemas/README.md deleted file mode 100644 index 541a9b0398..0000000000 --- a/jupyter_server/event-schemas/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Event Schemas - -## Generating .json files - -Event Schemas are written in a human readable `.yaml` format. -This is primarily to get multi-line strings in our descriptions, -as documentation is very important. - -Every time you modify a `.yaml` file, you should run the following -commands. - -```bash -./generate-json.py -``` - -This needs the `ruamel.yaml` python package installed. - -Hopefully, this is extremely temporary, and we can just use YAML -with jupyter_telemetry. \ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json deleted file mode 100644 index 065f1d5c2f..0000000000 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", - "version": 1, - "title": "Contents Manager activities", - "description": "Record actions on files via the ContentsManager REST API.\n\nThe notebook ContentsManager REST API is used by all frontends to retreive,\nsave, list, delete and perform other actions on notebooks, directories,\nand other files through the UI. 
This is pluggable - the default acts on\nthe file system, but can be replaced with a different ContentsManager\nimplementation - to work on S3, Postgres, other object stores, etc.\nThe events get recorded regardless of the ContentsManager implementation\nbeing used.\n\nLimitations:\n\n1. This does not record all filesystem access, just the ones that happen\n explicitly via the notebook server's REST API. Users can (and often do)\n trivially access the filesystem in many other ways (such as `open()` calls\n in their code), so this is usually never a complete record.\n2. As with all events recorded by the notebook server, users most likely\n have the ability to modify the code of the notebook server. Unless other\n security measures are in place, these events should be treated as user\n controlled and not used in high security areas.\n3. Events are only recorded when an action succeeds.\n", - "type": "object", - "required": [ - "action", - "path" - ], - "properties": { - "action": { - "enum": [ - "get", - "create", - "save", - "upload", - "rename", - "copy", - "delete" - ], - "description": "Action performed by the ContentsManager API.\n\nThis is a required field.\n\nPossible values:\n\n1. get\n Get contents of a particular file, or list contents of a directory.\n\n2. create\n Create a new directory or file at 'path'. Currently, name of the\n file or directory is auto generated by the ContentsManager implementation.\n\n3. save\n Save a file at path with contents from the client\n\n4. upload\n Upload a file at given path with contents from the client\n\n5. rename\n Rename a file or directory from value in source_path to\n value in path.\n\n5. copy\n Copy a file or directory from value in source_path to\n value in path.\n\n6. 
delete\n Delete a file or empty directory at given path\n" - }, - "path": { - "type": "string", - "description": "Logical path on which the operation was performed.\n\nThis is a required field.\n" - }, - "source_path": { - "type": "string", - "description": "Source path of an operation when action is 'copy' or 'rename'" - } - } -} \ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.json b/jupyter_server/event-schemas/contentsmanager-actions/v1.json deleted file mode 100644 index 5da6d68b88..0000000000 --- a/jupyter_server/event-schemas/contentsmanager-actions/v1.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", - "version": 1, - "title": "Contents Manager activities", - "description": "Notebook Server emits this event whenever a contentsmanager action happens", - "type": "object", - "required": ["action", "path"], - "properties": { - "action": { - "enum": [ - "get", - "create", - "save", - "upload", - "rename", - "create", - "copy" - ], - "description": "Action performed by contents manager" - }, - "path": { - "type": "string", - "description": "Logical path the action was performed in" - }, - "source_path": { - "type": "string", - "description": "If action is 'copy', this specifies the source path" - } - } -} \ No newline at end of file diff --git a/jupyter_server/event-schemas/v1.yaml b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml similarity index 94% rename from jupyter_server/event-schemas/v1.yaml rename to jupyter_server/event-schemas/contentsmanager-actions/v1.yaml index 3d7e8f2fe9..31a5f293a9 100644 --- a/jupyter_server/event-schemas/v1.yaml +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml @@ -1,6 +1,7 @@ "$id": eventlogging.jupyter.org/notebook/contentsmanager-actions version: 1 title: Contents Manager activities +personal-data: true description: | Record actions on files via the ContentsManager REST API. 
@@ -37,6 +38,7 @@ properties: - rename - copy - delete + category: unrestricted description: | Action performed by the ContentsManager API. @@ -60,20 +62,22 @@ properties: 5. rename Rename a file or directory from value in source_path to value in path. - + 5. copy Copy a file or directory from value in source_path to value in path. - + 6. delete Delete a file or empty directory at given path path: + category: personally-identifiable-information type: string description: | Logical path on which the operation was performed. This is a required field. source_path: + category: personally-identifiable-information type: string description: | Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file diff --git a/jupyter_server/event-schemas/generate-json.py b/jupyter_server/event-schemas/generate-json.py deleted file mode 100755 index a39fa0610b..0000000000 --- a/jupyter_server/event-schemas/generate-json.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json -import os -import jsonschema -from ruamel.yaml import YAML - -from jupyter_telemetry.eventlog import EventLog - -yaml = YAML(typ='safe') - -def main(): - argparser = argparse.ArgumentParser() - argparser.add_argument( - 'directory', - help='Directory with Schema .yaml files' - ) - - args = argparser.parse_args() - - el = EventLog() - for dirname, _, files in os.walk(args.directory): - for file in files: - if not file.endswith('.yaml'): - continue - yaml_path = os.path.join(dirname, file) - print('Processing', yaml_path) - with open(yaml_path) as f: - schema = yaml.load(f) - - # validate schema - el.register_schema(schema) - - json_path = os.path.join(dirname, os.path.splitext(file)[0] + '.json') - with open(json_path, 'w') as f: - json.dump(schema, f, indent=4) - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 
c2ba749c85..9b7802ff2a 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -5,7 +5,7 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. - +import os import json from tornado import web @@ -115,12 +115,12 @@ async def get(self, path=''): path=path, type=type, format=format, content=content, )) validate_model(model, expect_content=content) - self._finish_model(model, location=False) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'get', 'path': model['path'] } ) + self._finish_model(model, location=False) @web.authenticated async def patch(self, path=''): @@ -130,21 +130,20 @@ async def patch(self, path=''): if model is None: raise web.HTTPError(400, u'JSON body missing') self.log.info(model) - model = yield maybe_future(cm.update(model, path)) + model = await ensure_async(cm.update(model, path)) validate_model(model, expect_content=False) - self._finish_model(model) - self.log.info(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, - { - 'action': 'rename', - 'path': model['path'], - 'source_path': path.lstrip(os.path.sep) + { + 'action': 'rename', + 'path': model['path'], + 'source_path': path.lstrip(os.path.sep) } ) + self._finish_model(model) + - @gen.coroutine async def _copy(self, copy_from, copy_to=None): """Copy a file, optionally specifying a target directory.""" self.log.info(u"Copying {copy_from} to {copy_to}".format( @@ -154,16 +153,16 @@ async def _copy(self, copy_from, copy_to=None): model = self.contents_manager.copy(copy_from, copy_to) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { - 'action': 'copy', - 'path': model['path'], - 'source_path': copy_from.lstrip(os.path.sep) + 'action': 'copy', + 'path': model['path'], + 
'source_path': copy_from.lstrip(os.path.sep) } ) + self._finish_model(model) async def _upload(self, model, path): """Handle upload of a new file to path""" @@ -171,12 +170,12 @@ async def _upload(self, model, path): model = self.contents_manager.new(model, path) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'upload', 'path': model['path'] } ) + self._finish_model(model) async def _new_untitled(self, path, type='', ext=''): """Create a new, empty untitled entity""" @@ -184,12 +183,12 @@ async def _new_untitled(self, path, type='', ext=''): model = self.contents_manager.new_untitled(path=path, type=type, ext=ext) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, # Set path to path of created object, not directory it was created in { 'action': 'create', 'path': model['path'] } ) + self._finish_model(model) async def _save(self, model, path): """Save an existing file.""" @@ -198,12 +197,12 @@ async def _save(self, model, path): self.log.info(u"Saving file at %s", path) model = self.contents_manager.save(model, path) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'save', 'path': model['path'] } ) + self._finish_model(model) @web.authenticated async def post(self, path=''): @@ -272,12 +271,11 @@ async def delete(self, path=''): self.log.warning('delete %s', path) cm.delete(path) self.set_status(204) - self.finish() self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'delete', 'path': path.lstrip(os.path.sep) } ) - + self.finish() class CheckpointsHandler(APIHandler): diff --git a/setup.py b/setup.py index e402e71669..3ccc6b5fcb 100755 --- 
a/setup.py +++ b/setup.py @@ -95,7 +95,8 @@ 'terminado>=0.8.3', 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", - 'jupyter_telemetry' + # Install teh + 'git+https://github.com/Zsailer/telemetry.git@personal-data' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters', diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py new file mode 100644 index 0000000000..1f7b587327 --- /dev/null +++ b/tests/test_eventlog.py @@ -0,0 +1,4 @@ + + +def test_eventlog(serverapp): + pass \ No newline at end of file From 7c9d3d51f4e4b8f03d984819ddbee6f48202d2cf Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 16:05:05 -0700 Subject: [PATCH 17/68] add initial telemetry docs --- .gitignore | 10 +---- docs/doc-requirements.txt | 3 +- docs/source/conf.py | 11 ++++-- docs/source/operators/index.rst | 3 +- docs/source/operators/telemetry.rst | 61 +++++++++++++++++++++++++++++ docs/source/other/full-config.rst | 2 +- jupyter_server/utils.py | 2 +- setup.py | 2 +- 8 files changed, 77 insertions(+), 17 deletions(-) create mode 100644 docs/source/operators/telemetry.rst diff --git a/.gitignore b/.gitignore index a69d2eeee6..0ab0672302 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ MANIFEST +docs/source/operators/events build dist _build @@ -7,15 +8,6 @@ docs/source/api/generated docs/source/config.rst docs/gh-pages docs/source/events -notebook/i18n/*/LC_MESSAGES/*.mo -notebook/i18n/*/LC_MESSAGES/nbjs.json -notebook/static/components -notebook/static/style/*.min.css* -notebook/static/*/js/built/ -notebook/static/*/built/ -notebook/static/built/ -notebook/static/*/js/main.min.js* -notebook/static/lab/*bundle.js node_modules *.py[co] __pycache__ diff --git a/docs/doc-requirements.txt b/docs/doc-requirements.txt index 48b3eda1d0..4167aabf6d 100644 --- a/docs/doc-requirements.txt +++ b/docs/doc-requirements.txt @@ -8,4 +8,5 @@ prometheus_client sphinxcontrib_github_alt sphinxcontrib-openapi sphinxemoji 
-git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file +git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master +jupyter_telemetry_sphinxext \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 41b089cc07..4add156c81 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,7 +70,7 @@ 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib_github_alt', - 'sphinx-jsonschema' + 'jupyter_telemetry_sphinxext' ] # Add any paths that contain templates here, relative to this directory. @@ -209,9 +209,9 @@ html_context = { 'css_files': [ - '_static/theme_overrides.css', # override wide tables in RTD theme + '_static/theme_overrides.css', # override wide tables in RTD theme ], - } +} # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied @@ -376,3 +376,8 @@ # import before any doc is built, so _ is guaranteed to be injected import jupyter_server.transutils + +# Jupyter telemetry configuration values. +jupyter_telemetry_schema_source = "../jupyter_server/event-schemas" # Path is relative to conf.py +jupyter_telemetry_schema_output = "source/operators/events" # Path is relative to conf.py +jupyter_telemetry_index_title = "Telemetry Event Schemas" # Title of the index page that lists all found schemas. 
\ No newline at end of file diff --git a/docs/source/operators/index.rst b/docs/source/operators/index.rst index a654be1a0c..a6d2e212fd 100644 --- a/docs/source/operators/index.rst +++ b/docs/source/operators/index.rst @@ -12,4 +12,5 @@ These pages are targeted at people using, configuring, and/or deploying multiple configuring-extensions migrate-from-nbserver public-server - security \ No newline at end of file + security + telemetry \ No newline at end of file diff --git a/docs/source/operators/telemetry.rst b/docs/source/operators/telemetry.rst new file mode 100644 index 0000000000..2c94e99a7c --- /dev/null +++ b/docs/source/operators/telemetry.rst @@ -0,0 +1,61 @@ +Telemetry and Eventlogging +========================== + +Jupyter Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that the Server emits are recorded as JSON data, defined and validated by the `JSON schemas`_ listed below_. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +Emitting Server Events +---------------------- + +Event logging is handled by the server's ``EventLog`` object. This leverages Python's standard logging_ library to emit, filter, and collect event data. + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to their destinations. + 2. ``allowed_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. 
code-block:: + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'eventlogging.jupyter.org/jupyter_server/contentsmanager-actions' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + +`eventlog` endpoint +------------------- + +The Notebook Server provides a public REST endpoint for external applications to validate and log events +through the Server's Event Log. + +To log events, send a `POST` request to the `/api/eventlog` endpoint. The body of the request should be a +JSON blob and is required to have the following keys: + + 1. `'schema'` : the event's schema ID. + 2. `'version'` : the version of the event's schema. + 3. `'event'` : the event data in JSON format. + +Events that are validated by this endpoint must have their schema listed in the `allowed_schemas` trait listed above. + +.. _below: + + +Server Event schemas +-------------------- + +.. toctree:: + :maxdepth: 2 + + events/index diff --git a/docs/source/other/full-config.rst b/docs/source/other/full-config.rst index f7f0cab4ba..70852ea40f 100644 --- a/docs/source/other/full-config.rst +++ b/docs/source/other/full-config.rst @@ -897,7 +897,7 @@ FileContentsManager.root_dir : Unicode No description -NotebookNotary.algorithm : 'md5'|'sha3_384'|'sha3_512'|'sha256'|'sha1'|'blake2s'|'sha3_256'|'sha3_224'|'sha384'|'sha512'|'blake2b'|'sha224' +NotebookNotary.algorithm : 'sha1'|'sha3_224'|'blake2s'|'sha384'|'sha224'|'sha3_256'|'sha3_384'|'sha3_512'|'sha512'|'sha256'|'md5'|'blake2b' Default: ``'sha256'`` The hashing algorithm used to sign notebooks. 
diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 8fc6e89479..ec44e13b75 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -448,7 +448,7 @@ def eventlogging_schema_fqn(name): Matches convention for this particular repo """ - return 'eventlogging.jupyter.org/notebook/{}'.format(name) + return 'eventlogging.jupyter.org/jupyter_server/{}'.format(name) def get_schema_files(): diff --git a/setup.py b/setup.py index 3ccc6b5fcb..bfedef2528 100755 --- a/setup.py +++ b/setup.py @@ -95,7 +95,7 @@ 'terminado>=0.8.3', 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", - # Install teh + # Install the working branch of telemetry. 'git+https://github.com/Zsailer/telemetry.git@personal-data' ], extras_require = { From ef8573d82d407ddd56f3ebe7ab1d37729e685117 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 16:16:31 -0700 Subject: [PATCH 18/68] fix jupyter_telemetry dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bfedef2528..9f3932fac7 100755 --- a/setup.py +++ b/setup.py @@ -96,7 +96,7 @@ 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", # Install the working branch of telemetry. - 'git+https://github.com/Zsailer/telemetry.git@personal-data' + 'jupyter_telemetry@git+https://github.com/Zsailer/telemetry.git@master' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters', From ea9e352d2906ace72c32158b3abb8976c5637a54 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 16:18:22 -0700 Subject: [PATCH 19/68] point telemetry at correct dev branch --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9f3932fac7..6123e8f64f 100755 --- a/setup.py +++ b/setup.py @@ -96,7 +96,7 @@ 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", # Install the working branch of telemetry. 
- 'jupyter_telemetry@git+https://github.com/Zsailer/telemetry.git@master' + 'jupyter_telemetry@git+https://github.com/Zsailer/telemetry.git@personal-data' ], extras_require = { 'test': ['nose', 'coverage', 'requests', 'nose_warnings_filters', From b06f7d6f324ea8a29b2a351c631d3109be0a8971 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Tue, 20 Oct 2020 15:23:47 +0800 Subject: [PATCH 20/68] add tests for eventlog --- .../contentsmanager-actions/v1.yaml | 4 +- tests/test_eventlog.py | 44 ++++++++++++++++++- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml index 31a5f293a9..d95de1d282 100644 --- a/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml @@ -1,4 +1,4 @@ -"$id": eventlogging.jupyter.org/notebook/contentsmanager-actions +"$id": eventlogging.jupyter.org/jupyter_server/contentsmanager-actions version: 1 title: Contents Manager activities personal-data: true @@ -80,4 +80,4 @@ properties: category: personally-identifiable-information type: string description: | - Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file + Source path of an operation when action is 'copy' or 'rename' diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py index 1f7b587327..cef3915d43 100644 --- a/tests/test_eventlog.py +++ b/tests/test_eventlog.py @@ -1,4 +1,44 @@ +import io +import json +import logging +import jsonschema +import pytest +from traitlets.config import Config -def test_eventlog(serverapp): - pass \ No newline at end of file +from jupyter_server.utils import eventlogging_schema_fqn +from .services.contents.test_api import contents, contents_dir, dirs + + +@pytest.fixture +def eventlog_sink(configurable_serverapp): + """Return eventlog and sink objects""" + sink = io.StringIO() + handler = logging.StreamHandler(sink) + + cfg 
= Config() + cfg.EventLog.handlers = [handler] + serverapp = configurable_serverapp(config=cfg) + yield serverapp, sink + + +@pytest.mark.parametrize('path, name', dirs) +async def test_eventlog_list_notebooks(eventlog_sink, fetch, contents, path, name): + schema, version = (eventlogging_schema_fqn('contentsmanager-actions'), 1) + serverapp, sink = eventlog_sink + serverapp.eventlog.allowed_schemas = [schema] + + r = await fetch( + 'api', + 'contents', + path, + method='GET', + ) + assert r.code == 200 + + output = sink.getvalue() + assert output + data = json.loads(output) + jsonschema.validate(data, serverapp.eventlog.schemas[(schema, version)]) + expected = {'action': 'get', 'path': path} + assert expected.items() <= data.items() From 38712f9a99c5a3744eec08b66ca87d2194b041ee Mon Sep 17 00:00:00 2001 From: Kevin Bates Date: Thu, 5 Nov 2020 12:05:56 -0800 Subject: [PATCH 21/68] Enable CodeQL runs on all pushed branches --- .github/workflows/codeql-analysis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 85ca80f64c..07f62968e2 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -14,7 +14,7 @@ name: "CodeQL" on: push: - branches: [ master ] + branches: [ '*' ] pull_request: # The branches below must be a subset of the branches above branches: [ master ] From 9e18c8ed700a55e8c30e51670884ba732583e04c Mon Sep 17 00:00:00 2001 From: David Lukes Date: Wed, 1 Jul 2020 18:55:30 +0200 Subject: [PATCH 22/68] Nested shells should not be login (#5247) (#5565) --- jupyter_server/terminal/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/jupyter_server/terminal/__init__.py b/jupyter_server/terminal/__init__.py index 639a17a711..26f51b9c50 100644 --- a/jupyter_server/terminal/__init__.py +++ b/jupyter_server/terminal/__init__.py @@ -22,8 +22,13 @@ def initialize(webapp, root_dir, 
connection_url, settings): shell = settings.get('shell_command', [os.environ.get('SHELL') or default_shell] ) - # Enable login mode - to automatically source the /etc/profile script - if os.name != 'nt': + # Enable login mode - to automatically source the /etc/profile + # script, but only for non-nested shells; for nested shells, it's + # superfluous and may even be harmful (e.g. on macOS, where login + # shells invoke /usr/libexec/path_helper to add entries from + # /etc/paths{,.d} to the PATH, reordering it in the process and + # potentially overriding virtualenvs and other PATH modifications) + if os.name != 'nt' and int(os.environ.get("SHLVL", 0)) < 1: shell.append('-l') terminal_manager = webapp.settings['terminal_manager'] = NamedTermManager( shell_command=shell, From 66962b530261f9eca35f4060133bed818330b619 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Fri, 3 Jul 2020 15:04:08 +0200 Subject: [PATCH 23/68] Improve login shell heuristics Co-authored-by: Kevin Bates --- jupyter_server/terminal/__init__.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/jupyter_server/terminal/__init__.py b/jupyter_server/terminal/__init__.py index 26f51b9c50..ea51ab1353 100644 --- a/jupyter_server/terminal/__init__.py +++ b/jupyter_server/terminal/__init__.py @@ -1,4 +1,5 @@ import os +import sys import terminado from ..utils import check_version @@ -19,16 +20,18 @@ def initialize(webapp, root_dir, connection_url, settings): default_shell = 'powershell.exe' else: default_shell = which('sh') - shell = settings.get('shell_command', + shell_override = settings.get('shell_command') + shell = ( [os.environ.get('SHELL') or default_shell] + if shell_override is None + else shell_override ) - # Enable login mode - to automatically source the /etc/profile - # script, but only for non-nested shells; for nested shells, it's - # superfluous and may even be harmful (e.g. 
on macOS, where login - # shells invoke /usr/libexec/path_helper to add entries from - # /etc/paths{,.d} to the PATH, reordering it in the process and - # potentially overriding virtualenvs and other PATH modifications) - if os.name != 'nt' and int(os.environ.get("SHLVL", 0)) < 1: + # When the notebook server is not running in a terminal (e.g. when + # it's launched by a JupyterHub spawner), it's likely that the user + # environment hasn't been fully set up. In that case, run a login + # shell to automatically source /etc/profile and the like, unless + # the user has specifically set a preferred shell command. + if os.name != 'nt' and shell_override is None and not sys.stdout.isatty(): shell.append('-l') terminal_manager = webapp.settings['terminal_manager'] = NamedTermManager( shell_command=shell, From 293bff0c72572edd9c8e642fa8784ae21b7fea3e Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 18 Nov 2020 13:31:54 +0530 Subject: [PATCH 24/68] Allow toggling auth for prometheus metrics Equivalent to https://github.com/jupyterhub/jupyterhub/pull/2224. Port of https://github.com/jupyter/notebook/pull/5870 Prometheus metrics can potentially leak information about the user, so they should be kept behind auth by default. However, for many JupyterHub deployments, they would need to be scraped by a centralized Prometheus instance that can not really authenticate separately to each user notebook without a lot of work. Admins can use this setting to allow unauthenticated access to the /metrics endpoint. 
--- jupyter_server/base/handlers.py | 6 ++++-- jupyter_server/serverapp.py | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index bad31513d7..363230b1de 100755 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -842,10 +842,12 @@ def get(self): class PrometheusMetricsHandler(JupyterHandler): """ - Return prometheus metrics for this Jupyter server + Return prometheus metrics for this notebook server """ - @web.authenticated def get(self): + if self.settings['authenticate_prometheus'] and not self.logged_in: + raise web.HTTPError(403) + self.set_header('Content-Type', prometheus_client.CONTENT_TYPE_LATEST) self.write(prometheus_client.generate_latest(prometheus_client.REGISTRY)) diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 13bc0940be..6e6b9aafab 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -246,6 +246,7 @@ def init_settings(self, jupyter_app, kernel_manager, contents_manager, disable_check_xsrf=jupyter_app.disable_check_xsrf, allow_remote_access=jupyter_app.allow_remote_access, local_hostnames=jupyter_app.local_hostnames, + authenticate_prometheus=jupyter_app.authenticate_prometheus, # managers kernel_manager=kernel_manager, @@ -1199,6 +1200,14 @@ def _update_server_extensions(self, change): is not available. """)) + authenticate_prometheus = Bool( + True, + help="""" + Require authentication to access prometheus metrics. 
+ """, + config=True + ) + def parse_command_line(self, argv=None): super(ServerApp, self).parse_command_line(argv) From d865e5901cae3c7723438491732441c7b265cc6a Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Thu, 3 Dec 2020 10:41:41 -0700 Subject: [PATCH 25/68] Fix upgrade packaging dependencies build step --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6193fc09e3..7ffdefeb7c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,7 +25,7 @@ jobs: architecture: 'x64' - name: Upgrade packaging dependencies run: | - pip install --upgrade pip setuptools wheel + pip install --upgrade pip setuptools wheel --user - name: Get pip cache dir id: pip-cache run: | From 20c84e8d6deca063e43f6bdbd27ecaea41af3791 Mon Sep 17 00:00:00 2001 From: Min RK Date: Wed, 9 Dec 2020 13:21:19 +0100 Subject: [PATCH 26/68] sync _redirect_safe with upstream --- jupyter_server/auth/login.py | 19 +++++--- tests/auth/test_login.py | 95 ++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 tests/auth/test_login.py diff --git a/jupyter_server/auth/login.py b/jupyter_server/auth/login.py index 4d93cc192d..db4b89718b 100644 --- a/jupyter_server/auth/login.py +++ b/jupyter_server/auth/login.py @@ -36,14 +36,19 @@ def _redirect_safe(self, url, default=None): """ if default is None: default = self.base_url - if not url.startswith(self.base_url): + # protect chrome users from mishandling unescaped backslashes. 
+ # \ is not valid in urls, but some browsers treat it as / + # instead of %5C, causing `\\` to behave as `//` + url = url.replace("\\", "%5C") + parsed = urlparse(url) + if parsed.netloc or not (parsed.path + "/").startswith(self.base_url): # require that next_url be absolute path within our path allow = False # OR pass our cross-origin check - if '://' in url: + if parsed.netloc: # if full URL, run our cross-origin check: - parsed = urlparse(url.lower()) origin = '%s://%s' % (parsed.scheme, parsed.netloc) + origin = origin.lower() if self.allow_origin: allow = self.allow_origin == origin elif self.allow_origin_pat: @@ -77,9 +82,11 @@ def post(self): self.set_login_cookie(self, uuid.uuid4().hex) elif self.token and self.token == typed_password: self.set_login_cookie(self, uuid.uuid4().hex) - if new_password and self.settings.get('allow_password_change'): - config_dir = self.settings.get('config_dir') - config_file = os.path.join(config_dir, 'jupyter_server_config.json') + if new_password and self.settings.get("allow_password_change"): + config_dir = self.settings.get("config_dir") + config_file = os.path.join( + config_dir, "jupyter_notebook_config.json" + ) set_password(new_password, config_file=config_file) self.log.info("Wrote hashed password to %s" % config_file) else: diff --git a/tests/auth/test_login.py b/tests/auth/test_login.py new file mode 100644 index 0000000000..ebf2a5a1d2 --- /dev/null +++ b/tests/auth/test_login.py @@ -0,0 +1,95 @@ +"""Tests for login redirects""" + +from functools import partial +from urllib.parse import urlencode + +import pytest +from tornado.httpclient import HTTPClientError +from tornado.httputil import url_concat, parse_cookie + +from jupyter_server.utils import url_path_join + + +# override default config to ensure a non-empty base url is used +@pytest.fixture +def jp_base_url(): + return "/a%40b/" + + +@pytest.fixture +def jp_server_config(jp_base_url): + return { + "ServerApp": { + "base_url": jp_base_url, + }, + } + + 
+async def _login(jp_serverapp, http_server_client, jp_base_url, next): + # first: request login page with no creds + login_url = url_path_join(jp_base_url, "login") + first = await http_server_client.fetch(login_url) + cookie_header = first.headers["Set-Cookie"] + cookies = parse_cookie(cookie_header) + + # second, submit login form with credentials + try: + resp = await http_server_client.fetch( + url_concat(login_url, {"next": next}), + method="POST", + body=urlencode( + { + "password": jp_serverapp.token, + "_xsrf": cookies.get("_xsrf", ""), + } + ), + headers={"Cookie": cookie_header}, + follow_redirects=False, + ) + except HTTPClientError as e: + if e.code != 302: + raise + return e.response.headers["Location"] + else: + assert resp.code == 302, "Should have returned a redirect!" + + +@pytest.fixture +def login(jp_serverapp, http_server_client, jp_base_url): + """Fixture to return a function to login to a Jupyter server + + by submitting the login page form + """ + yield partial(_login, jp_serverapp, http_server_client, jp_base_url) + + +@pytest.mark.parametrize( + "bad_next", + ( + r"\\tree", + "//some-host", + "//host{base_url}tree", + "https://google.com", + "/absolute/not/base_url", + ), +) +async def test_next_bad(login, jp_base_url, bad_next): + bad_next = bad_next.format(base_url=jp_base_url) + url = await login(bad_next) + assert url == jp_base_url + + +@pytest.mark.parametrize( + "next_path", + ( + "tree/", + "//{base_url}tree", + "notebooks/notebook.ipynb", + "tree//something", + ), +) +async def test_next_ok(login, jp_base_url, next_path): + next_path = next_path.format(base_url=jp_base_url) + expected = jp_base_url + next_path + actual = await login(next=expected) + assert actual == expected From 1858bb50961deec8d3aac386c5b02a965252bfa1 Mon Sep 17 00:00:00 2001 From: Kevin Bates Date: Thu, 10 Dec 2020 11:05:46 -0800 Subject: [PATCH 27/68] Restore pytest plugin from pytest-jupyter --- examples/simple/setup.py | 2 +- 
examples/simple/tests/conftest.py | 3 + jupyter_server/pytest_plugin.py | 450 ++++++++++++++++++++++++ setup.py | 3 +- tests/conftest.py | 3 + tests/extension/test_handler.py | 38 +- tests/extension/test_serverextension.py | 2 +- tests/services/contents/test_api.py | 63 ++-- tests/services/kernels/test_api.py | 12 +- tests/services/kernelspecs/test_api.py | 5 +- tests/services/sessions/test_api.py | 10 +- tests/test_paths.py | 6 +- tests/utils.py | 7 + 13 files changed, 535 insertions(+), 69 deletions(-) create mode 100644 examples/simple/tests/conftest.py create mode 100644 jupyter_server/pytest_plugin.py create mode 100644 tests/conftest.py diff --git a/examples/simple/setup.py b/examples/simple/setup.py index 9040c55e86..c15b124566 100755 --- a/examples/simple/setup.py +++ b/examples/simple/setup.py @@ -40,7 +40,7 @@ def add_data_files(path): 'jinja2', ], extras_require = { - 'test': ['pytest-jupyter'], + 'test': ['pytest'], }, include_package_data=True, cmdclass = cmdclass, diff --git a/examples/simple/tests/conftest.py b/examples/simple/tests/conftest.py new file mode 100644 index 0000000000..87c6aff30a --- /dev/null +++ b/examples/simple/tests/conftest.py @@ -0,0 +1,3 @@ +pytest_plugins = [ + 'jupyter_server.pytest_plugin' +] diff --git a/jupyter_server/pytest_plugin.py b/jupyter_server/pytest_plugin.py new file mode 100644 index 0000000000..dee6d3f0d5 --- /dev/null +++ b/jupyter_server/pytest_plugin.py @@ -0,0 +1,450 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
import os
import sys
import json
import pytest
import shutil
import urllib.parse

from binascii import hexlify

import tornado
from tornado.escape import url_escape
import jupyter_core.paths
import nbformat
from traitlets.config import Config

from jupyter_server.extension import serverextension
from jupyter_server.serverapp import ServerApp
from jupyter_server.utils import url_path_join
from jupyter_server.services.contents.filemanager import FileContentsManager
from jupyter_server.services.contents.largefilemanager import LargeFileManager


# List of dependencies needed for this plugin.
pytest_plugins = [
    "pytest_tornasync",
    # Once the chunk below moves to Jupyter Core, we'll uncomment
    # this plugin and use the fixtures directly from Jupyter Core.
    # "jupyter_core.pytest_plugin"
]

# ============ Move to Jupyter Core =============

def mkdir(tmp_path, *parts):
    """Create (if missing) and return the directory ``tmp_path`` joined with ``parts``.

    Parameters
    ----------
    tmp_path : pathlib.Path
        Base temporary directory.
    *parts : str
        Path components appended to ``tmp_path``.

    Returns
    -------
    pathlib.Path
        The now-existing directory path.
    """
    path = tmp_path.joinpath(*parts)
    # exist_ok=True avoids the check-then-create race of the original
    # ``if not path.exists(): path.mkdir(parents=True)`` form and is a no-op
    # when the directory already exists.
    path.mkdir(parents=True, exist_ok=True)
    return path


@pytest.fixture
def jp_home_dir(tmp_path):
    """Provides a temporary HOME directory value."""
    return mkdir(tmp_path, "home")


@pytest.fixture
def jp_data_dir(tmp_path):
    """Provides a temporary Jupyter data dir directory value."""
    return mkdir(tmp_path, "data")


@pytest.fixture
def jp_config_dir(tmp_path):
    """Provides a temporary Jupyter config dir directory value."""
    return mkdir(tmp_path, "config")


@pytest.fixture
def jp_runtime_dir(tmp_path):
    """Provides a temporary Jupyter runtime dir directory value."""
    return mkdir(tmp_path, "runtime")


@pytest.fixture
def jp_system_jupyter_path(tmp_path):
    """Provides a temporary Jupyter system path value."""
    return mkdir(tmp_path, "share", "jupyter")


@pytest.fixture
def jp_env_jupyter_path(tmp_path):
    """Provides a temporary Jupyter env system path value."""
    return mkdir(tmp_path, "env", "share", "jupyter")


@pytest.fixture
def jp_system_config_path(tmp_path):
    """Provides a temporary Jupyter config path value."""
    return mkdir(tmp_path, "etc", "jupyter")


@pytest.fixture
def jp_env_config_path(tmp_path):
    """Provides a temporary Jupyter env config path value."""
    return mkdir(tmp_path, "env", "etc", "jupyter")


@pytest.fixture
def jp_environ(
    monkeypatch,
    tmp_path,
    jp_home_dir,
    jp_data_dir,
    jp_config_dir,
    jp_runtime_dir,
    jp_system_jupyter_path,
    jp_system_config_path,
    jp_env_jupyter_path,
    jp_env_config_path,
):
    """Configures a temporary environment based on Jupyter-specific environment variables."""
    monkeypatch.setenv("HOME", str(jp_home_dir))
    monkeypatch.setenv("PYTHONPATH", os.pathsep.join(sys.path))
    # monkeypatch.setenv("JUPYTER_NO_CONFIG", "1")
    monkeypatch.setenv("JUPYTER_CONFIG_DIR", str(jp_config_dir))
    monkeypatch.setenv("JUPYTER_DATA_DIR", str(jp_data_dir))
    monkeypatch.setenv("JUPYTER_RUNTIME_DIR", str(jp_runtime_dir))
    # Also redirect the module-level path constants in jupyter_core so that
    # nothing in the test run touches the real system/user locations.
    monkeypatch.setattr(
        jupyter_core.paths, "SYSTEM_JUPYTER_PATH", [str(jp_system_jupyter_path)]
    )
    monkeypatch.setattr(jupyter_core.paths, "ENV_JUPYTER_PATH", [str(jp_env_jupyter_path)])
    monkeypatch.setattr(
        jupyter_core.paths, "SYSTEM_CONFIG_PATH", [str(jp_system_config_path)]
    )
    monkeypatch.setattr(jupyter_core.paths, "ENV_CONFIG_PATH", [str(jp_env_config_path)])


# ================= End: Move to Jupyter core ================

# NOTE: This is a temporary fix for Windows 3.8
# We have to override the io_loop fixture with an
# asyncio patch. This will probably be removed in
# the future.
@pytest.fixture
def jp_asyncio_patch():
    """Appropriately configures the event loop policy if running on Windows w/ Python >= 3.8."""
    ServerApp()._init_asyncio_patch()


@pytest.fixture
def io_loop(jp_asyncio_patch):
    """Returns an ioloop instance that includes the asyncio patch for Windows 3.8 platforms."""
    loop = tornado.ioloop.IOLoop()
    loop.make_current()
    yield loop
    loop.clear_current()
    loop.close(all_fds=True)


@pytest.fixture
def jp_server_config():
    """Allows tests to setup their specific configuration values."""
    return {}


@pytest.fixture
def jp_root_dir(tmp_path):
    """Provides a temporary Jupyter root directory value."""
    return mkdir(tmp_path, "root_dir")


@pytest.fixture
def jp_template_dir(tmp_path):
    """Provides a temporary Jupyter templates directory value."""
    return mkdir(tmp_path, "templates")


@pytest.fixture
def jp_argv():
    """Allows tests to setup specific argv values."""
    return []


@pytest.fixture
def jp_extension_environ(jp_env_config_path, monkeypatch):
    """Monkeypatch a Jupyter Extension's config path into each test's environment variable"""
    monkeypatch.setattr(serverextension, "ENV_CONFIG_PATH", [str(jp_env_config_path)])


@pytest.fixture
def jp_http_port(http_server_port):
    """Returns the port value from the http_server_port fixture."""
    return http_server_port[-1]


@pytest.fixture
def jp_nbconvert_templates(jp_data_dir):
    """Sets up a temporary directory consisting of the nbconvert templates."""

    # Get path to nbconvert template directory *before*
    # monkeypatching the paths env variable via the jp_environ fixture.
    possible_paths = jupyter_core.paths.jupyter_path('nbconvert', 'templates')
    nbconvert_path = None
    for path in possible_paths:
        if os.path.exists(path):
            nbconvert_path = path
            break

    nbconvert_target = jp_data_dir / 'nbconvert' / 'templates'

    # copy nbconvert templates to new tmp data_dir.
    if nbconvert_path:
        shutil.copytree(nbconvert_path, str(nbconvert_target))


@pytest.fixture(scope='function')
def jp_configurable_serverapp(
    jp_nbconvert_templates,  # this fixture must precede jp_environ
    jp_environ,
    jp_server_config,
    jp_argv,
    jp_http_port,
    jp_base_url,
    tmp_path,
    jp_root_dir,
    io_loop,
):
    """Starts a Jupyter Server instance based on
    the provided configuration values.

    The fixture is a factory; it can be called like
    a function inside a unit test. Here's a basic
    example of how use this fixture:

    .. code-block:: python

        def my_test(jp_configurable_serverapp):

            app = jp_configurable_serverapp(...)
            ...
    """
    ServerApp.clear_instance()

    def _configurable_serverapp(
        config=jp_server_config,
        base_url=jp_base_url,
        argv=jp_argv,
        environ=jp_environ,
        http_port=jp_http_port,
        tmp_path=tmp_path,
        root_dir=jp_root_dir,
        **kwargs
    ):
        c = Config(config)
        c.NotebookNotary.db_file = ":memory:"
        token = hexlify(os.urandom(4)).decode("ascii")
        app = ServerApp.instance(
            # Set the log level to debug for testing purposes
            log_level='DEBUG',
            port=http_port,
            port_retries=0,
            open_browser=False,
            root_dir=str(root_dir),
            base_url=base_url,
            config=c,
            allow_root=True,
            token=token,
            **kwargs
        )

        app.init_signal = lambda: None
        app.log.propagate = True
        app.log.handlers = []
        # Initialize app without httpserver
        app.initialize(argv=argv, new_httpserver=False)
        app.log.propagate = True
        app.log.handlers = []
        # Start app without ioloop
        app.start_app()
        return app

    return _configurable_serverapp


@pytest.fixture
def jp_ensure_app_fixture(request):
    """Ensures that the 'app' fixture used by pytest-tornasync
    is set to `jp_web_app`, the Tornado Web Application returned
    by the ServerApp in Jupyter Server, provided by the jp_web_app
    fixture in this module.

    Note, this hardcodes the `app_fixture` option from
    pytest-tornasync to `jp_web_app`. If this value is configured
    to something other than the default, it will raise an exception.
    """
    app_option = request.config.getoption("app_fixture")
    if app_option not in ["app", "jp_web_app"]:
        raise Exception("jp_serverapp requires the `app-fixture` option "
                        "to be set to `jp_web_app`. Try rerunning the "
                        "current tests with the option `--app-fixture "
                        "jp_web_app`.")
    elif app_option == "app":
        # Manually set the app_fixture to `jp_web_app` if it's
        # not set already.
        request.config.option.app_fixture = "jp_web_app"


@pytest.fixture(scope="function")
def jp_serverapp(
    jp_ensure_app_fixture,
    jp_server_config,
    jp_argv,
    jp_configurable_serverapp
):
    """Starts a Jupyter Server instance based on the established configuration values."""
    app = jp_configurable_serverapp(config=jp_server_config, argv=jp_argv)
    yield app
    app.remove_server_info_file()
    app.remove_browser_open_file()
    app.cleanup_kernels()


@pytest.fixture
def jp_web_app(jp_serverapp):
    """app fixture is needed by pytest_tornasync plugin"""
    return jp_serverapp.web_app


@pytest.fixture
def jp_auth_header(jp_serverapp):
    """Configures an authorization header using the token from the serverapp fixture."""
    return {"Authorization": "token {token}".format(token=jp_serverapp.token)}


@pytest.fixture
def jp_base_url():
    """Returns the base url to use for the test."""
    return "/a%40b/"


@pytest.fixture
def jp_fetch(jp_serverapp, http_server_client, jp_auth_header, jp_base_url):
    """Sends an (asynchronous) HTTP request to a test server.

    The fixture is a factory; it can be called like
    a function inside a unit test. Here's a basic
    example of how use this fixture:

    .. code-block:: python

        async def my_test(jp_fetch):

            response = await jp_fetch("api", "spec.yaml")
            ...
    """
    def client_fetch(*parts, headers=None, params=None, **kwargs):
        # Fresh dict per call; the previous mutable defaults (``headers={}``)
        # were shared across calls and silently accumulated state. Copying
        # also avoids mutating a caller-supplied dict in place.
        headers = {} if headers is None else dict(headers)
        params = {} if params is None else params
        # Handle URL strings
        path_url = url_escape(url_path_join(*parts), plus=False)
        base_path_url = url_path_join(jp_base_url, path_url)
        params_url = urllib.parse.urlencode(params)
        url = base_path_url + "?" + params_url
        # Add auth keys to header
        headers.update(jp_auth_header)
        # Make request.
        return http_server_client.fetch(
            url, headers=headers, request_timeout=20, **kwargs
        )
    return client_fetch


@pytest.fixture
def jp_ws_fetch(jp_serverapp, jp_auth_header, jp_http_port, jp_base_url):
    """Sends a websocket request to a test server.

    The fixture is a factory; it can be called like
    a function inside a unit test. Here's a basic
    example of how use this fixture:

    .. code-block:: python

        async def my_test(jp_fetch, jp_ws_fetch):
            # Start a kernel
            r = await jp_fetch(
                'api', 'kernels',
                method='POST',
                body=json.dumps({
                    'name': "python3"
                })
            )
            kid = json.loads(r.body.decode())['id']

            # Open a websocket connection.
            ws = await jp_ws_fetch(
                'api', 'kernels', kid, 'channels'
            )
            ...
    """
    def client_fetch(*parts, headers=None, params=None, **kwargs):
        # Fresh dict per call; see jp_fetch for why mutable defaults
        # were a bug here.
        headers = {} if headers is None else dict(headers)
        params = {} if params is None else params
        # Handle URL strings
        path_url = url_escape(url_path_join(*parts), plus=False)
        base_path_url = url_path_join(jp_base_url, path_url)
        urlparts = urllib.parse.urlparse('ws://localhost:{}'.format(jp_http_port))
        urlparts = urlparts._replace(
            path=base_path_url,
            query=urllib.parse.urlencode(params)
        )
        url = urlparts.geturl()
        # Add auth keys to header
        headers.update(jp_auth_header)
        # Make request. Pass the *merged* headers; the original passed
        # ``jp_auth_header`` here, dropping any caller-supplied headers.
        req = tornado.httpclient.HTTPRequest(
            url,
            headers=headers,
            connect_timeout=120
        )
        return tornado.websocket.websocket_connect(req)
    return client_fetch


some_resource = u"The very model of a modern major general"
sample_kernel_json = {
    'argv': ['cat', '{connection_file}'],
    'display_name': 'Test kernel',
}


@pytest.fixture
def jp_kernelspecs(jp_data_dir):
    """Configures some sample kernelspecs in the Jupyter data directory."""
    spec_names = ['sample', 'sample 2']
    for name in spec_names:
        sample_kernel_dir = jp_data_dir.joinpath('kernels', name)
        sample_kernel_dir.mkdir(parents=True)
        # Create kernel json file
        sample_kernel_file = sample_kernel_dir.joinpath('kernel.json')
        sample_kernel_file.write_text(json.dumps(sample_kernel_json))
        # Create resources text
        sample_kernel_resources = sample_kernel_dir.joinpath('resource.txt')
        sample_kernel_resources.write_text(some_resource)


@pytest.fixture(params=[True, False])
def jp_contents_manager(request, tmp_path):
    """Returns a FileContentsManager instance based on the use_atomic_writing parameter value."""
    return FileContentsManager(root_dir=str(tmp_path), use_atomic_writing=request.param)


@pytest.fixture
def jp_large_contents_manager(tmp_path):
    """Returns a LargeFileManager instance."""
    return LargeFileManager(root_dir=str(tmp_path))


@pytest.fixture
def jp_create_notebook(jp_root_dir):
    """Creates a notebook in the test's home directory."""
    def inner(nbpath):
        nbpath = jp_root_dir.joinpath(nbpath)
        # Check that the notebook has the correct file extension.
        if nbpath.suffix != '.ipynb':
            raise Exception("File extension for notebook must be .ipynb")
        # If the notebook path has a parent directory, make sure it's created.
        parent = nbpath.parent
        parent.mkdir(parents=True, exist_ok=True)
        # Create a notebook string and write to file.
        nb = nbformat.v4.new_notebook()
        nbtext = nbformat.writes(nb, version=4)
        nbpath.write_text(nbtext)
    return inner
+ nb = nbformat.v4.new_notebook() + nbtext = nbformat.writes(nb, version=4) + nbpath.write_text(nbtext) + return inner diff --git a/setup.py b/setup.py index c796720558..063764d2ae 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,8 @@ ], extras_require = { 'test': ['coverage', 'requests', - 'pytest', 'pytest-cov', 'pytest-jupyter', + 'pytest', 'pytest-cov', + 'pytest-tornasync', 'pytest-console-scripts', 'ipykernel'], }, python_requires = '>=3.6', diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000..bdac3802bc --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,3 @@ +pytest_plugins = [ + "jupyter_server.pytest_plugin" +] diff --git a/tests/extension/test_handler.py b/tests/extension/test_handler.py index 1f5b52e64a..1e4a47c78a 100644 --- a/tests/extension/test_handler.py +++ b/tests/extension/test_handler.py @@ -51,7 +51,7 @@ async def test_handler_template(jp_fetch, mock_template): } ] ) -async def test_handler_setting(jp_fetch): +async def test_handler_setting(jp_fetch, jp_server_config): # Test that the extension trait was picked up by the webapp. r = await jp_fetch( 'mock', @@ -64,7 +64,7 @@ async def test_handler_setting(jp_fetch): @pytest.mark.parametrize( 'jp_argv', (['--MockExtensionApp.mock_trait=test mock trait'],) ) -async def test_handler_argv(jp_fetch): +async def test_handler_argv(jp_fetch, jp_argv): # Test that the extension trait was picked up by the webapp. 
r = await jp_fetch( 'mock', @@ -75,28 +75,31 @@ async def test_handler_argv(jp_fetch): @pytest.mark.parametrize( - 'jp_server_config', + 'jp_server_config,jp_base_url', [ - { - "ServerApp": { - "jpserver_extensions": { - "tests.extension.mockextensions": True + ( + { + "ServerApp": { + "jpserver_extensions": { + "tests.extension.mockextensions": True + }, + # Move extension handlers behind a url prefix + "base_url": "test_prefix" }, - # Move extension handlers behind a url prefix - "base_url": "test_prefix" + "MockExtensionApp": { + # Change a trait in the MockExtensionApp using + # the following config value. + "mock_trait": "test mock trait" + } }, - "MockExtensionApp": { - # Change a trait in the MockExtensionApp using - # the following config value. - "mock_trait": "test mock trait" - } - } + '/test_prefix/' + ) ] ) -async def test_base_url(jp_fetch): +async def test_base_url(jp_fetch, jp_server_config, jp_base_url): # Test that the extension's handlers were properly prefixed r = await jp_fetch( - 'test_prefix', 'mock', + 'mock', method='GET' ) assert r.code == 200 @@ -104,7 +107,6 @@ async def test_base_url(jp_fetch): # Test that the static namespace was prefixed by base_url r = await jp_fetch( - 'test_prefix', 'static', 'mockextension', 'mock.txt', method='GET' ) diff --git a/tests/extension/test_serverextension.py b/tests/extension/test_serverextension.py index cc2fb80dd6..2034d3279f 100644 --- a/tests/extension/test_serverextension.py +++ b/tests/extension/test_serverextension.py @@ -106,7 +106,7 @@ def test_merge_config( } ] ) -def test_load_ordered(jp_serverapp): +def test_load_ordered(jp_serverapp, jp_server_config): assert jp_serverapp.mockII is True, "Mock II should have been loaded" assert jp_serverapp.mockI is True, "Mock I should have been loaded" assert jp_serverapp.mock_shared == 'II', "Mock II should be loaded after Mock I" diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index ee8b57fb7f..fea04db7b4 100644 
--- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -264,24 +264,25 @@ async def test_get_bad_type(jp_fetch, contents): ) assert expected_http_error(e, 400, '%s is not a directory' % path) - -def _check_created(r, contents_dir, path, name, type='notebook'): - fpath = path+'/'+name - assert r.code == 201 - location = '/api/contents/' + tornado.escape.url_escape(fpath, plus=False) - assert r.headers['Location'] == location - model = json.loads(r.body.decode()) - assert model['name'] == name - assert model['path'] == fpath - assert model['type'] == type - path = contents_dir + '/' + fpath - if type == 'directory': - assert pathlib.Path(path).is_dir() - else: - assert pathlib.Path(path).is_file() - - -async def test_create_untitled(jp_fetch, contents, contents_dir): +@pytest.fixture +def _check_created(jp_base_url): + def _inner(r, contents_dir, path, name, type='notebook'): + fpath = path+'/'+name + assert r.code == 201 + location = jp_base_url + 'api/contents/' + tornado.escape.url_escape(fpath, plus=False) + assert r.headers['Location'] == location + model = json.loads(r.body.decode()) + assert model['name'] == name + assert model['path'] == fpath + assert model['type'] == type + path = contents_dir + '/' + fpath + if type == 'directory': + assert pathlib.Path(path).is_dir() + else: + assert pathlib.Path(path).is_file() + return _inner + +async def test_create_untitled(jp_fetch, contents, contents_dir, _check_created): path = 'å b' name = 'Untitled.ipynb' r = await jp_fetch( @@ -309,7 +310,7 @@ async def test_create_untitled(jp_fetch, contents, contents_dir): _check_created(r, str(contents_dir), path, name, type='notebook') -async def test_create_untitled_txt(jp_fetch, contents, contents_dir): +async def test_create_untitled_txt(jp_fetch, contents, contents_dir, _check_created): name = 'untitled.txt' path = 'foo/bar' r = await jp_fetch( @@ -329,7 +330,7 @@ async def test_create_untitled_txt(jp_fetch, contents, contents_dir): assert 
model['content'] == '' -async def test_upload(jp_fetch, contents, contents_dir): +async def test_upload(jp_fetch, contents, contents_dir, _check_created): nb = new_notebook() nbmodel = {'content': nb, 'type': 'notebook'} path = 'å b' @@ -342,7 +343,7 @@ async def test_upload(jp_fetch, contents, contents_dir): _check_created(r, str(contents_dir), path, name) -async def test_mkdir_untitled(jp_fetch, contents, contents_dir): +async def test_mkdir_untitled(jp_fetch, contents, contents_dir, _check_created): name = 'Untitled Folder' path = 'å b' r = await jp_fetch( @@ -370,7 +371,7 @@ async def test_mkdir_untitled(jp_fetch, contents, contents_dir): _check_created(r, str(contents_dir), path, name, type='directory') -async def test_mkdir(jp_fetch, contents, contents_dir): +async def test_mkdir(jp_fetch, contents, contents_dir, _check_created): name = 'New ∂ir' path = 'å b' r = await jp_fetch( @@ -391,7 +392,7 @@ async def test_mkdir_hidden_400(jp_fetch): assert expected_http_error(e, 400) -async def test_upload_txt(jp_fetch, contents, contents_dir): +async def test_upload_txt(jp_fetch, contents, contents_dir, _check_created): body = 'ünicode téxt' model = { 'content' : body, @@ -418,7 +419,7 @@ async def test_upload_txt(jp_fetch, contents, contents_dir): assert model['content'] == body -async def test_upload_b64(jp_fetch, contents, contents_dir): +async def test_upload_b64(jp_fetch, contents, contents_dir, _check_created): body = b'\xFFblob' b64body = encodebytes(body).decode('ascii') model = { @@ -446,7 +447,7 @@ async def test_upload_b64(jp_fetch, contents, contents_dir): assert decoded == body -async def test_copy(jp_fetch, contents, contents_dir): +async def test_copy(jp_fetch, contents, contents_dir, _check_created): path = 'å b' name = 'ç d.ipynb' copy = 'ç d-Copy1.ipynb' @@ -476,7 +477,7 @@ async def test_copy(jp_fetch, contents, contents_dir): _check_created(r, str(contents_dir), path, copy3, type='notebook') -async def test_copy_path(jp_fetch, contents, 
contents_dir): +async def test_copy_path(jp_fetch, contents, contents_dir, _check_created): path1 = 'foo' path2 = 'å b' name = 'a.ipynb' @@ -496,7 +497,7 @@ async def test_copy_path(jp_fetch, contents, contents_dir): _check_created(r, str(contents_dir), path2, copy, type='notebook') -async def test_copy_put_400(jp_fetch, contents, contents_dir): +async def test_copy_put_400(jp_fetch, contents, contents_dir, _check_created): with pytest.raises(tornado.httpclient.HTTPClientError) as e: await jp_fetch( 'api', 'contents', 'å b/cøpy.ipynb', @@ -506,7 +507,7 @@ async def test_copy_put_400(jp_fetch, contents, contents_dir): assert expected_http_error(e, 400) -async def test_copy_dir_400(jp_fetch, contents, contents_dir): +async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created): with pytest.raises(tornado.httpclient.HTTPClientError) as e: await jp_fetch( 'api', 'contents', 'foo', @@ -517,7 +518,7 @@ async def test_copy_dir_400(jp_fetch, contents, contents_dir): @pytest.mark.parametrize('path,name', dirs) -async def test_delete(jp_fetch, contents, contents_dir, path, name): +async def test_delete(jp_fetch, contents, contents_dir, path, name, _check_created): nbname = name+'.ipynb' nbpath = (path + '/' + nbname).lstrip('/') r = await jp_fetch( @@ -567,7 +568,7 @@ async def test_delete_non_empty_dir(jp_fetch, contents): assert expected_http_error(e, 404) -async def test_rename(jp_fetch, contents, contents_dir): +async def test_rename(jp_fetch, jp_base_url, contents, contents_dir): path = 'foo' name = 'a.ipynb' new_name = 'z.ipynb' @@ -579,7 +580,7 @@ async def test_rename(jp_fetch, contents, contents_dir): ) fpath = path+'/'+new_name assert r.code == 200 - location = '/api/contents/' + fpath + location = url_path_join(jp_base_url, 'api/contents/', fpath) assert r.headers['Location'] == location model = json.loads(r.body.decode()) assert model['name'] == new_name diff --git a/tests/services/kernels/test_api.py b/tests/services/kernels/test_api.py index 
712ea85518..bc2ade403d 100644 --- a/tests/services/kernels/test_api.py +++ b/tests/services/kernels/test_api.py @@ -31,18 +31,18 @@ async def test_no_kernels(jp_fetch): assert kernels == [] -async def test_default_kernels(jp_fetch): +async def test_default_kernels(jp_fetch, jp_base_url): r = await jp_fetch( 'api', 'kernels', method='POST', allow_nonstandard_methods=True ) kernel = json.loads(r.body.decode()) - assert r.headers['location'] == '/api/kernels/' + kernel['id'] + assert r.headers['location'] == url_path_join(jp_base_url, '/api/kernels/', kernel['id']) assert r.code == 201 assert isinstance(kernel, dict) - report_uri = '/api/security/csp-report' + report_uri = url_path_join(jp_base_url, '/api/security/csp-report') expected_csp = '; '.join([ "frame-ancestors 'self'", 'report-uri ' + report_uri, @@ -51,7 +51,7 @@ async def test_default_kernels(jp_fetch): assert r.headers['Content-Security-Policy'] == expected_csp -async def test_main_kernel_handler(jp_fetch): +async def test_main_kernel_handler(jp_fetch, jp_base_url): # Start the first kernel r = await jp_fetch( 'api', 'kernels', @@ -61,11 +61,11 @@ async def test_main_kernel_handler(jp_fetch): }) ) kernel1 = json.loads(r.body.decode()) - assert r.headers['location'] == '/api/kernels/' + kernel1['id'] + assert r.headers['location'] == url_path_join(jp_base_url, '/api/kernels/', kernel1['id']) assert r.code == 201 assert isinstance(kernel1, dict) - report_uri = '/api/security/csp-report' + report_uri = url_path_join(jp_base_url, '/api/security/csp-report') expected_csp = '; '.join([ "frame-ancestors 'self'", 'report-uri ' + report_uri, diff --git a/tests/services/kernelspecs/test_api.py b/tests/services/kernelspecs/test_api.py index e95e57f639..f5cc3a9a07 100644 --- a/tests/services/kernelspecs/test_api.py +++ b/tests/services/kernelspecs/test_api.py @@ -3,11 +3,8 @@ import tornado -from pytest_jupyter.jupyter_server import some_resource - from jupyter_client.kernelspec import NATIVE_KERNEL_NAME - -from 
...utils import expected_http_error +from ...utils import expected_http_error, some_resource async def test_list_kernelspecs_bad(jp_fetch, jp_kernelspecs, jp_data_dir): diff --git a/tests/services/sessions/test_api.py b/tests/services/sessions/test_api.py index 6ffd01354b..065e392a6d 100644 --- a/tests/services/sessions/test_api.py +++ b/tests/services/sessions/test_api.py @@ -10,6 +10,8 @@ from nbformat import writes from ...utils import expected_http_error +from jupyter_server.utils import url_path_join + j = lambda r: json.loads(r.body.decode()) @@ -148,7 +150,7 @@ def assert_session_equality(actual, expected): assert_kernel_equality(actual['kernel'], expected['kernel']) -async def test_create(session_client): +async def test_create(session_client, jp_base_url): # Make sure no sessions exist. resp = await session_client.list() sessions = j(resp) @@ -161,7 +163,7 @@ async def test_create(session_client): assert 'id' in new_session assert new_session['path'] == 'foo/nb1.ipynb' assert new_session['type'] == 'notebook' - assert resp.headers['Location'] == '/api/sessions/' + new_session['id'] + assert resp.headers['Location'] == url_path_join(jp_base_url, '/api/sessions/', new_session['id']) # Check that the new session appears in list. 
resp = await session_client.list() @@ -209,7 +211,7 @@ async def test_create_deprecated(session_client): await session_client.cleanup() -async def test_create_with_kernel_id(session_client, jp_fetch): +async def test_create_with_kernel_id(session_client, jp_fetch, jp_base_url): # create a new kernel resp = await jp_fetch('api/kernels', method='POST', allow_nonstandard_methods=True) kernel = j(resp) @@ -220,7 +222,7 @@ async def test_create_with_kernel_id(session_client, jp_fetch): assert 'id' in new_session assert new_session['path'] == 'foo/nb1.ipynb' assert new_session['kernel']['id'] == kernel['id'] - assert resp.headers['Location'] == '/api/sessions/{0}'.format(new_session['id']) + assert resp.headers['Location'] == url_path_join(jp_base_url, '/api/sessions/{0}'.format(new_session['id'])) resp = await session_client.list() sessions = j(resp) diff --git a/tests/test_paths.py b/tests/test_paths.py index 155426d060..60c2951a13 100644 --- a/tests/test_paths.py +++ b/tests/test_paths.py @@ -2,7 +2,7 @@ import pytest import tornado from jupyter_server.base.handlers import path_regex - +from jupyter_server.utils import url_path_join # build regexps that tornado uses: path_pat = re.compile('^' + '/x%s' % path_regex + '$') @@ -45,7 +45,7 @@ async def test_trailing_slash(jp_ensure_app_fixture, uri, expected, http_server_ # http_server_client raises an exception when follow_redirects=False with pytest.raises(tornado.httpclient.HTTPClientError) as err: await http_server_client.fetch( - uri, + url_path_join(jp_base_url, uri), headers=jp_auth_header, request_timeout=20, follow_redirects=False @@ -54,4 +54,4 @@ async def test_trailing_slash(jp_ensure_app_fixture, uri, expected, http_server_ response = err.value.response assert response.code == 302 assert "Location" in response.headers - assert response.headers["Location"] == expected + assert response.headers["Location"] == url_path_join(jp_base_url, expected) diff --git a/tests/utils.py b/tests/utils.py index 
class AsyncCheckpoints(Checkpoints):
    """
    Base class for managing checkpoints for a ContentsManager asynchronously.
    """

    async def rename_all_checkpoints(self, old_path, new_path):
        """Rename all checkpoints for old_path to new_path."""
        for cp in (await self.list_checkpoints(old_path)):
            await self.rename_checkpoint(cp['id'], old_path, new_path)

    async def delete_all_checkpoints(self, path):
        """Delete all checkpoints for the given path."""
        for checkpoint in (await self.list_checkpoints(path)):
            await self.delete_checkpoint(checkpoint['id'], path)


class AsyncGenericCheckpointsMixin(GenericCheckpointsMixin):
    """
    Helper for creating Asynchronous Checkpoints subclasses that can be used with any
    ContentsManager.
    """

    async def create_checkpoint(self, contents_mgr, path):
        """Create a checkpoint from the current content of a file or notebook at ``path``."""
        model = await contents_mgr.get(path, content=True)
        type = model['type']
        if type == 'notebook':
            return await self.create_notebook_checkpoint(
                model['content'],
                path,
            )
        elif type == 'file':
            return await self.create_file_checkpoint(
                model['content'],
                model['format'],
                path,
            )
        else:
            raise HTTPError(500, u'Unexpected type %s' % type)

    async def restore_checkpoint(self, contents_mgr, checkpoint_id, path):
        """Restore a checkpoint."""
        # BUG FIX: the original wrote ``await contents_mgr.get(...)['type']``.
        # Subscription binds tighter than ``await``, so that subscripts the
        # *coroutine object* before awaiting it and raises
        # ``TypeError: 'coroutine' object is not subscriptable``.
        # The coroutine must be awaited first, then indexed.
        type = (await contents_mgr.get(path, content=False))['type']
        if type == 'notebook':
            model = await self.get_notebook_checkpoint(checkpoint_id, path)
        elif type == 'file':
            model = await self.get_file_checkpoint(checkpoint_id, path)
        else:
            raise HTTPError(500, u'Unexpected type %s' % type)
        await contents_mgr.save(model, path)
directory - + File extension can be specified. - + Use `new` to create files with a fully specified path (including filename). """ path = path.strip('/') if not self.dir_exists(path): raise HTTPError(404, 'No such directory: %s' % path) - + model = {} if type: model['type'] = type - + if ext == '.ipynb': model.setdefault('type', 'notebook') else: model.setdefault('type', 'file') - + insert = '' if model['type'] == 'directory': untitled = self.untitled_directory @@ -391,25 +391,25 @@ def new_untitled(self, path='', type='', ext=''): untitled = self.untitled_file else: raise HTTPError(400, "Unexpected model type: %r" % model['type']) - + name = self.increment_filename(untitled + ext, path, insert=insert) path = u'{0}/{1}'.format(path, name) return self.new(model, path) - + def new(self, model=None, path=''): """Create a new file or directory and return its model with no content. - + To create a new untitled entity in a directory, use `new_untitled`. """ path = path.strip('/') if model is None: model = {} - + if path.endswith('.ipynb'): model.setdefault('type', 'notebook') else: model.setdefault('type', 'file') - + # no content, not a directory, so fill out new-file model if 'content' not in model and model['type'] != 'directory': if model['type'] == 'notebook': @@ -419,7 +419,7 @@ def new(self, model=None, path=''): model['content'] = '' model['type'] = 'file' model['format'] = 'text' - + model = self.save(model, path) return model @@ -429,7 +429,7 @@ def copy(self, from_path, to_path=None): If to_path not specified, it will be the parent directory of from_path. If to_path is a directory, filename will increment `from_path-Copy#.ext`. Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. - For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. 
from_path must be a full path to a file. """ @@ -442,20 +442,20 @@ def copy(self, from_path, to_path=None): else: from_dir = '' from_name = path - + model = self.get(path) model.pop('path', None) model.pop('name', None) if model['type'] == 'directory': raise HTTPError(400, "Can't copy directories") - + if to_path is None: to_path = from_dir if self.dir_exists(to_path): name = copy_pat.sub(u'.', from_name) to_name = self.increment_filename(name, to_path, insert='-Copy') to_path = u'{0}/{1}'.format(to_path, to_name) - + model = self.save(model, to_path) return model @@ -530,3 +530,268 @@ def list_checkpoints(self, path): def delete_checkpoint(self, checkpoint_id, path): return self.checkpoints.delete_checkpoint(checkpoint_id, path) + + +class AsyncContentsManager(ContentsManager): + """Base class for serving files and directories asynchronously.""" + + checkpoints_class = Type(AsyncCheckpoints, config=True) + checkpoints = Instance(AsyncCheckpoints, config=True) + + # ContentsManager API part 1: methods that must be + # implemented in subclasses. + + async def dir_exists(self, path): + """Does a directory exist at the given path? + + Like os.path.isdir + + Override this method in subclasses. + + Parameters + ---------- + path : string + The path to check + + Returns + ------- + exists : bool + Whether the path does indeed exist. + """ + raise NotImplementedError + + async def is_hidden(self, path): + """Is path a hidden directory or file? + + Parameters + ---------- + path : string + The path to check. This is an API path (`/` separated, + relative to root dir). + + Returns + ------- + hidden : bool + Whether the path is hidden. + + """ + raise NotImplementedError + + async def file_exists(self, path=''): + """Does a file exist at the given path? + + Like os.path.isfile + + Override this method in subclasses. + + Parameters + ---------- + path : string + The API path of a file to check for. + + Returns + ------- + exists : bool + Whether the file exists. 
+ """ + raise NotImplementedError('must be implemented in a subclass') + + async def exists(self, path): + """Does a file or directory exist at the given path? + + Like os.path.exists + + Parameters + ---------- + path : string + The API path of a file or directory to check for. + + Returns + ------- + exists : bool + Whether the target exists. + """ + return await (self.file_exists(path) or self.dir_exists(path)) + + async def get(self, path, content=True, type=None, format=None): + """Get a file or directory model.""" + raise NotImplementedError('must be implemented in a subclass') + + async def save(self, model, path): + """ + Save a file or directory model to path. + + Should return the saved model with no content. Save implementations + should call self.run_pre_save_hook(model=model, path=path) prior to + writing any data. + """ + raise NotImplementedError('must be implemented in a subclass') + + async def delete_file(self, path): + """Delete the file or directory at path.""" + raise NotImplementedError('must be implemented in a subclass') + + async def rename_file(self, old_path, new_path): + """Rename a file or directory.""" + raise NotImplementedError('must be implemented in a subclass') + + # ContentsManager API part 2: methods that have useable default + # implementations, but can be overridden in subclasses. 
+ + async def delete(self, path): + """Delete a file/directory and any associated checkpoints.""" + path = path.strip('/') + if not path: + raise HTTPError(400, "Can't delete root") + + await self.delete_file(path) + await self.checkpoints.delete_all_checkpoints(path) + + async def rename(self, old_path, new_path): + """Rename a file and any checkpoints associated with that file.""" + await self.rename_file(old_path, new_path) + await self.checkpoints.rename_all_checkpoints(old_path, new_path) + + async def update(self, model, path): + """Update the file's path + + For use in PATCH requests, to enable renaming a file without + re-uploading its contents. Only used for renaming at the moment. + """ + path = path.strip('/') + new_path = await model.get('path', path).strip('/') + if path != new_path: + await self.rename(path, new_path) + model = await self.get(new_path, content=False) + return model + + async def new_untitled(self, path='', type='', ext=''): + """Create a new untitled file or directory in path + + path must be a directory + + File extension can be specified. + + Use `new` to create files with a fully specified path (including filename). 
+ """ + path = path.strip('/') + if not (await self.dir_exists(path)): + raise HTTPError(404, 'No such directory: %s' % path) + + model = {} + if type: + model['type'] = type + + if ext == '.ipynb': + model.setdefault('type', 'notebook') + else: + model.setdefault('type', 'file') + + insert = '' + if model['type'] == 'directory': + untitled = self.untitled_directory + insert = ' ' + elif model['type'] == 'notebook': + untitled = self.untitled_notebook + ext = '.ipynb' + elif model['type'] == 'file': + untitled = self.untitled_file + else: + raise HTTPError(400, "Unexpected model type: %r" % model['type']) + + name = self.increment_filename(untitled + ext, path, insert=insert) + path = u'{0}/{1}'.format(path, name) + return await self.new(model, path) + + async def new(self, model=None, path=''): + """Create a new file or directory and return its model with no content. + + To create a new untitled entity in a directory, use `new_untitled`. + """ + path = path.strip('/') + if model is None: + model = {} + + if path.endswith('.ipynb'): + model.setdefault('type', 'notebook') + else: + model.setdefault('type', 'file') + + # no content, not a directory, so fill out new-file model + if 'content' not in model and model['type'] != 'directory': + if model['type'] == 'notebook': + model['content'] = new_notebook() + model['format'] = 'json' + else: + model['content'] = '' + model['type'] = 'file' + model['format'] = 'text' + + model = await self.save(model, path) + return model + + async def copy(self, from_path, to_path=None): + """Copy an existing file and return its new model. + + If to_path not specified, it will be the parent directory of from_path. + If to_path is a directory, filename will increment `from_path-Copy#.ext`. + Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. 
+ + from_path must be a full path to a file. + """ + path = from_path.strip('/') + if to_path is not None: + to_path = to_path.strip('/') + + if '/' in path: + from_dir, from_name = path.rsplit('/', 1) + else: + from_dir = '' + from_name = path + + model = await self.get(path) + model.pop('path', None) + model.pop('name', None) + if model['type'] == 'directory': + raise HTTPError(400, "Can't copy directories") + if to_path is None: + to_path = from_dir + if (await self.dir_exists(to_path)) : + name = copy_pat.sub(u'.', from_name) + to_name = self.increment_filename(name, to_path, insert='-Copy') + to_path = u'{0}/{1}'.format(to_path, to_name) + + model = await self.save(model, to_path) + return model + + async def trust_notebook(self, path): + """Explicitly trust a notebook + + Parameters + ---------- + path : string + The path of a notebook + """ + model = await self.get(path) + nb = model['content'] + self.log.warning("Trusting notebook %s", path) + self.notary.mark_cells(nb, True) + self.check_and_sign(nb, path) + + # Part 3: Checkpoints API + async def create_checkpoint(self, path): + """Create a checkpoint.""" + return await self.checkpoints.create_checkpoint(self, path) + + async def restore_checkpoint(self, checkpoint_id, path): + """ + Restore a checkpoint. 
+ """ + await self.checkpoints.restore_checkpoint(self, checkpoint_id, path) + + async def list_checkpoints(self, path): + return await self.checkpoints.list_checkpoints(path) + + async def delete_checkpoint(self, checkpoint_id, path): + return await self.checkpoints.delete_checkpoint(checkpoint_id, path) diff --git a/tests/services/contents/test_config.py b/tests/services/contents/test_config.py index 9b4c862dea..3625f781b4 100644 --- a/tests/services/contents/test_config.py +++ b/tests/services/contents/test_config.py @@ -1,7 +1,9 @@ import pytest from traitlets.config import Config +from jupyter_server.services.contents.checkpoints import AsyncCheckpoints from jupyter_server.services.contents.filecheckpoints import GenericFileCheckpoints +from jupyter_server.services.contents.manager import AsyncContentsManager @pytest.fixture @@ -10,4 +12,12 @@ def jp_server_config(): def test_config_did_something(jp_serverapp): - assert isinstance(jp_serverapp.contents_manager.checkpoints, GenericFileCheckpoints) \ No newline at end of file + assert isinstance(jp_serverapp.contents_manager.checkpoints, GenericFileCheckpoints) + + +async def test_async_contents_manager(jp_configurable_serverapp): + config = {'ContentsManager': {'checkpoints_class': AsyncCheckpoints}} + argv = ['--ServerApp.contents_manager_class=jupyter_server.services.contents.manager.AsyncContentsManager'] + app = jp_configurable_serverapp(config=config, argv=argv) + assert isinstance(app.contents_manager, AsyncContentsManager) + From ca60bcf84245b3ff2b672f5a2a0a3c84596296a0 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Thu, 12 Nov 2020 11:46:20 -0700 Subject: [PATCH 29/68] Add asynchronous support section --- docs/source/developers/contents.rst | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/source/developers/contents.rst b/docs/source/developers/contents.rst index cd0f7f7aaf..21472d91d1 100644 --- a/docs/source/developers/contents.rst +++ 
b/docs/source/developers/contents.rst @@ -192,19 +192,19 @@ methods: ContentsManager.is_hidden You may be required to specify a Checkpoints object, as the default one, -``FileCheckpoints``, could be incompatible with your custom +``FileCheckpoints``, could be incompatible with your custom ContentsManager. Customizing Checkpoints ----------------------- .. currentmodule:: jupyter_server.services.contents.checkpoints -Customized Checkpoint definitions allows behavior to be +Customized Checkpoint definitions allows behavior to be altered and extended. The ``Checkpoints`` and ``GenericCheckpointsMixin`` classes (from :mod:`jupyter_server.services.contents.checkpoints`) -have reusable code and are intended to be used together, +have reusable code and are intended to be used together, but require the following methods to be implemented. .. autosummary:: @@ -220,7 +220,7 @@ No-op example ~~~~~~~~~~~~~ Here is an example of a no-op checkpoints object - note the mixin -comes first. The docstrings indicate what each method should do or +comes first. The docstrings indicate what each method should do or return for a more complete implementation. .. code-block:: python @@ -238,7 +238,7 @@ return for a more complete implementation. def delete_checkpoint(self, checkpoint_id, path): """deletes a checkpoint for a file""" def list_checkpoints(self, path): - """returns a list of checkpoint models for a given file, + """returns a list of checkpoint models for a given file, default just does one per file """ return [] @@ -267,3 +267,13 @@ ContentsManager. .. _NBFormat: https://nbformat.readthedocs.io/en/latest/index.html .. _PGContents: https://github.com/quantopian/pgcontents .. _PostgreSQL: https://www.postgresql.org/ + +Asynchronous Support +~~~~~~~~~~~~~~~~~~~~ + +To execute file operations asynchronously in a virtual filesystem, the following classes are available. 
+ +- :class:`~manager.AsyncContentsManager` +- :class:`~filemanager.AsyncFileContentsManager` +- :class:`~checkpoints.AsyncCheckpoints` + From 3a97a6ab0bd2e57c7ee663c91359e731a7942034 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Thu, 12 Nov 2020 11:46:49 -0700 Subject: [PATCH 30/68] Create AsyncFileContentsManager --- jupyter_server/serverapp.py | 4 +- .../services/contents/filecheckpoints.py | 69 +++- jupyter_server/services/contents/fileio.py | 117 ++++++- .../services/contents/filemanager.py | 305 +++++++++++++++++- jupyter_server/services/contents/handlers.py | 21 +- jupyter_server/services/contents/manager.py | 61 +++- setup.py | 1 + tests/services/contents/test_api.py | 8 + tests/services/contents/test_manager.py | 235 +++++++------- tests/services/kernels/test_api.py | 1 + 10 files changed, 679 insertions(+), 143 deletions(-) diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 6e6b9aafab..f6f203772e 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -68,8 +68,8 @@ from .log import log_request from .services.kernels.kernelmanager import MappingKernelManager, AsyncMappingKernelManager from .services.config import ConfigManager -from .services.contents.manager import ContentsManager -from .services.contents.filemanager import FileContentsManager +from .services.contents.manager import AsyncContentsManager, ContentsManager +from .services.contents.filemanager import AsyncFileContentsManager, FileContentsManager from .services.contents.largefilemanager import LargeFileManager from .services.sessions.sessionmanager import SessionManager from .gateway.managers import GatewayKernelManager, GatewayKernelSpecManager, GatewaySessionManager, GatewayClient diff --git a/jupyter_server/services/contents/filecheckpoints.py b/jupyter_server/services/contents/filecheckpoints.py index a8a795da65..5ddf8885e9 100644 --- a/jupyter_server/services/contents/filecheckpoints.py +++ 
b/jupyter_server/services/contents/filecheckpoints.py @@ -7,12 +7,15 @@ from tornado.web import HTTPError from .checkpoints import ( + AsyncCheckpoints, Checkpoints, GenericCheckpointsMixin, ) -from .fileio import FileManagerMixin +from .fileio import AsyncFileManagerMixin, FileManagerMixin +from anyio import run_sync_in_worker_thread from jupyter_core.utils import ensure_dir_exists +from jupyter_server.utils import ensure_async from ipython_genutils.py3compat import getcwd from traitlets import Unicode @@ -137,6 +140,70 @@ def no_such_checkpoint(self, path, checkpoint_id): ) +class AsyncFileCheckpoints(FileCheckpoints, AsyncFileManagerMixin, AsyncCheckpoints): + async def create_checkpoint(self, contents_mgr, path): + """Create a checkpoint.""" + checkpoint_id = u'checkpoint' + src_path = contents_mgr._get_os_path(path) + dest_path = self.checkpoint_path(checkpoint_id, path) + await self._copy(src_path, dest_path) + return (await self.checkpoint_model(checkpoint_id, dest_path)) + + async def restore_checkpoint(self, contents_mgr, checkpoint_id, path): + """Restore a checkpoint.""" + src_path = self.checkpoint_path(checkpoint_id, path) + dest_path = contents_mgr._get_os_path(path) + await self._copy(src_path, dest_path) + + async def checkpoint_model(self, checkpoint_id, os_path): + """construct the info dict for a given checkpoint""" + stats = await run_sync_in_worker_thread(os.stat, os_path) + last_modified = tz.utcfromtimestamp(stats.st_mtime) + info = dict( + id=checkpoint_id, + last_modified=last_modified, + ) + return info + + # ContentsManager-independent checkpoint API + async def rename_checkpoint(self, checkpoint_id, old_path, new_path): + """Rename a checkpoint from old_path to new_path.""" + old_cp_path = self.checkpoint_path(checkpoint_id, old_path) + new_cp_path = self.checkpoint_path(checkpoint_id, new_path) + if os.path.isfile(old_cp_path): + self.log.debug( + "Renaming checkpoint %s -> %s", + old_cp_path, + new_cp_path, + ) + with 
self.perm_to_403(): + await run_sync_in_worker_thread(shutil.move, old_cp_path, new_cp_path) + + async def delete_checkpoint(self, checkpoint_id, path): + """delete a file's checkpoint""" + path = path.strip('/') + cp_path = self.checkpoint_path(checkpoint_id, path) + if not os.path.isfile(cp_path): + self.no_such_checkpoint(path, checkpoint_id) + + self.log.debug("unlinking %s", cp_path) + with self.perm_to_403(): + await run_sync_in_worker_thread(os.unlink, cp_path) + + async def list_checkpoints(self, path): + """list the checkpoints for a given file + + This contents manager currently only supports one checkpoint per file. + """ + path = path.strip('/') + checkpoint_id = "checkpoint" + os_path = self.checkpoint_path(checkpoint_id, path) + if not os.path.isfile(os_path): + return [] + else: + return [await self.checkpoint_model(checkpoint_id, os_path)] + + class GenericFileCheckpoints(GenericCheckpointsMixin, FileCheckpoints): """ Local filesystem Checkpoints that works with any conforming diff --git a/jupyter_server/services/contents/fileio.py b/jupyter_server/services/contents/fileio.py index d1fdaefa2a..91e095729e 100644 --- a/jupyter_server/services/contents/fileio.py +++ b/jupyter_server/services/contents/fileio.py @@ -7,10 +7,12 @@ from contextlib import contextmanager import errno +from functools import partial import io import os import shutil +from anyio import open_file, run_sync_in_worker_thread from tornado.web import HTTPError from jupyter_server.utils import ( @@ -32,6 +34,11 @@ def replace_file(src, dst): """ os.replace(src, dst) +async def async_replace_file(src, dst): + """ replace dst with src asynchronously + """ + await run_sync_in_worker_thread(os.replace, src, dst) + def copy2_safe(src, dst, log=None): """copy src to dst @@ -44,6 +51,18 @@ def copy2_safe(src, dst, log=None): if log: log.debug("copystat on %s failed", dst, exc_info=True) +async def async_copy2_safe(src, dst, log=None): + """copy src to dst asynchronously + + like 
shutil.copy2, but log errors in copystat instead of raising + """ + await run_sync_in_worker_thread(shutil.copyfile, src, dst) + try: + await run_sync_in_worker_thread(shutil.copystat, src, dst) + except OSError: + if log: + log.debug("copystat on %s failed", dst, exc_info=True) + def path_to_intermediate(path): '''Name of the intermediate file used in atomic writes. @@ -116,11 +135,10 @@ def atomic_writing(path, text=True, encoding='utf-8', log=None, **kwargs): os.remove(tmp_path) - @contextmanager def _simple_writing(path, text=True, encoding='utf-8', log=None, **kwargs): """Context manager to write file without doing atomic writing - ( for weird filesystem eg: nfs). + (for weird filesystem eg: nfs). Parameters ---------- @@ -159,8 +177,6 @@ def _simple_writing(path, text=True, encoding='utf-8', log=None, **kwargs): fileobj.close() - - class FileManagerMixin(Configurable): """ Mixin for ContentsAPI classes that interact with the filesystem. @@ -186,7 +202,7 @@ class FileManagerMixin(Configurable): @contextmanager def open(self, os_path, *args, **kwargs): - """wrapper around io.open that turns permission errors into 403""" + """wrapper around open that turns permission errors into 403""" with self.perm_to_403(os_path): with io.open(os_path, *args, **kwargs) as f: yield f @@ -330,3 +346,94 @@ def _save_file(self, os_path, content, format): with self.atomic_writing(os_path, text=False) as f: f.write(bcontent) + +class AsyncFileManagerMixin(FileManagerMixin): + """ + Mixin for ContentsAPI classes that interact with the filesystem asynchronously. 
+ """ + async def _copy(self, src, dest): + """copy src to dest + + like shutil.copy2, but log errors in copystat + """ + await async_copy2_safe(src, dest, log=self.log) + + async def _read_notebook(self, os_path, as_version=4): + """Read a notebook from an os path.""" + with self.open(os_path, 'r', encoding='utf-8') as f: + try: + return await run_sync_in_worker_thread(partial(nbformat.read, as_version=as_version), f) + except Exception as e: + e_orig = e + + # If use_atomic_writing is enabled, we'll guess that it was also + # enabled when this notebook was written and look for a valid + # atomic intermediate. + tmp_path = path_to_intermediate(os_path) + + if not self.use_atomic_writing or not os.path.exists(tmp_path): + raise HTTPError( + 400, + u"Unreadable Notebook: %s %r" % (os_path, e_orig), + ) + + # Move the bad file aside, restore the intermediate, and try again. + invalid_file = path_to_invalid(os_path) + async_replace_file(os_path, invalid_file) + async_replace_file(tmp_path, os_path) + return await self._read_notebook(os_path, as_version) + + async def _save_notebook(self, os_path, nb): + """Save a notebook to an os_path.""" + with self.atomic_writing(os_path, encoding='utf-8') as f: + await run_sync_in_worker_thread(partial(nbformat.write, version=nbformat.NO_CONVERT), nb, f) + + async def _read_file(self, os_path, format): + """Read a non-notebook file. + + os_path: The path to be read. + format: + If 'text', the contents will be decoded as UTF-8. + If 'base64', the raw bytes contents will be encoded as base64. + If not specified, try to decode as UTF-8, and fall back to base64 + """ + if not os.path.isfile(os_path): + raise HTTPError(400, "Cannot read non-file %s" % os_path) + + with self.open(os_path, 'rb') as f: + bcontent = await run_sync_in_worker_thread(f.read) + + if format is None or format == 'text': + # Try to interpret as unicode if format is unknown or if unicode + # was explicitly requested. 
+ try: + return bcontent.decode('utf8'), 'text' + except UnicodeError as e: + if format == 'text': + raise HTTPError( + 400, + "%s is not UTF-8 encoded" % os_path, + reason='bad format', + ) from e + return encodebytes(bcontent).decode('ascii'), 'base64' + + async def _save_file(self, os_path, content, format): + """Save content of a generic file.""" + if format not in {'text', 'base64'}: + raise HTTPError( + 400, + "Must specify format of file contents as 'text' or 'base64'", + ) + try: + if format == 'text': + bcontent = content.encode('utf8') + else: + b64_bytes = content.encode('ascii') + bcontent = decodebytes(b64_bytes) + except Exception as e: + raise HTTPError( + 400, u'Encoding error saving %s: %s' % (os_path, e) + ) from e + + with self.atomic_writing(os_path, text=False) as f: + await run_sync_in_worker_thread(f.write, bcontent) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 32bc0389c9..a46f0b64b0 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -14,12 +14,13 @@ import mimetypes import nbformat +from anyio import run_sync_in_worker_thread from send2trash import send2trash from tornado import web -from .filecheckpoints import FileCheckpoints -from .fileio import FileManagerMixin -from .manager import ContentsManager +from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints +from .fileio import AsyncFileManagerMixin, FileManagerMixin +from .manager import AsyncContentsManager, ContentsManager from ...utils import exists from ipython_genutils.importstring import import_item @@ -548,3 +549,301 @@ def get_kernel_path(self, path, model=None): parent_dir = '' return parent_dir +class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager): + @default('checkpoints_class') + def _checkpoints_class_default(self): + return AsyncFileCheckpoints + + async def _dir_model(self, path, 
content=True): + """Build a model for a directory + + if content is requested, will include a listing of the directory + """ + os_path = self._get_os_path(path) + + four_o_four = u'directory does not exist: %r' % path + + if not os.path.isdir(os_path): + raise web.HTTPError(404, four_o_four) + elif is_hidden(os_path, self.root_dir) and not self.allow_hidden: + self.log.info("Refusing to serve hidden directory %r, via 404 Error", + os_path + ) + raise web.HTTPError(404, four_o_four) + + model = self._base_model(path) + model['type'] = 'directory' + model['size'] = None + if content: + model['content'] = contents = [] + os_dir = self._get_os_path(path) + dir_contents = await run_sync_in_worker_thread(os.listdir, os_dir) + for name in dir_contents: + try: + os_path = os.path.join(os_dir, name) + except UnicodeDecodeError as e: + self.log.warning( + "failed to decode filename '%s': %s", name, e) + continue + + try: + st = await run_sync_in_worker_thread(os.lstat, os_path) + except OSError as e: + # skip over broken symlinks in listing + if e.errno == errno.ENOENT: + self.log.warning("%s doesn't exist", os_path) + else: + self.log.warning("Error stat-ing %s: %s", os_path, e) + continue + + if (not stat.S_ISLNK(st.st_mode) + and not stat.S_ISREG(st.st_mode) + and not stat.S_ISDIR(st.st_mode)): + self.log.debug("%s not a regular file", os_path) + continue + + if self.should_list(name): + if self.allow_hidden or not is_file_hidden(os_path, stat_res=st): + contents.append( + await self.get(path='%s/%s' % (path, name), content=False) + ) + + model['format'] = 'json' + + return model + + async def _file_model(self, path, content=True, format=None): + """Build a model for a file + + if content is requested, include the file contents. + + format: + If 'text', the contents will be decoded as UTF-8. + If 'base64', the raw bytes contents will be encoded as base64. 
+ If not specified, try to decode as UTF-8, and fall back to base64 + """ + model = self._base_model(path) + model['type'] = 'file' + + os_path = self._get_os_path(path) + model['mimetype'] = mimetypes.guess_type(os_path)[0] + + if content: + content, format = await self._read_file(os_path, format) + if model['mimetype'] is None: + default_mime = { + 'text': 'text/plain', + 'base64': 'application/octet-stream' + }[format] + model['mimetype'] = default_mime + + model.update( + content=content, + format=format, + ) + + return model + + async def _notebook_model(self, path, content=True): + """Build a notebook model + + if content is requested, the notebook content will be populated + as a JSON structure (not double-serialized) + """ + model = self._base_model(path) + model['type'] = 'notebook' + os_path = self._get_os_path(path) + + if content: + nb = await self._read_notebook(os_path, as_version=4) + self.mark_trusted_cells(nb, path) + model['content'] = nb + model['format'] = 'json' + self.validate_notebook_model(model) + + return model + + async def get(self, path, content=True, type=None, format=None): + """ Takes a path for an entity and returns its model + + Parameters + ---------- + path : str + the API path that describes the relative path for the target + content : bool + Whether to include the contents in the reply + type : str, optional + The requested type - 'file', 'notebook', or 'directory'. + Will raise HTTPError 400 if the content doesn't match. + format : str, optional + The requested format for file contents. 'text' or 'base64'. + Ignored if this returns a notebook or directory model. + + Returns + ------- + model : dict + the contents model. If content=True, returns the contents + of the file or directory as well. 
+ """ + path = path.strip('/') + + if not self.exists(path): + raise web.HTTPError(404, u'No such file or directory: %s' % path) + + os_path = self._get_os_path(path) + if os.path.isdir(os_path): + if type not in (None, 'directory'): + raise web.HTTPError(400, + u'%s is a directory, not a %s' % (path, type), reason='bad type') + model = await self._dir_model(path, content=content) + elif type == 'notebook' or (type is None and path.endswith('.ipynb')): + model = await self._notebook_model(path, content=content) + else: + if type == 'directory': + raise web.HTTPError(400, + u'%s is not a directory' % path, reason='bad type') + model = await self._file_model(path, content=content, format=format) + return model + + async def _save_directory(self, os_path, model, path=''): + """create a directory""" + if is_hidden(os_path, self.root_dir) and not self.allow_hidden: + raise web.HTTPError(400, u'Cannot create hidden directory %r' % os_path) + if not os.path.exists(os_path): + with self.perm_to_403(): + await run_sync_in_worker_thread(os.mkdir, os_path) + elif not os.path.isdir(os_path): + raise web.HTTPError(400, u'Not a directory: %s' % (os_path)) + else: + self.log.debug("Directory %r already exists", os_path) + + async def save(self, model, path=''): + """Save the file model and return the model with no content.""" + path = path.strip('/') + + if 'type' not in model: + raise web.HTTPError(400, u'No file type provided') + if 'content' not in model and model['type'] != 'directory': + raise web.HTTPError(400, u'No file content provided') + + os_path = self._get_os_path(path) + self.log.debug("Saving %s", os_path) + + self.run_pre_save_hook(model=model, path=path) + + try: + if model['type'] == 'notebook': + nb = nbformat.from_dict(model['content']) + self.check_and_sign(nb, path) + await self._save_notebook(os_path, nb) + # One checkpoint should always exist for notebooks. 
+ if not (await self.checkpoints.list_checkpoints(path)): + await self.create_checkpoint(path) + elif model['type'] == 'file': + # Missing format will be handled internally by _save_file. + await self._save_file(os_path, model['content'], model.get('format')) + elif model['type'] == 'directory': + await self._save_directory(os_path, model, path) + else: + raise web.HTTPError(400, "Unhandled contents type: %s" % model['type']) + except web.HTTPError: + raise + except Exception as e: + self.log.error(u'Error while saving file: %s %s', path, e, exc_info=True) + raise web.HTTPError(500, u'Unexpected error while saving file: %s %s' + % (path, e)) from e + + validation_message = None + if model['type'] == 'notebook': + self.validate_notebook_model(model) + validation_message = model.get('message', None) + + model = await self.get(path, content=False) + if validation_message: + model['message'] = validation_message + + self.run_post_save_hook(model=model, os_path=os_path) + + return model + + async def delete_file(self, path): + """Delete file at path.""" + path = path.strip('/') + os_path = self._get_os_path(path) + rm = os.unlink + if not os.path.exists(os_path): + raise web.HTTPError(404, u'File or directory does not exist: %s' % os_path) + + async def _check_trash(os_path): + if sys.platform in {'win32', 'darwin'}: + return True + + # It's a bit more nuanced than this, but until we can better + # distinguish errors from send2trash, assume that we can only trash + # files on the same partition as the home directory. + file_dev = (await run_sync_in_worker_thread(os.stat, os_path)).st_dev + home_dev = (await run_sync_in_worker_thread(os.stat, os.path.expanduser('~'))).st_dev + return file_dev == home_dev + + async def is_non_empty_dir(os_path): + if os.path.isdir(os_path): + # A directory containing only leftover checkpoints is + # considered empty. 
+ cp_dir = getattr(self.checkpoints, 'checkpoint_dir', None) + dir_contents = set(await run_sync_in_worker_thread(os.listdir, os_path)) + if dir_contents - {cp_dir}: + return True + + return False + + if self.delete_to_trash: + if sys.platform == 'win32' and await is_non_empty_dir(os_path): + # send2trash can really delete files on Windows, so disallow + # deleting non-empty files. See Github issue 3631. + raise web.HTTPError(400, u'Directory %s not empty' % os_path) + if await _check_trash(os_path): + self.log.debug("Sending %s to trash", os_path) + # Looking at the code in send2trash, I don't think the errors it + # raises let us distinguish permission errors from other errors in + # code. So for now, just let them all get logged as server errors. + send2trash(os_path) + return + else: + self.log.warning("Skipping trash for %s, on different device " + "to home directory", os_path) + + if os.path.isdir(os_path): + # Don't permanently delete non-empty directories. + if await is_non_empty_dir(os_path): + raise web.HTTPError(400, u'Directory %s not empty' % os_path) + self.log.debug("Removing directory %s", os_path) + with self.perm_to_403(): + await run_sync_in_worker_thread(shutil.rmtree, os_path) + else: + self.log.debug("Unlinking file %s", os_path) + with self.perm_to_403(): + await run_sync_in_worker_thread(rm, os_path) + + async def rename_file(self, old_path, new_path): + """Rename a file.""" + old_path = old_path.strip('/') + new_path = new_path.strip('/') + if new_path == old_path: + return + + new_os_path = self._get_os_path(new_path) + old_os_path = self._get_os_path(old_path) + + # Should we proceed with the move? 
+ if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path): + raise web.HTTPError(409, u'File already exists: %s' % new_path) + + # Move the file + try: + with self.perm_to_403(): + await run_sync_in_worker_thread(shutil.move, old_os_path, new_os_path) + except web.HTTPError: + raise + except Exception as e: + raise web.HTTPError(500, u'Unknown error renaming file: %s %s' % + (old_path, e)) from e \ No newline at end of file diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 53aff09078..70e11366bf 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -120,7 +120,7 @@ async def patch(self, path=''): model = self.get_json_body() if model is None: raise web.HTTPError(400, u'JSON body missing') - model = cm.update(model, path) + model = await ensure_async(cm.update(model, path)) validate_model(model, expect_content=False) self._finish_model(model) @@ -130,7 +130,7 @@ async def _copy(self, copy_from, copy_to=None): copy_from=copy_from, copy_to=copy_to or '', )) - model = self.contents_manager.copy(copy_from, copy_to) + model = await ensure_async(self.contents_manager.copy(copy_from, copy_to)) self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) @@ -138,7 +138,7 @@ async def _copy(self, copy_from, copy_to=None): async def _upload(self, model, path): """Handle upload of a new file to path""" self.log.info(u"Uploading file to %s", path) - model = self.contents_manager.new(model, path) + model = await ensure_async(self.contents_manager.new(model, path)) self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) @@ -146,7 +146,8 @@ async def _upload(self, model, path): async def _new_untitled(self, path, type='', ext=''): """Create a new, empty untitled entity""" self.log.info(u"Creating new %s in %s", type or 'file', path) - model = 
self.contents_manager.new_untitled(path=path, type=type, ext=ext) + model = await ensure_async(self.contents_manager.new_untitled( + path=path, type=type, ext=ext)) self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) @@ -156,7 +157,7 @@ async def _save(self, model, path): chunk = model.get("chunk", None) if not chunk or chunk == -1: # Avoid tedious log information self.log.info(u"Saving file at %s", path) - model = self.contents_manager.save(model, path) + model = await ensure_async(self.contents_manager.save(model, path)) validate_model(model, expect_content=False) self._finish_model(model) @@ -225,7 +226,7 @@ async def delete(self, path=''): """delete a file in the given path""" cm = self.contents_manager self.log.warning('delete %s', path) - cm.delete(path) + await ensure_async(cm.delete(path)) self.set_status(204) self.finish() @@ -236,7 +237,7 @@ class CheckpointsHandler(APIHandler): async def get(self, path=''): """get lists checkpoints for a file""" cm = self.contents_manager - checkpoints = cm.list_checkpoints(path) + checkpoints = await ensure_async(cm.list_checkpoints(path)) data = json.dumps(checkpoints, default=date_default) self.finish(data) @@ -244,7 +245,7 @@ async def get(self, path=''): async def post(self, path=''): """post creates a new checkpoint""" cm = self.contents_manager - checkpoint = cm.create_checkpoint(path) + checkpoint = await ensure_async(cm.create_checkpoint(path)) data = json.dumps(checkpoint, default=date_default) location = url_path_join(self.base_url, 'api/contents', url_escape(path), 'checkpoints', url_escape(checkpoint['id'])) @@ -259,7 +260,7 @@ class ModifyCheckpointsHandler(APIHandler): async def post(self, path, checkpoint_id): """post restores a file from a checkpoint""" cm = self.contents_manager - cm.restore_checkpoint(checkpoint_id, path) + await ensure_async(cm.restore_checkpoint(checkpoint_id, path)) self.set_status(204) self.finish() @@ -267,7 +268,7 @@ async def post(self, 
path, checkpoint_id): async def delete(self, path, checkpoint_id): """delete clears a checkpoint for a given file""" cm = self.contents_manager - cm.delete_checkpoint(checkpoint_id, path) + await ensure_async(cm.delete_checkpoint(checkpoint_id, path)) self.set_status(204) self.finish() diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 0aaf69a982..a7e3c61a06 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -32,6 +32,7 @@ from ipython_genutils.py3compat import string_types from jupyter_server.base.handlers import JupyterHandler from jupyter_server.transutils import _ +from jupyter_server.utils import ensure_async copy_pat = re.compile(r'\-Copy\d*\.') @@ -537,6 +538,18 @@ class AsyncContentsManager(ContentsManager): checkpoints_class = Type(AsyncCheckpoints, config=True) checkpoints = Instance(AsyncCheckpoints, config=True) + checkpoints_kwargs = Dict(config=True) + + @default('checkpoints') + def _default_checkpoints(self): + return self.checkpoints_class(**self.checkpoints_kwargs) + + @default('checkpoints_kwargs') + def _default_checkpoints_kwargs(self): + return dict( + parent=self, + log=self.log, + ) # ContentsManager API part 1: methods that must be # implemented in subclasses. @@ -659,12 +672,49 @@ async def update(self, model, path): re-uploading its contents. Only used for renaming at the moment. """ path = path.strip('/') - new_path = await model.get('path', path).strip('/') + new_path = model.get('path', path).strip('/') if path != new_path: await self.rename(path, new_path) model = await self.get(new_path, content=False) return model + async def increment_filename(self, filename, path='', insert=''): + """Increment a filename until it is unique. 
+ + Parameters + ---------- + filename : unicode + The name of a file, including extension + path : unicode + The API path of the target's directory + insert: unicode + The characters to insert after the base filename + + Returns + ------- + name : unicode + A filename that is unique, based on the input filename. + """ + # Extract the full suffix from the filename (e.g. .tar.gz) + path = path.strip('/') + basename, dot, ext = filename.rpartition('.') + if ext != 'ipynb': + basename, dot, ext = filename.partition('.') + + suffix = dot + ext + + for i in itertools.count(): + if i: + insert_i = '{}{}'.format(insert, i) + else: + insert_i = '' + name = u'{basename}{insert}{suffix}'.format(basename=basename, + insert=insert_i, suffix=suffix) + file_exists = await ensure_async(self.exists(u'{}/{}'.format(path, name))) + if not file_exists: + break + return name + async def new_untitled(self, path='', type='', ext=''): """Create a new untitled file or directory in path @@ -675,7 +725,8 @@ async def new_untitled(self, path='', type='', ext=''): Use `new` to create files with a fully specified path (including filename). 
""" path = path.strip('/') - if not (await self.dir_exists(path)): + dir_exists = await ensure_async(self.dir_exists(path)) + if not dir_exists: raise HTTPError(404, 'No such directory: %s' % path) model = {} @@ -699,7 +750,7 @@ async def new_untitled(self, path='', type='', ext=''): else: raise HTTPError(400, "Unexpected model type: %r" % model['type']) - name = self.increment_filename(untitled + ext, path, insert=insert) + name = await self.increment_filename(untitled + ext, path, insert=insert) path = u'{0}/{1}'.format(path, name) return await self.new(model, path) @@ -757,9 +808,9 @@ async def copy(self, from_path, to_path=None): raise HTTPError(400, "Can't copy directories") if to_path is None: to_path = from_dir - if (await self.dir_exists(to_path)) : + if self.dir_exists(to_path): name = copy_pat.sub(u'.', from_name) - to_name = self.increment_filename(name, to_path, insert='-Copy') + to_name = await self.increment_filename(name, to_path, insert='-Copy') to_path = u'{0}/{1}'.format(to_path, to_name) model = await self.save(model, to_path) diff --git a/setup.py b/setup.py index 063764d2ae..96279432ba 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,7 @@ 'terminado>=0.8.3', 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", + "anyio>=2.0.2", ], extras_require = { 'test': ['coverage', 'requests', diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index fea04db7b4..623c2e15e6 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -12,6 +12,7 @@ ) from jupyter_server.utils import url_path_join +from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from base64 import encodebytes, decodebytes @@ -41,6 +42,13 @@ def dirs_only(dir_model): ] +@pytest.fixture(params=["FileContentsManager", "AsyncFileContentsManager"]) +def argv(request): + if request.param == "AsyncFileContentsManager" and sys.version_info < (3, 6): + pytest.skip("Kernel 
manager is AsyncFileContentsManager, Python version < 3.6") + return ["--ServerApp.contents_manager_class=jupyter_server.services.contents.filemanager." + request.param] + + @pytest.fixture def contents_dir(tmp_path, jp_serverapp): return tmp_path / jp_serverapp.root_dir diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 4d3d04f8f0..7357611d9f 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -10,7 +10,8 @@ from nbformat import v4 as nbformat -from jupyter_server.services.contents.filemanager import FileContentsManager +from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints +from jupyter_server.utils import ensure_async from ...utils import expected_http_error # -------------- Functions ---------------------------- @@ -43,30 +44,30 @@ def add_code_cell(notebook): notebook.cells.append(cell) -def new_notebook(jp_contents_manager): +async def new_notebook(jp_contents_manager): cm = jp_contents_manager - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] - full_model = cm.get(path) + full_model = await ensure_async(cm.get(path)) nb = full_model['content'] nb['metadata']['counter'] = int(1e6 * time.time()) add_code_cell(nb) - cm.save(full_model, path) + await ensure_async(cm.save(full_model, path)) return nb, name, path -def make_populated_dir(jp_contents_manager, api_path): +async def make_populated_dir(jp_contents_manager, api_path): cm = jp_contents_manager _make_dir(cm, api_path) - cm.new(path="/".join([api_path, "nb.ipynb"])) - cm.new(path="/".join([api_path, "file.txt"])) + await ensure_async(cm.new(path="/".join([api_path, "nb.ipynb"]))) + await ensure_async(cm.new(path="/".join([api_path, "file.txt"]))) -def check_populated_dir_files(jp_contents_manager, api_path): - dir_model = jp_contents_manager.get(api_path) +async def 
check_populated_dir_files(jp_contents_manager, api_path): + dir_model = await ensure_async(jp_contents_manager.get(api_path)) assert dir_model['path'] == api_path assert dir_model['type'] == "directory" @@ -85,45 +86,45 @@ def check_populated_dir_files(jp_contents_manager, api_path): # ----------------- Tests ---------------------------------- -def test_root_dir(tmp_path): - fm = FileContentsManager(root_dir=str(tmp_path)) +def test_root_dir(file_contents_manager_class, tmp_path): + fm = file_contents_manager_class(root_dir=str(tmp_path)) assert fm.root_dir == str(tmp_path) -def test_missing_root_dir(tmp_path): +def test_missing_root_dir(file_contents_manager_class, tmp_path): root = tmp_path / 'notebook' / 'dir' / 'is' / 'missing' with pytest.raises(TraitError): - FileContentsManager(root_dir=str(root)) + file_contents_manager_class(root_dir=str(root)) -def test_invalid_root_dir(tmp_path): +def test_invalid_root_dir(file_contents_manager_class, tmp_path): temp_file = tmp_path / 'file.txt' temp_file.write_text('') with pytest.raises(TraitError): - FileContentsManager(root_dir=str(temp_file)) + file_contents_manager_class(root_dir=str(temp_file)) -def test_get_os_path(tmp_path): - fm = FileContentsManager(root_dir=str(tmp_path)) +def test_get_os_path(file_contents_manager_class, tmp_path): + fm = file_contents_manager_class(root_dir=str(tmp_path)) path = fm._get_os_path('/path/to/notebook/test.ipynb') rel_path_list = '/path/to/notebook/test.ipynb'.split('/') fs_path = os.path.join(fm.root_dir, *rel_path_list) assert path == fs_path - fm = FileContentsManager(root_dir=str(tmp_path)) + fm = file_contents_manager_class(root_dir=str(tmp_path)) path = fm._get_os_path('test.ipynb') fs_path = os.path.join(fm.root_dir, 'test.ipynb') assert path == fs_path - fm = FileContentsManager(root_dir=str(tmp_path)) + fm = file_contents_manager_class(root_dir=str(tmp_path)) path = fm._get_os_path('////test.ipynb') fs_path = os.path.join(fm.root_dir, 'test.ipynb') assert path == 
fs_path -def test_checkpoint_subdir(tmp_path): +def test_checkpoint_subdir(file_contents_manager_class, tmp_path): subd = 'sub ∂ir' cp_name = 'test-cp.ipynb' - fm = FileContentsManager(root_dir=str(tmp_path)) + fm = file_contents_manager_class(root_dir=str(tmp_path)) tmp_path.joinpath(subd).mkdir() cpm = fm.checkpoints cp_dir = cpm.checkpoint_path('cp', 'test.ipynb') @@ -136,18 +137,18 @@ def test_checkpoint_subdir(tmp_path): sys.platform == 'win32' and sys.version_info[0] < 3, reason="System platform is Windows, version < 3" ) -def test_bad_symlink(tmp_path): +async def test_bad_symlink(file_contents_manager_class, tmp_path): td = str(tmp_path) - cm = FileContentsManager(root_dir=td) + cm = file_contents_manager_class(root_dir=td) path = 'test bad symlink' _make_dir(cm, path) - file_model = cm.new_untitled(path=path, ext='.txt') + file_model = await ensure_async(cm.new_untitled(path=path, ext='.txt')) # create a broken symlink symlink(cm, "target", '%s/%s' % (path, 'bad symlink')) - model = cm.get(path) + model = await ensure_async(cm.get(path)) contents = { content['name']: content for content in model['content'] @@ -161,24 +162,24 @@ def test_bad_symlink(tmp_path): sys.platform == 'win32' and sys.version_info[0] < 3, reason="System platform is Windows, version < 3" ) -def test_good_symlink(tmp_path): +async def test_good_symlink(file_contents_manager_class, tmp_path): td = str(tmp_path) - cm = FileContentsManager(root_dir=td) + cm = file_contents_manager_class(root_dir=td) parent = 'test good symlink' name = 'good symlink' path = '{0}/{1}'.format(parent, name) _make_dir(cm, parent) - file_model = cm.new(path=parent + '/zfoo.txt') + file_model = await ensure_async(cm.new(path=parent + '/zfoo.txt')) # create a good symlink symlink(cm, file_model['path'], path) - symlink_model = cm.get(path, content=False) - dir_model = cm.get(parent) + symlink_model = await ensure_async(cm.get(path, content=False)) + dir_model = await ensure_async(cm.get(parent)) assert 
sorted(dir_model['content'], key=lambda x: x['name']) == [symlink_model, file_model] -def test_403(tmp_path): +async def test_403(file_contents_manager_class, tmp_path): if hasattr(os, 'getuid'): if os.getuid() == 0: raise pytest.skip("Can't test permissions as root") @@ -186,8 +187,8 @@ def test_403(tmp_path): raise pytest.skip("Can't test permissions on Windows") td = str(tmp_path) - cm = FileContentsManager(root_dir=td) - model = cm.new_untitled(type='file') + cm = file_contents_manager_class(root_dir=td) + model = await ensure_async(cm.new_untitled(type='file')) os_path = cm._get_os_path(model['path']) os.chmod(os_path, 0o400) @@ -197,9 +198,9 @@ def test_403(tmp_path): except HTTPError as e: assert e.status_code == 403 -def test_escape_root(tmp_path): +async def test_escape_root(file_contents_manager_class, tmp_path): td = str(tmp_path) - cm = FileContentsManager(root_dir=td) + cm = file_contents_manager_class(root_dir=td) # make foo, bar next to root with open(os.path.join(cm.root_dir, '..', 'foo'), 'w') as f: f.write('foo') @@ -207,34 +208,34 @@ def test_escape_root(tmp_path): f.write('bar') with pytest.raises(HTTPError) as e: - cm.get('..') + await ensure_async(cm.get('..')) expected_http_error(e, 404) with pytest.raises(HTTPError) as e: - cm.get('foo/../../../bar') + await ensure_async(cm.get('foo/../../../bar')) expected_http_error(e, 404) with pytest.raises(HTTPError) as e: - cm.delete('../foo') + await ensure_async(cm.delete('../foo')) expected_http_error(e, 404) with pytest.raises(HTTPError) as e: - cm.rename('../foo', '../bar') + await ensure_async(cm.rename('../foo', '../bar')) expected_http_error(e, 404) with pytest.raises(HTTPError) as e: - cm.save(model={ + await ensure_async(cm.save(model={ 'type': 'file', 'content': u'', 'format': 'text', - }, path='../foo') + }, path='../foo')) expected_http_error(e, 404) -def test_new_untitled(jp_contents_manager): +async def test_new_untitled(jp_contents_manager): cm = jp_contents_manager # Test in root 
directory - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -244,7 +245,7 @@ def test_new_untitled(jp_contents_manager): assert model['path'] == 'Untitled.ipynb' # Test in sub-directory - model = cm.new_untitled(type='directory') + model = await ensure_async(cm.new_untitled(type='directory')) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -254,7 +255,7 @@ def test_new_untitled(jp_contents_manager): assert model['path'] == 'Untitled Folder' sub_dir = model['path'] - model = cm.new_untitled(path=sub_dir) + model = await ensure_async(cm.new_untitled(path=sub_dir)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -264,64 +265,64 @@ def test_new_untitled(jp_contents_manager): assert model['path'] == '%s/untitled' % sub_dir # Test with a compound extension - model = cm.new_untitled(path=sub_dir, ext='.foo.bar') + model = await ensure_async(cm.new_untitled(path=sub_dir, ext='.foo.bar')) assert model['name'] == 'untitled.foo.bar' - model = cm.new_untitled(path=sub_dir, ext='.foo.bar') + model = await ensure_async(cm.new_untitled(path=sub_dir, ext='.foo.bar')) assert model['name'] == 'untitled1.foo.bar' -def test_modified_date(jp_contents_manager): +async def test_modified_date(jp_contents_manager): cm = jp_contents_manager # Create a new notebook. - nb, name, path = new_notebook(cm) - model = cm.get(path) + nb, name, path = await new_notebook(cm) + model = await ensure_async(cm.get(path)) # Add a cell and save. add_code_cell(model['content']) - cm.save(model, path) + await ensure_async(cm.save(model, path)) # Reload notebook and verify that last_modified incremented. - saved = cm.get(path) + saved = await ensure_async(cm.get(path)) assert saved['last_modified'] >= model['last_modified'] # Move the notebook and verify that last_modified stayed the same. 
# (The frontend fires a warning if last_modified increases on the # renamed file.) new_path = 'renamed.ipynb' - cm.rename(path, new_path) - renamed = cm.get(new_path) + await ensure_async(cm.rename(path, new_path)) + renamed = await ensure_async(cm.get(new_path)) assert renamed['last_modified'] >= saved['last_modified'] -def test_get(jp_contents_manager): +async def test_get(jp_contents_manager): cm = jp_contents_manager # Create a notebook - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] # Check that we 'get' on the notebook we just created - model2 = cm.get(path) + model2 = await ensure_async(cm.get(path)) assert isinstance(model2, dict) assert 'name' in model2 assert 'path' in model2 assert model['name'] == name assert model['path'] == path - nb_as_file = cm.get(path, content=True, type='file') + nb_as_file = await ensure_async(cm.get(path, content=True, type='file')) assert nb_as_file['path'] == path assert nb_as_file['type'] == 'file' assert nb_as_file['format'] == 'text' assert not isinstance(nb_as_file['content'], dict) - nb_as_bin_file = cm.get(path, content=True, type='file', format='base64') + nb_as_bin_file = await ensure_async(cm.get(path, content=True, type='file', format='base64')) assert nb_as_bin_file['format'] == 'base64' # Test in sub-directory sub_dir = '/foo/' _make_dir(cm, 'foo') - model = cm.new_untitled(path=sub_dir, ext='.ipynb') - model2 = cm.get(sub_dir + name) + model = await ensure_async(cm.new_untitled(path=sub_dir, ext='.ipynb')) + model2 = await ensure_async(cm.get(sub_dir + name)) assert isinstance(model2, dict) assert 'name' in model2 assert 'path' in model2 @@ -331,8 +332,8 @@ def test_get(jp_contents_manager): # Test with a regular file. 
- file_model_path = cm.new_untitled(path=sub_dir, ext='.txt')['path'] - file_model = cm.get(file_model_path) + file_model_path = (await ensure_async(cm.new_untitled(path=sub_dir, ext='.txt')))['path'] + file_model = await ensure_async(cm.get(file_model_path)) expected_model = { 'content': u'', 'format': u'text', @@ -351,7 +352,7 @@ def test_get(jp_contents_manager): # Create a sub-sub directory to test getting directory contents with a # subdir. _make_dir(cm, 'foo/bar') - dirmodel = cm.get('foo') + dirmodel = await ensure_async(cm.get('foo')) assert dirmodel['type'] == 'directory' assert isinstance(dirmodel['content'], list) assert len(dirmodel['content']) == 3 @@ -360,9 +361,9 @@ def test_get(jp_contents_manager): # Directory contents should match the contents of each individual entry # when requested with content=False. - model2_no_content = cm.get(sub_dir + name, content=False) - file_model_no_content = cm.get(u'foo/untitled.txt', content=False) - sub_sub_dir_no_content = cm.get('foo/bar', content=False) + model2_no_content = await ensure_async(cm.get(sub_dir + name, content=False)) + file_model_no_content = await ensure_async(cm.get(u'foo/untitled.txt', content=False)) + sub_sub_dir_no_content = await ensure_async(cm.get('foo/bar', content=False)) assert sub_sub_dir_no_content['path'] == 'foo/bar' assert sub_sub_dir_no_content['name'] == 'bar' @@ -379,19 +380,19 @@ def test_get(jp_contents_manager): assert False, "Unexpected directory entry: %s" % entry() with pytest.raises(HTTPError): - cm.get('foo', type='file') + await ensure_async(cm.get('foo', type='file')) -def test_update(jp_contents_manager): +async def test_update(jp_contents_manager): cm = jp_contents_manager # Create a notebook. 
- model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] # Change the name in the model for rename model['path'] = 'test.ipynb' - model = cm.update(model, path) + model = await ensure_async(cm.update(model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -399,19 +400,19 @@ def test_update(jp_contents_manager): # Make sure the old name is gone with pytest.raises(HTTPError): - cm.get(path) + await ensure_async(cm.get(path)) # Test in sub-directory # Create a directory and notebook in that directory sub_dir = '/foo/' _make_dir(cm, 'foo') - model = cm.new_untitled(path=sub_dir, type='notebook') + model = await ensure_async(cm.new_untitled(path=sub_dir, type='notebook')) path = model['path'] # Change the name in the model for rename d = path.rsplit('/', 1)[0] new_path = model['path'] = d + '/test_in_sub.ipynb' - model = cm.update(model, path) + model = await ensure_async(cm.update(model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -420,21 +421,21 @@ def test_update(jp_contents_manager): # Make sure the old name is gone with pytest.raises(HTTPError): - cm.get(path) + await ensure_async(cm.get(path)) -def test_save(jp_contents_manager): +async def test_save(jp_contents_manager): cm = jp_contents_manager # Create a notebook - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] # Get the model with 'content' - full_model = cm.get(path) + full_model = await ensure_async(cm.get(path)) # Save the notebook - model = cm.save(full_model, path) + model = await ensure_async(cm.save(full_model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -445,13 +446,13 @@ def test_save(jp_contents_manager): # Create a directory and notebook in that directory sub_dir = '/foo/' _make_dir(cm, 
'foo') - model = cm.new_untitled(path=sub_dir, type='notebook') + model = await ensure_async(cm.new_untitled(path=sub_dir, type='notebook')) name = model['name'] path = model['path'] - model = cm.get(path) + model = await ensure_async(cm.get(path)) # Change the name in the model for rename - model = cm.save(model, path) + model = await ensure_async(cm.save(model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -459,36 +460,36 @@ def test_save(jp_contents_manager): assert model['path'] == 'foo/Untitled.ipynb' -def test_delete(jp_contents_manager): +async def test_delete(jp_contents_manager): cm = jp_contents_manager # Create a notebook - nb, name, path = new_notebook(cm) + nb, name, path = await new_notebook(cm) # Delete the notebook - cm.delete(path) + await ensure_async(cm.delete(path)) # Check that deleting a non-existent path raises an error. with pytest.raises(HTTPError): - cm.delete(path) + await ensure_async(cm.delete(path)) # Check that a 'get' on the deleted notebook raises and error with pytest.raises(HTTPError): - cm.get(path) + await ensure_async(cm.get(path)) -def test_rename(jp_contents_manager): +async def test_rename(jp_contents_manager): cm = jp_contents_manager # Create a new notebook - nb, name, path = new_notebook(cm) + nb, name, path = await new_notebook(cm) # Rename the notebook - cm.rename(path, "changed_path") + await ensure_async(cm.rename(path, "changed_path")) # Attempting to get the notebook under the old name raises an error with pytest.raises(HTTPError): - cm.get(path) + await ensure_async(cm.get(path)) # Fetching the notebook under the new name is successful - assert isinstance(cm.get("changed_path"), dict) + assert isinstance(await ensure_async(cm.get("changed_path")), dict) # Ported tests on nested directory renaming from pgcontents all_dirs = ['foo', 'bar', 'foo/bar', 'foo/bar/foo', 'foo/bar/foo/bar'] @@ -496,93 +497,93 @@ def test_rename(jp_contents_manager): changed_dirs = all_dirs[2:] for 
_dir in all_dirs: - make_populated_dir(cm, _dir) - check_populated_dir_files(cm, _dir) + await make_populated_dir(cm, _dir) + await check_populated_dir_files(cm, _dir) # Renaming to an existing directory should fail for src, dest in combinations(all_dirs, 2): with pytest.raises(HTTPError) as e: - cm.rename(src, dest) + await ensure_async(cm.rename(src, dest)) assert expected_http_error(e, 409) # Creating a notebook in a non_existant directory should fail with pytest.raises(HTTPError) as e: - cm.new_untitled("foo/bar_diff", ext=".ipynb") + await ensure_async(cm.new_untitled("foo/bar_diff", ext=".ipynb")) assert expected_http_error(e, 404) - cm.rename("foo/bar", "foo/bar_diff") + await ensure_async(cm.rename("foo/bar", "foo/bar_diff")) # Assert that unchanged directories remain so for unchanged in unchanged_dirs: - check_populated_dir_files(cm, unchanged) + await check_populated_dir_files(cm, unchanged) # Assert changed directories can no longer be accessed under old names for changed_dirname in changed_dirs: with pytest.raises(HTTPError) as e: - cm.get(changed_dirname) + await ensure_async(cm.get(changed_dirname)) assert expected_http_error(e, 404) new_dirname = changed_dirname.replace("foo/bar", "foo/bar_diff", 1) - check_populated_dir_files(cm, new_dirname) + await check_populated_dir_files(cm, new_dirname) # Created a notebook in the renamed directory should work - cm.new_untitled("foo/bar_diff", ext=".ipynb") + await ensure_async(cm.new_untitled("foo/bar_diff", ext=".ipynb")) -def test_delete_root(jp_contents_manager): +async def test_delete_root(jp_contents_manager): cm = jp_contents_manager with pytest.raises(HTTPError) as e: - cm.delete('') + await ensure_async(cm.delete('')) assert expected_http_error(e, 400) -def test_copy(jp_contents_manager): +async def test_copy(jp_contents_manager): cm = jp_contents_manager parent = u'å b' name = u'nb √.ipynb' path = u'{0}/{1}'.format(parent, name) _make_dir(cm, parent) - orig = cm.new(path=path) + orig = await 
ensure_async(cm.new(path=path)) # copy with unspecified name - copy = cm.copy(path) + copy = await ensure_async(cm.copy(path)) assert copy['name'] == orig['name'].replace('.ipynb', '-Copy1.ipynb') # copy with specified name - copy2 = cm.copy(path, u'å b/copy 2.ipynb') + copy2 = await ensure_async(cm.copy(path, u'å b/copy 2.ipynb')) assert copy2['name'] == u'copy 2.ipynb' assert copy2['path'] == u'å b/copy 2.ipynb' # copy with specified path - copy2 = cm.copy(path, u'/') + copy2 = await ensure_async(cm.copy(path, u'/')) assert copy2['name'] == name assert copy2['path'] == name -def test_mark_trusted_cells(jp_contents_manager): +async def test_mark_trusted_cells(jp_contents_manager): cm = jp_contents_manager - nb, name, path = new_notebook(cm) + nb, name, path = await new_notebook(cm) cm.mark_trusted_cells(nb, path) for cell in nb.cells: if cell.cell_type == 'code': assert not cell.metadata.trusted - cm.trust_notebook(path) - nb = cm.get(path)['content'] + await ensure_async(cm.trust_notebook(path)) + nb = (await ensure_async(cm.get(path)))['content'] for cell in nb.cells: if cell.cell_type == 'code': assert cell.metadata.trusted -def test_check_and_sign(jp_contents_manager): +async def test_check_and_sign(jp_contents_manager): cm = jp_contents_manager - nb, name, path = new_notebook(cm) + nb, name, path = await new_notebook(cm) cm.mark_trusted_cells(nb, path) cm.check_and_sign(nb, path) assert not cm.notary.check_signature(nb) - cm.trust_notebook(path) - nb = cm.get(path)['content'] + await ensure_async(cm.trust_notebook(path)) + nb = (await ensure_async(cm.get(path)))['content'] cm.mark_trusted_cells(nb, path) cm.check_and_sign(nb, path) assert cm.notary.check_signature(nb) diff --git a/tests/services/kernels/test_api.py b/tests/services/kernels/test_api.py index bc2ade403d..44c815891c 100644 --- a/tests/services/kernels/test_api.py +++ b/tests/services/kernels/test_api.py @@ -4,6 +4,7 @@ import pytest + import tornado import urllib.parse from tornado.escape import 
url_escape From 24dd61839f8629f28a6185362b73b9b4aed61fef Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 17 Nov 2020 11:55:16 -0700 Subject: [PATCH 31/68] Use jp prefix for fixtures --- jupyter_server/pytest_plugin.py | 3 +++ tests/services/contents/test_manager.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/jupyter_server/pytest_plugin.py b/jupyter_server/pytest_plugin.py index dee6d3f0d5..b8454a6c49 100644 --- a/jupyter_server/pytest_plugin.py +++ b/jupyter_server/pytest_plugin.py @@ -19,6 +19,7 @@ from jupyter_server.extension import serverextension from jupyter_server.serverapp import ServerApp from jupyter_server.utils import url_path_join +<<<<<<< HEAD from jupyter_server.services.contents.filemanager import FileContentsManager from jupyter_server.services.contents.largefilemanager import LargeFileManager @@ -44,6 +45,8 @@ def mkdir(tmp_path, *parts): def jp_home_dir(tmp_path): """Provides a temporary HOME directory value.""" return mkdir(tmp_path, "home") +======= +>>>>>>> Use jp prefix for fixtures @pytest.fixture diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 7357611d9f..cb20095677 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -11,9 +11,23 @@ from nbformat import v4 as nbformat from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints +from jupyter_server.services.contents.filemanager import AsyncFileContentsManager, FileContentsManager from jupyter_server.utils import ensure_async from ...utils import expected_http_error +@pytest.fixture(params=[(FileContentsManager, True), + (FileContentsManager, False), + (AsyncFileContentsManager, True), + (AsyncFileContentsManager, False)]) +def jp_contents_manager(request, tmp_path): + contents_manager, use_atomic_writing = request.param + return contents_manager(root_dir=str(tmp_path), use_atomic_writing=use_atomic_writing) + + 
+@pytest.fixture(params=[FileContentsManager, AsyncFileContentsManager]) +def file_contents_manager_class(request, tmp_path): + return request.param + # -------------- Functions ---------------------------- def _make_dir(jp_contents_manager, api_path): From 6bd874d5365a0ad51f2b01a53555a97a255d1d52 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 17 Nov 2020 11:56:25 -0700 Subject: [PATCH 32/68] Remove condition for testing python 3.5 --- tests/services/contents/test_api.py | 2 -- tests/services/kernels/test_api.py | 2 -- tests/services/sessions/test_api.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index 623c2e15e6..8a8bacbb43 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -44,8 +44,6 @@ def dirs_only(dir_model): @pytest.fixture(params=["FileContentsManager", "AsyncFileContentsManager"]) def argv(request): - if request.param == "AsyncFileContentsManager" and sys.version_info < (3, 6): - pytest.skip("Kernel manager is AsyncFileContentsManager, Python version < 3.6") return ["--ServerApp.contents_manager_class=jupyter_server.services.contents.filemanager." + request.param] diff --git a/tests/services/kernels/test_api.py b/tests/services/kernels/test_api.py index 44c815891c..e0d28ddb12 100644 --- a/tests/services/kernels/test_api.py +++ b/tests/services/kernels/test_api.py @@ -18,8 +18,6 @@ @pytest.fixture(params=["MappingKernelManager", "AsyncMappingKernelManager"]) def argv(request): - if request.param == "AsyncMappingKernelManager" and sys.version_info < (3, 6): - pytest.skip("Kernel manager is AsyncMappingKernelManager, Python version < 3.6") return ["--ServerApp.kernel_manager_class=jupyter_server.services.kernels.kernelmanager." 
+ request.param] diff --git a/tests/services/sessions/test_api.py b/tests/services/sessions/test_api.py index 065e392a6d..fd650d810c 100644 --- a/tests/services/sessions/test_api.py +++ b/tests/services/sessions/test_api.py @@ -18,8 +18,6 @@ @pytest.fixture(params=["MappingKernelManager", "AsyncMappingKernelManager"]) def argv(request): - if request.param == "AsyncMappingKernelManager" and sys.version_info < (3, 6): - pytest.skip("Kernel manager is AsyncMappingKernelManager, Python version < 3.6") return ["--ServerApp.kernel_manager_class=jupyter_server.services.kernels.kernelmanager." + request.param] From 5a521e305bd8c3faa6fae61bf7e470dfc05e9d52 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 17 Nov 2020 11:59:50 -0700 Subject: [PATCH 33/68] Add new async classes to serverapp --- jupyter_server/serverapp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index f6f203772e..3089465886 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -553,8 +553,8 @@ class ServerApp(JupyterApp): aliases = Dict(aliases) classes = [ - KernelManager, Session, MappingKernelManager, KernelSpecManager, - ContentsManager, FileContentsManager, NotebookNotary, + KernelManager, Session, MappingKernelManager, KernelSpecManager, AsyncMappingKernelManager, + ContentsManager, FileContentsManager, AsyncContentsManager, AsyncFileContentsManager, NotebookNotary, GatewayKernelManager, GatewayKernelSpecManager, GatewaySessionManager, GatewayClient ] From 0afe5b196a5be6a91215f8a3448ad3b52656d233 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 17 Nov 2020 12:04:58 -0700 Subject: [PATCH 34/68] Fix lint errors --- jupyter_server/services/contents/filecheckpoints.py | 1 - jupyter_server/services/contents/fileio.py | 2 +- jupyter_server/services/contents/filemanager.py | 1 - jupyter_server/services/contents/manager.py | 1 - tests/services/contents/test_api.py | 1 - 
tests/services/contents/test_config.py | 1 - tests/services/contents/test_manager.py | 4 +--- 7 files changed, 2 insertions(+), 9 deletions(-) diff --git a/jupyter_server/services/contents/filecheckpoints.py b/jupyter_server/services/contents/filecheckpoints.py index 5ddf8885e9..5910b60768 100644 --- a/jupyter_server/services/contents/filecheckpoints.py +++ b/jupyter_server/services/contents/filecheckpoints.py @@ -15,7 +15,6 @@ from anyio import run_sync_in_worker_thread from jupyter_core.utils import ensure_dir_exists -from jupyter_server.utils import ensure_async from ipython_genutils.py3compat import getcwd from traitlets import Unicode diff --git a/jupyter_server/services/contents/fileio.py b/jupyter_server/services/contents/fileio.py index 91e095729e..531715eb67 100644 --- a/jupyter_server/services/contents/fileio.py +++ b/jupyter_server/services/contents/fileio.py @@ -12,7 +12,7 @@ import os import shutil -from anyio import open_file, run_sync_in_worker_thread +from anyio import run_sync_in_worker_thread from tornado.web import HTTPError from jupyter_server.utils import ( diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index a46f0b64b0..3bc5905230 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -10,7 +10,6 @@ import shutil import stat import sys -import warnings import mimetypes import nbformat diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index a7e3c61a06..4868c2b965 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -30,7 +30,6 @@ default, ) from ipython_genutils.py3compat import string_types -from jupyter_server.base.handlers import JupyterHandler from jupyter_server.transutils import _ from jupyter_server.utils import ensure_async diff --git a/tests/services/contents/test_api.py 
b/tests/services/contents/test_api.py index 8a8bacbb43..30471d0ab4 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -12,7 +12,6 @@ ) from jupyter_server.utils import url_path_join -from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from base64 import encodebytes, decodebytes diff --git a/tests/services/contents/test_config.py b/tests/services/contents/test_config.py index 3625f781b4..06159d0b51 100644 --- a/tests/services/contents/test_config.py +++ b/tests/services/contents/test_config.py @@ -1,6 +1,5 @@ import pytest -from traitlets.config import Config from jupyter_server.services.contents.checkpoints import AsyncCheckpoints from jupyter_server.services.contents.filecheckpoints import GenericFileCheckpoints from jupyter_server.services.contents.manager import AsyncContentsManager diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index cb20095677..5bedb79779 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -10,7 +10,6 @@ from nbformat import v4 as nbformat -from jupyter_server.services.contents.filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from jupyter_server.services.contents.filemanager import AsyncFileContentsManager, FileContentsManager from jupyter_server.utils import ensure_async from ...utils import expected_http_error @@ -335,7 +334,7 @@ async def test_get(jp_contents_manager): # Test in sub-directory sub_dir = '/foo/' _make_dir(cm, 'foo') - model = await ensure_async(cm.new_untitled(path=sub_dir, ext='.ipynb')) + await ensure_async(cm.new_untitled(path=sub_dir, ext='.ipynb')) model2 = await ensure_async(cm.get(sub_dir + name)) assert isinstance(model2, dict) assert 'name' in model2 @@ -461,7 +460,6 @@ async def test_save(jp_contents_manager): sub_dir = '/foo/' _make_dir(cm, 'foo') model = await ensure_async(cm.new_untitled(path=sub_dir, 
type='notebook')) - name = model['name'] path = model['path'] model = await ensure_async(cm.get(path)) From 3bc0febc8d8df19dcf660e7ddfeb36406bdbc4f0 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Wed, 18 Nov 2020 18:45:58 -0700 Subject: [PATCH 35/68] Skip test for windows --- tests/services/contents/test_manager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 5bedb79779..27534dd8fc 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -192,12 +192,14 @@ async def test_good_symlink(file_contents_manager_class, tmp_path): assert sorted(dir_model['content'], key=lambda x: x['name']) == [symlink_model, file_model] +@pytest.mark.skipif( + sys.platform.startswith('win'), + reason="Can't test permissions on Windows" +) async def test_403(file_contents_manager_class, tmp_path): if hasattr(os, 'getuid'): if os.getuid() == 0: raise pytest.skip("Can't test permissions as root") - if sys.platform.startswith('win'): - raise pytest.skip("Can't test permissions on Windows") td = str(tmp_path) cm = file_contents_manager_class(root_dir=td) From 9be92d81c402fe0fd31df571e45e71647330076a Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 20 Nov 2020 10:10:05 -0700 Subject: [PATCH 36/68] Create AsyncLargeFileManager --- .../services/contents/largefilemanager.py | 70 ++++++++++++++++++- .../contents/test_largefilemanager.py | 37 ++++++---- 2 files changed, 92 insertions(+), 15 deletions(-) diff --git a/jupyter_server/services/contents/largefilemanager.py b/jupyter_server/services/contents/largefilemanager.py index 7359659ad4..e48eeae91f 100644 --- a/jupyter_server/services/contents/largefilemanager.py +++ b/jupyter_server/services/contents/largefilemanager.py @@ -1,10 +1,12 @@ -from jupyter_server.services.contents.filemanager import FileContentsManager +from anyio import run_sync_in_worker_thread from 
contextlib import contextmanager from tornado import web import nbformat import base64 import os, io +from jupyter_server.services.contents.filemanager import AsyncFileContentsManager, FileContentsManager + class LargeFileManager(FileContentsManager): """Handle large file upload.""" @@ -71,3 +73,69 @@ def _save_large_file(self, os_path, content, format): with io.open(os_path, 'ab') as f: f.write(bcontent) + +class AsyncLargeFileManager(AsyncFileContentsManager): + """Handle large file upload asynchronously""" + + async def save(self, model, path=''): + """Save the file model and return the model with no content.""" + chunk = model.get('chunk', None) + if chunk is not None: + path = path.strip('/') + + if 'type' not in model: + raise web.HTTPError(400, u'No file type provided') + if model['type'] != 'file': + raise web.HTTPError(400, u'File type "{}" is not supported for large file transfer'.format(model['type'])) + if 'content' not in model and model['type'] != 'directory': + raise web.HTTPError(400, u'No file content provided') + + os_path = self._get_os_path(path) + + try: + if chunk == 1: + self.log.debug("Saving %s", os_path) + self.run_pre_save_hook(model=model, path=path) + await super(AsyncLargeFileManager, self)._save_file(os_path, model['content'], model.get('format')) + else: + await self._save_large_file(os_path, model['content'], model.get('format')) + except web.HTTPError: + raise + except Exception as e: + self.log.error(u'Error while saving file: %s %s', path, e, exc_info=True) + raise web.HTTPError(500, u'Unexpected error while saving file: %s %s' % + (path, e)) from e + + model = await self.get(path, content=False) + + # Last chunk + if chunk == -1: + self.run_post_save_hook(model=model, os_path=os_path) + return model + else: + return await super(AsyncLargeFileManager, self).save(model, path) + + async def _save_large_file(self, os_path, content, format): + """Save content of a generic file.""" + if format not in {'text', 'base64'}: + raise 
web.HTTPError( + 400, + "Must specify format of file contents as 'text' or 'base64'", + ) + try: + if format == 'text': + bcontent = content.encode('utf8') + else: + b64_bytes = content.encode('ascii') + bcontent = base64.b64decode(b64_bytes) + except Exception as e: + raise web.HTTPError( + 400, u'Encoding error saving %s: %s' % (os_path, e) + ) from e + + with self.perm_to_403(os_path): + if os.path.islink(os_path): + os_path = os.path.join(os.path.dirname(os_path), os.readlink(os_path)) + with io.open(os_path, 'ab') as f: + await run_sync_in_worker_thread(f.write, bcontent) + diff --git a/tests/services/contents/test_largefilemanager.py b/tests/services/contents/test_largefilemanager.py index f06f31dfb8..bd3dfdd7a7 100644 --- a/tests/services/contents/test_largefilemanager.py +++ b/tests/services/contents/test_largefilemanager.py @@ -1,19 +1,28 @@ import pytest import tornado +from jupyter_server.services.contents.largefilemanager import AsyncLargeFileManager, LargeFileManager +from jupyter_server.utils import ensure_async from ...utils import expected_http_error -def test_save(jp_large_contents_manager): +@pytest.fixture(params=[LargeFileManager, AsyncLargeFileManager]) +def jp_large_contents_manager(request, tmp_path): + """Returns a LargeFileManager instance.""" + file_manager = request.param + return file_manager(root_dir=str(tmp_path)) + + +async def test_save(jp_large_contents_manager): cm = jp_large_contents_manager - model = cm.new_untitled(type='notebook') + model = await ensure_async(cm.new_untitled(type='notebook')) name = model['name'] path = model['path'] # Get the model with 'content' - full_model = cm.get(path) + full_model = await ensure_async(cm.get(path)) # Save the notebook - model = cm.save(full_model, path) + model = await ensure_async(cm.save(full_model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model @@ -43,26 +52,26 @@ def test_save(jp_large_contents_manager): ) ] ) -def 
test_bad_save(jp_large_contents_manager, model, err_message): +async def test_bad_save(jp_large_contents_manager, model, err_message): with pytest.raises(tornado.web.HTTPError) as e: - jp_large_contents_manager.save(model, model['path']) + await ensure_async(jp_large_contents_manager.save(model, model['path'])) assert expected_http_error(e, 400, expected_message=err_message) -def test_saving_different_chunks(jp_large_contents_manager): +async def test_saving_different_chunks(jp_large_contents_manager): cm = jp_large_contents_manager model = {'name': 'test', 'path': 'test', 'type': 'file', 'content': u'test==', 'format': 'text'} name = model['name'] path = model['path'] - cm.save(model, path) + await ensure_async(cm.save(model, path)) for chunk in (1, 2, -1): for fm in ('text', 'base64'): - full_model = cm.get(path) + full_model = await ensure_async(cm.get(path)) full_model['chunk'] = chunk full_model['format'] = fm - model_res = cm.save(full_model, path) + model_res = await ensure_async(cm.save(full_model, path)) assert isinstance(model_res, dict) assert 'name' in model_res assert 'path' in model_res @@ -71,16 +80,16 @@ def test_saving_different_chunks(jp_large_contents_manager): assert model_res['path'] == path -def test_save_in_subdirectory(jp_large_contents_manager, tmp_path): +async def test_save_in_subdirectory(jp_large_contents_manager, tmp_path): cm = jp_large_contents_manager sub_dir = tmp_path / 'foo' sub_dir.mkdir() - model = cm.new_untitled(path='/foo/', type='notebook') + model = await ensure_async(cm.new_untitled(path='/foo/', type='notebook')) path = model['path'] - model = cm.get(path) + model = await ensure_async(cm.get(path)) # Change the name in the model for rename - model = cm.save(model, path) + model = await ensure_async(cm.save(model, path)) assert isinstance(model, dict) assert 'name' in model assert 'path' in model From 564325e9ba048c203c1cde83a47390328166821f Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 20 Nov 2020 10:47:51 
-0700 Subject: [PATCH 37/68] Create AsyncGenericFileCheckpoints --- .../services/contents/filecheckpoints.py | 66 +++++++++++++++++++ tests/services/contents/test_config.py | 13 ++-- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/jupyter_server/services/contents/filecheckpoints.py b/jupyter_server/services/contents/filecheckpoints.py index 5910b60768..757429a217 100644 --- a/jupyter_server/services/contents/filecheckpoints.py +++ b/jupyter_server/services/contents/filecheckpoints.py @@ -9,6 +9,7 @@ from .checkpoints import ( AsyncCheckpoints, Checkpoints, + AsyncGenericCheckpointsMixin, GenericCheckpointsMixin, ) from .fileio import AsyncFileManagerMixin, FileManagerMixin @@ -266,3 +267,68 @@ def get_file_checkpoint(self, checkpoint_id, path): 'content': content, 'format': format, } + + +class AsyncGenericFileCheckpoints(AsyncGenericCheckpointsMixin, AsyncFileCheckpoints): + """ + Asynchronous Local filesystem Checkpoints that works with any conforming + ContentsManager. 
+ """ + async def create_file_checkpoint(self, content, format, path): + """Create a checkpoint from the current content of a file.""" + path = path.strip('/') + # only the one checkpoint ID: + checkpoint_id = u"checkpoint" + os_checkpoint_path = self.checkpoint_path(checkpoint_id, path) + self.log.debug("creating checkpoint for %s", path) + with self.perm_to_403(): + await self._save_file(os_checkpoint_path, content, format=format) + + # return the checkpoint info + return await self.checkpoint_model(checkpoint_id, os_checkpoint_path) + + async def create_notebook_checkpoint(self, nb, path): + """Create a checkpoint from the current content of a notebook.""" + path = path.strip('/') + # only the one checkpoint ID: + checkpoint_id = u"checkpoint" + os_checkpoint_path = self.checkpoint_path(checkpoint_id, path) + self.log.debug("creating checkpoint for %s", path) + with self.perm_to_403(): + await self._save_notebook(os_checkpoint_path, nb) + + # return the checkpoint info + return await self.checkpoint_model(checkpoint_id, os_checkpoint_path) + + async def get_notebook_checkpoint(self, checkpoint_id, path): + """Get a checkpoint for a notebook.""" + path = path.strip('/') + self.log.info("restoring %s from checkpoint %s", path, checkpoint_id) + os_checkpoint_path = self.checkpoint_path(checkpoint_id, path) + + if not os.path.isfile(os_checkpoint_path): + self.no_such_checkpoint(path, checkpoint_id) + + return { + 'type': 'notebook', + 'content': await self._read_notebook( + os_checkpoint_path, + as_version=4, + ), + } + + async def get_file_checkpoint(self, checkpoint_id, path): + """Get a checkpoint for a file.""" + path = path.strip('/') + self.log.info("restoring %s from checkpoint %s", path, checkpoint_id) + os_checkpoint_path = self.checkpoint_path(checkpoint_id, path) + + if not os.path.isfile(os_checkpoint_path): + self.no_such_checkpoint(path, checkpoint_id) + + content, format = await self._read_file(os_checkpoint_path, format=None) + return { + 'type': 
'file', + 'content': content, + 'format': format, + } diff --git a/tests/services/contents/test_config.py b/tests/services/contents/test_config.py index 06159d0b51..7fb2289eaf 100644 --- a/tests/services/contents/test_config.py +++ b/tests/services/contents/test_config.py @@ -1,17 +1,18 @@ import pytest from jupyter_server.services.contents.checkpoints import AsyncCheckpoints -from jupyter_server.services.contents.filecheckpoints import GenericFileCheckpoints +from jupyter_server.services.contents.filecheckpoints import AsyncGenericFileCheckpoints, GenericFileCheckpoints from jupyter_server.services.contents.manager import AsyncContentsManager -@pytest.fixture -def jp_server_config(): - return {'FileContentsManager': {'checkpoints_class': GenericFileCheckpoints}} +@pytest.fixture(params=[AsyncGenericFileCheckpoints, GenericFileCheckpoints]) +def jp_server_config(request): + return {'FileContentsManager': {'checkpoints_class': request.param}} -def test_config_did_something(jp_serverapp): - assert isinstance(jp_serverapp.contents_manager.checkpoints, GenericFileCheckpoints) +def test_config_did_something(jp_server_config, jp_serverapp): + assert isinstance(jp_serverapp.contents_manager.checkpoints, + jp_server_config['FileContentsManager']['checkpoints_class']) async def test_async_contents_manager(jp_configurable_serverapp): From 188fc3c1e2c505ce6e1ce66887eb002979d7f68b Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Thu, 3 Dec 2020 10:26:22 -0700 Subject: [PATCH 38/68] Make handlers compatible with async CMs --- jupyter_server/base/handlers.py | 10 +++++----- jupyter_server/files/handlers.py | 6 +++--- jupyter_server/nbconvert/handlers.py | 5 +++-- jupyter_server/services/contents/filemanager.py | 1 - jupyter_server/services/contents/handlers.py | 4 ++-- jupyter_server/services/contents/largefilemanager.py | 3 +-- jupyter_server/services/contents/manager.py | 4 ++-- jupyter_server/view/handlers.py | 6 +++--- 8 files changed, 19 insertions(+), 20 
deletions(-) diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index 32a348607a..57a37bda83 100755 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -31,7 +31,7 @@ import jupyter_server from jupyter_server._tz import utcnow from jupyter_server.i18n import combine_translations -from jupyter_server.utils import is_hidden, url_path_join, url_is_absolute, url_escape +from jupyter_server.utils import ensure_async, is_hidden, url_path_join, url_is_absolute, url_escape from jupyter_server.services.security import csp_report_uri #----------------------------------------------------------------------------- @@ -800,13 +800,13 @@ class FilesRedirectHandler(JupyterHandler): """Handler for redirecting relative URLs to the /files/ handler""" @staticmethod - def redirect_to_files(self, path): + async def redirect_to_files(self, path): """make redirect logic a reusable static method so it can be called from other handlers. """ cm = self.contents_manager - if cm.dir_exists(path): + if await ensure_async(cm.dir_exists(path)): # it's a *directory*, redirect to /tree url = url_path_join(self.base_url, 'tree', url_escape(path)) else: @@ -814,14 +814,14 @@ def redirect_to_files(self, path): # otherwise, redirect to /files parts = path.split('/') - if not cm.file_exists(path=path) and 'files' in parts: + if not await ensure_async(cm.file_exists(path=path)) and 'files' in parts: # redirect without files/ iff it would 404 # this preserves pre-2.0-style 'files/' links self.log.warning("Deprecated files/ URL: %s", orig_path) parts.remove('files') path = '/'.join(parts) - if not cm.file_exists(path=path): + if not await ensure_async(cm.file_exists(path=path)): raise web.HTTPError(404) url = url_path_join(self.base_url, 'files', url_escape(path)) diff --git a/jupyter_server/files/handlers.py b/jupyter_server/files/handlers.py index 51b990f238..e73c445c65 100644 --- a/jupyter_server/files/handlers.py +++ 
b/jupyter_server/files/handlers.py @@ -8,7 +8,7 @@ from base64 import decodebytes from tornado import web from jupyter_server.base.handlers import JupyterHandler - +from jupyter_server.utils import ensure_async class FilesHandler(JupyterHandler): """serve files via ContentsManager @@ -34,7 +34,7 @@ def head(self, path): async def get(self, path, include_body=True): cm = self.contents_manager - if cm.is_hidden(path) and not cm.allow_hidden: + if await ensure_async(cm.is_hidden(path)) and not cm.allow_hidden: self.log.info("Refusing to serve hidden file, via 404 Error") raise web.HTTPError(404) @@ -44,7 +44,7 @@ async def get(self, path, include_body=True): else: name = path - model = await cm.get(path, type='file', content=include_body) + model = await ensure_async(cm.get(path, type='file', content=include_body)) if self.get_argument("download", False): self.set_attachment_header(name) diff --git a/jupyter_server/nbconvert/handlers.py b/jupyter_server/nbconvert/handlers.py index 0c962e3c13..550d7bace1 100644 --- a/jupyter_server/nbconvert/handlers.py +++ b/jupyter_server/nbconvert/handlers.py @@ -14,6 +14,7 @@ JupyterHandler, FilesRedirectHandler, path_regex, ) +from jupyter_server.utils import ensure_async from nbformat import from_dict from ipython_genutils.py3compat import cast_bytes @@ -80,7 +81,7 @@ class NbconvertFileHandler(JupyterHandler): SUPPORTED_METHODS = ('GET',) @web.authenticated - def get(self, format, path): + async def get(self, format, path): exporter = get_exporter(format, config=self.config, log=self.log) @@ -93,7 +94,7 @@ def get(self, format, path): else: ext_resources_dir = None - model = self.contents_manager.get(path=path) + model = await ensure_async(self.contents_manager.get(path=path)) name = model['name'] if model['type'] != 'notebook': # not a notebook, redirect to files diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 3bc5905230..e6fc1e53ca 100644 --- 
a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -5,7 +5,6 @@ from datetime import datetime import errno -import io import os import shutil import stat diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 70e11366bf..7ba6bd2d5e 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -176,7 +176,7 @@ async def post(self, path=''): cm = self.contents_manager - file_exists = cm.file_exists(path) + file_exists = await ensure_async(cm.file_exists(path)) if file_exists: raise web.HTTPError(400, "Cannot POST to files, use PUT instead.") @@ -213,7 +213,7 @@ async def put(self, path=''): if model: if model.get('copy_from'): raise web.HTTPError(400, "Cannot copy with PUT, only POST") - exists = self.contents_manager.file_exists(path) + exists = await ensure_async(self.contents_manager.file_exists(path)) if exists: await self._save(model, path) else: diff --git a/jupyter_server/services/contents/largefilemanager.py b/jupyter_server/services/contents/largefilemanager.py index e48eeae91f..b2c7a2fd74 100644 --- a/jupyter_server/services/contents/largefilemanager.py +++ b/jupyter_server/services/contents/largefilemanager.py @@ -1,7 +1,5 @@ from anyio import run_sync_in_worker_thread -from contextlib import contextmanager from tornado import web -import nbformat import base64 import os, io @@ -139,3 +137,4 @@ async def _save_large_file(self, os_path, content, format): with io.open(os_path, 'ab') as f: await run_sync_in_worker_thread(f.write, bcontent) + diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 4868c2b965..c91c4493cf 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -623,7 +623,7 @@ async def exists(self, path): exists : bool Whether the target exists. 
""" - return await (self.file_exists(path) or self.dir_exists(path)) + return await (ensure_async(self.file_exists(path)) or ensure_async(self.dir_exists(path))) async def get(self, path, content=True, type=None, format=None): """Get a file or directory model.""" @@ -807,7 +807,7 @@ async def copy(self, from_path, to_path=None): raise HTTPError(400, "Can't copy directories") if to_path is None: to_path = from_dir - if self.dir_exists(to_path): + if await ensure_async(self.dir_exists(to_path)): name = copy_pat.sub(u'.', from_name) to_name = await self.increment_filename(name, to_path, insert='-Copy') to_path = u'{0}/{1}'.format(to_path, to_name) diff --git a/jupyter_server/view/handlers.py b/jupyter_server/view/handlers.py index 5663d4db3a..76f5a65b29 100644 --- a/jupyter_server/view/handlers.py +++ b/jupyter_server/view/handlers.py @@ -6,15 +6,15 @@ from tornado import web from ..base.handlers import JupyterHandler, path_regex -from ..utils import url_escape, url_path_join +from ..utils import ensure_async, url_escape, url_path_join class ViewHandler(JupyterHandler): """Render HTML files within an iframe.""" @web.authenticated - def get(self, path): + async def get(self, path): path = path.strip('/') - if not self.contents_manager.file_exists(path): + if not await ensure_async(self.contents_manager.file_exists(path)): raise web.HTTPError(404, u'File does not exist: %s' % path) basename = path.rsplit('/', 1)[-1] From b19edc00708cbda23311212d5f04b71262fedf27 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 4 Dec 2020 10:21:57 -0700 Subject: [PATCH 39/68] update docs and configs --- docs/source/developers/contents.rst | 15 +- docs/source/other/full-config.rst | 364 +++++++++++++++++++++++++++- 2 files changed, 373 insertions(+), 6 deletions(-) diff --git a/docs/source/developers/contents.rst b/docs/source/developers/contents.rst index 21472d91d1..49429b4476 100644 --- a/docs/source/developers/contents.rst +++ b/docs/source/developers/contents.rst @@ 
-269,11 +269,22 @@ ContentsManager. .. _PostgreSQL: https://www.postgresql.org/ Asynchronous Support -~~~~~~~~~~~~~~~~~~~~ +-------------------- -To execute file operations asynchronously in a virtual filesystem, the following classes are available. +An asynchronous version of the Contents API is available to run slow IO processes concurrently. - :class:`~manager.AsyncContentsManager` - :class:`~filemanager.AsyncFileContentsManager` +- :class:`~largefilemanager.AsyncLargeFileManager` - :class:`~checkpoints.AsyncCheckpoints` +- :class:`~checkpoints.AsyncGenericCheckpointsMixin` + +.. note:: + + .. _contentfree: + + In most cases, the non-asynchronous Contents API is performant for local filesystems. + However, if the Jupyter Notebook web application is interacting with a high-latency virtual filesystem, you may see performance gains by using the asynchronous version. + For example, if you're experiencing terminal lag in the web application due to the slow and blocking file operations, the asynchronous version may be a good option. + Before opting in, comparing both non-async and async options' performance is recommended. diff --git a/docs/source/other/full-config.rst b/docs/source/other/full-config.rst index 7699735388..8e0a3b1c46 100644 --- a/docs/source/other/full-config.rst +++ b/docs/source/other/full-config.rst @@ -41,11 +41,21 @@ Application.log_format : Unicode The Logging format template -Application.log_level : 0|10|20|30|40|50|'DEBUG'|'INFO'|'WARN'|'ERROR'|'CRITICAL' +Application.log_level : any of ``0``|``10``|``20``|``30``|``40``|``50``|``'DEBUG'``|``'INFO'``|``'WARN'``|``'ERROR'``|``'CRITICAL'`` Default: ``30`` Set the log level by value or name. 
+Application.show_config : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout + +Application.show_config_json : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout (as JSON) + JupyterApp.answer_yes : Bool Default: ``False`` @@ -66,6 +76,31 @@ JupyterApp.generate_config : Bool Generate default config file. +JupyterApp.log_datefmt : Unicode + Default: ``'%Y-%m-%d %H:%M:%S'`` + + The date format used by logging formatters for %(asctime)s + +JupyterApp.log_format : Unicode + Default: ``'[%(name)s]%(highlevel)s %(message)s'`` + + The Logging format template + +JupyterApp.log_level : any of ``0``|``10``|``20``|``30``|``40``|``50``|``'DEBUG'``|``'INFO'``|``'WARN'``|``'ERROR'``|``'CRITICAL'`` + Default: ``30`` + + Set the log level by value or name. + +JupyterApp.show_config : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout + +JupyterApp.show_config_json : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout (as JSON) + ServerApp.allow_credentials : Bool Default: ``False`` @@ -129,6 +164,11 @@ ServerApp.allow_root : Bool Whether to allow the user to run the server as root. +ServerApp.answer_yes : Bool + Default: ``False`` + + Answer yes to any prompts. + ServerApp.base_url : Unicode Default: ``'/'`` @@ -158,6 +198,16 @@ ServerApp.client_ca : Unicode The full path to a certificate authority certificate for SSL/TLS client authentication. +ServerApp.config_file : Unicode + Default: ``''`` + + Full path of a config file. + +ServerApp.config_file_name : Unicode + Default: ``''`` + + Specify a config file to load. + ServerApp.config_manager_class : Type Default: ``'jupyter_server.services.config.manager.ConfigManager'`` @@ -250,6 +300,11 @@ ServerApp.file_to_run : Unicode No description +ServerApp.generate_config : Bool + Default: ``False`` + + Generate default config file. 
+ ServerApp.get_secure_cookie_kwargs : Dict Default: ``{}`` @@ -319,6 +374,21 @@ ServerApp.local_hostnames : List as local as well. +ServerApp.log_datefmt : Unicode + Default: ``'%Y-%m-%d %H:%M:%S'`` + + The date format used by logging formatters for %(asctime)s + +ServerApp.log_format : Unicode + Default: ``'[%(name)s]%(highlevel)s %(message)s'`` + + The Logging format template + +ServerApp.log_level : any of ``0``|``10``|``20``|``30``|``40``|``50``|``'DEBUG'``|``'INFO'``|``'WARN'``|``'ERROR'``|``'CRITICAL'`` + Default: ``30`` + + Set the log level by value or name. + ServerApp.login_handler_class : Type Default: ``'jupyter_server.auth.login.LoginHandler'`` @@ -431,6 +501,16 @@ ServerApp.session_manager_class : Type The session manager class to use. +ServerApp.show_config : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout + +ServerApp.show_config_json : Bool + Default: ``False`` + + Instead of starting the Application, dump configuration to stdout (as JSON) + ServerApp.shutdown_no_activity_timeout : Int Default: ``0`` @@ -560,7 +640,7 @@ ConnectionFileMixin.stdin_port : Int set the stdin (ROUTER) port [default: random] -ConnectionFileMixin.transport : 'tcp'|'ipc' +ConnectionFileMixin.transport : any of ``'tcp'``|``'ipc'`` (case-insensitive) Default: ``'tcp'`` No description @@ -570,6 +650,39 @@ KernelManager.autorestart : Bool Should we autorestart the kernel if it dies. +KernelManager.connection_file : Unicode + Default: ``''`` + + JSON file in which to store connection info [default: kernel-.json] + + This file will contain the IP, ports, and authentication key needed to connect + clients to this kernel. By default, this file will be created in the security dir + of the current profile, but can be specified by absolute path. 
+ + +KernelManager.control_port : Int + Default: ``0`` + + set the control (ROUTER) port [default: random] + +KernelManager.hb_port : Int + Default: ``0`` + + set the heartbeat port [default: random] + +KernelManager.iopub_port : Int + Default: ``0`` + + set the iopub (PUB) port [default: random] + +KernelManager.ip : Unicode + Default: ``''`` + + Set the kernel's IP address [default localhost]. + If the IP address is something other than localhost, then + Consoles on other machines will be able to connect + to the Kernel, so be careful! + KernelManager.kernel_cmd : List Default: ``[]`` @@ -585,11 +698,26 @@ KernelManager.kernel_cmd : List option --debug if it given on the Jupyter command line. +KernelManager.shell_port : Int + Default: ``0`` + + set the shell (ROUTER) port [default: random] + KernelManager.shutdown_wait_time : Float Default: ``5.0`` Time to wait for a kernel to terminate before killing it, in seconds. +KernelManager.stdin_port : Int + Default: ``0`` + + set the stdin (ROUTER) port [default: random] + +KernelManager.transport : any of ``'tcp'``|``'ipc'`` (case-insensitive) + Default: ``'tcp'`` + + No description + Session.buffer_threshold : Int Default: ``1024`` @@ -668,7 +796,7 @@ Session.unpacker : DottedObjectName Only used with custom functions for `packer`. Session.username : Unicode - Default: ``'echar4'`` + Default: ``'mwakabayashi'`` Username for the Session. Default is your system username. @@ -689,6 +817,11 @@ MultiKernelManager.shared_context : Bool Share a single zmq.Context to talk to all my kernels +MappingKernelManager.allow_tracebacks : Bool + Default: ``True`` + + Whether to send tracebacks to clients on exceptions. + MappingKernelManager.allowed_message_types : List Default: ``[]`` @@ -732,6 +865,11 @@ MappingKernelManager.cull_interval : Int The interval (in seconds) on which to check for idle kernels exceeding the cull timeout value. 
+MappingKernelManager.default_kernel_name : Unicode + Default: ``'python3'`` + + The name of the default kernel to start + MappingKernelManager.kernel_info_timeout : Float Default: ``60`` @@ -745,11 +883,28 @@ MappingKernelManager.kernel_info_timeout : Float and the ZMQChannelsHandler (which handles the startup). +MappingKernelManager.kernel_manager_class : DottedObjectName + Default: ``'jupyter_client.ioloop.IOLoopKernelManager'`` + + The kernel manager class. This is configurable to allow + subclassing of the KernelManager for customized behavior. + + MappingKernelManager.root_dir : Unicode Default: ``''`` No description +MappingKernelManager.shared_context : Bool + Default: ``True`` + + Share a single zmq.Context to talk to all my kernels + +MappingKernelManager.traceback_replacement_message : Unicode + Default: ``'An exception occurred at runtime, which is not shown due to ...`` + + Message to print when allow_tracebacks is False, and an exception occurs + KernelSpecManager.ensure_native_kernel : Bool Default: ``True`` @@ -870,6 +1025,26 @@ FileManagerMixin.use_atomic_writing : Bool This procedure, namely 'atomic_writing', causes some bugs on file system whitout operation order enforcement (like some networked fs). If set to False, the new notebook is written directly on the old one which could fail (eg: full filesystem or quota ) +FileContentsManager.allow_hidden : Bool + Default: ``False`` + + Allow access to hidden files + +FileContentsManager.checkpoints : Instance + Default: ``None`` + + No description + +FileContentsManager.checkpoints_class : Type + Default: ``'jupyter_server.services.contents.checkpoints.Checkpoints'`` + + No description + +FileContentsManager.checkpoints_kwargs : Dict + Default: ``{}`` + + No description + FileContentsManager.delete_to_trash : Bool Default: ``True`` @@ -877,6 +1052,36 @@ FileContentsManager.delete_to_trash : Bool platform's trash/recycle bin, where they can be recovered. If False, deleting files really deletes them. 
+FileContentsManager.files_handler_class : Type + Default: ``'jupyter_server.files.handlers.FilesHandler'`` + + handler class to use when serving raw file requests. + + Default is a fallback that talks to the ContentsManager API, + which may be inefficient, especially for large files. + + Local files-based ContentsManagers can use a StaticFileHandler subclass, + which will be much more efficient. + + Access to these files should be Authenticated. + + +FileContentsManager.files_handler_params : Dict + Default: ``{}`` + + Extra parameters to pass to files_handler_class. + + For example, StaticFileHandlers generally expect a `path` argument + specifying the root directory from which to serve files. + + +FileContentsManager.hide_globs : List + Default: ``['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dyl...`` + + + Glob patterns to hide in file and directory listings. + + FileContentsManager.post_save_hook : Any Default: ``None`` @@ -896,12 +1101,55 @@ FileContentsManager.post_save_hook : Any - contents_manager: this ContentsManager instance +FileContentsManager.pre_save_hook : Any + Default: ``None`` + + Python callable or importstring thereof + + To be called on a contents model prior to save. + + This can be used to process the structure, + such as removing notebook outputs or other side effects that + should not be saved. + + It will be called as (all arguments passed by keyword):: + + hook(path=path, model=model, contents_manager=self) + + - model: the model to be saved. Includes file contents. + Modifying this dict will affect the file that is stored. 
+ - path: the API path of the save destination + - contents_manager: this ContentsManager instance + + FileContentsManager.root_dir : Unicode Default: ``''`` No description -NotebookNotary.algorithm : 'md5'|'sha3_512'|'blake2b'|'sha3_384'|'sha3_256'|'sha224'|'sha3_224'|'sha384'|'sha512'|'blake2s'|'sha1'|'sha256' +FileContentsManager.untitled_directory : Unicode + Default: ``'Untitled Folder'`` + + The base name used when creating untitled directories. + +FileContentsManager.untitled_file : Unicode + Default: ``'untitled'`` + + The base name used when creating untitled files. + +FileContentsManager.untitled_notebook : Unicode + Default: ``'Untitled'`` + + The base name used when creating untitled notebooks. + +FileContentsManager.use_atomic_writing : Bool + Default: ``True`` + + By default notebooks are saved on disk on a temporary file and then if succefully written, it replaces the old ones. + This procedure, namely 'atomic_writing', causes some bugs on file system whitout operation order enforcement (like some networked fs). + If set to False, the new notebook is written directly on the old one which could fail (eg: full filesystem or quota ) + +NotebookNotary.algorithm : any of ``'blake2s'``|``'sha512'``|``'md5'``|``'sha3_512'``|``'sha3_224'``|``'blake2b'``|``'sha384'``|``'sha1'``|``'sha3_256'``|``'sha256'``|``'sha224'``|``'sha3_384'`` Default: ``'sha256'`` The hashing algorithm used to sign notebooks. @@ -930,6 +1178,114 @@ NotebookNotary.store_factory : Callable A callable returning the storage backend for notebook signatures. The default uses an SQLite database. +GatewayKernelManager.allow_tracebacks : Bool + Default: ``True`` + + Whether to send tracebacks to clients on exceptions. + +GatewayKernelManager.allowed_message_types : List + Default: ``[]`` + + White list of allowed kernel message types. + When the list is empty, all message types are allowed. 
+ + +GatewayKernelManager.buffer_offline_messages : Bool + Default: ``True`` + + Whether messages from kernels whose frontends have disconnected should be buffered in-memory. + + When True (default), messages are buffered and replayed on reconnect, + avoiding lost messages due to interrupted connectivity. + + Disable if long-running kernels will produce too much output while + no frontends are connected. + + +GatewayKernelManager.cull_busy : Bool + Default: ``False`` + + Whether to consider culling kernels which are busy. + Only effective if cull_idle_timeout > 0. + +GatewayKernelManager.cull_connected : Bool + Default: ``False`` + + Whether to consider culling kernels which have one or more connections. + Only effective if cull_idle_timeout > 0. + +GatewayKernelManager.cull_idle_timeout : Int + Default: ``0`` + + Timeout (in seconds) after which a kernel is considered idle and ready to be culled. + Values of 0 or lower disable culling. Very short timeouts may result in kernels being culled + for users with poor network connections. + +GatewayKernelManager.cull_interval : Int + Default: ``300`` + + The interval (in seconds) on which to check for idle kernels exceeding the cull timeout value. + +GatewayKernelManager.default_kernel_name : Unicode + Default: ``'python3'`` + + The name of the default kernel to start + +GatewayKernelManager.kernel_info_timeout : Float + Default: ``60`` + + Timeout for giving up on a kernel (in seconds). + + On starting and restarting kernels, we check whether the + kernel is running and responsive by sending kernel_info_requests. + This sets the timeout in seconds for how long the kernel can take + before being presumed dead. + This affects the MappingKernelManager (which handles kernel restarts) + and the ZMQChannelsHandler (which handles the startup). + + +GatewayKernelManager.kernel_manager_class : DottedObjectName + Default: ``'jupyter_client.ioloop.IOLoopKernelManager'`` + + The kernel manager class. 
This is configurable to allow + subclassing of the KernelManager for customized behavior. + + +GatewayKernelManager.root_dir : Unicode + Default: ``''`` + + No description + +GatewayKernelManager.shared_context : Bool + Default: ``True`` + + Share a single zmq.Context to talk to all my kernels + +GatewayKernelManager.traceback_replacement_message : Unicode + Default: ``'An exception occurred at runtime, which is not shown due to ...`` + + Message to print when allow_tracebacks is False, and an exception occurs + +GatewayKernelSpecManager.ensure_native_kernel : Bool + Default: ``True`` + + If there is no Python kernelspec registered and the IPython + kernel is available, ensure it is added to the spec list. + + +GatewayKernelSpecManager.kernel_spec_class : Type + Default: ``'jupyter_client.kernelspec.KernelSpec'`` + + The kernel spec class. This is configurable to allow + subclassing of the KernelSpecManager for customized behavior. + + +GatewayKernelSpecManager.whitelist : Set + Default: ``set()`` + + Whitelist of allowed kernel names. + + By default, all installed kernels are allowed. From 01500bdafbab4452f5a3180a86c68387f0ac05e3 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Fri, 4 Dec 2020 11:48:12 -0700 Subject: [PATCH 40/68] Checkpoints: Make all not implemented methods async --- .../services/contents/checkpoints.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/jupyter_server/services/contents/checkpoints.py b/jupyter_server/services/contents/checkpoints.py index 16460d5200..4e86b4c0e0 100644 --- a/jupyter_server/services/contents/checkpoints.py +++ b/jupyter_server/services/contents/checkpoints.py @@ -146,6 +146,25 @@ class AsyncCheckpoints(Checkpoints): """ Base class for managing checkpoints for a ContentsManager asynchronously. 
""" + async def create_checkpoint(self, contents_mgr, path): + """Create a checkpoint.""" + raise NotImplementedError("must be implemented in a subclass") + + async def restore_checkpoint(self, contents_mgr, checkpoint_id, path): + """Restore a checkpoint""" + raise NotImplementedError("must be implemented in a subclass") + + async def rename_checkpoint(self, checkpoint_id, old_path, new_path): + """Rename a single checkpoint from old_path to new_path.""" + raise NotImplementedError("must be implemented in a subclass") + + async def delete_checkpoint(self, checkpoint_id, path): + """delete a checkpoint for a file""" + raise NotImplementedError("must be implemented in a subclass") + + async def list_checkpoints(self, path): + """Return a list of checkpoints for a given file""" + raise NotImplementedError("must be implemented in a subclass") async def rename_all_checkpoints(self, old_path, new_path): """Rename all checkpoints for old_path to new_path.""" @@ -191,3 +210,41 @@ async def restore_checkpoint(self, contents_mgr, checkpoint_id, path): else: raise HTTPError(500, u'Unexpected type %s' % type) await contents_mgr.save(model, path) + + # Required Methods + async def create_file_checkpoint(self, content, format, path): + """Create a checkpoint of the current state of a file + + Returns a checkpoint model for the new checkpoint. + """ + raise NotImplementedError("must be implemented in a subclass") + + async def create_notebook_checkpoint(self, nb, path): + """Create a checkpoint of the current state of a file + + Returns a checkpoint model for the new checkpoint. + """ + raise NotImplementedError("must be implemented in a subclass") + + async def get_file_checkpoint(self, checkpoint_id, path): + """Get the content of a checkpoint for a non-notebook file. 
+
+        Returns a dict of the form:
+        {
+            'type': 'file',
+            'content': <str>,
+            'format': {'text','base64'},
+        }
+        """
+        raise NotImplementedError("must be implemented in a subclass")
+
+    async def get_notebook_checkpoint(self, checkpoint_id, path):
+        """Get the content of a checkpoint for a notebook.
+
+        Returns a dict of the form:
+        {
+            'type': 'notebook',
+            'content': <output of nbformat.read>,
+        }
+        """
+        raise NotImplementedError("must be implemented in a subclass")

From bd057ef13dabd47f79826124b8020f14b374cd0f Mon Sep 17 00:00:00 2001
From: Mariko Wakabayashi
Date: Fri, 4 Dec 2020 11:55:04 -0700
Subject: [PATCH 41/68] Update docs

---
 docs/source/developers/contents.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/developers/contents.rst b/docs/source/developers/contents.rst
index 49429b4476..f5ac3c83ad 100644
--- a/docs/source/developers/contents.rst
+++ b/docs/source/developers/contents.rst
@@ -285,6 +285,6 @@ An asynchronous version of the Contents API is available to run slow IO processe
     In most cases, the non-asynchronous Contents API is performant for local filesystems.
     However, if the Jupyter Notebook web application is interacting with a high-latent virtual filesystem, you may see performance gains by using the asynchronous version.
-    For example, if you're experiencing terminal lag in the web application due to the slow and blocking file operations, the asynchronous version may be a good option.
+    For example, if you're experiencing terminal lag in the web application due to the slow and blocking file operations, the asynchronous version can reduce the lag.
     Before opting in, comparing both non-async and async options' performances is recommended.
From 7d863f35c9c1e5dc8a58ec8af373bbb1b5158475 Mon Sep 17 00:00:00 2001 From: Zachary Sailer Date: Thu, 10 Dec 2020 10:58:22 -0800 Subject: [PATCH 42/68] Update jupyter_server/services/contents/fileio.py --- jupyter_server/pytest_plugin.py | 3 --- jupyter_server/services/contents/fileio.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/jupyter_server/pytest_plugin.py b/jupyter_server/pytest_plugin.py index b8454a6c49..dee6d3f0d5 100644 --- a/jupyter_server/pytest_plugin.py +++ b/jupyter_server/pytest_plugin.py @@ -19,7 +19,6 @@ from jupyter_server.extension import serverextension from jupyter_server.serverapp import ServerApp from jupyter_server.utils import url_path_join -<<<<<<< HEAD from jupyter_server.services.contents.filemanager import FileContentsManager from jupyter_server.services.contents.largefilemanager import LargeFileManager @@ -45,8 +44,6 @@ def mkdir(tmp_path, *parts): def jp_home_dir(tmp_path): """Provides a temporary HOME directory value.""" return mkdir(tmp_path, "home") -======= ->>>>>>> Use jp prefix for fixtures @pytest.fixture diff --git a/jupyter_server/services/contents/fileio.py b/jupyter_server/services/contents/fileio.py index 531715eb67..3311f455b0 100644 --- a/jupyter_server/services/contents/fileio.py +++ b/jupyter_server/services/contents/fileio.py @@ -202,7 +202,7 @@ class FileManagerMixin(Configurable): @contextmanager def open(self, os_path, *args, **kwargs): - """wrapper around open that turns permission errors into 403""" + """wrapper around io.open that turns permission errors into 403""" with self.perm_to_403(os_path): with io.open(os_path, *args, **kwargs) as f: yield f From d05aebe015b9be1d1914c5b80b705f288e888660 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Fri, 11 Dec 2020 14:23:05 -0800 Subject: [PATCH 43/68] update changelog for 1.1.0 release --- CHANGELOG.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf943f841a..e7c7bb5b43 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.1.0] - 2020-12-11 + +[Full Changelog](https://github.com/jupyter-server/jupyter_server/compare/1.0.10...HEAD) + +**Merged pull requests:** + +- Restore pytest plugin from pytest-jupyter [\#360](https://github.com/jupyter-server/jupyter_server/pull/360) ([kevin-bates](https://github.com/kevin-bates)) +- Fix upgrade packaging dependencies build step [\#354](https://github.com/jupyter-server/jupyter_server/pull/354) ([mwakaba2](https://github.com/mwakaba2)) +- Await \_connect and inline read\_messages callback to \_connect [\#350](https://github.com/jupyter-server/jupyter_server/pull/350) ([ricklamers](https://github.com/ricklamers)) +- Update release instructions and dev version [\#348](https://github.com/jupyter-server/jupyter_server/pull/348) ([kevin-bates](https://github.com/kevin-bates)) +- Fix test\_trailing\_slash [\#346](https://github.com/jupyter-server/jupyter_server/pull/346) ([kevin-bates](https://github.com/kevin-bates)) +- Apply security advisory fix to master [\#345](https://github.com/jupyter-server/jupyter_server/pull/345) ([kevin-bates](https://github.com/kevin-bates)) +- Allow toggling auth for prometheus metrics [\#344](https://github.com/jupyter-server/jupyter_server/pull/344) ([yuvipanda](https://github.com/yuvipanda)) +- Port Notebook PRs 5565 and 5588 - terminal shell heuristics [\#343](https://github.com/jupyter-server/jupyter_server/pull/343) ([kevin-bates](https://github.com/kevin-bates)) +- Port gateway updates from notebook \(PRs 5317 and 5484\) [\#341](https://github.com/jupyter-server/jupyter_server/pull/341) ([kevin-bates](https://github.com/kevin-bates)) +- add check\_origin handler to gateway WebSocketChannelsHandler [\#340](https://github.com/jupyter-server/jupyter_server/pull/340) ([ricklamers](https://github.com/ricklamers)) +- Remove pytest11 entrypoint and plugin, require 
tornado 6.1, remove asyncio patch, CI work [\#339](https://github.com/jupyter-server/jupyter_server/pull/339) ([bollwyvl](https://github.com/bollwyvl)) +- Switch fixtures to use those in pytest-jupyter to avoid collisions [\#335](https://github.com/jupyter-server/jupyter_server/pull/335) ([kevin-bates](https://github.com/kevin-bates)) +- Enable CodeQL runs on all pushed branches [\#333](https://github.com/jupyter-server/jupyter_server/pull/333) ([kevin-bates](https://github.com/kevin-bates)) +- Asynchronous Contents API [\#324](https://github.com/jupyter-server/jupyter_server/pull/324) ([mwakaba2](https://github.com/mwakaba2)) + + + ## [1.0.6] - 2020-11-18 1.0.6 is a security release, fixing one vulnerability: From 082fc383661c95ccf49af6ca9aef752bd26a3a6b Mon Sep 17 00:00:00 2001 From: Zsailer Date: Fri, 11 Dec 2020 14:30:12 -0800 Subject: [PATCH 44/68] remove changelog from docs to reduce duplicity --- CHANGELOG.md | 2 +- docs/source/developers/index.rst | 1 - docs/source/other/changelog.rst | 59 -------------------------------- 3 files changed, 1 insertion(+), 61 deletions(-) delete mode 100644 docs/source/other/changelog.rst diff --git a/CHANGELOG.md b/CHANGELOG.md index e7c7bb5b43..745a7eb99b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.1.0] - 2020-12-11 -[Full Changelog](https://github.com/jupyter-server/jupyter_server/compare/1.0.10...HEAD) +[Full Changelog](https://github.com/jupyter-server/jupyter_server/compare/1.0.10...1.1.0) **Merged pull requests:** diff --git a/docs/source/developers/index.rst b/docs/source/developers/index.rst index 64d0a214d4..2d22d69070 100644 --- a/docs/source/developers/index.rst +++ b/docs/source/developers/index.rst @@ -13,4 +13,3 @@ These pages target people writing Jupyter Web applications and server extensions extensions savehooks contents - ../other/changelog \ No newline at end of file diff --git 
a/docs/source/other/changelog.rst b/docs/source/other/changelog.rst deleted file mode 100644 index 34e73f62cc..0000000000 --- a/docs/source/other/changelog.rst +++ /dev/null @@ -1,59 +0,0 @@ -.. _changelog: - -Changelog -========= - -A summary of changes in the Jupyter Server. -For more detailed information, see -`GitHub `__. - -.. tip:: - - Use ``pip install jupyter_server --upgrade`` or ``conda upgrade jupyter_server`` to - upgrade to the latest release. - -.. we push for pip 9+ or it will break for Python 2 users when IPython 6 is out. - -We strongly recommend that you upgrade to version 9+ of pip before upgrading ``jupyter_server``. - -.. tip:: - - Use ``pip install pip --upgrade`` to upgrade pip. Check pip version with - ``pip --version``. - -.. _release-1.0.6: - -1.0.6 ------ - -1.0.6 is a security release, fixing one vulnerability: - -- Fix open redirect vulnerability GHSA-grfj-wjv9-4f9v (CVE-2020-26232) - -.. _release-1.0.0: - -1.0.0 ------ - -- Extension discovery -- Classic server extension discovery and support -- Bug fixes -- Ready for users -- JupyterLab is the first server running on top of Jupyter Server - -.. _release-0.0.2: - -0.0.2 ------ - -- Introduce new extension module -- Pytest for unit tests -- Server documentation -- NbClassic for migration from notebook - -.. _release-0.0.1: - -0.0.1 ------ - -- First release of the Jupyter Server. 
From f55926ea3c7df5baa42b262880920290fa9204ac Mon Sep 17 00:00:00 2001 From: Zsailer Date: Fri, 11 Dec 2020 14:31:51 -0800 Subject: [PATCH 45/68] remove references to changelog --- docs/source/developers/dependency.rst | 4 +--- docs/source/other/index.rst | 1 - docs/source/users/installation.rst | 3 --- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/docs/source/developers/dependency.rst b/docs/source/developers/dependency.rst index d2efb82b24..feaf90b7b1 100644 --- a/docs/source/developers/dependency.rst +++ b/docs/source/developers/dependency.rst @@ -1,12 +1,10 @@ Depending on Jupyter Server =========================== -If your project depends directly on Jupyter Server, be sure to watch Jupyter Server's :ref:`Change Log ` and pin your project to a version that works for your application. Major releases represent possible backwards-compatibility breaking API changes or features. +If your project depends directly on Jupyter Server, be sure to watch Jupyter Server's ChangeLog and pin your project to a version that works for your application. Major releases represent possible backwards-compatibility breaking API changes or features. When a new major version in released on PyPI, a branch for that version will be created in this repository, and the version of the master branch will be bumped to the next major version number. That way, the master branch always reflects the latest un-released version. -To see the changes between releases, checkout the :ref:`Change Log `. - To install the latest patch of a given version: .. 
code-block:: console diff --git a/docs/source/other/index.rst b/docs/source/other/index.rst index 651c0d6581..50dc4f39c3 100644 --- a/docs/source/other/index.rst +++ b/docs/source/other/index.rst @@ -6,5 +6,4 @@ Other helpful documentation links faq - changelog full-config \ No newline at end of file diff --git a/docs/source/users/installation.rst b/docs/source/users/installation.rst index 14bef0a74f..3bc0e3706d 100644 --- a/docs/source/users/installation.rst +++ b/docs/source/users/installation.rst @@ -17,6 +17,3 @@ You upgrade or downgrade to a specific version of Jupyter Server by adding an op .. code-block:: bash pip install jupyter_server==1.0 - - -To see what each version has to offer, checkout our :ref:`changelog`. From 921508b338286d8e946c349e8512bb78770763c8 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Fri, 11 Dec 2020 14:34:07 -0800 Subject: [PATCH 46/68] Release 1.1.0 --- jupyter_server/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/_version.py b/jupyter_server/_version.py index c534f5cdb0..1339559a77 100644 --- a/jupyter_server/_version.py +++ b/jupyter_server/_version.py @@ -11,5 +11,5 @@ # However, be sure to INCLUDE a dot prefix when adding the dev release: X.Y.Z.devN # See: https://www.python.org/dev/peps/pep-0440/#public-version-identifiers -version_info = (1, 1, 0, '.dev0') +version_info = (1, 1, 0, '') __version__ = '.'.join(map(str, version_info[:3])) + ''.join(version_info[3:]) From a3a3a46b907cabd7be66c639cf161aafbf20b2e5 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Fri, 11 Dec 2020 14:38:10 -0800 Subject: [PATCH 47/68] Back to dev version --- jupyter_server/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/_version.py b/jupyter_server/_version.py index 1339559a77..4c3183007d 100644 --- a/jupyter_server/_version.py +++ b/jupyter_server/_version.py @@ -11,5 +11,5 @@ # However, be sure to INCLUDE a dot prefix when adding the dev release: X.Y.Z.devN # See: 
https://www.python.org/dev/peps/pep-0440/#public-version-identifiers -version_info = (1, 1, 0, '') +version_info = (1, 2, 0, '.dev0') __version__ = '.'.join(map(str, version_info[:3])) + ''.join(version_info[3:]) From 755471206fc2e8d62b5e46ea0e780c89ff83c7f1 Mon Sep 17 00:00:00 2001 From: Mariko Wakabayashi Date: Tue, 15 Dec 2020 09:28:00 -0700 Subject: [PATCH 48/68] Fix: await dir_exists --- jupyter_server/services/contents/handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 7ba6bd2d5e..ac4da46f73 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -180,7 +180,7 @@ async def post(self, path=''): if file_exists: raise web.HTTPError(400, "Cannot POST to files, use PUT instead.") - dir_exists = cm.dir_exists(path) + dir_exists = await ensure_async(cm.dir_exists(path)) if not dir_exists: raise web.HTTPError(404, "No such directory: %s" % path) From 506e10b593b9c3bc3690b1b8cbdd6cdd2772baae Mon Sep 17 00:00:00 2001 From: Zsailer Date: Wed, 16 Dec 2020 14:08:53 -0800 Subject: [PATCH 49/68] Release 1.1.1 --- jupyter_server/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/_version.py b/jupyter_server/_version.py index 4c3183007d..aad8f588f5 100644 --- a/jupyter_server/_version.py +++ b/jupyter_server/_version.py @@ -11,5 +11,5 @@ # However, be sure to INCLUDE a dot prefix when adding the dev release: X.Y.Z.devN # See: https://www.python.org/dev/peps/pep-0440/#public-version-identifiers -version_info = (1, 2, 0, '.dev0') +version_info = (1, 1, 1, '') __version__ = '.'.join(map(str, version_info[:3])) + ''.join(version_info[3:]) From 7f81846033947b2e16f716f499c8ed39c5e2dce8 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Wed, 16 Dec 2020 14:10:16 -0800 Subject: [PATCH 50/68] Back to dev version --- jupyter_server/_version.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/jupyter_server/_version.py b/jupyter_server/_version.py index aad8f588f5..4c3183007d 100644 --- a/jupyter_server/_version.py +++ b/jupyter_server/_version.py @@ -11,5 +11,5 @@ # However, be sure to INCLUDE a dot prefix when adding the dev release: X.Y.Z.devN # See: https://www.python.org/dev/peps/pep-0440/#public-version-identifiers -version_info = (1, 1, 1, '') +version_info = (1, 2, 0, '.dev0') __version__ = '.'.join(map(str, version_info[:3])) + ''.join(version_info[3:]) From adcfa2555c31dd20ecd72cecaf1b6f1c3ee3130b Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:00:23 -0700 Subject: [PATCH 51/68] Add initial eventlog hook --- MANIFEST.in | 3 ++ jupyter_server/base/handlers.py | 4 +++ .../contentsmanager-actions.json | 30 ++++++++++++++++ jupyter_server/serverapp.py | 19 ++++++++++ jupyter_server/services/contents/handlers.py | 35 +++++++++++++++++++ 5 files changed, 91 insertions(+) create mode 100644 jupyter_server/event-schemas/contentsmanager-actions.json diff --git a/MANIFEST.in b/MANIFEST.in index e8b3aad87e..387a95e5f9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,6 +8,9 @@ include setupbase.py # include everything in package_data recursive-include jupyter_server * +# Event Schemas +graft jupyter_server/event-schemas + # Documentation graft docs exclude docs/\#* diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index 57a37bda83..89203569e1 100755 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -204,6 +204,10 @@ def jinja_template_vars(self): """User-supplied values to supply to jinja templates.""" return self.settings.get('jinja_template_vars', {}) + @property + def eventlog(self): + return self.settings.get('eventlog') + #--------------------------------------------------------------- # URLs #--------------------------------------------------------------- diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json 
b/jupyter_server/event-schemas/contentsmanager-actions.json new file mode 100644 index 0000000000..242111722e --- /dev/null +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -0,0 +1,30 @@ +{ + "$id": "jupyter.org/contentsmanager-actions", + "version": 1, + "title": "Contents Manager activities", + "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "type": "object", + "required": ["action", "path"], + "properties": { + "action": { + "enum": [ + "get", + "create", + "save", + "upload", + "rename", + "create", + "copy" + ], + "description": "Action performed by contents manager" + }, + "path": { + "type": "string", + "description": "Logical path the action was performed in" + }, + "source_path": { + "type": "string", + "description": "If action is 'copy', this specifies the source path" + } + } +} \ No newline at end of file diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 3089465886..ab005fd9a2 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -33,6 +33,7 @@ import warnings import webbrowser import urllib +from glob import glob from types import ModuleType from base64 import encodebytes @@ -94,6 +95,8 @@ ) from ipython_genutils import py3compat from jupyter_core.paths import jupyter_runtime_dir, jupyter_path +from jupyter_telemetry.eventlog import EventLog + from jupyter_server._sysinfo import get_sys_info from ._tz import utcnow, utcfromtimestamp @@ -272,6 +275,7 @@ def init_settings(self, jupyter_app, kernel_manager, contents_manager, server_root_dir=root_dir, jinja2_env=env, terminals_available=False, # Set later if terminals are available + eventlog=jupyter_app.eventlog, serverapp=jupyter_app ) @@ -1619,6 +1623,19 @@ def _init_asyncio_patch(): DeprecationWarning ) + def init_eventlog(self): + self.eventlog = EventLog(parent=self) + + schemas_glob = os.path.join( + os.path.dirname(__file__), + 'event-schemas', + '*.json' + ) + + for schema_file in 
glob(schemas_glob): + with open(schema_file) as f: + self.eventlog.register_schema(json.load(f)) + @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): """Initialize the Server application class, configurables, web application, and http server. @@ -1649,8 +1666,10 @@ def initialize(self, argv=None, find_extensions=True, new_httpserver=True): self.find_server_extensions() self.init_logging() self.init_server_extensions() + self.init_eventlog() self.init_configurables() self.init_components() + self.init_eventlog() self.init_webapp() if new_httpserver: self.init_httpserver() diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index ac4da46f73..0f6cb7f97b 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -112,6 +112,10 @@ async def get(self, path=''): )) validate_model(model, expect_content=content) self._finish_model(model, location=False) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'get', 'path': model['path'] } + ) @web.authenticated async def patch(self, path=''): @@ -120,10 +124,19 @@ async def patch(self, path=''): model = self.get_json_body() if model is None: raise web.HTTPError(400, u'JSON body missing') + self.log.info(model) model = await ensure_async(cm.update(model, path)) validate_model(model, expect_content=False) self._finish_model(model) + self.log.info(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + # FIXME: 'path' always has a leading slash, while model['path'] does not. + # What to do here for source_path? path munge manually? 
Eww + { 'action': 'rename', 'path': model['path'], 'source_path': path } + ) + @gen.coroutine async def _copy(self, copy_from, copy_to=None): """Copy a file, optionally specifying a target directory.""" self.log.info(u"Copying {copy_from} to {copy_to}".format( @@ -134,6 +147,10 @@ async def _copy(self, copy_from, copy_to=None): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } + ) async def _upload(self, model, path): """Handle upload of a new file to path""" @@ -142,6 +159,10 @@ async def _upload(self, model, path): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'upload', 'path': model['path'] } + ) async def _new_untitled(self, path, type='', ext=''): """Create a new, empty untitled entity""" @@ -151,6 +172,11 @@ async def _new_untitled(self, path, type='', ext=''): self.set_status(201) validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + # Set path to path of created object, not directory it was created in + { 'action': 'create', 'path': model['path'] } + ) async def _save(self, model, path): """Save an existing file.""" @@ -161,6 +187,11 @@ async def _save(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'save', 'path': model['path'] } + ) + @web.authenticated async def post(self, path=''): """Create a new file in the specified path. 
@@ -229,6 +260,10 @@ async def delete(self, path=''): await ensure_async(cm.delete(path)) self.set_status(204) self.finish() + self.eventlog.record_event( + 'jupyter.org/contentsmanager-actions', 1, + { 'action': 'delete', 'path': path } + ) class CheckpointsHandler(APIHandler): From 6870c34cdfdf07ba41c7199435aa07cd2925cabc Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:31:11 -0700 Subject: [PATCH 52/68] Install jupyter_telemetry from source --- .travis.yml | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..6346a3d78d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,105 @@ +# http://travis-ci.org/#!/ipython/ipython +language: python + +cache: + directories: + - $HOME/.cache/bower + - $HOME/.cache/pip +python: + - 3.6 + + +env: + global: + - PATH=$TRAVIS_BUILD_DIR/pandoc:$PATH + matrix: + - GROUP=js/notebook + - GROUP=python + - GROUP=js/base + - GROUP=js/services + +before_install: + - pip install --upgrade pip + # Manually install jupyter_telemetry, as of https://github.com/jupyter/telemetry/pull/10 + - pip install git+https://github.com/yuvipanda/telemetry@5789321 + - pip install --upgrade setuptools wheel nose coverage codecov + - nvm install 6.9.2 + - nvm use 6.9.2 + - node --version + - npm --version + - npm upgrade -g npm + - npm install + - | + if [[ $GROUP == js* ]]; then + npm install -g casperjs@1.1.3 phantomjs-prebuilt@2.1.7 + fi + - | + if [[ $GROUP == docs ]]; then + pip install -r docs/doc-requirements.txt + pip install --upgrade pytest + fi + - | + if [[ $GROUP == selenium ]]; then + pip install --upgrade selenium pytest + # Install Webdriver backend for Firefox: + wget https://github.com/mozilla/geckodriver/releases/download/v0.19.1/geckodriver-v0.19.1-linux64.tar.gz + mkdir geckodriver + tar -xzf geckodriver-v0.19.1-linux64.tar.gz -C geckodriver + export PATH=$PATH:$PWD/geckodriver + fi 
+ - pip install "attrs>=17.4.0" + +install: + - pip install --pre .[test] $EXTRA_PIP + - pip freeze + - wget https://github.com/jgm/pandoc/releases/download/1.19.1/pandoc-1.19.1-1-amd64.deb && sudo dpkg -i pandoc-1.19.1-1-amd64.deb + + +script: + - jupyter kernelspec list + - | + symlinks=$(find . -type l| grep -v './node_modules/' | grep -v './git-hooks') + if [[ $(echo $symlinks) ]]; then + echo "Repository contains symlinks which won't work on windows:" + echo $symlinks + echo "" + false + else + true + fi + - 'if [[ $GROUP == js* ]]; then travis_retry python -m notebook.jstest ${GROUP:3}; fi' + - 'if [[ $GROUP == python ]]; then nosetests -v --exclude-dir notebook/tests/selenium --with-coverage --cover-package=notebook notebook; fi' + - 'if [[ $GROUP == selenium ]]; then py.test -sv notebook/tests/selenium; fi' + - | + if [[ $GROUP == docs ]]; then + EXIT_STATUS=0 + make -C docs/ html || EXIT_STATUS=$? + + if [[ $TRAVIS_EVENT_TYPE == cron ]]; then + make -C docs/ linkcheck || EXIT_STATUS=$?; + fi + + pytest --nbval --current-env docs || EXIT_STATUS=$? 
+ exit $EXIT_STATUS + fi + + +matrix: + include: + - python: 3.6 + env: + - GROUP=selenium + - JUPYTER_TEST_BROWSER=firefox + - MOZ_HEADLESS=1 + addons: + firefox: 57.0 + - python: 3.5 + env: GROUP=python + - python: 3.7 + dist: xenial + env: GROUP=python + - python: 3.6 + env: GROUP=docs + +after_success: + - codecov From 86d9bcbae28fb1d76bee580f3b30b4526247e71b Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 00:57:01 -0700 Subject: [PATCH 53/68] Set up an eventlog API endpoint Bump telemetry extension commit as well --- .travis.yml | 105 ------------------- jupyter_server/services/eventlog/__init__.py | 0 jupyter_server/services/eventlog/handlers.py | 42 ++++++++ 3 files changed, 42 insertions(+), 105 deletions(-) delete mode 100644 .travis.yml create mode 100644 jupyter_server/services/eventlog/__init__.py create mode 100644 jupyter_server/services/eventlog/handlers.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 6346a3d78d..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,105 +0,0 @@ -# http://travis-ci.org/#!/ipython/ipython -language: python - -cache: - directories: - - $HOME/.cache/bower - - $HOME/.cache/pip -python: - - 3.6 - - -env: - global: - - PATH=$TRAVIS_BUILD_DIR/pandoc:$PATH - matrix: - - GROUP=js/notebook - - GROUP=python - - GROUP=js/base - - GROUP=js/services - -before_install: - - pip install --upgrade pip - # Manually install jupyter_telemetry, as of https://github.com/jupyter/telemetry/pull/10 - - pip install git+https://github.com/yuvipanda/telemetry@5789321 - - pip install --upgrade setuptools wheel nose coverage codecov - - nvm install 6.9.2 - - nvm use 6.9.2 - - node --version - - npm --version - - npm upgrade -g npm - - npm install - - | - if [[ $GROUP == js* ]]; then - npm install -g casperjs@1.1.3 phantomjs-prebuilt@2.1.7 - fi - - | - if [[ $GROUP == docs ]]; then - pip install -r docs/doc-requirements.txt - pip install --upgrade pytest - fi - - | - if [[ $GROUP == selenium ]]; then - pip install 
--upgrade selenium pytest - # Install Webdriver backend for Firefox: - wget https://github.com/mozilla/geckodriver/releases/download/v0.19.1/geckodriver-v0.19.1-linux64.tar.gz - mkdir geckodriver - tar -xzf geckodriver-v0.19.1-linux64.tar.gz -C geckodriver - export PATH=$PATH:$PWD/geckodriver - fi - - pip install "attrs>=17.4.0" - -install: - - pip install --pre .[test] $EXTRA_PIP - - pip freeze - - wget https://github.com/jgm/pandoc/releases/download/1.19.1/pandoc-1.19.1-1-amd64.deb && sudo dpkg -i pandoc-1.19.1-1-amd64.deb - - -script: - - jupyter kernelspec list - - | - symlinks=$(find . -type l| grep -v './node_modules/' | grep -v './git-hooks') - if [[ $(echo $symlinks) ]]; then - echo "Repository contains symlinks which won't work on windows:" - echo $symlinks - echo "" - false - else - true - fi - - 'if [[ $GROUP == js* ]]; then travis_retry python -m notebook.jstest ${GROUP:3}; fi' - - 'if [[ $GROUP == python ]]; then nosetests -v --exclude-dir notebook/tests/selenium --with-coverage --cover-package=notebook notebook; fi' - - 'if [[ $GROUP == selenium ]]; then py.test -sv notebook/tests/selenium; fi' - - | - if [[ $GROUP == docs ]]; then - EXIT_STATUS=0 - make -C docs/ html || EXIT_STATUS=$? - - if [[ $TRAVIS_EVENT_TYPE == cron ]]; then - make -C docs/ linkcheck || EXIT_STATUS=$?; - fi - - pytest --nbval --current-env docs || EXIT_STATUS=$? 
- exit $EXIT_STATUS - fi - - -matrix: - include: - - python: 3.6 - env: - - GROUP=selenium - - JUPYTER_TEST_BROWSER=firefox - - MOZ_HEADLESS=1 - addons: - firefox: 57.0 - - python: 3.5 - env: GROUP=python - - python: 3.7 - dist: xenial - env: GROUP=python - - python: 3.6 - env: GROUP=docs - -after_success: - - codecov diff --git a/jupyter_server/services/eventlog/__init__.py b/jupyter_server/services/eventlog/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py new file mode 100644 index 0000000000..687b2591cc --- /dev/null +++ b/jupyter_server/services/eventlog/handlers.py @@ -0,0 +1,42 @@ +import json + +from tornado import web + +from notebook.utils import url_path_join +from notebook.base.handlers import APIHandler, json_errors +from jupyter_telemetry.eventlog import EventLog + + +class EventLoggingHandler(APIHandler): + """ + A handler that receives and stores telemetry data from the client. 
+ """ + @json_errors + @web.authenticated + def post(self, *args, **kwargs): + try: + # Parse the data from the request body + raw_event = json.loads(self.request.body.strip().decode()) + except Exception as e: + raise web.HTTPError(400, str(e)) + + required_fields = {'schema', 'version', 'event'} + for rf in required_fields: + if rf not in raw_event: + raise web.HTTPError(400, f'{rf} is a required field') + + schema_name = raw_event['schema'] + version = raw_event['version'] + event = raw_event['event'] + + # Profile, and move to a background thread if this is problematic + # FIXME: Return a more appropriate error response if validation fails + self.eventlog.record_event(schema_name, version, event) + + self.set_status(204) + self.finish() + + +default_handlers = [ + (r"/api/eventlog", EventLoggingHandler), +] \ No newline at end of file From ed746b6cf71d6b414cc9b2a3d2f560aed4dd20bb Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 13:00:59 -0700 Subject: [PATCH 54/68] Use different naming convention & add test for it Experiments here informed the schema naming recommendations in https://github.com/jupyter/telemetry/pull/11 --- .../contentsmanager-actions.json | 2 +- .../contentsmanager-actions/v1.json | 30 +++++++++++++++++++ jupyter_server/serverapp.py | 17 +++++------ jupyter_server/services/contents/handlers.py | 22 ++++++++------ jupyter_server/utils.py | 9 ++++++ 5 files changed, 61 insertions(+), 19 deletions(-) create mode 100644 jupyter_server/event-schemas/contentsmanager-actions/v1.json diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json index 242111722e..5da6d68b88 100644 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -1,5 +1,5 @@ { - "$id": "jupyter.org/contentsmanager-actions", + "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", "version": 1, "title": 
"Contents Manager activities", "description": "Notebook Server emits this event whenever a contentsmanager action happens", diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.json b/jupyter_server/event-schemas/contentsmanager-actions/v1.json new file mode 100644 index 0000000000..5da6d68b88 --- /dev/null +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.json @@ -0,0 +1,30 @@ +{ + "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", + "version": 1, + "title": "Contents Manager activities", + "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "type": "object", + "required": ["action", "path"], + "properties": { + "action": { + "enum": [ + "get", + "create", + "save", + "upload", + "rename", + "create", + "copy" + ], + "description": "Action performed by contents manager" + }, + "path": { + "type": "string", + "description": "Logical path the action was performed in" + }, + "source_path": { + "type": "string", + "description": "If action is 'copy', this specifies the source path" + } + } +} \ No newline at end of file diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index ab005fd9a2..5b30039b62 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -1626,15 +1626,14 @@ def _init_asyncio_patch(): def init_eventlog(self): self.eventlog = EventLog(parent=self) - schemas_glob = os.path.join( - os.path.dirname(__file__), - 'event-schemas', - '*.json' - ) - - for schema_file in glob(schemas_glob): - with open(schema_file) as f: - self.eventlog.register_schema(json.load(f)) + event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') + # Recursively register all .json files under event-schemas + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.json'): + file_path = os.path.join(dirname, file) + with open(file_path) as f: + self.eventlog.register_schema(json.load(f)) 
@catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 0f6cb7f97b..eb2cc76b04 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -9,10 +9,14 @@ import json from tornado import web - -from jupyter_server.utils import url_path_join, url_escape, ensure_async from jupyter_client.jsonutil import date_default +from jupyter_server.utils import ( + url_path_join, + url_escape, + ensure_async, + eventlogging_schema_fqn +) from jupyter_server.base.handlers import ( JupyterHandler, APIHandler, path_regex, ) @@ -113,7 +117,7 @@ async def get(self, path=''): validate_model(model, expect_content=content) self._finish_model(model, location=False) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'get', 'path': model['path'] } ) @@ -130,7 +134,7 @@ async def patch(self, path=''): self._finish_model(model) self.log.info(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, # FIXME: 'path' always has a leading slash, while model['path'] does not. # What to do here for source_path? path munge manually? 
Eww { 'action': 'rename', 'path': model['path'], 'source_path': path } @@ -148,7 +152,7 @@ async def _copy(self, copy_from, copy_to=None): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } ) @@ -160,7 +164,7 @@ async def _upload(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'upload', 'path': model['path'] } ) @@ -173,7 +177,7 @@ async def _new_untitled(self, path, type='', ext=''): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, # Set path to path of created object, not directory it was created in { 'action': 'create', 'path': model['path'] } ) @@ -188,7 +192,7 @@ async def _save(self, model, path): self._finish_model(model) self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'save', 'path': model['path'] } ) @@ -261,7 +265,7 @@ async def delete(self, path=''): self.set_status(204) self.finish() self.eventlog.record_event( - 'jupyter.org/contentsmanager-actions', 1, + eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'delete', 'path': path } ) diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 42a6ae9278..54e112f97b 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -440,3 +440,12 @@ def wrapped(): result = asyncio.ensure_future(maybe_async) return result return wrapped() + + +def eventlogging_schema_fqn(name): + """ + Return fully qualified event schema name + + Matches 
convention for this particular repo + """ + return 'eventlogging.jupyter.org/notebook/{}'.format(name) From e58531f02546f5c38e99faab0936761333a5ff35 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Sun, 7 Jul 2019 15:07:45 -0700 Subject: [PATCH 55/68] Don't use f-strings python 3.5 is still supported --- jupyter_server/services/eventlog/handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index 687b2591cc..4665e43e8b 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -23,7 +23,7 @@ def post(self, *args, **kwargs): required_fields = {'schema', 'version', 'event'} for rf in required_fields: if rf not in raw_event: - raise web.HTTPError(400, f'{rf} is a required field') + raise web.HTTPError(400, '{} is a required field'.format(rf)) schema_name = raw_event['schema'] version = raw_event['version'] From 913215a586597a244febd8a7dc4a338e1d9ae8d9 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 08:08:04 -0700 Subject: [PATCH 56/68] Derive JSON Schema files from YAML files This lets us add detailed documentation & description to our schemas, which is very hard to do in JSON. 
We also add a lot of documentation to the one JSON schema we have --- jupyter_server/event-schemas/README.md | 19 +++++ .../contentsmanager-actions.json | 17 ++-- jupyter_server/event-schemas/generate-json.py | 39 +++++++++ jupyter_server/event-schemas/v1.yaml | 79 +++++++++++++++++++ 4 files changed, 147 insertions(+), 7 deletions(-) create mode 100644 jupyter_server/event-schemas/README.md create mode 100755 jupyter_server/event-schemas/generate-json.py create mode 100644 jupyter_server/event-schemas/v1.yaml diff --git a/jupyter_server/event-schemas/README.md b/jupyter_server/event-schemas/README.md new file mode 100644 index 0000000000..541a9b0398 --- /dev/null +++ b/jupyter_server/event-schemas/README.md @@ -0,0 +1,19 @@ +# Event Schemas + +## Generating .json files + +Event Schemas are written in a human readable `.yaml` format. +This is primarily to get multi-line strings in our descriptions, +as documentation is very important. + +Every time you modify a `.yaml` file, you should run the following +commands. + +```bash +./generate-json.py +``` + +This needs the `ruamel.yaml` python package installed. + +Hopefully, this is extremely temporary, and we can just use YAML +with jupyter_telemetry. 
\ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json index 5da6d68b88..065f1d5c2f 100644 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ b/jupyter_server/event-schemas/contentsmanager-actions.json @@ -2,9 +2,12 @@ "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", "version": 1, "title": "Contents Manager activities", - "description": "Notebook Server emits this event whenever a contentsmanager action happens", + "description": "Record actions on files via the ContentsManager REST API.\n\nThe notebook ContentsManager REST API is used by all frontends to retreive,\nsave, list, delete and perform other actions on notebooks, directories,\nand other files through the UI. This is pluggable - the default acts on\nthe file system, but can be replaced with a different ContentsManager\nimplementation - to work on S3, Postgres, other object stores, etc.\nThe events get recorded regardless of the ContentsManager implementation\nbeing used.\n\nLimitations:\n\n1. This does not record all filesystem access, just the ones that happen\n explicitly via the notebook server's REST API. Users can (and often do)\n trivially access the filesystem in many other ways (such as `open()` calls\n in their code), so this is usually never a complete record.\n2. As with all events recorded by the notebook server, users most likely\n have the ability to modify the code of the notebook server. Unless other\n security measures are in place, these events should be treated as user\n controlled and not used in high security areas.\n3. 
Events are only recorded when an action succeeds.\n", "type": "object", - "required": ["action", "path"], + "required": [ + "action", + "path" + ], "properties": { "action": { "enum": [ @@ -13,18 +16,18 @@ "save", "upload", "rename", - "create", - "copy" + "copy", + "delete" ], - "description": "Action performed by contents manager" + "description": "Action performed by the ContentsManager API.\n\nThis is a required field.\n\nPossible values:\n\n1. get\n Get contents of a particular file, or list contents of a directory.\n\n2. create\n Create a new directory or file at 'path'. Currently, name of the\n file or directory is auto generated by the ContentsManager implementation.\n\n3. save\n Save a file at path with contents from the client\n\n4. upload\n Upload a file at given path with contents from the client\n\n5. rename\n Rename a file or directory from value in source_path to\n value in path.\n\n5. copy\n Copy a file or directory from value in source_path to\n value in path.\n\n6. delete\n Delete a file or empty directory at given path\n" }, "path": { "type": "string", - "description": "Logical path the action was performed in" + "description": "Logical path on which the operation was performed.\n\nThis is a required field.\n" }, "source_path": { "type": "string", - "description": "If action is 'copy', this specifies the source path" + "description": "Source path of an operation when action is 'copy' or 'rename'" } } } \ No newline at end of file diff --git a/jupyter_server/event-schemas/generate-json.py b/jupyter_server/event-schemas/generate-json.py new file mode 100755 index 0000000000..a39fa0610b --- /dev/null +++ b/jupyter_server/event-schemas/generate-json.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +import argparse +import json +import os +import jsonschema +from ruamel.yaml import YAML + +from jupyter_telemetry.eventlog import EventLog + +yaml = YAML(typ='safe') + +def main(): + argparser = argparse.ArgumentParser() + argparser.add_argument( + 
'directory', + help='Directory with Schema .yaml files' + ) + + args = argparser.parse_args() + + el = EventLog() + for dirname, _, files in os.walk(args.directory): + for file in files: + if not file.endswith('.yaml'): + continue + yaml_path = os.path.join(dirname, file) + print('Processing', yaml_path) + with open(yaml_path) as f: + schema = yaml.load(f) + + # validate schema + el.register_schema(schema) + + json_path = os.path.join(dirname, os.path.splitext(file)[0] + '.json') + with open(json_path, 'w') as f: + json.dump(schema, f, indent=4) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/jupyter_server/event-schemas/v1.yaml b/jupyter_server/event-schemas/v1.yaml new file mode 100644 index 0000000000..3d7e8f2fe9 --- /dev/null +++ b/jupyter_server/event-schemas/v1.yaml @@ -0,0 +1,79 @@ +"$id": eventlogging.jupyter.org/notebook/contentsmanager-actions +version: 1 +title: Contents Manager activities +description: | + Record actions on files via the ContentsManager REST API. + + The notebook ContentsManager REST API is used by all frontends to retreive, + save, list, delete and perform other actions on notebooks, directories, + and other files through the UI. This is pluggable - the default acts on + the file system, but can be replaced with a different ContentsManager + implementation - to work on S3, Postgres, other object stores, etc. + The events get recorded regardless of the ContentsManager implementation + being used. + + Limitations: + + 1. This does not record all filesystem access, just the ones that happen + explicitly via the notebook server's REST API. Users can (and often do) + trivially access the filesystem in many other ways (such as `open()` calls + in their code), so this is usually never a complete record. + 2. As with all events recorded by the notebook server, users most likely + have the ability to modify the code of the notebook server. 
Unless other + security measures are in place, these events should be treated as user + controlled and not used in high security areas. + 3. Events are only recorded when an action succeeds. +type: object +required: +- action +- path +properties: + action: + enum: + - get + - create + - save + - upload + - rename + - copy + - delete + description: | + Action performed by the ContentsManager API. + + This is a required field. + + Possible values: + + 1. get + Get contents of a particular file, or list contents of a directory. + + 2. create + Create a new directory or file at 'path'. Currently, name of the + file or directory is auto generated by the ContentsManager implementation. + + 3. save + Save a file at path with contents from the client + + 4. upload + Upload a file at given path with contents from the client + + 5. rename + Rename a file or directory from value in source_path to + value in path. + + 5. copy + Copy a file or directory from value in source_path to + value in path. + + 6. delete + Delete a file or empty directory at given path + path: + type: string + description: | + Logical path on which the operation was performed. + + This is a required field. + source_path: + type: string + description: | + Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file From a921ff0cfb7b5a04e8851d5a0c2b2db884fe82c5 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 11:10:07 -0700 Subject: [PATCH 57/68] Keep event schemas in YAML Primary advantage over JSON is that we can do multi-line strings for more detailed documentation. We also expect humans to read & write these, so YAML is a much better format there. All JSON is also valid YAML, so that helps. 
Depends on https://github.com/jupyter/telemetry/pull/13 --- jupyter_server/serverapp.py | 7 ++-- notebook/tests/test_eventlog.py | 57 +++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 notebook/tests/test_eventlog.py diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 5b30039b62..79551d3d98 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -33,6 +33,7 @@ import warnings import webbrowser import urllib +from ruamel.yaml import YAML from glob import glob from types import ModuleType @@ -1626,14 +1627,14 @@ def _init_asyncio_patch(): def init_eventlog(self): self.eventlog = EventLog(parent=self) + yaml = YAML(typ='safe') event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') # Recursively register all .json files under event-schemas for dirname, _, files in os.walk(event_schemas_dir): for file in files: - if file.endswith('.json'): + if file.endswith('.yaml'): file_path = os.path.join(dirname, file) - with open(file_path) as f: - self.eventlog.register_schema(json.load(f)) + self.eventlog.register_schema_file(file_path) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py new file mode 100644 index 0000000000..c2f74a59c9 --- /dev/null +++ b/notebook/tests/test_eventlog.py @@ -0,0 +1,57 @@ +import os +import re +import jsonschema +from ruamel.yaml import YAML +from notebook.notebookapp import NotebookApp +from notebook.utils import eventlogging_schema_fqn +from unittest import TestCase + +yaml = YAML(typ='safe') + +class RegisteredSchemasTestCase(TestCase): + def schema_files(self): + event_schemas_dir = os.path.realpath( + os.path.join(os.path.dirname(__file__), '..', 'event-schemas') + ) + schemas = [] + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.yaml'): + yield 
os.path.join(dirname, file) + + def test_eventlogging_schema_fqn(self): + self.assertEqual( + eventlogging_schema_fqn('test'), + 'eventlogging.jupyter.org/notebook/test' + ) + def test_valid_schemas(self): + """ + All schemas must be valid json schemas + """ + for schema_file in self.schema_files(): + with open(schema_file) as f: + jsonschema.Draft7Validator.check_schema(yaml.load(f)) + + def test_schema_conventions(self): + """ + Test schema naming convention for this repo. + + 1. All schemas should be under event-schemas/{name}/v{version}.yaml + 2. Schema id should be eventlogging.jupyter.org/notebook/{name} + 3. Schema version should match version in file + """ + for schema_file in self.schema_files(): + filename = os.path.basename(schema_file) + match = re.match('v(\d+)\.yaml', filename) + # All schema locations must match the following pattern + # schema-name/v(version).yaml + self.assertIsNotNone(match) + + with open(schema_file) as f: + schema = yaml.load(f) + + self.assertEqual(schema['$id'], eventlogging_schema_fqn( + os.path.basename(os.path.dirname(schema_file)) + )) + self.assertEqual(schema['version'], int(match.groups()[0])) + \ No newline at end of file From 54b3594b54b54248ead9e5dd384a2d58b7958e07 Mon Sep 17 00:00:00 2001 From: yuvipanda Date: Tue, 9 Jul 2019 16:34:36 -0700 Subject: [PATCH 58/68] Depend on the jupyter_telemetry package We made a v0.0.1 release! 
--- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 96279432ba..432cb82ecb 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,8 @@ 'terminado>=0.8.3', 'prometheus_client', "pywin32>=1.0 ; sys_platform == 'win32'", - "anyio>=2.0.2", + 'anyio>=2.0.2', + 'jupyter_telemetry' ], extras_require = { 'test': ['coverage', 'requests', From 5a82794ba3d53c925d05e9bcdf13b61ca5794f42 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 12:02:13 -0700 Subject: [PATCH 59/68] read schemas from new utils function --- jupyter_server/serverapp.py | 21 +++++++++++---------- jupyter_server/utils.py | 14 ++++++++++++++ notebook/tests/test_eventlog.py | 20 ++++++-------------- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 79551d3d98..8f09e439e3 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -101,7 +101,14 @@ from jupyter_server._sysinfo import get_sys_info from ._tz import utcnow, utcfromtimestamp -from .utils import url_path_join, check_pid, url_escape, urljoin, pathname2url +from .utils import ( + url_path_join, + check_pid, + url_escape, + urljoin, + pathname2url, + get_schema_files +) from jupyter_server.extension.serverextension import ServerExtensionApp from jupyter_server.extension.manager import ExtensionManager @@ -1626,15 +1633,9 @@ def _init_asyncio_patch(): def init_eventlog(self): self.eventlog = EventLog(parent=self) - - yaml = YAML(typ='safe') - event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') - # Recursively register all .json files under event-schemas - for dirname, _, files in os.walk(event_schemas_dir): - for file in files: - if file.endswith('.yaml'): - file_path = os.path.join(dirname, file) - self.eventlog.register_schema_file(file_path) + # Register schemas for notebook services. 
+ for file_path in get_schema_files(): + self.eventlog.register_schema_file(file_path) @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True): diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 54e112f97b..55389f037a 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -449,3 +449,17 @@ def eventlogging_schema_fqn(name): Matches convention for this particular repo """ return 'eventlogging.jupyter.org/notebook/{}'.format(name) + + +def get_schema_files(): + """Yield a sequence of event schemas for jupyter services.""" + # Hardcode path to event schemas directory. + event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') + schema_files = [] + # Recursively register all .json files under event-schemas + for dirname, _, files in os.walk(event_schemas_dir): + for file in files: + if file.endswith('.yaml'): + file_path = os.path.join(dirname, file) + schema_files.append(file_path) + yield schema_files diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py index c2f74a59c9..994181b73e 100644 --- a/notebook/tests/test_eventlog.py +++ b/notebook/tests/test_eventlog.py @@ -3,32 +3,25 @@ import jsonschema from ruamel.yaml import YAML from notebook.notebookapp import NotebookApp -from notebook.utils import eventlogging_schema_fqn +from notebook.utils import eventlogging_schema_fqn, get_schema_files from unittest import TestCase yaml = YAML(typ='safe') + class RegisteredSchemasTestCase(TestCase): - def schema_files(self): - event_schemas_dir = os.path.realpath( - os.path.join(os.path.dirname(__file__), '..', 'event-schemas') - ) - schemas = [] - for dirname, _, files in os.walk(event_schemas_dir): - for file in files: - if file.endswith('.yaml'): - yield os.path.join(dirname, file) def test_eventlogging_schema_fqn(self): self.assertEqual( eventlogging_schema_fqn('test'), 'eventlogging.jupyter.org/notebook/test' ) + def test_valid_schemas(self): """ All schemas 
must be valid json schemas """ - for schema_file in self.schema_files(): + for schema_file in get_schema_files(): with open(schema_file) as f: jsonschema.Draft7Validator.check_schema(yaml.load(f)) @@ -40,7 +33,7 @@ def test_schema_conventions(self): 2. Schema id should be eventlogging.jupyter.org/notebook/{name} 3. Schema version should match version in file """ - for schema_file in self.schema_files(): + for schema_file in get_schema_files(): filename = os.path.basename(schema_file) match = re.match('v(\d+)\.yaml', filename) # All schema locations must match the following pattern @@ -53,5 +46,4 @@ def test_schema_conventions(self): self.assertEqual(schema['$id'], eventlogging_schema_fqn( os.path.basename(os.path.dirname(schema_file)) )) - self.assertEqual(schema['version'], int(match.groups()[0])) - \ No newline at end of file + self.assertEqual(schema['version'], int(match.groups()[0])) \ No newline at end of file From e9ffb47f88f6cbe7f01b0152465d1216312b7f89 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 14:56:24 -0700 Subject: [PATCH 60/68] Add fix for tables in RTD theme sphinx docs. 
Solution came from https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html --- docs/source/_static/theme_overrides.css | 13 +++ docs/source/conf.py | 127 +++++++++++++++++++++++- jupyter_server/utils.py | 5 +- 3 files changed, 140 insertions(+), 5 deletions(-) create mode 100644 docs/source/_static/theme_overrides.css diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css new file mode 100644 index 0000000000..63ee6cc74c --- /dev/null +++ b/docs/source/_static/theme_overrides.css @@ -0,0 +1,13 @@ +/* override table width restrictions */ +@media screen and (min-width: 767px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from overriding + this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } +} diff --git a/docs/source/conf.py b/docs/source/conf.py index e105e82d40..fc538a1613 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,8 +70,7 @@ 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib_github_alt', - 'sphinxcontrib.openapi', - 'sphinxemoji.sphinxemoji' + 'sphinx-jsonschema' ] # Add any paths that contain templates here, relative to this directory. @@ -208,6 +207,12 @@ # since it is needed to properly generate _static in the build directory html_static_path = ['_static'] +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # override wide tables in RTD theme + ], + } + # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. 
@@ -370,4 +375,122 @@ spelling_word_list_filename='spelling_wordlist.txt' # import before any doc is built, so _ is guaranteed to be injected +<<<<<<< HEAD import jupyter_server.transutils +======= +import notebook.transutils + +# -- Autogenerate documentation for event schemas ------------------ + +from notebook.utils import get_schema_files + +# Build a dictionary that describes the event schema table of contents. +# toc = { +# schema_name : { +# src: # file path to schema +# dst: # file path to documentation +# ver: # latest version of schema +# } +# } +toc = {} + +# Iterate over schema directories and generate documentation. +# Generates documentation for the latest version of each schema. +for file_path in get_schema_files(): + # Make path relative. + file_path = os.path.relpath(file_path) + # Break apart path to its pieces + pieces = file_path.split(os.path.sep) + # Schema version. Outputs as a string that looks like "v#" + schema_ver = os.path.splitext(pieces[-1])[0] + # Strip "v" and make version an integer. + schema_int = int(schema_ver[1:]) + # Schema name. + schema_name = pieces[-2] + + # Add this version file to schema_dir + src = '../' + file_path + dst = os.path.join('events', os.path.join(schema_name + '.rst')) + + if schema_name in toc: + # If this is a later version, replace the old version. + if schema_int > toc[schema_name]['ver']: + toc[schema_name] = { + 'src': src, + 'dst': dst, + 'ver': schema_int + } + else: + toc[schema_name] = { + 'src': src, + 'dst': dst, + 'ver': schema_int + } + +# Write schema documentation +for schema_name, x in toc.items(): + with open(dst, 'w') as f: + f.write('.. jsonschema:: {}'.format(src)) + +# Write table of contents +events_index = """ +.. 
toctree:: + :maxdepth: 1 + :glob: + +""" + +with open(os.path.join('events', 'index.rst'), 'w') as f: + f.write(events_index) + for item in toc.keys(): + f.write(' {}'.format(item)) + + + + + + + + + + +# # create a directory for this schema if it doesn't exist: +# schema_dir = os.path.join('events', schema_name) +# if not os.path.exists(schema_dir): +# os.makedirs(schema_dir) + + +# toc[schema_name] + + + +# with open(dst, 'w') as f: +# f.write('.. jsonschema:: {}'.format(src)) + + + + + + + +# toc.append(schema_name) + + +# events_index = """ +# .. toctree:: +# :maxdepth: 1 +# :glob: + +# """ + + +# with open(os.path.join('events', 'index.rst'), 'w') as f: +# f.write(events_index) +# for item in set(toc): +# f.write(' {}/*'.format(item)) + + + + + +>>>>>>> 4fb0a0443... Add fix for tables in RTD theme sphinx docs. diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 55389f037a..8fc6e89479 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -455,11 +455,10 @@ def get_schema_files(): """Yield a sequence of event schemas for jupyter services.""" # Hardcode path to event schemas directory. 
event_schemas_dir = os.path.join(os.path.dirname(__file__), 'event-schemas') - schema_files = [] + #schema_files = [] # Recursively register all .json files under event-schemas for dirname, _, files in os.walk(event_schemas_dir): for file in files: if file.endswith('.yaml'): file_path = os.path.join(dirname, file) - schema_files.append(file_path) - yield schema_files + yield file_path From f08cc2421d0acd9c2f80d4b45093c5fef101c76e Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:02:22 -0700 Subject: [PATCH 61/68] add event schema auto-documentation to jupyter notebook docs --- .gitignore | 1 + docs/environment.yml | 3 +- docs/source/conf.py | 118 --------------------------------------- docs/source/eventlog.rst | 47 ++++++++++++++++ 4 files changed, 50 insertions(+), 119 deletions(-) create mode 100644 docs/source/eventlog.rst diff --git a/.gitignore b/.gitignore index 936f7dbcb6..deb6e6ad14 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ docs/man/*.gz docs/source/api/generated docs/source/config.rst docs/gh-pages +docs/source/events notebook/i18n/*/LC_MESSAGES/*.mo notebook/i18n/*/LC_MESSAGES/nbjs.json notebook/static/components diff --git a/docs/environment.yml b/docs/environment.yml index 5d77bc7bb4..1d9c9d3eb8 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -13,4 +13,5 @@ dependencies: - sphinxcontrib_github_alt - sphinxcontrib-openapi - sphinxemoji - - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file + - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master + - sphinx-jsonschema diff --git a/docs/source/conf.py b/docs/source/conf.py index fc538a1613..41b089cc07 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -375,122 +375,4 @@ spelling_word_list_filename='spelling_wordlist.txt' # import before any doc is built, so _ is guaranteed to be injected -<<<<<<< HEAD import jupyter_server.transutils -======= -import notebook.transutils - -# -- Autogenerate 
documentation for event schemas ------------------ - -from notebook.utils import get_schema_files - -# Build a dictionary that describes the event schema table of contents. -# toc = { -# schema_name : { -# src: # file path to schema -# dst: # file path to documentation -# ver: # latest version of schema -# } -# } -toc = {} - -# Iterate over schema directories and generate documentation. -# Generates documentation for the latest version of each schema. -for file_path in get_schema_files(): - # Make path relative. - file_path = os.path.relpath(file_path) - # Break apart path to its pieces - pieces = file_path.split(os.path.sep) - # Schema version. Outputs as a string that looks like "v#" - schema_ver = os.path.splitext(pieces[-1])[0] - # Strip "v" and make version an integer. - schema_int = int(schema_ver[1:]) - # Schema name. - schema_name = pieces[-2] - - # Add this version file to schema_dir - src = '../' + file_path - dst = os.path.join('events', os.path.join(schema_name + '.rst')) - - if schema_name in toc: - # If this is a later version, replace the old version. - if schema_int > toc[schema_name]['ver']: - toc[schema_name] = { - 'src': src, - 'dst': dst, - 'ver': schema_int - } - else: - toc[schema_name] = { - 'src': src, - 'dst': dst, - 'ver': schema_int - } - -# Write schema documentation -for schema_name, x in toc.items(): - with open(dst, 'w') as f: - f.write('.. jsonschema:: {}'.format(src)) - -# Write table of contents -events_index = """ -.. toctree:: - :maxdepth: 1 - :glob: - -""" - -with open(os.path.join('events', 'index.rst'), 'w') as f: - f.write(events_index) - for item in toc.keys(): - f.write(' {}'.format(item)) - - - - - - - - - - -# # create a directory for this schema if it doesn't exist: -# schema_dir = os.path.join('events', schema_name) -# if not os.path.exists(schema_dir): -# os.makedirs(schema_dir) - - -# toc[schema_name] - - - -# with open(dst, 'w') as f: -# f.write('.. 
jsonschema:: {}'.format(src)) - - - - - - - -# toc.append(schema_name) - - -# events_index = """ -# .. toctree:: -# :maxdepth: 1 -# :glob: - -# """ - - -# with open(os.path.join('events', 'index.rst'), 'w') as f: -# f.write(events_index) -# for item in set(toc): -# f.write(' {}/*'.format(item)) - - - - - ->>>>>>> 4fb0a0443... Add fix for tables in RTD theme sphinx docs. diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst new file mode 100644 index 0000000000..fd77a1b9c8 --- /dev/null +++ b/docs/source/eventlog.rst @@ -0,0 +1,47 @@ +Eventlogging and Telemetry +========================== + +The Notebook Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that the Notebook Server emits are defined by `JSON schemas`_ listed below_ emitted as JSON data, defined and validated by the JSON schemas listed below. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +How to emit events +------------------ + +Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data. + + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to + 2. ``allows_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. code-block:: + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'hub.jupyter.org/server-action' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + + +.. 
_below: + +Event schemas +------------- + +.. toctree:: + :maxdepth: 2 + + events/index From 92621940cd338a4dbfc95348c0b2e1ca8d4a1e4b Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:14:04 -0700 Subject: [PATCH 62/68] format paths in recorded events --- jupyter_server/services/contents/handlers.py | 30 +++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index eb2cc76b04..d133fbb476 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -117,7 +117,8 @@ async def get(self, path=''): validate_model(model, expect_content=content) self._finish_model(model, location=False) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'get', 'path': model['path'] } ) @@ -134,10 +135,13 @@ async def patch(self, path=''): self._finish_model(model) self.log.info(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, - # FIXME: 'path' always has a leading slash, while model['path'] does not. - # What to do here for source_path? path munge manually? 
Eww - { 'action': 'rename', 'path': model['path'], 'source_path': path } + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'rename', + 'path': model['path'], + 'source_path': path.lstrip(os.path.sep) + } ) @gen.coroutine @@ -152,8 +156,13 @@ async def _copy(self, copy_from, copy_to=None): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, - { 'action': 'copy', 'path': model['path'], 'source_path': copy_from } + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'copy', + 'path': model['path'], + 'source_path': copy_from.lstrip(os.path.sep) + } ) async def _upload(self, model, path): @@ -164,7 +173,8 @@ async def _upload(self, model, path): validate_model(model, expect_content=False) self._finish_model(model) self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'upload', 'path': model['path'] } ) @@ -190,9 +200,9 @@ async def _save(self, model, path): model = await ensure_async(self.contents_manager.save(model, path)) validate_model(model, expect_content=False) self._finish_model(model) - self.eventlog.record_event( - eventlogging_schema_fqn('contentsmanager-actions'), 1, + eventlogging_schema_fqn('contentsmanager-actions'), + 1, { 'action': 'save', 'path': model['path'] } ) From 5ed465212559f8d6f3ea9d2e8e8c25188807de12 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:37:03 -0700 Subject: [PATCH 63/68] add documentation for eventlog endpoint --- docs/source/eventlog.rst | 24 ++++++++++++++++---- jupyter_server/services/eventlog/handlers.py | 11 +++++---- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst index fd77a1b9c8..df5c153fb7 100644 --- a/docs/source/eventlog.rst +++ b/docs/source/eventlog.rst @@ -8,12 +8,11 @@ The Notebook Server 
can be configured to record structured events from a running .. _`Telemetry System`: https://github.com/jupyter/telemetry .. _`JSON schemas`: https://json-schema.org/ -How to emit events ------------------- +Emitting Server Events +---------------------- Event logging is handled by its ``Eventlog`` object. This leverages Python's standing logging_ library to emit, filter, and collect event data. - To begin recording events, you'll need to set two configurations: 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to @@ -35,11 +34,26 @@ Here's a basic example for emitting events from the `contents` service: The output is a file, ``"event.log"``, with events recorded as JSON data. +`eventlog` endpoint +------------------- + +The Notebook Server provides a public REST endpoint for external applications to validate and log events +through the Server's Event Log. + +To log events, send a `POST` request to the `/api/eventlog` endpoint. The body of the request should be a +JSON blog and is required to have the follow keys: + + 1. `'schema'` : the event's schema ID. + 2. `'version'` : the version of the event's schema. + 3. `'event'` : the event data in JSON format. + +Events that are validated by this endpoint must have their schema listed in the `allowed_schemas` trait listed above. .. _below: -Event schemas -------------- + +Server Event schemas +-------=======------ .. 
toctree:: :maxdepth: 2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index 4665e43e8b..b27dd87304 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -29,14 +29,15 @@ def post(self, *args, **kwargs): version = raw_event['version'] event = raw_event['event'] - # Profile, and move to a background thread if this is problematic - # FIXME: Return a more appropriate error response if validation fails - self.eventlog.record_event(schema_name, version, event) - + # Profile, may need to move to a background thread if this is problematic + try: + self.eventlog.record_event(schema_name, version, event) + except: + raise web.HTTPError(500, "Event could not be validated.") + self.set_status(204) self.finish() - default_handlers = [ (r"/api/eventlog", EventLoggingHandler), ] \ No newline at end of file From 791dbba9e653a78315d9f057f0aa1b5233a8b03a Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:46:40 -0700 Subject: [PATCH 64/68] return exception as 400 error in eventlog endpoint --- docs/source/eventlog.rst | 2 +- jupyter_server/services/eventlog/handlers.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/source/eventlog.rst b/docs/source/eventlog.rst index df5c153fb7..7229717f69 100644 --- a/docs/source/eventlog.rst +++ b/docs/source/eventlog.rst @@ -53,7 +53,7 @@ Events that are validated by this endpoint must have their schema listed in the Server Event schemas --------=======------ +-------------------- .. 
toctree:: :maxdepth: 2 diff --git a/jupyter_server/services/eventlog/handlers.py b/jupyter_server/services/eventlog/handlers.py index b27dd87304..0c9b69815f 100644 --- a/jupyter_server/services/eventlog/handlers.py +++ b/jupyter_server/services/eventlog/handlers.py @@ -6,7 +6,6 @@ from notebook.base.handlers import APIHandler, json_errors from jupyter_telemetry.eventlog import EventLog - class EventLoggingHandler(APIHandler): """ A handler that receives and stores telemetry data from the client. @@ -32,8 +31,8 @@ def post(self, *args, **kwargs): # Profile, may need to move to a background thread if this is problematic try: self.eventlog.record_event(schema_name, version, event) - except: - raise web.HTTPError(500, "Event could not be validated.") + except Exception as e: + raise web.HTTPError(400, e) self.set_status(204) self.finish() From 80682e8ba3a73bdbfe7b36e32072816dc28b79c1 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 1 Oct 2019 15:55:54 -0700 Subject: [PATCH 65/68] normalize path in emitted event --- jupyter_server/services/contents/handlers.py | 2 +- notebook/tests/test_eventlog.py | 49 -------------------- 2 files changed, 1 insertion(+), 50 deletions(-) delete mode 100644 notebook/tests/test_eventlog.py diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index d133fbb476..5a205e5e96 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -276,7 +276,7 @@ async def delete(self, path=''): self.finish() self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, - { 'action': 'delete', 'path': path } + { 'action': 'delete', 'path': path.lstrip(os.path.sep) } ) diff --git a/notebook/tests/test_eventlog.py b/notebook/tests/test_eventlog.py deleted file mode 100644 index 994181b73e..0000000000 --- a/notebook/tests/test_eventlog.py +++ /dev/null @@ -1,49 +0,0 @@ -import os -import re -import jsonschema -from ruamel.yaml import 
YAML -from notebook.notebookapp import NotebookApp -from notebook.utils import eventlogging_schema_fqn, get_schema_files -from unittest import TestCase - -yaml = YAML(typ='safe') - - -class RegisteredSchemasTestCase(TestCase): - - def test_eventlogging_schema_fqn(self): - self.assertEqual( - eventlogging_schema_fqn('test'), - 'eventlogging.jupyter.org/notebook/test' - ) - - def test_valid_schemas(self): - """ - All schemas must be valid json schemas - """ - for schema_file in get_schema_files(): - with open(schema_file) as f: - jsonschema.Draft7Validator.check_schema(yaml.load(f)) - - def test_schema_conventions(self): - """ - Test schema naming convention for this repo. - - 1. All schemas should be under event-schamas/{name}/v{version}.yaml - 2. Schema id should be eventlogging.jupyter.org/notebook/{name} - 3. Schema version should match version in file - """ - for schema_file in get_schema_files(): - filename = os.path.basename(schema_file) - match = re.match('v(\d+)\.yaml', filename) - # All schema locations must match the following pattern - # schema-name/v(version).yaml - self.assertIsNotNone(match) - - with open(schema_file) as f: - schema = yaml.load(f) - - self.assertEqual(schema['$id'], eventlogging_schema_fqn( - os.path.basename(os.path.dirname(schema_file)) - )) - self.assertEqual(schema['version'], int(match.groups()[0])) \ No newline at end of file From b10174f088e9b448c014f4753bb7900db0246868 Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 15:31:03 -0700 Subject: [PATCH 66/68] initial tests --- jupyter_server/event-schemas/README.md | 19 --------- .../contentsmanager-actions.json | 33 ---------------- .../contentsmanager-actions/v1.json | 30 -------------- .../{ => contentsmanager-actions}/v1.yaml | 8 +++- jupyter_server/event-schemas/generate-json.py | 39 ------------------- jupyter_server/services/contents/handlers.py | 34 ++++++++-------- tests/test_eventlog.py | 4 ++ 7 files changed, 26 insertions(+), 141 deletions(-) delete mode 
100644 jupyter_server/event-schemas/README.md delete mode 100644 jupyter_server/event-schemas/contentsmanager-actions.json delete mode 100644 jupyter_server/event-schemas/contentsmanager-actions/v1.json rename jupyter_server/event-schemas/{ => contentsmanager-actions}/v1.yaml (94%) delete mode 100755 jupyter_server/event-schemas/generate-json.py create mode 100644 tests/test_eventlog.py diff --git a/jupyter_server/event-schemas/README.md b/jupyter_server/event-schemas/README.md deleted file mode 100644 index 541a9b0398..0000000000 --- a/jupyter_server/event-schemas/README.md +++ /dev/null @@ -1,19 +0,0 @@ -# Event Schemas - -## Generating .json files - -Event Schemas are written in a human readable `.yaml` format. -This is primarily to get multi-line strings in our descriptions, -as documentation is very important. - -Every time you modify a `.yaml` file, you should run the following -commands. - -```bash -./generate-json.py -``` - -This needs the `ruamel.yaml` python package installed. - -Hopefully, this is extremely temporary, and we can just use YAML -with jupyter_telemetry. \ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions.json b/jupyter_server/event-schemas/contentsmanager-actions.json deleted file mode 100644 index 065f1d5c2f..0000000000 --- a/jupyter_server/event-schemas/contentsmanager-actions.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", - "version": 1, - "title": "Contents Manager activities", - "description": "Record actions on files via the ContentsManager REST API.\n\nThe notebook ContentsManager REST API is used by all frontends to retreive,\nsave, list, delete and perform other actions on notebooks, directories,\nand other files through the UI. 
This is pluggable - the default acts on\nthe file system, but can be replaced with a different ContentsManager\nimplementation - to work on S3, Postgres, other object stores, etc.\nThe events get recorded regardless of the ContentsManager implementation\nbeing used.\n\nLimitations:\n\n1. This does not record all filesystem access, just the ones that happen\n explicitly via the notebook server's REST API. Users can (and often do)\n trivially access the filesystem in many other ways (such as `open()` calls\n in their code), so this is usually never a complete record.\n2. As with all events recorded by the notebook server, users most likely\n have the ability to modify the code of the notebook server. Unless other\n security measures are in place, these events should be treated as user\n controlled and not used in high security areas.\n3. Events are only recorded when an action succeeds.\n", - "type": "object", - "required": [ - "action", - "path" - ], - "properties": { - "action": { - "enum": [ - "get", - "create", - "save", - "upload", - "rename", - "copy", - "delete" - ], - "description": "Action performed by the ContentsManager API.\n\nThis is a required field.\n\nPossible values:\n\n1. get\n Get contents of a particular file, or list contents of a directory.\n\n2. create\n Create a new directory or file at 'path'. Currently, name of the\n file or directory is auto generated by the ContentsManager implementation.\n\n3. save\n Save a file at path with contents from the client\n\n4. upload\n Upload a file at given path with contents from the client\n\n5. rename\n Rename a file or directory from value in source_path to\n value in path.\n\n5. copy\n Copy a file or directory from value in source_path to\n value in path.\n\n6. 
delete\n Delete a file or empty directory at given path\n" - }, - "path": { - "type": "string", - "description": "Logical path on which the operation was performed.\n\nThis is a required field.\n" - }, - "source_path": { - "type": "string", - "description": "Source path of an operation when action is 'copy' or 'rename'" - } - } -} \ No newline at end of file diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.json b/jupyter_server/event-schemas/contentsmanager-actions/v1.json deleted file mode 100644 index 5da6d68b88..0000000000 --- a/jupyter_server/event-schemas/contentsmanager-actions/v1.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "$id": "eventlogging.jupyter.org/notebook/contentsmanager-actions", - "version": 1, - "title": "Contents Manager activities", - "description": "Notebook Server emits this event whenever a contentsmanager action happens", - "type": "object", - "required": ["action", "path"], - "properties": { - "action": { - "enum": [ - "get", - "create", - "save", - "upload", - "rename", - "create", - "copy" - ], - "description": "Action performed by contents manager" - }, - "path": { - "type": "string", - "description": "Logical path the action was performed in" - }, - "source_path": { - "type": "string", - "description": "If action is 'copy', this specifies the source path" - } - } -} \ No newline at end of file diff --git a/jupyter_server/event-schemas/v1.yaml b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml similarity index 94% rename from jupyter_server/event-schemas/v1.yaml rename to jupyter_server/event-schemas/contentsmanager-actions/v1.yaml index 3d7e8f2fe9..31a5f293a9 100644 --- a/jupyter_server/event-schemas/v1.yaml +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml @@ -1,6 +1,7 @@ "$id": eventlogging.jupyter.org/notebook/contentsmanager-actions version: 1 title: Contents Manager activities +personal-data: true description: | Record actions on files via the ContentsManager REST API. 
@@ -37,6 +38,7 @@ properties: - rename - copy - delete + category: unrestricted description: | Action performed by the ContentsManager API. @@ -60,20 +62,22 @@ properties: 5. rename Rename a file or directory from value in source_path to value in path. - + 5. copy Copy a file or directory from value in source_path to value in path. - + 6. delete Delete a file or empty directory at given path path: + category: personally-identifiable-information type: string description: | Logical path on which the operation was performed. This is a required field. source_path: + category: personally-identifiable-information type: string description: | Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file diff --git a/jupyter_server/event-schemas/generate-json.py b/jupyter_server/event-schemas/generate-json.py deleted file mode 100755 index a39fa0610b..0000000000 --- a/jupyter_server/event-schemas/generate-json.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json -import os -import jsonschema -from ruamel.yaml import YAML - -from jupyter_telemetry.eventlog import EventLog - -yaml = YAML(typ='safe') - -def main(): - argparser = argparse.ArgumentParser() - argparser.add_argument( - 'directory', - help='Directory with Schema .yaml files' - ) - - args = argparser.parse_args() - - el = EventLog() - for dirname, _, files in os.walk(args.directory): - for file in files: - if not file.endswith('.yaml'): - continue - yaml_path = os.path.join(dirname, file) - print('Processing', yaml_path) - with open(yaml_path) as f: - schema = yaml.load(f) - - # validate schema - el.register_schema(schema) - - json_path = os.path.join(dirname, os.path.splitext(file)[0] + '.json') - with open(json_path, 'w') as f: - json.dump(schema, f, indent=4) - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 
5a205e5e96..c9c6a86554 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -5,7 +5,7 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. - +import os import json from tornado import web @@ -115,12 +115,12 @@ async def get(self, path=''): path=path, type=type, format=format, content=content, )) validate_model(model, expect_content=content) - self._finish_model(model, location=False) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'get', 'path': model['path'] } ) + self._finish_model(model, location=False) @web.authenticated async def patch(self, path=''): @@ -132,19 +132,18 @@ async def patch(self, path=''): self.log.info(model) model = await ensure_async(cm.update(model, path)) validate_model(model, expect_content=False) - self._finish_model(model) - self.log.info(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, - { - 'action': 'rename', - 'path': model['path'], - 'source_path': path.lstrip(os.path.sep) + { + 'action': 'rename', + 'path': model['path'], + 'source_path': path.lstrip(os.path.sep) } ) + self._finish_model(model) + - @gen.coroutine async def _copy(self, copy_from, copy_to=None): """Copy a file, optionally specifying a target directory.""" self.log.info(u"Copying {copy_from} to {copy_to}".format( @@ -154,16 +153,16 @@ async def _copy(self, copy_from, copy_to=None): model = await ensure_async(self.contents_manager.copy(copy_from, copy_to)) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { - 'action': 'copy', - 'path': model['path'], - 'source_path': copy_from.lstrip(os.path.sep) + 'action': 'copy', + 'path': model['path'], + 'source_path': copy_from.lstrip(os.path.sep) } ) + self._finish_model(model) async def _upload(self, model, 
path): """Handle upload of a new file to path""" @@ -171,12 +170,12 @@ async def _upload(self, model, path): model = await ensure_async(self.contents_manager.new(model, path)) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'upload', 'path': model['path'] } ) + self._finish_model(model) async def _new_untitled(self, path, type='', ext=''): """Create a new, empty untitled entity""" @@ -185,12 +184,12 @@ async def _new_untitled(self, path, type='', ext=''): path=path, type=type, ext=ext)) self.set_status(201) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, # Set path to path of created object, not directory it was created in { 'action': 'create', 'path': model['path'] } ) + self._finish_model(model) async def _save(self, model, path): """Save an existing file.""" @@ -199,12 +198,12 @@ async def _save(self, model, path): self.log.info(u"Saving file at %s", path) model = await ensure_async(self.contents_manager.save(model, path)) validate_model(model, expect_content=False) - self._finish_model(model) self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'save', 'path': model['path'] } ) + self._finish_model(model) @web.authenticated async def post(self, path=''): @@ -273,12 +272,11 @@ async def delete(self, path=''): self.log.warning('delete %s', path) await ensure_async(cm.delete(path)) self.set_status(204) - self.finish() self.eventlog.record_event( eventlogging_schema_fqn('contentsmanager-actions'), 1, { 'action': 'delete', 'path': path.lstrip(os.path.sep) } ) - + self.finish() class CheckpointsHandler(APIHandler): diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py new file mode 100644 index 0000000000..1f7b587327 --- /dev/null +++ b/tests/test_eventlog.py @@ -0,0 +1,4 @@ + 
+ +def test_eventlog(serverapp): + pass \ No newline at end of file From 1bbb7052c833e21869a9a81a37cf6aeba70eeb8d Mon Sep 17 00:00:00 2001 From: Zsailer Date: Tue, 19 May 2020 16:05:05 -0700 Subject: [PATCH 67/68] add initial telemetry docs --- .gitignore | 10 +- docs/doc-requirements.txt | 3 +- docs/source/conf.py | 11 +- docs/source/operators/index.rst | 3 +- docs/source/operators/telemetry.rst | 61 +++++++++++ docs/source/other/full-config.rst | 161 ++++++++++++++-------------- jupyter_server/utils.py | 2 +- 7 files changed, 155 insertions(+), 96 deletions(-) create mode 100644 docs/source/operators/telemetry.rst diff --git a/.gitignore b/.gitignore index deb6e6ad14..240fff5492 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ MANIFEST +docs/source/operators/events build dist _build @@ -7,15 +8,6 @@ docs/source/api/generated docs/source/config.rst docs/gh-pages docs/source/events -notebook/i18n/*/LC_MESSAGES/*.mo -notebook/i18n/*/LC_MESSAGES/nbjs.json -notebook/static/components -notebook/static/style/*.min.css* -notebook/static/*/js/built/ -notebook/static/*/built/ -notebook/static/built/ -notebook/static/*/js/main.min.js* -notebook/static/lab/*bundle.js node_modules *.py[co] __pycache__ diff --git a/docs/doc-requirements.txt b/docs/doc-requirements.txt index 48b3eda1d0..4167aabf6d 100644 --- a/docs/doc-requirements.txt +++ b/docs/doc-requirements.txt @@ -8,4 +8,5 @@ prometheus_client sphinxcontrib_github_alt sphinxcontrib-openapi sphinxemoji -git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file +git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master +jupyter_telemetry_sphinxext \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 41b089cc07..4add156c81 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -70,7 +70,7 @@ 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib_github_alt', - 'sphinx-jsonschema' + 
'jupyter_telemetry_sphinxext' ] # Add any paths that contain templates here, relative to this directory. @@ -209,9 +209,9 @@ html_context = { 'css_files': [ - '_static/theme_overrides.css', # override wide tables in RTD theme + '_static/theme_overrides.css', # override wide tables in RTD theme ], - } +} # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied @@ -376,3 +376,8 @@ # import before any doc is built, so _ is guaranteed to be injected import jupyter_server.transutils + +# Jupyter telemetry configuration values. +jupyter_telemetry_schema_source = "../jupyter_server/event-schemas" # Path is relative to conf.py +jupyter_telemetry_schema_output = "source/operators/events" # Path is relative to conf.py +jupyter_telemetry_index_title = "Telemetry Event Schemas" # Title of the index page that lists all found schemas. \ No newline at end of file diff --git a/docs/source/operators/index.rst b/docs/source/operators/index.rst index a654be1a0c..a6d2e212fd 100644 --- a/docs/source/operators/index.rst +++ b/docs/source/operators/index.rst @@ -12,4 +12,5 @@ These pages are targeted at people using, configuring, and/or deploying multiple configuring-extensions migrate-from-nbserver public-server - security \ No newline at end of file + security + telemetry \ No newline at end of file diff --git a/docs/source/operators/telemetry.rst b/docs/source/operators/telemetry.rst new file mode 100644 index 0000000000..2c94e99a7c --- /dev/null +++ b/docs/source/operators/telemetry.rst @@ -0,0 +1,61 @@ +Telemetry and Eventlogging +========================== + +Jupyter Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. The types of events that the Server emits are defined by the `JSON schemas`_ listed below_. Events are emitted as JSON data, defined and validated by those JSON schemas. + + +.. 
_logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +Emitting Server Events +---------------------- + +Event logging is handled by its ``EventLog`` object. This leverages Python's standard logging_ library to emit, filter, and collect event data. + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to their destination. + 2. ``allowed_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. code-block:: + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'hub.jupyter.org/server-action' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + +`eventlog` endpoint +------------------- + +The Jupyter Server provides a public REST endpoint for external applications to validate and log events +through the Server's Event Log. + +To log events, send a `POST` request to the `/api/eventlog` endpoint. The body of the request should be a +JSON blob and is required to have the following keys: + + 1. `'schema'` : the event's schema ID. + 2. `'version'` : the version of the event's schema. + 3. `'event'` : the event data in JSON format. + +Events that are validated by this endpoint must have their schema listed in the `allowed_schemas` trait listed above. + +.. _below: + + +Server Event schemas +-------------------- + +.. 
toctree:: + :maxdepth: 2 + + events/index diff --git a/docs/source/other/full-config.rst b/docs/source/other/full-config.rst index 8e0a3b1c46..620a578679 100644 --- a/docs/source/other/full-config.rst +++ b/docs/source/other/full-config.rst @@ -110,9 +110,9 @@ ServerApp.allow_origin : Unicode Default: ``''`` Set the Access-Control-Allow-Origin header - + Use '*' to allow any origin to access your server. - + Takes precedence over allow_origin_pat. @@ -120,13 +120,13 @@ ServerApp.allow_origin_pat : Unicode Default: ``''`` Use a regular expression for the Access-Control-Allow-Origin header - + Requests from an origin matching the expression will get replies with: - + Access-Control-Allow-Origin: origin - + where `origin` is the origin of the request. - + Ignored if allow_origin is set. @@ -134,11 +134,11 @@ ServerApp.allow_password_change : Bool Default: ``True`` Allow password to be changed at login for the Jupyter server. - + While loggin in with a token, the Jupyter server UI will give the opportunity to the user to enter a new password at the same time that will replace the token login mechanism. - + This can be set to false to prevent changing password from the UI/API. @@ -146,15 +146,15 @@ ServerApp.allow_remote_access : Bool Default: ``False`` Allow requests where the Host header doesn't point to a local server - + By default, requests get a 403 forbidden response if the 'Host' header shows that the browser thinks it's on a non-local domain. Setting this option to True disables this check. - + This protects against 'DNS rebinding' attacks, where a remote web server serves you a page and then changes its DNS to send later requests to a local IP, bypassing same-origin checks. - + Local IP addresses (such as 127.0.0.1 and ::1) are allowed as local, along with hostnames configured in local_hostnames. @@ -173,7 +173,7 @@ ServerApp.base_url : Unicode Default: ``'/'`` The base URL for the Jupyter server. 
- + Leading and trailing slashes can be omitted, and will automatically be added. @@ -229,7 +229,7 @@ ServerApp.cookie_secret : Bytes The random bytes used to secure cookies. By default this is a new random number every time you start the server. Set it to a value in a config file to enable logins to persist across server sessions. - + Note: Cookie secrets should be kept private, do not share config files with cookie_secret stored in plaintext (you can read the value from a file). @@ -243,12 +243,12 @@ ServerApp.custom_display_url : Unicode Default: ``''`` Override URL shown to users. - + Replace actual URL, including protocol, address, port and base URL, with the given value when displaying URL to the users. Do not change the actual connection URL. If authentication token is enabled, the token is added to the custom URL automatically. - + This option is intended to be used when the URL to display to the user cannot be determined reliably by the Jupyter server (proxified or containerized setups for example). @@ -262,13 +262,13 @@ ServerApp.disable_check_xsrf : Bool Default: ``False`` Disable cross-site-request-forgery protection - + Jupyter notebook 4.3.1 introduces protection from cross-site request forgeries, requiring API requests to either: - + - originate from pages served by this server (validated with XSRF cookie and token), or - authenticate with a token - + Some anonymous compute resources still desire the ability to run code, completely without authentication. These services can disable all authentication and security checks, @@ -284,7 +284,7 @@ ServerApp.extra_static_paths : List Default: ``[]`` Extra paths to search for serving static files. - + This allows adding javascript/css to be available from the Jupyter server machine, or overriding individual files in the IPython @@ -292,7 +292,7 @@ ServerApp.extra_template_paths : List Default: ``[]`` Extra paths to search for serving jinja templates. 
- + Can be used to override templates from jupyter_server.templates. ServerApp.file_to_run : Unicode @@ -352,10 +352,10 @@ ServerApp.kernel_manager_class : Type ServerApp.kernel_spec_manager_class : Type Default: ``'jupyter_client.kernelspec.KernelSpecManager'`` - + The kernel spec manager class to use. Should be a subclass of `jupyter_client.kernelspec.KernelSpecManager`. - + The Api of KernelSpecManager is provisional and might change without warning between this version of Jupyter and the next stable one. @@ -369,7 +369,7 @@ ServerApp.local_hostnames : List Default: ``['localhost']`` Hostnames to allow as local when allow_remote_access is False. - + Local IP addresses (such as 127.0.0.1 and ::1) are automatically accepted as local as well. @@ -402,19 +402,19 @@ ServerApp.logout_handler_class : Type ServerApp.max_body_size : Int Default: ``536870912`` - + Sets the maximum allowed size of the client request body, specified in the Content-Length request header field. If the size in a request exceeds the configured value, a malformed HTTP message is returned to the client. - + Note: max_body_size is applied even in streaming mode. ServerApp.max_buffer_size : Int Default: ``536870912`` - + Gets or sets the maximum amount of memory, in bytes, that is allocated for use by the buffer manager. @@ -438,11 +438,11 @@ ServerApp.password : Unicode Default: ``''`` Hashed password to use for web authentication. - + To generate, type in a python/IPython shell: - + from jupyter_server.auth import passwd; passwd() - + The string should be of the form type:salt:hashed-password. @@ -452,10 +452,10 @@ ServerApp.password_required : Bool Forces users to use a password for the Jupyter server. This is useful in a multi user environment, for instance when everybody in the LAN can access each other's machine through ssh. - + In such a case, serving on localhost is not secure since any user can connect to the Jupyter server via ssh. 
- + ServerApp.port : Int @@ -471,7 +471,7 @@ ServerApp.port_retries : Int ServerApp.pylab : Unicode Default: ``'disabled'`` - + DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. @@ -531,10 +531,10 @@ ServerApp.terminals_enabled : Bool Default: ``True`` Set to False to disable terminals. - + This does *not* make the server more secure by itself. Anything the user can in a terminal, they can also do in a notebook. - + Terminals may also be automatically disabled if the terminado package is not available. @@ -543,10 +543,10 @@ ServerApp.token : Unicode Default: ``''`` Token used for authenticating first-time connections to the server. - + When no password is enabled, the default is to generate a new, random token. - + Setting to an empty string disables authentication altogether, which is NOT RECOMMENDED. @@ -567,24 +567,24 @@ ServerApp.webbrowser_open_new : Int `new` argument passed to the standard library method `webbrowser.open`. The behaviour is not guaranteed, but depends on browser support. Valid values are: - + - 2 opens a new tab, - 1 opens a new window, - 0 opens in an existing window. - + See the `webbrowser.open` documentation for details. ServerApp.websocket_compression_options : Any Default: ``None`` - + Set the tornado compression options for websocket connections. - + This value will be returned from :meth:`WebSocketHandler.get_compression_options`. None (default) will disable compression. A dict (even an empty one) will enable compression. - + See the tornado docs for WebSocketHandler.get_compression_options for details. @@ -593,7 +593,7 @@ ServerApp.websocket_url : Unicode The base URL for websockets, if it differs from the HTTP server (hint: it almost certainly doesn't). 
- + Should be in the form of an HTTP origin: ws[s]://hostname[:port] @@ -601,7 +601,7 @@ ConnectionFileMixin.connection_file : Unicode Default: ``''`` JSON file in which to store connection info [default: kernel-.json] - + This file will contain the IP, ports, and authentication key needed to connect clients to this kernel. By default, this file will be created in the security dir of the current profile, but can be specified by absolute path. @@ -654,7 +654,7 @@ KernelManager.connection_file : Unicode Default: ``''`` JSON file in which to store connection info [default: kernel-.json] - + This file will contain the IP, ports, and authentication key needed to connect clients to this kernel. By default, this file will be created in the security dir of the current profile, but can be specified by absolute path. @@ -687,7 +687,7 @@ KernelManager.kernel_cmd : List Default: ``[]`` DEPRECATED: Use kernel_name instead. - + The Popen Command to launch the kernel. Override this if you have a custom kernel. If kernel_cmd is specified in a configuration file, @@ -727,7 +727,7 @@ Session.check_pid : Bool Default: ``True`` Whether to check PID to protect against calls after fork. - + This check can be disabled if fork-safety is handled elsewhere. @@ -745,7 +745,7 @@ Session.digest_history_size : Int Default: ``65536`` The maximum number of digests to remember. - + The digest history will be culled when it exceeds this value. @@ -833,10 +833,10 @@ MappingKernelManager.buffer_offline_messages : Bool Default: ``True`` Whether messages from kernels whose frontends have disconnected should be buffered in-memory. - + When True (default), messages are buffered and replayed on reconnect, avoiding lost messages due to interrupted connectivity. - + Disable if long-running kernels will produce too much output while no frontends are connected. @@ -874,7 +874,7 @@ MappingKernelManager.kernel_info_timeout : Float Default: ``60`` Timeout for giving up on a kernel (in seconds). 
- + On starting and restarting kernels, we check whether the kernel is running and responsive by sending kernel_info_requests. This sets the timeout in seconds for how long the kernel can take @@ -923,7 +923,7 @@ KernelSpecManager.whitelist : Set Default: ``set()`` Whitelist of allowed kernel names. - + By default, all installed kernels are allowed. @@ -951,13 +951,13 @@ ContentsManager.files_handler_class : Type Default: ``'jupyter_server.files.handlers.FilesHandler'`` handler class to use when serving raw file requests. - + Default is a fallback that talks to the ContentsManager API, which may be inefficient, especially for large files. - + Local files-based ContentsManagers can use a StaticFileHandler subclass, which will be much more efficient. - + Access to these files should be Authenticated. @@ -965,7 +965,7 @@ ContentsManager.files_handler_params : Dict Default: ``{}`` Extra parameters to pass to files_handler_class. - + For example, StaticFileHandlers generally expect a `path` argument specifying the root directory from which to serve files. @@ -973,7 +973,7 @@ ContentsManager.files_handler_params : Dict ContentsManager.hide_globs : List Default: ``['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dyl...`` - + Glob patterns to hide in file and directory listings. @@ -981,17 +981,17 @@ ContentsManager.pre_save_hook : Any Default: ``None`` Python callable or importstring thereof - + To be called on a contents model prior to save. - + This can be used to process the structure, such as removing notebook outputs or other side effects that should not be saved. - + It will be called as (all arguments passed by keyword):: - + hook(path=path, model=model, contents_manager=self) - + - model: the model to be saved. Includes file contents. Modifying this dict will affect the file that is stored. 
- path: the API path of the save destination @@ -1056,13 +1056,13 @@ FileContentsManager.files_handler_class : Type Default: ``'jupyter_server.files.handlers.FilesHandler'`` handler class to use when serving raw file requests. - + Default is a fallback that talks to the ContentsManager API, which may be inefficient, especially for large files. - + Local files-based ContentsManagers can use a StaticFileHandler subclass, which will be much more efficient. - + Access to these files should be Authenticated. @@ -1070,7 +1070,7 @@ FileContentsManager.files_handler_params : Dict Default: ``{}`` Extra parameters to pass to files_handler_class. - + For example, StaticFileHandlers generally expect a `path` argument specifying the root directory from which to serve files. @@ -1078,7 +1078,7 @@ FileContentsManager.files_handler_params : Dict FileContentsManager.hide_globs : List Default: ``['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dyl...`` - + Glob patterns to hide in file and directory listings. @@ -1086,16 +1086,16 @@ FileContentsManager.post_save_hook : Any Default: ``None`` Python callable or importstring thereof - + to be called on the path of a file just saved. - + This can be used to process the file on disk, such as converting the notebook to a script or HTML via nbconvert. - + It will be called as (all arguments passed by keyword):: - + hook(os_path=os_path, model=model, contents_manager=instance) - + - path: the filesystem path to the file just written - model: the model representing the file - contents_manager: this ContentsManager instance @@ -1105,17 +1105,17 @@ FileContentsManager.pre_save_hook : Any Default: ``None`` Python callable or importstring thereof - + To be called on a contents model prior to save. - + This can be used to process the structure, such as removing notebook outputs or other side effects that should not be saved. 
- + It will be called as (all arguments passed by keyword):: - + hook(path=path, model=model, contents_manager=self) - + - model: the model to be saved. Includes file contents. Modifying this dict will affect the file that is stored. - path: the API path of the save destination @@ -1194,10 +1194,10 @@ GatewayKernelManager.buffer_offline_messages : Bool Default: ``True`` Whether messages from kernels whose frontends have disconnected should be buffered in-memory. - + When True (default), messages are buffered and replayed on reconnect, avoiding lost messages due to interrupted connectivity. - + Disable if long-running kernels will produce too much output while no frontends are connected. @@ -1235,7 +1235,7 @@ GatewayKernelManager.kernel_info_timeout : Float Default: ``60`` Timeout for giving up on a kernel (in seconds). - + On starting and restarting kernels, we check whether the kernel is running and responsive by sending kernel_info_requests. This sets the timeout in seconds for how long the kernel can take @@ -1284,7 +1284,7 @@ GatewayKernelSpecManager.whitelist : Set Default: ``set()`` Whitelist of allowed kernel names. - + By default, all installed kernels are allowed. @@ -1388,4 +1388,3 @@ GatewayClient.ws_url : Unicode The websocket url of the Kernel or Enterprise Gateway server. If not provided, this value will correspond to the value of the Gateway url with 'ws' in place of 'http'. 
(JUPYTER_GATEWAY_WS_URL env var) - diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 8fc6e89479..ec44e13b75 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -448,7 +448,7 @@ def eventlogging_schema_fqn(name): Matches convention for this particular repo """ - return 'eventlogging.jupyter.org/notebook/{}'.format(name) + return 'eventlogging.jupyter.org/jupyter_server/{}'.format(name) def get_schema_files(): From c69bdd9b953d035088714bd6872d489cda2667a7 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Tue, 20 Oct 2020 15:23:47 +0800 Subject: [PATCH 68/68] add tests for eventlog --- .../contentsmanager-actions/v1.yaml | 4 +- tests/test_eventlog.py | 44 ++++++++++++++++++- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml index 31a5f293a9..d95de1d282 100644 --- a/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml +++ b/jupyter_server/event-schemas/contentsmanager-actions/v1.yaml @@ -1,4 +1,4 @@ -"$id": eventlogging.jupyter.org/notebook/contentsmanager-actions +"$id": eventlogging.jupyter.org/jupyter_server/contentsmanager-actions version: 1 title: Contents Manager activities personal-data: true @@ -80,4 +80,4 @@ properties: category: personally-identifiable-information type: string description: | - Source path of an operation when action is 'copy' or 'rename' \ No newline at end of file + Source path of an operation when action is 'copy' or 'rename' diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py index 1f7b587327..cef3915d43 100644 --- a/tests/test_eventlog.py +++ b/tests/test_eventlog.py @@ -1,4 +1,44 @@ +import io +import json +import logging +import jsonschema +import pytest +from traitlets.config import Config -def test_eventlog(serverapp): - pass \ No newline at end of file +from jupyter_server.utils import eventlogging_schema_fqn +from .services.contents.test_api import 
contents, contents_dir, dirs + + +@pytest.fixture +def eventlog_sink(configurable_serverapp): + """Return eventlog and sink objects""" + sink = io.StringIO() + handler = logging.StreamHandler(sink) + + cfg = Config() + cfg.EventLog.handlers = [handler] + serverapp = configurable_serverapp(config=cfg) + yield serverapp, sink + + +@pytest.mark.parametrize('path, name', dirs) +async def test_eventlog_list_notebooks(eventlog_sink, fetch, contents, path, name): + schema, version = (eventlogging_schema_fqn('contentsmanager-actions'), 1) + serverapp, sink = eventlog_sink + serverapp.eventlog.allowed_schemas = [schema] + + r = await fetch( + 'api', + 'contents', + path, + method='GET', + ) + assert r.code == 200 + + output = sink.getvalue() + assert output + data = json.loads(output) + jsonschema.validate(data, serverapp.eventlog.schemas[(schema, version)]) + expected = {'action': 'get', 'path': path} + assert expected.items() <= data.items()