diff --git a/.gitignore b/.gitignore index 1a246f57fe..5b1b10a4fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ MANIFEST +docs/source/operators/events build dist _build @@ -6,6 +7,7 @@ docs/man/*.gz docs/source/api/generated docs/source/config.rst docs/gh-pages +docs/source/events jupyter_server/i18n/*/LC_MESSAGES/*.mo jupyter_server/i18n/*/LC_MESSAGES/nbjs.json jupyter_server/static/style/*.min.css* @@ -36,4 +38,3 @@ config.rst # copied changelog file docs/source/other/changelog.md - diff --git a/MANIFEST.in b/MANIFEST.in index 70ccbaaa91..dca369fb90 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -9,6 +9,9 @@ include package.json # include everything in package_data recursive-include jupyter_server * +# Event Schemas +graft jupyter_server/event_schemas + # Documentation graft docs exclude docs/\#* diff --git a/docs/doc-requirements.txt b/docs/doc-requirements.txt index 189bf8f566..619f2f792c 100644 --- a/docs/doc-requirements.txt +++ b/docs/doc-requirements.txt @@ -10,3 +10,6 @@ sphinxcontrib-openapi sphinxemoji myst-parser pydata_sphinx_theme +git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master +sphinx-jsonschema +jupyter_telemetry_sphinxext diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css new file mode 100644 index 0000000000..63ee6cc74c --- /dev/null +++ b/docs/source/_static/theme_overrides.css @@ -0,0 +1,13 @@ +/* override table width restrictions */ +@media screen and (min-width: 767px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from overriding + this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } +} diff --git a/docs/source/conf.py b/docs/source/conf.py index 79f9df3ed6..7e2fdd31dd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -77,7 +77,9 @@ 'IPython.sphinxext.ipython_console_highlighting', 'sphinxcontrib_github_alt', 
'sphinxcontrib.openapi', - 'sphinxemoji.sphinxemoji' + 'sphinxemoji.sphinxemoji', + 'sphinx-jsonschema', + 'jupyter_telemetry_sphinxext' ] myst_enable_extensions = ["html_image"] @@ -216,6 +218,12 @@ # since it is needed to properly generate _static in the build directory html_static_path = ['_static'] +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # override wide tables in RTD theme + ], +} + # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. @@ -380,6 +388,11 @@ # import before any doc is built, so _ is guaranteed to be injected import jupyter_server.transutils +# Jupyter telemetry configuration values. +jupyter_telemetry_schema_source = osp.join(HERE, '../../jupyter_server/event_schemas') +jupyter_telemetry_schema_output = osp.join(HERE, 'operators/events') +# Title of the index page that lists all found schemas +jupyter_telemetry_index_title = 'Telemetry Event Schemas' def setup(app): dest = osp.join(HERE, 'other', 'changelog.md') diff --git a/docs/source/operators/index.rst b/docs/source/operators/index.rst index a654be1a0c..a6d2e212fd 100644 --- a/docs/source/operators/index.rst +++ b/docs/source/operators/index.rst @@ -12,4 +12,5 @@ These pages are targeted at people using, configuring, and/or deploying multiple configuring-extensions migrate-from-nbserver public-server - security \ No newline at end of file + security + telemetry \ No newline at end of file diff --git a/docs/source/operators/telemetry.rst b/docs/source/operators/telemetry.rst new file mode 100644 index 0000000000..0ab41a34d4 --- /dev/null +++ b/docs/source/operators/telemetry.rst @@ -0,0 +1,54 @@ +Eventlogging and Telemetry +========================== + +Jupyter Server can be configured to record structured events from a running server using Jupyter's `Telemetry System`_. 
The types of events that Jupyter Server emits are defined by the `JSON schemas`_ listed `below <#server-event-schemas>`_. Events are emitted as JSON data and validated against those schemas. + + +.. _logging: https://docs.python.org/3/library/logging.html +.. _`Telemetry System`: https://github.com/jupyter/telemetry +.. _`JSON schemas`: https://json-schema.org/ + +.. warning:: + Do NOT rely on this feature for security or auditing purposes. Neither `server <#emitting-server-events>`_ nor `client <#the-eventlog-endpoint>`_ events are protected against meddling. For server events, those who have access to the environment can change the server code to emit whatever they want. The same goes for client events where nothing prevents users from sending spurious data to the `eventlog` endpoint. + +Emitting server events +---------------------- + +Event logging is handled by the server's ``EventLog`` object. This leverages Python's standard logging_ library to emit, filter, and collect event data. + +To begin recording events, you'll need to set two configurations: + + 1. ``handlers``: tells the EventLog *where* to route your events. This trait is a list of Python logging handlers that route events to their destinations (for example, a file or a stream). + 2. ``allowed_schemas``: tells the EventLog *which* events should be recorded. No events are emitted by default; all recorded events must be listed here. + +Here's a basic example for emitting events from the `contents` service: + +.. code-block:: python + + import logging + + c.EventLog.handlers = [ + logging.FileHandler('event.log'), + ] + + c.EventLog.allowed_schemas = [ + 'hub.jupyter.org/server-action' + ] + +The output is a file, ``"event.log"``, with events recorded as JSON data. + +Server event schemas +-------------------- + +.. toctree:: + :maxdepth: 2 + + events/index + +The ``eventlog`` endpoint +------------------------- + +.. note:: + This has not yet been implemented. + +.. 
_below: diff --git a/docs/source/other/full-config.rst b/docs/source/other/full-config.rst index 52d0bcf3ff..2b0be1640a 100644 --- a/docs/source/other/full-config.rst +++ b/docs/source/other/full-config.rst @@ -110,9 +110,9 @@ ServerApp.allow_origin : Unicode Default: ``''`` Set the Access-Control-Allow-Origin header - + Use '*' to allow any origin to access your server. - + Takes precedence over allow_origin_pat. @@ -120,13 +120,13 @@ ServerApp.allow_origin_pat : Unicode Default: ``''`` Use a regular expression for the Access-Control-Allow-Origin header - + Requests from an origin matching the expression will get replies with: - + Access-Control-Allow-Origin: origin - + where `origin` is the origin of the request. - + Ignored if allow_origin is set. @@ -134,11 +134,11 @@ ServerApp.allow_password_change : Bool Default: ``True`` Allow password to be changed at login for the Jupyter server. - + While loggin in with a token, the Jupyter server UI will give the opportunity to the user to enter a new password at the same time that will replace the token login mechanism. - + This can be set to false to prevent changing password from the UI/API. @@ -146,15 +146,15 @@ ServerApp.allow_remote_access : Bool Default: ``False`` Allow requests where the Host header doesn't point to a local server - + By default, requests get a 403 forbidden response if the 'Host' header shows that the browser thinks it's on a non-local domain. Setting this option to True disables this check. - + This protects against 'DNS rebinding' attacks, where a remote web server serves you a page and then changes its DNS to send later requests to a local IP, bypassing same-origin checks. - + Local IP addresses (such as 127.0.0.1 and ::1) are allowed as local, along with hostnames configured in local_hostnames. @@ -169,11 +169,23 @@ ServerApp.answer_yes : Bool Answer yes to any prompts. +ServerApp.authenticate_prometheus : Bool + Default: ``True`` + + " + Require authentication to access prometheus metrics. 
+ + +ServerApp.autoreload : Bool + Default: ``False`` + + Reload the webapp when changes are made to any Python src files. + ServerApp.base_url : Unicode Default: ``'/'`` The base URL for the Jupyter server. - + Leading and trailing slashes can be omitted, and will automatically be added. @@ -213,7 +225,7 @@ ServerApp.config_manager_class : Type The config manager class to use -ServerApp.contents_manager_class : Type +ServerApp.contents_manager_class : TypeFromClasses Default: ``'jupyter_server.services.contents.largefilemanager.LargeFileM...`` The content manager class to use. @@ -229,7 +241,7 @@ ServerApp.cookie_secret : Bytes The random bytes used to secure cookies. By default this is a new random number every time you start the server. Set it to a value in a config file to enable logins to persist across server sessions. - + Note: Cookie secrets should be kept private, do not share config files with cookie_secret stored in plaintext (you can read the value from a file). @@ -243,12 +255,12 @@ ServerApp.custom_display_url : Unicode Default: ``''`` Override URL shown to users. - + Replace actual URL, including protocol, address, port and base URL, with the given value when displaying URL to the users. Do not change the actual connection URL. If authentication token is enabled, the token is added to the custom URL automatically. - + This option is intended to be used when the URL to display to the user cannot be determined reliably by the Jupyter server (proxified or containerized setups for example). @@ -262,13 +274,13 @@ ServerApp.disable_check_xsrf : Bool Default: ``False`` Disable cross-site-request-forgery protection - + Jupyter notebook 4.3.1 introduces protection from cross-site request forgeries, requiring API requests to either: - + - originate from pages served by this server (validated with XSRF cookie and token), or - authenticate with a token - + Some anonymous compute resources still desire the ability to run code, completely without authentication. 
These services can disable all authentication and security checks, @@ -284,7 +296,7 @@ ServerApp.extra_static_paths : List Default: ``[]`` Extra paths to search for serving static files. - + This allows adding javascript/css to be available from the Jupyter server machine, or overriding individual files in the IPython @@ -292,13 +304,18 @@ ServerApp.extra_template_paths : List Default: ``[]`` Extra paths to search for serving jinja templates. - + Can be used to override templates from jupyter_server.templates. ServerApp.file_to_run : Unicode Default: ``''`` - No description + Open the named file when the application is launched. + +ServerApp.file_url_prefix : Unicode + Default: ``'notebooks'`` + + The URL prefix where files are opened directly. ServerApp.generate_config : Bool Default: ``False`` @@ -345,17 +362,17 @@ ServerApp.jpserver_extensions : Dict Dict of Python modules to load as notebook server extensions.Entry values can be used to enable and disable the loading ofthe extensions. The extensions will be loaded in alphabetical order. ServerApp.kernel_manager_class : Type - Default: ``'jupyter_server.services.kernels.kernelmanager.MappingKernelM...`` + Default: ``'jupyter_server.services.kernels.kernelmanager.AsyncMappingKe...`` The kernel manager class to use. ServerApp.kernel_spec_manager_class : Type Default: ``'jupyter_client.kernelspec.KernelSpecManager'`` - + The kernel spec manager class to use. Should be a subclass of `jupyter_client.kernelspec.KernelSpecManager`. - + The Api of KernelSpecManager is provisional and might change without warning between this version of Jupyter and the next stable one. @@ -369,7 +386,7 @@ ServerApp.local_hostnames : List Default: ``['localhost']`` Hostnames to allow as local when allow_remote_access is False. - + Local IP addresses (such as 127.0.0.1 and ::1) are automatically accepted as local as well. 
@@ -402,23 +419,33 @@ ServerApp.logout_handler_class : Type ServerApp.max_body_size : Int Default: ``536870912`` - + Sets the maximum allowed size of the client request body, specified in the Content-Length request header field. If the size in a request exceeds the configured value, a malformed HTTP message is returned to the client. - + Note: max_body_size is applied even in streaming mode. ServerApp.max_buffer_size : Int Default: ``536870912`` - + Gets or sets the maximum amount of memory, in bytes, that is allocated for use by the buffer manager. +ServerApp.min_open_files_limit : Int + Default: ``0`` + + + Gets or sets a lower bound on the open file handles process resource + limit. This may need to be increased if you run into an + OSError: [Errno 24] Too many open files. + This is not applicable when running on Windows. + + ServerApp.notebook_dir : Unicode Default: ``''`` @@ -438,11 +465,11 @@ ServerApp.password : Unicode Default: ``''`` Hashed password to use for web authentication. - + To generate, type in a python/IPython shell: - + from jupyter_server.auth import passwd; passwd() - + The string should be of the form type:salt:hashed-password. @@ -452,26 +479,26 @@ ServerApp.password_required : Bool Forces users to use a password for the Jupyter server. This is useful in a multi user environment, for instance when everybody in the LAN can access each other's machine through ssh. - + In such a case, serving on localhost is not secure since any user can connect to the Jupyter server via ssh. - + ServerApp.port : Int Default: ``8888`` - The port the Jupyter server will listen on. + The port the server will listen on (env: JUPYTER_PORT). ServerApp.port_retries : Int Default: ``50`` - The number of additional ports to try if the specified port is not available. + The number of additional ports to try if the specified port is not available (env: JUPYTER_PORT_RETRIES). 
ServerApp.pylab : Unicode Default: ``'disabled'`` - + DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. @@ -531,10 +558,10 @@ ServerApp.terminals_enabled : Bool Default: ``True`` Set to False to disable terminals. - + This does *not* make the server more secure by itself. Anything the user can in a terminal, they can also do in a notebook. - + Terminals may also be automatically disabled if the terminado package is not available. @@ -543,10 +570,13 @@ ServerApp.token : Unicode Default: ``''`` Token used for authenticating first-time connections to the server. - + + The token can be read from the file referenced by JUPYTER_TOKEN_FILE or set directly + with the JUPYTER_TOKEN environment variable. + When no password is enabled, the default is to generate a new, random token. - + Setting to an empty string disables authentication altogether, which is NOT RECOMMENDED. @@ -560,6 +590,23 @@ ServerApp.trust_xheaders : Bool Whether to trust or not X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded-For headerssent by the upstream reverse proxy. Necessary if the proxy handles SSL +ServerApp.use_redirect_file : Bool + Default: ``True`` + + Disable launching browser by redirect file + For versions of notebook > 5.7.2, a security feature measure was added that + prevented the authentication token used to launch the browser from being visible. + This feature makes it difficult for other users on a multi-user system from + running code in your Jupyter session as you. + However, some environments (like Windows Subsystem for Linux (WSL) and Chromebooks), + launching a browser using a redirect file can lead the browser failing to load. + This is because of the difference in file structures/paths between the runtime and + the browser. + + Disabling this setting to False will disable this behavior, allowing the browser + to launch by using a URL and visible token (as before). 
+ + ServerApp.webbrowser_open_new : Int Default: ``2`` @@ -567,24 +614,24 @@ ServerApp.webbrowser_open_new : Int `new` argument passed to the standard library method `webbrowser.open`. The behaviour is not guaranteed, but depends on browser support. Valid values are: - + - 2 opens a new tab, - 1 opens a new window, - 0 opens in an existing window. - + See the `webbrowser.open` documentation for details. ServerApp.websocket_compression_options : Any Default: ``None`` - + Set the tornado compression options for websocket connections. - + This value will be returned from :meth:`WebSocketHandler.get_compression_options`. None (default) will disable compression. A dict (even an empty one) will enable compression. - + See the tornado docs for WebSocketHandler.get_compression_options for details. @@ -593,7 +640,7 @@ ServerApp.websocket_url : Unicode The base URL for websockets, if it differs from the HTTP server (hint: it almost certainly doesn't). - + Should be in the form of an HTTP origin: ws[s]://hostname[:port] @@ -601,7 +648,7 @@ ConnectionFileMixin.connection_file : Unicode Default: ``''`` JSON file in which to store connection info [default: kernel-.json] - + This file will contain the IP, ports, and authentication key needed to connect clients to this kernel. By default, this file will be created in the security dir of the current profile, but can be specified by absolute path. @@ -654,7 +701,7 @@ KernelManager.connection_file : Unicode Default: ``''`` JSON file in which to store connection info [default: kernel-.json] - + This file will contain the IP, ports, and authentication key needed to connect clients to this kernel. By default, this file will be created in the security dir of the current profile, but can be specified by absolute path. @@ -687,7 +734,7 @@ KernelManager.kernel_cmd : List Default: ``[]`` DEPRECATED: Use kernel_name instead. - + The Popen Command to launch the kernel. Override this if you have a custom kernel. 
If kernel_cmd is specified in a configuration file, @@ -727,7 +774,7 @@ Session.check_pid : Bool Default: ``True`` Whether to check PID to protect against calls after fork. - + This check can be disabled if fork-safety is handled elsewhere. @@ -745,7 +792,7 @@ Session.digest_history_size : Int Default: ``65536`` The maximum number of digests to remember. - + The digest history will be culled when it exceeds this value. @@ -796,7 +843,7 @@ Session.unpacker : DottedObjectName Only used with custom functions for `packer`. Session.username : Unicode - Default: ``'username'`` + Default: ``'kien'`` Username for the Session. Default is your system username. @@ -833,10 +880,10 @@ MappingKernelManager.buffer_offline_messages : Bool Default: ``True`` Whether messages from kernels whose frontends have disconnected should be buffered in-memory. - + When True (default), messages are buffered and replayed on reconnect, avoiding lost messages due to interrupted connectivity. - + Disable if long-running kernels will produce too much output while no frontends are connected. @@ -874,7 +921,7 @@ MappingKernelManager.kernel_info_timeout : Float Default: ``60`` Timeout for giving up on a kernel (in seconds). - + On starting and restarting kernels, we check whether the kernel is running and responsive by sending kernel_info_requests. This sets the timeout in seconds for how long the kernel can take @@ -923,10 +970,115 @@ KernelSpecManager.whitelist : Set Default: ``set()`` Whitelist of allowed kernel names. - + By default, all installed kernels are allowed. +AsyncMultiKernelManager.default_kernel_name : Unicode + Default: ``'python3'`` + + The name of the default kernel to start + +AsyncMultiKernelManager.kernel_manager_class : DottedObjectName + Default: ``'jupyter_client.ioloop.AsyncIOLoopKernelManager'`` + + The kernel manager class. This is configurable to allow + subclassing of the AsyncKernelManager for customized behavior. 
+ + +AsyncMultiKernelManager.shared_context : Bool + Default: ``True`` + + Share a single zmq.Context to talk to all my kernels + +AsyncMappingKernelManager.allow_tracebacks : Bool + Default: ``True`` + + Whether to send tracebacks to clients on exceptions. + +AsyncMappingKernelManager.allowed_message_types : List + Default: ``[]`` + + White list of allowed kernel message types. + When the list is empty, all message types are allowed. + + +AsyncMappingKernelManager.buffer_offline_messages : Bool + Default: ``True`` + + Whether messages from kernels whose frontends have disconnected should be buffered in-memory. + + When True (default), messages are buffered and replayed on reconnect, + avoiding lost messages due to interrupted connectivity. + + Disable if long-running kernels will produce too much output while + no frontends are connected. + + +AsyncMappingKernelManager.cull_busy : Bool + Default: ``False`` + + Whether to consider culling kernels which are busy. + Only effective if cull_idle_timeout > 0. + +AsyncMappingKernelManager.cull_connected : Bool + Default: ``False`` + + Whether to consider culling kernels which have one or more connections. + Only effective if cull_idle_timeout > 0. + +AsyncMappingKernelManager.cull_idle_timeout : Int + Default: ``0`` + + Timeout (in seconds) after which a kernel is considered idle and ready to be culled. + Values of 0 or lower disable culling. Very short timeouts may result in kernels being culled + for users with poor network connections. + +AsyncMappingKernelManager.cull_interval : Int + Default: ``300`` + + The interval (in seconds) on which to check for idle kernels exceeding the cull timeout value. + +AsyncMappingKernelManager.default_kernel_name : Unicode + Default: ``'python3'`` + + The name of the default kernel to start + +AsyncMappingKernelManager.kernel_info_timeout : Float + Default: ``60`` + + Timeout for giving up on a kernel (in seconds). 
+ + On starting and restarting kernels, we check whether the + kernel is running and responsive by sending kernel_info_requests. + This sets the timeout in seconds for how long the kernel can take + before being presumed dead. + This affects the MappingKernelManager (which handles kernel restarts) + and the ZMQChannelsHandler (which handles the startup). + + +AsyncMappingKernelManager.kernel_manager_class : DottedObjectName + Default: ``'jupyter_client.ioloop.AsyncIOLoopKernelManager'`` + + The kernel manager class. This is configurable to allow + subclassing of the AsyncKernelManager for customized behavior. + + +AsyncMappingKernelManager.root_dir : Unicode + Default: ``''`` + + No description + +AsyncMappingKernelManager.shared_context : Bool + Default: ``True`` + + Share a single zmq.Context to talk to all my kernels + +AsyncMappingKernelManager.traceback_replacement_message : Unicode + Default: ``'An exception occurred at runtime, which is not shown due to ...`` + + Message to print when allow_tracebacks is False, and an exception occurs + ContentsManager.allow_hidden : Bool Default: ``False`` @@ -951,13 +1103,13 @@ ContentsManager.files_handler_class : Type Default: ``'jupyter_server.files.handlers.FilesHandler'`` handler class to use when serving raw file requests. - + Default is a fallback that talks to the ContentsManager API, which may be inefficient, especially for large files. - + Local files-based ContentsManagers can use a StaticFileHandler subclass, which will be much more efficient. - + Access to these files should be Authenticated. @@ -965,7 +1117,7 @@ ContentsManager.files_handler_params : Dict Default: ``{}`` Extra parameters to pass to files_handler_class. - + For example, StaticFileHandlers generally expect a `path` argument specifying the root directory from which to serve files. 
@@ -973,7 +1125,7 @@ ContentsManager.files_handler_params : Dict ContentsManager.hide_globs : List Default: ``['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dyl...`` - + Glob patterns to hide in file and directory listings. @@ -981,17 +1133,17 @@ ContentsManager.pre_save_hook : Any Default: ``None`` Python callable or importstring thereof - + To be called on a contents model prior to save. - + This can be used to process the structure, such as removing notebook outputs or other side effects that should not be saved. - + It will be called as (all arguments passed by keyword):: - + hook(path=path, model=model, contents_manager=self) - + - model: the model to be saved. Includes file contents. Modifying this dict will affect the file that is stored. - path: the API path of the save destination @@ -1056,13 +1208,13 @@ FileContentsManager.files_handler_class : Type Default: ``'jupyter_server.files.handlers.FilesHandler'`` handler class to use when serving raw file requests. - + Default is a fallback that talks to the ContentsManager API, which may be inefficient, especially for large files. - + Local files-based ContentsManagers can use a StaticFileHandler subclass, which will be much more efficient. - + Access to these files should be Authenticated. @@ -1070,7 +1222,7 @@ FileContentsManager.files_handler_params : Dict Default: ``{}`` Extra parameters to pass to files_handler_class. - + For example, StaticFileHandlers generally expect a `path` argument specifying the root directory from which to serve files. @@ -1078,7 +1230,7 @@ FileContentsManager.files_handler_params : Dict FileContentsManager.hide_globs : List Default: ``['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dyl...`` - + Glob patterns to hide in file and directory listings. @@ -1086,16 +1238,16 @@ FileContentsManager.post_save_hook : Any Default: ``None`` Python callable or importstring thereof - + to be called on the path of a file just saved. 
- + This can be used to process the file on disk, such as converting the notebook to a script or HTML via nbconvert. - + It will be called as (all arguments passed by keyword):: - + hook(os_path=os_path, model=model, contents_manager=instance) - + - path: the filesystem path to the file just written - model: the model representing the file - contents_manager: this ContentsManager instance @@ -1105,17 +1257,17 @@ FileContentsManager.pre_save_hook : Any Default: ``None`` Python callable or importstring thereof - + To be called on a contents model prior to save. - + This can be used to process the structure, such as removing notebook outputs or other side effects that should not be saved. - + It will be called as (all arguments passed by keyword):: - + hook(path=path, model=model, contents_manager=self) - + - model: the model to be saved. Includes file contents. Modifying this dict will affect the file that is stored. - path: the API path of the save destination @@ -1149,7 +1301,229 @@ FileContentsManager.use_atomic_writing : Bool This procedure, namely 'atomic_writing', causes some bugs on file system whitout operation order enforcement (like some networked fs). 
If set to False, the new notebook is written directly on the old one which could fail (eg: full filesystem or quota ) -NotebookNotary.algorithm : any of ``'blake2s'``|``'sha512'``|``'md5'``|``'sha3_512'``|``'sha3_224'``|``'blake2b'``|``'sha384'``|``'sha1'``|``'sha3_256'``|``'sha256'``|``'sha224'``|``'sha3_384'`` +AsyncContentsManager.allow_hidden : Bool + Default: ``False`` + + Allow access to hidden files + +AsyncContentsManager.checkpoints : Instance + Default: ``None`` + + No description + +AsyncContentsManager.checkpoints_class : Type + Default: ``'jupyter_server.services.contents.checkpoints.AsyncCheckpoints'`` + + No description + +AsyncContentsManager.checkpoints_kwargs : Dict + Default: ``{}`` + + No description + +AsyncContentsManager.files_handler_class : Type + Default: ``'jupyter_server.files.handlers.FilesHandler'`` + + handler class to use when serving raw file requests. + + Default is a fallback that talks to the ContentsManager API, + which may be inefficient, especially for large files. + + Local files-based ContentsManagers can use a StaticFileHandler subclass, + which will be much more efficient. + + Access to these files should be Authenticated. + + +AsyncContentsManager.files_handler_params : Dict + Default: ``{}`` + + Extra parameters to pass to files_handler_class. + + For example, StaticFileHandlers generally expect a `path` argument + specifying the root directory from which to serve files. + + +AsyncContentsManager.hide_globs : List + Default: ``['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dyl...`` + + + Glob patterns to hide in file and directory listings. + + +AsyncContentsManager.pre_save_hook : Any + Default: ``None`` + + Python callable or importstring thereof + + To be called on a contents model prior to save. + + This can be used to process the structure, + such as removing notebook outputs or other side effects that + should not be saved. 
+ + It will be called as (all arguments passed by keyword):: + + hook(path=path, model=model, contents_manager=self) + + - model: the model to be saved. Includes file contents. + Modifying this dict will affect the file that is stored. + - path: the API path of the save destination + - contents_manager: this ContentsManager instance + + +AsyncContentsManager.root_dir : Unicode + Default: ``'/'`` + + No description + +AsyncContentsManager.untitled_directory : Unicode + Default: ``'Untitled Folder'`` + + The base name used when creating untitled directories. + +AsyncContentsManager.untitled_file : Unicode + Default: ``'untitled'`` + + The base name used when creating untitled files. + +AsyncContentsManager.untitled_notebook : Unicode + Default: ``'Untitled'`` + + The base name used when creating untitled notebooks. + +AsyncFileManagerMixin.use_atomic_writing : Bool + Default: ``True`` + + By default notebooks are saved on disk on a temporary file and then if succefully written, it replaces the old ones. + This procedure, namely 'atomic_writing', causes some bugs on file system whitout operation order enforcement (like some networked fs). + If set to False, the new notebook is written directly on the old one which could fail (eg: full filesystem or quota ) + +AsyncFileContentsManager.allow_hidden : Bool + Default: ``False`` + + Allow access to hidden files + +AsyncFileContentsManager.checkpoints : Instance + Default: ``None`` + + No description + +AsyncFileContentsManager.checkpoints_class : Type + Default: ``'jupyter_server.services.contents.checkpoints.AsyncCheckpoints'`` + + No description + +AsyncFileContentsManager.checkpoints_kwargs : Dict + Default: ``{}`` + + No description + +AsyncFileContentsManager.delete_to_trash : Bool + Default: ``True`` + + If True (default), deleting files will send them to the + platform's trash/recycle bin, where they can be recovered. If False, + deleting files really deletes them. 
+ +AsyncFileContentsManager.files_handler_class : Type + Default: ``'jupyter_server.files.handlers.FilesHandler'`` + + handler class to use when serving raw file requests. + + Default is a fallback that talks to the ContentsManager API, + which may be inefficient, especially for large files. + + Local files-based ContentsManagers can use a StaticFileHandler subclass, + which will be much more efficient. + + Access to these files should be Authenticated. + + +AsyncFileContentsManager.files_handler_params : Dict + Default: ``{}`` + + Extra parameters to pass to files_handler_class. + + For example, StaticFileHandlers generally expect a `path` argument + specifying the root directory from which to serve files. + + +AsyncFileContentsManager.hide_globs : List + Default: ``['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dyl...`` + + + Glob patterns to hide in file and directory listings. + + +AsyncFileContentsManager.post_save_hook : Any + Default: ``None`` + + Python callable or importstring thereof + + to be called on the path of a file just saved. + + This can be used to process the file on disk, + such as converting the notebook to a script or HTML via nbconvert. + + It will be called as (all arguments passed by keyword):: + + hook(os_path=os_path, model=model, contents_manager=instance) + + - path: the filesystem path to the file just written + - model: the model representing the file + - contents_manager: this ContentsManager instance + + +AsyncFileContentsManager.pre_save_hook : Any + Default: ``None`` + + Python callable or importstring thereof + + To be called on a contents model prior to save. + + This can be used to process the structure, + such as removing notebook outputs or other side effects that + should not be saved. + + It will be called as (all arguments passed by keyword):: + + hook(path=path, model=model, contents_manager=self) + + - model: the model to be saved. Includes file contents. 
+ Modifying this dict will affect the file that is stored. + - path: the API path of the save destination + - contents_manager: this ContentsManager instance + + +AsyncFileContentsManager.root_dir : Unicode + Default: ``''`` + + No description + +AsyncFileContentsManager.untitled_directory : Unicode + Default: ``'Untitled Folder'`` + + The base name used when creating untitled directories. + +AsyncFileContentsManager.untitled_file : Unicode + Default: ``'untitled'`` + + The base name used when creating untitled files. + +AsyncFileContentsManager.untitled_notebook : Unicode + Default: ``'Untitled'`` + + The base name used when creating untitled notebooks. + +AsyncFileContentsManager.use_atomic_writing : Bool + Default: ``True`` + + By default notebooks are saved on disk on a temporary file and then if successfully written, it replaces the old ones. + This procedure, namely 'atomic_writing', causes some bugs on file system without operation order enforcement (like some networked fs). + If set to False, the new notebook is written directly on the old one which could fail (eg: full filesystem or quota ) + +NotebookNotary.algorithm : any of ``'sha3_512'``|``'sha224'``|``'sha1'``|``'sha256'``|``'sha384'``|``'md5'``|``'blake2b'``|``'sha3_224'``|``'sha3_256'``|``'blake2s'``|``'sha512'``|``'sha3_384'`` Default: ``'sha256'`` The hashing algorithm used to sign notebooks. @@ -1194,10 +1568,10 @@ GatewayKernelManager.buffer_offline_messages : Bool Default: ``True`` Whether messages from kernels whose frontends have disconnected should be buffered in-memory. - + When True (default), messages are buffered and replayed on reconnect, avoiding lost messages due to interrupted connectivity. - + Disable if long-running kernels will produce too much output while no frontends are connected. @@ -1235,7 +1609,7 @@ GatewayKernelManager.kernel_info_timeout : Float Default: ``60`` Timeout for giving up on a kernel (in seconds).
- + On starting and restarting kernels, we check whether the kernel is running and responsive by sending kernel_info_requests. This sets the timeout in seconds for how long the kernel can take @@ -1245,10 +1619,10 @@ GatewayKernelManager.kernel_info_timeout : Float GatewayKernelManager.kernel_manager_class : DottedObjectName - Default: ``'jupyter_client.ioloop.IOLoopKernelManager'`` + Default: ``'jupyter_client.ioloop.AsyncIOLoopKernelManager'`` The kernel manager class. This is configurable to allow - subclassing of the KernelManager for customized behavior. + subclassing of the AsyncKernelManager for customized behavior. GatewayKernelManager.root_dir : Unicode @@ -1284,7 +1658,7 @@ GatewayKernelSpecManager.whitelist : Set Default: ``set()`` Whitelist of allowed kernel names. - + By default, all installed kernels are allowed. @@ -1315,7 +1689,7 @@ GatewayClient.client_key : Unicode GatewayClient.connect_timeout : Float - Default: ``60.0`` + Default: ``40.0`` The time allowed for HTTP connection establishment with the Gateway server. (JUPYTER_GATEWAY_CONNECT_TIMEOUT env var) @@ -1328,6 +1702,26 @@ GatewayClient.env_whitelist : Unicode value must also be set on the Gateway server - since that configuration value indicates which environmental values to make available to the kernel. (JUPYTER_GATEWAY_ENV_WHITELIST env var) +GatewayClient.gateway_retry_interval : Float + Default: ``1.0`` + + The time allowed for HTTP reconnection with the Gateway server for the first time. + Next will be JUPYTER_GATEWAY_RETRY_INTERVAL multiplied by two in factor of numbers of retries + but less than JUPYTER_GATEWAY_RETRY_INTERVAL_MAX. + (JUPYTER_GATEWAY_RETRY_INTERVAL env var) + +GatewayClient.gateway_retry_interval_max : Float + Default: ``30.0`` + + The maximum time allowed for HTTP reconnection retry with the Gateway server. 
+ (JUPYTER_GATEWAY_RETRY_INTERVAL_MAX env var) + +GatewayClient.gateway_retry_max : Int + Default: ``5`` + + The maximum retries allowed for HTTP reconnection with the Gateway server. + (JUPYTER_GATEWAY_RETRY_MAX env var) + GatewayClient.headers : Unicode Default: ``'{}'`` @@ -1364,7 +1758,7 @@ GatewayClient.kernelspecs_resource_endpoint : Unicode (JUPYTER_GATEWAY_KERNELSPECS_RESOURCE_ENDPOINT env var) GatewayClient.request_timeout : Float - Default: ``60.0`` + Default: ``40.0`` The time allowed for HTTP request completion. (JUPYTER_GATEWAY_REQUEST_TIMEOUT env var) @@ -1389,3 +1783,14 @@ GatewayClient.ws_url : Unicode The websocket url of the Kernel or Enterprise Gateway server. If not provided, this value will correspond to the value of the Gateway url with 'ws' in place of 'http'. (JUPYTER_GATEWAY_WS_URL env var) + +TerminalManager.cull_inactive_timeout : Int + Default: ``0`` + + Timeout (in seconds) in which a terminal has been inactive and ready to be culled. + Values of 0 or lower disable culling. + +TerminalManager.cull_interval : Int + Default: ``300`` + + The interval (in seconds) on which to check for terminals exceeding the inactive timeout value. 
diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index 743f12d31d..6a8208e756 100755 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -205,6 +205,10 @@ def jinja_template_vars(self): """User-supplied values to supply to jinja templates.""" return self.settings.get('jinja_template_vars', {}) + @property + def eventlog(self): + return self.settings.get('eventlog') + #--------------------------------------------------------------- # URLs #--------------------------------------------------------------- diff --git a/jupyter_server/event_schemas/__init__.py b/jupyter_server/event_schemas/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/jupyter_server/event_schemas/contentsmanager-actions/v1.yaml b/jupyter_server/event_schemas/contentsmanager-actions/v1.yaml new file mode 100644 index 0000000000..0f89d7ae42 --- /dev/null +++ b/jupyter_server/event_schemas/contentsmanager-actions/v1.yaml @@ -0,0 +1,86 @@ +"$id": eventlogging.jupyter.org/jupyter_server/contentsmanager-actions +version: 1 +title: Contents Manager activities +personal-data: true +description: | + Record actions on files via the ContentsManager REST API. + + The notebook ContentsManager REST API is used by all frontends to retrieve, + save, list, delete and perform other actions on notebooks, directories, + and other files through the UI. This is pluggable - the default acts on + the file system, but can be replaced with a different ContentsManager + implementation - to work on S3, Postgres, other object stores, etc. + The events get recorded regardless of the ContentsManager implementation + being used. + + Limitations: + + 1. This does not record all filesystem access, just the ones that happen + explicitly via the notebook server's REST API. Users can (and often do) + trivially access the filesystem in many other ways (such as `open()` calls + in their code), so this is usually never a complete record. + 2.
As with all events recorded by the notebook server, users most likely + have the ability to modify the code of the notebook server. Unless other + security measures are in place, these events should be treated as user + controlled and not used in high security areas. + 3. Events are only recorded when an action succeeds. +type: object +required: + - action + - path +properties: + action: + enum: + - get + - create + - save + - upload + - rename + - copy + - delete + categories: + - category.jupyter.org/unrestricted + description: | + Action performed by the ContentsManager API. + + This is a required field. + + Possible values: + + 1. get + Get contents of a particular file, or list contents of a directory. + + 2. create + Create a new directory or file at 'path'. Currently, name of the + file or directory is auto generated by the ContentsManager implementation. + + 3. save + Save a file at path with contents from the client + + 4. upload + Upload a file at given path with contents from the client + + 5. rename + Rename a file or directory from value in source_path to + value in path. + + 6. copy + Copy a file or directory from value in source_path to + value in path. + + 7. delete + Delete a file or empty directory at given path + path: + categories: + - category.jupyter.org/user-identifiable-information + type: string + description: | + Logical path on which the operation was performed. + + This is a required field.
+ source_path: + categories: + - category.jupyter.org/user-identifiable-information + type: string + description: | + Source path of an operation when action is 'copy' or 'rename' diff --git a/jupyter_server/pytest_plugin.py b/jupyter_server/pytest_plugin.py index 849cdc469b..f8580825f0 100644 --- a/jupyter_server/pytest_plugin.py +++ b/jupyter_server/pytest_plugin.py @@ -3,7 +3,9 @@ import os import sys +import io import json +import logging import pytest import shutil import urllib.parse @@ -432,3 +434,15 @@ def inner(nbpath): def jp_server_cleanup(): yield ServerApp.clear_instance() + + +@pytest.fixture +def jp_eventlog_sink(jp_configurable_serverapp): + """Return eventlog and sink objects""" + sink = io.StringIO() + handler = logging.StreamHandler(sink) + + cfg = Config() + cfg.EventLog.handlers = [handler] + serverapp = jp_configurable_serverapp(config=cfg) + yield serverapp, sink diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 24c1227e6e..11866fa1f3 100755 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -31,6 +31,11 @@ import inspect import pathlib +if sys.version_info >= (3, 9): + import importlib.resources as importlib_resources +else: + import importlib_resources + from base64 import encodebytes try: import resource @@ -96,6 +101,8 @@ TraitError, Type, Float, observe, default, validate ) from jupyter_core.paths import jupyter_runtime_dir, jupyter_path +from jupyter_telemetry.eventlog import EventLog + from jupyter_server._sysinfo import get_sys_info from jupyter_server._tz import utcnow, utcfromtimestamp @@ -104,7 +111,8 @@ check_pid, url_escape, urljoin, - pathname2url + pathname2url, + get_schema_files, ) from jupyter_server.extension.serverextension import ServerExtensionApp @@ -290,6 +298,7 @@ def init_settings(self, jupyter_app, kernel_manager, contents_manager, server_root_dir=root_dir, jinja2_env=env, terminals_available=terminado_available and jupyter_app.terminals_enabled, + 
eventlog=jupyter_app.eventlog, serverapp=jupyter_app ) @@ -1817,6 +1826,13 @@ def _init_asyncio_patch(): DeprecationWarning ) + def init_eventlog(self): + self.eventlog = EventLog(parent=self) + # Register schemas for notebook services. + for file in get_schema_files(): + with importlib_resources.as_file(file) as f: + self.eventlog.register_schema_file(f) + @catch_config_error def initialize(self, argv=None, find_extensions=True, new_httpserver=True, starter_extension=None): """Initialize the Server application class, configurables, web application, and http server. @@ -1846,6 +1862,7 @@ def initialize(self, argv=None, find_extensions=True, new_httpserver=True, start if find_extensions: self.find_server_extensions() self.init_logging() + self.init_eventlog() self.init_server_extensions() # Special case the starter extension and load diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index b7a8b1af1b..6bbbd08649 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -5,14 +5,18 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
- +import os import json from tornado import web - -from jupyter_server.utils import url_path_join, url_escape, ensure_async from jupyter_client.jsonutil import date_default +from jupyter_server.utils import ( + url_path_join, + url_escape, + ensure_async, + eventlogging_schema_fqn +) from jupyter_server.base.handlers import ( JupyterHandler, APIHandler, path_regex, ) @@ -111,6 +115,11 @@ async def get(self, path=''): path=path, type=type, format=format, content=content, )) validate_model(model, expect_content=content) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { 'action': 'get', 'path': model['path'] } + ) self._finish_model(model, location=False) @web.authenticated @@ -122,8 +131,18 @@ async def patch(self, path=''): raise web.HTTPError(400, u'JSON body missing') model = await ensure_async(cm.update(model, path)) validate_model(model, expect_content=False) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'rename', + 'path': model['path'], + 'source_path': path.lstrip(os.path.sep) + } + ) self._finish_model(model) + async def _copy(self, copy_from, copy_to=None): """Copy a file, optionally specifying a target directory.""" self.log.info(u"Copying {copy_from} to {copy_to}".format( @@ -133,6 +152,15 @@ async def _copy(self, copy_from, copy_to=None): model = await ensure_async(self.contents_manager.copy(copy_from, copy_to)) self.set_status(201) validate_model(model, expect_content=False) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { + 'action': 'copy', + 'path': model['path'], + 'source_path': copy_from.lstrip(os.path.sep) + } + ) self._finish_model(model) async def _upload(self, model, path): @@ -141,6 +169,11 @@ async def _upload(self, model, path): model = await ensure_async(self.contents_manager.new(model, path)) self.set_status(201) validate_model(model, expect_content=False) + self.eventlog.record_event( + 
eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { 'action': 'upload', 'path': model['path'] } + ) self._finish_model(model) async def _new_untitled(self, path, type='', ext=''): @@ -150,6 +183,11 @@ async def _new_untitled(self, path, type='', ext=''): path=path, type=type, ext=ext)) self.set_status(201) validate_model(model, expect_content=False) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), 1, + # Set path to path of created object, not directory it was created in + { 'action': 'create', 'path': model['path'] } + ) self._finish_model(model) async def _save(self, model, path): @@ -159,6 +197,11 @@ async def _save(self, model, path): self.log.info(u"Saving file at %s", path) model = await ensure_async(self.contents_manager.save(model, path)) validate_model(model, expect_content=False) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), + 1, + { 'action': 'save', 'path': model['path'] } + ) self._finish_model(model) @web.authenticated @@ -228,6 +271,10 @@ async def delete(self, path=''): self.log.warning('delete %s', path) await ensure_async(cm.delete(path)) self.set_status(204) + self.eventlog.record_event( + eventlogging_schema_fqn('contentsmanager-actions'), 1, + { 'action': 'delete', 'path': path.lstrip(os.path.sep) } + ) self.finish() diff --git a/jupyter_server/tests/test_eventlog.py b/jupyter_server/tests/test_eventlog.py new file mode 100644 index 0000000000..749f889bd1 --- /dev/null +++ b/jupyter_server/tests/test_eventlog.py @@ -0,0 +1,36 @@ +import json + +import jsonschema +import pytest + +from jupyter_server.utils import eventlogging_schema_fqn +from .services.contents.test_api import contents, contents_dir, dirs + + +@pytest.mark.parametrize('path, name', dirs) +async def test_eventlog_list_notebooks(jp_eventlog_sink, jp_fetch, contents, path, name): + schema, version = (eventlogging_schema_fqn('contentsmanager-actions'), 1) + serverapp, sink = jp_eventlog_sink + 
serverapp.eventlog.allowed_schemas = { + serverapp.eventlog.schemas[(schema, version)]['$id']: { + 'allowed_categories': [ + 'category.jupyter.org/unrestricted', + 'category.jupyter.org/user-identifiable-information' + ] + } + } + + r = await jp_fetch( + 'api', + 'contents', + path, + method='GET', + ) + assert r.code == 200 + + output = sink.getvalue() + assert output + data = json.loads(output) + jsonschema.validate(data, serverapp.eventlog.schemas[(schema, version)]) + expected = {'action': 'get', 'path': path} + assert expected.items() <= data.items() diff --git a/jupyter_server/utils.py b/jupyter_server/utils.py index 7a86f38581..5163e63f9b 100644 --- a/jupyter_server/utils.py +++ b/jupyter_server/utils.py @@ -10,11 +10,15 @@ import sys from distutils.version import LooseVersion +if sys.version_info >= (3, 9): + import importlib.resources as importlib_resources +else: + import importlib_resources + from urllib.parse import quote, unquote, urlparse, urljoin from urllib.request import pathname2url - def url_path_join(*pieces): """Join components of url into a relative url @@ -222,3 +226,29 @@ def wrapped(): raise e return result return wrapped() + + +def eventlogging_schema_fqn(name): + """ + Return fully qualified event schema name + + Matches convention for this particular repo + """ + return 'eventlogging.jupyter.org/jupyter_server/{}'.format(name) + + +def list_resources(resources): + for entry in resources.iterdir(): + if entry.is_dir(): + yield from list_resources(entry) + else: + yield entry + + +def get_schema_files(): + """Yield a sequence of event schemas for jupyter services.""" + return ( + entry for entry in list_resources( + importlib_resources.files('jupyter_server.event_schemas') + ) if os.path.splitext(entry.name)[1] == '.yaml' + ) diff --git a/setup.cfg b/setup.cfg index 4cd951aca2..c610a47574 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,6 +44,8 @@ install_requires = pywin32>=1.0 ; sys_platform == 'win32' anyio>=2.0.2,<3 ; python_version < 
'3.7' anyio>=3.0.1,<4 ; python_version >= '3.7' + importlib-resources ; python_version < '3.9' + jupyter_telemetry@git+https://github.com/jupyter/telemetry.git@6f1933ca88349fbcfb02dbbe35028ffa930cf836 [options.extras_require] test = coverage; pytest; pytest-cov; pytest-mock; requests; pytest-tornasync; pytest-console-scripts; ipykernel