Skip to content

Commit 7538b01

Browse files
committed
Reorganize
Add docstrings. Rename stuff more appropriately. Expose only what's necessary.
1 parent b17dc59 commit 7538b01

File tree

3 files changed

+93
-20
lines changed

3 files changed

+93
-20
lines changed

jupyter_telemetry/_eventschema.py renamed to jupyter_telemetry/_categories.py

Lines changed: 89 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,21 @@
44
from jsonschema.exceptions import ValidationError
55

66

7+
__all__ = ['JSONSchemaValidator', 'filter_categories_from_event']
8+
9+
710
class ExtractCategories(ValidationError):
11+
"""
12+
A special `jsonschema.ValidationError` that carries information about the
13+
`categories` keyword, intended to be yielded whenever a `categories` keyword
14+
is encountered during `jsonschema` JSON validation.
15+
16+
The primary use case for this class is to make use of the JSON validation
17+
mechanism implemented by `jsonschema` to extract all categories associated
18+
with each property in a JSON instance based on a JSON schema. It is not
19+
intended to be used as an actual validation error.
20+
"""
21+
822
def __init__(self, property, categories, *args, **kwargs):
923
super(ValidationError, self).__init__(*args, **kwargs)
1024
self.property = property
@@ -13,10 +27,25 @@ def __init__(self, property, categories, *args, **kwargs):
1327

1428
def extend_with_categories(validator_class):
1529
"""
16-
Extend the validator class so that during json schema validation, whenever
17-
the keyword 'categories' is encountered in a valid context with regards to a
18-
property, it yields an instance of ExtractCategories containing the
19-
information needed for category filtering later.
30+
Extend a `jsonschema.IValidator` class so that it yields an `ExtractCategories`
31+
whenever a `categories` keyword is encountered during JSON validation.
32+
33+
Parameters
34+
----------
35+
validator_class : jsonschema.IValidator
36+
an existing validator class
37+
38+
Returns
39+
-------
40+
jsonschema.IValidator
41+
a new `jsonschema.IValidator` class extending the one provided
42+
43+
Examples
44+
--------
45+
from jsonschema import Draft7Validator
46+
47+
48+
CategoryExtractor = extend_with_categories(Draft7Validator)
2049
"""
2150
validate_properties = validator_class.VALIDATORS["properties"]
2251

@@ -57,24 +86,72 @@ def extract_categories_from_errors(errors):
5786
yield from extract_categories_from_errors(e.context)
5887

5988

60-
def extract_categories(instance, schema):
89+
def extract_categories_from_event(event, schema):
6190
"""
62-
Generate dict of ExtractCategories whose keys are pointers to the properties
91+
Generate a `dict` of `ExtractCategories` whose keys are pointers to the properties.
92+
93+
Parameters
94+
----------
95+
event : dict
96+
A telemetry event
97+
98+
schema : dict
99+
A JSON schema
100+
101+
Returns
102+
-------
103+
dict
104+
A mapping from properties in the event to their categories.
105+
106+
In each entry, the key is a pointer to a property in the event
107+
(in the form of a tuple) and the value is an `ExtractCategories`
108+
containing the categories associated with that property.
63109
"""
64110
return {
65111
tuple(c.absolute_path + deque([c.property])): c
66112
for c in extract_categories_from_errors(
67-
CategoryExtractor(schema).iter_errors(instance)
113+
CategoryExtractor(schema).iter_errors(event)
68114
)
69115
}
70116

71117

72-
def filter_categories(instance, categories, allowed_categories, allowed_properties):
118+
def filter_categories_from_event(event, schema, allowed_categories, allowed_properties):
119+
"""
120+
Filter properties from an event based on their categories.
121+
122+
Only whitelisted properties and properties whose categories are allowed are kept.
123+
124+
Parameters
125+
----------
126+
event : dict
127+
The input telemetry event
128+
129+
schema : dict
130+
A JSON schema that makes use of the `categories` keyword to
131+
specify what categories are associated with a certain property.
132+
133+
allowed_categories : set
134+
Specify which categories are allowed
135+
136+
allowed_properties : set
137+
Whitelist certain top-level properties.
138+
139+
These properties are included in the output event even if not all of
140+
their properties are allowed.
141+
142+
Returns
143+
-------
144+
dict
145+
The output event after category filtering
146+
147+
"""
148+
categories = extract_categories_from_event(event, schema)
149+
73150
# Top-level properties without declared categories are set to null
74-
for property in instance.keys():
151+
for property in event.keys():
75152
path = (property,)
76153
if path not in categories:
77-
instance[property] = None
154+
event[property] = None
78155

79156
# Allow only properties whose categories are included in allowed_categories
80157
# and whose top-level parent is included in allowed_properties
@@ -91,7 +168,7 @@ def filter_categories(instance, categories, allowed_categories, allowed_properti
91168
# the descendent would either return None or raise an IndexError or
92169
# KeyError. Just skip it.
93170
try:
94-
item = deep_get(instance, c.absolute_path)
171+
item = deep_get(event, c.absolute_path)
95172
except IndexError:
96173
continue
97174
except KeyError:
@@ -100,7 +177,7 @@ def filter_categories(instance, categories, allowed_categories, allowed_properti
100177
if item is not None:
101178
item[c.property] = None
102179

103-
return instance
180+
return event
104181

105182

106183
def deep_get(instance, path):

jupyter_telemetry/categories.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from ._categories import JSONSchemaValidator, filter_categories_from_event

jupyter_telemetry/eventlog.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,7 @@
2828
from .traits import Handlers, SchemaOptions
2929
from . import TELEMETRY_METADATA_VERSION
3030

31-
from ._eventschema import (
32-
JSONSchemaValidator,
33-
extract_categories,
34-
filter_categories
35-
)
31+
from .categories import JSONSchemaValidator, filter_categories_from_event
3632

3733
yaml = YAML(typ='safe')
3834

@@ -216,9 +212,8 @@ def record_event(self, schema_name, version, event, timestamp_override=None):
216212
allowed_categories = self.get_allowed_categories(schema_name)
217213
allowed_properties = self.get_allowed_properties(schema_name)
218214

219-
categories = extract_categories(event, schema)
220-
filtered_event = filter_categories(
221-
event, categories, allowed_categories, allowed_properties
215+
filtered_event = filter_categories_from_event(
216+
event, schema, allowed_categories, allowed_properties
222217
)
223218
capsule.update(filtered_event)
224219

0 commit comments

Comments
 (0)