Skip to content

Commit 27b21e6

Browse files
committed
Reorganize
Add docstrings. Rename stuff more appropriately. Expose only what's necessary.
1 parent b17dc59 commit 27b21e6

File tree

3 files changed

+90
-20
lines changed

3 files changed

+90
-20
lines changed

jupyter_telemetry/_eventschema.py renamed to jupyter_telemetry/_categories.py

Lines changed: 86 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@
55

66

77
class ExtractCategories(ValidationError):
8+
"""
9+
A special `jsonschema.ValidationError` that carries information about the
10+
`categories` keyword, intended to be yielded whenever a `categories` keyword
11+
is encountered during `jsonschema` JSON validation.
12+
13+
The primary use case for this class is to make use of the JSON validation
14+
mechanism implemented by `jsonschema` to extract all categories associated
15+
with each property in a JSON instance based on a JSON schema. It is not
16+
intended to be used as an actual validation error.
17+
"""
18+
819
def __init__(self, property, categories, *args, **kwargs):
920
super(ValidationError, self).__init__(*args, **kwargs)
1021
self.property = property
@@ -13,10 +24,25 @@ def __init__(self, property, categories, *args, **kwargs):
1324

1425
def extend_with_categories(validator_class):
1526
"""
16-
Extend the validator class so that during json schema validation, whenever
17-
the keyword 'categories' is encountered in a valid context with regards to a
18-
property, it yields an instance of ExtractCategories containing the
19-
information needed for category filtering later.
27+
Extend a `jsonschema.IValidator` class so that it yields an `ExtractCategories`
28+
whenever a `categories` keyword is encountered during JSON validation.
29+
30+
Parameters
31+
----------
32+
validator_class : jsonschema.IValidator
33+
an existing validator class
34+
35+
Returns
36+
-------
37+
jsonschema.IValidator
38+
a new `jsonschema.IValidator` class extending the one provided
39+
40+
Examples
41+
--------
42+
from jsonschema import Draft7Validator
43+
44+
45+
CategoryExtractor = extend_with_categories(Draft7Validator)
2046
"""
2147
validate_properties = validator_class.VALIDATORS["properties"]
2248

@@ -57,24 +83,72 @@ def extract_categories_from_errors(errors):
5783
yield from extract_categories_from_errors(e.context)
5884

5985

60-
def extract_categories(instance, schema):
86+
def extract_categories_from_event(event, schema):
6187
"""
62-
Generate dict of ExtractCategories whose keys are pointers to the properties
88+
Generate a `dict` of `ExtractCategories` whose keys are pointers to the properties
89+
90+
Parameters
91+
----------
92+
event : dict
93+
A telemetry event
94+
95+
schema : dict
96+
A JSON schema
97+
98+
Returns
99+
-------
100+
dict
101+
A mapping from properties in the event to their categories.
102+
103+
In each entry, the key is a pointer to a property in the event
104+
(in the form of a tuple) and the value is an `ExtractCategories`
105+
containing the categories associated with that property.
63106
"""
64107
return {
65108
tuple(c.absolute_path + deque([c.property])): c
66109
for c in extract_categories_from_errors(
67-
CategoryExtractor(schema).iter_errors(instance)
110+
CategoryExtractor(schema).iter_errors(event)
68111
)
69112
}
70113

71114

72-
def filter_categories(instance, categories, allowed_categories, allowed_properties):
115+
def filter_categories_from_event(event, schema, allowed_categories, allowed_properties):
116+
"""
117+
Filter properties from an event based on their categories.
118+
119+
Only whitelisted properties and properties whose categories are allowed are kept.
120+
121+
Parameters
122+
----------
123+
event : dict
124+
The input telemetry event
125+
126+
schema : dict
127+
A JSON schema that makes use of the `categories` keyword to
128+
specify what categories are associated with a certain property.
129+
130+
allowed_categories : set
131+
Specify which categories are allowed
132+
133+
allowed_properties : set
134+
Whitelist certain top level properties.
135+
136+
These properties are included in the output event even if not all of
137+
their properties are allowed.
138+
139+
Returns
140+
-------
141+
dict
142+
The output event after category filtering
143+
144+
"""
145+
categories = extract_categories_from_event(event, schema)
146+
73147
# Top-level properties without declared categories are set to null
74-
for property in instance.keys():
148+
for property in event.keys():
75149
path = (property,)
76150
if path not in categories:
77-
instance[property] = None
151+
event[property] = None
78152

79153
# Allow only properties whose categories are included in allowed_categories
80154
# and whose top-level parent is included in allowed_properties
@@ -91,7 +165,7 @@ def filter_categories(instance, categories, allowed_categories, allowed_properti
91165
# the descendent would either return None or raise an IndexError or
92166
# KeyError. Just skip it.
93167
try:
94-
item = deep_get(instance, c.absolute_path)
168+
item = deep_get(event, c.absolute_path)
95169
except IndexError:
96170
continue
97171
except KeyError:
@@ -100,7 +174,7 @@ def filter_categories(instance, categories, allowed_categories, allowed_properti
100174
if item is not None:
101175
item[c.property] = None
102176

103-
return instance
177+
return event
104178

105179

106180
def deep_get(instance, path):

jupyter_telemetry/categories.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from ._categories import JSONSchemaValidator, filter_categories_from_event # noqa

jupyter_telemetry/eventlog.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,7 @@
2828
from .traits import Handlers, SchemaOptions
2929
from . import TELEMETRY_METADATA_VERSION
3030

31-
from ._eventschema import (
32-
JSONSchemaValidator,
33-
extract_categories,
34-
filter_categories
35-
)
31+
from .categories import JSONSchemaValidator, filter_categories_from_event
3632

3733
yaml = YAML(typ='safe')
3834

@@ -216,9 +212,8 @@ def record_event(self, schema_name, version, event, timestamp_override=None):
216212
allowed_categories = self.get_allowed_categories(schema_name)
217213
allowed_properties = self.get_allowed_properties(schema_name)
218214

219-
categories = extract_categories(event, schema)
220-
filtered_event = filter_categories(
221-
event, categories, allowed_categories, allowed_properties
215+
filtered_event = filter_categories_from_event(
216+
event, schema, allowed_categories, allowed_properties
222217
)
223218
capsule.update(filtered_event)
224219

0 commit comments

Comments
 (0)