4
4
from jsonschema .exceptions import ValidationError
5
5
6
6
7
+ __all__ = ['JSONSchemaValidator' , 'filter_categories_from_event' ]
8
+
9
+
7
10
class ExtractCategories (ValidationError ):
11
+ """
12
+ A special `jsonschema.ValidationError` that carries information about the
13
+ `categories` keyword, intended to be yielded whenever a `categories` keyword
14
+ is encountered during `jsonschema` JSON validation.
15
+
16
+ The primary use case for this class is to make use of the JSON validation
17
+ mechanism implemented by `jsonschema` to extract all categories associated
18
+ with each property in a JSON instance based on a JSON schema. It is not
19
+ intended to be used as an actual validation error.
20
+ """
21
+
8
22
def __init__ (self , property , categories , * args , ** kwargs ):
9
23
super (ValidationError , self ).__init__ (* args , ** kwargs )
10
24
self .property = property
@@ -13,10 +27,25 @@ def __init__(self, property, categories, *args, **kwargs):
13
27
14
28
def extend_with_categories (validator_class ):
15
29
"""
16
- Extend the validator class so that during json schema validation, whenever
17
- the keyword 'categories' is encountered in a valid context with regards to a
18
- property, it yields an instance of ExtractCategories containing the
19
- information needed for category filtering later.
30
+ Extend a `jsonschema.IValidator` class so that it yields an `ExtractCategories`
31
+ whenever a `categories` keyword is encountered during JSON validation.
32
+
33
+ Parameters
34
+ ----------
35
+ validator_class : jsonschema.IValidator
36
+ an existing validator class
37
+
38
+ Returns
39
+ -------
40
+ jsonschema.IValidator
41
+ a new `jsonschema.IValidator` class extending the one provided
42
+
43
+ Examples
44
+ --------
45
+ from jsonschema import Draft7Validator
46
+
47
+
48
+ CategoryExtractor = extend_with_categories(Draft7Validator)
20
49
"""
21
50
validate_properties = validator_class .VALIDATORS ["properties" ]
22
51
@@ -57,24 +86,72 @@ def extract_categories_from_errors(errors):
57
86
yield from extract_categories_from_errors (e .context )
58
87
59
88
60
- def extract_categories ( instance , schema ):
89
+ def extract_categories_from_event ( event , schema ):
61
90
"""
62
- Generate dict of ExtractCategories whose keys are pointers to the properties
91
+ Generate a `dict` of `ExtractCategories` whose keys are pointers to the properties.
92
+
93
+ Parameters
94
+ ----------
95
+ event : dict
96
+ A telemetry event
97
+
98
+ schema : dict
99
+ A JSON schema
100
+
101
+ Returns
102
+ -------
103
+ dict
104
+ A mapping from properties in the event to their categories.
105
+
106
+ In each entry, the key is a pointer to a property in the event
107
+ (in the form of a tuple) and the value is an `ExtractCategories`
108
+ containing the categories associated with that property.
63
109
"""
64
110
return {
65
111
tuple (c .absolute_path + deque ([c .property ])): c
66
112
for c in extract_categories_from_errors (
67
- CategoryExtractor (schema ).iter_errors (instance )
113
+ CategoryExtractor (schema ).iter_errors (event )
68
114
)
69
115
}
70
116
71
117
72
- def filter_categories (instance , categories , allowed_categories , allowed_properties ):
118
+ def filter_categories_from_event (event , schema , allowed_categories , allowed_properties ):
119
+ """
120
+ Filter properties from an event based on their categories.
121
+
122
+ Only whitelisted properties and properties whose categories are allowed are kept.
123
+
124
+ Parameters
125
+ ----------
126
+ event : dict
127
+ The input telemetry event
128
+
129
+ schema : dict
130
+ A JSON schema that makes use of the `categories` keyword to
131
+ specify what categories are associated with a certain property.
132
+
133
+ allowed_categories : set
134
+ Specify which categories are allowed
135
+
136
+ allowed_properties : set
137
+ Whitelist certain top level properties.
138
+
139
+ These properties are included in the output event even if not all of
140
+ their nested properties are allowed.
141
+
142
+ Returns
143
+ -------
144
+ dict
145
+ The output event after category filtering
146
+
147
+ """
148
+ categories = extract_categories_from_event (event , schema )
149
+
73
150
# Top-level properties without declared categories are set to null
74
- for property in instance .keys ():
151
+ for property in event .keys ():
75
152
path = (property ,)
76
153
if path not in categories :
77
- instance [property ] = None
154
+ event [property ] = None
78
155
79
156
# Allow only properties whose categories are included in allowed_categories
80
157
# and whose top-level parent is included in allowed_properties
@@ -91,7 +168,7 @@ def filter_categories(instance, categories, allowed_categories, allowed_properti
91
168
# the descendent would either return None or raise an IndexError or
92
169
# KeyError. Just skip it.
93
170
try :
94
- item = deep_get (instance , c .absolute_path )
171
+ item = deep_get (event , c .absolute_path )
95
172
except IndexError :
96
173
continue
97
174
except KeyError :
@@ -100,7 +177,7 @@ def filter_categories(instance, categories, allowed_categories, allowed_properti
100
177
if item is not None :
101
178
item [c .property ] = None
102
179
103
- return instance
180
+ return event
104
181
105
182
106
183
def deep_get (instance , path ):
0 commit comments