5
5
6
6
7
7
class ExtractCategories (ValidationError ):
8
+ """
9
+ A special `jsonschema.ValidationError` that carries information about the
10
+ `categories` keyword, intended to be yielded whenever a `categories` keyword
11
+ is encountered during `jsonschema` JSON validation.
12
+
13
+ The primary use case for this class is to make use of the JSON validation
14
+ mechanism implemented by `jsonschema` to extract all categories associated
15
+ with each property in a JSON instance based on a JSON schema. It is not
16
+ intended to be used as an actual validation error.
17
+ """
18
+
8
19
def __init__ (self , property , categories , * args , ** kwargs ):
9
20
super (ValidationError , self ).__init__ (* args , ** kwargs )
10
21
self .property = property
@@ -13,10 +24,25 @@ def __init__(self, property, categories, *args, **kwargs):
13
24
14
25
def extend_with_categories (validator_class ):
15
26
"""
16
- Extend the validator class so that during json schema validation, whenever
17
- the keyword 'categories' is encountered in a valid context with regards to a
18
- property, it yields an instance of ExtractCategories containing the
19
- information needed for category filtering later.
27
+ Extend a `jsonschema.IValidator` class so that it yields an `ExtractCategories`
28
+ whenever a `categories` keyword is encountered during JSON validation.
29
+
30
+ Parameters
31
+ ----------
32
+ validator_class : jsonschema.IValidator
33
+ an existing validator class
34
+
35
+ Returns
36
+ -------
37
+ jsonschema.IValidator
38
+ a new `jsonschema.IValidator` class extending the one provided
39
+
40
+ Examples
41
+ --------
42
+ from jsonschema import Draft7Validator
43
+
44
+
45
+ CategoryExtractor = extend_with_categories(Draft7Validator)
20
46
"""
21
47
validate_properties = validator_class .VALIDATORS ["properties" ]
22
48
@@ -57,24 +83,72 @@ def extract_categories_from_errors(errors):
57
83
yield from extract_categories_from_errors (e .context )
58
84
59
85
60
- def extract_categories ( instance , schema ):
86
+ def extract_categories_from_event ( event , schema ):
61
87
"""
62
- Generate dict of ExtractCategories whose keys are pointers to the properties
88
+ Generate a `dict` of `ExtractCategories` whose keys are pointers to the properties.
89
+
90
+ Parameters
91
+ ----------
92
+ event : dict
93
+ A telemetry event
94
+
95
+ schema : dict
96
+ A JSON schema
97
+
98
+ Returns
99
+ -------
100
+ dict
101
+ A mapping from properties in the event to their categories.
102
+
103
+ In each entry, the key is a pointer to a property in the event
104
+ (in the form of a tuple) and the value is an `ExtractCategories`
105
+ containing the categories associated with that property.
63
106
"""
64
107
return {
65
108
tuple (c .absolute_path + deque ([c .property ])): c
66
109
for c in extract_categories_from_errors (
67
- CategoryExtractor (schema ).iter_errors (instance )
110
+ CategoryExtractor (schema ).iter_errors (event )
68
111
)
69
112
}
70
113
71
114
72
- def filter_categories (instance , categories , allowed_categories , allowed_properties ):
115
+ def filter_categories_from_event (event , schema , allowed_categories , allowed_properties ):
116
+ """
117
+ Filter properties from an event based on their categories.
118
+
119
+ Only whitelisted properties and properties whose categories are allowed are kept.
120
+
121
+ Parameters
122
+ ----------
123
+ event : dict
124
+ The input telemetry event
125
+
126
+ schema : dict
127
+ A JSON schema that makes use of the `categories` keyword to
128
+ specify what categories are associated with a certain property.
129
+
130
+ allowed_categories : set
131
+ Specify which categories are allowed
132
+
133
+ allowed_properties : set
134
+ Whitelist certain top level properties.
135
+
136
+ These properties are included in the output event even if not all of
137
+ their properties are allowed.
138
+
139
+ Returns
140
+ -------
141
+ dict
142
+ The output event after category filtering
143
+
144
+ """
145
+ categories = extract_categories_from_event (event , schema )
146
+
73
147
# Top-level properties without declared categories are set to null
74
- for property in instance .keys ():
148
+ for property in event .keys ():
75
149
path = (property ,)
76
150
if path not in categories :
77
- instance [property ] = None
151
+ event [property ] = None
78
152
79
153
# Allow only properties whose categories are included in allowed_categories
80
154
# and whose top-level parent is included in allowed_properties
@@ -91,7 +165,7 @@ def filter_categories(instance, categories, allowed_categories, allowed_properti
91
165
# the descendent would either return None or raise an IndexError or
92
166
# KeyError. Just skip it.
93
167
try :
94
- item = deep_get (instance , c .absolute_path )
168
+ item = deep_get (event , c .absolute_path )
95
169
except IndexError :
96
170
continue
97
171
except KeyError :
@@ -100,7 +174,7 @@ def filter_categories(instance, categories, allowed_categories, allowed_properti
100
174
if item is not None :
101
175
item [c .property ] = None
102
176
103
- return instance
177
+ return event
104
178
105
179
106
180
def deep_get (instance , path ):
0 commit comments