From 14155d9252b24038dac2be3758e60c63e9139607 Mon Sep 17 00:00:00 2001 From: Miskler Date: Wed, 27 Aug 2025 04:00:28 +0300 Subject: [PATCH 1/5] add empty-list test --- jsf/tests/data/empty-list.json | 8 ++++++++ jsf/tests/test_default_fake.py | 12 ++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 jsf/tests/data/empty-list.json diff --git a/jsf/tests/data/empty-list.json b/jsf/tests/data/empty-list.json new file mode 100644 index 0000000..3d97dba --- /dev/null +++ b/jsf/tests/data/empty-list.json @@ -0,0 +1,8 @@ +{ + "type": "object", + "properties": { + "items": { + "type": "array" + } + } +} \ No newline at end of file diff --git a/jsf/tests/test_default_fake.py b/jsf/tests/test_default_fake.py index 56b42e6..26e3a81 100644 --- a/jsf/tests/test_default_fake.py +++ b/jsf/tests/test_default_fake.py @@ -544,3 +544,15 @@ def test_use_defaults_and_examples(TestData): assert d["name"] in ["Chop", "Luna", "Thanos"] breed = d.get("breed") assert breed is None or breed == "Mixed Breed" + +def test_gen_empty_list(TestData): + with open(TestData / "empty-list.json") as file: + schema = json.load(file) + p = JSF(schema) + + fake_data = [p.generate(use_defaults=True, use_examples=True) for _ in range(10)] + for d in fake_data: + assert isinstance(d, dict) + assert d in ["items"] + assert isinstance(d["items"], list) + assert len(d["items"]) == 0 From 4fd2b0c179b6c13e93c933c365c57df2614d144e Mon Sep 17 00:00:00 2001 From: Miskler <69055489+Miskler@users.noreply.github.com> Date: Wed, 27 Aug 2025 05:04:57 +0300 Subject: [PATCH 2/5] Handle arrays without items --- jsf/parser.py | 19 ++++++++++++++++++- jsf/schema_types/array.py | 13 +++++++++---- jsf/schema_types/object.py | 4 ++-- jsf/tests/test_default_fake.py | 4 ++-- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/jsf/parser.py b/jsf/parser.py index 3174429..d8a6765 100644 --- a/jsf/parser.py +++ b/jsf/parser.py @@ -128,6 +128,11 @@ def __parse_object( self, name: str, path: str, schema: Dict[str, Any], root: Optional[AllTypes] = None ) -> Object: _, is_nullable = self.__is_field_nullable(schema) + schema_without_props = { + k: v + for k, v in schema.items() + if k not in ("properties", "patternProperties", "dependencies") + } model = Object.from_dict( { "name": name, @@ -135,7 +140,7 @@ def __parse_object( "is_nullable": is_nullable, "allow_none_optionals": self.allow_none_optionals, "max_recursive_depth": self.max_recursive_depth, - **schema, + **schema_without_props, } ) root = model if root is None else root @@ -292,6 +297,18 @@ def __parse_definition( isinstance(x, dict) for x in schema.get("items", []) ): return self.__parse_tuple(name, path, schema, root) + # arrays without an "items" definition should still be valid and + # simply produce empty lists + return Array.from_dict( + { + "name": name, + "path": path, + "is_nullable": is_nullable, + "allow_none_optionals": self.allow_none_optionals, + "max_recursive_depth": self.max_recursive_depth, + **schema, + } + ) else: return self.__parse_primitive(name, path, schema) elif "$ref" in schema: diff --git a/jsf/schema_types/array.py b/jsf/schema_types/array.py index 8686c52..3607867 100644 --- a/jsf/schema_types/array.py +++ b/jsf/schema_types/array.py @@ -22,6 +22,8 @@ def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]: try: return super().generate(context) except ProviderNotSetException: + if self.items is None: + return [] if isinstance(self.fixed, str): self.minItems = self.maxItems = eval(self.fixed, context)() elif isinstance(self.fixed, int): @@ -47,8 +49,11 @@ def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]: return output def model(self, context: Dict[str, Any]) -> Tuple[Type, Any]: - _type = eval( - f"List[Union[{','.join([self.items.model(context)[0].__name__])}]]", - context["__internal__"], - ) + if self.items is None: + _type = List[Any] + else: + _type = eval( + f"List[Union[{','.join([self.items.model(context)[0].__name__])}]]", + context["__internal__"], + ) return self.to_pydantic(context, _type) diff --git a/jsf/schema_types/object.py b/jsf/schema_types/object.py index 7e3fcf6..d8e19a3 100644 --- a/jsf/schema_types/object.py +++ b/jsf/schema_types/object.py @@ -19,14 +19,14 @@ class PropertyNames(BaseModel): class Object(BaseSchema): - properties: Dict[str, BaseSchema] = {} + properties: List[BaseSchema] = [] additionalProperties: Optional[Union[bool, BaseSchema]] = None required: Optional[List[str]] = None propertyNames: Optional[PropertyNames] = None minProperties: Optional[int] = None maxProperties: Optional[int] = None dependencies: Optional[Union[PropertyDependency, SchemaDependency]] = None - patternProperties: Optional[Dict[str, BaseSchema]] = None + patternProperties: Optional[List[BaseSchema]] = None @classmethod def from_dict(cls, d: Dict[str, Any]) -> "Object": diff --git a/jsf/tests/test_default_fake.py b/jsf/tests/test_default_fake.py index 26e3a81..c7c5463 100644 --- a/jsf/tests/test_default_fake.py +++ b/jsf/tests/test_default_fake.py @@ -548,11 +548,11 @@ def test_use_defaults_and_examples(TestData): def test_gen_empty_list(TestData): with open(TestData / "empty-list.json") as file: schema = json.load(file) - p = JSF(schema) + p = JSF(schema, allow_none_optionals=0.0) fake_data = [p.generate(use_defaults=True, use_examples=True) for _ in range(10)] for d in fake_data: assert isinstance(d, dict) - assert d in ["items"] + assert "items" in d assert isinstance(d["items"], list) assert len(d["items"]) == 0 From f367924baf779009372b4f4e1447f86aee758c05 Mon Sep 17 00:00:00 2001 From: Miskler Date: Wed, 27 Aug 2025 05:22:37 +0300 Subject: [PATCH 3/5] update tests --- jsf/tests/data/empty-list-pro.json | 31 ++++++++++++++++++++++++++++++ jsf/tests/test_default_fake.py | 30 +++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 jsf/tests/data/empty-list-pro.json diff --git a/jsf/tests/data/empty-list-pro.json b/jsf/tests/data/empty-list-pro.json new file mode 100644 index 0000000..4d88148 --- /dev/null +++ b/jsf/tests/data/empty-list-pro.json @@ -0,0 +1,31 @@ +{ + "$schema": "http://json-schema.org/schema#", + "type": "object", + "properties": { + "content": { + "type": "object", + "properties": { + "list": { + "type": "array" + }, + "jon-empty-sub-list": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "sub-list": { + "type": "array", + "items": { + "type": "array" + } + } + }, + "required": ["list", "sub-list"] + } + }, + "required": ["content"] +} \ No newline at end of file diff --git a/jsf/tests/test_default_fake.py b/jsf/tests/test_default_fake.py index c7c5463..bbf60ee 100644 --- a/jsf/tests/test_default_fake.py +++ b/jsf/tests/test_default_fake.py @@ -556,3 +556,33 @@ def test_gen_empty_list(TestData): assert "items" in d assert isinstance(d["items"], list) assert len(d["items"]) == 0 + +def test_gen_empty_list_pro(TestData): + with open(TestData / "empty-list-pro.json") as file: + schema = json.load(file) + p = JSF(schema, allow_none_optionals=0.0) + + fake_data = [p.generate(use_defaults=True, use_examples=True) for _ in range(10)] + for d in fake_data: + assert isinstance(d, dict) + assert "content" in d + + assert isinstance(d["content"], dict) + assert "list" in d["content"] + + assert isinstance(d["content"]["list"], list) + assert len(d["content"]["list"]) == 0 + + assert "sub-list" in d["content"] + assert isinstance(d["content"]["sub-list"], list) + + assert len(d["content"]["sub-list"]) == 1 + assert isinstance(d["content"]["sub-list"][0], list) + assert len(d["content"]["sub-list"][0]) == 0 + + assert "jon-empty-sub-list" in d["content"] + assert isinstance(d["content"]["jon-empty-sub-list"], list) + assert len(d["content"]["jon-empty-sub-list"]) == 1 + assert isinstance(d["content"]["jon-empty-sub-list"][0], list) + assert len(d["content"]["jon-empty-sub-list"][0]) == 1 + assert isinstance(d["content"]["jon-empty-sub-list"][0][0], str) From 001c9809810681e52defb8aeb43e4befbd7a53d2 Mon Sep 17 00:00:00 2001 From: Miskler Date: Wed, 27 Aug 2025 05:30:55 +0300 Subject: [PATCH 4/5] fix --- jsf/tests/data/empty-list-pro.json | 2 +- jsf/tests/test_default_fake.py | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/jsf/tests/data/empty-list-pro.json b/jsf/tests/data/empty-list-pro.json index 4d88148..d504f1a 100644 --- a/jsf/tests/data/empty-list-pro.json +++ b/jsf/tests/data/empty-list-pro.json @@ -8,7 +8,7 @@ "list": { "type": "array" }, - "jon-empty-sub-list": { + "non-empty-sub-list": { "type": "array", "items": { "type": "array", diff --git a/jsf/tests/test_default_fake.py b/jsf/tests/test_default_fake.py index bbf60ee..7523828 100644 --- a/jsf/tests/test_default_fake.py +++ b/jsf/tests/test_default_fake.py @@ -573,16 +573,14 @@ def test_gen_empty_list_pro(TestData): assert isinstance(d["content"]["list"], list) assert len(d["content"]["list"]) == 0 + assert "non-empty-sub-list" in d["content"] + assert isinstance(d["content"]["non-empty-sub-list"], list) + assert len(d["content"]["non-empty-sub-list"]) >= 1 + assert isinstance(d["content"]["non-empty-sub-list"][0], list) + assert any(len(sublist) >= 1 and all(isinstance(item, str) for item in sublist) for sublist in d["content"]["non-empty-sub-list"]) + assert "sub-list" in d["content"] assert isinstance(d["content"]["sub-list"], list) - - assert len(d["content"]["sub-list"]) == 1 + assert len(d["content"]["sub-list"]) >= 1 assert isinstance(d["content"]["sub-list"][0], list) assert len(d["content"]["sub-list"][0]) == 0 - - assert "jon-empty-sub-list" in d["content"] - assert isinstance(d["content"]["jon-empty-sub-list"], list) - assert len(d["content"]["jon-empty-sub-list"]) == 1 - assert isinstance(d["content"]["jon-empty-sub-list"][0], list) - assert len(d["content"]["jon-empty-sub-list"][0]) == 1 - assert isinstance(d["content"]["jon-empty-sub-list"][0][0], str) From 1422765464b31d2de81b2468f1e69df50ad0da8b Mon Sep 17 00:00:00 2001 From: Miskler <69055489+Miskler@users.noreply.github.com> Date: Wed, 27 Aug 2025 16:46:35 +0300 Subject: [PATCH 5/5] Ensure arrays with items have default min length --- jsf/schema_types/array.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/jsf/schema_types/array.py b/jsf/schema_types/array.py index 3607867..ba1e20a 100644 --- a/jsf/schema_types/array.py +++ b/jsf/schema_types/array.py @@ -9,7 +9,12 @@ class Array(BaseSchema): items: Optional[BaseSchema] = None contains: Optional[BaseSchema] = None # NOTE: Validation only - minItems: Optional[int] = 0 + # If `items` is provided in the schema, JSON Schema treats the array as + # having an item type. In that case JSF should emit at least one element + # by default. Using ``None`` here allows us to distinguish between the + # schema omitting ``minItems`` and explicitly setting ``minItems`` to ``0`` + # which callers may rely on. + minItems: Optional[int] = None maxItems: Optional[int] = 5 uniqueItems: Optional[bool] = False fixed: Optional[Union[int, str]] = Field(None, alias="$fixed") @@ -23,26 +28,41 @@ def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]: return super().generate(context) except ProviderNotSetException: if self.items is None: + # No item schema means we cannot infer what the array should + # contain, therefore return an empty list. return [] + if isinstance(self.fixed, str): self.minItems = self.maxItems = eval(self.fixed, context)() elif isinstance(self.fixed, int): self.minItems = self.maxItems = self.fixed depth = context["state"]["__depth__"] + + # ``minItems`` may be ``None`` when it wasn't provided in the + # schema. In that scenario we want non-empty arrays if an item + # schema exists. When the user explicitly sets ``minItems`` to 0 + # we honour that and allow empty arrays. + min_items = ( + int(self.minItems) + if self.minItems is not None + else (0 if self.items is None else 1) + ) + max_items = int(self.maxItems) if self.maxItems is not None else 5 + output = [] - for _ in range(random.randint(int(self.minItems), int(self.maxItems))): + for _ in range(random.randint(min_items, max_items)): output.append(self.items.generate(context)) context["state"]["__depth__"] = depth if self.uniqueItems and self.items.type == "object": output = [dict(s) for s in {frozenset(d.items()) for d in output}] - while len(output) < self.minItems: + while len(output) < min_items: output.append(self.items.generate(context)) output = [dict(s) for s in {frozenset(d.items()) for d in output}] context["state"]["__depth__"] = depth elif self.uniqueItems: output = set(output) - while len(output) < self.minItems: + while len(output) < min_items: output.add(self.items.generate(context)) context["state"]["__depth__"] = depth output = list(output)