From 30422c8fdec72515262bfca7c2c27161dcdb3de3 Mon Sep 17 00:00:00 2001 From: Ethan Haley Date: Wed, 11 May 2022 18:37:27 +0100 Subject: [PATCH 1/2] Handle implied padding in composite types Some composite component types have an offset value, which can imply padding between components. By adding the offset attribute to the Type class, we can check if padding needs to be added. --- sbe/__init__.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/sbe/__init__.py b/sbe/__init__.py index fbb865d..d2940b6 100644 --- a/sbe/__init__.py +++ b/sbe/__init__.py @@ -79,12 +79,13 @@ class DecodedMessage: @dataclass(init=False) class Type: __slots__ = ( - 'name', 'primitiveType', 'presence', 'semanticType', + 'name', 'primitiveType', 'presence', 'offset', 'semanticType', 'description', 'length', 'characterEncoding', 'nullValue') name: str primitiveType: PrimitiveType presence: Presence + offset: Optional[int] semanticType: Optional[str] description: Optional[str] length: int @@ -96,6 +97,7 @@ def __init__(self, name: str, primitiveType: PrimitiveType, nullValue: Optional[ self.name = name self.primitiveType = primitiveType self.presence = Presence.REQUIRED + self.offset = None self.length = 1 self.characterEncoding = None if nullValue is not None: @@ -605,16 +607,31 @@ def _unpack_format( return _unpack_format(schema, type_.encodingType, '', buffer, buffer_cursor) elif isinstance(type_, Composite): - return prefix + ''.join(_unpack_format(schema, t, '', buffer, buffer_cursor) for t in type_.types) + format_string = prefix + current_offset = 0 + for t in type_.types: + if t.offset and t.offset > current_offset: + format_string += "x" * (t.offset - current_offset) + if buffer_cursor: + buffer_cursor.val += t.offset - current_offset + format_string += _unpack_format(schema, t, '', buffer, buffer_cursor) + current_offset = struct.calcsize(format_string) + return format_string def _pack_format(_schema: Schema, composite: Composite): fmt = [] + current_offset = 0 for t in composite.types: + try: + padding = "x" * max(t.offset - current_offset, 0) + except TypeError: + padding = "" if t.length > 1 and t.primitiveType == PrimitiveType.CHAR: - fmt.append(str(t.length) + 's') + fmt.append(padding + str(t.length) + 's') else: - fmt.append(FORMAT[t.primitiveType]) + fmt.append(padding + FORMAT[t.primitiveType]) + current_offset += struct.calcsize(fmt[-1]) return ''.join(fmt) @@ -1003,6 +1020,8 @@ def _parse_schema(f: TextIO) -> Schema: x.semanticType = attrs['semanticType'] if 'presence' in attrs: x.presence = PRESENCE_TYPES[attrs['presence']] + if 'offset' in attrs: + x.offset = int(attrs['offset']) stack.append(x) From c9d6233861db08efe6abfdf174cbc9e6c532d5b8 Mon Sep 17 00:00:00 2001 From: Ethan Haley Date: Wed, 11 May 2022 18:38:52 +0100 Subject: [PATCH 2/2] Handle implied padding at end of block If the total length of fields in a block is less than the given block length, then padding needs to be added up to said length. --- sbe/__init__.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/sbe/__init__.py b/sbe/__init__.py index d2940b6..ef1063a 100644 --- a/sbe/__init__.py +++ b/sbe/__init__.py @@ -470,7 +470,7 @@ def encode(self, message: Message, obj: dict, header: Optional[dict] = None) -> fmts = [] vals = [] cursor = Cursor(0) - _walk_fields_encode(self, message.fields, obj, fmts, vals, cursor) + _walk_fields_encode(self, message.blockLength, message.fields, obj, fmts, vals, cursor) fmt = "<" + ''.join(fmts) header = { @@ -706,20 +706,29 @@ def _walk_fields_encode_composite( cursor.val += FORMAT_SIZES[t1] -def _walk_fields_encode(schema: Schema, fields: List[Union[Group, Field]], obj: dict, fmt: list, vals: list, cursor: Cursor): +def _add_padding(block_length, cursor): + diff = block_length - cursor.val + if diff > 0: + cursor.val += diff + return "x" * diff + return "" + + +def _walk_fields_encode(schema: Schema, block_length: int, fields: List[Union[Group, Field]], obj: dict, fmt: list, vals: list, cursor: Cursor): for f in fields: if isinstance(f, Group): + try: + fmt[-1] += _add_padding(block_length, cursor) + except IndexError: + ... xs = obj[f.name] fmt1 = [] vals1 = [] - block_length = None for x in xs: - _walk_fields_encode(schema, f.fields, x, fmt1, vals1, Cursor(0)) - if block_length is None: - block_length = struct.calcsize("<" + ''.join(fmt1)) + _walk_fields_encode(schema, f.blockLength, f.fields, x, fmt1, vals1, Cursor(0)) - dimension = {"numInGroup": len(obj[f.name]), "blockLength": block_length or f.blockLength} + dimension = {"numInGroup": len(obj[f.name]), "blockLength": f.blockLength} dimension_fmt = _pack_format(schema, f.dimensionType) fmt.extend(dimension_fmt) @@ -783,6 +792,11 @@ def _walk_fields_encode(schema: Schema, fields: List[Union[Group, Field]], obj: else: assert 0 + try: + fmt[-1] += _add_padding(block_length, cursor) + except IndexError: + ... + def _walk_fields_wrap_composite( schema: Schema, rv: Dict[str, Union[Pointer, WrappedGroup]],