From 30422c8fdec72515262bfca7c2c27161dcdb3de3 Mon Sep 17 00:00:00 2001
From: Ethan Haley <ethan@boerboeltrading.com>
Date: Wed, 11 May 2022 18:37:27 +0100
Subject: [PATCH 1/2] Handle implied padding in composite types

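A component of a composite type may declare an explicit offset, which
can imply padding between it and the preceding component. Parse the
offset attribute from the schema, store it on the Type class, and insert
pad bytes into the composite pack/unpack format strings wherever a
component's offset lies beyond the end of the previous component.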
---
 sbe/__init__.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/sbe/__init__.py b/sbe/__init__.py
index fbb865d..d2940b6 100644
--- a/sbe/__init__.py
+++ b/sbe/__init__.py
@@ -79,12 +79,13 @@ class DecodedMessage:
 @dataclass(init=False)
 class Type:
     __slots__ = (
-        'name', 'primitiveType', 'presence', 'semanticType',
+        'name', 'primitiveType', 'presence', 'offset', 'semanticType',
         'description', 'length', 'characterEncoding', 'nullValue')
 
     name: str
     primitiveType: PrimitiveType
     presence: Presence
+    offset: Optional[int]
     semanticType: Optional[str]
     description: Optional[str]
     length: int
@@ -96,6 +97,7 @@ def __init__(self, name: str, primitiveType: PrimitiveType, nullValue: Optional[
         self.name = name
         self.primitiveType = primitiveType
         self.presence = Presence.REQUIRED
+        self.offset = None
         self.length = 1
         self.characterEncoding = None
         if nullValue is not None:
@@ -605,16 +607,31 @@ def _unpack_format(
         return _unpack_format(schema, type_.encodingType, '', buffer, buffer_cursor)
 
     elif isinstance(type_, Composite):
-        return prefix + ''.join(_unpack_format(schema, t, '', buffer, buffer_cursor) for t in type_.types)
+        format_string = prefix
+        current_offset = 0
+        for t in type_.types:
+            if t.offset and t.offset > current_offset:
+                format_string += "x" * (t.offset - current_offset)
+                if buffer_cursor:
+                    buffer_cursor.val += t.offset - current_offset
+            format_string += _unpack_format(schema, t, '', buffer, buffer_cursor)
+            current_offset = struct.calcsize(format_string)
+        return format_string
 
 
 def _pack_format(_schema: Schema, composite: Composite):
     fmt = []
+    current_offset = 0
     for t in composite.types:
+        try:
+            padding = "x" * max(t.offset - current_offset, 0)
+        except TypeError:
+            padding = ""
         if t.length > 1 and t.primitiveType == PrimitiveType.CHAR:
-            fmt.append(str(t.length) + 's')
+            fmt.append(padding + str(t.length) + 's')
         else:
-            fmt.append(FORMAT[t.primitiveType])
+            fmt.append(padding + FORMAT[t.primitiveType])
+        current_offset += struct.calcsize(fmt[-1])
 
     return ''.join(fmt)
 
@@ -1003,6 +1020,8 @@ def _parse_schema(f: TextIO) -> Schema:
                     x.semanticType = attrs['semanticType']
                 if 'presence' in attrs:
                     x.presence = PRESENCE_TYPES[attrs['presence']]
+                if 'offset' in attrs:
+                    x.offset = int(attrs['offset'])
 
                 stack.append(x)
 

From c9d6233861db08efe6abfdf174cbc9e6c532d5b8 Mon Sep 17 00:00:00 2001
From: Ethan Haley <ethan@boerboeltrading.com>
Date: Wed, 11 May 2022 18:38:52 +0100
Subject: [PATCH 2/2] Handle implied padding at end of block

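If the encoded fields of a block occupy fewer bytes than the block
length declared for it, pad the block up to that length when encoding.
As a consequence, the group dimension now always carries the declared
blockLength of the group rather than a size computed from the encoded
fields.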
---
 sbe/__init__.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/sbe/__init__.py b/sbe/__init__.py
index d2940b6..ef1063a 100644
--- a/sbe/__init__.py
+++ b/sbe/__init__.py
@@ -470,7 +470,7 @@ def encode(self, message: Message, obj: dict, header: Optional[dict] = None) ->
         fmts = []
         vals = []
         cursor = Cursor(0)
-        _walk_fields_encode(self, message.fields, obj, fmts, vals, cursor)
+        _walk_fields_encode(self, message.blockLength, message.fields, obj, fmts, vals, cursor)
         fmt = "<" + ''.join(fmts)
 
         header = {
@@ -706,20 +706,29 @@ def _walk_fields_encode_composite(
                 cursor.val += FORMAT_SIZES[t1]
 
 
-def _walk_fields_encode(schema: Schema, fields: List[Union[Group, Field]], obj: dict, fmt: list, vals: list, cursor: Cursor):
+def _add_padding(block_length, cursor):
+    diff = block_length - cursor.val
+    if diff > 0:
+        cursor.val += diff
+        return "x" * diff
+    return ""
+
+
+def _walk_fields_encode(schema: Schema, block_length: int, fields: List[Union[Group, Field]], obj: dict, fmt: list, vals: list, cursor: Cursor):
     for f in fields:
         if isinstance(f, Group):
+            try:
+                fmt[-1] += _add_padding(block_length, cursor)
+            except IndexError:
+                ...
             xs = obj[f.name]
 
             fmt1 = []
             vals1 = []
-            block_length = None
             for x in xs:
-                _walk_fields_encode(schema, f.fields, x, fmt1, vals1, Cursor(0))
-                if block_length is None:
-                    block_length = struct.calcsize("<" + ''.join(fmt1))
+                _walk_fields_encode(schema, f.blockLength, f.fields, x, fmt1, vals1, Cursor(0))
 
-            dimension = {"numInGroup": len(obj[f.name]), "blockLength": block_length or f.blockLength}
+            dimension = {"numInGroup": len(obj[f.name]), "blockLength": f.blockLength}
             dimension_fmt = _pack_format(schema, f.dimensionType)
 
             fmt.extend(dimension_fmt)
@@ -783,6 +792,11 @@ def _walk_fields_encode(schema: Schema, fields: List[Union[Group, Field]], obj:
         else:
             assert 0
 
+    try:
+        fmt[-1] += _add_padding(block_length, cursor)
+    except IndexError:
+        ...
+
 
 def _walk_fields_wrap_composite(
     schema: Schema, rv: Dict[str, Union[Pointer, WrappedGroup]],