Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 1 addition & 9 deletions kmir/src/kmir/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from .build import HASKELL_DEF_DIR, LLVM_DEF_DIR, LLVM_LIB_DIR
from .cargo import CargoProject
from .kmir import KMIR, DecodeMode, KMIRAPRNodePrinter
from .kmir import KMIR, KMIRAPRNodePrinter
from .linker import link
from .options import (
GenSpecOpts,
Expand Down Expand Up @@ -352,13 +352,6 @@ def _arg_parser() -> ArgumentParser:
prove_rs_parser.add_argument(
'--start-symbol', type=str, metavar='SYMBOL', default='main', help='Symbol name to begin execution from'
)
prove_rs_parser.add_argument(
'--decode-mode',
type=DecodeMode,
metavar='DECODE_MODE',
default=DecodeMode.NONE,
help='Allocation decoding mode: NONE (default), PARTIAL, or FULL',
)

link_parser = command_parser.add_parser(
'link', help='Link together 2 or more SMIR JSON files', parents=[kcli_args.logging_args]
Expand Down Expand Up @@ -435,7 +428,6 @@ def _parse_args(ns: Namespace) -> KMirOpts:
save_smir=ns.save_smir,
smir=ns.smir,
start_symbol=ns.start_symbol,
decode_mode=ns.decode_mode,
)
case 'link':
return LinkOpts(
Expand Down
4 changes: 2 additions & 2 deletions kmir/src/kmir/alloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def from_dict(dct: dict[str, Any]) -> ProvenanceMap:
ptrs=[
ProvenanceEntry(
offset=int(size),
prov=AllocId(prov),
alloc_id=AllocId(prov),
)
for size, prov in dct['ptrs']
],
Expand All @@ -88,4 +88,4 @@ def from_dict(dct: dict[str, Any]) -> ProvenanceMap:

class ProvenanceEntry(NamedTuple):
offset: int
prov: AllocId
alloc_id: AllocId
93 changes: 46 additions & 47 deletions kmir/src/kmir/decoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from typing import TYPE_CHECKING

from pyk.kast.inner import KApply
from pyk.kast.prelude.bytes import bytesToken
from pyk.kast.prelude.kint import intToken
from pyk.kast.prelude.string import stringToken

from .alloc import Allocation, AllocInfo, Memory, ProvenanceEntry, ProvenanceMap
from .ty import ArrayT, Bool, EnumT, Int, IntTy, PtrT, RefT, Uint
Expand All @@ -32,28 +31,12 @@

@dataclass
class UnableToDecodeValue(Value):
data: bytes
type_info: TypeMetadata
msg: str

def to_kast(self) -> KInner:
return KApply(
'Evaluation::UnableToDecodeValue',
bytesToken(self.data),
KApply('TypeInfo::VoidType'), # TODO: TypeInfo -> KAST transformation
)


@dataclass
class UnableToDecodeAlloc(Value):
data: bytes
ty: Ty

def to_kast(self) -> KInner:
return KApply(
'Evaluation::UnableToDecodeAlloc',
bytesToken(self.data),
KApply('ty', intToken(self.ty)),
KApply('ProvenanceMapEntries::empty'), # TODO
'Evaluation::UnableToDecodePy',
stringToken(self.msg),
)


Expand All @@ -71,34 +54,50 @@ def decode_alloc_or_unable(alloc_info: AllocInfo, types: Mapping[Ty, TypeMetadat
),
):
data = bytes(n or 0 for n in bytez)
type_info = types[ty]

match ptrs:
case []:
return decode_value_or_unable(data=data, type_info=type_info, types=types)

case [ProvenanceEntry(0, alloc_id)]:
if (pointee_ty := _pointee_ty(type_info)) is not None: # ensures this is a reference type
pointee_type_info = types[pointee_ty]
metadata = _metadata(pointee_type_info)

if len(data) == 8:
# single slim pointer (assumes usize == u64)
return AllocRefValue(alloc_id=alloc_id, metadata=metadata)

if len(data) == 16 and metadata == DynamicSize(1):
# sufficient data to decode dynamic size (assumes usize == u64)
# expect fat pointer
return AllocRefValue(
alloc_id=alloc_id,
metadata=DynamicSize(int.from_bytes(data[8:16], byteorder='little', signed=False)),
)

return UnableToDecodeAlloc(data=data, ty=ty)
return _decode_memory_alloc_or_unable(data=data, ptrs=ptrs, ty=ty, types=types)
case _:
raise AssertionError('Unhandled case')


def _decode_memory_alloc_or_unable(
data: bytes,
ptrs: list[ProvenanceEntry],
ty: Ty,
types: Mapping[Ty, TypeMetadata],
) -> Value:
try:
type_info = types[ty]
except KeyError:
return UnableToDecodeValue(f'Unknown type: {ty}')

match ptrs:
case []:
return decode_value_or_unable(data=data, type_info=type_info, types=types)

case [ProvenanceEntry(0, alloc_id)]:
if (pointee_ty := _pointee_ty(type_info)) is not None: # ensures this is a reference type
try:
pointee_type_info = types[pointee_ty]
except KeyError:
return UnableToDecodeValue(f'Unknown pointee type: {pointee_ty}')

metadata = _metadata(pointee_type_info)

if len(data) == 8:
# single slim pointer (assumes usize == u64)
return AllocRefValue(alloc_id=alloc_id, metadata=metadata)

if len(data) == 16 and metadata == DynamicSize(1):
# sufficient data to decode dynamic size (assumes usize == u64)
# expect fat pointer
return AllocRefValue(
alloc_id=alloc_id,
metadata=DynamicSize(int.from_bytes(data[8:16], byteorder='little', signed=False)),
)

return UnableToDecodeValue(f'Unable to decode alloc: {data!r}, of type: {type_info}')


def _pointee_ty(type_info: TypeMetadata) -> Ty | None:
match type_info:
case PtrT(ty) | RefT(ty):
Expand All @@ -120,8 +119,8 @@ def _metadata(type_info: TypeMetadata) -> Metadata:
def decode_value_or_unable(data: bytes, type_info: TypeMetadata, types: Mapping[Ty, TypeMetadata]) -> Value:
try:
return decode_value(data=data, type_info=type_info, types=types)
except ValueError:
return UnableToDecodeValue(data=data, type_info=type_info)
except ValueError as err:
return UnableToDecodeValue(f'Unable to decode value: {data!r}, of type: {type_info}: {err}')


def decode_value(data: bytes, type_info: TypeMetadata, types: Mapping[Ty, TypeMetadata]) -> Value:
Expand Down
104 changes: 4 additions & 100 deletions kmir/src/kmir/kdist/mir-semantics/rt/decoding.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@ from raw bytes to typed `Value` objects according to Rust's memory layout rules.
requires "../ty.md"
requires "value.md"
requires "numbers.md"
requires "../alloc.md"
module RT-DECODING
imports BOOL
imports MAP
imports TYPES
imports ALLOC
imports RT-VALUE-SYNTAX
imports RT-NUMBERS
imports RT-TYPES
Expand All @@ -37,9 +35,9 @@ and arrays (where layout is trivial).
### Decoding `PrimitiveType`s

```k
syntax Evaluation ::= #decodeValue ( Bytes , TypeInfo , Map ) [function, total, symbol(decodeValue)]
| UnableToDecodeValue( Bytes , TypeInfo ) [symbol(Evaluation::UnableToDecodeValue)]
| UnableToDecodeAlloc( Bytes , Ty , ProvenanceMapEntries ) [symbol(Evaluation::UnableToDecodeAlloc)]
syntax Evaluation ::= #decodeValue ( Bytes , TypeInfo , Map ) [function, total, symbol(decodeValue)]
| UnableToDecode ( Bytes , TypeInfo ) [symbol(Evaluation::UnableToDecode)]
| UnableToDecodePy ( msg: String ) [symbol(Evaluation::UnableToDecodePy)]
// Boolean: should be one byte with value one or zero
rule #decodeValue(BYTES, typeInfoPrimitiveType(primTypeBool), _TYPEMAP) => BoolVal(false)
Expand Down Expand Up @@ -83,7 +81,7 @@ rule #decodeValue(BYTES, typeInfoArrayType(ELEMTY, noTyConst), TYPEMAP)
All unimplemented cases will become thunks by way of this default rule:

```k
rule #decodeValue(BYTES, TYPEINFO, _TYPEMAP) => UnableToDecodeValue(BYTES, TYPEINFO) [owise]
rule #decodeValue(BYTES, TYPEINFO, _TYPEMAP) => UnableToDecode(BYTES, TYPEINFO) [owise]
```

## Helper function to determine the expected byte length for a type
Expand Down Expand Up @@ -283,100 +281,6 @@ by the element size, then uses the same element-by-element decoding approach as
[preserves-definedness]
```

# Decoding the allocated constants into a memory map

When the program starts, all allocated constants are stored in the `<memory>` cell in the configuration,
as a mapping of `AllocId -> Value`, decoding the allocations to values using the above functions.

Besides actual byte allocations, we often find allocations with provenance,
as slim or fat pointers pointing to other static allocations.
One example of this is allocated error strings, whose length is stored in a fat pointer that is itself an allocated constant.


The basic decoding function is very similar to `#decodeConstant` but returns its result as a singleton `Map` instead of a mere value.

```k
syntax Map ::= #decodeAlloc ( AllocId , Ty , Allocation , Map ) [function] // returns AllocId |-> Value
// ----------------------------------------------------------
rule #decodeAlloc(ID, TY, allocation(BYTES, provenanceMap(.ProvenanceMapEntries), _ALIGN, _MUT), TYPEMAP)
=> ID |-> #decodeValue(BYTES, {TYPEMAP[TY]}:>TypeInfo, TYPEMAP)
requires TY in_keys(TYPEMAP)
andBool isTypeInfo(TYPEMAP[TY])
```

If there are entries in the provenance map, then the decoder must create references to other allocations.
Each entry replaces a pointer starting at a certain offset in the bytes.
The pointee of the respective pointer is determined by type _layout_ of the type to decode.

Simple cases of offset 0 and reference types can be implemented here without consideration of layout.
The reference metadata is either determined statically by type, or filled in from the bytes for fat pointers.

```k
// if length(BYTES) ==Int 16 decode BYTES[9..16] as dynamic size.
rule #decodeAlloc(
ID,
TY,
allocation(BYTES, provenanceMap(provenanceMapEntry(0, REF_ID) ), _ALIGN, _MUT),
TYPEMAP
)
=> ID |-> AllocRef(REF_ID, .ProjectionElems, dynamicSize(Bytes2Int(substrBytes(BYTES, 8, 16), LE, Unsigned)))
requires TY in_keys(TYPEMAP)
andBool isTypeInfo(TYPEMAP[TY])
andBool isTy(pointeeTy({TYPEMAP[TY]}:>TypeInfo)) // ensures this is a reference type
andBool lengthBytes(BYTES) ==Int 16 // sufficient data to decode dynamic size (assumes usize == u64)
andBool dynamicSize(1) ==K #metadata(pointeeTy({TYPEMAP[TY]}:>TypeInfo), TYPEMAP) // expect fat pointer
[preserves-definedness] // valid type lookup ensured
// Otherwise query type information for static size and insert it.
rule #decodeAlloc(
ID,
TY,
allocation(BYTES, provenanceMap(provenanceMapEntry(0, REF_ID) ), _ALIGN, _MUT),
TYPEMAP
)
=> ID |-> AllocRef(REF_ID, .ProjectionElems, #metadata(pointeeTy({TYPEMAP[TY]}:>TypeInfo), TYPEMAP))
requires TY in_keys(TYPEMAP)
andBool isTypeInfo(TYPEMAP[TY])
andBool isTy(pointeeTy({TYPEMAP[TY]}:>TypeInfo)) // ensures this is a reference type
andBool lengthBytes(BYTES) ==Int 8 // single slim pointer (assumes usize == u64)
[priority(60), preserves-definedness] // valid type lookup ensured
```

Allocations with more than one provenance map entry or witn non-reference types remain undecoded.

```k
rule #decodeAlloc(ID, TY, allocation(BYTES, provenanceMap(ENTRIES), _ALIGN, _MUT), _TYPEMAP)
=> ID |-> UnableToDecodeAlloc(BYTES, TY, ENTRIES)
[owise]
```

The entire set of `GlobalAllocs` is decoded by iterating over the list.
It is assumed that the given `Ty -> TypeInfo` map contains all types required.

```k
syntax Map ::= #decodeAllocs ( GlobalAllocs , Map ) [function, total, symbol("decodeAllocs") ] // AllocId |-> Thing
| #decodeAllocsAux ( Map , GlobalAllocs , Map ) [function, total, symbol("decodeAllocsAux")] // accumulating version
// -----------------------------------------------------------------------------------------------
rule #decodeAllocs(ALLOCS, TYPEMAP) => #decodeAllocsAux(.Map, ALLOCS, TYPEMAP)
rule #decodeAllocsAux(ACCUM, .GlobalAllocs, _TYPEMAP) => ACCUM
rule #decodeAllocsAux(ACCUM, globalAllocEntry(ID, TY, Memory(ALLOC)) ALLOCS, TYPEMAP)
=> #decodeAllocsAux(ACCUM #decodeAlloc(ID, TY, ALLOC, TYPEMAP), ALLOCS, TYPEMAP)
requires TY in_keys(TYPEMAP)
[preserves-definedness]
```

If the type of an alloc cannot be found, or for non-`Memory` allocs, the raw data is stored in a super-sort of `Value`.
This ensures that the function is total (anyway lookups require constraining the sort).

```k
syntax AllocData ::= Value | AllocData ( Ty , GlobalAllocKind )
rule #decodeAllocsAux(ACCUM, globalAllocEntry(ID, TY, OTHER) ALLOCS, TYPEMAP)
=> #decodeAllocsAux(ACCUM ID |-> AllocData(TY, OTHER), ALLOCS, TYPEMAP)
[owise]
```
Comment on lines -356 to -378
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rule here at the bottom previously took care of the case where the type ID is not found in the type map, producing AllocData nodes in the expected output:
https://github.com/runtimeverification/mir-semantics/blob/master/kmir/src/tests/integration/data/exec-smir/allocs/enum-two-refs-fail.state#L87-L90


```k
endmodule
```
Loading