Skip to content

Commit 5c79087

Browse files
author
Gilad Chase
committed
feat(byte_array): add ByteSpan iterator
1 parent 6dde184 commit 5c79087

File tree

2 files changed

+315
-12
lines changed

2 files changed

+315
-12
lines changed

corelib/src/byte_array.cairo

Lines changed: 173 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,11 @@
4242
//! assert!(first_byte == 0x41);
4343
//! ```
4444

45-
use crate::array::{ArrayTrait, Span, SpanTrait};
46-
use crate::bytes_31::split_bytes31;
45+
use crate::array::{ArrayTrait, Span, SpanIter, SpanTrait};
4746
#[allow(unused_imports)]
4847
use crate::bytes_31::{
4948
BYTES_IN_BYTES31, Bytes31Trait, POW_2_128, POW_2_8, U128IntoBytes31, U8IntoBytes31,
50-
one_shift_left_bytes_u128, split_u128, u8_at_u256,
49+
one_shift_left_bytes_u128, split_bytes31, split_u128, u8_at_u256,
5150
};
5251
use crate::clone::Clone;
5352
use crate::cmp::min;
@@ -706,7 +705,7 @@ pub(crate) impl ByteArrayIndexView of crate::traits::IndexView<ByteArray, usize,
706705
}
707706
}
708707

709-
// TODO: Implement a more efficient version of this iterator.
708+
// TODO(giladchase): Delegate to byte span iterator instead of current at-based implementation.
710709
/// An iterator struct over a ByteArray.
711710
#[derive(Drop, Clone)]
712711
pub struct ByteArrayIter {
@@ -716,6 +715,7 @@ pub struct ByteArrayIter {
716715

717716
impl ByteArrayIterator of crate::iter::Iterator<ByteArrayIter> {
718717
type Item = u8;
718+
719719
fn next(ref self: ByteArrayIter) -> Option<u8> {
720720
self.ba.at(self.current_index.next()?)
721721
}
@@ -973,6 +973,86 @@ impl ByteSpanToByteSpan of ToByteSpanTrait<ByteSpan> {
973973
}
974974
}
975975

976+
/// An iterator struct over a ByteSpan.
///
/// Yields the span's bytes one at a time as `u8`. Bytes are popped from `current_word`; when it
/// is empty the next full word is pulled from `data_iter`, and once that is exhausted the
/// `remainder` word is used.
977+
#[derive(Drop, Clone)]
978+
pub struct ByteSpanIter {
979+
/// Iterator over the full words.
980+
data_iter: SpanIter<bytes31>,
981+
/// The word currently being iterated over.
/// Its `word_len` counts the bytes of this word that have not been yielded yet.
982+
current_word: ShortString,
983+
/// The last, partial word of the ByteSpan, iterated over after all full words are consumed.
/// Its `word_len` is set to 0 once the word has been moved into `current_word`.
984+
remainder: ShortString,
985+
}
986+
987+
// Byte-by-byte iteration over a `ByteSpanIter`.
impl ByteSpanIterator of crate::iter::Iterator<ByteSpanIter> {
988+
type Item = u8;
989+
990+
/// Returns the next byte, or `None` once `current_word`, `data_iter` and `remainder` are all
/// exhausted.
fn next(ref self: ByteSpanIter) -> Option<u8> {
991+
// Fast path: the current word still has bytes left.
if let Some(byte) = self.current_word.pop_first() {
992+
return Some(byte);
993+
}
994+
995+
// Current word exhausted, try loading the next into current word from data or remainder.
996+
match self.data_iter.next() {
997+
Some(word) => {
998+
// Words coming from the data span are loaded with the full length of 31 bytes.
self.current_word = ShortString { word: (*word).into(), word_len: 31 };
999+
},
1000+
// No more words in data, try loading the remainder.
1001+
None => {
1002+
if self.remainder.word_len == 0 { // Remainder is empty.
1003+
return None;
1004+
}
1005+
1006+
// `ShortString` derives `Copy`, so this copies the remainder into `current_word`;
// the stored remainder is then zero-length so it is not loaded a second time.
self.current_word = self.remainder;
1007+
self.remainder.word_len = 0; // Mark remainder as consumed.
1008+
},
1009+
}
1010+
1011+
// Pop the first byte of the word that was just loaded.
self.current_word.pop_first()
1012+
}
1013+
}
1014+
1015+
// Conversion of a `ByteSpan` into its byte iterator, `ByteSpanIter`.
impl ByteSpanIntoIterator of crate::iter::IntoIterator<ByteSpan> {
1016+
type IntoIter = ByteSpanIter;
1017+
1018+
/// Creates an iterator over the bytes in the `ByteSpan`.
///
/// The span's `first_char_start_offset` is applied to the first word that gets iterated (the
/// first data word if there is one, otherwise the remainder word) by shortening that word's
/// length by the offset.
1019+
fn into_iter(self: ByteSpan) -> Self::IntoIter {
1020+
let mut data_iter = self.data.into_iter();
1021+
1022+
// Get first word in data array if exists, otherwise iterate on the remainder word.
1023+
let Some(first_word) = data_iter.next() else {
1024+
// No data words and no remainder bytes: return an iterator with nothing to yield.
if self.remainder_len == 0 {
1025+
return ByteSpanIter {
1026+
data_iter, current_word: Default::default(), remainder: Default::default(),
1027+
};
1028+
}
1029+
1030+
// If remainder length is nonzero then it's strictly larger than the start offset.
1031+
let word_len = helpers::length_sub_offset(
1032+
upcast(self.remainder_len), self.first_char_start_offset,
1033+
);
1034+
1035+
// The remainder word is iterated directly as the current word, so the iterator's own
// `remainder` is left empty.
return ByteSpanIter {
1036+
data_iter,
1037+
current_word: ShortString { word: self.remainder_word, word_len: word_len },
1038+
remainder: Default::default(),
1039+
};
1040+
};
1041+
1042+
// The first data word is a full word of `BYTES_IN_BYTES31` bytes, minus the start offset.
let word_len = helpers::length_sub_offset(
1043+
upcast(BYTES_IN_BYTES31), self.first_char_start_offset,
1044+
);
1045+
ByteSpanIter {
1046+
data_iter,
1047+
current_word: ShortString { word: (*first_word).into(), word_len: word_len },
1048+
// The remainder keeps its full length here; the offset was applied to the first word.
remainder: ShortString {
1049+
word: self.remainder_word, word_len: upcast(self.remainder_len),
1050+
},
1051+
}
1052+
}
1053+
}
1054+
1055+
9761056
/// Shifts a word right by `n_bytes`.
9771057
/// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
9781058
/// performance.
@@ -987,6 +1067,36 @@ fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
9871067
let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes);
9881068
after_shift_right
9891069
}
1070+
/// Representation of a `felt252` holding a string up to size 31, including length.
///
/// Derives `Copy`, so assigning a `ShortString` copies both the word and its length.
1071+
#[derive(Drop, Copy)]
1072+
struct ShortString {
1073+
/// The actual data.
1074+
word: felt252,
1075+
/// The actual length of the short string in bytes.
/// `pop_first` decrements this by one per returned byte; 0 means nothing is left to pop.
1076+
word_len: BoundedInt<0, 31>,
1077+
}
1078+
1079+
#[generate_trait]
1080+
impl ShortStringImpl of ShortStringTrait {
1081+
/// Removes and returns the first byte from the string if it exists.
///
/// Returns `None` when `word_len` is zero. Otherwise `word_len` is decremented by one;
/// `word` itself is left unchanged.
1082+
fn pop_first(ref self: ShortString) -> Option<u8> {
1083+
// `byte_position` is `word_len - 1`; the helper yields `None` when `word_len` is 0.
let Some(byte_position) = helpers::short_string_byte_count_dec(self.word_len) else {
1084+
return None;
1085+
};
1086+
1087+
// Strings are indexed by lsb, so the first char is at position (byte_count - 1).
1088+
let byte = u8_at_u256(self.word.into(), upcast(byte_position));
1089+
1090+
// Shrinking the length is what "removes" the byte: the next call reads one position lower.
self.word_len = byte_position;
1091+
Some(byte)
1092+
}
1093+
}
1094+
1095+
// The default `ShortString` is empty: a zero word with zero length.
impl ShortStringDefault of Default<ShortString> {
1096+
/// Returns a `ShortString` with `word` 0 and `word_len` 0.
fn default() -> ShortString {
1097+
ShortString { word: 0, word_len: 0 }
1098+
}
1099+
}
9901100

9911101
mod helpers {
9921102
use core::num::traits::Bounded;
@@ -1049,6 +1159,11 @@ mod helpers {
10491159
type Result = BoundedInt<-1, { BYTES_IN_BYTES31_MINUS_ONE.into() - 1 }>;
10501160
}
10511161

1162+
// For decrementing ShortString count (BoundedInt<0, 31>) by 1
// The result range starts at -1 because the input may be 0.
// NOTE(review): no use of this impl is visible in the shown hunks;
// `short_string_byte_count_dec` subtracts from `BoundedInt<1, 31>` after `trim_min` instead —
// confirm whether this impl is still needed.
1163+
impl ShortStringCountSub1 of SubHelper<BoundedInt<0, 31>, UnitInt<1>> {
1164+
type Result = BoundedInt<-1, { BYTES_IN_BYTES31.into() - 1 }>;
1165+
}
1166+
10521167
// For byte_at: (BoundedInt<0,30> - 1) - BoundedInt<0,30>
10531168
impl Bytes31IndexMinus1SubBytes31Index of SubHelper<Bytes31IndexSub1::Result, Bytes31Index> {
10541169
type Result =
@@ -1110,6 +1225,57 @@ mod helpers {
11101225
}
11111226
}
11121227

1228+
// Trimming the minimum value (0) off `BoundedInt<0, 31>` leaves the range 1..=31.
pub impl TrimMinShortStringCount of bounded_int::TrimMinHelper<BoundedInt<0, 31>> {
1229+
type Target = BoundedInt<1, 31>;
1230+
}
1231+
// Result type of subtracting the constant 1 from a `BoundedInt<1, 31>`: the range 0..=30.
impl SubShortStringCount of SubHelper<BoundedInt<1, 31>, UnitInt<1>> {
1232+
type Result = BoundedInt<0, 30>;
1233+
}
1234+
1235+
/// Decrements the ShortString byte count by one, or returns `None` if the count is zero.
///
/// The zero case is split off with `bounded_int::trim_min`, so the subtraction is only performed
/// on a value in 1..=31; the difference is then upcast back to `BoundedInt<0, 31>`.
1236+
pub fn short_string_byte_count_dec(count: BoundedInt<0, 31>) -> Option<BoundedInt<0, 31>> {
1237+
// `trimmed` is `count` typed as a `BoundedInt<1, 31>`, i.e. known to be nonzero.
if let crate::internal::OptionRev::Some(trimmed) = bounded_int::trim_min(count) {
1238+
Some(upcast(bounded_int::sub(trimmed, 1)))
1239+
} else {
1240+
None
1241+
}
1242+
}
1243+
1244+
// Result type of subtracting one `Bytes31Index` from another: the range -30..=30.
// NOTE(review): no use of this impl other than in the constrain helper that follows it is
// visible in the shown hunks — confirm it is needed.
impl Bytes31IndexSubBytes31Index of SubHelper<Bytes31Index, Bytes31Index> {
1245+
type Result = BoundedInt<{ -30 }, 30>;
1246+
}
1247+
1248+
// Splits the `Bytes31Index - Bytes31Index` result range at boundary 0 into a negative part
// (`LowT`, -30..=-1) and a non-negative part (`HighT`, 0..=30).
// NOTE(review): no call site using this constraint is visible in the shown hunks — confirm.
impl ConstrainBytes31IndexSubPositive of ConstrainHelper<
1249+
Bytes31IndexSubBytes31Index::Result, 0,
1250+
> {
1251+
type LowT = BoundedInt<{ -30 }, { -1 }>;
1252+
type HighT = BoundedInt<0, 30>;
1253+
}
1254+
1255+
// Result type of subtracting a `Bytes31Index` offset from a length in 0..=31: the range
// -30..=31. Used by the subtraction in `length_sub_offset`.
impl B31SubOffset of SubHelper<BoundedInt<0, 31>, Bytes31Index> {
1256+
type Result = BoundedInt<-30, 31>;
1257+
}
1258+
1259+
// Splits the `length - offset` result range at boundary 0 into a negative part
// (`LowT`, -30..=-1) and a non-negative part (`HighT`, 0..=31).
impl ConstrainB31SubOffsetPos of ConstrainHelper<B31SubOffset::Result, 0> {
1260+
type LowT = BoundedInt<-30, -1>;
1261+
type HighT = BoundedInt<0, 31>;
1262+
}
1263+
1264+
/// Subtracts `offset` from `length`, assumes `offset < length`.
///
/// The difference is constrained at boundary 0 and the non-negative half (`HighT`,
/// `BoundedInt<0, 31>`) is extracted with `unwrap_err()`. Since `HighT` starts at 0, a
/// difference of zero (`offset == length`) is also in that half.
/// NOTE(review): a negative difference (`offset > length`) presumably lands in the `LowT` half
/// and makes `unwrap_err()` panic — confirm against `bounded_int::constrain`.
1265+
pub fn length_sub_offset(length: BoundedInt<0, 31>, offset: Bytes31Index) -> BoundedInt<0, 31> {
1266+
let diff = bounded_int::sub(length, offset);
1267+
bounded_int::constrain::<_, 0>(diff).unwrap_err()
1268+
}
1269+
1270+
/// Decrements the index by one, or returns `None` if the index is zero.
///
/// The zero case is split off with `bounded_int::trim_min`; the subtraction is performed on the
/// trimmed (nonzero) value and the difference is upcast back to `Bytes31Index`.
/// NOTE(review): no caller of this function is visible in the shown hunks — confirm it is used.
1271+
pub fn byte31_index_dec(index: Bytes31Index) -> Option<Bytes31Index> {
1272+
if let crate::internal::OptionRev::Some(trimmed) = bounded_int::trim_min(index) {
1273+
Some(upcast(bounded_int::sub(trimmed, 1)))
1274+
} else {
1275+
None
1276+
}
1277+
}
1278+
11131279
/// The information about the new pending word length and the split index.
11141280
pub enum AppendWordInfo {
11151281
/// The new pending word length is less than 31, and fits in the current pending word.
@@ -1129,6 +1295,7 @@ mod helpers {
11291295
type Result = Bytes31Index;
11301296
}
11311297

1298+
11321299
/// Returns the information about the new pending word length and the split index.
11331300
pub fn append_word_info(
11341301
pending_bytes: Bytes31Index, new_word_bytes: BoundedInt<1, 31>,
@@ -1146,11 +1313,9 @@ mod helpers {
11461313
// Trimming the minimum value off `Bytes31Index` leaves the range 1..=30.
pub impl TrimMinBytes31Index of bounded_int::TrimMinHelper<Bytes31Index> {
11471314
type Target = BoundedInt<1, 30>;
11481315
}
1149-
1150-
impl LengthToBytes31Index of SubHelper<BoundedInt<1, 31>, UnitInt<1>> {
1151-
type Result = Bytes31Index;
1316+
// Result type of subtracting the constant 1 from a `BoundedInt<1, 30>`: the range 0..=29.
impl SubBytes31Index of SubHelper<BoundedInt<1, 30>, UnitInt<1>> {
1317+
type Result = BoundedInt<0, 29>;
11521318
}
1153-
11541319
/// Takes the length of an input word and returns the length minus one.
11551320
pub fn length_minus_one(len: BoundedInt<1, 31>) -> Bytes31Index {
11561321
bounded_int::sub(len, 1)

0 commit comments

Comments
 (0)