Skip to content

Commit b74fbe0

Browse files
author
Gilad Chase
committed
feat(byte_array): add ByteSpan iterator
1 parent 6dde184 commit b74fbe0

File tree

2 files changed

+313
-8
lines changed

2 files changed

+313
-8
lines changed

corelib/src/byte_array.cairo

Lines changed: 171 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,11 @@
4242
//! assert!(first_byte == 0x41);
4343
//! ```
4444

45-
use crate::array::{ArrayTrait, Span, SpanTrait};
46-
use crate::bytes_31::split_bytes31;
45+
use crate::array::{ArrayTrait, Span, SpanIter, SpanTrait};
4746
#[allow(unused_imports)]
4847
use crate::bytes_31::{
4948
BYTES_IN_BYTES31, Bytes31Trait, POW_2_128, POW_2_8, U128IntoBytes31, U8IntoBytes31,
50-
one_shift_left_bytes_u128, split_u128, u8_at_u256,
49+
one_shift_left_bytes_u128, split_bytes31, split_u128, u8_at_u256,
5150
};
5251
use crate::clone::Clone;
5352
use crate::cmp::min;
@@ -706,7 +705,7 @@ pub(crate) impl ByteArrayIndexView of crate::traits::IndexView<ByteArray, usize,
706705
}
707706
}
708707

709-
// TODO: Implement a more efficient version of this iterator.
708+
// TODO(giladchase): Delegate to byte span iterator instead of current at-based implementation.
710709
/// An iterator struct over a ByteArray.
711710
#[derive(Drop, Clone)]
712711
pub struct ByteArrayIter {
@@ -716,6 +715,7 @@ pub struct ByteArrayIter {
716715

717716
impl ByteArrayIterator of crate::iter::Iterator<ByteArrayIter> {
718717
type Item = u8;
718+
719719
fn next(ref self: ByteArrayIter) -> Option<u8> {
720720
self.ba.at(self.current_index.next()?)
721721
}
@@ -973,6 +973,87 @@ impl ByteSpanToByteSpan of ToByteSpanTrait<ByteSpan> {
973973
}
974974
}
975975

976+
/// An iterator struct over a ByteSpan.
///
/// Bytes are yielded one `u8` at a time: first whatever is left in `current_word`, then every
/// word still pending in `data_iter`, and finally the bytes held in `remainder`.
977+
#[derive(Drop, Clone)]
978+
pub struct ByteSpanIter {
979+
/// Iterator over the full `bytes31` words that have not yet been loaded into `current_word`.
980+
data_iter: SpanIter<bytes31>,
981+
/// The word currently being iterated over; bytes are popped from it until it is empty.
982+
current_word: ShortString,
983+
/// The last, partial word of the ByteSpan, iterated over after all full words are consumed.
/// Its `word_len` is reset to 0 once it has been moved into `current_word`.
984+
remainder: ShortString,
985+
}
986+
987+
/// `Iterator` implementation for `ByteSpanIter`, yielding one `u8` per call to `next`.
impl ByteSpanIterator of crate::iter::Iterator<ByteSpanIter> {
988+
type Item = u8;
989+
990+
/// Returns the next byte, or `None` once `current_word`, `data_iter` and `remainder` are all
/// exhausted.
///
/// Bytes are taken from `current_word` first; when it is empty it is refilled from the next
/// word of `data_iter`, and only after `data_iter` is drained from `remainder`.
fn next(ref self: ByteSpanIter) -> Option<u8> {
991+
// Fast path: the current word still has unread bytes.
if let Some(byte) = self.current_word.pop_first() {
992+
return Some(byte);
993+
}
994+
995+
// Current word exhausted, try loading the next into current word from data or remainder.
996+
match self.data_iter.next() {
997+
Some(word) => {
998+
// Words coming from `data_iter` are full, so their length is `BYTES_IN_BYTES31`.
let word_len = upcast(BYTES_IN_BYTES31);
999+
self.current_word = ShortString { word: (*word).into(), word_len };
1000+
},
1001+
// No more words in data, try loading the remainder.
1002+
None => {
1003+
if self.remainder.word_len == 0 { // Remainder is empty.
1004+
return None;
1005+
}
1006+
1007+
// `ShortString` is `Copy`, so this copies the remainder into the current word.
self.current_word = self.remainder;
1008+
self.remainder.word_len = 0; // Mark remainder as consumed.
1009+
},
1010+
}
1011+
1012+
// The freshly loaded word is read through the same `pop_first` path as above.
self.current_word.pop_first()
1013+
}
1014+
}
1015+
1016+
/// `IntoIterator` implementation turning a `ByteSpan` into a `ByteSpanIter`.
impl ByteSpanIntoIterator of crate::iter::IntoIterator<ByteSpan> {
1017+
type IntoIter = ByteSpanIter;
1018+
1019+
/// Creates an iterator over the bytes in the `ByteSpan`.
///
/// The first word to be iterated (the first data word, or the remainder word when there is
/// no data) has its length reduced by `first_char_start_offset`, so that iteration begins
/// at the first byte that belongs to the span.
1020+
fn into_iter(self: ByteSpan) -> Self::IntoIter {
1021+
let mut data_iter = self.data.into_iter();
1022+
1023+
// Get first word in data array if exists, otherwise iterate on the remainder word.
1024+
let Some(first_word) = data_iter.next() else {
1025+
// No data words and no remainder: build an iterator whose words are both empty.
if self.remainder_len == 0 {
1026+
return ByteSpanIter {
1027+
data_iter, current_word: Default::default(), remainder: Default::default(),
1028+
};
1029+
}
1030+
1031+
// If remainder length is nonzero then it's strictly larger than the start offset.
1032+
let word_len = helpers::length_sub_offset(
1033+
upcast(self.remainder_len), self.first_char_start_offset,
1034+
);
1035+
1036+
// The remainder word is the only word: iterate it directly as `current_word` and
// leave `remainder` empty.
return ByteSpanIter {
1037+
data_iter,
1038+
current_word: ShortString { word: self.remainder_word.into(), word_len },
1039+
remainder: Default::default(),
1040+
};
1041+
};
1042+
1043+
// The first data word is full (`BYTES_IN_BYTES31` bytes) minus the skipped start offset.
let word_len = helpers::length_sub_offset(
1044+
upcast(BYTES_IN_BYTES31), self.first_char_start_offset,
1045+
);
1046+
ByteSpanIter {
1047+
data_iter,
1048+
current_word: ShortString { word: (*first_word).into(), word_len },
1049+
// The remainder keeps its full length here: the start offset was already applied to
// the first data word above.
remainder: ShortString {
1050+
word: self.remainder_word.into(), word_len: upcast(self.remainder_len),
1051+
},
1052+
}
1053+
}
1054+
}
1055+
1056+
9761057
/// Shifts a word right by `n_bytes`.
9771058
/// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
9781059
/// performance.
@@ -987,6 +1068,36 @@ fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
9871068
let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes);
9881069
after_shift_right
9891070
}
1071+
/// A string of at most 31 bytes together with its length.
///
/// The bytes are held in a `u256` (not a `felt252`), which is the type `u8_at_u256` reads from
/// in `pop_first`.
1072+
#[derive(Drop, Copy)]
1073+
struct ShortString {
1074+
/// The actual data.
1075+
word: u256,
1076+
/// The actual length of the short string in bytes, i.e. how many bytes of `word` are still
/// unread; `pop_first` decrements it by one per returned byte.
1077+
word_len: BoundedInt<0, 31>,
1078+
}
1079+
1080+
/// Byte-level accessors for `ShortString`.
#[generate_trait]
1081+
impl ShortStringImpl of ShortStringTrait {
1082+
/// Removes and returns the first byte from the string if it exists.
///
/// Returns `None` when `word_len` is zero. The byte is not cleared from `word`; only
/// `word_len` shrinks, so the next call reads the byte one position lower.
1083+
fn pop_first(ref self: ShortString) -> Option<u8> {
1084+
// `byte_position` is `word_len - 1`, or the function bails out when `word_len` is 0.
let Some(byte_position) = helpers::short_string_byte_count_dec(self.word_len) else {
1085+
return None;
1086+
};
1087+
1088+
// Strings are indexed by lsb, so the first char is at position (byte_count - 1).
1089+
let byte = u8_at_u256(self.word, upcast(byte_position));
1090+
1091+
// The decremented count doubles as the new length.
self.word_len = byte_position;
1092+
Some(byte)
1093+
}
1094+
}
1095+
1096+
/// The default `ShortString` is empty: a zero word with zero length, so `pop_first` on it
/// returns `None`.
impl ShortStringDefault of Default<ShortString> {
1097+
fn default() -> ShortString {
1098+
ShortString { word: 0, word_len: 0 }
1099+
}
1100+
}
9901101

9911102
mod helpers {
9921103
use core::num::traits::Bounded;
@@ -1049,6 +1160,11 @@ mod helpers {
10491160
type Result = BoundedInt<-1, { BYTES_IN_BYTES31_MINUS_ONE.into() - 1 }>;
10501161
}
10511162

1163+
// For decrementing ShortString count (BoundedInt<0, 31>) by 1; the result may be -1 when the
// count is 0.
// NOTE(review): no use of this helper is visible in this diff (`short_string_byte_count_dec`
// goes through `trim_min` + `length_minus_one`) - confirm it is still needed.
1164+
impl ShortStringCountSub1 of SubHelper<BoundedInt<0, 31>, UnitInt<1>> {
1165+
type Result = BoundedInt<-1, { BYTES_IN_BYTES31.into() - 1 }>;
1166+
}
1167+
10521168
// For byte_at: (BoundedInt<0,30> - 1) - BoundedInt<0,30>
10531169
impl Bytes31IndexMinus1SubBytes31Index of SubHelper<Bytes31IndexSub1::Result, Bytes31Index> {
10541170
type Result =
@@ -1110,6 +1226,53 @@ mod helpers {
11101226
}
11111227
}
11121228

1229+
/// Trims the minimum value (0) off a ShortString byte count, leaving `BoundedInt<1, 31>`.
/// Needed by the `trim_min` call in `short_string_byte_count_dec`.
pub impl TrimMinShortStringCount of bounded_int::TrimMinHelper<BoundedInt<0, 31>> {
1230+
type Target = BoundedInt<1, 31>;
1231+
}
1232+
/// Decrements the ShortString byte count by one, or returns `None` if the count is zero.
///
/// `trim_min` separates the zero case from the `BoundedInt<1, 31>` case; only the latter is
/// decremented, and the result is upcast back to `BoundedInt<0, 31>`.
1233+
pub fn short_string_byte_count_dec(count: BoundedInt<0, 31>) -> Option<BoundedInt<0, 31>> {
1234+
if let crate::internal::OptionRev::Some(trimmed) = bounded_int::trim_min(count) {
1235+
Some(upcast(length_minus_one(trimmed)))
1236+
} else {
1237+
None
1238+
}
1239+
}
1240+
1241+
// Result type of `Bytes31Index - Bytes31Index`.
// NOTE(review): the [-30, 30] range presumes `Bytes31Index` is `BoundedInt<0, 30>` - its
// definition is outside this view.
impl Bytes31IndexSubBytes31Index of SubHelper<Bytes31Index, Bytes31Index> {
1242+
type Result = BoundedInt<{ -30 }, 30>;
1243+
}
1244+
1245+
// Splits the `Bytes31Index - Bytes31Index` result at 0: negative values go to `LowT`,
// non-negative values go to `HighT`.
impl ConstrainBytes31IndexSubPositive of ConstrainHelper<
1246+
Bytes31IndexSubBytes31Index::Result, 0,
1247+
> {
1248+
type LowT = BoundedInt<{ -30 }, { -1 }>;
1249+
type HighT = BoundedInt<0, 30>;
1250+
}
1251+
1252+
// Result type of `BoundedInt<0, 31> - Bytes31Index`, as computed by `length_sub_offset`.
impl B31SubOffset of SubHelper<BoundedInt<0, 31>, Bytes31Index> {
1253+
type Result = BoundedInt<-30, 31>;
1254+
}
1255+
1256+
// Splits the `B31SubOffset` result at 0: negative values go to `LowT`, non-negative values go
// to `HighT`. `length_sub_offset` keeps only the `HighT` side.
impl ConstrainB31SubOffsetPos of ConstrainHelper<B31SubOffset::Result, 0> {
1257+
type LowT = BoundedInt<-30, -1>;
1258+
type HighT = BoundedInt<0, 31>;
1259+
}
1260+
1261+
/// Subtracts `offset` from `length`, assumes `offset < length`.
///
/// The difference is constrained at 0 and only the non-negative side (`HighT`, which includes
/// 0) is accepted, so `offset == length` also yields a result.
/// NOTE(review): presumably panics via `unwrap_err` when `offset > length` - confirm against
/// `bounded_int::constrain`.
1262+
pub fn length_sub_offset(length: BoundedInt<0, 31>, offset: Bytes31Index) -> BoundedInt<0, 31> {
1263+
let diff = bounded_int::sub(length, offset);
1264+
// The non-negative side of the split is extracted with `unwrap_err`, not `unwrap`.
bounded_int::constrain::<_, 0>(diff).unwrap_err()
1265+
}
1266+
1267+
/// Decrements the index by one, or returns `None` if the index is zero.
///
/// `trim_min` separates the zero case from the non-zero case; only the latter is decremented
/// and the result is upcast back to `Bytes31Index`.
1268+
pub fn byte31_index_dec(index: Bytes31Index) -> Option<Bytes31Index> {
1269+
if let crate::internal::OptionRev::Some(trimmed) = bounded_int::trim_min(index) {
1270+
Some(upcast(bounded_int::sub(trimmed, 1)))
1271+
} else {
1272+
None
1273+
}
1274+
}
1275+
11131276
/// The information about the new pending word length and the split index.
11141277
pub enum AppendWordInfo {
11151278
/// The new pending word length is less than 31, and fits in the current pending word.
@@ -1129,6 +1292,7 @@ mod helpers {
11291292
type Result = Bytes31Index;
11301293
}
11311294

1295+
11321296
/// Returns the information about the new pending word length and the split index.
11331297
pub fn append_word_info(
11341298
pending_bytes: Bytes31Index, new_word_bytes: BoundedInt<1, 31>,
@@ -1146,6 +1310,9 @@ mod helpers {
11461310
/// Trims the minimum value off `Bytes31Index`, leaving `BoundedInt<1, 30>`.
/// Needed by the `trim_min` call in `byte31_index_dec`.
pub impl TrimMinBytes31Index of bounded_int::TrimMinHelper<Bytes31Index> {
11471311
type Target = BoundedInt<1, 30>;
11481312
}
1313+
// Result type of decrementing a trimmed (non-zero) index `BoundedInt<1, 30>` by 1, as done by
// the `bounded_int::sub(trimmed, 1)` call in `byte31_index_dec`.
impl SubBytes31Index of SubHelper<BoundedInt<1, 30>, UnitInt<1>> {
1314+
type Result = BoundedInt<0, 29>;
1315+
}
11491316

11501317
impl LengthToBytes31Index of SubHelper<BoundedInt<1, 31>, UnitInt<1>> {
11511318
type Result = Bytes31Index;

0 commit comments

Comments
 (0)