Skip to content

Commit ccfb107

Browse files
author
Gilad Chase
committed
feat(byte_array): add ByteSpan iterator
1 parent 6dde184 commit ccfb107

File tree

2 files changed

+296
-5
lines changed

2 files changed

+296
-5
lines changed

corelib/src/byte_array.cairo

Lines changed: 157 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,11 @@
4242
//! assert!(first_byte == 0x41);
4343
//! ```
4444

45-
use crate::array::{ArrayTrait, Span, SpanTrait};
46-
use crate::bytes_31::split_bytes31;
45+
use crate::array::{ArrayTrait, Span, SpanIter, SpanTrait};
4746
#[allow(unused_imports)]
4847
use crate::bytes_31::{
4948
BYTES_IN_BYTES31, Bytes31Trait, POW_2_128, POW_2_8, U128IntoBytes31, U8IntoBytes31,
50-
one_shift_left_bytes_u128, split_u128, u8_at_u256,
49+
one_shift_left_bytes_u128, split_bytes31, split_u128, u8_at_u256,
5150
};
5251
use crate::clone::Clone;
5352
use crate::cmp::min;
@@ -706,7 +705,7 @@ pub(crate) impl ByteArrayIndexView of crate::traits::IndexView<ByteArray, usize,
706705
}
707706
}
708707

709-
// TODO: Implement a more efficient version of this iterator.
708+
// TODO(giladchase): Delegate to byte span iterator instead of current at-based implementation.
710709
/// An iterator struct over a ByteArray.
711710
#[derive(Drop, Clone)]
712711
pub struct ByteArrayIter {
@@ -716,6 +715,7 @@ pub struct ByteArrayIter {
716715

717716
impl ByteArrayIterator of crate::iter::Iterator<ByteArrayIter> {
718717
type Item = u8;
718+
719719
fn next(ref self: ByteArrayIter) -> Option<u8> {
720720
self.ba.at(self.current_index.next()?)
721721
}
@@ -973,6 +973,107 @@ impl ByteSpanToByteSpan of ToByteSpanTrait<ByteSpan> {
973973
}
974974
}
975975

976+
/// An iterator struct over a ByteSpan.
977+
#[derive(Drop, Clone)]
978+
pub struct ByteSpanIter {
979+
// Whether all bytes have been consumed.
980+
iterator_exhausted: bool,
981+
// Iterator over the full words.
982+
data_iter: SpanIter<bytes31>,
983+
// The word currently being iterated over.
984+
// `ShortString.end_index` represents `len - 1`.
985+
current_word: ShortString,
986+
// The last, partial word of the ByteSpan, iterated over after all full words are consumed.
987+
// `ShortString.end_index` acts like `len` for the remainder, rather than `len - 1`.
988+
remainder: ShortString,
989+
}
990+
991+
impl ByteSpanIterator of crate::iter::Iterator<ByteSpanIter> {
992+
type Item = u8;
993+
994+
fn next(ref self: ByteSpanIter) -> Option<u8> {
995+
if self.iterator_exhausted {
996+
return None;
997+
}
998+
999+
let byte = self.current_word.first();
1000+
// If still in the current word, return.
1001+
if self.current_word.pop_first() {
1002+
return Some(byte);
1003+
}
1004+
1005+
// Current word exhausted, try advancing to the next word from the data iterator.
1006+
let Some(word) = self.data_iter.next() else {
1007+
if self.remainder.end_index == 0 { // Remainder is consumed.
1008+
self.iterator_exhausted = true;
1009+
return Some(byte);
1010+
}
1011+
1012+
self.current_word.word = self.remainder.word;
1013+
self
1014+
.current_word
1015+
.end_index = helpers::byte31_index_dec(self.remainder.end_index)
1016+
.unwrap();
1017+
self.remainder.end_index = 0; // Mark remainder as consumed.
1018+
return Some(byte);
1019+
};
1020+
1021+
self.current_word.word = (*word).into();
1022+
self.current_word.end_index = downcast(BYTES_IN_BYTES31_MINUS_ONE).unwrap();
1023+
Some(byte)
1024+
}
1025+
}
1026+
1027+
impl ByteSpanIntoIterator of crate::iter::IntoIterator<ByteSpan> {
1028+
type IntoIter = ByteSpanIter;
1029+
1030+
/// Creates an iterator over the bytes in the `ByteSpan`.
1031+
fn into_iter(self: ByteSpan) -> Self::IntoIter {
1032+
let mut data_iter = self.data.into_iter();
1033+
1034+
// Get the first word in the data array if it exists; otherwise iterate over the remainder word.
1035+
let Some(first_word) = data_iter.next() else {
1036+
if self.remainder_len == 0 {
1037+
return ByteSpanIter {
1038+
iterator_exhausted: true,
1039+
data_iter,
1040+
current_word: Default::default(),
1041+
remainder: Default::default(),
1042+
};
1043+
}
1044+
1045+
// If remainder length is nonzero then it's strictly larger than the start offset.
1046+
let byte_in_word_lsb_index: usize = upcast(self.remainder_len)
1047+
- upcast(
1048+
helpers::byte31_index_inc(self.first_char_start_offset)
1049+
.expect('offset is < 30 in remainder'),
1050+
);
1051+
1052+
return ByteSpanIter {
1053+
data_iter,
1054+
current_word: ShortString {
1055+
word: self.remainder_word, end_index: downcast(byte_in_word_lsb_index).unwrap(),
1056+
},
1057+
iterator_exhausted: false,
1058+
remainder: Default::default(),
1059+
};
1060+
};
1061+
1062+
let byte_in_word_lsb_index = helpers::byte31_index_inc(self.first_char_start_offset)
1063+
.map(|offset_inc| BYTES_IN_BYTES31 - upcast(offset_inc))
1064+
.unwrap_or_default();
1065+
ByteSpanIter {
1066+
data_iter,
1067+
current_word: ShortString {
1068+
word: (*first_word).into(), end_index: downcast(byte_in_word_lsb_index).unwrap(),
1069+
},
1070+
iterator_exhausted: false,
1071+
remainder: ShortString { word: self.remainder_word, end_index: self.remainder_len },
1072+
}
1073+
}
1074+
}
1075+
1076+
9761077
/// Shifts a word right by `n_bytes`.
9771078
/// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
9781079
/// performance.
@@ -988,6 +1089,45 @@ fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
9881089
after_shift_right
9891090
}
9901091

1092+
#[derive(Drop, Copy)]
1093+
struct ShortString {
1094+
// A `felt252` that actually represents a `bytes31`, with at most 31 bytes.
1095+
// Unlike `ByteArray.pending_word`, this felt _can_ represent a 31-byte word, rather than
1096+
// just 30 bytes.
1097+
word: felt252,
1098+
// The index of the last byte in the word.
1099+
// Note: In practice, we use this to represent either `len` or `len - 1` depending on caller's
1100+
// choice.
1101+
end_index: Bytes31Index,
1102+
}
1103+
1104+
#[generate_trait]
1105+
impl ShortStringImpl of ShortStringTrait {
1106+
/// Returns the first element of the string.
1107+
fn first(self: ShortString) -> u8 {
1108+
// Strings are indexed by lsb, so the first char is at the end.
1109+
u8_at_u256(self.word.into(), upcast(self.end_index))
1110+
}
1111+
1112+
/// Tries to remove the first char from the string. If chars remain after the removal, returns
1113+
/// true; otherwise leaves the string unchanged and returns false.
1114+
fn pop_first(ref self: ShortString) -> bool {
1115+
match helpers::byte31_index_dec(self.end_index) {
1116+
Some(next_index) => {
1117+
self.end_index = next_index;
1118+
true
1119+
},
1120+
None => false,
1121+
}
1122+
}
1123+
}
1124+
1125+
impl ShortStringDefault of Default<ShortString> {
1126+
fn default() -> ShortString {
1127+
ShortString { word: 0, end_index: 0 }
1128+
}
1129+
}
1130+
9911131
mod helpers {
9921132
use core::num::traits::Bounded;
9931133
use crate::bytes_31::{BYTES_IN_BYTES31, Bytes31Trait, u8_at_u256};
@@ -1110,6 +1250,15 @@ mod helpers {
11101250
}
11111251
}
11121252

1253+
/// Decrements the index by one, or returns `None` if the index is zero.
1254+
pub fn byte31_index_dec(index: Bytes31Index) -> Option<Bytes31Index> {
1255+
if let crate::internal::OptionRev::Some(trimmed) = bounded_int::trim_min(index) {
1256+
Some(upcast(bounded_int::sub(trimmed, 1)))
1257+
} else {
1258+
None
1259+
}
1260+
}
1261+
11131262
/// The information about the new pending word length and the split index.
11141263
pub enum AppendWordInfo {
11151264
/// The new pending word length is less than 31, and fits in the current pending word.
@@ -1129,6 +1278,7 @@ mod helpers {
11291278
type Result = Bytes31Index;
11301279
}
11311280

1281+
11321282
/// Returns the information about the new pending word length and the split index.
11331283
pub fn append_word_info(
11341284
pending_bytes: Bytes31Index, new_word_bytes: BoundedInt<1, 31>,
@@ -1146,6 +1296,9 @@ mod helpers {
11461296
pub impl TrimMinBytes31Index of bounded_int::TrimMinHelper<Bytes31Index> {
11471297
type Target = BoundedInt<1, 30>;
11481298
}
1299+
impl SubBytes31Index of SubHelper<BoundedInt<1, 30>, UnitInt<1>> {
1300+
type Result = BoundedInt<0, 29>;
1301+
}
11491302

11501303
impl LengthToBytes31Index of SubHelper<BoundedInt<1, 31>, UnitInt<1>> {
11511304
type Result = Bytes31Index;

corelib/src/test/byte_array_test.cairo

Lines changed: 139 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -731,10 +731,148 @@ fn test_span_at_overflows() {
731731
// Test overflow protection with large indices.
732732
let ba: ByteArray = "test";
733733
let span = ba.span();
734-
735734
assert_eq!(span.get(Bounded::<usize>::MAX), None);
736735

737736
let sliced = ba.span().get(1..3).unwrap();
738737
assert_eq!(sliced.get(Bounded::<usize>::MAX - 1), None);
739738
assert_eq!(sliced.get(Bounded::<usize>::MAX), None);
740739
}
740+
741+
#[test]
742+
fn test_byte_span_simple() {
743+
let empty: ByteArray = "";
744+
assert_eq!(empty.span().into_iter().collect(), array![]);
745+
746+
let ba: ByteArray = "A";
747+
assert_eq!(ba.span().into_iter().collect(), array!['A']);
748+
749+
let ba: ByteArray = "AB";
750+
assert_eq!(ba.span().into_iter().collect(), array!['A', 'B']);
751+
}
752+
753+
#[test]
754+
fn test_byte_span_iterator_word_boundaries() {
755+
// Test 30, 31, 32 bytes (1 word boundary).
756+
let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
757+
let mut iter = ba_30.span().into_iter();
758+
iter.advance_by(29).unwrap();
759+
assert_eq!(iter.collect(), array!['d'], "30 bytes - last byte");
760+
761+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
762+
let mut iter = ba_31.span().into_iter();
763+
assert_eq!(iter.next(), Some('A'));
764+
iter.advance_by(29).unwrap();
765+
assert_eq!(iter.collect(), array!['e'], "31 bytes - last byte");
766+
767+
let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
768+
let mut iter = ba_32.span().into_iter();
769+
iter.advance_by(30).unwrap();
770+
assert_eq!(iter.collect(), array!['e', 'f'], "32 bytes - last two bytes");
771+
772+
// Test 62, 63, 64 bytes (2 word boundaries).
773+
let ba_62: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
774+
let mut iter = ba_62.span().into_iter();
775+
iter.advance_by(61).unwrap();
776+
assert_eq!(iter.collect(), array!['9'], "62 bytes - last byte");
777+
778+
let ba_63: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!";
779+
let mut iter = ba_63.span().into_iter();
780+
iter.advance_by(62).unwrap();
781+
assert_eq!(iter.collect(), array!['!'], "63 bytes - last byte");
782+
783+
let ba_64: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@";
784+
let mut iter = ba_64.span().into_iter();
785+
iter.advance_by(62).unwrap();
786+
assert_eq!(iter.collect(), array!['!', '@'], "64 bytes - last two bytes");
787+
}
788+
789+
#[test]
790+
fn test_byte_span_iterator_multiple_words() {
791+
// Test with 3+ words to verify iteration works across multiple word boundaries.
792+
// 92 bytes: 31 + 31 + 30.
793+
let ba_92: ByteArray =
794+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':,.<>?/~`";
795+
let span = ba_92.span();
796+
assert_eq!(span.into_iter().count(), 92, "should iterate all 92 bytes");
797+
798+
// Verify correctness at specific positions.
799+
let mut iter = span.into_iter();
800+
assert_eq!(iter.next(), Some('A'));
801+
802+
// Skip to last byte.
803+
iter.advance_by(90).unwrap();
804+
assert_eq!(iter.collect(), array!['`'], "90 bytes - last byte");
805+
}
806+
807+
#[test]
808+
fn test_byte_span_iterator_for_loop_collect() {
809+
let small_ba: ByteArray = "Hello";
810+
let span = small_ba.span();
811+
812+
let mut collected = Default::default();
813+
let mut count = 0;
814+
for byte in span {
815+
collected.append_byte(byte);
816+
count += 1;
817+
}
818+
assert_eq!(collected, small_ba);
819+
assert_eq!(count, 5);
820+
assert_eq!(span.into_iter().collect(), small_ba);
821+
822+
// Test with 2 words.
823+
let ba_40: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn";
824+
collected = Default::default();
825+
count = 0;
826+
for byte in ba_40.span() {
827+
collected.append_byte(byte);
828+
count += 1;
829+
}
830+
assert_eq!(collected, ba_40);
831+
assert_eq!(count, 40);
832+
assert_eq!(ba_40.span().into_iter().collect(), ba_40);
833+
834+
// Test with 3 words.
835+
let ba_70: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*";
836+
collected = Default::default();
837+
count = 0;
838+
for byte in ba_70.span() {
839+
collected.append_byte(byte);
840+
count += 1;
841+
}
842+
assert_eq!(collected, ba_70);
843+
assert_eq!(count, 70);
844+
assert_eq!(ba_70.span().into_iter().collect(), ba_70);
845+
}
846+
847+
#[test]
848+
fn test_byte_span_iterator_slices() {
849+
// Slice within remainder word (< 31 bytes).
850+
let ba_13: ByteArray = "Hello Shmello";
851+
let span = ba_13.span().get(2..7).unwrap();
852+
assert_eq!(
853+
span.into_iter().collect(), array!['l', 'l', 'o', ' ', 'S'], "slice within remainder word",
854+
);
855+
856+
// Iterate slice across 2 words (1 data + remainder).
857+
let ba_33: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
858+
let span = ba_33.span().get(27..32).unwrap();
859+
assert_eq!(span.into_iter().collect(), array!['b', 'c', 'd', 'e', 'f'], "slice across 2 words");
860+
861+
// Iterate slice across 3 words.
862+
let ba_66: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$";
863+
let span = ba_66.span().get(29..64).unwrap();
864+
assert_eq!(
865+
span.into_iter().collect(),
866+
array![
867+
'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
868+
'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!',
869+
'@',
870+
],
871+
"slice across 3 words",
872+
);
873+
874+
// Test case where data array is exhausted and remainder has exactly 1 byte.
875+
let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
876+
let span = ba_32.span().get(31..32).unwrap();
877+
assert_eq!(span.into_iter().collect(), array!['f'], "1 byte in remainder after data exhausted");
878+
}

0 commit comments

Comments
 (0)