feat(byte_array): add ByteSpan iterator

Gilad Chase · Gilad Chase · commit ccfb107af29d · 2025-10-22T10:01:05.000+03:00
diff --git a/corelib/src/byte_array.cairo b/corelib/src/byte_array.cairo
@@ -42,12 +42,11 @@
 //! assert!(first_byte == 0x41);
 //! ```
 
-use crate::array::{ArrayTrait, Span, SpanTrait};
-use crate::bytes_31::split_bytes31;
+use crate::array::{ArrayTrait, Span, SpanIter, SpanTrait};
 #[allow(unused_imports)]
 use crate::bytes_31::{
     BYTES_IN_BYTES31, Bytes31Trait, POW_2_128, POW_2_8, U128IntoBytes31, U8IntoBytes31,
-    one_shift_left_bytes_u128, split_u128, u8_at_u256,
+    one_shift_left_bytes_u128, split_bytes31, split_u128, u8_at_u256,
 };
 use crate::clone::Clone;
 use crate::cmp::min;
@@ -706,7 +705,7 @@ pub(crate) impl ByteArrayIndexView of crate::traits::IndexView<ByteArray, usize,
     }
 }
 
-// TODO: Implement a more efficient version of this iterator.
+// TODO(giladchase): Delegate to byte span iterator instead of current at-based implementation.
 /// An iterator struct over a ByteArray.
 #[derive(Drop, Clone)]
 pub struct ByteArrayIter {
@@ -716,6 +715,7 @@ pub struct ByteArrayIter {
 
 impl ByteArrayIterator of crate::iter::Iterator<ByteArrayIter> {
     type Item = u8;
+
     fn next(ref self: ByteArrayIter) -> Option<u8> {
         self.ba.at(self.current_index.next()?)
     }
@@ -973,6 +973,107 @@ impl ByteSpanToByteSpan of ToByteSpanTrait<ByteSpan> {
     }
 }
 
+/// An iterator struct over a ByteSpan.
+#[derive(Drop, Clone)]
+pub struct ByteSpanIter {
+    // Whether all bytes have been consumed.
+    iterator_exhausted: bool,
+    // Iterator over the full words.
+    data_iter: SpanIter<bytes31>,
+    // The word currently being iterated over.
+    // Here `ShortString.end_index` represents `len - 1` of the bytes left in the word.
+    current_word: ShortString,
+    // The last, partial word of the ByteSpan, iterated over after all full words are consumed.
+    // `ShortString.end_index` acts like `len` for the remainder, rather than `len - 1`.
+    remainder: ShortString,
+}
+
+impl ByteSpanIterator of crate::iter::Iterator<ByteSpanIter> {
+    type Item = u8;
+
+    fn next(ref self: ByteSpanIter) -> Option<u8> {
+        if self.iterator_exhausted {
+            return None;
+        }
+
+        let byte = self.current_word.first();
+        // If still in the current word, return.
+        if self.current_word.pop_first() {
+            return Some(byte);
+        }
+
+        // Current word exhausted, try advancing to the next word from the data iterator.
+        let Some(word) = self.data_iter.next() else {
+            if self.remainder.end_index == 0 { // Remainder is consumed.
+                self.iterator_exhausted = true;
+                return Some(byte);
+            }
+
+            self.current_word.word = self.remainder.word;
+            self
+                .current_word
+                .end_index = helpers::byte31_index_dec(self.remainder.end_index)
+                .unwrap();
+            self.remainder.end_index = 0; // Mark remainder as consumed.
+            return Some(byte);
+        };
+
+        self.current_word.word = (*word).into();
+        self.current_word.end_index = downcast(BYTES_IN_BYTES31_MINUS_ONE).unwrap();
+        Some(byte)
+    }
+}
+
+impl ByteSpanIntoIterator of crate::iter::IntoIterator<ByteSpan> {
+    type IntoIter = ByteSpanIter;
+
+    /// Creates an iterator over the bytes in the `ByteSpan`.
+    fn into_iter(self: ByteSpan) -> Self::IntoIter {
+        let mut data_iter = self.data.into_iter();
+
+        // Get first word in data array if exists, otherwise iterate on the remainder word.
+        let Some(first_word) = data_iter.next() else {
+            if self.remainder_len == 0 {
+                return ByteSpanIter {
+                    iterator_exhausted: true,
+                    data_iter,
+                    current_word: Default::default(),
+                    remainder: Default::default(),
+                };
+            }
+
+            // If remainder length is nonzero then it's strictly larger than the start offset.
+            let byte_in_word_lsb_index: usize = upcast(self.remainder_len)
+                - upcast(
+                    helpers::byte31_index_inc(self.first_char_start_offset)
+                        .expect('offset is < 30 in remainder'),
+                );
+
+            return ByteSpanIter {
+                data_iter,
+                current_word: ShortString {
+                    word: self.remainder_word, end_index: downcast(byte_in_word_lsb_index).unwrap(),
+                },
+                iterator_exhausted: false,
+                remainder: Default::default(),
+            };
+        };
+
+        let byte_in_word_lsb_index = helpers::byte31_index_inc(self.first_char_start_offset)
+            .map(|offset_inc| BYTES_IN_BYTES31 - upcast(offset_inc))
+            .unwrap_or_default();
+        ByteSpanIter {
+            data_iter,
+            current_word: ShortString {
+                word: (*first_word).into(), end_index: downcast(byte_in_word_lsb_index).unwrap(),
+            },
+            iterator_exhausted: false,
+            remainder: ShortString { word: self.remainder_word, end_index: self.remainder_len },
+        }
+    }
+}
+
+
 /// Shifts a word right by `n_bytes`.
 /// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
 /// performance.
@@ -988,6 +1089,45 @@ fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
     after_shift_right
 }
 
+#[derive(Drop, Copy)]
+struct ShortString {
+    // A `felt252` that actually represents a `bytes31`, with at most 31 bytes.
+    // Unlike `ByteArray.pending_word`, this felt _can_ represent a 31 bytes word, rather than
+    // just 30 bytes.
+    word: felt252,
+    // The index of the last byte in the word.
+    // Note: In practice, we use this to represent either `len` or `len - 1` depending on caller's
+    // choice.
+    end_index: Bytes31Index,
+}
+
+#[generate_trait]
+impl ShortStringImpl of ShortStringTrait {
+    /// Returns the first element of the string.
+    fn first(self: ShortString) -> u8 {
+        // Strings are indexed by lsb, so the first char is at the end.
+        u8_at_u256(self.word.into(), upcast(self.end_index))
+    }
+
+    /// Removes the first char from the string. Returns `true` if chars remain after the removal;
+    /// otherwise returns `false` and leaves `end_index` unchanged at zero.
+    fn pop_first(ref self: ShortString) -> bool {
+        match helpers::byte31_index_dec(self.end_index) {
+            Some(next_index) => {
+                self.end_index = next_index;
+                true
+            },
+            None => false,
+        }
+    }
+}
+
+impl ShortStringDefault of Default<ShortString> {
+    fn default() -> ShortString {
+        ShortString { word: 0, end_index: 0 }
+    }
+}
+
 mod helpers {
     use core::num::traits::Bounded;
     use crate::bytes_31::{BYTES_IN_BYTES31, Bytes31Trait, u8_at_u256};
@@ -1110,6 +1250,15 @@ mod helpers {
         }
     }
 
+    /// Decrements the index by one, or returns `None` if the index is zero.
+    pub fn byte31_index_dec(index: Bytes31Index) -> Option<Bytes31Index> {
+        if let crate::internal::OptionRev::Some(trimmed) = bounded_int::trim_min(index) {
+            Some(upcast(bounded_int::sub(trimmed, 1)))
+        } else {
+            None
+        }
+    }
+
     /// The information about the new pending word length and the split index.
     pub enum AppendWordInfo {
         /// The new pending word length is less than 31, and fits in the current pending word.
@@ -1129,6 +1278,7 @@ mod helpers {
         type Result = Bytes31Index;
     }
 
+
     /// Returns the information about the new pending word length and the split index.
     pub fn append_word_info(
         pending_bytes: Bytes31Index, new_word_bytes: BoundedInt<1, 31>,
@@ -1146,6 +1296,9 @@ mod helpers {
     pub impl TrimMinBytes31Index of bounded_int::TrimMinHelper<Bytes31Index> {
         type Target = BoundedInt<1, 30>;
     }
+    impl SubBytes31Index of SubHelper<BoundedInt<1, 30>, UnitInt<1>> {
+        type Result = BoundedInt<0, 29>;
+    }
 
     impl LengthToBytes31Index of SubHelper<BoundedInt<1, 31>, UnitInt<1>> {
         type Result = Bytes31Index;
diff --git a/corelib/src/test/byte_array_test.cairo b/corelib/src/test/byte_array_test.cairo
@@ -731,10 +731,148 @@ fn test_span_at_overflows() {
     // Test overflow protection with large indices.
     let ba: ByteArray = "test";
     let span = ba.span();
-
     assert_eq!(span.get(Bounded::<usize>::MAX), None);
 
     let sliced = ba.span().get(1..3).unwrap();
     assert_eq!(sliced.get(Bounded::<usize>::MAX - 1), None);
     assert_eq!(sliced.get(Bounded::<usize>::MAX), None);
 }
+
+#[test]
+fn test_byte_span_simple() {
+    let empty: ByteArray = "";
+    assert_eq!(empty.span().into_iter().collect(), array![]);
+
+    let ba: ByteArray = "A";
+    assert_eq!(ba.span().into_iter().collect(), array!['A']);
+
+    let ba: ByteArray = "AB";
+    assert_eq!(ba.span().into_iter().collect(), array!['A', 'B']);
+}
+
+#[test]
+fn test_byte_span_iterator_word_boundaries() {
+    // Test 30, 31, 32 bytes (1 word boundary).
+    let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
+    let mut iter = ba_30.span().into_iter();
+    iter.advance_by(29).unwrap();
+    assert_eq!(iter.collect(), array!['d'], "30 bytes - last byte");
+
+    let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
+    let mut iter = ba_31.span().into_iter();
+    assert_eq!(iter.next(), Some('A'));
+    iter.advance_by(29).unwrap();
+    assert_eq!(iter.collect(), array!['e'], "31 bytes - last byte");
+
+    let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
+    let mut iter = ba_32.span().into_iter();
+    iter.advance_by(30).unwrap();
+    assert_eq!(iter.collect(), array!['e', 'f'], "32 bytes - last two bytes");
+
+    // Test 62, 63, 64 bytes (2 word boundaries).
+    let ba_62: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+    let mut iter = ba_62.span().into_iter();
+    iter.advance_by(61).unwrap();
+    assert_eq!(iter.collect(), array!['9'], "62 bytes - last byte");
+
+    let ba_63: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!";
+    let mut iter = ba_63.span().into_iter();
+    iter.advance_by(62).unwrap();
+    assert_eq!(iter.collect(), array!['!'], "63 bytes - last byte");
+
+    let ba_64: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@";
+    let mut iter = ba_64.span().into_iter();
+    iter.advance_by(62).unwrap();
+    assert_eq!(iter.collect(), array!['!', '@'], "64 bytes - last two bytes");
+}
+
+#[test]
+fn test_byte_span_iterator_multiple_words() {
+    // Test with 3+ words to verify iteration works across multiple word boundaries.
+    // 92 bytes: 31 + 31 + 30.
+    let ba_92: ByteArray =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':,.<>?/~`";
+    let span = ba_92.span();
+    assert_eq!(span.into_iter().count(), 92, "should iterate all 92 bytes");
+
+    // Verify correctness at specific positions.
+    let mut iter = span.into_iter();
+    assert_eq!(iter.next(), Some('A'));
+
+    // Skip the next 90 bytes (1 already consumed), leaving only the last byte.
+    iter.advance_by(90).unwrap();
+    assert_eq!(iter.collect(), array!['`'], "92 bytes - last byte");
+}
+
+#[test]
+fn test_byte_span_iterator_for_loop_collect() {
+    let small_ba: ByteArray = "Hello";
+    let span = small_ba.span();
+
+    let mut collected = Default::default();
+    let mut count = 0;
+    for byte in span {
+        collected.append_byte(byte);
+        count += 1;
+    }
+    assert_eq!(collected, small_ba);
+    assert_eq!(count, 5);
+    assert_eq!(span.into_iter().collect(), small_ba);
+
+    // Test with 2 words.
+    let ba_40: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn";
+    collected = Default::default();
+    count = 0;
+    for byte in ba_40.span() {
+        collected.append_byte(byte);
+        count += 1;
+    }
+    assert_eq!(collected, ba_40);
+    assert_eq!(count, 40);
+    assert_eq!(ba_40.span().into_iter().collect(), ba_40);
+
+    // Test with 3 words.
+    let ba_70: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*";
+    collected = Default::default();
+    count = 0;
+    for byte in ba_70.span() {
+        collected.append_byte(byte);
+        count += 1;
+    }
+    assert_eq!(collected, ba_70);
+    assert_eq!(count, 70);
+    assert_eq!(ba_70.span().into_iter().collect(), ba_70);
+}
+
+#[test]
+fn test_byte_span_iterator_slices() {
+    // Slice within remainder word (< 31 bytes).
+    let ba_13: ByteArray = "Hello Shmello";
+    let span = ba_13.span().get(2..7).unwrap();
+    assert_eq!(
+        span.into_iter().collect(), array!['l', 'l', 'o', ' ', 'S'], "slice within remainder word",
+    );
+
+    // Iterate slice across 2 words (1 data + remainder).
+    let ba_33: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
+    let span = ba_33.span().get(27..32).unwrap();
+    assert_eq!(span.into_iter().collect(), array!['b', 'c', 'd', 'e', 'f'], "slice across 2 words");
+
+    // Iterate slice across 3 words.
+    let ba_66: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$";
+    let span = ba_66.span().get(29..64).unwrap();
+    assert_eq!(
+        span.into_iter().collect(),
+        array![
+            'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+            'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!',
+            '@',
+        ],
+        "slice across 3 words",
+    );
+
+    // Test case where data array is exhausted and remainder has exactly 1 byte.
+    let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
+    let span = ba_32.span().get(31..32).unwrap();
+    assert_eq!(span.into_iter().collect(), array!['f'], "1 byte in remainder after data exhausted");
+}