@@ -2,6 +2,7 @@ use std::borrow::Cow;
22use std:: collections:: BTreeSet ;
33use std:: env;
44use std:: fmt;
5+ use std:: ops:: Range ;
56use std:: sync:: atomic:: { AtomicBool , Ordering } ;
67
78use lazy_static:: lazy_static;
@@ -724,7 +725,7 @@ fn str_width(s: &str) -> usize {
724725}
725726
726727#[ cfg( feature = "ansi-parsing" ) ]
727- pub ( crate ) fn char_width ( c : char ) -> usize {
728+ fn char_width ( c : char ) -> usize {
728729 #[ cfg( feature = "unicode-width" ) ]
729730 {
730731 use unicode_width:: UnicodeWidthChar ;
@@ -737,80 +738,98 @@ pub(crate) fn char_width(c: char) -> usize {
737738 }
738739}
739740
740- /// Truncates a string to a certain number of characters.
/// Slice a `&str` in terms of text width. Only the text columns in the
/// half-open interval `bounds.start..bounds.end` are kept.
///
/// A character that starts before `bounds.start`, or that would extend past
/// `bounds.end`, is not included. With multi-column characters the result can
/// therefore be less than `bounds.end - bounds.start` columns wide.
///
/// With the `ansi-parsing` feature enabled, escape codes are never cut: codes
/// located before the interval are emitted first, codes located after it are
/// emitted last, and `head` / `tail` are inserted between those codes and the
/// slice itself. The input is borrowed when nothing has to be added.
///
/// Without `ansi-parsing`, `bounds` is interpreted as a byte range. It is
/// clamped to the length of `s` and both ends are moved inward to the nearest
/// char boundary, so an out-of-range or mid-character bound drops only the
/// offending part instead of the whole string.
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
    #[cfg(feature = "ansi-parsing")]
    {
        // Cumulated width of the characters iterated so far.
        let mut pos = 0;
        // Byte range of `s` covered while `pos` was inside `bounds`.
        let mut slice = 0..0;

        // ANSI codes found outside of the slice; they are re-emitted around it
        // so that styling started or reset outside the interval still applies.
        let mut front_ansi = String::new();
        let mut back_ansi = String::new();

        for (sub, is_ansi) in AnsiCodeIterator::new(s) {
            if is_ansi {
                if pos < bounds.start {
                    // Code before the interval: keep it aside and move the
                    // (still empty) slice past it.
                    front_ansi.push_str(sub);
                    slice.start += sub.len();
                    slice.end = slice.start;
                } else if pos <= bounds.end {
                    // Code inside the interval (or right at its end, before
                    // any dropped character): extend the slice over it.
                    slice.end += sub.len();
                } else {
                    // Code after the interval: keep it aside.
                    back_ansi.push_str(sub);
                }
            } else {
                for c in sub.chars() {
                    let c_width = char_width(c);

                    if pos < bounds.start {
                        // Char starts before the interval: move the slice past it.
                        slice.start += c.len_utf8();
                        slice.end = slice.start;
                    } else if pos + c_width <= bounds.end {
                        // Char fits entirely into the interval: extend the slice.
                        slice.end += c.len_utf8();
                    }

                    // Dropped chars still advance the column position.
                    pos += c_width;
                }
            }
        }

        let slice = &s[slice];

        if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
            Cow::Borrowed(slice)
        } else {
            Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
        }
    }
    #[cfg(not(feature = "ansi-parsing"))]
    {
        // Byte-based fallback. Clamp the end to the string and move it back to
        // a char boundary so a partially covered char is dropped.
        let mut end = bounds.end.min(s.len());
        while !s.is_char_boundary(end) {
            end -= 1;
        }

        // Same for the start, but moving forward. `s.len()` is always a
        // boundary, so the loop terminates.
        let mut start = bounds.start.min(s.len());
        while !s.is_char_boundary(start) {
            start += 1;
        }

        // An empty or reversed range yields an empty slice.
        let slice = if start < end { &s[start..end] } else { "" };

        if head.is_empty() && tail.is_empty() {
            Cow::Borrowed(slice)
        } else {
            Cow::Owned(format!("{head}{slice}{tail}"))
        }
    }
}
813817
818+ /// Truncates a string to a certain number of characters.
819+ ///
820+ /// This ensures that escape codes are not screwed up in the process.
821+ /// If the maximum length is hit the string will be truncated but
822+ /// escapes code will still be honored. If truncation takes place
823+ /// the tail string will be appended.
824+ pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
825+ if measure_text_width ( s) > width {
826+ let tail_width = measure_text_width ( tail) ;
827+ slice_str ( s, "" , 0 ..width. saturating_sub ( tail_width) , tail)
828+ } else {
829+ Cow :: Borrowed ( s)
830+ }
831+ }
832+
814833/// Pads a string to fill a certain number of characters.
815834///
816835/// This will honor ansi codes correctly and allows you to align a string
@@ -919,8 +938,50 @@ fn test_truncate_str() {
919938 ) ;
920939}
921940
/// Checks `slice_str` on a string mixing ANSI codes and wide characters.
///
/// The width-based assertions only run when both `unicode-width` and
/// `ansi-parsing` are enabled, since they rely on column widths and on escape
/// codes being recognised.
#[test]
fn test_slice_ansi_str() {
    // Note that 🐶 is two columns wide
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
    assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);

    if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
        assert_eq!(measure_text_width(test_str), 16);

        // Empty interval: only the escape codes survive.
        assert_eq!(
            slice_str(test_str, "", 5..5, ""),
            "\x1b[31m\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 0..5, ""),
            "Hello\x1b[31m\x1b[1m\x1b[0m"
        );

        // The first 🐶 would overlap the end of the interval, so it is dropped.
        assert_eq!(
            slice_str(test_str, "", 0..6, ""),
            "Hello\x1b[31m\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 0..7, ""),
            "Hello\x1b[31m🐶\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 4..9, ""),
            "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
        );

        // End bound past the text width: everything from column 7 on is kept.
        assert_eq!(
            slice_str(test_str, "", 7..21, ""),
            "\x1b[31m\x1b[1m🐶\x1b[0m world!"
        );
    }
}
981+
922982#[ test]
923983fn test_truncate_str_no_ansi ( ) {
984+ assert_eq ! ( & truncate_str( "foo bar" , 7 , "!" ) , "foo bar" ) ;
924985 assert_eq ! ( & truncate_str( "foo bar" , 5 , "" ) , "foo b" ) ;
925986 assert_eq ! ( & truncate_str( "foo bar" , 5 , "!" ) , "foo !" ) ;
926987 assert_eq ! ( & truncate_str( "foo bar baz" , 10 , "..." ) , "foo bar..." ) ;
0 commit comments