diff --git a/lib/truncate_html.rb b/lib/truncate_html.rb index 95b7923..3987501 100644 --- a/lib/truncate_html.rb +++ b/lib/truncate_html.rb @@ -7,7 +7,7 @@ TruncateHtml.configure do |config| config.length = 100 config.omission = '...' - config.word_boundary = /\S/ + config.word_boundary = /(?![[:space:]])./ end diff --git a/lib/truncate_html/html_string.rb b/lib/truncate_html/html_string.rb index 76d82e9..62d8675 100644 --- a/lib/truncate_html/html_string.rb +++ b/lib/truncate_html/html_string.rb @@ -3,7 +3,7 @@ module TruncateHtml class HtmlString < String UNPAIRED_TAGS = %w(br hr img).freeze - REGEX = /(?:.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]][0-9]\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'²³§",\.\/?]+|\s+|[[:punct:]]/.freeze + REGEX = /(?:.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]][0-9]\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'²³§",\.\/?]+|[[:space:]]+|[[:punct:]]/.freeze def initialize(original_html) super(original_html) @@ -13,9 +13,7 @@ def html_tokens scan(REGEX).map do |token| HtmlString.new( token.gsub( - /\n/,' ' #replace newline characters with a whitespace - ).gsub( - /\s+/, ' ' #clean out extra consecutive whitespace + /[[:space:]]+/, ' ' #clean out extra consecutive whitespace ) ) end diff --git a/spec/truncate_html/html_string_spec.rb b/spec/truncate_html/html_string_spec.rb index 064f87b..b60e165 100644 --- a/spec/truncate_html/html_string_spec.rb +++ b/spec/truncate_html/html_string_spec.rb @@ -9,9 +9,9 @@ def html_string(original_string) describe '#html_tokens' do it 'returns each token in the string as an array element removing any consecutive whitespace from the string' do - html = '

Hi there

This is sweet!

squaremeter m²

' + html = "

Hi there

This is sweet!

\r\n

squaremeter m²

Non-breaking\nspace here: 
" html_string(html).html_tokens.should == ['

', 'Hi', ' ', 'there', '

', ' ', '

', 'This', ' ', 'is', ' ', 'sweet!', '

', - ' ', '

', ' ', 'squaremeter', ' ', 'm²', ' ', '

'] + ' ', '

', ' ', 'squaremeter', ' ', 'm²', ' ', '

', '
', 'Non-breaking', ' ', 'space', ' ', 'here:', ' ', '
'] end end