hgmnz · abonec · Feb 19, 2014
diff --git a/lib/truncate_html/html_string.rb b/lib/truncate_html/html_string.rb
@@ -3,7 +3,7 @@ module TruncateHtml
   class HtmlString < String
 
     UNPAIRED_TAGS = %w(br hr img).freeze
-    REGEX = /(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]][0-9]\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'²³§",\.\/?]+|\s+|[[:punct:]]/.freeze
+    REGEX = /(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]][0-9]\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'²³§",\.\/?\u00a0]+|\s+|[[:punct:]]/.freeze
 
     def initialize(original_html)
       super(original_html)

diff --git a/spec/truncate_html/html_string_spec.rb b/spec/truncate_html/html_string_spec.rb
@@ -9,9 +9,9 @@ def html_string(original_string)
 
   describe '#html_tokens' do
     it 'returns each token in the string as an array element removing any consecutive whitespace from the string' do
-      html = '<h1>Hi there</h1> <p>This          is sweet!</p> <p> squaremeter m² </p>'
+      html = "<h1>Hi there</h1> <p>This          is sweet!</p> <p> squaremeter m² and no\u00a0break space </p>"
       html_string(html).html_tokens.should == ['<h1>', 'Hi', ' ', 'there', '</h1>', ' ', '<p>', 'This', ' ', 'is', ' ', 'sweet!', '</p>',
-        ' ', '<p>', ' ', 'squaremeter', ' ', 'm²', ' ', '</p>']
+        ' ', '<p>', ' ', 'squaremeter', ' ', 'm²', ' ', 'and', ' ', "no\u00a0break", ' ', 'space', ' ', '</p>']
     end
   end