.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]][0-9]\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'²³§",\.\/?\u00a0]+|\s+|[[:punct:]]/.freeze
def initialize(original_html)
super(original_html)
diff --git a/spec/truncate_html/html_string_spec.rb b/spec/truncate_html/html_string_spec.rb
index 064f87b..4e5396c 100644
--- a/spec/truncate_html/html_string_spec.rb
+++ b/spec/truncate_html/html_string_spec.rb
@@ -9,9 +9,9 @@ def html_string(original_string)
describe '#html_tokens' do
it 'returns each token in the string as an array element removing any consecutive whitespace from the string' do
- html = 'Hi there
This is sweet!
squaremeter m²
'
+ html = "Hi there
This is sweet!
squaremeter m² and no\u00a0break space
"
html_string(html).html_tokens.should == ['', 'Hi', ' ', 'there', '
', ' ', '', 'This', ' ', 'is', ' ', 'sweet!', '
',
- ' ', '', ' ', 'squaremeter', ' ', 'm²', ' ', '
']
+ ' ', '', ' ', 'squaremeter', ' ', 'm²', ' ', 'and', ' ', "no break", ' ', 'space', ' ', '
']
end
end