Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions lib/truncate_html/html_string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,16 @@ module TruncateHtml
class HtmlString < String

# Tag names that presumably never take a closing counterpart (their use is not
# visible in this part of the file -- confirm against the callers).
UNPAIRED_TAGS = %w(br hr img).freeze

# Tokenizer pattern consumed by #html_tokens. Alternatives, tried in order:
#   1. one or more <script ...>...</script> runs,
#   2. any single opening or closing tag,
#   3. a run of letters, digits and the listed symbols,
#   4. a run of whitespace,
#   5. a single punctuation character.
# Defined exactly once: assigning the same constant twice makes Ruby print an
# "already initialized constant" warning at load time.
REGEX = /(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]][0-9]\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'²³§",\.\/?]+|\s+|[[:punct:]]/.freeze

# Pattern consumed by #clean_html: a whole <script> element including its body,
# or any other <...> construct (quoted attribute values may contain '>').
HTMLTAGS = /<script\b[^>]*>([\s\S]*?)<\/script>|<("[^"]*"|'[^']*'|[^'">])*>/.freeze

# Builds the HtmlString by handing the given text straight to the superclass
# constructor.
#
# original_html - the text to wrap; forwarded to super unchanged.
def initialize(original_html)
super(original_html)
end

# Splits the receiver into tokens with REGEX and normalizes each of them.
#
# Inside every token each newline becomes a space and every run of whitespace
# is collapsed into a single space. The normalization is done inline and the
# result is wrapped explicitly, so every element is an HtmlString regardless of
# the class String#gsub hands back for subclass receivers (a plain String since
# Ruby 3.0).
#
# Returns an Array of HtmlString.
def html_tokens
  scan(REGEX).map do |token|
    HtmlString.new(token.gsub(/\n/, ' ').gsub(/\s+/, ' '))
  end
end

Expand All @@ -37,5 +32,17 @@ def matching_close_tag
gsub(/<(\w+)\s?.*>/, '</\1>').strip
end

# Returns the receiver's text with everything matched by HTMLTAGS removed,
# newlines turned into spaces and whitespace runs collapsed to one space.
#
# The steps are applied inline and the result is wrapped explicitly: since
# Ruby 3.0 String#gsub returns a plain String for subclass receivers, so
# chaining HtmlString-only helpers onto its result raises NoMethodError.
#
# Returns an HtmlString.
def clean_html
  HtmlString.new(gsub(HTMLTAGS, '').gsub(/\n/, ' ').gsub(/\s+/, ' '))
end

# Returns a copy of the receiver with every newline replaced by a space.
#
# The copy is wrapped in HtmlString so further HtmlString helpers can be
# chained onto it; String#gsub alone returns a plain String for subclass
# receivers since Ruby 3.0.
def replace_newline
  HtmlString.new(gsub(/\n/, ' '))
end

# Returns a copy of the receiver with every run of whitespace collapsed into a
# single space.
#
# The copy is wrapped in HtmlString so further HtmlString helpers can be
# chained onto it; String#gsub alone returns a plain String for subclass
# receivers since Ruby 3.0.
def clean_whitespaces
  HtmlString.new(gsub(/\s+/, ' '))
end

end
end
9 changes: 7 additions & 2 deletions lib/truncate_html/html_truncator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@ class HtmlTruncator

# Prepares the state for one truncation run.
#
# original_html - the markup to truncate; #truncate calls html_tokens on it.
# options       - Hash of per-call overrides. Each entry that is missing falls
#                 back to the matching TruncateHtml.configuration value:
#                 :length, :omission, :word_boundary, :break_token.
def initialize(original_html, options = {})
  @original_html = original_html
  @length = options[:length] || TruncateHtml.configuration.length
  @omission = options[:omission] || TruncateHtml.configuration.omission
  # Key presence (not ||) decides here, so an explicit false/nil is honoured.
  @word_boundary = (options.has_key?(:word_boundary) ? options[:word_boundary] : TruncateHtml.configuration.word_boundary)
  @break_token = options[:break_token] || TruncateHtml.configuration.break_token || nil
  # Budget left for text once room for the omission has been reserved.
  @chars_remaining = @length - @omission.length
  @open_tags, @closing_tags, @truncated_html = [], [], ['']
end

def truncate
return @omission if @chars_remaining < 0
return @original_html if return_html?
@original_html.html_tokens.each do |token|
if @chars_remaining <= 0 || truncate_token?(token)
close_open_tags
Expand Down Expand Up @@ -93,5 +94,9 @@ def remove_latest_open_tag(close_tag)
# Tells whether +token+ equals the configured break token.
#
# token - the token to compare against @break_token.
#
# Returns @break_token itself (nil/false) when no break token is set,
# otherwise the result of the equality check.
def truncate_token?(token)
  @break_token && token == @break_token
end

# Decides whether the original markup can be handed back as is.
#
# That is the case when its tag-free text (clean_html) is no longer than
# @length and none of its tokens equals @break_token. The token list is only
# computed when the length check passes.
def return_html?
  return false unless @original_html.clean_html.length <= @length

  !@original_html.html_tokens.include?(@break_token)
end
end
end
33 changes: 33 additions & 0 deletions spec/truncate_html/html_string_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,37 @@ def html_string(original_string)
html_string('foo').should_not be_html_comment
end
end

describe '#replace_newline' do
# The input literal spans three physical lines, so it carries two embedded
# newlines; the expected value has a single space in place of each of them.
it 'returns the string with whitespaces instead of newlines' do
html = 'This is a string.
With newlines.
Dont want them.'
expected = 'This is a string. With newlines. Dont want them.'

html_string(html).replace_newline.should == expected
end
end

# The group label matches the method under test (clean_whitespaces), and the
# input really contains runs of consecutive spaces, so the example fails if
# the collapsing stops working.
describe '#clean_whitespaces' do
  it 'returns the string with only single whitespaces' do
    html = 'This  is a   string.    With double  white spaces.'
    expected = 'This is a string. With double white spaces.'
    html_string(html).clean_whitespaces.should == expected
  end
end

# clean_html is expected to drop tags, whole <script> elements and comments,
# leaving only single-spaced text.
describe '#clean_html' do
  it 'returns the html string without any html tags' do
    markup = '<b>This is bold</b>. <script> alert("Dont show!") </script> <div class="this-class">This will show</div>'
    html_string(markup).clean_html.should == 'This is bold. This will show'
  end

  it 'returns the html string without any comments' do
    markup = '<b>This is bold</b>. <!-- this is a comment --> And this will show'
    html_string(markup).clean_html.should == 'This is bold. And this will show'
  end
end
end
34 changes: 32 additions & 2 deletions spec/truncate_html/html_truncator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ def truncate(html, opts = {})

it 'retains the tags within the text' do
  html = 'some text <span class="caps">CAPS</span> some text'
  # The tag-free text is 24 characters long. Any :length of 24 or more makes
  # return_html? hand the markup back untouched, so the limit has to be lower
  # for a truncation to be observable.
  truncate(html, :length => 19, :word_boundary => false).should == 'some text <span class="caps">CAPS</span> s...'
end

context 'and a custom omission value is passed' do
it 'retains the omission text' do
  # "testtest" is 8 characters long. A :length of 8 or more makes return_html?
  # hand the string back without any omission, so the limit has to be lower.
  truncate("testtest", :length => 7, :omission => '..', :word_boundary => false).should == 'testt..'
end

it 'handles multibyte characters' do
Expand Down Expand Up @@ -204,4 +204,34 @@ def truncate(html, opts = {})
'<h1>hello <!-- stuff --> and <!-- la -->...</h1>'
end
end

# Every input below carries exactly 19 characters of tag-free text
# ('exact string length'), matching the :length passed to truncate.
context 'when the clean string length is the same than the length param' do
  it 'does not truncate the string' do
    text = 'exact string length'
    truncate(text, length: 19).should == text
  end

  it 'does not truncate the string even if it contains html tags' do
    tagged = '<b>exact</b> <span class="this-class">string</span> length'
    truncate(tagged, length: 19).should == tagged
  end

  it 'does not truncate the string even if it contains html comments' do
    commented = 'exact <!-- stuff --> string length'
    truncate(commented, length: 19).should == commented
  end

  context 'when the break_token is set' do
    it 'truncates before the break_token if included in the string' do
      commented = 'exact <!-- stuff --> string length'
      truncate(commented, length: 19, break_token: 'length').should == 'exact <!-- stuff --> string'
    end

    it 'does not truncate before if break token is not in the string' do
      commented = 'exact <!-- stuff --> string length'
      truncate(commented, length: 19, break_token: 'nothere').should == commented
    end
  end
end
end