Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions config/config.exs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# This file is responsible for configuring your application
# and its dependencies with the aid of the Mix.Config module.
use Mix.Config
# and its dependencies. It is evaluated by Mix at compile time.
# See: https://hexdocs.pm/elixir/Config.html
import Config

# This configuration is loaded before any dependency and is restricted
# to this project. If another project depends on this project, this
Expand Down
4 changes: 2 additions & 2 deletions lib/elixir_email_reply_parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ defmodule ElixirEmailReplyParser do
"Hi!\n\n How are you?"

"""
@spec parse_reply(String.t) :: String.t
@spec parse_reply(String.t()) :: String.t()
def parse_reply(text) do
text |> ElixirEmailReplyParser.Parser.read |> ElixirEmailReplyParser.Parser.reply
text |> ElixirEmailReplyParser.Parser.read() |> ElixirEmailReplyParser.Parser.reply()
end
end
4 changes: 1 addition & 3 deletions lib/elixir_email_reply_parser/email_message.ex
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
defmodule ElixirEmailReplyParser.EmailMessage do
@moduledoc false

defstruct [
fragments: []
]
defstruct fragments: []
end
14 changes: 6 additions & 8 deletions lib/elixir_email_reply_parser/fragment.ex
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
defmodule ElixirEmailReplyParser.Fragment do
@moduledoc false

defstruct [
signature: false,
headers: false,
hidden: false,
quoted: false,
content: nil,
lines: []
]
defstruct signature: false,
headers: false,
hidden: false,
quoted: false,
content: nil,
lines: []
end
128 changes: 81 additions & 47 deletions lib/elixir_email_reply_parser/parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ defmodule ElixirEmailReplyParser.Parser do
|> draw_away_lines_with_underscores
|> draw_away_signatures
|> String.split("\n")
|> Enum.reverse
|> Enum.reverse()

{:ok, fragments} = scan_line({nil, [], false}, lines)

Expand All @@ -18,33 +18,36 @@ defmodule ElixirEmailReplyParser.Parser do

def reply(%ElixirEmailReplyParser.EmailMessage{fragments: fragments}) do
fragments
|> Enum.filter(fn f -> unless (f.hidden or f.quoted), do: true end)
|> Enum.filter(fn f -> unless f.hidden or f.quoted, do: true end)
|> Enum.map(fn f -> f.content end)
|> Enum.join("\n")
|> String.trim_trailing()
end

@spec normalize_line_endings(String.t) :: String.t
@spec normalize_line_endings(String.t()) :: String.t()
defp normalize_line_endings(s) do
String.replace(s, "\r\n", "\n")
end

# Check for multi-line reply headers. Some clients break up
# the "On DATE, NAME <EMAIL> wrote:" line into multiple lines.
@spec handle_multiline(String.t) :: String.t
@spec handle_multiline(String.t()) :: String.t()
defp handle_multiline(s) do
Enum.reduce([
~R/(On(?:(?!On|wrote:)(.|\s))*?wrote:)/s,
~R/(schrieb\sam\s(.+?)um\s(.+?):)/s,
~R/(Am\s(.+?)um\s(.+?)schrieb\s(.+?):)/s],
s,
&remove_newlines_if_matched/2)
Enum.reduce(
[
~r/(On(?:(?!On|wrote:)(.|\s))*?wrote:)/s,
~r/(schrieb\sam\s(.+?)um\s(.+?):)/s,
~r/(Am\s(.+?)um\s(.+?)schrieb\s(.+?):)/s
],
s,
&remove_newlines_if_matched/2
)
end

# For removal of all new lines from the reply header.
@spec remove_newlines_if_matched(Regex.t, String.t) :: String.t
@spec remove_newlines_if_matched(Regex.t(), String.t()) :: String.t()
defp remove_newlines_if_matched(re, s) do
if (Regex.match?(re, s)) do
if Regex.match?(re, s) do
Regex.replace(re, s, fn x -> String.replace(x, "\n", "") end)
else
s
Expand All @@ -55,60 +58,65 @@ defmodule ElixirEmailReplyParser.Parser do
# In order to ensure that these fragments are split correctly,
# make sure that all lines of underscores are preceded by
# at least two newline characters.
@spec draw_away_lines_with_underscores(String.t) :: String.t
@spec draw_away_lines_with_underscores(String.t()) :: String.t()
defp draw_away_lines_with_underscores(s) do
Regex.replace(~R/([^\n])(?=\n_{7}_+)$/m, s, "\\1\n")
Regex.replace(~r/([^\n])(?=\n_{7}_+)$/m, s, "\\1\n")
end

# Some users may write directly above signature markers
# In order to ensure that these fragments are split correctly,
# make sure that all lines with signature markers are preceded by
# at least two newline characters.
@spec draw_away_signatures(String.t) :: String.t
@spec draw_away_signatures(String.t()) :: String.t()
defp draw_away_signatures(s) do
Regex.replace(~R/([^\n])(?=\n-{2,}\s*\n)$/m, s, "\\1\n")
Regex.replace(~r/([^\n])(?=\n-{2,}\s*\n)$/m, s, "\\1\n")
end

@spec string_empty?(String.t) :: boolean
@spec string_empty?(String.t()) :: boolean
defp string_empty?(s) do
String.trim(s) == ""
end

@spec match_at_least_one_regex?(String.t, [Regex.t]) :: boolean
@spec match_at_least_one_regex?(String.t(), [Regex.t()]) :: boolean
defp match_at_least_one_regex?(s, regexes)
defp match_at_least_one_regex?(_, []), do: false
defp match_at_least_one_regex?(s, [head | tail]), do: (Regex.match?(head, s) or match_at_least_one_regex?(s, tail))

@spec string_signature?(String.t) :: boolean
defp match_at_least_one_regex?(s, [head | tail]),
do: Regex.match?(head, s) or match_at_least_one_regex?(s, tail)

@spec string_signature?(String.t()) :: boolean
defp string_signature?(s) do
match_at_least_one_regex?(s, [
~R/(^\s*--|^\s*__|^-\w)|(^Sent from my ([a-zA-Z0-9_-]+\s*){1,3})\.?$/,
~R/^Diese Nachricht wurde von mein.* gesendet\.?$/,
~R/^Von mein.* gesendet\.?$/,
~R/^Gesendet von mein.* ([a-zA-Z0-9_-]+\s*){1,3}\.?$/,
~R"^Get Outlook for (iOS|Android) <https?://[a-z0-9.-]+[a-zA-Z0-9/.,_:;#?%!@$&'()*+~=-]*>$",
~R"^Outlook für (iOS|Android) beziehen <https?://[a-z0-9.-]+[a-zA-Z0-9/.,_:;#?%!@$&'()*+~=-]*>$"])
~r/(^\s*--|^\s*__|^-\w)|(^Sent from my ([a-zA-Z0-9_-]+\s*){1,3})\.?$/,
~r/^Diese Nachricht wurde von mein.* gesendet\.?$/,
~r/^Von mein.* gesendet\.?$/,
~r/^Gesendet von mein.* ([a-zA-Z0-9_-]+\s*){1,3}\.?$/,
~r"^Get Outlook for (iOS|Android) <https?://[a-z0-9.-]+[a-zA-Z0-9/.,_:;#?%!@$&'()*+~=-]*>$",
~r"^Outlook für (iOS|Android) beziehen <https?://[a-z0-9.-]+[a-zA-Z0-9/.,_:;#?%!@$&'()*+~=-]*>$"
])
end

@spec string_quoted?(String.t) :: boolean
@spec string_quoted?(String.t()) :: boolean
defp string_quoted?(s) do
Regex.match?(~R/^ *(>+)/, s)
Regex.match?(~r/^ *(>+)/, s)
end

@spec string_quote_header?(String.t) :: boolean
@spec string_quote_header?(String.t()) :: boolean
defp string_quote_header?(s) do
match_at_least_one_regex?(s, [
~R/On.*wrote:$/,
~R/^.+schrieb am.+um.+:$/,
~R/^Am.+um.+schrieb.+:$/,
~R/^-{5}Ursprüngliche Nachricht-{5}$/])
~r/On.*wrote:$/,
~r/^.+schrieb am.+um.+:$/,
~r/^Am.+um.+schrieb.+:$/,
~r/^-{5}Ursprüngliche Nachricht-{5}$/
])
end

@spec string_email_header?(String.t) :: boolean
@spec string_email_header?(String.t()) :: boolean
defp string_email_header?(s) do
match_at_least_one_regex?(s, [
~R/^\*?(From|Sent|To|Subject):\*? .+/,
~R/^\*?(Von|Gesendet|An|Betreff):\*? .+/ ])
~r/^\*?(From|Sent|To|Subject):\*? .+/,
~r/^\*?(Von|Gesendet|An|Betreff):\*? .+/
])
end

defp scan_line({nil, fragments, _found_visible}, []) do
Expand Down Expand Up @@ -138,6 +146,7 @@ defmodule ElixirEmailReplyParser.Parser do
fragment.lines
|> Enum.join("\n")
|> String.trim_leading()

%{fragment | content: content, lines: nil}
end

Expand All @@ -155,10 +164,19 @@ defmodule ElixirEmailReplyParser.Parser do
end

defp hide_hidden({_fragment, _fragments, true = _found_visible} = parameters), do: parameters
defp hide_hidden({%{quoted: true} = fragment, fragments, false}), do: {%{fragment | hidden: true}, fragments, false}
defp hide_hidden({%{headers: true} = fragment, fragments, false}), do: {%{fragment | hidden: true}, fragments, false}
defp hide_hidden({%{signature: true} = fragment, fragments, false}), do: {%{fragment | hidden: true}, fragments, false}
defp hide_hidden({%{content: ""} = fragment, fragments, false}), do: {%{fragment | hidden: true}, fragments, false}

defp hide_hidden({%{quoted: true} = fragment, fragments, false}),
do: {%{fragment | hidden: true}, fragments, false}

defp hide_hidden({%{headers: true} = fragment, fragments, false}),
do: {%{fragment | hidden: true}, fragments, false}

defp hide_hidden({%{signature: true} = fragment, fragments, false}),
do: {%{fragment | hidden: true}, fragments, false}

defp hide_hidden({%{content: ""} = fragment, fragments, false}),
do: {%{fragment | hidden: true}, fragments, false}

defp hide_hidden({fragment, fragments, false}), do: {fragment, fragments, true}

defp add_fragment({fragment, fragments, found_visible}) do
Expand All @@ -171,6 +189,7 @@ defmodule ElixirEmailReplyParser.Parser do

defp finish_fragment({fragment, fragments, found_visible}) do
fragment = consolidate_lines(fragment)

{fragment, fragments, found_visible}
|> hide_headers
|> hide_hidden
Expand All @@ -186,9 +205,11 @@ defmodule ElixirEmailReplyParser.Parser do
end

defp check_signature(parameters, line_is_empty, previous_line_is_signature)
defp check_signature(parameters, false , _), do: parameters
defp check_signature(parameters, false, _), do: parameters
defp check_signature(parameters, true, false), do: parameters
defp check_signature({fragment, fragments, found_visible}, true, true), do: finish_fragment({mark_as_signature(fragment), fragments, found_visible})

defp check_signature({fragment, fragments, found_visible}, true, true),
do: finish_fragment({mark_as_signature(fragment), fragments, found_visible})

defp add_line_to_fragment({fragment, fragments, found_visible}, line) do
fragment = %{fragment | lines: [line | fragment.lines]}
Expand All @@ -197,14 +218,27 @@ defmodule ElixirEmailReplyParser.Parser do

defp make_new_fragment({fragment, fragments, found_visible}, line, is_quoted, is_header) do
{_fragment, fragments, found_visible} = finish_fragment({fragment, fragments, found_visible})
fragment = %ElixirEmailReplyParser.Fragment{lines: [line], quoted: is_quoted, headers: is_header}

fragment = %ElixirEmailReplyParser.Fragment{
lines: [line],
quoted: is_quoted,
headers: is_header
}

{fragment, fragments, found_visible}
end

defp process_line(parameters, line, is_quoted, is_header, is_quote_header, is_empty)
defp process_line({nil, _f, _fv} = p, l, q, h, _qh , _e), do: make_new_fragment(p, l, q, h)
defp process_line({%{headers: h, quoted: q}, _f, _fv} = p, l, q, h, _qh, _e), do: add_line_to_fragment(p, l)
defp process_line({%{quoted: true}, _f, _fv} = p, l, _q, _h, true, _e), do: add_line_to_fragment(p, l)
defp process_line({%{quoted: true}, _f, _fv} = p, l, _q, _h, _qh, true), do: add_line_to_fragment(p, l)
defp process_line({nil, _f, _fv} = p, l, q, h, _qh, _e), do: make_new_fragment(p, l, q, h)

defp process_line({%{headers: h, quoted: q}, _f, _fv} = p, l, q, h, _qh, _e),
do: add_line_to_fragment(p, l)

defp process_line({%{quoted: true}, _f, _fv} = p, l, _q, _h, true, _e),
do: add_line_to_fragment(p, l)

defp process_line({%{quoted: true}, _f, _fv} = p, l, _q, _h, _qh, true),
do: add_line_to_fragment(p, l)

defp process_line(p, l, q, h, _qh, _e), do: make_new_fragment(p, l, q, h)
end
37 changes: 20 additions & 17 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,21 @@ defmodule ElixirEmailReplyParser.Mixfile do
use Mix.Project

def project do
[app: :elixir_email_reply_parser,
version: "0.1.2",
description: description(),
elixir: "~> 1.3",
build_embedded: Mix.env == :prod,
start_permanent: Mix.env == :prod,
package: package(),
deps: deps(),
[
app: :elixir_email_reply_parser,
version: "0.1.2",
description: description(),
elixir: "~> 1.9",
build_embedded: Mix.env() == :prod,
start_permanent: Mix.env() == :prod,
package: package(),
deps: deps(),

# Docs
name: "Elixir Email Reply Parser",
source_url: "https://github.com/hellogustav/elixir_email_reply_parser",
docs: [main: "readme",
extras: ["README.md", "LICENSE.md"]]]
# Docs
name: "Elixir Email Reply Parser",
source_url: "https://github.com/hellogustav/elixir_email_reply_parser",
docs: [main: "readme", extras: ["README.md", "LICENSE.md"]]
]
end

# Configuration for the OTP application
Expand All @@ -36,10 +37,12 @@ defmodule ElixirEmailReplyParser.Mixfile do
end

defp package do
[name: :elixir_email_reply_parser,
maintainers: ["[email protected]"],
licenses: ["MIT"],
links: %{"GitHub" => "https://github.com/hellogustav/elixir_email_reply_parser"}]
[
name: :elixir_email_reply_parser,
maintainers: ["[email protected]"],
licenses: ["MIT"],
links: %{"GitHub" => "https://github.com/hellogustav/elixir_email_reply_parser"}
]
end

defp deps do
Expand Down
Loading