improve msg sanitization (#280)

This commit is contained in:
Clément DOUIN 2022-02-01 19:27:44 +01:00
parent 8919d03c60
commit ee86afe756
No known key found for this signature in database
GPG key ID: 353E4A18EE0FAB72

View file

@ -100,25 +100,41 @@ impl Msg {
.tags(HashSet::default())
.clean(&html)
.to_string();
// Replace `&nbsp;` by regular space
let sanitized_html = Regex::new(r" ")
// Merge new line chars
let sanitized_html = Regex::new(r"(\r?\n\s*){2,}")
.unwrap()
.replace_all(&sanitized_html, "\n\n")
.to_string();
// Replace tabulations and &nbsp; by spaces
let sanitized_html = Regex::new(r"(\t| )")
.unwrap()
.replace_all(&sanitized_html, " ")
.to_string();
// Merge new line chars
let sanitized_html = Regex::new(r"(\r?\n[\t ]*){2,}")
// Merge spaces
let sanitized_html = Regex::new(r" {2,}")
.unwrap()
.replace_all(&sanitized_html, "\n\n")
.replace_all(&sanitized_html, " ")
.to_string();
// Decode HTML entities
let sanitized_html = html_escape::decode_html_entities(&sanitized_html).to_string();
sanitized_html
} else {
let sanitized_plain = Regex::new(r"(\r?\n[\t ]*){2,}")
// Merge new line chars
let sanitized_plain = Regex::new(r"(\r?\n\s*){2,}")
.unwrap()
.replace_all(&plain, "\n\n")
.to_string();
// Replace tabulations by spaces
let sanitized_plain = Regex::new(r"\t")
.unwrap()
.replace_all(&sanitized_plain, " ")
.to_string();
// Merge spaces
let sanitized_plain = Regex::new(r" {2,}")
.unwrap()
.replace_all(&sanitized_plain, " ")
.to_string();
sanitized_plain
}