From d21778c35ed2cae3a0f9fc43bf818ed68c2d837e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20DOUIN?= Date: Mon, 11 Oct 2021 23:04:33 +0200 Subject: [PATCH] improve text parts (#221) --- Cargo.lock | 23 +++- Cargo.toml | 2 +- src/domain/msg/msg_arg.rs | 8 +- src/domain/msg/msg_entity.rs | 242 ++++++++++++--------------------- src/domain/msg/msg_handler.rs | 17 +-- src/domain/msg/parts_entity.rs | 6 + src/domain/msg/tpl_entity.rs | 2 +- src/main.rs | 4 +- 8 files changed, 127 insertions(+), 177 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 586a7af..314ee92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,7 +369,7 @@ dependencies = [ "chrono", "clap", "env_logger", - "htmlescape", + "html-escape", "imap", "imap-proto", "lettre", @@ -400,6 +400,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "html-escape" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "816ea801a95538fc5f53c836697b3f8b64a9d664c4f0b91efe1fe7c92e4dbcb7" +dependencies = [ + "utf8-width", +] + [[package]] name = "html5ever" version = "0.25.1" @@ -414,12 +423,6 @@ dependencies = [ "syn", ] -[[package]] -name = "htmlescape" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" - [[package]] name = "httpdate" version = "1.0.1" @@ -1388,6 +1391,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-width" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cf7d77f457ef8dfa11e4cd5933c5ddb5dc52a94664071951219a97710f0a32b" + [[package]] name = "uuid" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 1c27147..9612fe2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ atty = "0.2.14" chrono = "0.4.19" clap = { version = "2.33.3", default-features = false, features = ["suggestions", "color"] } env_logger = "0.8.3" -htmlescape = "0.3.1" +html-escape = "0.2.9" imap = "3.0.0-alpha.4" imap-proto = "0.14.3" # This commit includes the de/serialization of the ContentType diff --git a/src/domain/msg/msg_arg.rs b/src/domain/msg/msg_arg.rs index b71d47f..e22de89 100644 --- a/src/domain/msg/msg_arg.rs +++ b/src/domain/msg/msg_arg.rs @@ -15,7 +15,7 @@ type Seq<'a> = &'a str; type PageSize = usize; type Page = usize; type Mbox<'a> = Option<&'a str>; -type Mime = String; +type TextMime<'a> = &'a str; type Raw = bool; type All = bool; type RawMsg<'a> = &'a str; @@ -30,7 +30,7 @@ pub enum Command<'a> { Forward(Seq<'a>, AttachmentsPaths<'a>), List(Option, Page), Move(Seq<'a>, Mbox<'a>), - Read(Seq<'a>, Mime, Raw), + Read(Seq<'a>, TextMime<'a>, Raw), Reply(Seq<'a>, All, AttachmentsPaths<'a>), Save(Mbox<'a>, RawMsg<'a>), Search(Query, Option, Page), @@ -103,8 +103,8 @@ pub fn matches<'a>(m: &'a ArgMatches) -> Result>> { debug!("read command matched"); let seq = m.value_of("seq").unwrap(); trace!("seq: {}", seq); - let mime = format!("text/{}", m.value_of("mime-type").unwrap()); - trace!("mime: {}", mime); + let mime = m.value_of("mime-type").unwrap(); + trace!("text mime: {}", mime); let raw = m.is_present("raw"); trace!("raw: {}", raw); return Ok(Some(Command::Read(seq, mime, raw))); diff --git a/src/domain/msg/msg_entity.rs b/src/domain/msg/msg_entity.rs index 6a98267..1d84d83 100644 --- a/src/domain/msg/msg_entity.rs +++ b/src/domain/msg/msg_entity.rs @@ -1,24 +1,24 @@ use ammonia; use anyhow::{anyhow, Context, Error, Result}; use chrono::{DateTime, FixedOffset}; -use htmlescape; +use html_escape; use imap::types::Flag; use lettre::message::{Attachment, MultiPart, SinglePart}; use regex::Regex; use rfc2047_decoder; -use serde::Serialize; use std::{ + collections::HashSet, convert::{TryFrom, TryInto}, - fmt, fs, + fs, path::PathBuf, }; use crate::{ - config::Account, + config::{Account, DEFAULT_SIG_DELIM}, domain::{ imap::ImapServiceInterface, mbox::Mbox, - msg::{msg_utils, Flags, Parts, TextHtmlPart, TextPlainPart, Tpl, TplOverride}, + msg::{msg_utils, BinaryPart, Flags, Part, Parts, TextPlainPart, Tpl, TplOverride}, smtp::SmtpServiceInterface, }, output::OutputServiceInterface, @@ -28,8 +28,6 @@ use crate::{ }, }; -use super::{BinaryPart, Part}; - type Addr = lettre::message::Mailbox; /// Representation of a message. @@ -66,39 +64,66 @@ impl Msg { self.parts .iter() .filter_map(|part| match part { - Part::Binary(part) => Some(part.clone()), + Part::Binary(part) => Some(part.to_owned()), _ => None, }) .collect() } - pub fn join_text_plain_parts(&self) -> String { - let text_parts = self - .parts - .iter() - .filter_map(|part| match part { - Part::TextPlain(part) => Some(part.content.to_owned()), - _ => None, - }) - .collect::>() - .join("\n\n"); - let text_parts = ammonia::Builder::new() - .tags(Default::default()) - .clean(&text_parts) - .to_string(); - let text_parts = match htmlescape::decode_html(&text_parts) { - Ok(text_parts) => text_parts, - Err(_) => text_parts, - }; - text_parts + /// Fold string body from all plain text parts into a single string body. If no plain text + /// parts are found, HTML parts are used instead. The result is sanitized (all HTML markup is + /// removed). + pub fn fold_text_plain_parts(&self) -> String { + let (plain, html) = self.parts.iter().fold( + (String::default(), String::default()), + |(mut plain, mut html), part| { + match part { + Part::TextPlain(part) => { + let glue = if plain.is_empty() { "" } else { "\n\n" }; + plain.push_str(glue); + plain.push_str(&part.content); + } + Part::TextHtml(part) => { + let glue = if html.is_empty() { "" } else { "\n\n" }; + html.push_str(glue); + html.push_str(&part.content); + } + _ => (), + }; + (plain, html) + }, + ); + if plain.is_empty() { + // Remove HTML markup + let sanitized_html = ammonia::Builder::new() + .tags(HashSet::default()) + .clean(&html) + .to_string(); + // Replace ` ` by regular space + let sanitized_html = Regex::new(r" ") + .unwrap() + .replace_all(&sanitized_html, " ") + .to_string(); + // Merge new line chars + let sanitized_html = Regex::new(r"(\r?\n ?){2,}") + .unwrap() + .replace_all(&sanitized_html, "\n\n") + .to_string(); + // Decode HTML entities + let sanitized_html = html_escape::decode_html_entities(&sanitized_html).to_string(); + sanitized_html + } else { + plain + } } - pub fn join_text_html_parts(&self) -> String { + /// Fold string body from all HTML parts into a single string body. + fn fold_text_html_parts(&self) -> String { let text_parts = self .parts .iter() .filter_map(|part| match part { - Part::TextPlain(part) => Some(part.content.to_owned()), + Part::TextHtml(part) => Some(part.content.to_owned()), _ => None, }) .collect::>() @@ -110,12 +135,13 @@ impl Msg { text_parts } - pub fn join_text_parts(&self) -> String { - let text_parts = self.join_text_plain_parts(); - if text_parts.is_empty() { - self.join_text_html_parts() + /// Fold string body from all text parts into a single string body. The mime allows users to + /// choose between plain text parts and html text parts. + pub fn fold_text_parts(&self, text_mime: &str) -> String { + if text_mime == "html" { + self.fold_text_html_parts() } else { - text_parts + self.fold_text_plain_parts() } } @@ -161,7 +187,7 @@ impl Msg { self.subject = format!("Re: {}", self.subject); } - // Text plain parts + // Body let plain_content = { let date = self .date @@ -178,8 +204,8 @@ impl Msg { let mut content = format!("\n\nOn {}, {} wrote:\n", date, sender); let mut glue = ""; - for line in self.join_text_plain_parts().trim().lines() { - if line == "-- \n" { + for line in self.fold_text_parts("plain").trim().lines() { + if line == DEFAULT_SIG_DELIM { break; } content.push_str(glue); @@ -192,50 +218,7 @@ impl Msg { content }; - // Text HTML parts - let html_content = { - let date = self - .date - .as_ref() - .map(|date| date.format("%d %b %Y, at %H:%M").to_string()) - .unwrap_or("unknown date".into()); - let sender = self - .reply_to - .as_ref() - .or(self.from.as_ref()) - .and_then(|addrs| addrs.first()) - .map(|addr| addr.name.to_owned().unwrap_or(addr.email.to_string())) - .unwrap_or("unknown sender".into()); - let mut content = format!("\n\nOn {}, {} wrote:\n", date, sender); - - let mut glue = ""; - for line in self.join_text_html_parts().trim().lines() { - if line == "-- \n" { - break; - } - content.push_str(glue); - content.push_str(">"); - content.push_str(if line.starts_with(">") { "" } else { " " }); - content.push_str(line); - glue = "\n"; - } - - content - }; - - self.parts = Parts::default(); - - if !plain_content.is_empty() { - self.parts.push(Part::TextPlain(TextPlainPart { - content: plain_content, - })); - } - - if !html_content.is_empty() { - self.parts.push(Part::TextHtml(TextHtmlPart { - content: html_content, - })); - } + self.parts = Parts(vec![Part::new_text_plain(plain_content)]); Ok(self) } @@ -271,73 +254,37 @@ impl Msg { self.subject = format!("Fwd: {}", self.subject); } - // Text plain parts - { - let mut content = String::default(); - content.push_str("\n\n-------- Forwarded Message --------\n"); - content.push_str(&format!("Subject: {}\n", prev_subject)); - if let Some(date) = prev_date { - content.push_str(&format!("Date: {}\n", date.to_rfc2822())); - } - if let Some(addrs) = prev_from.as_ref() { - content.push_str("From: "); - let mut glue = ""; - for addr in addrs { - content.push_str(glue); - content.push_str(&addr.to_string()); - glue = ", "; - } - content.push_str("\n"); - } - if let Some(addrs) = prev_to.as_ref() { - content.push_str("To: "); - let mut glue = ""; - for addr in addrs { - content.push_str(glue); - content.push_str(&addr.to_string()); - glue = ", "; - } - content.push_str("\n"); + // Body + let mut content = String::default(); + content.push_str("\n\n-------- Forwarded Message --------\n"); + content.push_str(&format!("Subject: {}\n", prev_subject)); + if let Some(date) = prev_date { + content.push_str(&format!("Date: {}\n", date.to_rfc2822())); + } + if let Some(addrs) = prev_from.as_ref() { + content.push_str("From: "); + let mut glue = ""; + for addr in addrs { + content.push_str(glue); + content.push_str(&addr.to_string()); + glue = ", "; } content.push_str("\n"); - content.push_str(&self.join_text_plain_parts()); - self.parts - .replace_text_plain_parts_with(TextPlainPart { content }) } - - // Text HTML parts - { - let mut content = String::default(); - content.push_str("\n\n-------- Forwarded Message --------\n"); - content.push_str(&format!("Subject: {}\n", prev_subject)); - if let Some(date) = prev_date { - content.push_str(&format!("Date: {}\n", date.to_rfc2822())); - } - if let Some(addrs) = prev_from.as_ref() { - content.push_str("From: "); - let mut glue = ""; - for addr in addrs { - content.push_str(glue); - content.push_str(&addr.to_string()); - glue = ", "; - } - content.push_str("\n"); - } - if let Some(addrs) = prev_to.as_ref() { - content.push_str("To: "); - let mut glue = ""; - for addr in addrs { - content.push_str(glue); - content.push_str(&addr.to_string()); - glue = ", "; - } - content.push_str("\n"); + if let Some(addrs) = prev_to.as_ref() { + content.push_str("To: "); + let mut glue = ""; + for addr in addrs { + content.push_str(glue); + content.push_str(&addr.to_string()); + glue = ", "; } content.push_str("\n"); - content.push_str(&self.join_text_html_parts()); - self.parts - .replace_text_html_parts_with(TextHtmlPart { content }) } + content.push_str("\n"); + content.push_str(&self.fold_text_parts("plain")); + self.parts + .replace_text_plain_parts_with(TextPlainPart { content }); Ok(self) } @@ -628,7 +575,7 @@ impl TryInto for &Msg { }; let mut multipart = - MultiPart::mixed().singlepart(SinglePart::plain(self.join_text_plain_parts())); + MultiPart::mixed().singlepart(SinglePart::plain(self.fold_text_plain_parts())); for part in self.attachments() { let filename = part.filename; @@ -803,12 +750,3 @@ pub fn parse_some_addrs(addrs: &Option>) -> Result None, }) } - -#[derive(Debug, Serialize)] -pub struct PrintableMsg(pub String); - -impl fmt::Display for PrintableMsg { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "{}", self.0) - } -} diff --git a/src/domain/msg/msg_handler.rs b/src/domain/msg/msg_handler.rs index 7d999e6..154bbfe 100644 --- a/src/domain/msg/msg_handler.rs +++ b/src/domain/msg/msg_handler.rs @@ -25,8 +25,6 @@ use crate::{ output::OutputServiceInterface, }; -use super::PrintableMsg; - /// Download all attachments from the given message sequence number to the user account downloads /// directory. pub fn attachments( @@ -203,19 +201,18 @@ pub fn move_( seq: &str, - // TODO: use the mime to select the right body - _mime: String, + text_mime: &str, raw: bool, output: &OutputService, imap: &mut ImapService, ) -> Result<()> { - if raw { - let msg = String::from_utf8(imap.find_raw_msg(&seq)?)?; - output.print(PrintableMsg(msg)) + let msg = if raw { + String::from_utf8(imap.find_raw_msg(&seq)?)? } else { - let msg = imap.find_msg(&seq)?.join_text_parts(); - output.print(PrintableMsg(msg)) - } + imap.find_msg(&seq)?.fold_text_parts(text_mime) + }; + + output.print(msg) } /// Reply to the given message UID. diff --git a/src/domain/msg/parts_entity.rs b/src/domain/msg/parts_entity.rs index b183747..cc8d6e1 100644 --- a/src/domain/msg/parts_entity.rs +++ b/src/domain/msg/parts_entity.rs @@ -27,6 +27,12 @@ pub enum Part { Binary(BinaryPart), } +impl Part { + pub fn new_text_plain(content: String) -> Self { + Self::TextPlain(TextPlainPart { content }) + } +} + #[derive(Debug, Clone, Default, Serialize)] #[serde(rename_all = "camelCase")] pub struct Parts(pub Vec); diff --git a/src/domain/msg/tpl_entity.rs b/src/domain/msg/tpl_entity.rs index 2dc9768..80e2cec 100644 --- a/src/domain/msg/tpl_entity.rs +++ b/src/domain/msg/tpl_entity.rs @@ -83,7 +83,7 @@ impl Tpl { if let Some(body) = opts.body { tpl.push_str(body); } else { - tpl.push_str(&msg.join_text_plain_parts()) + tpl.push_str(&msg.fold_text_plain_parts()) } // Signature diff --git a/src/main.rs b/src/main.rs index 2b28463..830f024 100644 --- a/src/main.rs +++ b/src/main.rs @@ -111,8 +111,8 @@ fn main() -> Result<()> { Some(msg_arg::Command::Move(seq, target)) => { return msg_handler::move_(seq, target, &output, &mut imap); } - Some(msg_arg::Command::Read(seq, mime, raw)) => { - return msg_handler::read(seq, mime, raw, &output, &mut imap); + Some(msg_arg::Command::Read(seq, text_mime, raw)) => { + return msg_handler::read(seq, text_mime, raw, &output, &mut imap); } Some(msg_arg::Command::Reply(seq, all, atts)) => { return msg_handler::reply(seq, all, atts, &account, &output, &mut imap, &mut smtp);