improve text parts (#221)

This commit is contained in:
Clément DOUIN 2021-10-11 23:04:33 +02:00 committed by GitHub
parent 284929d5dc
commit d21778c35e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 127 additions and 177 deletions

23
Cargo.lock generated
View file

@ -369,7 +369,7 @@ dependencies = [
"chrono",
"clap",
"env_logger",
"htmlescape",
"html-escape",
"imap",
"imap-proto",
"lettre",
@ -400,6 +400,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "html-escape"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "816ea801a95538fc5f53c836697b3f8b64a9d664c4f0b91efe1fe7c92e4dbcb7"
dependencies = [
"utf8-width",
]
[[package]]
name = "html5ever"
version = "0.25.1"
@ -414,12 +423,6 @@ dependencies = [
"syn",
]
[[package]]
name = "htmlescape"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
[[package]]
name = "httpdate"
version = "1.0.1"
@ -1388,6 +1391,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8-width"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cf7d77f457ef8dfa11e4cd5933c5ddb5dc52a94664071951219a97710f0a32b"
[[package]]
name = "uuid"
version = "0.8.2"

View file

@ -12,7 +12,7 @@ atty = "0.2.14"
chrono = "0.4.19"
clap = { version = "2.33.3", default-features = false, features = ["suggestions", "color"] }
env_logger = "0.8.3"
htmlescape = "0.3.1"
html-escape = "0.2.9"
imap = "3.0.0-alpha.4"
imap-proto = "0.14.3"
# This commit includes the de/serialization of the ContentType

View file

@ -15,7 +15,7 @@ type Seq<'a> = &'a str;
type PageSize = usize;
type Page = usize;
type Mbox<'a> = Option<&'a str>;
type Mime = String;
type TextMime<'a> = &'a str;
type Raw = bool;
type All = bool;
type RawMsg<'a> = &'a str;
@ -30,7 +30,7 @@ pub enum Command<'a> {
Forward(Seq<'a>, AttachmentsPaths<'a>),
List(Option<PageSize>, Page),
Move(Seq<'a>, Mbox<'a>),
Read(Seq<'a>, Mime, Raw),
Read(Seq<'a>, TextMime<'a>, Raw),
Reply(Seq<'a>, All, AttachmentsPaths<'a>),
Save(Mbox<'a>, RawMsg<'a>),
Search(Query, Option<PageSize>, Page),
@ -103,8 +103,8 @@ pub fn matches<'a>(m: &'a ArgMatches) -> Result<Option<Command<'a>>> {
debug!("read command matched");
let seq = m.value_of("seq").unwrap();
trace!("seq: {}", seq);
let mime = format!("text/{}", m.value_of("mime-type").unwrap());
trace!("mime: {}", mime);
let mime = m.value_of("mime-type").unwrap();
trace!("text mime: {}", mime);
let raw = m.is_present("raw");
trace!("raw: {}", raw);
return Ok(Some(Command::Read(seq, mime, raw)));

View file

@ -1,24 +1,24 @@
use ammonia;
use anyhow::{anyhow, Context, Error, Result};
use chrono::{DateTime, FixedOffset};
use htmlescape;
use html_escape;
use imap::types::Flag;
use lettre::message::{Attachment, MultiPart, SinglePart};
use regex::Regex;
use rfc2047_decoder;
use serde::Serialize;
use std::{
collections::HashSet,
convert::{TryFrom, TryInto},
fmt, fs,
fs,
path::PathBuf,
};
use crate::{
config::Account,
config::{Account, DEFAULT_SIG_DELIM},
domain::{
imap::ImapServiceInterface,
mbox::Mbox,
msg::{msg_utils, Flags, Parts, TextHtmlPart, TextPlainPart, Tpl, TplOverride},
msg::{msg_utils, BinaryPart, Flags, Part, Parts, TextPlainPart, Tpl, TplOverride},
smtp::SmtpServiceInterface,
},
output::OutputServiceInterface,
@ -28,8 +28,6 @@ use crate::{
},
};
use super::{BinaryPart, Part};
type Addr = lettre::message::Mailbox;
/// Representation of a message.
@ -66,39 +64,66 @@ impl Msg {
self.parts
.iter()
.filter_map(|part| match part {
Part::Binary(part) => Some(part.clone()),
Part::Binary(part) => Some(part.to_owned()),
_ => None,
})
.collect()
}
pub fn join_text_plain_parts(&self) -> String {
let text_parts = self
.parts
.iter()
.filter_map(|part| match part {
Part::TextPlain(part) => Some(part.content.to_owned()),
_ => None,
})
.collect::<Vec<_>>()
.join("\n\n");
let text_parts = ammonia::Builder::new()
.tags(Default::default())
.clean(&text_parts)
.to_string();
let text_parts = match htmlescape::decode_html(&text_parts) {
Ok(text_parts) => text_parts,
Err(_) => text_parts,
};
text_parts
/// Fold the body of all plain text parts into a single string, parts being separated by a blank
/// line.
///
/// If the folded plain text is empty, the HTML parts are folded the same way and used instead,
/// after being sanitized: HTML markup is removed, `&nbsp;` is replaced by a regular space, runs
/// of new lines are merged and HTML entities are decoded. Plain text parts are returned as they
/// are, without any sanitization.
pub fn fold_text_plain_parts(&self) -> String {
    let mut plain = String::new();
    let mut html = String::new();

    for part in self.parts.iter() {
        let (buffer, content) = match part {
            Part::TextPlain(part) => (&mut plain, &part.content),
            Part::TextHtml(part) => (&mut html, &part.content),
            _ => continue,
        };
        // The glue depends on the accumulated buffer (not on the part index), so leading
        // empty parts do not produce a dangling separator.
        if !buffer.is_empty() {
            buffer.push_str("\n\n");
        }
        buffer.push_str(content);
    }

    if !plain.is_empty() {
        return plain;
    }

    // Remove HTML markup (the set of allowed tags is empty)
    let without_markup = ammonia::Builder::new()
        .tags(HashSet::default())
        .clean(&html)
        .to_string();
    // Replace `&nbsp;` by regular space. This is a literal replacement, no regex needed.
    let without_nbsp = without_markup.replace("&nbsp;", " ");
    // Merge new line chars
    let merged = Regex::new(r"(\r?\n ?){2,}")
        .expect("hard-coded new line regex should be valid")
        .replace_all(&without_nbsp, "\n\n");
    // Decode HTML entities
    html_escape::decode_html_entities(&merged).into_owned()
}
pub fn join_text_html_parts(&self) -> String {
/// Fold string body from all HTML parts into a single string body.
fn fold_text_html_parts(&self) -> String {
let text_parts = self
.parts
.iter()
.filter_map(|part| match part {
Part::TextPlain(part) => Some(part.content.to_owned()),
Part::TextHtml(part) => Some(part.content.to_owned()),
_ => None,
})
.collect::<Vec<_>>()
@ -110,12 +135,13 @@ impl Msg {
text_parts
}
pub fn join_text_parts(&self) -> String {
let text_parts = self.join_text_plain_parts();
if text_parts.is_empty() {
self.join_text_html_parts()
/// Fold string body from all text parts into a single string body. The mime allows users to
/// choose between plain text parts and html text parts.
pub fn fold_text_parts(&self, text_mime: &str) -> String {
if text_mime == "html" {
self.fold_text_html_parts()
} else {
text_parts
self.fold_text_plain_parts()
}
}
@ -161,7 +187,7 @@ impl Msg {
self.subject = format!("Re: {}", self.subject);
}
// Text plain parts
// Body
let plain_content = {
let date = self
.date
@ -178,8 +204,8 @@ impl Msg {
let mut content = format!("\n\nOn {}, {} wrote:\n", date, sender);
let mut glue = "";
for line in self.join_text_plain_parts().trim().lines() {
if line == "-- \n" {
for line in self.fold_text_parts("plain").trim().lines() {
if line == DEFAULT_SIG_DELIM {
break;
}
content.push_str(glue);
@ -192,50 +218,7 @@ impl Msg {
content
};
// Text HTML parts
let html_content = {
let date = self
.date
.as_ref()
.map(|date| date.format("%d %b %Y, at %H:%M").to_string())
.unwrap_or("unknown date".into());
let sender = self
.reply_to
.as_ref()
.or(self.from.as_ref())
.and_then(|addrs| addrs.first())
.map(|addr| addr.name.to_owned().unwrap_or(addr.email.to_string()))
.unwrap_or("unknown sender".into());
let mut content = format!("\n\nOn {}, {} wrote:\n", date, sender);
let mut glue = "";
for line in self.join_text_html_parts().trim().lines() {
if line == "-- \n" {
break;
}
content.push_str(glue);
content.push_str(">");
content.push_str(if line.starts_with(">") { "" } else { " " });
content.push_str(line);
glue = "\n";
}
content
};
self.parts = Parts::default();
if !plain_content.is_empty() {
self.parts.push(Part::TextPlain(TextPlainPart {
content: plain_content,
}));
}
if !html_content.is_empty() {
self.parts.push(Part::TextHtml(TextHtmlPart {
content: html_content,
}));
}
self.parts = Parts(vec![Part::new_text_plain(plain_content)]);
Ok(self)
}
@ -271,73 +254,37 @@ impl Msg {
self.subject = format!("Fwd: {}", self.subject);
}
// Text plain parts
{
let mut content = String::default();
content.push_str("\n\n-------- Forwarded Message --------\n");
content.push_str(&format!("Subject: {}\n", prev_subject));
if let Some(date) = prev_date {
content.push_str(&format!("Date: {}\n", date.to_rfc2822()));
}
if let Some(addrs) = prev_from.as_ref() {
content.push_str("From: ");
let mut glue = "";
for addr in addrs {
content.push_str(glue);
content.push_str(&addr.to_string());
glue = ", ";
}
content.push_str("\n");
}
if let Some(addrs) = prev_to.as_ref() {
content.push_str("To: ");
let mut glue = "";
for addr in addrs {
content.push_str(glue);
content.push_str(&addr.to_string());
glue = ", ";
}
content.push_str("\n");
// Body
let mut content = String::default();
content.push_str("\n\n-------- Forwarded Message --------\n");
content.push_str(&format!("Subject: {}\n", prev_subject));
if let Some(date) = prev_date {
content.push_str(&format!("Date: {}\n", date.to_rfc2822()));
}
if let Some(addrs) = prev_from.as_ref() {
content.push_str("From: ");
let mut glue = "";
for addr in addrs {
content.push_str(glue);
content.push_str(&addr.to_string());
glue = ", ";
}
content.push_str("\n");
content.push_str(&self.join_text_plain_parts());
self.parts
.replace_text_plain_parts_with(TextPlainPart { content })
}
// Text HTML parts
{
let mut content = String::default();
content.push_str("\n\n-------- Forwarded Message --------\n");
content.push_str(&format!("Subject: {}\n", prev_subject));
if let Some(date) = prev_date {
content.push_str(&format!("Date: {}\n", date.to_rfc2822()));
}
if let Some(addrs) = prev_from.as_ref() {
content.push_str("From: ");
let mut glue = "";
for addr in addrs {
content.push_str(glue);
content.push_str(&addr.to_string());
glue = ", ";
}
content.push_str("\n");
}
if let Some(addrs) = prev_to.as_ref() {
content.push_str("To: ");
let mut glue = "";
for addr in addrs {
content.push_str(glue);
content.push_str(&addr.to_string());
glue = ", ";
}
content.push_str("\n");
if let Some(addrs) = prev_to.as_ref() {
content.push_str("To: ");
let mut glue = "";
for addr in addrs {
content.push_str(glue);
content.push_str(&addr.to_string());
glue = ", ";
}
content.push_str("\n");
content.push_str(&self.join_text_html_parts());
self.parts
.replace_text_html_parts_with(TextHtmlPart { content })
}
content.push_str("\n");
content.push_str(&self.fold_text_parts("plain"));
self.parts
.replace_text_plain_parts_with(TextPlainPart { content });
Ok(self)
}
@ -628,7 +575,7 @@ impl TryInto<lettre::Message> for &Msg {
};
let mut multipart =
MultiPart::mixed().singlepart(SinglePart::plain(self.join_text_plain_parts()));
MultiPart::mixed().singlepart(SinglePart::plain(self.fold_text_plain_parts()));
for part in self.attachments() {
let filename = part.filename;
@ -803,12 +750,3 @@ pub fn parse_some_addrs(addrs: &Option<Vec<imap_proto::Address>>) -> Result<Opti
None => None,
})
}
/// Newtype over a message held as a `String`, whose `Display` implementation writes the string
/// followed by a new line.
#[derive(Debug, Serialize)]
pub struct PrintableMsg(pub String);

impl fmt::Display for PrintableMsg {
    /// Write the wrapped string, then a trailing new line.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(body) = self;
        f.write_str(body)?;
        f.write_str("\n")
    }
}

View file

@ -25,8 +25,6 @@ use crate::{
output::OutputServiceInterface,
};
use super::PrintableMsg;
/// Download all attachments from the given message sequence number to the user account downloads
/// directory.
pub fn attachments<OutputService: OutputServiceInterface, ImapService: ImapServiceInterface>(
@ -203,19 +201,18 @@ pub fn move_<OutputService: OutputServiceInterface, ImapService: ImapServiceInte
/// Read a message by its sequence number.
pub fn read<OutputService: OutputServiceInterface, ImapService: ImapServiceInterface>(
seq: &str,
// TODO: use the mime to select the right body
_mime: String,
text_mime: &str,
raw: bool,
output: &OutputService,
imap: &mut ImapService,
) -> Result<()> {
if raw {
let msg = String::from_utf8(imap.find_raw_msg(&seq)?)?;
output.print(PrintableMsg(msg))
let msg = if raw {
String::from_utf8(imap.find_raw_msg(&seq)?)?
} else {
let msg = imap.find_msg(&seq)?.join_text_parts();
output.print(PrintableMsg(msg))
}
imap.find_msg(&seq)?.fold_text_parts(text_mime)
};
output.print(msg)
}
/// Reply to the given message UID.

View file

@ -27,6 +27,12 @@ pub enum Part {
Binary(BinaryPart),
}
impl Part {
pub fn new_text_plain(content: String) -> Self {
Self::TextPlain(TextPlainPart { content })
}
}
#[derive(Debug, Clone, Default, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Parts(pub Vec<Part>);

View file

@ -83,7 +83,7 @@ impl Tpl {
if let Some(body) = opts.body {
tpl.push_str(body);
} else {
tpl.push_str(&msg.join_text_plain_parts())
tpl.push_str(&msg.fold_text_plain_parts())
}
// Signature

View file

@ -111,8 +111,8 @@ fn main() -> Result<()> {
Some(msg_arg::Command::Move(seq, target)) => {
return msg_handler::move_(seq, target, &output, &mut imap);
}
Some(msg_arg::Command::Read(seq, mime, raw)) => {
return msg_handler::read(seq, mime, raw, &output, &mut imap);
Some(msg_arg::Command::Read(seq, text_mime, raw)) => {
return msg_handler::read(seq, text_mime, raw, &output, &mut imap);
}
Some(msg_arg::Command::Reply(seq, all, atts)) => {
return msg_handler::reply(seq, all, atts, &account, &output, &mut imap, &mut smtp);