improve maildir id <-> hash mapping

This commit is contained in:
Clément DOUIN 2022-02-27 22:36:09 +01:00
parent c87512dbd4
commit 5b002b1f30
No known key found for this signature in database
GPG key ID: 353E4A18EE0FAB72
8 changed files with 253 additions and 160 deletions

View file

@ -24,7 +24,7 @@ pub trait Backend<'a> {
page_size: usize,
page: usize,
) -> Result<Box<dyn Envelopes>>;
fn find_envelopes(
fn search_envelopes(
&mut self,
mbox: &str,
query: &str,

View file

@ -260,7 +260,7 @@ impl<'a> Backend<'a> for ImapBackend<'a> {
Ok(Box::new(envelopes))
}
fn find_envelopes(
fn search_envelopes(
&mut self,
mbox: &str,
query: &str,

View file

@ -1,11 +1,11 @@
use anyhow::{anyhow, Context, Result};
use std::{
collections::HashSet,
collections::HashMap,
convert::TryInto,
env::temp_dir,
fs::{self, OpenOptions},
io::{BufRead, BufReader, Write},
iter::FromIterator,
ops::{Deref, DerefMut},
path::PathBuf,
};
@ -16,6 +16,134 @@ use crate::{
msg::{Envelopes, Msg},
};
#[derive(Debug, Default)]
pub struct MaildirEnvelopesIdHashMapper {
path: PathBuf,
map: HashMap<String, String>,
short_hash_len: usize,
}
impl MaildirEnvelopesIdHashMapper {
fn get_cache_path(mdir: &maildir::Maildir) -> PathBuf {
let path_digest = md5::compute(format!("{:?}", mdir.path()));
let file_name = format!("himalaya-hash-map-{:x}", path_digest);
temp_dir().join(file_name)
}
pub fn new(mdir: &maildir::Maildir) -> Result<Self> {
let mut mapper = Self::default();
mapper.path = Self::get_cache_path(mdir);
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&mapper.path)
.context("cannot open id hash map file")?;
let reader = BufReader::new(file);
for line in reader.lines() {
let line =
line.context("cannot read line from maildir envelopes id mapper cache file")?;
if mapper.short_hash_len == 0 {
mapper.short_hash_len = 2.max(line.parse().unwrap_or(2));
} else {
let (hash, id) = line.split_once(' ').ok_or_else(|| {
anyhow!(
"cannot parse line {:?} from maildir envelopes id mapper cache file",
line
)
})?;
mapper.insert(hash.to_owned(), id.to_owned());
}
}
Ok(mapper)
}
pub fn find(&self, short_hash: &str) -> Result<String> {
let matching_hashes: Vec<_> = self
.keys()
.filter(|hash| hash.starts_with(short_hash))
.collect();
if matching_hashes.len() == 0 {
Err(anyhow!(
"cannot find maildir message id from short hash {:?}",
short_hash,
))
} else if matching_hashes.len() > 1 {
Err(anyhow!(
"the short hash {:?} matches more than one hash: {}",
short_hash,
matching_hashes
.iter()
.map(|s| s.to_string())
.collect::<Vec<_>>()
.join(", ")
)
.context(format!(
"cannot find maildir message id from short hash {:?}",
short_hash
)))
} else {
Ok(self.get(matching_hashes[0]).unwrap().to_owned())
}
}
fn append(&mut self, lines: Vec<(String, String)>) -> Result<usize> {
let mut entries = String::new();
self.extend(lines.clone());
for (hash, id) in self.iter() {
entries.push_str(&format!("{} {}\n", hash, id));
}
for (hash, id) in lines {
loop {
let short_hash = &hash[0..self.short_hash_len];
let conflict_found = self
.map
.keys()
.find(|cached_hash| {
cached_hash.starts_with(short_hash) && *cached_hash != &hash
})
.is_some();
if self.short_hash_len > 32 || !conflict_found {
break;
}
self.short_hash_len += 1;
}
entries.push_str(&format!("{} {}\n", hash, id));
}
OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&self.path)
.context("cannot open maildir id hash map cache")?
.write(format!("{}\n{}", self.short_hash_len, entries).as_bytes())
.context("cannot write maildir id hash map cache")?;
Ok(self.short_hash_len)
}
}
impl Deref for MaildirEnvelopesIdHashMapper {
type Target = HashMap<String, String>;
fn deref(&self) -> &Self::Target {
&self.map
}
}
impl DerefMut for MaildirEnvelopesIdHashMapper {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.map
}
}
pub struct MaildirBackend<'a> {
mdir: maildir::Maildir,
account_config: &'a AccountConfig,
@ -63,48 +191,6 @@ impl<'a> MaildirBackend<'a> {
.map(maildir::Maildir::from)
}
}
fn write_envelopes_cache(cache: &[u8]) -> Result<()> {
OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(temp_dir().join("himalaya-msg-id-hash-map"))
.context("cannot open maildir id hash map cache")?
.write(cache)
.map(|_| ())
.context("cannot write maildir id hash map cache")
}
fn get_id_from_short_hash(short_hash: &str) -> Result<String> {
let path = temp_dir().join("himalaya-msg-id-hash-map");
let file = OpenOptions::new()
.read(true)
.open(path)
.context("cannot open id hash map file")?;
let reader = BufReader::new(file);
let mut id_found = None;
for line in reader.lines() {
let line = line.context("cannot read id hash map line")?;
let line = line
.split_once(' ')
.ok_or_else(|| anyhow!("cannot parse id hash map line {:?}", line));
match line {
Ok((id, hash)) if hash.starts_with(short_hash) => {
if id_found.is_some() {
return Err(anyhow!(
"cannot find id from hash {:?}: multiple match found",
short_hash
));
} else {
id_found = Some(id.to_owned())
}
}
_ => continue,
}
}
id_found.ok_or_else(|| anyhow!("cannot find id from hash {:?}", short_hash))
}
}
impl<'a> Backend<'a> for MaildirBackend<'a> {
@ -125,54 +211,20 @@ impl<'a> Backend<'a> for MaildirBackend<'a> {
fn get_envelopes(
&mut self,
mdir: &str,
mdir_str: &str,
page_size: usize,
page: usize,
) -> Result<Box<dyn Envelopes>> {
let mdir = self.get_mdir_from_name(mdir)?;
let mdir = self.get_mdir_from_name(mdir_str)?;
// Reads envelopes from the "cur" folder of the selected
// maildir.
let mut envelopes: MaildirEnvelopes = mdir
.list_cur()
.try_into()
.context("cannot parse maildir envelopes from {:?}")?;
Self::write_envelopes_cache(
envelopes
.iter()
.map(|env| format!("{} {:x}", env.id, md5::compute(&env.id)))
.collect::<Vec<_>>()
.join("\n")
.as_bytes(),
)?;
envelopes.sort_by(|a, b| b.date.partial_cmp(&a.date).unwrap());
envelopes
.iter_mut()
.for_each(|env| env.id = format!("{:x}", md5::compute(&env.id)));
let mut short_id_len = 2;
loop {
let short_ids: Vec<_> = envelopes
.iter()
.map(|env| env.id[0..short_id_len].to_string())
.collect();
let short_ids_set: HashSet<String> = HashSet::from_iter(short_ids.iter().cloned());
if short_id_len > 32 {
break;
}
if short_ids.len() == short_ids_set.len() {
break;
}
short_id_len += 1;
}
envelopes
.iter_mut()
.for_each(|env| env.id = env.id[0..short_id_len].to_string());
// Calculates pagination boundaries.
let page_begin = page * page_size;
if page_begin > envelopes.len() {
return Err(anyhow!(format!(
@ -181,12 +233,34 @@ impl<'a> Backend<'a> for MaildirBackend<'a> {
)));
}
let page_end = envelopes.len().min(page_begin + page_size);
// Sorts envelopes by most recent date.
envelopes.sort_by(|a, b| b.date.partial_cmp(&a.date).unwrap());
// Applies pagination boundaries.
envelopes.0 = envelopes[page_begin..page_end].to_owned();
// Writes envelope ids and their hashes to a cache file. The
// cache file name is based on the name of the given maildir:
// this way there is one cache per maildir.
let short_hash_len = {
let mut mapper = MaildirEnvelopesIdHashMapper::new(&mdir)?;
let entries = envelopes
.iter()
.map(|env| (env.hash.to_owned(), env.id.to_owned()))
.collect();
mapper.append(entries)?
};
// Shorten envelopes hash.
envelopes
.iter_mut()
.for_each(|env| env.hash = env.hash[0..short_hash_len].to_owned());
Ok(Box::new(envelopes))
}
fn find_envelopes(
fn search_envelopes(
&mut self,
_mdir: &str,
_query: &str,
@ -204,89 +278,112 @@ impl<'a> Backend<'a> for MaildirBackend<'a> {
let flags: MaildirFlags = flags.try_into()?;
let id = mdir
.store_cur_with_flags(msg, &flags.to_string())
.context(format!(
"cannot add message to the \"cur\" folder of maildir {:?}",
mdir.path()
))?;
Ok(Box::new(id))
.context(format!("cannot add maildir message at {:?}", mdir.path()))?;
let hash = format!("{:x}", md5::compute(&id));
// Appends hash line to the maildir cache file.
let mut mapper = MaildirEnvelopesIdHashMapper::new(&mdir)?;
mapper.append(vec![(hash.clone(), id)])?;
Ok(Box::new(hash))
}
fn get_msg(&mut self, mdir: &str, hash: &str) -> Result<Msg> {
fn get_msg(&mut self, mdir: &str, short_hash: &str) -> Result<Msg> {
let mdir = self.get_mdir_from_name(mdir)?;
let id = Self::get_id_from_short_hash(hash)
.context(format!("cannot get msg from hash {:?}", hash))?;
let mut mail_entry = mdir.find(&id).ok_or_else(|| {
anyhow!(
"cannot find maildir message {:?} in {:?}",
hash,
mdir.path()
)
})?;
let id = MaildirEnvelopesIdHashMapper::new(&mdir)?
.find(short_hash)
.context(format!(
"cannot get maildir message from short hash {:?}",
short_hash
))?;
let mut mail_entry = mdir
.find(&id)
.ok_or_else(|| anyhow!("cannot find maildir message {:?} in {:?}", id, mdir.path()))?;
let parsed_mail = mail_entry.parsed().context(format!(
"cannot parse maildir message {:?} in {:?}",
hash,
id,
mdir.path()
))?;
Msg::from_parsed_mail(parsed_mail, self.account_config).context(format!(
"cannot parse maildir message {:?} from {:?}",
hash,
id,
mdir.path()
))
}
fn copy_msg(&mut self, mdir_src: &str, mdir_dst: &str, id: &str) -> Result<()> {
fn copy_msg(&mut self, mdir_src: &str, mdir_dst: &str, short_hash: &str) -> Result<()> {
let mdir_src = self.get_mdir_from_name(mdir_src)?;
let mdir_dst = self.get_mdir_from_name(mdir_dst)?;
mdir_src.copy_to(id, &mdir_dst).context(format!(
let id = MaildirEnvelopesIdHashMapper::new(&mdir_src)?.find(short_hash)?;
mdir_src.copy_to(&id, &mdir_dst).context(format!(
"cannot copy message {:?} from maildir {:?} to maildir {:?}",
id,
mdir_src.path(),
mdir_dst.path()
))
))?;
// Appends hash line to the destination maildir cache file.
MaildirEnvelopesIdHashMapper::new(&mdir_dst)?
.append(vec![(format!("{:x}", md5::compute(&id)), id)])?;
Ok(())
}
fn move_msg(&mut self, mdir_src: &str, mdir_dst: &str, id: &str) -> Result<()> {
fn move_msg(&mut self, mdir_src: &str, mdir_dst: &str, short_hash: &str) -> Result<()> {
let mdir_src = self.get_mdir_from_name(mdir_src)?;
let mdir_dst = self.get_mdir_from_name(mdir_dst)?;
mdir_src.move_to(id, &mdir_dst).context(format!(
let id = MaildirEnvelopesIdHashMapper::new(&mdir_src)?.find(short_hash)?;
mdir_src.move_to(&id, &mdir_dst).context(format!(
"cannot move message {:?} from maildir {:?} to maildir {:?}",
id,
mdir_src.path(),
mdir_dst.path()
))
))?;
// Appends hash line to the destination maildir cache file.
MaildirEnvelopesIdHashMapper::new(&mdir_dst)?
.append(vec![(format!("{:x}", md5::compute(&id)), id)])?;
Ok(())
}
fn del_msg(&mut self, mdir: &str, id: &str) -> Result<()> {
fn del_msg(&mut self, mdir: &str, short_hash: &str) -> Result<()> {
let mdir = self.get_mdir_from_name(mdir)?;
mdir.delete(id).context(format!(
let id = MaildirEnvelopesIdHashMapper::new(&mdir)?.find(short_hash)?;
mdir.delete(&id).context(format!(
"cannot delete message {:?} from maildir {:?}",
id,
mdir.path()
))
}
fn add_flags(&mut self, mdir: &str, id: &str, flags_str: &str) -> Result<()> {
fn add_flags(&mut self, mdir: &str, short_hash: &str, flags_str: &str) -> Result<()> {
let mdir = self.get_mdir_from_name(mdir)?;
let id = MaildirEnvelopesIdHashMapper::new(&mdir)?.find(short_hash)?;
let flags: MaildirFlags = flags_str.try_into()?;
mdir.add_flags(id, &flags.to_string()).context(format!(
mdir.add_flags(&id, &flags.to_string()).context(format!(
"cannot add flags {:?} to maildir message {:?}",
flags_str, id
))
}
fn set_flags(&mut self, mdir: &str, id: &str, flags_str: &str) -> Result<()> {
fn set_flags(&mut self, mdir: &str, short_hash: &str, flags_str: &str) -> Result<()> {
let mdir = self.get_mdir_from_name(mdir)?;
let id = MaildirEnvelopesIdHashMapper::new(&mdir)?.find(short_hash)?;
let flags: MaildirFlags = flags_str.try_into()?;
mdir.set_flags(id, &flags.to_string()).context(format!(
mdir.set_flags(&id, &flags.to_string()).context(format!(
"cannot set flags {:?} to maildir message {:?}",
flags_str, id
))
}
fn del_flags(&mut self, mdir: &str, id: &str, flags_str: &str) -> Result<()> {
fn del_flags(&mut self, mdir: &str, short_hash: &str, flags_str: &str) -> Result<()> {
let mdir = self.get_mdir_from_name(mdir)?;
let id = MaildirEnvelopesIdHashMapper::new(&mdir)?.find(short_hash)?;
let flags: MaildirFlags = flags_str.try_into()?;
mdir.remove_flags(id, &flags.to_string()).context(format!(
mdir.remove_flags(&id, &flags.to_string()).context(format!(
"cannot remove flags {:?} from maildir message {:?}",
flags_str, id
))

View file

@ -56,6 +56,9 @@ pub struct MaildirEnvelope {
/// Represents the id of the message.
pub id: String,
/// Represents the MD5 hash of the message id.
pub hash: String,
/// Represents the flags of the message.
pub flags: MaildirFlags,
@ -80,14 +83,14 @@ impl Table for MaildirEnvelope {
}
fn row(&self) -> Row {
let id = self.id.clone();
let hash = self.hash.clone();
let unseen = !self.flags.contains(&MaildirFlag::Seen);
let flags = self.flags.to_symbols_string();
let subject = &self.subject;
let sender = &self.sender;
let date = &self.date;
Row::new()
.cell(Cell::new(id).bold_if(unseen).red())
.cell(Cell::new(hash).bold_if(unseen).red())
.cell(Cell::new(flags).bold_if(unseen).white())
.cell(Cell::new(subject).shrinkable().bold_if(unseen).green())
.cell(Cell::new(sender).bold_if(unseen).blue())
@ -124,13 +127,13 @@ impl<'a> TryFrom<RawMaildirEnvelope> for MaildirEnvelope {
fn try_from(mut mail_entry: RawMaildirEnvelope) -> Result<Self, Self::Error> {
info!("begin: try building envelope from maildir parsed mail");
let mut envelope = Self {
id: mail_entry.id().into(),
flags: (&mail_entry)
.try_into()
.context("cannot parse maildir flags")?,
..Self::default()
};
let mut envelope = Self::default();
envelope.id = mail_entry.id().into();
envelope.hash = format!("{:x}", md5::compute(&envelope.id));
envelope.flags = (&mail_entry)
.try_into()
.context("cannot parse maildir flags")?;
let parsed_mail = mail_entry
.parsed()

View file

@ -120,7 +120,10 @@ mod tests {
fn del_mbox(&mut self, _: &str) -> Result<()> {
unimplemented!();
}
fn get_envelopes(
fn get_envelopes(&mut self, _: &str, _: usize, _: usize) -> Result<Box<dyn Envelopes>> {
unimplemented!()
}
fn search_envelopes(
&mut self,
_: &str,
_: &str,

View file

@ -273,7 +273,7 @@ pub fn search<'a, P: PrinterService, B: Backend<'a> + ?Sized>(
) -> Result<()> {
let page_size = page_size.unwrap_or(config.default_page_size);
debug!("page size: {}", page_size);
let msgs = backend.find_envelopes(mbox, &query, "", page_size, page)?;
let msgs = backend.search_envelopes(mbox, &query, "", page_size, page)?;
trace!("messages: {:#?}", msgs);
printer.print_table(msgs, PrintTableOpts { max_width })
}
@ -292,7 +292,7 @@ pub fn sort<'a, P: PrinterService, B: Backend<'a> + ?Sized>(
) -> Result<()> {
let page_size = page_size.unwrap_or(config.default_page_size);
debug!("page size: {}", page_size);
let msgs = backend.find_envelopes(mbox, &query, &sort, page_size, page)?;
let msgs = backend.search_envelopes(mbox, &query, &sort, page_size, page)?;
trace!("envelopes: {:#?}", msgs);
printer.print_table(msgs, PrintTableOpts { max_width })
}

View file

@ -43,9 +43,7 @@ fn test_imap_backend() {
assert_eq!("Ceci est un message.", msg.fold_text_plain_parts());
// check that the envelope of the added message exists
let envelopes = imap
.get_envelopes("Mailbox1", "arrival:desc", "ALL", 10, 0)
.unwrap();
let envelopes = imap.get_envelopes("Mailbox1", 10, 0).unwrap();
let envelopes: &ImapEnvelopes = envelopes.as_any().downcast_ref().unwrap();
assert_eq!(1, envelopes.len());
let envelope = envelopes.first().unwrap();
@ -55,28 +53,20 @@ fn test_imap_backend() {
// check that the message can be copied
imap.copy_msg("Mailbox1", "Mailbox2", &envelope.id.to_string())
.unwrap();
let envelopes = imap
.get_envelopes("Mailbox1", "arrival:desc", "ALL", 10, 0)
.unwrap();
let envelopes = imap.get_envelopes("Mailbox1", 10, 0).unwrap();
let envelopes: &ImapEnvelopes = envelopes.as_any().downcast_ref().unwrap();
assert_eq!(1, envelopes.len());
let envelopes = imap
.get_envelopes("Mailbox2", "arrival:desc", "ALL", 10, 0)
.unwrap();
let envelopes = imap.get_envelopes("Mailbox2", 10, 0).unwrap();
let envelopes: &ImapEnvelopes = envelopes.as_any().downcast_ref().unwrap();
assert_eq!(1, envelopes.len());
// check that the message can be moved
imap.move_msg("Mailbox1", "Mailbox2", &envelope.id.to_string())
.unwrap();
let envelopes = imap
.get_envelopes("Mailbox1", "arrival:desc", "ALL", 10, 0)
.unwrap();
let envelopes = imap.get_envelopes("Mailbox1", 10, 0).unwrap();
let envelopes: &ImapEnvelopes = envelopes.as_any().downcast_ref().unwrap();
assert_eq!(0, envelopes.len());
let envelopes = imap
.get_envelopes("Mailbox2", "arrival:desc", "ALL", 10, 0)
.unwrap();
let envelopes = imap.get_envelopes("Mailbox2", 10, 0).unwrap();
let envelopes: &ImapEnvelopes = envelopes.as_any().downcast_ref().unwrap();
assert_eq!(2, envelopes.len());
let id = envelopes.first().unwrap().id.to_string();

View file

@ -1,5 +1,5 @@
use maildir::Maildir;
use std::{env, fs};
use std::{collections::HashMap, env, fs, iter::FromIterator};
use himalaya::{
backends::{Backend, MaildirBackend, MaildirEnvelopes},
@ -19,7 +19,7 @@ fn test_maildir_backend() {
// configure accounts
let account_config = AccountConfig {
inbox_folder: "INBOX".into(),
mailboxes: HashMap::from_iter([("inbox".into(), "INBOX".into())]),
..AccountConfig::default()
};
let mdir_config = MaildirBackendConfig {
@ -33,16 +33,16 @@ fn test_maildir_backend() {
// check that a message can be added
let msg = include_bytes!("./emails/alice-to-patrick.eml");
let id = mdir.add_msg("INBOX", msg, "seen").unwrap().to_string();
let hash = mdir.add_msg("INBOX", msg, "seen").unwrap().to_string();
// check that the added message exists
let msg = mdir.get_msg("INBOX", &id).unwrap();
let msg = mdir.get_msg("INBOX", &hash).unwrap();
assert_eq!("alice@localhost", msg.from.clone().unwrap().to_string());
assert_eq!("patrick@localhost", msg.to.clone().unwrap().to_string());
assert_eq!("Ceci est un message.", msg.fold_text_plain_parts());
// check that the envelope of the added message exists
let envelopes = mdir.get_envelopes("INBOX", "", "cur", 10, 0).unwrap();
let envelopes = mdir.get_envelopes("INBOX", 10, 0).unwrap();
let envelopes: &MaildirEnvelopes = envelopes.as_any().downcast_ref().unwrap();
let envelope = envelopes.first().unwrap();
assert_eq!(1, envelopes.len());
@ -50,19 +50,19 @@ fn test_maildir_backend() {
assert_eq!("Plain message", envelope.subject);
// check that the message can be copied
mdir.copy_msg("INBOX", "Subdir", &envelope.id).unwrap();
assert!(mdir.get_msg("INBOX", &id).is_ok());
assert!(mdir.get_msg("Subdir", &id).is_ok());
assert!(mdir_subdir.get_msg("INBOX", &id).is_ok());
mdir.copy_msg("INBOX", "Subdir", &envelope.hash).unwrap();
assert!(mdir.get_msg("INBOX", &hash).is_ok());
assert!(mdir.get_msg("Subdir", &hash).is_ok());
assert!(mdir_subdir.get_msg("INBOX", &hash).is_ok());
// check that the message can be moved
mdir.move_msg("INBOX", "Subdir", &envelope.id).unwrap();
assert!(mdir.get_msg("INBOX", &id).is_err());
assert!(mdir.get_msg("Subdir", &id).is_ok());
assert!(mdir_subdir.get_msg("INBOX", &id).is_ok());
mdir.move_msg("INBOX", "Subdir", &envelope.hash).unwrap();
assert!(mdir.get_msg("INBOX", &hash).is_err());
assert!(mdir.get_msg("Subdir", &hash).is_ok());
assert!(mdir_subdir.get_msg("INBOX", &hash).is_ok());
// check that the message can be deleted
mdir.del_msg("Subdir", &id).unwrap();
assert!(mdir.get_msg("Subdir", &id).is_err());
assert!(mdir_subdir.get_msg("INBOX", &id).is_err());
mdir.del_msg("Subdir", &hash).unwrap();
assert!(mdir.get_msg("Subdir", &hash).is_err());
assert!(mdir_subdir.get_msg("INBOX", &hash).is_err());
}