Skip to content

Allow separate mail and state paths #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Support for bearer token authentication (#40)
- New config options `mail_dir` and `state_dir` to allow mujmap's persistent
storage to be split out according to local policy (eg XDG dirs) (#33)

### Changed
- mujmap now prints a more comprehensive guide on how to recover from a missing
Expand Down
34 changes: 33 additions & 1 deletion mujmap.toml.example
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,43 @@ password_command = "pass [email protected]"

# convert_dos_to_unix = true

################################################################################
## Path config
##
## mujmap needs places to store email and bits of working state. These paths
## can all be specified in the config file, but most of the time you don't need to
## do this and it will choose reasonable defaults.
##
## mujmap has two "configuration modes" that inform how path defaults are chosen.
##
## In "directory mode", the --path option points to the directory containing
## mujmap.toml. In this mode, the default locations for mail and state files will
## be in subdirectories under this directory. This is the right mode to keep the
## entire mujmap data storage in a single place.
##
## In "file mode", the --path option points to a config file. In this mode, the
## default location for mail will be determined from notmuch's `mail_root` config
## variable, while the default location for state is operating-system specific.
## This mode is intended for tighter integration with notmuch "profiles".

## The cache directory in which to store mail files while they are being
## downloaded. The default is operating-system specific.
## downloaded. It must be an absolute path. The default is operating-system
## specific.

# cache_dir =

## The location of the mail dir, where downloaded email is finally stored. It
## must be an absolute path. If not given in the config file, mujmap will choose
## an appropriate default for the configuration mode. You probably don't want to
## set this.

# mail_dir =

## The directory to store state files in. It must be an absolute path. If not
## given, mujmap will choose an appropriate default for the configuration mode.
## You probably don't want to set this.

# state_dir =

################################################################################
## Tag config
Expand Down
11 changes: 2 additions & 9 deletions src/cache.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::config::Config;
use crate::jmap;
use crate::sync::NewEmail;
use directories::ProjectDirs;
use snafu::prelude::*;
use snafu::Snafu;
use std::fs;
Expand Down Expand Up @@ -54,15 +53,9 @@ pub struct Cache {
impl Cache {
/// Open the local store.
///
/// `mail_dir` *must* be a subdirectory of the notmuch path.
/// `mail_cur_dir` *must* be a subdirectory of the notmuch root maildir.
pub fn open(mail_cur_dir: impl AsRef<Path>, config: &Config) -> Result<Self> {
let project_dirs = ProjectDirs::from("sh.eliza", "", "mujmap").unwrap();
let default_cache_dir = project_dirs.cache_dir();

let cache_dir = match &config.cache_dir {
Some(cache_dir) => cache_dir.as_ref(),
None => default_cache_dir,
};
let cache_dir = &config.cache_dir;

// Ensure the cache dir exists.
fs::create_dir_all(cache_dir).context(CreateCacheDirSnafu { path: cache_dir })?;
Expand Down
92 changes: 85 additions & 7 deletions src/config.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use directories::ProjectDirs;
use lazy_static::lazy_static;
use serde::Deserialize;
use snafu::prelude::*;
use std::{
fs, io,
path::{Path, PathBuf},
path::PathBuf,
process::{Command, ExitStatus},
string::FromUtf8Error,
};
Expand All @@ -11,6 +13,9 @@ use snafu::Snafu;

#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Could not canonicalize config dir path: {}", source))]
Canonicalize { source: io::Error },

#[snafu(display("Could not read config file `{}': {}", filename.to_string_lossy(), source))]
ReadConfigFile {
filename: PathBuf,
Expand Down Expand Up @@ -40,6 +45,16 @@ pub enum Error {

#[snafu(display("Could not decode password command output as utf-8"))]
DecodePasswordCommand { source: FromUtf8Error },

#[snafu(display("`mail_dir' path is not a directory: {}", path.to_string_lossy()))]
MailDirPathNotDirectory { path: PathBuf },

#[snafu(display("`cache_dir' path must be absolute: {}", path.to_string_lossy()))]
CacheDirPathNotAbsolute { path: PathBuf },
#[snafu(display("`mail_dir' path must be absolute: {}", path.to_string_lossy()))]
MailDirPathNotAbsolute { path: PathBuf },
#[snafu(display("`state_dir' path must be absolute: {}", path.to_string_lossy()))]
StateDirPathNotAbsolute { path: PathBuf },
}

pub type Result<T, E = Error> = std::result::Result<T, E>;
Expand Down Expand Up @@ -88,8 +103,20 @@ pub struct Config {

/// The cache directory in which to store mail files while they are being downloaded. The
/// default is operating-system specific.
#[serde(default = "default_cache_dir")]
pub cache_dir: PathBuf,

/// The location of the mail dir, where downloaded email is finally stored. If not given,
/// mujmap will try to figure out what you want. You probably don't want to set this.
#[serde(default = "Default::default")]
pub mail_dir: Option<PathBuf>,

/// The directory to store state files in. If not given, mujmap will try to choose something
/// sensible. You probably don't want to set this.
// TODO: this is only `Option` to allow serde to omit it. It will never be `None` after
// `Config::from_path` returns. Making it non-optional somehow would be nice.
#[serde(default = "Default::default")]
pub cache_dir: Option<PathBuf>,
pub state_dir: Option<PathBuf>,

/// Customize the names and synchronization behaviors of notmuch tags with JMAP keywords and
/// mailboxes.
Expand Down Expand Up @@ -265,15 +292,54 @@ fn default_convert_dos_to_unix() -> bool {
true
}

lazy_static! {
static ref PROJECT_DIRS: ProjectDirs = ProjectDirs::from("sh.eliza", "", "mujmap").unwrap();
}

fn default_cache_dir() -> PathBuf {
PROJECT_DIRS.cache_dir().to_path_buf()
}

impl Config {
pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
let contents = fs::read_to_string(path.as_ref()).context(ReadConfigFileSnafu {
filename: path.as_ref(),
pub fn from_path(path: &PathBuf) -> Result<Self> {
let cpath = path.canonicalize().context(CanonicalizeSnafu)?;

let filename = if path.is_dir() {
cpath.join("mujmap.toml")
} else {
cpath.clone()
};

let contents = fs::read_to_string(&filename).context(ReadConfigFileSnafu {
filename: &filename,
})?;
let config: Self = toml::from_str(contents.as_str()).context(ParseConfigFileSnafu {
filename: path.as_ref(),
let mut config: Self = toml::from_str(contents.as_str()).context(ParseConfigFileSnafu {
filename: &filename,
})?;

// In directory mode, if paths aren't offered then we use the config dir itself.
if cpath.is_dir() {
if config.mail_dir.is_none() {
config.mail_dir = Some(cpath.clone());
} else {
ensure!(path.is_dir(), MailDirPathNotDirectorySnafu { path });
}
if config.state_dir.is_none() {
config.state_dir = Some(cpath.clone());
}
}
// In file mode, choose an appropriate state dir for the system.
else {
if config.state_dir.is_none() {
config.state_dir = Some(
PROJECT_DIRS
.state_dir()
.unwrap_or_else(|| PROJECT_DIRS.cache_dir())
.to_path_buf(),
);
}
}

// Perform final validation.
ensure!(
!(config.fqdn.is_some() && config.session_url.is_some()),
Expand All @@ -287,6 +353,18 @@ impl Config {
!config.tags.directory_separator.is_empty(),
EmptyDirectorySeparatorSnafu {}
);
ensure!(
config.cache_dir.is_absolute(),
CacheDirPathNotAbsoluteSnafu {
path: config.cache_dir
}
);
if let Some(ref path) = config.mail_dir {
ensure!(path.is_absolute(), MailDirPathNotAbsoluteSnafu { path });
}
if let Some(ref path) = config.state_dir {
ensure!(path.is_absolute(), StateDirPathNotAbsoluteSnafu { path });
}
Ok(config)
}

Expand Down
64 changes: 30 additions & 34 deletions src/local.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::jmap;
use crate::sync::NewEmail;
use crate::Config;
use const_format::formatcp;
use lazy_static::lazy_static;
use log::debug;
Expand All @@ -14,9 +15,7 @@ use std::collections::HashMap;
use std::collections::HashSet;
use std::fs;
use std::io;
use std::path::Path;
use std::path::PathBuf;
use std::path::StripPrefixError;

const ID_PATTERN: &'static str = r"[-A-Za-z0-9_]+";
const MAIL_PATTERN: &'static str = formatcp!(r"^({})\.({})(?:$|:)", ID_PATTERN, ID_PATTERN);
Expand All @@ -35,17 +34,6 @@ pub enum Error {
#[snafu(display("Could not canonicalize given path: {}", source))]
Canonicalize { source: io::Error },

#[snafu(display(
"Given maildir path `{}' is not a subdirectory of the notmuch root `{}'",
mail_dir.to_string_lossy(),
notmuch_root.to_string_lossy(),
))]
MailDirNotASubdirOfNotmuchRoot {
mail_dir: PathBuf,
notmuch_root: PathBuf,
source: StripPrefixError,
},

#[snafu(display("Could not open notmuch database: {}", source))]
OpenDatabase { source: notmuch::Error },

Expand Down Expand Up @@ -89,9 +77,7 @@ pub struct Local {

impl Local {
/// Open the local store.
///
/// `mail_dir` *must* be a subdirectory of the notmuch path.
pub fn open(mail_dir: impl AsRef<Path>, dry_run: bool) -> Result<Self> {
pub fn open(config: &Config, dry_run: bool) -> Result<Self> {
// Open the notmuch database with default config options.
let db = Database::open_with_config::<PathBuf, PathBuf>(
None,
Expand All @@ -105,30 +91,40 @@ impl Local {
)
.context(OpenDatabaseSnafu {})?;

// Get the relative directory of the maildir to the database path.
let canonical_db_path = db.path().canonicalize().context(CanonicalizeSnafu {})?;
let canonical_mail_dir_path = mail_dir
.as_ref()
// Get notmuch's idea of the mail root. If, for whatever reason, we get nothing back for
// that key (ancient version of notmuch?), use the database path.
let mail_root = db
.config(ConfigKey::MailRoot)
.map_or(db.path().into(), |root| PathBuf::from(root))
.canonicalize()
.context(CanonicalizeSnafu {})?;
let relative_mail_dir = canonical_mail_dir_path
.strip_prefix(&canonical_db_path)
.context(MailDirNotASubdirOfNotmuchRootSnafu {
mail_dir: &canonical_mail_dir_path,
notmuch_root: &canonical_db_path,
})?;

// Build the query to search for all mail in our maildir.
let all_mail_query = format!("path:\"{}/**\"", relative_mail_dir.to_str().unwrap());
// Figure out our maildir. Either the configured thing, or notmuch's mail root. Which in
// the worst case will be notmuch's database dir, but that's probably not the worst choice.
let mail_dir = match &config.mail_dir {
Some(ref dir) => dir.clone(),
_ => mail_root.clone(),
}
.canonicalize()
.context(CanonicalizeSnafu {})?;
debug!("mail dir: {}", mail_dir.to_string_lossy());

// Build the query to search for all mail in our maildir. If the maildir is under the
// notmuch mail root, then search under the relative maildir path (allowing multiple
// maildirs per notmuch dir). If not, assume this is the only maildir for the notmuch dir,
// and use a global query.
let all_mail_query = mail_dir
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the user types an absolute path for the mail_dir config option but misspells part of the path, causing strip_prefix to fail, they could experience some nasty consequences. We should keep/revise the MailDirNotASubdirOfNotmuchRoot error here in that case instead of swallowing. We can still support the config option being a relative path by checking if its is_absolute() returns false and handling that slightly differently.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure there's a lot we can do really; how do we know if it's an error if the path is valid?

The best I can think to do is, for a configured mail_dir, make sure it exists and bail out if not. Of course it's still possible to typo, but at least it won't plow on regardless.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I've added a commit that does just that if mail_dir is provided (if we compute it, it doesn't have to exist beforehand).

The more I think about it, the more I think this will be rare, because most of the time I expect that people would mess with their config until it's set up how they like, and would then walk away, so it's unlikely that they're going to get a path wrong in a system they've already set up. It's still good to have a safety for an obviously-wrong situation though.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this isn't quite what I meant. I think I understand why you changed it like this now, to specially handle the case for if mail_dir and the notmuch root directory are the same, is that right? According to the docs, it seems like strip_prefix will yield an empty PathBuf rather than Err for this case, so there's actually no need to specially handle this case.

https://doc.rust-lang.org/std/path/struct.PathBuf.html#method.strip_prefix

But the consequences of removing the MailDirNotASubdirOfNotmuchRoot error as this commit currently exists would result in something like the following:

  • The notmuch root directory is ~/Maildir and mujmap is configured so that its mail dir is ~/Maildir/mujmap. So far so good.
  • The user decides they want to move their maildir to an external hard drive, and moves it to /mnt/maildrive/Maildir, but forgets to update mujmap, which still points to ~/Maildir
  • The user runs mujmap. Both directories are valid, but because strip_prefix fails here, mujmap silently both decides that it owns the entire mail database, and insists on placing mail files in the old ~/Maildir and trying to add them to the notmuch database. Potentially destructive chaos ensues.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not really following your scenario, and I'm not sure if that's because it's imprecisely described, or if it's because we're not quite in agreement about what we're doing here and so talking past each other a little bit. I will try and write down what I think is happening here, and hopefully I will understand by the end.

I'm seeing mail_dir (and state_dir and cache_dir) as a kind of override; if you set it, it will be used over any other value.

So in your scenario, if they've set mail_dir to something like ~/Maildir/mujmap then they get what they ask for, regardless of whether or not it's a subdir of the notmuch root. This has to be this way to support "side-by-side" configs (like XDG).

If on the other hand they haven't set mail_dir explicitly and it's implied (by "directory" mode), then I don't see how mujmap could know this. The paths haven't changed, just the files were moved out without its knowledge.

Going back to the code, what I think I'm doing is that if the maildir is under the notmuch root, then the query becomes path:xxx/** where xxx is the path of the maildir relative to the notmuch root, but if it's outside the notmuch root, we use the global path:** because what else can we do? The code is effectively converting the empty PathBuf and Err to mean the same thing: use a global query.

Ahh, maybe that's it. Are you talking about the case where the user has explicitly set mail_dir to be under the notmuch root? And then they move it later, which will change the path prefix. That could be a problem (I have not thought hard about this yet). I hadn't considered that someone could deliberately set things up this way because I only know of yours and my use cases! Hmm.

I wonder if that's the real problem here, that the path prefix is kind of baking too much info about the existing paths into the notmuch database, and if they change we don't know how to hook it up properly?

If so I'm not sure we can fix this here. Some possible solutions (off the top of my head):

  • require that if the maildir is under the notmuch dir, you have to move them together so the prefixes match
  • find some other way to match message ids than by filename (maybe notmuch properties? otherwise hard, secondary index or something)
  • force notmuch's mail_root to follow mujmap's mail_dir so we can use the global path query only (breaks anyone that has multiple maildirs in a single notmuch index)

Something else might be to do a sanity check. If there's a bunch of stuff in the index, but none of the paths exist on disk, abort and warn the user. Hmm, and thinking more on that, maybe the answer is that if a message is not on disk, but is in notmuch, and is on the server, re-download it.

I'm not really arguing against the need for MailDirNotASubdirOfNotmuchRoot as such, I just don't know when it should fire. But honestly, I still wonder if this is a thing. Is moving things around a common thing that notmuch people like to do? (I am only a new notmuch person myself). If it's not, then maybe noticing something is off and forcing a resync is the right way.

(I wrote this over the course of a busy and distracted day so I feel like everything has fallen out of my head even more than the original plan fell out of my head in the last couple of months; I'm really struggling. I'll try to think about this more over the weekend).

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So in your scenario, if they've set mail_dir to something ~/Maildir/mujmap then they get what they ask for, regardless of whether or not its a subdir of the notmuch root. This has to be this way to support "side-by-side" configs (like XDG).

I think maybe this is where the confusion stems. notmuch doesn't actually support this configuration; everything has to be a subdirectory of database.mail_root. database.path used to refer to the same thing as database.mail_root, but is now independent and only refers to the actual database files if both of these options are specified (see here). If you've somehow managed to index files outside of the root, I think that's actually undefined behavior and probably not good. 😕 But it seems like the best way to prevent unintended behavior is to catch this discrepancy between the two configurations as early as possible, hence MailDirNotASubdirOfNotmuchRoot.

My example was mostly just me trying to address what I thought your misconception was rather than your actual example, rather than me trying to necessarily specifically prevent these kinds of database migration issues, but I do agree that a sanity check for a really large unexpected change like this would be a great feature to have.

(I wrote this over the course of a busy and distracted day so I feel like everything has fallen out of my head even more than the original plan fell out of my head in the last couple of months; I'm really struggling. I'll try to think about this more over the weekend).

That's okay, me too 🙂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think maybe this is where the confusion stems. notmuch doesn't actually support this configuration; everything has to be a subdirectory of database.mail_root. database.path used to refer to the same thing as database.mail_root, but is now independent and only refers to the actual database files if both of these options are specified (see here). If you've somehow managed to index files outside of the root, I think that's actually undefined behavior and probably not good. 😕

I agree that's where the confusion comes from; if using NOTMUCH_PROFILE these things can all be separated, and indeed, that's a large part of the reason I'm even doing this work!

My current notmuch config with profiles, fyi:

$ env NOTMUCH_PROFILE=home notmuch config list
...
database.backup_dir=/home/robn/.local/share/notmuch/home/backups
database.hook_dir=/home/robn/.local/share/notmuch/home/hooks
database.mail_root=/home/robn/.local/share/mujmap/home
database.path=/home/robn/.local/share/notmuch/home

And then I run mujmap with this PR as: env NOTMUCH_PROFILE=home mujmap --path /home/robn/.config/mujmap/home.toml. mail_dir is not set there, so its being taken from notmuch.

I did consider that maybe the intent of notmuch's profile support was still that the database was under the maildir, and maybe I was indeed getting accidental behaviour. The config doc is not clear on this, though perhaps that's just it not answering a question that no one asked. However, the tests for split-path configs seem to imagine that the database path and mail dir can be in very different locations, as perhaps does the commit that introduced it, and a code read seems to support that too.

I think I don't have any more to add. I (of course) think I'm right about what notmuch is supposed to be able to do here; if you're not persuaded then I'm not sure what we do next.

.strip_prefix(&mail_root)
.ok()
.filter(|rel| rel.components().count() > 0)
.map_or("path:**".to_string(), |rel| {
format!("path:\"{}/**\"", rel.to_str().unwrap())
});

// Ensure the maildir contains the standard cur, new, and tmp dirs.
let mail_cur_dir = canonical_mail_dir_path.join("cur");
let mail_cur_dir = mail_dir.join("cur");
if !dry_run {
for path in &[
&mail_cur_dir,
&canonical_mail_dir_path.join("new"),
&canonical_mail_dir_path.join("tmp"),
] {
for path in &[&mail_cur_dir, &mail_dir.join("new"), &mail_dir.join("tmp")] {
fs::create_dir_all(path).context(CreateMaildirDirSnafu { path })?;
}
}
Expand Down
9 changes: 3 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,12 @@ fn try_main(stdout: &mut StandardStream) -> Result<(), Error> {
.to_owned();

// Determine working directory and load all data files.
let mail_dir = args.path.clone().unwrap_or_else(|| PathBuf::from("."));

let config = Config::from_file(mail_dir.join("mujmap.toml")).context(OpenConfigFileSnafu {})?;
let config_path = args.path.clone().unwrap_or_else(|| PathBuf::from("."));
let config = Config::from_path(&config_path).context(OpenConfigFileSnafu {})?;
debug!("Using config: {:?}", config);

match args.command {
args::Command::Sync => {
sync(stdout, info_color_spec, mail_dir, args, config).context(SyncSnafu {})
}
args::Command::Sync => sync(stdout, info_color_spec, args, config).context(SyncSnafu {}),
args::Command::Send {
read_recipients,
recipients,
Expand Down
18 changes: 14 additions & 4 deletions src/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ pub enum Error {
#[snafu(display("Could not log string: {}", source))]
Log { source: io::Error },

#[snafu(display("Could not create mujmap state dir `{}': {}", path.to_string_lossy(), source))]
CreateStateDir { path: PathBuf, source: io::Error },

#[snafu(display("Could not read mujmap state file `{}': {}", filename.to_string_lossy(), source))]
ReadStateFile {
filename: PathBuf,
Expand Down Expand Up @@ -199,12 +202,19 @@ impl LatestState {
pub fn sync(
stdout: &mut StandardStream,
info_color_spec: ColorSpec,
mail_dir: PathBuf,
args: Args,
config: Config,
) -> Result<(), Error> {
let state_dir = config.state_dir.as_ref().unwrap();
debug!("state dir: {}", state_dir.to_string_lossy());

// Ensure the state dir exists.
fs::create_dir_all(&state_dir).context(CreateStateDirSnafu {
path: state_dir.clone(),
})?;

// Grab lock.
let lock_file_path = mail_dir.join("mujmap.lock");
let lock_file_path = state_dir.join("mujmap.lock");
let mut lock = LockFile::open(&lock_file_path).context(OpenLockFileSnafu {
path: lock_file_path,
})?;
Expand All @@ -215,14 +225,14 @@ pub fn sync(
}

// Load the intermediary state.
let latest_state_filename = mail_dir.join("mujmap.state.json");
let latest_state_filename = state_dir.join("mujmap.state.json");
let latest_state = LatestState::open(&latest_state_filename).unwrap_or_else(|e| {
warn!("{e}");
LatestState::empty()
});

// Open the local notmuch database.
let local = Local::open(mail_dir, args.dry_run).context(OpenLocalSnafu {})?;
let local = Local::open(&config, args.dry_run).context(OpenLocalSnafu {})?;

// Open the local cache.
let cache = Cache::open(&local.mail_cur_dir, &config).context(OpenCacheSnafu {})?;
Expand Down