From a485006ab147695fa28ec20c11ffef4f018253a3 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Mon, 29 Jun 2020 11:53:34 +0200 Subject: [PATCH] Configure backing device max_ratio for FUSE filesystems. By default FUSE filesystems have a max_ratio of 1%, meaning only 1% of dirty pages on the system can belong to a FUSE filesystem before we start writing back pages (and throttling, if writeback can't keep up). This limit is useful for untrusted filesystems, but in our case, we trust the FUSE filesystem. Since FUSE writes result in writes to the lower filesystem, FUSE should take at most 50%. Let's start with changing max_ratio to 40%, to avoid needless throttling. Bug: 159254170 Bug: 159770752 Test: inspect /sys/class/bdi manually after boot Change-Id: I467e3770fc4afba0a08fa480c0b86aa054c8b875 --- Utils.cpp | 50 ++++++++++++++++++++++++++++++++-------- Utils.h | 2 ++ model/EmulatedVolume.cpp | 21 +++++++++++++++++ model/PublicVolume.cpp | 3 +++ 4 files changed, 67 insertions(+), 9 deletions(-) diff --git a/Utils.cpp b/Utils.cpp index 6208efd..a9b7440 100644 --- a/Utils.cpp +++ b/Utils.cpp @@ -1386,22 +1386,54 @@ status_t EnsureDirExists(const std::string& path, mode_t mode, uid_t uid, gid_t return OK; } +// Gets the sysfs path for parameters of the backing device info (bdi) +static std::string getBdiPathForMount(const std::string& mount) { + // First figure out MAJOR:MINOR of mount. Simplest way is to stat the path. + struct stat info; + if (stat(mount.c_str(), &info) != 0) { + PLOG(ERROR) << "Failed to stat " << mount; + return ""; + } + unsigned int maj = major(info.st_dev); + unsigned int min = minor(info.st_dev); + + return StringPrintf("/sys/class/bdi/%u:%u", maj, min); +} + +// Configures max_ratio for the FUSE filesystem. 
+void ConfigureMaxDirtyRatioForFuse(const std::string& fuse_mount, unsigned int max_ratio) { + LOG(INFO) << "Configuring max_ratio of " << fuse_mount << " fuse filesystem to " << max_ratio; + if (max_ratio > 100) { + LOG(ERROR) << "Invalid max_ratio: " << max_ratio; + return; + } + std::string fuseBdiPath = getBdiPathForMount(fuse_mount); + if (fuseBdiPath == "") { + return; + } + std::string max_ratio_file = StringPrintf("%s/max_ratio", fuseBdiPath.c_str()); + unique_fd fd(TEMP_FAILURE_RETRY(open(max_ratio_file.c_str(), O_WRONLY | O_CLOEXEC))); + if (fd.get() == -1) { + PLOG(ERROR) << "Failed to open " << max_ratio_file; + return; + } + LOG(INFO) << "Writing " << max_ratio << " to " << max_ratio_file; + if (!WriteStringToFd(std::to_string(max_ratio), fd)) { + PLOG(ERROR) << "Failed to write to " << max_ratio_file; + } +} + // Configures read ahead property of the fuse filesystem with the mount point |fuse_mount| by // writing |read_ahead_kb| to the /sys/class/bdi/MAJOR:MINOR/read_ahead_kb. void ConfigureReadAheadForFuse(const std::string& fuse_mount, size_t read_ahead_kb) { LOG(INFO) << "Configuring read_ahead of " << fuse_mount << " fuse filesystem to " << read_ahead_kb << "kb"; - // First figure out MAJOR:MINOR of fuse_mount. Simplest way is to stat the path. - struct stat info; - if (stat(fuse_mount.c_str(), &info) != 0) { - PLOG(ERROR) << "Failed to stat " << fuse_mount; + std::string fuseBdiPath = getBdiPathForMount(fuse_mount); + if (fuseBdiPath == "") { return; } - unsigned int maj = major(info.st_dev); - unsigned int min = minor(info.st_dev); - LOG(INFO) << fuse_mount << " has major:minor " << maj << ":" << min; - // We found major:minor of our filesystem, time to configure read ahead! - std::string read_ahead_file = StringPrintf("/sys/class/bdi/%u:%u/read_ahead_kb", maj, min); + // We found the bdi path for our filesystem, time to configure read ahead! 
+ std::string read_ahead_file = StringPrintf("%s/read_ahead_kb", fuseBdiPath.c_str()); unique_fd fd(TEMP_FAILURE_RETRY(open(read_ahead_file.c_str(), O_WRONLY | O_CLOEXEC))); if (fd.get() == -1) { PLOG(ERROR) << "Failed to open " << read_ahead_file; diff --git a/Utils.h b/Utils.h index a1d34b8..04cbac4 100644 --- a/Utils.h +++ b/Utils.h @@ -176,6 +176,8 @@ bool FsyncDirectory(const std::string& dirname); bool writeStringToFile(const std::string& payload, const std::string& filename); +void ConfigureMaxDirtyRatioForFuse(const std::string& fuse_mount, unsigned int max_ratio); + void ConfigureReadAheadForFuse(const std::string& fuse_mount, size_t read_ahead_kb); status_t MountUserFuse(userid_t user_id, const std::string& absolute_lower_path, diff --git a/model/EmulatedVolume.cpp b/model/EmulatedVolume.cpp index 26d9582..db93bc2 100644 --- a/model/EmulatedVolume.cpp +++ b/model/EmulatedVolume.cpp @@ -404,6 +404,27 @@ status_t EmulatedVolume::doMount() { ConfigureReadAheadForFuse(GetFuseMountPathForUser(user_id, label), 256u); + // By default, FUSE has a max_ratio of 1%. This means that out of + // all dirty pages in the system, only 1% is allowed to belong to any + // FUSE filesystem. The reason this is in place is that FUSE + // filesystems shouldn't be trusted by default; a FUSE filesystem could + // take up say 100% of dirty pages, and subsequently refuse to write + // them back to storage. The kernel will then apply rate-limiting, and + // block other tasks from writing. For this particular FUSE filesystem + // however, we trust the implementation, because it is a part of the + // Android platform. So we can safely raise its ratio well above the 1% default. + // + // The reason we're setting this is that there's a suspicion that the + // kernel starts rate-limiting the FUSE filesystem under extreme + // memory pressure scenarios. 
While the kernel will only rate limit if + // the writeback can't keep up with the write rate, under extreme + // memory pressure the write rate may dip as well, in which case FUSE + // writes to a 1% max_ratio filesystem are throttled to an extreme amount. + // + // To prevent this, just give FUSE 40% max_ratio, meaning it can take + // up to 40% of all dirty pages in the system. + ConfigureMaxDirtyRatioForFuse(GetFuseMountPathForUser(user_id, label), 40u); + // All mounts where successful, disable scope guards sdcardfs_guard.Disable(); fuse_guard.Disable(); diff --git a/model/PublicVolume.cpp b/model/PublicVolume.cpp index 9ca782b..d40e3e3 100644 --- a/model/PublicVolume.cpp +++ b/model/PublicVolume.cpp @@ -255,6 +255,9 @@ status_t PublicVolume::doMount() { } ConfigureReadAheadForFuse(GetFuseMountPathForUser(user_id, stableName), 256u); + + // See comment in model/EmulatedVolume.cpp + ConfigureMaxDirtyRatioForFuse(GetFuseMountPathForUser(user_id, stableName), 40u); } return OK;