From 27691c28838a3a5b005b22608e2455feb99585ee Mon Sep 17 00:00:00 2001
From: Paul Lawrence
Date: Tue, 20 Nov 2018 14:07:59 -0800
Subject: [PATCH] Checkpoints: Support validation and roll forward on fail

Test: Rolls backward, and if you hexedit a BOW page, rolls forward
Change-Id: If99a6c3a3bc3f615bd1445f5af14ea490a58e053
---
 Checkpoint.cpp | 140 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 99 insertions(+), 41 deletions(-)

diff --git a/Checkpoint.cpp b/Checkpoint.cpp
index 28855e6..7586a6c 100644
--- a/Checkpoint.cpp
+++ b/Checkpoint.cpp
@@ -229,6 +229,7 @@ struct log_sector {
     uint32_t magic;
     uint32_t count;
     uint32_t sequence;
+    uint64_t sector0;
     struct log_entry entries[];
 } __attribute__((packed));
 
@@ -289,62 +290,119 @@ void crc32(const void* data, size_t n_bytes, uint32_t* crc) {
 
 }  // namespace
 
-Status cp_restoreCheckpoint(const std::string& blockDevice) {
-    LOG(INFO) << "Restoring checkpoint on " << blockDevice;
-    std::fstream device(blockDevice, std::ios::binary | std::ios::in | std::ios::out);
-    if (!device) {
-        PLOG(ERROR) << "Cannot open " << blockDevice;
-        return Status::fromExceptionCode(errno, ("Cannot open " + blockDevice).c_str());
-    }
-    alignas(alignof(log_sector)) char ls_buffer[kBlockSize];
-    device.read(ls_buffer, kBlockSize);
-    log_sector& ls = *reinterpret_cast<log_sector*>(ls_buffer);
-    if (ls.magic != kMagic) {
-        LOG(ERROR) << "No magic";
-        return Status::fromExceptionCode(EINVAL, "No magic");
+static void read(std::fstream& device, std::vector<log_entry> const& logs, sector_t sector,
+                 char* buffer) {
+    for (auto l = logs.rbegin(); l != logs.rend(); l++)
+        if (sector >= l->source && (sector - l->source) * kSectorSize < l->size)
+            sector = sector - l->source + l->dest;
+
+    device.seekg(sector * kSectorSize);
+    device.read(buffer, kBlockSize);
+}
+
+static std::vector<char> read(std::fstream& device, std::vector<log_entry> const& logs,
+                              bool validating, sector_t sector, uint32_t size) {
+    if (!validating) {
+        std::vector<char> buffer(size);
+        device.seekg(sector * kSectorSize);
+        device.read(&buffer[0], size);
+        return buffer;
     }
 
-    LOG(INFO) << "Restoring " << ls.sequence << " log sectors";
+    // Crude approach at first where we do this sector by sector and just scan
+    // the entire logs for remappings each time
+    std::vector<char> buffer(size);
+
+    for (uint32_t i = 0; i < size; i += kBlockSize, sector += kBlockSize / kSectorSize)
+        read(device, logs, sector, &buffer[i]);
 
-    for (int sequence = ls.sequence; sequence >= 0; sequence--) {
-        device.seekg(0);
-        device.read(ls_buffer, kBlockSize);
-        ls = *reinterpret_cast<log_sector*>(ls_buffer);
+    return buffer;
+}
+
+Status cp_restoreCheckpoint(const std::string& blockDevice) {
+    bool validating = true;
+    std::string action = "Validating";
+
+    for (;;) {
+        std::vector<log_entry> logs;
+        Status status = Status::ok();
+
+        LOG(INFO) << action << " checkpoint on " << blockDevice;
+        std::fstream device(blockDevice, std::ios::binary | std::ios::in | std::ios::out);
+        if (!device) {
+            PLOG(ERROR) << "Cannot open " << blockDevice;
+            return Status::fromExceptionCode(errno, ("Cannot open " + blockDevice).c_str());
+        }
+        auto buffer = read(device, logs, validating, 0, kBlockSize);
+        log_sector& ls = *reinterpret_cast<log_sector*>(&buffer[0]);
         if (ls.magic != kMagic) {
-            LOG(ERROR) << "No magic!";
+            LOG(ERROR) << "No magic";
             return Status::fromExceptionCode(EINVAL, "No magic");
         }
 
-        if ((int)ls.sequence != sequence) {
-            LOG(ERROR) << "Expecting log sector " << sequence << " but got " << ls.sequence;
-            return Status::fromExceptionCode(
-                EINVAL, ("Expecting log sector " + std::to_string(sequence) + " but got " +
-                         std::to_string(ls.sequence))
-                            .c_str());
-        }
+        LOG(INFO) << action << " " << ls.sequence << " log sectors";
 
-        LOG(INFO) << "Restoring from log sector " << ls.sequence;
+        for (int sequence = ls.sequence; sequence >= 0 && status.isOk(); sequence--) {
+            auto buffer = read(device, logs, validating, 0, kBlockSize);
+            log_sector& ls = *reinterpret_cast<log_sector*>(&buffer[0]);
+            if (ls.magic != kMagic) {
+                LOG(ERROR) << "No magic!";
+                status = Status::fromExceptionCode(EINVAL, "No magic");
+                break;
+            }
 
-        for (log_entry* le = &ls.entries[ls.count - 1]; le >= ls.entries; --le) {
-            LOG(INFO) << "Restoring " << le->size << " bytes from sector " << le->dest << " to "
-                      << le->source << " with checksum " << std::hex << le->checksum;
-            std::vector<char> buffer(le->size);
-            device.seekg(le->dest * kSectorSize);
-            device.read(&buffer[0], le->size);
+            if ((int)ls.sequence != sequence) {
+                LOG(ERROR) << "Expecting log sector " << sequence << " but got " << ls.sequence;
+                status = Status::fromExceptionCode(
+                    EINVAL, ("Expecting log sector " + std::to_string(sequence) + " but got " +
+                             std::to_string(ls.sequence))
+                                .c_str());
+                break;
+            }
+
+            LOG(INFO) << action << " from log sector " << ls.sequence;
+
+            for (log_entry* le = &ls.entries[ls.count - 1]; le >= ls.entries; --le) {
+                LOG(INFO) << action << " " << le->size << " bytes from sector " << le->dest
+                          << " to " << le->source << " with checksum " << std::hex << le->checksum;
+                auto buffer = read(device, logs, validating, le->dest, le->size);
+                uint32_t checksum = le->source / (kBlockSize / kSectorSize);
+                for (size_t i = 0; i < le->size; i += kBlockSize) {
+                    crc32(&buffer[i], kBlockSize, &checksum);
+                }
+
+                if (le->checksum && checksum != le->checksum) {
+                    LOG(ERROR) << "Checksums don't match " << std::hex << checksum;
+                    status = Status::fromExceptionCode(EINVAL, "Checksums don't match");
+                    break;
+                }
 
-            uint32_t checksum = le->source / (kBlockSize / kSectorSize);
-            for (size_t i = 0; i < le->size; i += kBlockSize) {
-                crc32(&buffer[i], kBlockSize, &checksum);
+                logs.push_back(*le);
+
+                if (!validating) {
+                    device.seekg(le->source * kSectorSize);
+                    device.write(&buffer[0], le->size);
+                }
             }
+        }
 
-            if (le->checksum && checksum != le->checksum) {
-                LOG(ERROR) << "Checksums don't match " << std::hex << checksum;
-                return Status::fromExceptionCode(EINVAL, "Checksums don't match");
+        if (!status.isOk()) {
+            if (!validating) {
+                LOG(ERROR) << "Checkpoint restore failed even though checkpoint validation passed";
+                return status;
             }
 
-            device.seekg(le->source * kSectorSize);
-            device.write(&buffer[0], le->size);
+            LOG(WARNING) << "Checkpoint validation failed - attempting to roll forward";
+            auto buffer = read(device, logs, false, ls.sector0, kBlockSize);
+            device.seekg(0);
+            device.write(&buffer[0], kBlockSize);
+            return Status::ok();
         }
+
+        if (!validating) break;
+
+        validating = false;
+        action = "Restoring";
     }
 
     return Status::ok();