mirror of
https://github.com/tkuschel/bees.git
synced 2025-11-17 23:09:14 +01:00
The btrfs LOGICAL_INO ioctl has no way to report references to compressed
blocks precisely, so we must always consider all references to a
compressed block, and discard those that do not have the desired offset.
When we encounter compressed shared extents containing a mix of unique
and duplicate data, we attempt to replace all references to the mixed
extent with the same number of references to multiple extents consisting
entirely of unique or duplicate blocks. An early exit from the loop
in BeesResolver::for_each_extent_ref was stopping this operation early,
after replacing as few as one shared reference. This left other shared
references to the unique data on the filesystem, effectively creating
new dup data.
The failing pattern looks like this:
dedup: replace 0x14000..0x18000 from some other extent
copy: 0x10000..0x14000
dedup: replace 0x10000..0x14000 with the copy
[may be multiple dedup lines due to multiple shared references]
copy: 0x18000..0x1c000
[missing dedup 0x18000..0x1c000 with the copy here]
scan: 0x10000 [++++dddd++++] 0x1c000
If the extent 0x10000..0x1c000 is shared and compressed, we will make
a copy of the extent at 0x18000..0x1c000. When we try to dedup this
copy extent, LOGICAL_INO will return a mix of references to the data
at logical 0x10000 and 0x18000 (which are both references to the
original shared extent with different offsets). If we break out
of the loop too early, we will stop as soon as a reference to 0x10000
is found, and ignore all other references to the extent we are trying
to remove.
The copy at the beginning of the extent (0x10000..0x14000) usually
works because all references to the extent cover the entire extent.
When bees performs the dedup at 0x14000..0x18000, bees itself creates
the shared references with different offsets.
Uncompressed extents were not affected because LOGICAL_INO can locate
physical blocks precisely if they reside in uncompressed extents.
This change will hurt performance when looking up old physical addresses
that belong to new data, but that is a much less urgent problem.
Signed-off-by: Zygo Blaxell <bees@furryterror.org>
486 lines
14 KiB
C++
486 lines
14 KiB
C++
#include "bees.h"

#include "crucible/limits.h"
#include "crucible/string.h"

#include <utility>
|
|
|
|
using namespace crucible;
|
|
using namespace std;
|
|
|
|
/// Point this resolver at a new block address and look up its references.
///
/// All per-address state (the found/wrong flags, the cached reference list,
/// the set of matched ranges and the reference count) is reset first, then
/// the context is asked to resolve the address.  The reference list, its
/// size, and the toxic flag are taken from the resolve result.
///
/// @param new_addr address to resolve; rejected by THROW_CHECK1 (with
///                 invalid_argument as the exception type) if it is magic
/// @return the address now held by this resolver
BeesAddress
BeesResolver::addr(BeesAddress new_addr)
{
	THROW_CHECK1(invalid_argument, new_addr, !new_addr.is_magic());

	// Reset everything before the lookup, so the object never mixes the
	// new address with results left over from the previous one.
	m_found_data = false;
	m_found_dup = false;
	m_found_hash = false;
	m_wrong_data = false;
	m_biors.clear();
	m_ranges.clear();
	m_addr = new_addr;
	m_bior_count = 0;

	auto rv = m_ctx->resolve_addr(m_addr);
	// rv is our own local copy of the result and is discarded on return,
	// so take over its reference list instead of copying every entry.
	m_biors = std::move(rv.m_biors);
	m_is_toxic = rv.m_is_toxic;
	m_bior_count = m_biors.size();

	return m_addr;
}
|
|
|
|
/// Construct a resolver bound to a context and immediately resolve new_addr.
///
/// @param ctx      context used for address resolution and all later lookups
/// @param new_addr first address to resolve; passed straight to addr(), which
///                 rejects magic addresses
BeesResolver::BeesResolver(shared_ptr<BeesContext> ctx, BeesAddress new_addr) :
	// ctx is a by-value sink parameter: move it into the member rather than
	// paying for a second reference-count increment/decrement pair.
	m_ctx(std::move(ctx)),
	m_bior_count(0)
{
	// addr() initializes the remaining per-address state
	addr(new_addr);
}
|
|
|
|
/// Search a file range for a block whose contents equal needle.
///
/// Three search strategies are used, depending on the needle and on m_addr:
///  - needle is not a whole number of clone blocks: only the partial block
///    at the end of the haystack file is examined;
///  - m_addr is compressed: the extent at haystack.begin() is looked up and
///    either the single block at the recorded compressed offset, or (when no
///    offset was recorded) every block of that extent, is examined;
///  - otherwise: the blocks of the haystack range itself are examined.
///
/// Side effects: sets m_found_data and m_found_hash on a data match, sets
/// m_found_hash alone when only the hash matches, and sets m_wrong_data when
/// the block scan finishes without a data match.
///
/// @param haystack file range to search; must begin on a clone-block boundary
/// @param needle   block to look for; must begin on a clone-block boundary
/// @return the matching block read from the haystack file, or a
///         default-constructed BeesBlockData when nothing matched
BeesBlockData
BeesResolver::adjust_offset(const BeesFileRange &haystack, const BeesBlockData &needle)
{
	BEESTRACE("Searching for needle " << needle << "\n\tin haystack " << haystack);

	BEESCOUNT(adjust_try);

	// Both inputs have to start on a clone-block boundary
	THROW_CHECK1(invalid_argument, needle.begin(), (needle.begin() & BLOCK_MASK_CLONE) == 0);
	THROW_CHECK1(invalid_argument, haystack.begin(), (haystack.begin() & BLOCK_MASK_CLONE) == 0);

	// Exact sizes of the file we search in and of the block we search for
	const off_t hay_file_size = haystack.file_size();
	const off_t needle_size = needle.size();

	// A needle shorter than a full block can only sit at the end of a file
	const bool partial_tail = (needle_size & BLOCK_MASK_CLONE) != 0;
	BEESTRACE("is_unaligned_eof = " << partial_tail << ", needle_len = " << to_hex(needle_size) << ", haystack_size = " << to_hex(hay_file_size));

	if (partial_tail) {
		// Only one candidate position exists: the tail of the haystack file
		BEESTRACE("Construct needle_bfr from " << needle);
		BeesFileRange needle_range(needle);

		// Sub-block remainders of the two end positions
		const auto hay_tail = hay_file_size & BLOCK_MASK_CLONE;
		const auto needle_tail = needle_range.end() & BLOCK_MASK_CLONE;

		// Statistics only
		if (hay_tail) {
			BEESCOUNT(adjust_eof_haystack);
		}
		if (needle_tail) {
			BEESCOUNT(adjust_eof_needle);
		}

		// The partial blocks must have the same length to be comparable
		if (hay_tail != needle_tail) {
			BEESCOUNT(adjust_eof_fail);
			return BeesBlockData();
		}

		// Fetch the final (partial) block of the haystack file
		BEESTRACE("Reading haystack (haystack_size = " << to_hex(hay_file_size) << ")");
		BeesBlockData candidate(haystack.fd(), hay_file_size & ~BLOCK_MASK_CLONE, hay_tail);

		// Byte-for-byte comparison decides the outcome
		BEESTRACE("Verifying haystack " << candidate);
		if (candidate.is_data_equal(needle)) {
			BEESCOUNT(adjust_eof_hit);
			m_found_data = true;
			m_found_hash = true;
			return candidate;
		}

		// Data differs; remember whether at least the hash is still the same
		BEESTRACE("Verifying haystack hash");
		if (candidate.hash() == needle.hash()) {
			m_found_hash = true;
		}

		BEESCOUNT(adjust_eof_miss);
		return BeesBlockData();
	}

	// Window of offsets to scan, plus a record of which strategy chose it
	// (used only to pick the right hit/miss counter afterwards)
	off_t scan_begin = haystack.begin();
	off_t scan_end = haystack.end();
	bool via_compressed_offset = false;
	bool via_exact = false;
	bool via_legacy = false;

	if (!m_addr.is_compressed()) {
		BEESCOUNT(adjust_exact);
		via_exact = true;
	} else {
		BtrfsExtentWalker ew(haystack.fd(), haystack.begin(), m_ctx->root_fd());
		BEESTRACE("haystack extent data " << ew);
		Extent e = ew.current();
		if (!m_addr.has_compressed_offset()) {
			// No offset recorded in the address: try the whole extent
			scan_begin = e.begin();
			scan_end = e.end();
			BEESCOUNT(adjust_legacy);
			via_legacy = true;
		} else {
			off_t coff = m_addr.get_compressed_offset();
			if (coff < e.offset()) {
				// the target block lies before the start of this extent
				BEESCOUNT(adjust_offset_low);
				return BeesBlockData();
			}
			// Make the offset relative to this extent reference
			coff -= e.offset();
			if (coff >= e.size()) {
				// the target block lies past the end of this extent
				BEESCOUNT(adjust_offset_high);
				return BeesBlockData();
			}
			// Exactly one block to look at
			scan_begin = e.begin() + coff;
			scan_end = scan_begin + BLOCK_SIZE_CLONE;
			BEESCOUNT(adjust_offset_hit);
			via_compressed_offset = true;
		}
	}

	BEESTRACE("Checking haystack " << haystack << " offsets " << to_hex(scan_begin) << ".." << to_hex(scan_end));

	// Walk the window one clone block at a time
	for (off_t haystack_offset = scan_begin; haystack_offset < scan_end; haystack_offset += BLOCK_SIZE_CLONE) {
		THROW_CHECK1(out_of_range, haystack_offset, (haystack_offset & BLOCK_MASK_CLONE) == 0);

		// A candidate must fit entirely inside the haystack file
		if (haystack_offset + needle.size() > hay_file_size) {
			BEESCOUNT(adjust_needle_too_long);
			break;
		}

		// Read the candidate block
		BEESTRACE("straw " << name_fd(haystack.fd()) << ", offset " << to_hex(haystack_offset) << ", length " << needle.size());
		BeesBlockData candidate(haystack.fd(), haystack_offset, needle.size());

		BEESTRACE("straw = " << candidate);

		// Identical data ends the search
		if (candidate.is_data_equal(needle)) {
			BEESCOUNT(adjust_hit);
			m_found_data = true;
			m_found_hash = true;
			if (via_compressed_offset) {
				BEESCOUNT(adjust_compressed_offset_correct);
			}
			if (via_legacy) {
				BEESCOUNT(adjust_legacy_correct);
			}
			if (via_exact) {
				BEESCOUNT(adjust_exact_correct);
			}
			return candidate;
		}

		if (candidate.hash() == needle.hash()) {
			// Same hash, different data: a genuine collision.  Log it and
			// keep scanning the remaining offsets.
			m_found_hash = true;
			BEESLOG("HASH COLLISION\n"
				<< "\tneedle " << needle << "\n"
				<< "\tstraw " << candidate);
			BEESCOUNT(hash_collision);
		} else {
			// Neither hash nor data match, move on to the next block
			BEESCOUNT(adjust_miss);
		}
	}

	// Every offset in the window has been tried without a data match
	BEESCOUNT(adjust_no_match);
	if (via_compressed_offset) {
		BEESCOUNT(adjust_compressed_offset_wrong);
	}
	if (via_legacy) {
		BEESCOUNT(adjust_legacy_wrong);
	}
	if (via_exact) {
		BEESCOUNT(adjust_exact_wrong);
	}
	m_wrong_data = true;
	return BeesBlockData();
}
|
|
|
|
/// Follow one (root, inode, offset) reference and check that it still leads
/// to a block with the same contents as needle_bbd.
///
/// The referenced file is opened, the address at the referenced offset is
/// compared with m_addr (physical part only), and adjust_offset() is then
/// used to confirm the data, possibly at a different offset.
///
/// @param bior       reference to follow
/// @param needle_bbd block whose contents we are looking for
/// @return the file range of the matching block, or a default-constructed
///         BeesFileRange when the file cannot be opened, the address there
///         is magic or has a different physical address, or no matching
///         data is found
BeesFileRange
BeesResolver::chase_extent_ref(const BtrfsInodeOffsetRoot &bior, BeesBlockData &needle_bbd)
{
	BEESTRACE("chase_extent_ref bior " << bior << " needle_bbd " << needle_bbd);
	BEESNOTE("chase_extent_ref bior " << bior << " needle_bbd " << needle_bbd);
	BEESCOUNT(chase_try);

	Fd ref_fd = m_ctx->roots()->open_root_ino(bior.m_root, bior.m_inum);
	if (!ref_fd) {
		// Happens a lot for references into deleted snapshots,
		// so this case is counted but not logged.
		BEESCOUNT(chase_no_fd);
		return BeesFileRange();
	}

	BEESNOTE("searching at offset " << to_hex(bior.m_offset) << " in file " << name_fd(ref_fd) << "\n\tfor " << needle_bbd);

	BEESTRACE("bior file " << name_fd(ref_fd));
	BEESTRACE("get file_addr " << bior);
	BeesAddress ref_addr(ref_fd, bior.m_offset, m_ctx);
	BEESTRACE("file_addr " << ref_addr);

	// A magic address at the referenced offset cannot be the block we want
	if (ref_addr.is_magic()) {
		BEESINFO("file_addr is magic: file_addr = " << ref_addr << " bior = " << bior << " needle_bbd = " << needle_bbd);
		BEESCOUNT(chase_wrong_magic);
		return BeesFileRange();
	}
	THROW_CHECK1(invalid_argument, m_addr, !m_addr.is_magic());

	// The physical block has to be the one we asked for.  Only the physical
	// part is compared, so differing compressed-offset bits are tolerated.
	if (ref_addr.get_physical_or_zero() != m_addr.get_physical_or_zero()) {
		// The lookup itself worked; the reference is simply out of date.
		BEESCOUNT(chase_wrong_addr);
		return BeesFileRange();
	}

	// Read up to one needle-sized block at the referenced offset, clipped
	// to the end of the file, so its contents can be compared.
	off_t ref_file_size = Stat(ref_fd).st_size;
	off_t ref_offset = ranged_cast<off_t>(bior.m_offset);
	off_t ref_end = min(ref_file_size, ref_offset + needle_bbd.size());
	BeesBlockData found_bbd(ref_fd, ref_offset, ref_end - ref_offset);

	BEESTRACE("matched haystack_bbd " << found_bbd << " file_addr " << ref_addr);

	// For compressed data without a captured offset the resolved position is
	// not exact, so look around it for the matching block.  The same check
	// is applied to uncompressed data as a sanity test before the block is
	// accepted as a duplicate candidate.
	auto adjusted_bbd = adjust_offset(found_bbd, needle_bbd);
	if (adjusted_bbd.empty()) {
		// no block with matching contents was found
		BEESCOUNT(chase_wrong_data);
		return BeesFileRange();
	}
	if (adjusted_bbd.begin() != found_bbd.begin()) {
		// the match is at a different offset than the reference said
		BEESCOUNT(chase_corrected);
		found_bbd = adjusted_bbd;
	} else {
		BEESCOUNT(chase_uncorrected);
	}

	// At least one duplicate block exists, so this reference is good
	BEESCOUNT(chase_hit);

	// Describe the matching block as a file range
	BEESTRACE("Constructing dst_bfr { " << BeesFileId(found_bbd.fd()) << ", " << to_hex(found_bbd.begin()) << ".." << to_hex(found_bbd.end()) << " }");
	BeesFileRange result(BeesFileId(found_bbd.fd()), found_bbd.begin(), found_bbd.end());

	return result;
}
|
|
|
|
void
|
|
BeesResolver::replace_src(const BeesFileRange &src_bfr)
|
|
{
|
|
BEESTRACE("replace_src src_bfr " << src_bfr);
|
|
THROW_CHECK0(runtime_error, !m_is_toxic);
|
|
BEESCOUNT(replacesrc_try);
|
|
|
|
// Open src, reuse it for all dst
|
|
auto i_bfr = src_bfr;
|
|
BEESNOTE("Opening src bfr " << i_bfr);
|
|
BEESTRACE("Opening src bfr " << i_bfr);
|
|
i_bfr.fd(m_ctx);
|
|
|
|
BeesBlockData bbd(i_bfr);
|
|
|
|
for_each_extent_ref(bbd, [&](const BeesFileRange &j) -> bool {
|
|
// Open dst
|
|
auto j_bfr = j;
|
|
BEESNOTE("Opening dst bfr " << j_bfr);
|
|
BEESTRACE("Opening dst bfr " << j_bfr);
|
|
j_bfr.fd(m_ctx);
|
|
|
|
if (i_bfr.overlaps(j_bfr)) {
|
|
BEESCOUNT(replacesrc_overlaps);
|
|
return false; // i.e. continue
|
|
}
|
|
|
|
// Make pair(src, dst)
|
|
BEESTRACE("creating brp (" << i_bfr << ", " << j_bfr << ")");
|
|
BeesRangePair brp(i_bfr, j_bfr);
|
|
BEESTRACE("Found matching range: " << brp);
|
|
|
|
// Extend range at beginning
|
|
BEESNOTE("Extending matching range: " << brp);
|
|
// No particular reason to be constrained?
|
|
if (brp.grow(m_ctx, true)) {
|
|
BEESCOUNT(replacesrc_grown);
|
|
}
|
|
|
|
// Dedup
|
|
BEESNOTE("dedup " << brp);
|
|
if (m_ctx->dedup(brp)) {
|
|
BEESCOUNT(replacesrc_dedup_hit);
|
|
m_found_dup = true;
|
|
} else {
|
|
BEESCOUNT(replacesrc_dedup_miss);
|
|
}
|
|
return false; // i.e. continue
|
|
});
|
|
}
|
|
|
|
void
|
|
BeesResolver::find_matches(bool just_one, BeesBlockData &bbd)
|
|
{
|
|
// Walk through the (ino, offset, root) tuples until we find a match.
|
|
BEESTRACE("finding all matches for " << bbd << " at " << m_addr << ": " << m_biors.size() << " found");
|
|
THROW_CHECK0(runtime_error, !m_is_toxic);
|
|
bool stop_now = false;
|
|
for (auto ino_off_root : m_biors) {
|
|
if (m_wrong_data) {
|
|
return;
|
|
}
|
|
|
|
BEESTRACE("ino_off_root " << ino_off_root);
|
|
BeesFileId this_fid(ino_off_root.m_root, ino_off_root.m_inum);
|
|
|
|
// Silently ignore blacklisted files, e.g. BeesTempFile files
|
|
if (m_ctx->is_blacklisted(this_fid)) {
|
|
continue;
|
|
}
|
|
|
|
// Look at the old data
|
|
catch_all([&]() {
|
|
BEESTRACE("chase_extent_ref ino " << ino_off_root << " bbd " << bbd);
|
|
auto new_range = chase_extent_ref(ino_off_root, bbd);
|
|
if (new_range) {
|
|
m_ranges.insert(new_range.copy_closed());
|
|
stop_now = true;
|
|
}
|
|
});
|
|
|
|
if (just_one && stop_now) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool
|
|
BeesResolver::for_each_extent_ref(BeesBlockData bbd, function<bool(const BeesFileRange &bfr)> visitor)
|
|
{
|
|
// Walk through the (ino, offset, root) tuples until we are told to stop
|
|
BEESTRACE("for_each_extent_ref " << bbd << " at " << m_addr << ": " << m_biors.size() << " found");
|
|
THROW_CHECK0(runtime_error, !m_is_toxic);
|
|
bool stop_now = false;
|
|
for (auto ino_off_root : m_biors) {
|
|
BEESTRACE("ino_off_root " << ino_off_root);
|
|
BeesFileId this_fid(ino_off_root.m_root, ino_off_root.m_inum);
|
|
|
|
// Silently ignore blacklisted files, e.g. BeesTempFile files
|
|
if (m_ctx->is_blacklisted(this_fid)) {
|
|
continue;
|
|
}
|
|
|
|
// Look at the old data
|
|
catch_all([&]() {
|
|
BEESTRACE("chase_extent_ref ino " << ino_off_root << " bbd " << bbd);
|
|
auto new_range = chase_extent_ref(ino_off_root, bbd);
|
|
// XXX: should we catch visitor's exceptions here?
|
|
if (new_range) {
|
|
stop_now = visitor(new_range);
|
|
} else {
|
|
// We have reliable block addresses now, so we guarantee we can hit the desired block.
|
|
// Failure in chase_extent_ref means we are done, and don't need to look up all the
|
|
// other references.
|
|
// Or...not? If we have a compressed extent, some refs will not match
|
|
// if there is are two references to the same extent with a reference
|
|
// to a different extent between them.
|
|
// stop_now = true;
|
|
}
|
|
});
|
|
|
|
if (stop_now) {
|
|
break;
|
|
}
|
|
}
|
|
return stop_now;
|
|
}
|
|
|
|
/// Replace dst_bfr with a reference to an existing copy of the same data.
///
/// dst_bfr is opened once.  Matching references reported by
/// for_each_extent_ref() are tried in turn as the dedup source; ranges that
/// overlap dst_bfr or already share its physical address are skipped.  The
/// iteration stops at the first successful dedup, which also sets
/// m_found_dup.
///
/// @param dst_bfr range whose data should be replaced by a shared reference
/// @return a closed copy of the destination range of the successful dedup
///         (after growing), or of a default-constructed range if none
///         succeeded
BeesFileRange
BeesResolver::replace_dst(const BeesFileRange &dst_bfr)
{
	BEESTRACE("replace_dst dst_bfr " << dst_bfr);
	BEESCOUNT(replacedst_try);

	// The destination is opened once and shared by all sources below
	BEESNOTE("Opening dst bfr " << dst_bfr);
	BEESTRACE("Opening dst bfr " << dst_bfr);
	dst_bfr.fd(m_ctx);

	// Stays default-constructed unless a dedup succeeds
	BeesFileRange replaced_range;
	BEESTRACE("overlap_bfr " << replaced_range);

	BeesBlockData dst_block(dst_bfr);

	for_each_extent_ref(dst_block, [&](const BeesFileRange &candidate) -> bool {
		// Open the candidate source
		BEESNOTE("Opening src bfr " << candidate);
		BEESTRACE("Opening src bfr " << candidate);
		candidate.fd(m_ctx);

		// A source overlapping the destination is skipped
		if (dst_bfr.overlaps(candidate)) {
			BEESCOUNT(replacedst_overlaps);
			return false; // keep iterating
		}

		// Skip a source that already has the destination's physical address.
		// FIXME: BeesContext::scan_one_extent should be weeding these out, but does not.
		BeesBlockData src_block(candidate.fd(), candidate.begin(), min(BLOCK_SIZE_SUMS, candidate.size()));
		if (dst_block.addr().get_physical_or_zero() == src_block.addr().get_physical_or_zero()) {
			BEESCOUNT(replacedst_same);
			return false; // keep iterating
		}

		// Build the (src, dst) pair
		BEESTRACE("creating brp (" << candidate << ", " << dst_bfr << ")");
		BeesRangePair brp(candidate, dst_bfr);
		BEESTRACE("Found matching range: " << brp);

		// Try to widen the matching range.  Notes on the second argument:
		// 'false' has nasty loops, and may not be faster;
		// 'true' at best keeps fragmentation constant, but can also make it worse.
		BEESNOTE("Extending matching range: " << brp);
		if (brp.grow(m_ctx, true)) {
			BEESCOUNT(replacedst_grown);
		}

		// Perform the dedup
		BEESNOTE("dedup " << brp);
		if (!m_ctx->dedup(brp)) {
			BEESCOUNT(replacedst_dedup_miss);
			return false; // keep iterating
		}

		BEESCOUNT(replacedst_dedup_hit);
		m_found_dup = true;
		replaced_range = brp.second;
		// FIXME: find best range first, then dedup that
		return true; // stop: the destination has been replaced
	});
	return replaced_range.copy_closed();
}
|
|
|
|
/// Find a single range whose data matches bbd.
///
/// Runs find_matches() in stop-at-first-match mode and returns the first
/// element of m_ranges.
///
/// @param bbd block whose contents are being searched for
/// @return the first range in m_ranges, or a default-constructed
///         BeesFileRange when m_ranges is empty
BeesFileRange
BeesResolver::find_one_match(BeesBlockData &bbd)
{
	THROW_CHECK0(runtime_error, !m_is_toxic);
	find_matches(true, bbd);
	return m_ranges.empty() ? BeesFileRange() : *m_ranges.begin();
}
|
|
|
|
/// Find every range whose data matches bbd.
///
/// Runs find_matches() without the stop-at-first-match option and returns a
/// copy of the accumulated m_ranges set.
///
/// @param bbd block whose contents are being searched for
/// @return copy of m_ranges after the search
set<BeesFileRange>
BeesResolver::find_all_matches(BeesBlockData &bbd)
{
	THROW_CHECK0(runtime_error, !m_is_toxic);

	// false: keep going after the first match
	find_matches(false, bbd);

	return m_ranges;
}
|
|
|
|
bool
|
|
BeesResolver::operator<(const BeesResolver &that) const
|
|
{
|
|
// Lowest count, highest address
|
|
return tie(that.m_bior_count, m_addr) < tie(m_bior_count, that.m_addr);
|
|
}
|