btrfs-progs/check/mode-lowmem.c
Qu Wenruo fc38ae7f48 btrfs-progs: check: detect and warn about tree blocks crossing 64K page boundary
For the incoming subpage support, there is a new requirement for tree
blocks.  Tree blocks should not cross 64K page boundary.

For current btrfs-progs and kernel, there shouldn't be any causes to
create such tree blocks.  But still, we want to detect such tree blocks
in the wild before subpage support fully lands in upstream.

This patch will add such check for both lowmem and original mode.
Currently it's just a warning, since there aren't many users using 64K
page size yet.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-02-19 15:24:42 +01:00

5557 lines
141 KiB
C

/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <time.h>
#include "kernel-shared/ctree.h"
#include "repair.h"
#include "kernel-shared/transaction.h"
#include "common/messages.h"
#include "kernel-shared/disk-io.h"
#include "kernel-shared/backref.h"
#include "common/internal.h"
#include "common/utils.h"
#include "kernel-shared/volumes.h"
#include "check/mode-common.h"
#include "check/mode-lowmem.h"
static u64 last_allocated_chunk;
static int calc_extent_flag(struct btrfs_root *root, struct extent_buffer *eb,
u64 *flags_ret)
{
struct btrfs_root *extent_root = gfs_info->extent_root;
struct btrfs_root_item *ri = &root->root_item;
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_item *ei;
struct btrfs_key key;
struct btrfs_path *path = NULL;
unsigned long ptr;
unsigned long end;
u64 flags;
u64 owner = 0;
u64 offset;
int slot;
int type;
int ret = 0;
/*
* Except file/reloc tree, we can not have FULL BACKREF MODE
*/
if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
goto normal;
/* root node */
if (eb->start == btrfs_root_bytenr(ri))
goto normal;
if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
goto full_backref;
owner = btrfs_header_owner(eb);
if (owner == root->objectid)
goto normal;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = btrfs_header_bytenr(eb);
key.type = (u8)-1;
key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret <= 0) {
ret = -EIO;
goto out;
}
if (ret > 0) {
ret = btrfs_previous_extent_item(extent_root, path,
key.objectid);
if (ret)
goto full_backref;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
eb = path->nodes[0];
slot = path->slots[0];
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
goto full_backref;
ptr = (unsigned long)(ei + 1);
end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
if (key.type == BTRFS_EXTENT_ITEM_KEY)
ptr += sizeof(struct btrfs_tree_block_info);
next:
/* Reached extent item ends normally */
if (ptr == end)
goto full_backref;
/* Beyond extent item end, wrong item size */
if (ptr > end) {
error("extent item at bytenr %llu slot %d has wrong size",
eb->start, slot);
goto full_backref;
}
iref = (struct btrfs_extent_inline_ref *)ptr;
offset = btrfs_extent_inline_ref_offset(eb, iref);
type = btrfs_extent_inline_ref_type(eb, iref);
if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
goto normal;
ptr += btrfs_extent_inline_ref_size(type);
goto next;
normal:
*flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
goto out;
full_backref:
*flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
out:
btrfs_free_path(path);
return ret;
}
/*
* for a tree node or leaf, if it's shared, indeed we don't need to iterate it
* in every fs or file tree check. Here we find its all root ids, and only check
* it in the fs or file tree which has the smallest root id.
*/
static int need_check(struct btrfs_root *root, struct ulist *roots)
{
struct rb_node *node;
struct ulist_node *u;
/*
* @roots can be empty if it belongs to tree reloc tree
* In that case, we should always check the leaf, as we can't use
* the tree owner to ensure some other root will check it.
*/
if (roots->nnodes == 1 || roots->nnodes == 0)
return 1;
node = rb_first(&roots->root);
u = rb_entry(node, struct ulist_node, rb_node);
/*
* current root id is not smallest, we skip it and let it be checked
* in the fs or file tree who hash the smallest root id.
*/
if (root->objectid != u->val)
return 0;
return 1;
}
/*
* for a tree node or leaf, we record its reference count, so later if we still
* process this node or leaf, don't need to compute its reference count again.
*
* @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
*/
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
struct extent_buffer *eb, struct node_refs *nrefs,
u64 level, int check_all)
{
struct ulist *roots;
u64 refs = 0;
u64 flags = 0;
int root_level = btrfs_header_level(root->node);
int check;
int ret;
if (nrefs->bytenr[level] == bytenr)
return 0;
if (bytenr != (u64)-1) {
/* the return value of this function seems a mistake */
ret = btrfs_lookup_extent_info(NULL, gfs_info, bytenr,
level, 1, &refs, &flags);
/* temporary fix */
if (ret < 0 && !check_all)
return ret;
nrefs->bytenr[level] = bytenr;
nrefs->refs[level] = refs;
nrefs->full_backref[level] = 0;
nrefs->checked[level] = 0;
if (refs > 1) {
ret = btrfs_find_all_roots(NULL, gfs_info, bytenr,
0, &roots);
if (ret)
return -EIO;
check = need_check(root, roots);
ulist_free(roots);
nrefs->need_check[level] = check;
} else {
if (!check_all) {
nrefs->need_check[level] = 1;
} else {
if (level == root_level) {
nrefs->need_check[level] = 1;
} else {
/*
* The node refs may have not been
* updated if upper needs checking (the
* lowest root_objectid) the node can
* be checked.
*/
nrefs->need_check[level] =
nrefs->need_check[level + 1];
}
}
}
}
if (check_all && eb) {
calc_extent_flag(root, eb, &flags);
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
nrefs->full_backref[level] = 1;
}
return 0;
}
/*
* Mark all extents unfree in the block group. And set @block_group->cached
* according to @cache.
*/
static int modify_block_group_cache(struct btrfs_block_group *block_group, int cache)
{
struct extent_io_tree *free_space_cache = &gfs_info->free_space_cache;
u64 start = block_group->start;
u64 end = start + block_group->length;
if (cache && !block_group->cached) {
block_group->cached = 1;
clear_extent_dirty(free_space_cache, start, end - 1);
}
if (!cache && block_group->cached) {
block_group->cached = 0;
clear_extent_dirty(free_space_cache, start, end - 1);
}
return 0;
}
/*
* Modify block groups which have @flags unfree in free space cache.
*
* @cache: if 0, clear block groups cache state;
* not 0, mark blocks groups cached.
*/
static int modify_block_groups_cache(u64 flags, int cache)
{
struct btrfs_root *root = gfs_info->extent_root;
struct btrfs_key key;
struct btrfs_path path;
struct btrfs_block_group *bg_cache;
struct btrfs_block_group_item *bi;
struct btrfs_block_group_item bg_item;
struct extent_buffer *eb;
int slot;
int ret;
key.objectid = 0;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = 0;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
errno = -ret;
error("fail to search block groups due to %m");
goto out;
}
while (1) {
eb = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(eb, &key, slot);
bg_cache = btrfs_lookup_block_group(gfs_info, key.objectid);
if (!bg_cache) {
ret = -ENOENT;
goto out;
}
bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
read_extent_buffer(eb, &bg_item, (unsigned long)bi,
sizeof(bg_item));
if (btrfs_stack_block_group_flags(&bg_item) & flags)
modify_block_group_cache(bg_cache, cache);
ret = btrfs_next_item(root, &path);
if (ret > 0) {
ret = 0;
goto out;
}
if (ret < 0)
goto out;
}
out:
btrfs_release_path(&path);
return ret;
}
static int mark_block_groups_full(u64 flags)
{
return modify_block_groups_cache(flags, 1);
}
static int clear_block_groups_full(u64 flags)
{
return modify_block_groups_cache(flags, 0);
}
static int create_chunk_and_block_group(u64 flags, u64 *start, u64 *nbytes)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = gfs_info->extent_root;
int ret;
if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
return -EINVAL;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error("error starting transaction %m");
return ret;
}
ret = btrfs_alloc_chunk(trans, gfs_info, start, nbytes, flags);
if (ret) {
errno = -ret;
error("fail to allocate new chunk %m");
goto out;
}
ret = btrfs_make_block_group(trans, gfs_info, 0, flags, *start,
*nbytes);
if (ret) {
errno = -ret;
error("fail to make block group for chunk %llu %llu %m",
*start, *nbytes);
goto out;
}
out:
btrfs_commit_transaction(trans, root);
return ret;
}
static int force_cow_in_new_chunk(u64 *start_ret)
{
struct btrfs_block_group *bg;
u64 start;
u64 nbytes;
u64 alloc_profile;
u64 flags;
int ret;
alloc_profile = (gfs_info->avail_metadata_alloc_bits &
gfs_info->metadata_alloc_profile);
flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
if (btrfs_fs_incompat(gfs_info, MIXED_GROUPS))
flags |= BTRFS_BLOCK_GROUP_DATA;
ret = create_chunk_and_block_group(flags, &start, &nbytes);
if (ret)
goto err;
printf("Created new chunk [%llu %llu]\n", start, nbytes);
flags = BTRFS_BLOCK_GROUP_METADATA;
/* Mark all metadata block groups cached and full in free space*/
ret = mark_block_groups_full(flags);
if (ret)
goto clear_bgs_full;
bg = btrfs_lookup_block_group(gfs_info, start);
if (!bg) {
ret = -ENOENT;
error("fail to look up block group %llu %llu", start, nbytes);
goto clear_bgs_full;
}
/* Clear block group cache just allocated */
ret = modify_block_group_cache(bg, 0);
if (ret)
goto clear_bgs_full;
if (start_ret)
*start_ret = start;
return 0;
clear_bgs_full:
clear_block_groups_full(flags);
err:
return ret;
}
/*
* Returns 0 means not almost full.
* Returns >0 means almost full.
* Returns <0 means fatal error.
*/
static int is_chunk_almost_full(u64 start)
{
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_root *root = gfs_info->extent_root;
struct btrfs_block_group_item *bi;
struct btrfs_block_group_item bg_item;
struct extent_buffer *eb;
u64 used;
u64 total;
u64 min_free;
int ret;
int slot;
key.objectid = start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = (u64)-1;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (!ret)
ret = -EIO;
if (ret < 0)
goto out;
ret = btrfs_previous_item(root, &path, start,
BTRFS_BLOCK_GROUP_ITEM_KEY);
if (ret) {
error("failed to find block group %llu", start);
ret = -ENOENT;
goto out;
}
eb = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(eb, &key, slot);
if (key.objectid != start) {
ret = -ENOENT;
goto out;
}
total = key.offset;
bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
used = btrfs_stack_block_group_used(&bg_item);
/*
* if the free space in the chunk is less than %10 of total,
* or not not enough for CoW once, we think the chunk is almost full.
*/
min_free = max_t(u64, (BTRFS_MAX_LEVEL + 1) * gfs_info->nodesize,
div_factor(total, 1));
if ((total - used) > min_free)
ret = 0;
else
ret = 1;
out:
btrfs_release_path(&path);
return ret;
}
/*
* Returns <0 for error.
* Returns 0 for success.
*/
static int try_to_force_cow_in_new_chunk(u64 old_start, u64 *new_start)
{
int ret;
if (old_start) {
ret = is_chunk_almost_full(old_start);
if (ret <= 0)
return ret;
}
ret = force_cow_in_new_chunk(new_start);
return ret;
}
static int avoid_extents_overwrite(void)
{
int ret;
int mixed = btrfs_fs_incompat(gfs_info, MIXED_GROUPS);
if (gfs_info->excluded_extents)
return 0;
if (last_allocated_chunk != (u64)-1) {
ret = try_to_force_cow_in_new_chunk(last_allocated_chunk,
&last_allocated_chunk);
if (!ret)
goto out;
/*
* If failed, do not try to allocate chunk again in
* next call.
* If there is no space left to allocate, try to exclude all
* metadata blocks. Mixed filesystem is unsupported.
*/
last_allocated_chunk = (u64)-1;
if (ret != -ENOSPC || mixed)
goto out;
}
printf(
"Try to exclude all metadata blocks and extents, it may be slow\n");
ret = exclude_metadata_blocks();
out:
if (ret) {
errno = -ret;
error("failed to avoid extents overwrite %m");
}
return ret;
}
static int end_avoid_extents_overwrite(void)
{
int ret = 0;
cleanup_excluded_extents();
if (last_allocated_chunk)
ret = clear_block_groups_full(BTRFS_BLOCK_GROUP_METADATA);
return ret;
}
/*
* Delete the item @path point to. A wrapper of btrfs_del_item().
*
* If deleted successfully, @path will point to the previous item of the
* deleted item.
*/
static int delete_item(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_trans_handle *trans;
int ret = 0;
ret = avoid_extents_overwrite();
if (ret)
return ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
error("fail to start transaction %s", strerror(-ret));
goto out;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret) {
ret = -ENOENT;
goto out;
}
ret = btrfs_del_item(trans, root, path);
if (ret)
goto out;
if (path->slots[0] == 0)
btrfs_prev_leaf(root, path);
else
path->slots[0]--;
out:
btrfs_commit_transaction(trans, root);
if (ret)
error("failed to delete root %llu item[%llu, %u, %llu]",
root->objectid, key.objectid, key.type, key.offset);
else
printf("Deleted root %llu item[%llu, %u, %llu]\n",
root->objectid, key.objectid, key.type, key.offset);
return ret;
}
/*
* Wrapper function for btrfs_fix_block_accounting().
*
* Returns 0 on success.
* Returns != 0 on error.
*/
static int repair_block_accounting(void)
{
struct btrfs_trans_handle *trans = NULL;
struct btrfs_root *root = gfs_info->extent_root;
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error("fail to start transaction: %m");
return ret;
}
ret = btrfs_fix_block_accounting(trans);
btrfs_commit_transaction(trans, root);
return ret;
}
/*
* This function only handles BACKREF_MISSING,
* If corresponding extent item exists, increase the ref, else insert an extent
* item and backref.
*
* Returns error bits after repair.
*/
static int repair_tree_block_ref(struct btrfs_root *root,
struct extent_buffer *node,
struct node_refs *nrefs, int level, int err)
{
struct btrfs_trans_handle *trans = NULL;
struct btrfs_root *extent_root = gfs_info->extent_root;
struct btrfs_path path;
struct btrfs_extent_item *ei;
struct btrfs_tree_block_info *bi;
struct btrfs_key key;
struct extent_buffer *eb;
u32 size = sizeof(*ei);
u32 node_size = gfs_info->nodesize;
int insert_extent = 0;
int skinny_metadata = btrfs_fs_incompat(gfs_info, SKINNY_METADATA);
int root_level = btrfs_header_level(root->node);
int generation;
int ret;
u64 owner;
u64 bytenr;
u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
u64 parent = 0;
if ((err & BACKREF_MISSING) == 0)
return err;
WARN_ON(level > BTRFS_MAX_LEVEL);
WARN_ON(level < 0);
btrfs_init_path(&path);
bytenr = btrfs_header_bytenr(node);
owner = btrfs_header_owner(node);
generation = btrfs_header_generation(node);
key.objectid = bytenr;
key.type = (u8)-1;
key.offset = (u64)-1;
/* Search for the extent item */
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret <= 0) {
ret = -EIO;
goto out;
}
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret)
insert_extent = 1;
/* calculate if the extent item flag is full backref or not */
if (nrefs->full_backref[level] != 0)
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
ret = avoid_extents_overwrite();
if (ret)
goto out;
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
errno = -ret;
error("fail to start transaction: %m");
goto out;
}
/* insert an extent item */
if (insert_extent) {
struct btrfs_disk_key copy_key;
generation = btrfs_header_generation(node);
if (level < root_level && nrefs->full_backref[level + 1] &&
owner != root->objectid) {
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
}
key.objectid = bytenr;
if (!skinny_metadata) {
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = node_size;
size += sizeof(*bi);
} else {
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = level;
}
btrfs_release_path(&path);
ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
size);
if (ret)
goto out;
eb = path.nodes[0];
ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
btrfs_set_extent_refs(eb, ei, 0);
btrfs_set_extent_generation(eb, ei, generation);
btrfs_set_extent_flags(eb, ei, flags);
if (!skinny_metadata) {
bi = (struct btrfs_tree_block_info *)(ei + 1);
memset_extent_buffer(eb, 0, (unsigned long)bi,
sizeof(*bi));
btrfs_set_disk_key_objectid(&copy_key, root->objectid);
btrfs_set_disk_key_type(&copy_key, 0);
btrfs_set_disk_key_offset(&copy_key, 0);
btrfs_set_tree_block_level(eb, bi, level);
btrfs_set_tree_block_key(eb, bi, &copy_key);
}
btrfs_mark_buffer_dirty(eb);
printf("Added an extent item [%llu %u]\n", bytenr, node_size);
btrfs_update_block_group(trans, bytenr, node_size, 1, 0);
nrefs->refs[level] = 0;
nrefs->full_backref[level] =
flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
btrfs_release_path(&path);
}
if (level < root_level && nrefs->full_backref[level + 1] &&
owner != root->objectid)
parent = nrefs->bytenr[level + 1];
/* increase the ref */
ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
parent, root->objectid, level, 0);
nrefs->refs[level]++;
out:
if (trans)
btrfs_commit_transaction(trans, extent_root);
btrfs_release_path(&path);
if (ret) {
errno = -ret;
error(
"failed to repair tree block ref start %llu root %llu due to %m",
bytenr, root->objectid);
} else {
printf("Added one tree block ref start %llu %s %llu\n",
bytenr, parent ? "parent" : "root",
parent ? parent : root->objectid);
err &= ~BACKREF_MISSING;
}
return err;
}
/*
* Update global fs information.
*/
static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
int level)
{
u32 free_nrs;
struct extent_buffer *eb = path->nodes[level];
total_btree_bytes += eb->len;
if (fs_root_objectid(root->objectid))
total_fs_tree_bytes += eb->len;
if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
total_extent_tree_bytes += eb->len;
if (level == 0) {
btree_space_waste += btrfs_leaf_free_space(eb);
} else {
free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(gfs_info) -
btrfs_header_nritems(eb));
btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
}
}
/*
* Find the @index according by @ino and name.
* Notice:time efficiency is O(N)
*
* @root: the root of the fs/file tree
* @index_ret: the index as return value
* @namebuf: the name to match
* @name_len: the length of name to match
* @file_type: the file_type of INODE_ITEM to match
*
* Returns 0 if found and *@index_ret will be modified with right value
* Returns< 0 not found and *@index_ret will be (u64)-1
*/
static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
u64 *index_ret, char *namebuf, u32 name_len,
u8 file_type)
{
struct btrfs_path path;
struct extent_buffer *node;
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_key location;
char name[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 data_len;
u8 filetype;
int slot;
int ret;
ASSERT(index_ret);
/* search from the last index */
key.objectid = dirid;
key.offset = (u64)-1;
key.type = BTRFS_DIR_INDEX_KEY;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
return ret;
loop:
ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
if (ret) {
ret = -ENOENT;
*index_ret = (64)-1;
goto out;
}
/* Check whether inode_id/filetype/name match */
node = path.nodes[0];
slot = path.slots[0];
di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
total = btrfs_item_size_nr(node, slot);
while (cur < total) {
ret = -ENOENT;
len = btrfs_dir_name_len(node, di);
data_len = btrfs_dir_data_len(node, di);
btrfs_dir_item_key_to_cpu(node, di, &location);
if (location.objectid != location_id ||
location.type != BTRFS_INODE_ITEM_KEY ||
location.offset != 0)
goto next;
filetype = btrfs_dir_type(node, di);
if (file_type != filetype)
goto next;
if (len > BTRFS_NAME_LEN)
len = BTRFS_NAME_LEN;
read_extent_buffer(node, name, (unsigned long)(di + 1), len);
if (len != name_len || strncmp(namebuf, name, len))
goto next;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
*index_ret = key.offset;
ret = 0;
goto out;
next:
len += sizeof(*di) + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
goto loop;
out:
btrfs_release_path(&path);
return ret;
}
/*
* Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
* INODE_REF/INODE_EXTREF match.
*
* @root: the root of the fs/file tree
* @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
* value while find index
* @location_key: location key of the struct btrfs_dir_item to match
* @name: the name to match
* @namelen: the length of name
* @file_type: the type of file to math
*
* Return 0 if no error occurred.
* Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
* DIR_ITEM/DIR_INDEX
* Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
* and DIR_ITEM/DIR_INDEX mismatch
*/
static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
struct btrfs_key *location_key, char *name,
u32 namelen, u8 file_type)
{
struct btrfs_path path;
struct extent_buffer *node;
struct btrfs_dir_item *di;
struct btrfs_key location;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 data_len;
u8 filetype;
int slot;
int ret;
/* get the index by traversing all index */
if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
ret = find_dir_index(root, key->objectid,
location_key->objectid, &key->offset,
name, namelen, file_type);
if (ret)
ret = DIR_INDEX_MISSING;
return ret;
}
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
if (ret) {
ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
DIR_INDEX_MISSING;
goto out;
}
/* Check whether inode_id/filetype/name match */
node = path.nodes[0];
slot = path.slots[0];
di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
total = btrfs_item_size_nr(node, slot);
while (cur < total) {
ret = key->type == BTRFS_DIR_ITEM_KEY ?
DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
len = btrfs_dir_name_len(node, di);
data_len = btrfs_dir_data_len(node, di);
btrfs_dir_item_key_to_cpu(node, di, &location);
if (location.objectid != location_key->objectid ||
location.type != location_key->type ||
location.offset != location_key->offset)
goto next;
filetype = btrfs_dir_type(node, di);
if (file_type != filetype)
goto next;
if (len > BTRFS_NAME_LEN) {
len = BTRFS_NAME_LEN;
warning("root %llu %s[%llu %llu] name too long %u, trimmed",
root->objectid,
key->type == BTRFS_DIR_ITEM_KEY ?
"DIR_ITEM" : "DIR_INDEX",
key->objectid, key->offset, len);
}
read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
len);
if (len != namelen || strncmp(namebuf, name, len))
goto next;
ret = 0;
goto out;
next:
len += sizeof(*di) + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
out:
btrfs_release_path(&path);
return ret;
}
/*
* The ternary means dir item, dir index and relative inode ref.
* The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
* DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
* strategy:
* If two of three is missing or mismatched, delete the existing one.
* If one of three is missing or mismatched, add the missing one.
*
* returns 0 means success.
* returns not 0 means on error;
*/
static int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
u64 index, char *name, int name_len, u8 filetype,
int err)
{
struct btrfs_trans_handle *trans;
int stage = 0;
int ret = 0;
/*
* stage shall be one of following valild values:
* 0: Fine, nothing to do.
* 1: One of three is wrong, so add missing one.
* 2: Two of three is wrong, so delete existed one.
*/
if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
stage++;
if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
stage++;
if (err & (INODE_REF_MISSING))
stage++;
/* stage must be smllarer than 3 */
ASSERT(stage < 3);
trans = btrfs_start_transaction(root, 1);
if (stage == 2) {
ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
name_len, 0);
goto out;
}
if (stage == 1) {
ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
name_len, 0);
if (ret)
goto out;
ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
filetype, &index, 1, 1);
goto out;
}
out:
btrfs_commit_transaction(trans, root);
if (ret)
error("fail to repair inode %llu name %s filetype %u",
ino, name, filetype);
else
printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
stage == 2 ? "Delete" : "Add",
ino, name, filetype);
return ret;
}
/*
* Prints inode ref error message
*/
static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
u64 index, const char *namebuf, int name_len,
u8 filetype, int err)
{
if (!err)
return;
/* root dir error */
if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
error(
"root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
root->objectid, key->objectid, key->offset, namebuf);
return;
}
/* normal error */
if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
root->objectid, key->offset,
btrfs_name_hash(namebuf, name_len),
err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
namebuf, filetype);
if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
root->objectid, key->offset, index,
err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
namebuf, filetype);
}
/*
* Traverse the given INODE_REF and call find_dir_item() to find related
* DIR_ITEM/DIR_INDEX.
*
* @root: the root of the fs/file tree
* @ref_key: the key of the INODE_REF
* @path the path provides node and slot
* @refs: the count of INODE_REF
* @mode: the st_mode of INODE_ITEM
* @name_ret: returns with the first ref's name
* @name_len_ret: len of the name_ret
*
* Return 0 if no error occurred.
*/
static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
struct btrfs_path *path, char *name_ret,
u32 *namelen_ret, u64 *refs_ret, int mode)
{
struct btrfs_key key;
struct btrfs_key location;
struct btrfs_inode_ref *ref;
struct extent_buffer *node;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u64 index;
int ret;
int err = 0;
int tmp_err;
int slot;
int need_research = 0;
u64 refs;
begin:
err = 0;
cur = 0;
refs = *refs_ret;
/* since after repair, path and the dir item may be changed */
if (need_research) {
need_research = 0;
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
/*
* The item was deleted, let the path point to the last checked
* item.
*/
if (ret > 0) {
if (path->slots[0] == 0)
btrfs_prev_leaf(root, path);
else
path->slots[0]--;
}
if (ret)
goto out;
}
location.objectid = ref_key->objectid;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
node = path->nodes[0];
slot = path->slots[0];
memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
total = btrfs_item_size_nr(node, slot);
next:
/* Update inode ref count */
refs++;
tmp_err = 0;
index = btrfs_inode_ref_index(node, ref);
name_len = btrfs_inode_ref_name_len(node, ref);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
} else {
len = BTRFS_NAME_LEN;
warning("root %llu INODE_REF[%llu %llu] name too long",
root->objectid, ref_key->objectid, ref_key->offset);
}
read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
/* copy the first name found to name_ret */
if (refs == 1 && name_ret) {
memcpy(name_ret, namebuf, len);
*namelen_ret = len;
}
/* Check root dir ref */
if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
if (index != 0 || len != strlen("..") ||
strncmp("..", namebuf, len) ||
ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
/* set err bits then repair will delete the ref */
err |= DIR_INDEX_MISSING;
err |= DIR_ITEM_MISSING;
}
goto end;
}
/* Find related DIR_INDEX */
key.objectid = ref_key->offset;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = index;
tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
imode_to_type(mode));
/* Find related dir_item */
key.objectid = ref_key->offset;
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(namebuf, len);
tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
imode_to_type(mode));
end:
if (tmp_err && repair) {
ret = repair_ternary_lowmem(root, ref_key->offset,
ref_key->objectid, index, namebuf,
name_len, imode_to_type(mode),
tmp_err);
if (!ret) {
need_research = 1;
goto begin;
}
}
print_inode_ref_err(root, ref_key, index, namebuf, name_len,
imode_to_type(mode), tmp_err);
err |= tmp_err;
len = sizeof(*ref) + name_len;
ref = (struct btrfs_inode_ref *)((char *)ref + len);
cur += len;
if (cur < total)
goto next;
out:
*refs_ret = refs;
return err;
}
/*
* Traverse the given INODE_EXTREF and call find_dir_item() to find related
* DIR_ITEM/DIR_INDEX.
*
* @root: the root of the fs/file tree
* @ref_key: the key of the INODE_EXTREF
* @refs: the count of INODE_EXTREF
* @mode: the st_mode of INODE_ITEM
*
* Return 0 if no error occurred.
*/
static int check_inode_extref(struct btrfs_root *root,
struct btrfs_key *ref_key,
struct extent_buffer *node, int slot, u64 *refs,
int mode)
{
struct btrfs_key key;
struct btrfs_key location;
struct btrfs_inode_extref *extref;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u64 index;
u64 parent;
int ret;
int err = 0;
location.objectid = ref_key->objectid;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
total = btrfs_item_size_nr(node, slot);
next:
/* update inode ref count */
(*refs)++;
name_len = btrfs_inode_extref_name_len(node, extref);
index = btrfs_inode_extref_index(node, extref);
parent = btrfs_inode_extref_parent(node, extref);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
} else {
len = BTRFS_NAME_LEN;
warning("root %llu INODE_EXTREF[%llu %llu] name too long",
root->objectid, ref_key->objectid, ref_key->offset);
}
read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
/* Check root dir ref name */
if (index == 0 && strncmp(namebuf, "..", name_len)) {
error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
root->objectid, ref_key->objectid, ref_key->offset,
namebuf);
err |= ROOT_DIR_ERROR;
}
/* find related dir_index */
key.objectid = parent;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = index;
ret = find_dir_item(root, &key, &location, namebuf, len, mode);
err |= ret;
/* find related dir_item */
key.objectid = parent;
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(namebuf, len);
ret = find_dir_item(root, &key, &location, namebuf, len, mode);
err |= ret;
len = sizeof(*extref) + name_len;
extref = (struct btrfs_inode_extref *)((char *)extref + len);
cur += len;
if (cur < total)
goto next;
return err;
}
/*
* Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
* DIR_ITEM/DIR_INDEX match.
* Return with @index_ret.
*
* @root: the root of the fs/file tree
* @key: the key of the INODE_REF/INODE_EXTREF
* @name: the name in the INODE_REF/INODE_EXTREF
* @namelen: the length of name in the INODE_REF/INODE_EXTREF
* @index_ret: the index in the INODE_REF/INODE_EXTREF,
* value (64)-1 means do not check index
*
* Return 0 if no error occurred.
* Return >0 for error bitmap
*/
static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
char *name, int namelen, u64 *index_ret)
{
struct btrfs_path path;
struct btrfs_inode_ref *ref;
struct btrfs_inode_extref *extref;
struct extent_buffer *node;
char ref_namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 ref_namelen;
u64 ref_index;
u64 parent;
u64 dir_id;
int slot;
int ret;
ASSERT(index_ret);
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
if (ret) {
ret = INODE_REF_MISSING;
goto extref;
}
node = path.nodes[0];
slot = path.slots[0];
ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
total = btrfs_item_size_nr(node, slot);
/* Iterate all entry of INODE_REF */
while (cur < total) {
ret = INODE_REF_MISSING;
ref_namelen = btrfs_inode_ref_name_len(node, ref);
ref_index = btrfs_inode_ref_index(node, ref);
if (*index_ret != (u64)-1 && *index_ret != ref_index)
goto next_ref;
if (cur + sizeof(*ref) + ref_namelen > total ||
ref_namelen > BTRFS_NAME_LEN) {
warning("root %llu INODE %s[%llu %llu] name too long",
root->objectid,
key->type == BTRFS_INODE_REF_KEY ?
"REF" : "EXTREF",
key->objectid, key->offset);
if (cur + sizeof(*ref) > total)
break;
len = min_t(u32, total - cur - sizeof(*ref),
BTRFS_NAME_LEN);
} else {
len = ref_namelen;
}
read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
len);
if (len != namelen || strncmp(ref_namebuf, name, len))
goto next_ref;
*index_ret = ref_index;
ret = 0;
goto out;
next_ref:
len = sizeof(*ref) + ref_namelen;
ref = (struct btrfs_inode_ref *)((char *)ref + len);
cur += len;
}
extref:
/* Skip if not support EXTENDED_IREF feature */
if (!btrfs_fs_incompat(gfs_info, EXTENDED_IREF))
goto out;
btrfs_release_path(&path);
btrfs_init_path(&path);
dir_id = key->offset;
key->type = BTRFS_INODE_EXTREF_KEY;
key->offset = btrfs_extref_hash(dir_id, name, namelen);
ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
if (ret) {
ret = INODE_REF_MISSING;
goto out;
}
node = path.nodes[0];
slot = path.slots[0];
extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
cur = 0;
total = btrfs_item_size_nr(node, slot);
/* Iterate all entry of INODE_EXTREF */
while (cur < total) {
ret = INODE_REF_MISSING;
ref_namelen = btrfs_inode_extref_name_len(node, extref);
ref_index = btrfs_inode_extref_index(node, extref);
parent = btrfs_inode_extref_parent(node, extref);
if (*index_ret != (u64)-1 && *index_ret != ref_index)
goto next_extref;
if (parent != dir_id)
goto next_extref;
if (ref_namelen <= BTRFS_NAME_LEN) {
len = ref_namelen;
} else {
len = BTRFS_NAME_LEN;
warning("root %llu INODE %s[%llu %llu] name too long",
root->objectid,
key->type == BTRFS_INODE_REF_KEY ?
"REF" : "EXTREF",
key->objectid, key->offset);
}
read_extent_buffer(node, ref_namebuf,
(unsigned long)(extref + 1), len);
if (len != namelen || strncmp(ref_namebuf, name, len))
goto next_extref;
*index_ret = ref_index;
ret = 0;
goto out;
next_extref:
len = sizeof(*extref) + ref_namelen;
extref = (struct btrfs_inode_extref *)((char *)extref + len);
cur += len;
}
out:
btrfs_release_path(&path);
return ret;
}
static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 ino,
u8 filetype)
{
u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
}
/*
* Insert the missing inode item.
*
* Returns 0 means success.
* Returns <0 means error.
*/
static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
u8 filetype)
{
struct btrfs_key key;
struct btrfs_trans_handle *trans;
struct btrfs_path path;
int ret;
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
btrfs_init_path(&path);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = -EIO;
goto out;
}
ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
if (ret < 0 || !ret)
goto fail;
/* insert inode item */
create_inode_item_lowmem(trans, root, ino, filetype);
ret = 0;
fail:
btrfs_commit_transaction(trans, root);
out:
if (ret)
error("failed to repair root %llu INODE ITEM[%llu] missing",
root->objectid, ino);
btrfs_release_path(&path);
return ret;
}
/*
* A wrapper for delete_corrupted_dir_item(), with support part like
* start/commit transaction.
*/
static int lowmem_delete_corrupted_dir_item(struct btrfs_root *root,
struct btrfs_key *di_key,
char *namebuf, u32 name_len)
{
struct btrfs_trans_handle *trans;
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
error("failed to start transaction: %d", ret);
return ret;
}
ret = delete_corrupted_dir_item(trans, root, di_key, namebuf, name_len);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
} else {
ret = btrfs_commit_transaction(trans, root);
if (ret < 0)
error("failed to commit transaction: %d", ret);
}
return ret;
}
static int try_repair_imode(struct btrfs_root *root, u64 ino)
{
struct btrfs_inode_item *iitem;
struct btrfs_path path;
struct btrfs_key key;
int ret;
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
goto out;
iitem = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_inode_item);
if (!is_valid_imode(btrfs_inode_mode(path.nodes[0], iitem))) {
ret = repair_imode_common(root, &path);
} else {
ret = -ENOTTY;
}
out:
btrfs_release_path(&path);
return ret;
}
/*
* Call repair_inode_item_missing and repair_ternary_lowmem to repair
*
* Returns error after repair
*/
static int repair_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
u64 ino, u64 index, u8 filetype, char *namebuf,
u32 name_len, int err)
{
u64 dirid = di_key->objectid;
int ret;
if (err & (DIR_ITEM_HASH_MISMATCH)) {
ret = lowmem_delete_corrupted_dir_item(root, di_key, namebuf,
name_len);
if (!ret)
err &= ~(DIR_ITEM_HASH_MISMATCH);
}
if (err & INODE_ITEM_MISSING) {
ret = repair_inode_item_missing(root, ino, filetype);
if (!ret)
err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
}
if (err & INODE_ITEM_MISMATCH) {
/*
* INODE_ITEM mismatch can be caused by bad imode, so check if
* it's a bad imode, then repair if possible.
*/
ret = try_repair_imode(root, ino);
if (!ret)
err &= ~INODE_ITEM_MISMATCH;
}
if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
name_len, filetype, err);
if (!ret) {
err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
err &= ~(INODE_REF_MISSING);
}
}
return err;
}
static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
u64 ino, u64 index, const char *namebuf,
int name_len, u8 filetype, int err)
{
if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
root->objectid, key->objectid, key->offset, namebuf,
filetype,
err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
}
if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
root->objectid, key->objectid, index, namebuf, filetype,
err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
}
if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
error(
"root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
root->objectid, ino, index, namebuf, filetype,
err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
}
if (err & INODE_REF_MISSING)
error(
"root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
root->objectid, ino, key->objectid, namebuf, filetype);
}
/*
* Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
* call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
*
* @root: the root of the fs/file tree
* @key: the key of the INODE_REF/INODE_EXTREF
* @path: the path
* @size: the st_size of the INODE_ITEM
*
* Return 0 if no error occurred.
* Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
*/
static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
struct btrfs_path *path, u64 *size)
{
struct btrfs_dir_item *di;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key location;
struct extent_buffer *node;
int slot;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u32 data_len;
u8 filetype;
u32 mode = 0;
u64 index;
int ret;
int err;
int tmp_err;
int need_research = 0;
begin:
err = 0;
cur = 0;
/* since after repair, path and the dir item may be changed */
if (need_research) {
need_research = 0;
err |= DIR_COUNT_AGAIN;
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/* the item was deleted, let path point the last checked item */
if (ret > 0) {
if (path->slots[0] == 0)
btrfs_prev_leaf(root, path);
else
path->slots[0]--;
}
if (ret)
goto out;
}
node = path->nodes[0];
slot = path->slots[0];
di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
total = btrfs_item_size_nr(node, slot);
memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
while (cur < total) {
/*
* For DIR_ITEM set index to (u64)-1, so that find_inode_ref
* ignore index check.
*/
if (di_key->type == BTRFS_DIR_INDEX_KEY)
index = di_key->offset;
else
index = (u64)-1;
data_len = btrfs_dir_data_len(node, di);
tmp_err = 0;
if (data_len)
error("root %llu %s[%llu %llu] data_len shouldn't be %u",
root->objectid,
di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
di_key->objectid, di_key->offset, data_len);
name_len = btrfs_dir_name_len(node, di);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
} else {
len = BTRFS_NAME_LEN;
warning("root %llu %s[%llu %llu] name too long",
root->objectid,
di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
di_key->objectid, di_key->offset);
}
(*size) += name_len;
read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
len);
filetype = btrfs_dir_type(node, di);
if (di_key->type == BTRFS_DIR_ITEM_KEY &&
di_key->offset != btrfs_name_hash(namebuf, len)) {
error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
root->objectid, di_key->objectid, di_key->offset,
namebuf, len, filetype, di_key->offset,
btrfs_name_hash(namebuf, len));
tmp_err |= DIR_ITEM_HASH_MISMATCH;
goto next;
}
btrfs_dir_item_key_to_cpu(node, di, &location);
/* Ignore related ROOT_ITEM check */
if (location.type == BTRFS_ROOT_ITEM_KEY)
goto next;
btrfs_release_path(path);
/* Check relative INODE_ITEM(existence/filetype) */
ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
if (ret) {
tmp_err |= INODE_ITEM_MISSING;
goto next;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
mode = btrfs_inode_mode(path->nodes[0], ii);
if (imode_to_type(mode) != filetype) {
tmp_err |= INODE_ITEM_MISMATCH;
goto next;
}
/* Check relative INODE_REF/INODE_EXTREF */
key.objectid = location.objectid;
key.type = BTRFS_INODE_REF_KEY;
key.offset = di_key->objectid;
tmp_err |= find_inode_ref(root, &key, namebuf, len, &index);
/* check relative INDEX/ITEM */
key.objectid = di_key->objectid;
if (key.type == BTRFS_DIR_ITEM_KEY) {
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = index;
} else {
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(namebuf, name_len);
}
tmp_err |= find_dir_item(root, &key, &location, namebuf,
name_len, filetype);
/* find_dir_item may find index */
if (key.type == BTRFS_DIR_INDEX_KEY)
index = key.offset;
next:
if (tmp_err && repair) {
ret = repair_dir_item(root, di_key,
location.objectid, index,
imode_to_type(mode), namebuf,
name_len, tmp_err);
if (ret != tmp_err) {
need_research = 1;
goto begin;
}
}
btrfs_release_path(path);
print_dir_item_err(root, di_key, location.objectid, index,
namebuf, name_len, filetype, tmp_err);
err |= tmp_err;
len = sizeof(*di) + name_len + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
root->objectid, di_key->objectid,
di_key->offset);
break;
}
}
out:
/* research path */
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
if (ret)
err |= ret > 0 ? -ENOENT : ret;
return err;
}
/*
* Wrapper function of btrfs_punch_hole.
*
* @path: The path holder, will point to the same key after hole punching.
*
* Returns 0 means success.
* Returns not 0 means error.
*/
static int punch_extent_hole(struct btrfs_root *root, struct btrfs_path *path,
u64 ino, u64 start, u64 len)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_punch_hole(trans, root, ino, start, len);
if (ret) {
error("failed to add hole [%llu, %llu] in inode [%llu]",
start, len, ino);
btrfs_abort_transaction(trans, ret);
return ret;
}
printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len, ino);
btrfs_commit_transaction(trans, root);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret > 0)
ret = -ENOENT;
return ret;
}
static int repair_inline_ram_bytes(struct btrfs_root *root,
struct btrfs_path *path, u64 *ram_bytes_ret)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
struct btrfs_item *item;
u32 on_disk_data_len;
int ret;
int recover_ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
return ret;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Not really possible */
if (ret > 0) {
ret = -ENOENT;
btrfs_release_path(path);
goto recover;
}
if (ret < 0)
goto recover;
item = btrfs_item_nr(path->slots[0]);
on_disk_data_len = btrfs_file_extent_inline_item_len(path->nodes[0],
item);
fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(path->nodes[0], fi) !=
BTRFS_FILE_EXTENT_INLINE ||
btrfs_file_extent_compression(path->nodes[0], fi) !=
BTRFS_COMPRESS_NONE)
return -EINVAL;
btrfs_set_file_extent_ram_bytes(path->nodes[0], fi, on_disk_data_len);
btrfs_mark_buffer_dirty(path->nodes[0]);
ret = btrfs_commit_transaction(trans, root);
if (!ret) {
printf(
"Successfully repaired inline ram_bytes for root %llu ino %llu\n",
root->objectid, key.objectid);
*ram_bytes_ret = on_disk_data_len;
}
return ret;
recover:
/*
* COW search failed, mostly due to the extra COW work (extent
* allocation, etc). Since we have a good path from before, readonly
* search should still work, or later checks will fail due to empty
* path.
*/
recover_ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* This really shouldn't happen, or we have a big problem */
ASSERT(recover_ret == 0);
return ret;
}
static int check_file_extent_inline(struct btrfs_root *root,
struct btrfs_path *path, u64 *size,
u64 *end)
{
u32 max_inline_extent_size = min_t(u32, gfs_info->sectorsize - 1,
BTRFS_MAX_INLINE_DATA_SIZE(gfs_info));
struct extent_buffer *node = path->nodes[0];
struct btrfs_item *e = btrfs_item_nr(path->slots[0]);
struct btrfs_file_extent_item *fi;
struct btrfs_key fkey;
u64 extent_num_bytes;
u32 item_inline_len;
int ret;
int compressed = 0;
int err = 0;
fi = btrfs_item_ptr(node, path->slots[0], struct btrfs_file_extent_item);
item_inline_len = btrfs_file_extent_inline_item_len(node, e);
extent_num_bytes = btrfs_file_extent_ram_bytes(node, fi);
compressed = btrfs_file_extent_compression(node, fi);
btrfs_item_key_to_cpu(node, &fkey, path->slots[0]);
if (extent_num_bytes == 0) {
error(
"root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
root->objectid, fkey.objectid, fkey.offset);
err |= FILE_EXTENT_ERROR;
}
if (compressed) {
if (extent_num_bytes > gfs_info->sectorsize) {
error(
"root %llu EXTENT_DATA[%llu %llu] too large inline extent ram size, have %llu, max: %u",
root->objectid, fkey.objectid, fkey.offset,
extent_num_bytes, gfs_info->sectorsize - 1);
err |= FILE_EXTENT_ERROR;
}
if (item_inline_len > max_inline_extent_size) {
error(
"root %llu EXTENT_DATA[%llu %llu] too large inline extent on-disk size, have %u, max: %u",
root->objectid, fkey.objectid, fkey.offset,
item_inline_len, max_inline_extent_size);
err |= FILE_EXTENT_ERROR;
}
} else {
if (extent_num_bytes > max_inline_extent_size) {
error(
"root %llu EXTENT_DATA[%llu %llu] too large inline extent size, have %llu, max: %u",
root->objectid, fkey.objectid, fkey.offset,
extent_num_bytes, max_inline_extent_size);
err |= FILE_EXTENT_ERROR;
}
if (extent_num_bytes != item_inline_len) {
error(
"root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
root->objectid, fkey.objectid, fkey.offset,
extent_num_bytes, item_inline_len);
if (repair) {
ret = repair_inline_ram_bytes(root, path,
&extent_num_bytes);
if (ret)
err |= FILE_EXTENT_ERROR;
} else {
err |= FILE_EXTENT_ERROR;
}
}
}
*end += extent_num_bytes;
*size += extent_num_bytes;
return err;
}
/*
* Check file extent datasum/hole, update the size of the file extents,
* check and update the last offset of the file extent.
*
* @root: the root of fs/file tree.
* @nodatasum: INODE_NODATASUM feature.
* @size: the sum of all EXTENT_DATA items size for this inode.
* @end: the offset of the last extent.
*
* Return 0 if no error occurred.
*/
static int check_file_extent(struct btrfs_root *root, struct btrfs_path *path,
unsigned int nodatasum, u64 isize, u64 *size,
u64 *end)
{
struct btrfs_file_extent_item *fi;
struct btrfs_key fkey;
struct extent_buffer *node = path->nodes[0];
u64 disk_bytenr;
u64 disk_num_bytes;
u64 extent_num_bytes;
u64 extent_offset;
u64 csum_found; /* In byte size, sectorsize aligned */
u64 search_start; /* Logical range start we search for csum */
u64 search_len; /* Logical range len we search for csum */
u64 gen;
u64 super_gen;
unsigned int extent_type;
unsigned int is_hole;
int slot = path->slots[0];
int compressed = 0;
int ret;
int err = 0;
btrfs_item_key_to_cpu(node, &fkey, slot);
fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(node, fi);
/* Check extent type */
if (extent_type != BTRFS_FILE_EXTENT_REG &&
extent_type != BTRFS_FILE_EXTENT_PREALLOC &&
extent_type != BTRFS_FILE_EXTENT_INLINE) {
err |= FILE_EXTENT_ERROR;
error("root %llu EXTENT_DATA[%llu %llu] type bad",
root->objectid, fkey.objectid, fkey.offset);
return err;
}
/* Check inline extent */
if (extent_type == BTRFS_FILE_EXTENT_INLINE)
return check_file_extent_inline(root, path, size, end);
/* Check REG_EXTENT/PREALLOC_EXTENT */
gen = btrfs_file_extent_generation(node, fi);
disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
extent_offset = btrfs_file_extent_offset(node, fi);
compressed = btrfs_file_extent_compression(node, fi);
is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
super_gen = btrfs_super_generation(gfs_info->super_copy);
if (gen > super_gen + 1) {
error(
"invalid file extent generation, have %llu expect (0, %llu]",
gen, super_gen + 1);
err |= INVALID_GENERATION;
}
/*
* Check EXTENT_DATA csum
*
* For plain (uncompressed) extent, we should only check the range
* we're referring to, as it's possible that part of prealloc extent
* has been written, and has csum:
*
* |<--- Original large preallocated extent A ---->|
* |<- Prealloc File Extent ->|<- Regular Extent ->|
* No csum Has csum
*
* For compressed extent, we should check the whole range.
*/
if (!compressed) {
search_start = disk_bytenr + extent_offset;
search_len = extent_num_bytes;
} else {
search_start = disk_bytenr;
search_len = disk_num_bytes;
}
ret = count_csum_range(search_start, search_len, &csum_found);
if (csum_found > 0 && nodatasum) {
err |= ODD_CSUM_ITEM;
error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
root->objectid, fkey.objectid, fkey.offset);
} else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
!is_hole && (ret < 0 || csum_found < search_len)) {
err |= CSUM_ITEM_MISSING;
error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
root->objectid, fkey.objectid, fkey.offset,
csum_found, search_len);
} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
csum_found > 0) {
ret = check_prealloc_extent_written(disk_bytenr, disk_num_bytes);
if (ret < 0)
return ret;
if (ret == 0) {
err |= ODD_CSUM_ITEM;
error(
"root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
root->objectid, fkey.objectid, fkey.offset,
csum_found);
}
}
/*
* Extra check for compressed extents:
* Btrfs doesn't allow NODATASUM and compressed extent co-exist, thus
* all compressed extents should have a checksum.
*/
if (compressed && csum_found < search_len) {
error(
"root %llu EXTENT_DATA[%llu %llu] compressed extent must have csum, but only %llu bytes have, expect %llu",
root->objectid, fkey.objectid, fkey.offset, csum_found,
search_len);
err |= CSUM_ITEM_MISSING;
}
if (compressed && nodatasum) {
error(
"root %llu EXTENT_DATA[%llu %llu] is compressed, but inode flag doesn't allow it",
root->objectid, fkey.objectid, fkey.offset);
err |= FILE_EXTENT_ERROR;
}
/* Check EXTENT_DATA hole */
if (!no_holes && (fkey.offset < isize) && (*end != fkey.offset)) {
if (repair)
ret = punch_extent_hole(root, path, fkey.objectid,
*end, fkey.offset - *end);
if (!repair || ret) {
err |= FILE_EXTENT_ERROR;
error(
"root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
root->objectid, fkey.objectid, fkey.offset,
fkey.objectid, *end);
}
}
/*
* Don't update extent end beyond rounded up isize. As holes
* after isize is not considered as missing holes.
*/
*end = min(round_up(isize, gfs_info->sectorsize),
fkey.offset + extent_num_bytes);
if (!is_hole)
*size += extent_num_bytes;
return err;
}
static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
u64 *size_ret)
{
struct btrfs_key key;
struct btrfs_path path;
u32 len;
struct btrfs_dir_item *di;
int ret;
int cur = 0;
int total = 0;
ASSERT(size_ret);
*size_ret = 0;
key.objectid = ino;
key.type = type;
key.offset = (u64)-1;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
ret = -EIO;
goto out;
}
/* if found, go to spacial case */
if (ret == 0)
goto special_case;
loop:
ret = btrfs_previous_item(root, &path, ino, type);
if (ret) {
ret = 0;
goto out;
}
special_case:
di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
cur = 0;
total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
while (cur < total) {
len = btrfs_dir_name_len(path.nodes[0], di);
if (len > BTRFS_NAME_LEN)
len = BTRFS_NAME_LEN;
*size_ret += len;
len += btrfs_dir_data_len(path.nodes[0], di);
len += sizeof(*di);
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
goto loop;
out:
btrfs_release_path(&path);
return ret;
}
static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
{
u64 item_size;
u64 index_size;
int ret;
ASSERT(size);
ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
if (ret)
goto out;
ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
if (ret)
goto out;
*size = item_size + index_size;
out:
if (ret)
error("failed to count root %llu INODE[%llu] root size",
root->objectid, ino);
return ret;
}
/*
* Set inode item nbytes to @nbytes
*
* Returns 0 on success
* Returns != 0 on error
*/
static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
struct btrfs_path *path,
u64 ino, u64 nbytes)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key research_key;
int err = 0;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
err |= ret;
goto out;
}
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret) {
err |= ret;
goto fail;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
btrfs_mark_buffer_dirty(path->nodes[0]);
fail:
btrfs_commit_transaction(trans, root);
out:
if (ret)
error("failed to set nbytes in inode %llu root %llu",
ino, root->root_key.objectid);
else
printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
root->root_key.objectid, nbytes);
/* research path */
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
err |= ret;
return err;
}
/*
* Set directory inode isize to @isize.
*
* Returns 0 on success.
* Returns != 0 on error.
*/
static int repair_dir_isize_lowmem(struct btrfs_root *root,
struct btrfs_path *path,
u64 ino, u64 isize)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key research_key;
int ret;
int err = 0;
btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
err |= ret;
goto out;
}
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret) {
err |= ret;
goto fail;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_size(path->nodes[0], ii, isize);
btrfs_mark_buffer_dirty(path->nodes[0]);
fail:
btrfs_commit_transaction(trans, root);
out:
if (ret)
error("failed to set isize in inode %llu root %llu",
ino, root->root_key.objectid);
else
printf("Set isize in inode %llu root %llu to %llu\n",
ino, root->root_key.objectid, isize);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
err |= ret;
return err;
}
/*
* Wrapper function for btrfs_add_orphan_item().
*
* Returns 0 on success.
* Returns != 0 on error.
*/
static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
struct btrfs_path *path, u64 ino)
{
struct btrfs_trans_handle *trans;
struct btrfs_key research_key;
int ret;
int err = 0;
btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
err |= ret;
goto out;
}
btrfs_release_path(path);
ret = btrfs_add_orphan_item(trans, root, path, ino);
err |= ret;
btrfs_commit_transaction(trans, root);
out:
if (ret)
error("failed to add inode %llu as orphan item root %llu",
ino, root->root_key.objectid);
else
printf("Added inode %llu as orphan item root %llu\n",
ino, root->root_key.objectid);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
err |= ret;
return err;
}
/* Set inode_item nlink to @ref_count.
* If @ref_count == 0, move it to "lost+found" and increase @ref_count.
*
* Returns 0 on success
*/
static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
struct btrfs_path *path, u64 ino,
const char *name, u32 namelen,
u64 ref_count, u8 filetype, u64 *nlink)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key old_key;
char namebuf[BTRFS_NAME_LEN] = {0};
int name_len;
int ret;
int ret2;
/* save the key */
btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
if (name && namelen) {
ASSERT(namelen <= BTRFS_NAME_LEN);
memcpy(namebuf, name, namelen);
name_len = namelen;
} else {
sprintf(namebuf, "%llu", ino);
name_len = count_digits(ino);
printf("Can't find file name for inode %llu, use %s instead\n",
ino, namebuf);
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
btrfs_release_path(path);
/* if refs is 0, put it into lostfound */
if (ref_count == 0) {
ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
name_len, filetype, &ref_count);
if (ret)
goto fail;
}
/* reset inode_item's nlink to ref_count */
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret)
goto fail;
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
btrfs_mark_buffer_dirty(path->nodes[0]);
if (nlink)
*nlink = ref_count;
fail:
btrfs_commit_transaction(trans, root);
out:
if (ret)
error(
"fail to repair nlink of inode %llu root %llu name %s filetype %u",
root->objectid, ino, namebuf, filetype);
else
printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
root->objectid, ino, namebuf, filetype);
/* research */
btrfs_release_path(path);
ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
if (ret2 < 0)
return ret |= ret2;
return ret;
}
static bool has_orphan_item(struct btrfs_root *root, u64 ino)
{
struct btrfs_path path;
struct btrfs_key key;
int ret;
btrfs_init_path(&path);
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = ino;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
btrfs_release_path(&path);
if (ret == 0)
return true;
return false;
}
static int repair_inode_gen_lowmem(struct btrfs_root *root,
struct btrfs_path *path)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item *ii;
struct btrfs_key key;
u64 transid;
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error("failed to start transaction for inode gen repair: %m");
return ret;
}
transid = trans->transid;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
ASSERT(key.type == BTRFS_INODE_ITEM_KEY);
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret > 0) {
ret = -ENOENT;
error("no inode item found for ino %llu", key.objectid);
goto error;
}
if (ret < 0) {
errno = -ret;
error("failed to find inode item for ino %llu: %m", key.objectid);
goto error;
}
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_generation(path->nodes[0], ii, trans->transid);
btrfs_set_inode_transid(path->nodes[0], ii, trans->transid);
btrfs_mark_buffer_dirty(path->nodes[0]);
ret = btrfs_commit_transaction(trans, root);
if (ret < 0) {
errno = -ret;
error("failed to commit transaction: %m");
goto error;
}
printf("resetting inode generation/transid to %llu for ino %llu\n",
transid, key.objectid);
return ret;
error:
btrfs_abort_transaction(trans, ret);
return ret;
}
/*
* Check INODE_ITEM and related ITEMs (the same inode number)
* 1. check link count
* 2. check inode ref/extref
* 3. check dir item/index
*
* Return 0 if no error occurred.
* Return >0 for error or hit the traversal is done(by error bitmap)
*/
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path)
{
struct extent_buffer *node;
struct btrfs_inode_item *ii;
struct btrfs_key key;
struct btrfs_key last_key;
struct btrfs_super_block *super = gfs_info->super_copy;
u64 inode_id;
u32 mode;
u64 flags;
u64 nlink;
u64 nbytes;
u64 isize;
u64 size = 0;
u64 refs = 0;
u64 extent_end = 0;
u64 extent_size = 0;
u64 generation;
u64 transid;
u64 gen_uplimit;
unsigned int dir;
unsigned int nodatasum;
bool is_orphan = false;
int slot;
int ret;
int err = 0;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 name_len = 0;
node = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(node, &key, slot);
inode_id = key.objectid;
if (inode_id == BTRFS_ORPHAN_OBJECTID) {
ret = btrfs_next_item(root, path);
if (ret > 0)
err |= LAST_ITEM;
return err;
}
is_orphan = has_orphan_item(root, inode_id);
ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
isize = btrfs_inode_size(node, ii);
nbytes = btrfs_inode_nbytes(node, ii);
mode = btrfs_inode_mode(node, ii);
flags = btrfs_inode_flags(node, ii);
dir = imode_to_type(mode) == BTRFS_FT_DIR;
nlink = btrfs_inode_nlink(node, ii);
generation = btrfs_inode_generation(node, ii);
transid = btrfs_inode_transid(node, ii);
nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
if (!is_valid_imode(mode)) {
error("invalid imode mode bits: 0%o", mode);
if (repair) {
ret = repair_imode_common(root, path);
if (ret < 0)
err |= INODE_MODE_ERROR;
} else {
err |= INODE_MODE_ERROR;
}
}
if (btrfs_super_log_root(super) != 0 &&
root->objectid == BTRFS_TREE_LOG_OBJECTID)
gen_uplimit = btrfs_super_generation(super) + 1;
else
gen_uplimit = btrfs_super_generation(super);
if (generation > gen_uplimit || transid > gen_uplimit) {
error(
"invalid inode generation %llu or transid %llu for ino %llu, expect [0, %llu)",
generation, transid, inode_id, gen_uplimit);
if (repair) {
ret = repair_inode_gen_lowmem(root, path);
if (ret < 0)
err |= INVALID_GENERATION;
} else {
err |= INVALID_GENERATION;
}
}
if (S_ISLNK(mode) &&
flags & (BTRFS_INODE_IMMUTABLE | BTRFS_INODE_APPEND)) {
err |= INODE_FLAGS_ERROR;
error(
"symlinks must never have immutable/append flags set, root %llu inode item %llu flags %llu may be corrupted",
root->objectid, inode_id, flags);
}
while (1) {
btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
ret = btrfs_next_item(root, path);
if (ret < 0) {
/* out will fill 'err' rusing current statistics */
goto out;
} else if (ret > 0) {
err |= LAST_ITEM;
goto out;
}
node = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(node, &key, slot);
if (key.objectid != inode_id)
goto out;
switch (key.type) {
case BTRFS_INODE_REF_KEY:
ret = check_inode_ref(root, &key, path, namebuf,
&name_len, &refs, mode);
err |= ret;
break;
case BTRFS_INODE_EXTREF_KEY:
{
bool ext_ref = btrfs_fs_incompat(gfs_info,
EXTENDED_IREF);
if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
warning("root %llu EXTREF[%llu %llu] isn't supported",
root->objectid, key.objectid,
key.offset);
ret = check_inode_extref(root, &key, node, slot, &refs,
mode);
err |= ret;
break;
}
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
if (!dir) {
warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
root->objectid, inode_id,
imode_to_type(mode), key.objectid,
key.offset);
}
ret = check_dir_item(root, &key, path, &size);
err |= ret;
break;
case BTRFS_EXTENT_DATA_KEY:
if (dir) {
warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
root->objectid, inode_id, key.objectid,
key.offset);
}
ret = check_file_extent(root, path, nodatasum, isize,
&extent_size, &extent_end);
err |= ret;
break;
case BTRFS_XATTR_ITEM_KEY:
break;
default:
error("ITEM[%llu %u %llu] UNKNOWN TYPE",
key.objectid, key.type, key.offset);
}
}
out:
if (err & LAST_ITEM) {
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
if (ret)
return err;
}
/* verify INODE_ITEM nlink/isize/nbytes */
if (dir) {
if (repair && (err & DIR_COUNT_AGAIN)) {
err &= ~DIR_COUNT_AGAIN;
count_dir_isize(root, inode_id, &size);
}
if ((nlink != 1 || refs != 1) && repair) {
ret = repair_inode_nlinks_lowmem(root, path, inode_id,
namebuf, name_len, refs, imode_to_type(mode),
&nlink);
}
if (nlink != 1) {
err |= LINK_COUNT_ERROR;
error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
root->objectid, inode_id, nlink);
}
/*
* Just a warning, as dir inode nbytes is just an
* instructive value.
*/
if (!IS_ALIGNED(nbytes, gfs_info->nodesize)) {
warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
root->objectid, inode_id,
gfs_info->nodesize);
}
if (isize != size) {
if (repair)
ret = repair_dir_isize_lowmem(root, path,
inode_id, size);
if (!repair || ret) {
err |= ISIZE_ERROR;
error(
"root %llu DIR INODE [%llu] size %llu not equal to %llu",
root->objectid, inode_id, isize, size);
}
}
} else {
if (nlink != refs) {
if (repair)
ret = repair_inode_nlinks_lowmem(root, path,
inode_id, namebuf, name_len, refs,
imode_to_type(mode), &nlink);
if (!repair || ret) {
err |= LINK_COUNT_ERROR;
error(
"root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
root->objectid, inode_id, nlink, refs);
}
} else if (!nlink && !is_orphan) {
if (repair)
ret = repair_inode_orphan_item_lowmem(root,
path, inode_id);
if (!repair || ret) {
err |= ORPHAN_ITEM;
error("root %llu INODE[%llu] is orphan item",
root->objectid, inode_id);
}
}
/*
* For orphan inode, updating nbytes/size is just a waste of
* time, so skip such repair and don't report them as error.
*/
if (nbytes != extent_size && !is_orphan) {
if (repair) {
ret = repair_inode_nbytes_lowmem(root, path,
inode_id, extent_size);
if (!ret)
nbytes = extent_size;
}
if (!repair || ret) {
err |= NBYTES_ERROR;
error(
"root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
root->objectid, inode_id, nbytes,
extent_size);
}
}
if (!nbytes && !no_holes && extent_end < isize) {
if (repair)
ret = punch_extent_hole(root, path, inode_id,
extent_end, isize - extent_end);
if (!repair || ret) {
err |= NBYTES_ERROR;
error(
"root %llu INODE[%llu] size %llu should have a file extent hole",
root->objectid, inode_id, isize);
}
}
}
if (err & LAST_ITEM)
btrfs_next_item(root, path);
return err;
}
/*
* Returns >0 Found error, not fatal, should continue
* Returns <0 Fatal error, must exit the whole check
* Returns 0 No errors found
*/
static int process_one_leaf(struct btrfs_root *root, struct btrfs_path *path,
struct node_refs *nrefs, int *level)
{
struct extent_buffer *cur = path->nodes[0];
struct btrfs_key key;
u64 cur_bytenr;
u32 nritems;
u64 first_ino = 0;
int root_level = btrfs_header_level(root->node);
int i;
int ret = 0; /* Final return value */
int err = 0; /* Positive error bitmap */
cur_bytenr = cur->start;
/* skip to first inode item or the first inode number change */
nritems = btrfs_header_nritems(cur);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(cur, &key, i);
if (i == 0)
first_ino = key.objectid;
if (key.type == BTRFS_INODE_ITEM_KEY ||
(first_ino && first_ino != key.objectid))
break;
}
if (i == nritems) {
path->slots[0] = nritems;
return 0;
}
path->slots[0] = i;
again:
err |= check_inode_item(root, path);
/* modify cur since check_inode_item may change path */
cur = path->nodes[0];
if (err & LAST_ITEM)
goto out;
/* still have inode items in this leaf */
if (cur->start == cur_bytenr)
goto again;
/*
* we have switched to another leaf, above nodes may
* have changed, here walk down the path, if a node
* or leaf is shared, check whether we can skip this
* node or leaf.
*/
for (i = root_level; i >= 0; i--) {
if (path->nodes[i]->start == nrefs->bytenr[i])
continue;
ret = update_nodes_refs(root, path->nodes[i]->start,
path->nodes[i], nrefs, i, 0);
if (ret)
goto out;
if (!nrefs->need_check[i]) {
*level += 1;
break;
}
}
for (i = 0; i < *level; i++) {
free_extent_buffer(path->nodes[i]);
path->nodes[i] = NULL;
}
out:
err &= ~LAST_ITEM;
if (err && !ret)
ret = err;
return ret;
}
/*
* @level if @level == -1 means extent data item
* else normal treeblock.
*/
static int should_check_extent_strictly(struct btrfs_root *root,
struct node_refs *nrefs, int level)
{
int root_level = btrfs_header_level(root->node);
if (level > root_level || level < -1)
return 1;
if (level == root_level)
return 1;
/*
* if the upper node is marked full backref, it should contain shared
* backref of the parent (except owner == root->objectid).
*/
while (++level <= root_level)
if (nrefs->refs[level] > 1)
return 0;
return 1;
}
static int check_extent_inline_ref(struct extent_buffer *eb,
struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
{
int ret;
u8 type = btrfs_extent_inline_ref_type(eb, iref);
switch (type) {
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_EXTENT_DATA_REF_KEY:
case BTRFS_SHARED_BLOCK_REF_KEY:
case BTRFS_SHARED_DATA_REF_KEY:
ret = 0;
break;
default:
error("extent[%llu %u %llu] has unknown ref type: %d",
key->objectid, key->type, key->offset, type);
ret = UNKNOWN_TYPE;
break;
}
return ret;
}
/*
* Check backrefs of a tree block given by @bytenr or @eb.
*
* @root: the root containing the @bytenr or @eb
* @eb: tree block extent buffer, can be NULL
* @bytenr: bytenr of the tree block to search
* @level: tree level of the tree block
* @owner: owner of the tree block
*
* Return >0 for any error found and output error message
* Return 0 for no error found
*/
static int check_tree_block_ref(struct btrfs_root *root,
struct extent_buffer *eb, u64 bytenr,
int level, u64 owner, struct node_refs *nrefs)
{
struct btrfs_key key;
struct btrfs_root *extent_root = gfs_info->extent_root;
struct btrfs_path path;
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
struct extent_buffer *leaf;
unsigned long end;
unsigned long ptr;
int slot;
int skinny_level;
int root_level = btrfs_header_level(root->node);
int type;
u32 nodesize = gfs_info->nodesize;
u32 item_size;
u64 offset;
int found_ref = 0;
int err = 0;
int ret;
int strict = 1;
int parent = 0;
btrfs_init_path(&path);
key.objectid = bytenr;
if (btrfs_fs_incompat(gfs_info, SKINNY_METADATA))
key.type = BTRFS_METADATA_ITEM_KEY;
else
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = (u64)-1;
/* Search for the backref in extent tree */
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0) {
err |= BACKREF_MISSING;
goto out;
}
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret) {
err |= BACKREF_MISSING;
goto out;
}
leaf = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
if (key.type == BTRFS_METADATA_ITEM_KEY) {
skinny_level = (int)key.offset;
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
} else {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
skinny_level = btrfs_tree_block_level(leaf, info);
iref = (struct btrfs_extent_inline_ref *)(info + 1);
}
if (eb) {
u64 header_gen;
u64 extent_gen;
/*
* Due to the feature of shared tree blocks, if the upper node
* is a fs root or shared node, the extent of checked node may
* not be updated until the next CoW.
*/
if (nrefs)
strict = should_check_extent_strictly(root, nrefs,
level);
if (!(btrfs_extent_flags(leaf, ei) &
BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
error(
"extent[%llu %u] backref type mismatch, missing bit: %llx",
key.objectid, nodesize,
BTRFS_EXTENT_FLAG_TREE_BLOCK);
err = BACKREF_MISMATCH;
}
header_gen = btrfs_header_generation(eb);
extent_gen = btrfs_extent_generation(leaf, ei);
if (header_gen != extent_gen) {
error(
"extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
key.objectid, nodesize, header_gen,
extent_gen);
err = BACKREF_MISMATCH;
}
if (level != skinny_level) {
error(
"extent[%llu %u] level mismatch, wanted: %u, have: %u",
key.objectid, nodesize, level, skinny_level);
err = BACKREF_MISMATCH;
}
if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
error(
"extent[%llu %u] is referred by other roots than %llu",
key.objectid, nodesize, root->objectid);
err = BACKREF_MISMATCH;
}
}
/*
* Iterate the extent/metadata item to find the exact backref
*/
item_size = btrfs_item_size_nr(leaf, slot);
ptr = (unsigned long)iref;
end = (unsigned long)ei + item_size;
while (ptr < end) {
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_extent_inline_ref_type(leaf, iref);
offset = btrfs_extent_inline_ref_offset(leaf, iref);
ret = check_extent_inline_ref(leaf, &key, iref);
if (ret) {
err |= ret;
break;
}
if (type == BTRFS_TREE_BLOCK_REF_KEY) {
if (offset == root->objectid)
found_ref = 1;
if (!strict && owner == offset)
found_ref = 1;
} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
/*
* Backref of tree reloc root points to itself, no need
* to check backref any more.
*
* This may be an error of loop backref, but extent tree
* checker should have already handled it.
* Here we only need to avoid infinite iteration.
*/
if (offset == bytenr) {
found_ref = 1;
} else {
/*
* Check if the backref points to valid
* referencer
*/
found_ref = !check_tree_block_ref(root, NULL,
offset, level + 1, owner, NULL);
}
}
if (found_ref)
break;
ptr += btrfs_extent_inline_ref_size(type);
}
/*
* Inlined extent item doesn't have what we need, check
* TREE_BLOCK_REF_KEY
*/
if (!found_ref) {
btrfs_release_path(&path);
key.objectid = bytenr;
key.type = BTRFS_TREE_BLOCK_REF_KEY;
key.offset = root->objectid;
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (!ret)
found_ref = 1;
}
/*
* Finally check SHARED BLOCK REF, any found will be good
* Here we're not doing comprehensive extent backref checking,
* only need to ensure there is some extent referring to this
* tree block.
*/
if (!found_ref) {
btrfs_release_path(&path);
key.objectid = bytenr;
key.type = BTRFS_SHARED_BLOCK_REF_KEY;
key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0) {
err |= BACKREF_MISSING;
goto out;
}
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret) {
err |= BACKREF_MISSING;
goto out;
}
found_ref = 1;
}
if (!found_ref)
err |= BACKREF_MISSING;
out:
btrfs_release_path(&path);
if (nrefs && strict &&
level < root_level && nrefs->full_backref[level + 1])
parent = nrefs->bytenr[level + 1];
if (eb && (err & BACKREF_MISSING))
error(
"extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
bytenr, nodesize, owner, level,
parent ? "parent" : "root",
parent ? parent : root->objectid);
return err;
}
/*
* If @err contains BYTES_UNALIGNED then delete the extent data item.
* If @err contains BACKREF_MISSING then add extent of the
* file_extent_data_item.
*
* Returns error bits after reapir.
*/
static int repair_extent_data_item(struct btrfs_root *root,
struct btrfs_path *pathp,
struct node_refs *nrefs,
int err)
{
struct btrfs_trans_handle *trans = NULL;
struct btrfs_file_extent_item *fi;
struct btrfs_key fi_key;
struct btrfs_key key;
struct btrfs_extent_item *ei;
struct btrfs_path path;
struct btrfs_root *extent_root = gfs_info->extent_root;
struct extent_buffer *eb;
u64 size;
u64 disk_bytenr;
u64 num_bytes;
u64 parent;
u64 offset;
u64 extent_offset;
u64 file_offset;
int generation;
int slot;
int need_insert = 0;
int ret = 0;
eb = pathp->nodes[0];
slot = pathp->slots[0];
btrfs_item_key_to_cpu(eb, &fi_key, slot);
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
btrfs_file_extent_disk_bytenr(eb, fi) == 0)
return err;
file_offset = fi_key.offset;
generation = btrfs_file_extent_generation(eb, fi);
disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
extent_offset = btrfs_file_extent_offset(eb, fi);
offset = file_offset - extent_offset;
if (nrefs->full_backref[0])
parent = btrfs_header_bytenr(eb);
else
parent = 0;
if (err & BYTES_UNALIGNED) {
ret = delete_item(root, pathp);
if (!ret)
err = 0;
goto out_no_release;
}
/* now repair only adds backref */
if ((err & BACKREF_MISSING) == 0)
return err;
/* search extent item */
key.objectid = disk_bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0) {
ret = -EIO;
goto out;
}
need_insert = ret;
ret = avoid_extents_overwrite();
if (ret)
goto out;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
errno = -ret;
error("fail to start transaction: %m");
goto out;
}
/* insert an extent item */
if (need_insert) {
key.objectid = disk_bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
size = sizeof(*ei);
btrfs_release_path(&path);
ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
size);
if (ret)
goto out;
eb = path.nodes[0];
ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
btrfs_set_extent_refs(eb, ei, 0);
btrfs_set_extent_generation(eb, ei, generation);
btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
btrfs_mark_buffer_dirty(eb);
ret = btrfs_update_block_group(trans, disk_bytenr, num_bytes,
1, 0);
btrfs_release_path(&path);
}
ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
root->objectid,
parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
offset);
if (ret) {
error(
"failed to increase extent data backref[%llu %llu] root %llu",
disk_bytenr, num_bytes, root->objectid);
goto out;
} else {
printf("Add one extent data backref [%llu %llu]\n",
disk_bytenr, num_bytes);
}
err &= ~BACKREF_MISSING;
out:
if (trans)
btrfs_commit_transaction(trans, root);
btrfs_release_path(&path);
out_no_release:
if (ret)
error("can't repair root %llu extent data item[%llu %llu]",
root->objectid, disk_bytenr, num_bytes);
return err;
}
/*
* Check EXTENT_DATA item, mainly for its dbackref in extent tree
*
* Return >0 any error found and output error message
* Return 0 for no error found
*/
static int check_extent_data_item(struct btrfs_root *root,
struct btrfs_path *pathp,
struct node_refs *nrefs, int account_bytes)
{
struct btrfs_file_extent_item *fi;
struct extent_buffer *eb = pathp->nodes[0];
struct btrfs_path path;
struct btrfs_root *extent_root = gfs_info->extent_root;
struct btrfs_key fi_key;
struct btrfs_key dbref_key;
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_data_ref *dref;
u64 owner;
u64 disk_bytenr;
u64 disk_num_bytes;
u64 extent_num_bytes;
u64 extent_flags;
u64 offset;
u32 item_size;
unsigned long end;
unsigned long ptr;
int type;
int found_dbackref = 0;
int slot = pathp->slots[0];
int err = 0;
int ret;
int strict;
btrfs_item_key_to_cpu(eb, &fi_key, slot);
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/* Nothing to check for hole and inline data extents */
if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
btrfs_file_extent_disk_bytenr(eb, fi) == 0)
return 0;
disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
offset = btrfs_file_extent_offset(eb, fi);
/* Check unaligned disk_bytenr, disk_num_bytes and num_bytes */
if (!IS_ALIGNED(disk_bytenr, gfs_info->sectorsize)) {
error(
"file extent [%llu, %llu] has unaligned disk bytenr: %llu, should be aligned to %u",
fi_key.objectid, fi_key.offset, disk_bytenr,
gfs_info->sectorsize);
err |= BYTES_UNALIGNED;
}
if (!IS_ALIGNED(disk_num_bytes, gfs_info->sectorsize)) {
error(
"file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
fi_key.objectid, fi_key.offset, disk_num_bytes,
gfs_info->sectorsize);
err |= BYTES_UNALIGNED;
} else if (account_bytes) {
data_bytes_allocated += disk_num_bytes;
}
if (!IS_ALIGNED(extent_num_bytes, gfs_info->sectorsize)) {
error(
"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
fi_key.objectid, fi_key.offset, extent_num_bytes,
gfs_info->sectorsize);
err |= BYTES_UNALIGNED;
} else if (account_bytes) {
data_bytes_referenced += extent_num_bytes;
}
owner = btrfs_header_owner(eb);
/* Check the extent item of the file extent in extent tree */
btrfs_init_path(&path);
dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
if (ret)
goto out;
leaf = path.nodes[0];
slot = path.slots[0];
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
extent_flags = btrfs_extent_flags(leaf, ei);
if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
error(
"file extent[%llu %llu] root %llu owner %llu backref type mismatch, wanted bit: %llx",
fi_key.objectid, fi_key.offset, root->objectid, owner,
BTRFS_EXTENT_FLAG_DATA);
err |= BACKREF_MISMATCH;
}
/* Check data backref inside that extent item */
item_size = btrfs_item_size_nr(leaf, path.slots[0]);
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
ptr = (unsigned long)iref;
end = (unsigned long)ei + item_size;
strict = should_check_extent_strictly(root, nrefs, -1);
while (ptr < end) {
u64 ref_root;
u64 ref_objectid;
u64 ref_offset;
bool match = false;
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_extent_inline_ref_type(leaf, iref);
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
ret = check_extent_inline_ref(leaf, &dbref_key, iref);
if (ret) {
err |= ret;
break;
}
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
ref_root = btrfs_extent_data_ref_root(leaf, dref);
ref_objectid = btrfs_extent_data_ref_objectid(leaf,
dref);
ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
if (ref_objectid == fi_key.objectid &&
ref_offset == fi_key.offset - offset)
match = true;
if (ref_root == root->objectid && match)
found_dbackref = 1;
else if (!strict && owner == ref_root && match)
found_dbackref = 1;
} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
found_dbackref = !check_tree_block_ref(root, NULL,
btrfs_extent_inline_ref_offset(leaf, iref),
0, owner, NULL);
}
if (found_dbackref)
break;
ptr += btrfs_extent_inline_ref_size(type);
}
if (!found_dbackref) {
btrfs_release_path(&path);
/* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
dbref_key.offset = hash_extent_data_ref(owner, fi_key.objectid,
fi_key.offset - offset);
ret = btrfs_search_slot(NULL, gfs_info->extent_root,
&dbref_key, &path, 0, 0);
if (!ret) {
found_dbackref = 1;
goto out;
}
btrfs_release_path(&path);
/*
* Neither inlined nor EXTENT_DATA_REF found, try
* SHARED_DATA_REF as last chance.
*/
dbref_key.objectid = disk_bytenr;
dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
dbref_key.offset = eb->start;
ret = btrfs_search_slot(NULL, gfs_info->extent_root,
&dbref_key, &path, 0, 0);
if (!ret) {
found_dbackref = 1;
goto out;
}
}
out:
if (!found_dbackref)
err |= BACKREF_MISSING;
btrfs_release_path(&path);
if (err & BACKREF_MISSING) {
error(
"file extent[%llu %llu] root %llu owner %llu backref lost",
fi_key.objectid, fi_key.offset, root->objectid, owner);
}
return err;
}
/*
* Check a block group item with its referener (chunk) and its used space
* with extent/metadata item
*/
static int check_block_group_item(struct extent_buffer *eb, int slot)
{
struct btrfs_root *extent_root = gfs_info->extent_root;
struct btrfs_root *chunk_root = gfs_info->chunk_root;
struct btrfs_block_group_item *bi;
struct btrfs_block_group_item bg_item;
struct btrfs_path path;
struct btrfs_key bg_key;
struct btrfs_key chunk_key;
struct btrfs_key extent_key;
struct btrfs_chunk *chunk;
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
u32 nodesize = btrfs_super_nodesize(gfs_info->super_copy);
u64 flags;
u64 bg_flags;
u64 used;
u64 total = 0;
int ret;
int err = 0;
btrfs_item_key_to_cpu(eb, &bg_key, slot);
bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
used = btrfs_stack_block_group_used(&bg_item);
bg_flags = btrfs_stack_block_group_flags(&bg_item);
chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
chunk_key.offset = bg_key.objectid;
btrfs_init_path(&path);
/* Search for the referencer chunk */
ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
if (ret) {
error(
"block group[%llu %llu] did not find the related chunk item",
bg_key.objectid, bg_key.offset);
err |= REFERENCER_MISSING;
} else {
chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_chunk);
if (btrfs_chunk_length(path.nodes[0], chunk) !=
bg_key.offset) {
error(
"block group[%llu %llu] related chunk item length does not match",
bg_key.objectid, bg_key.offset);
err |= REFERENCER_MISMATCH;
}
}
btrfs_release_path(&path);
/* Search from the block group bytenr */
extent_key.objectid = bg_key.objectid;
extent_key.type = 0;
extent_key.offset = 0;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
if (ret < 0)
goto out;
/* Iterate extent tree to account used space */
while (1) {
leaf = path.nodes[0];
/* Search slot can point to the last item beyond leaf nritems */
if (path.slots[0] >= btrfs_header_nritems(leaf))
goto next;
btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
break;
if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
extent_key.type != BTRFS_EXTENT_ITEM_KEY)
goto next;
if (extent_key.objectid < bg_key.objectid)
goto next;
if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
total += nodesize;
else
total += extent_key.offset;
ei = btrfs_item_ptr(leaf, path.slots[0],
struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
if (flags & BTRFS_EXTENT_FLAG_DATA) {
if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
error(
"bad extent[%llu, %llu) type mismatch with chunk",
extent_key.objectid,
extent_key.objectid + extent_key.offset);
err |= CHUNK_TYPE_MISMATCH;
}
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA))) {
error(
"bad extent[%llu, %llu) type mismatch with chunk",
extent_key.objectid,
extent_key.objectid + nodesize);
err |= CHUNK_TYPE_MISMATCH;
}
}
next:
ret = btrfs_next_item(extent_root, &path);
if (ret)
break;
}
out:
btrfs_release_path(&path);
if (total != used) {
error(
"block group[%llu %llu] used %llu but extent items used %llu",
bg_key.objectid, bg_key.offset, used, total);
err |= BG_ACCOUNTING_ERROR;
}
return err;
}
/*
* Get real tree block level for the case like shared block
* Return >= 0 as tree level
* Return <0 for error
*/
static int query_tree_block_level(u64 bytenr)
{
struct extent_buffer *eb;
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_extent_item *ei;
u64 flags;
u64 transid;
u8 backref_level;
u8 header_level;
int ret;
/* Search extent tree for extent generation and level */
key.objectid = bytenr;
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = (u64)-1;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, gfs_info->extent_root, &key, &path, 0, 0);
if (ret < 0)
goto release_out;
ret = btrfs_previous_extent_item(gfs_info->extent_root, &path, bytenr);
if (ret < 0)
goto release_out;
if (ret > 0) {
ret = -ENOENT;
goto release_out;
}
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_extent_item);
flags = btrfs_extent_flags(path.nodes[0], ei);
if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
ret = -ENOENT;
goto release_out;
}
/* Get transid for later read_tree_block() check */
transid = btrfs_extent_generation(path.nodes[0], ei);
/* Get backref level as one source */
if (key.type == BTRFS_METADATA_ITEM_KEY) {
backref_level = key.offset;
} else {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
backref_level = btrfs_tree_block_level(path.nodes[0], info);
}
btrfs_release_path(&path);
/* Get level from tree block as an alternative source */
eb = read_tree_block(gfs_info, bytenr, transid);
if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
header_level = btrfs_header_level(eb);
free_extent_buffer(eb);
if (header_level != backref_level)
return -EIO;
return header_level;
release_out:
btrfs_release_path(&path);
return ret;
}
/*
* Check if a tree block backref is valid (points to a valid tree block)
* if level == -1, level will be resolved
* Return >0 for any error found and print error message
*/
static int check_tree_block_backref(u64 root_id, u64 bytenr, int level)
{
struct btrfs_root *root;
struct btrfs_key key;
struct btrfs_path path;
struct extent_buffer *eb;
struct extent_buffer *node;
u32 nodesize = btrfs_super_nodesize(gfs_info->super_copy);
int err = 0;
int ret;
/* Query level for level == -1 special case */
if (level == -1)
level = query_tree_block_level(bytenr);
if (level < 0) {
err |= REFERENCER_MISSING;
goto out;
}
key.objectid = root_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(root)) {
err |= REFERENCER_MISSING;
goto out;
}
/* Read out the tree block to get item/node key */
eb = read_tree_block(gfs_info, bytenr, 0);
if (!extent_buffer_uptodate(eb)) {
err |= REFERENCER_MISSING;
free_extent_buffer(eb);
goto out;
}
/* Empty tree, no need to check key */
if (!btrfs_header_nritems(eb) && !level) {
free_extent_buffer(eb);
goto out;
}
if (level)
btrfs_node_key_to_cpu(eb, &key, 0);
else
btrfs_item_key_to_cpu(eb, &key, 0);
free_extent_buffer(eb);
btrfs_init_path(&path);
path.lowest_level = level;
/* Search with the first key, to ensure we can reach it */
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
err |= REFERENCER_MISSING;
goto release_out;
}
node = path.nodes[level];
if (btrfs_header_bytenr(node) != bytenr) {
error(
"extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
bytenr, nodesize, bytenr,
btrfs_header_bytenr(node));
err |= REFERENCER_MISMATCH;
}
if (btrfs_header_level(node) != level) {
error(
"extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
bytenr, nodesize, level,
btrfs_header_level(node));
err |= REFERENCER_MISMATCH;
}
release_out:
btrfs_release_path(&path);
out:
if (err & REFERENCER_MISSING) {
if (level < 0)
error("extent [%llu %d] lost referencer (owner: %llu)",
bytenr, nodesize, root_id);
else
error(
"extent [%llu %d] lost referencer (owner: %llu, level: %u)",
bytenr, nodesize, root_id, level);
}
return err;
}
/*
* Check if tree block @eb is tree reloc root.
* Return 0 if it's not or any problem happens
* Return 1 if it's a tree reloc root
*/
static int is_tree_reloc_root(struct extent_buffer *eb)
{
struct btrfs_root *tree_reloc_root;
struct btrfs_key key;
u64 bytenr = btrfs_header_bytenr(eb);
u64 owner = btrfs_header_owner(eb);
int ret = 0;
key.objectid = BTRFS_TREE_RELOC_OBJECTID;
key.offset = owner;
key.type = BTRFS_ROOT_ITEM_KEY;
tree_reloc_root = btrfs_read_fs_root_no_cache(gfs_info, &key);
if (IS_ERR(tree_reloc_root))
return 0;
if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
ret = 1;
btrfs_free_fs_root(tree_reloc_root);
return ret;
}
/*
* Check referencer for shared block backref
* If level == -1, this function will resolve the level.
*/
static int check_shared_block_backref(u64 parent, u64 bytenr, int level)
{
struct extent_buffer *eb;
u32 nr;
int found_parent = 0;
int i;
eb = read_tree_block(gfs_info, parent, 0);
if (!extent_buffer_uptodate(eb))
goto out;
if (level == -1)
level = query_tree_block_level(bytenr);
if (level < 0)
goto out;
/* It's possible it's a tree reloc root */
if (parent == bytenr) {
if (is_tree_reloc_root(eb))
found_parent = 1;
goto out;
}
if (level + 1 != btrfs_header_level(eb))
goto out;
nr = btrfs_header_nritems(eb);
for (i = 0; i < nr; i++) {
if (bytenr == btrfs_node_blockptr(eb, i)) {
found_parent = 1;
break;
}
}
out:
free_extent_buffer(eb);
if (!found_parent) {
error(
"shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
bytenr, gfs_info->nodesize, parent, level);
return REFERENCER_MISSING;
}
return 0;
}
/*
* Check referencer for normal (inlined) data ref
* If len == 0, it will be resolved by searching in extent tree
*/
static int check_extent_data_backref(u64 root_id, u64 objectid, u64 offset,
u64 bytenr, u64 len, u32 count)
{
struct btrfs_root *root;
struct btrfs_root *extent_root = gfs_info->extent_root;
struct btrfs_key key;
struct btrfs_path path;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
u32 found_count = 0;
int slot;
int ret = 0;
if (!len) {
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = (u64)-1;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
if (ret < 0)
goto out;
ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
if (ret)
goto out;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.objectid != bytenr ||
key.type != BTRFS_EXTENT_ITEM_KEY)
goto out;
len = key.offset;
btrfs_release_path(&path);
}
key.objectid = root_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
btrfs_init_path(&path);
root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(root))
goto out;
key.objectid = objectid;
key.type = BTRFS_EXTENT_DATA_KEY;
/*
* It can be nasty as data backref offset is
* file offset - file extent offset, which is smaller or
* equal to original backref offset. The only special case is
* overflow. So we need to special check and do further search.
*/
key.offset = offset & (1ULL << 63) ? 0 : offset;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
goto out;
/*
* Search afterwards to get correct one
* NOTE: As we must do a comprehensive check on the data backref to
* make sure the dref count also matches, we must iterate all file
* extents for that inode.
*/
while (1) {
leaf = path.nodes[0];
slot = path.slots[0];
if (slot >= btrfs_header_nritems(leaf) ||
btrfs_header_owner(leaf) != root_id)
goto next;
/*
* For tree blocks have been relocated, data backref are
* shared instead of keyed. Do not account it.
*/
if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
/*
* skip the leaf to speed up.
*/
slot = btrfs_header_nritems(leaf);
goto next;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid != objectid ||
key.type != BTRFS_EXTENT_DATA_KEY)
break;
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
/*
* Except normal disk bytenr and disk num bytes, we still
* need to do extra check on dbackref offset as
* dbackref offset = file_offset - file_extent_offset
*
* Also, we must check the leaf owner.
* In case of shared tree blocks (snapshots) we can inherit
* leaves from source snapshot.
* In that case, reference from source snapshot should not
* count.
*/
if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
(u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
offset && btrfs_header_owner(leaf) == root_id)
found_count++;
next:
ret = btrfs_next_item(root, &path);
if (ret)
break;
}
out:
btrfs_release_path(&path);
if (found_count != count) {
error(
"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
bytenr, len, root_id, objectid, offset, count,
found_count);
return REFERENCER_MISSING;
}
return 0;
}
/*
* Check if the referencer of a shared data backref exists
*/
static int check_shared_data_backref(u64 parent, u64 bytenr)
{
struct extent_buffer *eb;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
u32 nr;
int found_parent = 0;
int i;
eb = read_tree_block(gfs_info, parent, 0);
if (!extent_buffer_uptodate(eb))
goto out;
nr = btrfs_header_nritems(eb);
for (i = 0; i < nr; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
if (key.type != BTRFS_EXTENT_DATA_KEY)
continue;
fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
continue;
if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
found_parent = 1;
break;
}
}
out:
free_extent_buffer(eb);
if (!found_parent) {
error("shared extent %llu referencer lost (parent: %llu)",
bytenr, parent);
return REFERENCER_MISSING;
}
return 0;
}
/*
* Only delete backref if REFERENCER_MISSING or REFERENCER_MISMATCH.
*
* Returns <0 error
* Returns >0 the backref was deleted but extent still exists
* Returns =0 the whole extent item was deleted
*/
static int repair_extent_item(struct btrfs_root *root, struct btrfs_path *path,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *extent_root = gfs_info->extent_root;
struct btrfs_key old_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
ret = avoid_extents_overwrite();
if (ret)
return ret;
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error("fail to start transaction: %m");
goto out;
}
/* delete the backref */
ret = btrfs_free_extent(trans, gfs_info->fs_root, bytenr,
num_bytes, parent, root_objectid, owner, offset);
if (!ret)
printf("Delete backref in extent [%llu %llu]\n",
bytenr, num_bytes);
else {
error("fail to delete backref in extent [%llu %llu]",
bytenr, num_bytes);
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_commit_transaction(trans, extent_root);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
if (ret > 0) {
/* odd, there must be one block group before at least */
if (path->slots[0] == 0) {
ret = -EUCLEAN;
goto out;
}
/*
* btrfs_free_extent() has deleted the extent item,
* let path point to last checked item.
*/
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
path->slots[0] = btrfs_header_nritems(path->nodes[0]) - 1;
else
path->slots[0]--;
ret = 0;
} else if (ret == 0) {
ret = 1;
}
out:
return ret;
}
/*
* Reset generation for extent item specified by @path.
* Will try to grab the proper generation number from other sources, but if
* it fails, then use current transid as fallback.
*
* Returns < 0 for error.
* Return 0 if the generation is reset.
*/
static int repair_extent_item_generation(struct btrfs_path *path)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_extent_item *ei;
struct btrfs_root *extent_root = gfs_info->extent_root;
u64 new_gen = 0;;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
ASSERT(key.type == BTRFS_METADATA_ITEM_KEY ||
key.type == BTRFS_EXTENT_ITEM_KEY);
get_extent_item_generation(key.objectid, &new_gen);
ret = avoid_extents_overwrite();
if (ret)
return ret;
btrfs_release_path(path);
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error("failed to start transaction: %m");
return ret;
}
ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
if (ret < 0) {
errno = -ret;
error("failed to locate extent item for %llu: %m", key.objectid);
btrfs_abort_transaction(trans, ret);
return ret;
}
if (!new_gen)
new_gen = trans->transid;
ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item);
btrfs_set_extent_generation(path->nodes[0], ei, new_gen);
ret = btrfs_commit_transaction(trans, extent_root);
if (ret < 0) {
errno = -ret;
error("failed to commit transaction: %m");
btrfs_abort_transaction(trans, ret);
return ret;
}
printf("Reset extent item (%llu) generation to %llu\n",
key.objectid, new_gen);
return ret;
}
/*
* This function will check a given extent item, including its backref and
* itself (like crossing stripe boundary and type)
*
* Since we don't use extent_record anymore, introduce new error bit
*/
static int check_extent_item(struct btrfs_path *path)
{
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
struct btrfs_extent_data_ref *dref;
struct extent_buffer *eb = path->nodes[0];
unsigned long ptr;
int slot = path->slots[0];
int type;
u32 nodesize = btrfs_super_nodesize(gfs_info->super_copy);
u32 item_size = btrfs_item_size_nr(eb, slot);
u64 flags;
u64 offset;
u64 parent;
u64 num_bytes;
u64 root_objectid;
u64 gen;
u64 owner;
u64 owner_offset;
u64 super_gen;
int metadata = 0;
int level;
struct btrfs_key key;
int ret;
int err = 0;
int tmp_err = 0;
u32 ptr_offset;
btrfs_item_key_to_cpu(eb, &key, slot);
if (key.type == BTRFS_EXTENT_ITEM_KEY) {
bytes_used += key.offset;
num_bytes = key.offset;
} else {
bytes_used += nodesize;
num_bytes = nodesize;
}
if (item_size < sizeof(*ei)) {
/*
* COMPAT_EXTENT_TREE_V0 case, but it's already a super
* old thing when on disk format is still un-determined.
* No need to care about it anymore
*/
error("unsupported COMPAT_EXTENT_TREE_V0 detected");
return -ENOTTY;
}
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
gen = btrfs_extent_generation(eb, ei);
super_gen = btrfs_super_generation(gfs_info->super_copy);
if (gen > super_gen + 1) {
error(
"invalid generation for extent %llu, have %llu expect (0, %llu]",
key.objectid, gen, super_gen + 1);
tmp_err |= INVALID_GENERATION;
}
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
metadata = 1;
if (metadata && check_crossing_stripes(gfs_info, key.objectid,
eb->len)) {
error("bad metadata [%llu, %llu) crossing stripe boundary",
key.objectid, key.objectid + nodesize);
err |= CROSSING_STRIPE_BOUNDARY;
}
if (metadata)
btrfs_check_subpage_eb_alignment(key.objectid, nodesize);
ptr = (unsigned long)(ei + 1);
if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
/* Old EXTENT_ITEM metadata */
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)ptr;
level = btrfs_tree_block_level(eb, info);
ptr += sizeof(struct btrfs_tree_block_info);
} else {
/* New METADATA_ITEM */
level = key.offset;
}
ptr_offset = ptr - (unsigned long)ei;
next:
/* Reached extent item end normally */
if (ptr_offset == item_size)
goto out;
/* Beyond extent item end, wrong item size */
if (ptr_offset > item_size) {
err |= ITEM_SIZE_MISMATCH;
error("extent item at bytenr %llu slot %d has wrong size",
eb->start, slot);
goto out;
}
ptr = (unsigned long)ei + ptr_offset;
parent = 0;
root_objectid = 0;
owner = 0;
owner_offset = 0;
/* Now check every backref in this extent item */
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_extent_inline_ref_type(eb, iref);
offset = btrfs_extent_inline_ref_offset(eb, iref);
switch (type) {
case BTRFS_TREE_BLOCK_REF_KEY:
root_objectid = offset;
owner = level;
tmp_err |= check_tree_block_backref(offset, key.objectid, level);
break;
case BTRFS_SHARED_BLOCK_REF_KEY:
parent = offset;
tmp_err |= check_shared_block_backref(offset, key.objectid, level);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
root_objectid = btrfs_extent_data_ref_root(eb, dref);
owner = btrfs_extent_data_ref_objectid(eb, dref);
owner_offset = btrfs_extent_data_ref_offset(eb, dref);
tmp_err |= check_extent_data_backref(root_objectid,
owner, owner_offset, key.objectid, key.offset,
btrfs_extent_data_ref_count(eb, dref));
break;
case BTRFS_SHARED_DATA_REF_KEY:
parent = offset;
tmp_err |= check_shared_data_backref(offset, key.objectid);
break;
default:
error("extent[%llu %d %llu] has unknown ref type: %d",
key.objectid, key.type, key.offset, type);
err |= UNKNOWN_TYPE;
goto out;
}
if ((tmp_err & (REFERENCER_MISSING | REFERENCER_MISMATCH))
&& repair) {
ret = repair_extent_item(gfs_info->extent_root, path,
key.objectid, num_bytes, parent, root_objectid,
owner, owner_offset);
if (ret < 0) {
err |= tmp_err;
err |= FATAL_ERROR;
goto out;
} else if (ret == 0) {
err = 0;
goto out;
} else if (ret > 0) {
/*
* The error has been repaired which means the
* extent item is still existed with other backrefs,
* go to check next.
*/
tmp_err &= ~REFERENCER_MISSING;
tmp_err &= ~REFERENCER_MISMATCH;
err |= tmp_err;
eb = path->nodes[0];
slot = path->slots[0];
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
item_size = btrfs_item_size_nr(eb, slot);
goto next;
}
}
if ((tmp_err & INVALID_GENERATION) && repair){
ret = repair_extent_item_generation(path);
if (ret < 0) {
err |= tmp_err;
err |= FATAL_ERROR;
goto out;
}
/* Error has been repaired */
tmp_err &= ~INVALID_GENERATION;
err |= tmp_err;
eb = path->nodes[0];
slot = path->slots[0];
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
item_size = btrfs_item_size_nr(eb, slot);
ptr_offset += btrfs_extent_inline_ref_size(type);
goto next;
}
ptr_offset += btrfs_extent_inline_ref_size(type);
goto next;
out:
return err;
}
/*
* Check if a dev extent item is referred correctly by its chunk
*/
static int check_dev_extent_item(struct extent_buffer *eb, int slot)
{
struct btrfs_root *chunk_root = gfs_info->chunk_root;
struct btrfs_dev_extent *ptr;
struct btrfs_path path;
struct btrfs_key chunk_key;
struct btrfs_key devext_key;
struct btrfs_chunk *chunk;
struct extent_buffer *l;
int num_stripes;
u64 length;
int i;
int found_chunk = 0;
int ret;
btrfs_item_key_to_cpu(eb, &devext_key, slot);
ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
length = btrfs_dev_extent_length(eb, ptr);
chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
if (ret)
goto out;
l = path.nodes[0];
chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
ret = btrfs_check_chunk_valid(gfs_info, l, chunk, path.slots[0],
chunk_key.offset);
if (ret < 0)
goto out;
if (btrfs_stripe_length(gfs_info, l, chunk) != length)
goto out;
num_stripes = btrfs_chunk_num_stripes(l, chunk);
for (i = 0; i < num_stripes; i++) {
u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
if (devid == devext_key.objectid &&
offset == devext_key.offset) {
found_chunk = 1;
break;
}
}
out:
btrfs_release_path(&path);
if (!found_chunk) {
error(
"device extent[%llu, %llu, %llu] did not find the related chunk",
devext_key.objectid, devext_key.offset, length);
return REFERENCER_MISSING;
}
return 0;
}
/*
* Check if the used space is correct with the dev item
*/
static int check_dev_item(struct extent_buffer *eb, int slot)
{
struct btrfs_root *dev_root = gfs_info->dev_root;
struct btrfs_dev_item *dev_item;
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_dev_extent *ptr;
u64 total_bytes;
u64 dev_id;
u64 used;
u64 total = 0;
u64 prev_devid = 0;
u64 prev_dev_ext_end = 0;
int ret;
dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
dev_id = btrfs_device_id(eb, dev_item);
used = btrfs_device_bytes_used(eb, dev_item);
total_bytes = btrfs_device_total_bytes(eb, dev_item);
if (used > total_bytes) {
error(
"device %llu has incorrect used bytes %llu > total bytes %llu",
dev_id, used, total_bytes);
return ACCOUNTING_MISMATCH;
}
key.objectid = dev_id;
key.type = BTRFS_DEV_EXTENT_KEY;
key.offset = 0;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
if (ret < 0) {
btrfs_item_key_to_cpu(eb, &key, slot);
error("cannot find any related dev extent for dev[%llu, %u, %llu]",
key.objectid, key.type, key.offset);
btrfs_release_path(&path);
return REFERENCER_MISSING;
}
/*
* Iterate dev_extents to calculate the used space of a device
*
* Also make sure no dev extents overlap and end beyond device boundary
*/
while (1) {
u64 devid;
u64 physical_offset;
u64 physical_len;
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
goto next;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.objectid > dev_id)
break;
if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
goto next;
ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_dev_extent);
devid = key.objectid;
physical_offset = key.offset;
physical_len = btrfs_dev_extent_length(path.nodes[0], ptr);
if (prev_devid == devid && physical_offset < prev_dev_ext_end) {
error(
"dev extent devid %llu offset %llu len %llu overlap with previous dev extent end %llu",
devid, physical_offset, physical_len,
prev_dev_ext_end);
return ACCOUNTING_MISMATCH;
}
if (physical_offset + physical_len > total_bytes) {
error(
"dev extent devid %llu offset %llu len %llu is beyond device boundary %llu",
devid, physical_offset, physical_len,
total_bytes);
return ACCOUNTING_MISMATCH;
}
prev_devid = devid;
prev_dev_ext_end = physical_offset + physical_len;
total += physical_len;
next:
ret = btrfs_next_item(dev_root, &path);
if (ret)
break;
}
btrfs_release_path(&path);
if (used != total) {
btrfs_item_key_to_cpu(eb, &key, slot);
error(
"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
total, used, BTRFS_ROOT_TREE_OBJECTID,
BTRFS_DEV_EXTENT_KEY, dev_id);
return ACCOUNTING_MISMATCH;
}
check_dev_size_alignment(dev_id, total_bytes, gfs_info->sectorsize);
return 0;
}
/*
* Find the block group item with @bytenr, @len and @type
*
* Return 0 if found.
* Return -ENOENT if not found.
* Return <0 for fatal error.
*/
static int find_block_group_item(struct btrfs_path *path, u64 bytenr, u64 len,
u64 type)
{
struct btrfs_block_group_item bgi;
struct btrfs_key key;
int ret;
key.objectid = bytenr;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = len;
ret = btrfs_search_slot(NULL, gfs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
if (ret > 0) {
ret = -ENOENT;
error("chunk [%llu %llu) doesn't have related block group item",
bytenr, bytenr + len);
goto out;
}
read_extent_buffer(path->nodes[0], &bgi,
btrfs_item_ptr_offset(path->nodes[0], path->slots[0]),
sizeof(bgi));
if (btrfs_stack_block_group_flags(&bgi) != type) {
error(
"chunk [%llu %llu) type mismatch with block group, block group has 0x%llx chunk has %llx",
bytenr, bytenr + len, btrfs_stack_block_group_flags(&bgi),
type);
ret = -EUCLEAN;
}
out:
btrfs_release_path(path);
return ret;
}
/*
* Check a chunk item.
* Including checking all referred dev_extents and block group
*/
static int check_chunk_item(struct extent_buffer *eb, int slot)
{
struct btrfs_root *dev_root = gfs_info->dev_root;
struct btrfs_path path;
struct btrfs_key chunk_key;
struct btrfs_key devext_key;
struct btrfs_chunk *chunk;
struct extent_buffer *leaf;
struct btrfs_dev_extent *ptr;
u64 length;
u64 chunk_end;
u64 stripe_len;
u64 type;
int num_stripes;
u64 offset;
u64 objectid;
int i;
int ret;
int err = 0;
btrfs_item_key_to_cpu(eb, &chunk_key, slot);
chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
length = btrfs_chunk_length(eb, chunk);
chunk_end = chunk_key.offset + length;
ret = btrfs_check_chunk_valid(gfs_info, eb, chunk, slot,
chunk_key.offset);
if (ret < 0) {
error("chunk[%llu %llu) is invalid", chunk_key.offset,
chunk_end);
err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
goto out;
}
type = btrfs_chunk_type(eb, chunk);
btrfs_init_path(&path);
ret = find_block_group_item(&path, chunk_key.offset, length, type);
if (ret < 0)
err |= REFERENCER_MISSING;
num_stripes = btrfs_chunk_num_stripes(eb, chunk);
stripe_len = btrfs_stripe_length(gfs_info, eb, chunk);
for (i = 0; i < num_stripes; i++) {
btrfs_release_path(&path);
btrfs_init_path(&path);
devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
devext_key.type = BTRFS_DEV_EXTENT_KEY;
devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
0, 0);
if (ret)
goto not_match_dev;
leaf = path.nodes[0];
ptr = btrfs_item_ptr(leaf, path.slots[0],
struct btrfs_dev_extent);
objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
if (objectid != chunk_key.objectid ||
offset != chunk_key.offset ||
btrfs_dev_extent_length(leaf, ptr) != stripe_len)
goto not_match_dev;
continue;
not_match_dev:
err |= BACKREF_MISSING;
error(
"chunk[%llu %llu) stripe %d did not find the related dev extent",
chunk_key.objectid, chunk_end, i);
continue;
}
btrfs_release_path(&path);
out:
return err;
}
/*
* Add block group item to the extent tree if @err contains REFERENCER_MISSING.
* FIXME: We still need to repair error of dev_item.
*
* Returns error after repair.
*/
static int repair_chunk_item(struct btrfs_root *chunk_root,
struct btrfs_path *path, int err)
{
struct btrfs_chunk *chunk;
struct btrfs_key chunk_key;
struct extent_buffer *eb = path->nodes[0];
struct btrfs_root *extent_root = gfs_info->extent_root;
struct btrfs_trans_handle *trans;
u64 length;
int slot = path->slots[0];
u64 type;
int ret = 0;
btrfs_item_key_to_cpu(eb, &chunk_key, slot);
if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
return err;
chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
type = btrfs_chunk_type(path->nodes[0], chunk);
length = btrfs_chunk_length(eb, chunk);
/* now repair only adds block group */
if ((err & REFERENCER_MISSING) == 0)
return err;
ret = avoid_extents_overwrite();
if (ret)
return ret;
trans = btrfs_start_transaction(extent_root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
errno = -ret;
error("fail to start transaction: %m");
return ret;
}
ret = btrfs_make_block_group(trans, gfs_info, 0, type,
chunk_key.offset, length);
if (ret) {
error("fail to add block group item [%llu %llu]",
chunk_key.offset, length);
} else {
err &= ~REFERENCER_MISSING;
printf("Added block group item[%llu %llu]\n", chunk_key.offset,
length);
}
btrfs_commit_transaction(trans, extent_root);
if (ret)
error("fail to repair item(s) related to chunk item [%llu %llu]",
chunk_key.objectid, chunk_key.offset);
return err;
}
/*
* Main entry function to check known items and update related accounting info
*/
static int check_leaf_items(struct btrfs_root *root, struct btrfs_path *path,
struct node_refs *nrefs, int account_bytes)
{
struct btrfs_key key;
struct extent_buffer *eb;
int slot;
int type;
struct btrfs_extent_data_ref *dref;
int ret = 0;
int err = 0;
again:
eb = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(eb)) {
if (slot == 0) {
error("empty leaf [%llu %u] root %llu", eb->start,
gfs_info->nodesize, root->objectid);
err |= EIO;
}
goto out;
}
btrfs_item_key_to_cpu(eb, &key, slot);
type = key.type;
switch (type) {
case BTRFS_EXTENT_DATA_KEY:
ret = check_extent_data_item(root, path, nrefs, account_bytes);
if (repair && ret)
ret = repair_extent_data_item(root, path, nrefs, ret);
err |= ret;
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
ret = check_block_group_item(eb, slot);
if (repair &&
ret & REFERENCER_MISSING)
ret = delete_item(root, path);
err |= ret;
break;
case BTRFS_DEV_ITEM_KEY:
ret = check_dev_item(eb, slot);
err |= ret;
break;
case BTRFS_CHUNK_ITEM_KEY:
ret = check_chunk_item(eb, slot);
if (repair && ret)
ret = repair_chunk_item(root, path, ret);
err |= ret;
break;
case BTRFS_DEV_EXTENT_KEY:
ret = check_dev_extent_item(eb, slot);
err |= ret;
break;
case BTRFS_EXTENT_ITEM_KEY:
case BTRFS_METADATA_ITEM_KEY:
ret = check_extent_item(path);
err |= ret;
break;
case BTRFS_EXTENT_CSUM_KEY:
total_csum_bytes += btrfs_item_size_nr(eb, slot);
err |= ret;
break;
case BTRFS_TREE_BLOCK_REF_KEY:
ret = check_tree_block_backref(key.offset, key.objectid, -1);
if (repair &&
ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
ret = delete_item(root, path);
err |= ret;
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
ret = check_extent_data_backref(
btrfs_extent_data_ref_root(eb, dref),
btrfs_extent_data_ref_objectid(eb, dref),
btrfs_extent_data_ref_offset(eb, dref),
key.objectid, 0,
btrfs_extent_data_ref_count(eb, dref));
if (repair &&
ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
ret = delete_item(root, path);
err |= ret;
break;
case BTRFS_SHARED_BLOCK_REF_KEY:
ret = check_shared_block_backref(key.offset, key.objectid, -1);
if (repair &&
ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
ret = delete_item(root, path);
err |= ret;
break;
case BTRFS_SHARED_DATA_REF_KEY:
ret = check_shared_data_backref(key.offset, key.objectid);
if (repair &&
ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
ret = delete_item(root, path);
err |= ret;
break;
default:
break;
}
++path->slots[0];
goto again;
out:
return err;
}
/*
* @trans just for lowmem repair mode
* @check all if not 0 then check all tree block backrefs and items
* 0 then just check relationship of items in fs tree(s)
*
* Returns >0 Found error, should continue
* Returns <0 Fatal error, must exit the whole check
* Returns 0 No errors found
*/
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
int *level, struct node_refs *nrefs, int check_all)
{
enum btrfs_tree_block_status status;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
struct extent_buffer *cur;
int ret;
int err = 0;
int check;
int account_file_data = 0;
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
path->nodes[*level], nrefs, *level, check_all);
if (ret < 0)
return ret;
while (*level >= 0) {
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
bytenr = btrfs_header_bytenr(cur);
check = nrefs->need_check[*level];
if (btrfs_header_level(cur) != *level)
WARN_ON(1);
/*
* Update bytes accounting and check tree block ref
* NOTE: Doing accounting and check before checking nritems
* is necessary because of empty node/leaf.
*/
if ((check_all && !nrefs->checked[*level]) ||
(!check_all && nrefs->need_check[*level])) {
ret = check_tree_block_ref(root, cur,
btrfs_header_bytenr(cur), btrfs_header_level(cur),
btrfs_header_owner(cur), nrefs);
if (repair && ret)
ret = repair_tree_block_ref(root,
path->nodes[*level], nrefs, *level, ret);
err |= ret;
if (check_all && nrefs->need_check[*level] &&
nrefs->refs[*level]) {
account_bytes(root, path, *level);
account_file_data = 1;
}
nrefs->checked[*level] = 1;
}
if (path->slots[*level] >= btrfs_header_nritems(cur))
break;
/* Don't forgot to check leaf/node validation */
if (*level == 0) {
/* skip duplicate check */
if (check || !check_all) {
ret = btrfs_check_leaf(gfs_info, NULL, cur);
if (ret != BTRFS_TREE_BLOCK_CLEAN) {
err |= -EIO;
break;
}
}
ret = 0;
if (!check_all)
ret = process_one_leaf(root, path, nrefs, level);
else
ret = check_leaf_items(root, path,
nrefs, account_file_data);
err |= ret;
break;
}
if (check || !check_all) {
ret = btrfs_check_node(gfs_info, NULL, cur);
if (ret != BTRFS_TREE_BLOCK_CLEAN) {
err |= -EIO;
break;
}
}
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
check_all);
if (ret < 0)
break;
/*
* check all trees in check_chunks_and_extent
* check shared node once in check_fs_roots
*/
if (!check_all && !nrefs->need_check[*level - 1]) {
path->slots[*level]++;
continue;
}
next = btrfs_find_tree_block(gfs_info, bytenr, gfs_info->nodesize);
if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
free_extent_buffer(next);
reada_walk_down(root, cur, path->slots[*level]);
next = read_tree_block(gfs_info, bytenr, ptr_gen);
if (!extent_buffer_uptodate(next)) {
struct btrfs_key node_key;
btrfs_node_key_to_cpu(path->nodes[*level],
&node_key,
path->slots[*level]);
btrfs_add_corrupt_extent_record(gfs_info,
&node_key, path->nodes[*level]->start,
gfs_info->nodesize, *level);
err |= -EIO;
break;
}
}
ret = check_child_node(cur, path->slots[*level], next);
err |= ret;
if (ret < 0)
break;
if (btrfs_is_leaf(next))
status = btrfs_check_leaf(gfs_info, NULL, next);
else
status = btrfs_check_node(gfs_info, NULL, next);
if (status != BTRFS_TREE_BLOCK_CLEAN) {
free_extent_buffer(next);
err |= -EIO;
break;
}
*level = *level - 1;
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = next;
path->slots[*level] = 0;
account_file_data = 0;
update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
}
return err;
}
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
int *level)
{
int i;
struct extent_buffer *leaf;
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
leaf = path->nodes[i];
if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
path->slots[i]++;
*level = i;
return 0;
}
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
*level = i + 1;
}
return 1;
}
/*
* Insert the missing inode item and inode ref.
*
* Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
* Root dir should be handled specially because root dir is the root of fs.
*
* returns err (>0 or 0) after repair
*/
static int repair_fs_first_inode(struct btrfs_root *root, int err)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_path path;
int filetype = BTRFS_FT_DIR;
int ret = 0;
btrfs_init_path(&path);
if (err & INODE_REF_MISSING) {
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_REF_KEY;
key.offset = BTRFS_FIRST_FREE_OBJECTID;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
btrfs_release_path(&path);
ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
if (ret)
goto trans_fail;
ret = btrfs_insert_inode_ref(trans, root, "..", 2,
BTRFS_FIRST_FREE_OBJECTID,
BTRFS_FIRST_FREE_OBJECTID, 0);
if (ret)
goto trans_fail;
printf("Add INODE_REF[%llu %llu] name %s\n",
BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
"..");
err &= ~INODE_REF_MISSING;
trans_fail:
if (ret)
error("fail to insert first inode's ref");
btrfs_commit_transaction(trans, root);
}
if (err & INODE_ITEM_MISSING) {
ret = repair_inode_item_missing(root,
BTRFS_FIRST_FREE_OBJECTID, filetype);
if (ret)
goto out;
err &= ~INODE_ITEM_MISSING;
}
out:
if (ret)
error("fail to repair first inode");
btrfs_release_path(&path);
return err;
}
/*
* check first root dir's inode_item and inode_ref
*
* returns 0 means no error
* returns >0 means error
* returns <0 means fatal error
*/
static int check_fs_first_inode(struct btrfs_root *root)
{
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_inode_item *ii;
u64 index;
u32 mode;
int err = 0;
int ret;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
/* For root being dropped, we don't need to check first inode */
if (btrfs_root_refs(&root->root_item) == 0 &&
btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
BTRFS_FIRST_FREE_OBJECTID)
return 0;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
goto out;
if (ret > 0) {
ret = 0;
err |= INODE_ITEM_MISSING;
} else {
ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_inode_item);
mode = btrfs_inode_mode(path.nodes[0], ii);
if (imode_to_type(mode) != BTRFS_FT_DIR)
err |= INODE_ITEM_MISMATCH;
}
/* lookup first inode ref */
key.offset = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_REF_KEY;
/* special index value */
index = 0;
ret = find_inode_ref(root, &key, "..", strlen(".."), &index);
if (ret < 0)
goto out;
err |= ret;
out:
btrfs_release_path(&path);
if (err && repair)
err = repair_fs_first_inode(root, err);
if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
error("root dir INODE_ITEM is %s",
err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
if (err & INODE_REF_MISSING)
error("root dir INODE_REF is missing");
return ret < 0 ? ret : err;
}
/*
* This function calls walk_down_tree and walk_up_tree to check tree
* blocks and integrity of fs tree items.
*
* @root: the root of the tree to be checked.
* @account if NOT 0 means check the tree (including tree)'s treeblocks.
* otherwise means check fs tree(s) items relationship and
* @root MUST be a fs tree root.
* Returns 0 represents OK.
* Returns >0 represents error bits.
*/
static int check_btrfs_root(struct btrfs_root *root, int check_all)
{
struct btrfs_path path;
struct node_refs nrefs;
struct btrfs_root_item *root_item = &root->root_item;
u64 super_generation = btrfs_super_generation(gfs_info->super_copy);
int ret;
int level;
int err = 0;
memset(&nrefs, 0, sizeof(nrefs));
if (!check_all) {
/*
* We need to manually check the first inode item (256)
* As the following traversal function will only start from
* the first inode item in the leaf, if inode item (256) is
* missing we will skip it forever.
*/
ret = check_fs_first_inode(root);
if (ret < 0)
return FATAL_ERROR;
}
level = btrfs_header_level(root->node);
btrfs_init_path(&path);
if (btrfs_root_generation(root_item) > super_generation + 1) {
error(
"invalid root generation for root %llu, have %llu expect (0, %llu)",
root->root_key.objectid, btrfs_root_generation(root_item),
super_generation + 1);
err |= INVALID_GENERATION;
if (repair) {
root->node->flags |= EXTENT_BAD_TRANSID;
ret = recow_extent_buffer(root, root->node);
if (!ret) {
printf("Reset generation for root %llu\n",
root->root_key.objectid);
err &= ~INVALID_GENERATION;
}
}
}
if (btrfs_root_refs(root_item) > 0 ||
btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
path.nodes[level] = root->node;
path.slots[level] = 0;
extent_buffer_get(root->node);
} else {
struct btrfs_key key;
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
level = root_item->drop_level;
path.lowest_level = level;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
goto out;
ret = 0;
}
while (1) {
ctx.item_count++;
ret = walk_down_tree(root, &path, &level, &nrefs, check_all);
if (ret > 0)
err |= ret;
/* if ret is negative, walk shall stop */
if (ret < 0) {
ret = err | FATAL_ERROR;
break;
}
ret = walk_up_tree(root, &path, &level);
if (ret != 0) {
/* Normal exit, reset ret to err */
ret = err;
break;
}
}
out:
btrfs_release_path(&path);
return ret;
}
/*
* Iterate all items in the tree and call check_inode_item() to check.
*
* @root: the root of the tree to be checked.
*
* Return 0 if no error found.
* Return <0 for error.
*/
static int check_fs_root(struct btrfs_root *root)
{
reset_cached_block_groups();
return check_btrfs_root(root, 0);
}
/*
* Find the relative ref for root_ref and root_backref.
*
* @root: the root of the root tree.
* @ref_key: the key of the root ref.
*
* Return 0 if no error occurred.
*/
static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
struct extent_buffer *node, int slot)
{
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_root_ref *ref;
struct btrfs_root_ref *backref;
char ref_name[BTRFS_NAME_LEN] = {0};
char backref_name[BTRFS_NAME_LEN] = {0};
u64 ref_dirid;
u64 ref_seq;
u32 ref_namelen;
u64 backref_dirid;
u64 backref_seq;
u32 backref_namelen;
u32 len;
int ret;
int err = 0;
ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
ref_dirid = btrfs_root_ref_dirid(node, ref);
ref_seq = btrfs_root_ref_sequence(node, ref);
ref_namelen = btrfs_root_ref_name_len(node, ref);
if (ref_namelen <= BTRFS_NAME_LEN) {
len = ref_namelen;
} else {
len = BTRFS_NAME_LEN;
warning("%s[%llu %llu] ref_name too long",
ref_key->type == BTRFS_ROOT_REF_KEY ?
"ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
ref_key->offset);
}
read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
/* Find relative root_ref */
key.objectid = ref_key->offset;
key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
key.offset = ref_key->objectid;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret) {
err |= ROOT_REF_MISSING;
error("%s[%llu %llu] couldn't find relative ref",
ref_key->type == BTRFS_ROOT_REF_KEY ?
"ROOT_REF" : "ROOT_BACKREF",
ref_key->objectid, ref_key->offset);
goto out;
}
backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_root_ref);
backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
if (backref_namelen <= BTRFS_NAME_LEN) {
len = backref_namelen;
} else {
len = BTRFS_NAME_LEN;
warning("%s[%llu %llu] ref_name too long",
key.type == BTRFS_ROOT_REF_KEY ?
"ROOT_REF" : "ROOT_BACKREF",
key.objectid, key.offset);
}
read_extent_buffer(path.nodes[0], backref_name,
(unsigned long)(backref + 1), len);
if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
ref_namelen != backref_namelen ||
strncmp(ref_name, backref_name, len)) {
err |= ROOT_REF_MISMATCH;
error("%s[%llu %llu] mismatch relative ref",
ref_key->type == BTRFS_ROOT_REF_KEY ?
"ROOT_REF" : "ROOT_BACKREF",
ref_key->objectid, ref_key->offset);
}
out:
btrfs_release_path(&path);
return err;
}
/*
* Check all fs/file tree in low_memory mode.
*
* 1. for fs tree root item, call check_fs_root()
* 2. for fs tree root ref/backref, call check_root_ref()
*
* Return 0 if no error occurred.
*/
int check_fs_roots_lowmem(void)
{
struct btrfs_root *tree_root = gfs_info->tree_root;
struct btrfs_root *cur_root = NULL;
struct btrfs_path path;
struct btrfs_key key;
struct extent_buffer *node;
int slot;
int ret;
int err = 0;
btrfs_init_path(&path);
key.objectid = BTRFS_FS_TREE_OBJECTID;
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
if (ret < 0) {
err = ret;
goto out;
} else if (ret > 0) {
err = -ENOENT;
goto out;
}
while (1) {
node = path.nodes[0];
slot = path.slots[0];
btrfs_item_key_to_cpu(node, &key, slot);
if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
goto out;
if (key.type == BTRFS_INODE_ITEM_KEY &&
is_fstree(key.objectid)) {
ret = check_repair_free_space_inode(&path);
/* Check if we still have a valid path to continue */
if (ret < 0 && path.nodes[0]) {
err |= ret;
goto next;
}
if (ret < 0 && !path.nodes[0])
goto out;
}
if (key.type == BTRFS_ROOT_ITEM_KEY &&
fs_root_objectid(key.objectid)) {
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
cur_root = btrfs_read_fs_root_no_cache(gfs_info,
&key);
} else {
key.offset = (u64)-1;
cur_root = btrfs_read_fs_root(gfs_info, &key);
}
if (IS_ERR(cur_root)) {
error("Fail to read fs/subvol tree: %lld",
key.objectid);
err = -EIO;
goto next;
}
ret = check_fs_root(cur_root);
err |= ret;
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
btrfs_free_fs_root(cur_root);
} else if (key.type == BTRFS_ROOT_REF_KEY ||
key.type == BTRFS_ROOT_BACKREF_KEY) {
ret = check_root_ref(tree_root, &key, node, slot);
err |= ret;
}
next:
/*
* In repair mode, our path is no longer reliable as CoW can
* happen. We need to reset our path.
*/
if (repair) {
btrfs_release_path(&path);
ret = btrfs_search_slot(NULL, tree_root, &key, &path,
0, 0);
if (ret < 0) {
if (!err)
err = ret;
goto out;
}
if (ret > 0) {
/* Key not found, but already at next item */
if (path.slots[0] <
btrfs_header_nritems(path.nodes[0]))
continue;
/* falls through to next leaf */
}
}
ret = btrfs_next_item(tree_root, &path);
if (ret > 0)
goto out;
if (ret < 0) {
err = ret;
goto out;
}
}
out:
btrfs_release_path(&path);
return err;
}
/*
* Low memory usage version check_chunks_and_extents.
*/
int check_chunks_and_extents_lowmem(void)
{
struct btrfs_path path;
struct btrfs_key old_key;
struct btrfs_key key;
struct btrfs_root *root;
struct btrfs_root *cur_root;
int err = 0;
int ret;
root = gfs_info->chunk_root;
ret = check_btrfs_root(root, 1);
err |= ret;
root = gfs_info->tree_root;
ret = check_btrfs_root(root, 1);
err |= ret;
btrfs_init_path(&path);
key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret) {
error("cannot find extent tree in tree_root");
goto out;
}
while (1) {
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.type != BTRFS_ROOT_ITEM_KEY)
goto next;
old_key = key;
key.offset = (u64)-1;
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
cur_root = btrfs_read_fs_root_no_cache(gfs_info,
&key);
else
cur_root = btrfs_read_fs_root(gfs_info, &key);
if (IS_ERR(cur_root) || !cur_root) {
error("failed to read tree: %lld", key.objectid);
goto next;
}
ret = check_btrfs_root(cur_root, 1);
err |= ret;
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
btrfs_free_fs_root(cur_root);
btrfs_release_path(&path);
ret = btrfs_search_slot(NULL, gfs_info->tree_root,
&old_key, &path, 0, 0);
if (ret)
goto out;
next:
ret = btrfs_next_item(root, &path);
if (ret)
goto out;
}
out:
if (repair) {
ret = end_avoid_extents_overwrite();
if (ret < 0)
ret = FATAL_ERROR;
err |= ret;
reset_cached_block_groups();
/* update block accounting */
ret = repair_block_accounting();
if (ret)
err |= ret;
else
err &= ~BG_ACCOUNTING_ERROR;
}
btrfs_release_path(&path);
return err;
}