btrfs-progs/delayed-ref.h
Qu Wenruo a1dce63749 btrfs-progs: delayed-ref: Fix memory leak and use-after-free caused by wrong condition to free delayed ref/head.
[BUG]
When btrfs-progs is compiled with D=asan, it can't pass even the very
basic fsck tests due to btrfs-image has memory leak:
  === START TEST /home/adam/btrfs/btrfs-progs/tests//fsck-tests/001-bad-file-extent-bytenr
  restoring image default_case.img

  =================================================================
  ==7790==ERROR: LeakSanitizer: detected memory leaks

  Direct leak of 104 byte(s) in 1 object(s) allocated from:
      #0 0x7f1d3b738389 in __interceptor_malloc /build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cc:86
      #1 0x560ca6b7f4ff in btrfs_add_delayed_tree_ref /home/adam/btrfs/btrfs-progs/delayed-ref.c:569
      #2 0x560ca6af2d0b in btrfs_free_extent /home/adam/btrfs/btrfs-progs/extent-tree.c:2155
      #3 0x560ca6ac16ca in __btrfs_cow_block /home/adam/btrfs/btrfs-progs/ctree.c:319
      #4 0x560ca6ac1d8c in btrfs_cow_block /home/adam/btrfs/btrfs-progs/ctree.c:383
      #5 0x560ca6ac6c8e in btrfs_search_slot /home/adam/btrfs/btrfs-progs/ctree.c:1153
      #6 0x560ca6ab7e83 in fixup_device_size image/main.c:2113
      #7 0x560ca6ab9279 in fixup_chunks_and_devices image/main.c:2333
      #8 0x560ca6ab9ada in restore_metadump image/main.c:2455
      #9 0x560ca6abaeba in main image/main.c:2723
      #10 0x7f1d3b148ce2 in __libc_start_main (/usr/lib/libc.so.6+0x23ce2)

  ... tons of similar leakage for delayed_tree_ref ...

  Direct leak of 96 byte(s) in 1 object(s) allocated from:
      #0 0x7f1d3b738389 in __interceptor_malloc /build/gcc/src/gcc/libsanitizer/asan/asan_malloc_linux.cc:86
      #1 0x560ca6b7f5fb in btrfs_add_delayed_tree_ref /home/adam/btrfs/btrfs-progs/delayed-ref.c:583
      #2 0x560ca6af5679 in alloc_tree_block /home/adam/btrfs/btrfs-progs/extent-tree.c:2503
      #3 0x560ca6af57ac in btrfs_alloc_free_block /home/adam/btrfs/btrfs-progs/extent-tree.c:2524
      #4 0x560ca6ac115b in __btrfs_cow_block /home/adam/btrfs/btrfs-progs/ctree.c:290
      #5 0x560ca6ac1d8c in btrfs_cow_block /home/adam/btrfs/btrfs-progs/ctree.c:383
      #6 0x560ca6b7bb15 in commit_tree_roots /home/adam/btrfs/btrfs-progs/transaction.c:98
      #7 0x560ca6b7c525 in btrfs_commit_transaction /home/adam/btrfs/btrfs-progs/transaction.c:192
      #8 0x560ca6ab92be in fixup_chunks_and_devices image/main.c:2337
      #9 0x560ca6ab9ada in restore_metadump image/main.c:2455
      #10 0x560ca6abaeba in main image/main.c:2723
      #11 0x7f1d3b148ce2 in __libc_start_main (/usr/lib/libc.so.6+0x23ce2)

  ... tons of similar leakage for delayed_ref_head ...

  SUMMARY: AddressSanitizer: 1600 byte(s) leaked in 16 allocation(s).
  failed to restore image ./default_case.img

[CAUSE]
Commit c6039704c5 ("btrfs-progs: Add delayed refs infrastructure")
introduces delayed ref infrastructure for free space tree, however the
refcount_dec_and_test() from kernel code is wrongly backported.

refcount_dec_and_test() will return true if the refcount reaches 0.
So kernel code will free the allocated space as expected:
	if (refcount_dec_and_test(&ref->refs)) {
		kmem_cache_free();
	}

However btrfs-progs backport is using the opposite condition:
	if (--ref->refs) {
		kfree();
	}

This will not free the memory for the last user, but for refs >= 2.
Causing both use-after-free and memory leak for any offline write
operation.

[FIX]
Fix the (--ref->refs) condition to (--ref->refs == 0) to fix the
backport error.

Fixes: c6039704c5 ("btrfs-progs: Add delayed refs infrastructure")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2019-07-03 13:31:14 +02:00

214 lines
5.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*/
#ifndef BTRFS_DELAYED_REF_H
#define BTRFS_DELAYED_REF_H
#include "kerncompat.h"
/* these are the possible values of struct btrfs_delayed_ref_node->action */
#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
struct btrfs_delayed_ref_node {
struct rb_node ref_node;
/*
* If action is BTRFS_ADD_DELAYED_REF, also link this node to
* ref_head->ref_add_list, then we do not need to iterate the
* whole ref_head->ref_list to find BTRFS_ADD_DELAYED_REF nodes.
*/
struct list_head add_list;
/* the starting bytenr of the extent */
u64 bytenr;
/* the size of the extent */
u64 num_bytes;
/* seq number to keep track of insertion order */
u64 seq;
/* ref count on this data structure */
u64 refs;
/*
* how many refs is this entry adding or deleting. For
* head refs, this may be a negative number because it is keeping
* track of the total mods done to the reference count.
* For individual refs, this will always be a positive number
*
* It may be more than one, since it is possible for a single
* parent to have more than one ref on an extent
*/
int ref_mod;
unsigned int action:8;
unsigned int type:8;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
};
struct btrfs_delayed_extent_op {
struct btrfs_disk_key key;
u8 level;
bool update_key;
bool update_flags;
bool is_data;
u64 flags_to_set;
};
/*
* the head refs are used to hold a lock on a given extent, which allows us
* to make sure that only one process is running the delayed refs
* at a time for a single extent. They also store the sum of all the
* reference count modifications we've queued up.
*/
struct btrfs_delayed_ref_head {
u64 bytenr;
u64 num_bytes;
u64 refs;
struct rb_root ref_tree;
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
struct list_head ref_add_list;
struct rb_node href_node;
struct btrfs_delayed_extent_op *extent_op;
/*
* This is used to track the final ref_mod from all the refs associated
* with this head ref, this is not adjusted as delayed refs are run,
* this is meant to track if we need to do the csum accounting or not.
*/
int total_ref_mod;
/*
* This is the current outstanding mod references for this bytenr. This
* is used with lookup_extent_info to get an accurate reference count
* for a bytenr, so it is adjusted as delayed refs are run so that any
* on disk reference count + ref_mod is accurate.
*/
int ref_mod;
/*
* when a new extent is allocated, it is just reserved in memory
* The actual extent isn't inserted into the extent allocation tree
* until the delayed ref is processed. must_insert_reserved is
* used to flag a delayed ref so the accounting can be updated
* when a full insert is done.
*
* It is possible the extent will be freed before it is ever
* inserted into the extent allocation tree. In this case
* we need to update the in ram accounting to properly reflect
* the free has happened.
*/
unsigned int must_insert_reserved:1;
unsigned int is_data:1;
unsigned int is_system:1;
unsigned int processing:1;
};
struct btrfs_delayed_tree_ref {
struct btrfs_delayed_ref_node node;
u64 root;
u64 parent;
int level;
};
struct btrfs_delayed_ref_root {
/* head ref rbtree */
struct rb_root href_root;
/* dirty extent records */
struct rb_root dirty_extent_root;
/* total number of head nodes in tree */
unsigned long num_heads;
/* total number of head nodes ready for processing */
unsigned long num_heads_ready;
/*
* set when the tree is flushing before a transaction commit,
* used by the throttling code to decide if new updates need
* to be run right away
*/
int flushing;
u64 run_delayed_start;
};
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
{
return kmalloc(sizeof(struct btrfs_delayed_extent_op), GFP_KERNEL);
}
static inline void
btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
{
if (op)
kfree(op);
}
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
{
WARN_ON(ref->refs == 0);
if (--ref->refs == 0) {
WARN_ON(ref->in_tree);
switch (ref->type) {
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_BLOCK_REF_KEY:
kfree(ref);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
case BTRFS_SHARED_DATA_REF_KEY:
kfree(ref);
break;
default:
BUG();
}
}
}
static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
{
if (--head->refs == 0)
kfree(head);
}
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod);
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans);
/*
* helper functions to cast a node into its container
*/
static inline struct btrfs_delayed_tree_ref *
btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
{
return container_of(node, struct btrfs_delayed_tree_ref, node);
}
int cleanup_ref_head(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head);
void btrfs_destroy_delayed_refs(struct btrfs_trans_handle *trans);
#endif