btrfs-progs: WIP: add raid10c3/raid10c4 profile support (incompat flag RAID10C34)

- works: creating a filesystem, e.g. mkfs.btrfs -d raid10c3 -m raid10c4 ...
- works: dump-super (prints the new RAID10C34 incompat flag)
- works: dump-tree (prints chunks with the new block group types)

Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
David Sterba 2021-09-01 23:04:11 +02:00
parent aa2f0c3f38
commit e96b91078f
10 changed files with 97 additions and 18 deletions

View file

@ -1642,7 +1642,7 @@ static int btrfs_calc_stripe_index(struct chunk_record *chunk, u64 logical)
stripe_nr = offset / chunk->stripe_len;
if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID0) {
index = stripe_nr % chunk->num_stripes;
} else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID10) {
} else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID10_MASK) {
index = stripe_nr % (chunk->num_stripes / chunk->sub_stripes);
index *= chunk->sub_stripes;
} else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID56_MASK) {

View file

@ -121,6 +121,15 @@ static const struct btrfs_feature mkfs_features[] = {
VERSION_NULL(default),
.desc = "RAID1 with 3 or 4 copies"
},
{
.name = "raid10c34",
.flag = BTRFS_FEATURE_INCOMPAT_RAID10C34,
.sysfs_name = "raid10c34",
VERSION_TO_STRING2(compat, 5,17),
VERSION_NULL(safe),
VERSION_NULL(default),
.desc = "RAID10 with 3 or 4 copies"
},
#ifdef BTRFS_ZONED
{
.name = "zoned",

View file

@ -790,6 +790,8 @@ enum btrfs_err_code {
BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS,
BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID10C3_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID10C4_MIN_NOT_MET,
};
/* An error code to error string mapping for the kernel
@ -806,6 +808,10 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
return "unable to go below four devices on raid1c4";
case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET:
return "unable to go below four/two devices on raid10";
case BTRFS_ERROR_DEV_RAID10C3_MIN_NOT_MET:
return "unable to go below three devices on raid10c3";
case BTRFS_ERROR_DEV_RAID10C4_MIN_NOT_MET:
return "unable to go below four devices on raid10c4";
case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET:
return "unable to go below two devices on raid5";
case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET:

View file

@ -511,6 +511,7 @@ BUILD_ASSERT(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
#define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12)
#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13)
#define BTRFS_FEATURE_INCOMPAT_RAID10C34 (1ULL << 14)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
@ -552,6 +553,7 @@ BUILD_ASSERT(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
BTRFS_FEATURE_INCOMPAT_RAID1C34 | \
BTRFS_FEATURE_INCOMPAT_RAID10C34 | \
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
BTRFS_FEATURE_INCOMPAT_ZONED)
#endif
@ -1016,6 +1018,8 @@ struct btrfs_csum_item {
#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
#define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9)
#define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10)
#define BTRFS_BLOCK_GROUP_RAID10C3 (1ULL << 11)
#define BTRFS_BLOCK_GROUP_RAID10C4 (1ULL << 12)
#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
BTRFS_SPACE_INFO_GLOBAL_RSV)
@ -1029,6 +1033,8 @@ enum btrfs_raid_types {
BTRFS_RAID_RAID6,
BTRFS_RAID_RAID1C3,
BTRFS_RAID_RAID1C4,
BTRFS_RAID_RAID10C3,
BTRFS_RAID_RAID10C4,
BTRFS_NR_RAID_TYPES
};
@ -1043,7 +1049,9 @@ enum btrfs_raid_types {
BTRFS_BLOCK_GROUP_RAID1C3 | \
BTRFS_BLOCK_GROUP_RAID1C4 | \
BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10)
BTRFS_BLOCK_GROUP_RAID10 | \
BTRFS_BLOCK_GROUP_RAID10C3 | \
BTRFS_BLOCK_GROUP_RAID10C4)
#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
BTRFS_BLOCK_GROUP_RAID6)
@ -1052,6 +1060,10 @@ enum btrfs_raid_types {
BTRFS_BLOCK_GROUP_RAID1C3 | \
BTRFS_BLOCK_GROUP_RAID1C4)
#define BTRFS_BLOCK_GROUP_RAID10_MASK (BTRFS_BLOCK_GROUP_RAID10 | \
BTRFS_BLOCK_GROUP_RAID10C3 | \
BTRFS_BLOCK_GROUP_RAID10C4)
/* used in struct btrfs_balance_args fields */
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)

View file

@ -1671,7 +1671,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_RAID10_MASK |
BTRFS_BLOCK_GROUP_RAID56_MASK |
BTRFS_BLOCK_GROUP_DUP);
if (extra_flags) {
@ -3129,6 +3129,8 @@ static u64 get_dev_extent_len(struct map_lookup *map)
div = map->num_stripes - btrfs_bg_type_to_nparity(map->type);
break;
case BTRFS_BLOCK_GROUP_RAID10:
case BTRFS_BLOCK_GROUP_RAID10C3:
case BTRFS_BLOCK_GROUP_RAID10C4:
div = (map->num_stripes / map->sub_stripes);
break;
default:

View file

@ -1689,6 +1689,7 @@ static struct readable_flag_entry incompat_flags_array[] = {
DEF_INCOMPAT_FLAG_ENTRY(METADATA_UUID),
DEF_INCOMPAT_FLAG_ENTRY(RAID1C34),
DEF_INCOMPAT_FLAG_ENTRY(ZONED),
DEF_INCOMPAT_FLAG_ENTRY(RAID10C34),
};
static const int incompat_flags_num = sizeof(incompat_flags_array) /
sizeof(struct readable_flag_entry);

View file

@ -47,6 +47,34 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.bg_flag = BTRFS_BLOCK_GROUP_RAID10,
.mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
},
[BTRFS_RAID_RAID10C3] = {
.sub_stripes = 3,
.dev_stripes = 1,
.devs_max = 0, /* 0 == as many as possible */
.devs_min = 3,
.tolerated_failures = 2,
.devs_increment = 3,
.ncopies = 3,
.nparity = 0,
.lower_name = "raid10c3",
.upper_name = "RAID10C3",
.bg_flag = BTRFS_BLOCK_GROUP_RAID10C3,
.mindev_error = BTRFS_ERROR_DEV_RAID10C3_MIN_NOT_MET,
},
[BTRFS_RAID_RAID10C4] = {
.sub_stripes = 4,
.dev_stripes = 1,
.devs_max = 0, /* 0 == as many as possible */
.devs_min = 4,
.tolerated_failures = 3,
.devs_increment = 4,
.ncopies = 4,
.nparity = 0,
.lower_name = "raid10c4",
.upper_name = "RAID10C4",
.bg_flag = BTRFS_BLOCK_GROUP_RAID10C4,
.mindev_error = BTRFS_ERROR_DEV_RAID10C4_MIN_NOT_MET,
},
[BTRFS_RAID_RAID1] = {
.sub_stripes = 1,
.dev_stripes = 1,
@ -195,6 +223,10 @@ enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
{
if (flags & BTRFS_BLOCK_GROUP_RAID10)
return BTRFS_RAID_RAID10;
else if (flags & BTRFS_BLOCK_GROUP_RAID10C3)
return BTRFS_RAID_RAID10C3;
else if (flags & BTRFS_BLOCK_GROUP_RAID10C4)
return BTRFS_RAID_RAID10C4;
else if (flags & BTRFS_BLOCK_GROUP_RAID1)
return BTRFS_RAID_RAID1;
else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
@ -1093,7 +1125,7 @@ static u64 chunk_bytes_by_type(struct alloc_chunk_ctl *ctl)
if (type & (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_DUP))
return stripe_size;
else if (type & BTRFS_BLOCK_GROUP_RAID10)
else if (type & (BTRFS_BLOCK_GROUP_RAID10_MASK))
return stripe_size * (ctl->num_stripes / ctl->sub_stripes);
else if (type & BTRFS_BLOCK_GROUP_RAID56_MASK)
return stripe_size * (ctl->num_stripes - btrfs_bg_type_to_nparity(type));
@ -1302,6 +1334,8 @@ static void init_alloc_chunk_ctl(struct btrfs_fs_info *info,
break;
case BTRFS_RAID_RAID0:
case BTRFS_RAID_RAID10:
case BTRFS_RAID_RAID10C3:
case BTRFS_RAID_RAID10C4:
case BTRFS_RAID_RAID5:
case BTRFS_RAID_RAID6:
ctl->num_stripes = min(ctl->max_stripes, ctl->total_devs);
@ -1549,11 +1583,9 @@ again:
list_splice(&private_devs, dev_list);
if (index >= ctl.min_stripes) {
ctl.num_stripes = index;
if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
/* We know this should be 2, but just in case */
ASSERT(is_power_of_2(ctl.sub_stripes));
ctl.num_stripes = round_down(ctl.num_stripes,
ctl.sub_stripes);
if (type & (BTRFS_BLOCK_GROUP_RAID10_MASK)) {
ctl.num_stripes /= ctl.sub_stripes;
ctl.num_stripes *= ctl.sub_stripes;
}
looped = 1;
goto again;
@ -1643,7 +1675,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
else if (map->type & (BTRFS_BLOCK_GROUP_RAID10_MASK))
ret = map->sub_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
ret = 2;
@ -1709,7 +1741,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
length = ce->size;
rmap_len = map->stripe_len;
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
if (map->type & BTRFS_BLOCK_GROUP_RAID10_MASK)
length = ce->size / (map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
length = ce->size / map->num_stripes;
@ -1728,7 +1760,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
stripe_nr = (physical - map->stripes[i].physical) /
map->stripe_len;
if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
if (map->type & BTRFS_BLOCK_GROUP_RAID10_MASK) {
stripe_nr = (stripe_nr * map->num_stripes + i) /
map->sub_stripes;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
@ -1838,7 +1870,7 @@ again:
if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_DUP)) {
stripes_required = map->num_stripes;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
} else if (map->type & (BTRFS_BLOCK_GROUP_RAID10_MASK)) {
stripes_required = map->sub_stripes;
}
}
@ -1879,7 +1911,7 @@ again:
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_RAID56_MASK |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_RAID10_MASK |
BTRFS_BLOCK_GROUP_DUP)) {
/* we limit the length of each bio to what fits in a stripe */
*length = min_t(u64, ce->size - offset,
@ -1900,7 +1932,7 @@ again:
stripe_index = mirror_num - 1;
else
stripe_index = stripe_nr % map->num_stripes;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10_MASK) {
int factor = map->num_stripes / map->sub_stripes;
stripe_index = stripe_nr % factor;
@ -2193,8 +2225,14 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
*/
min_devs = btrfs_bg_type_to_devs_min(type);
table_sub_stripes = btrfs_bg_type_to_sub_stripes(type);
if ((type & BTRFS_BLOCK_GROUP_RAID10 && (sub_stripes != table_sub_stripes ||
printf("sub_stripes %d\ntable_sub_stripes %d\nnum_stripes %d\n",
sub_stripes, table_sub_stripes, num_stripes);
#if 0
/* FIXME: kernel does not check the alignment */
if ((type & BTRFS_BLOCK_GROUP_RAID10_MASK && (sub_stripes != table_sub_stripes ||
!IS_ALIGNED(num_stripes, sub_stripes))) ||
#endif
if ((type & BTRFS_BLOCK_GROUP_RAID10_MASK && (sub_stripes != table_sub_stripes)) ||
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < min_devs) ||
(type & BTRFS_BLOCK_GROUP_RAID1C3 && num_stripes < min_devs) ||
(type & BTRFS_BLOCK_GROUP_RAID1C4 && num_stripes < min_devs) ||
@ -2773,6 +2811,8 @@ u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
stripe_len = chunk_len / (num_stripes - btrfs_bg_type_to_nparity(profile));
break;
case BTRFS_BLOCK_GROUP_RAID10:
case BTRFS_BLOCK_GROUP_RAID10C3:
case BTRFS_BLOCK_GROUP_RAID10C4:
stripe_len = chunk_len / (num_stripes /
btrfs_chunk_sub_stripes(leaf, chunk));
break;

View file

@ -213,6 +213,8 @@ static inline int check_crossing_stripes(struct btrfs_fs_info *fs_info,
(bg_offset + len - 1) / BTRFS_STRIPE_LEN);
}
/* FIXME */
int btrfs_bg_type_to_sub_stripes(u64 flags);
static inline u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
{
u64 stripe_size;
@ -220,8 +222,8 @@ static inline u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
if (type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_size = length;
stripe_size /= num_stripes;
} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
stripe_size = length * 2;
} else if (type & BTRFS_BLOCK_GROUP_RAID10_MASK) {
stripe_size = length * btrfs_bg_type_to_sub_stripes(type);
stripe_size /= num_stripes;
} else if (type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
stripe_size = length;

View file

@ -838,6 +838,8 @@ enum btrfs_err_code {
BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS,
BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID10C3_MIN_NOT_MET,
BTRFS_ERROR_DEV_RAID10C4_MIN_NOT_MET,
};
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \

View file

@ -1229,6 +1229,11 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
runtime_features |= BTRFS_RUNTIME_FEATURE_FREE_SPACE_TREE;
}
if ((data_profile | metadata_profile) &
(BTRFS_BLOCK_GROUP_RAID10C3 | BTRFS_BLOCK_GROUP_RAID10C4)) {
features |= BTRFS_FEATURE_INCOMPAT_RAID10C34;
}
if (zoned) {
if (source_dir_set) {
error("the option -r and zoned mode are incompatible");