BPF: support inlining __builtin_memcmp intrinsic call

Delyan Kratunov reported an issue where __builtin_memcmp is
not inlined into simple load/compare instructions.
This is a known issue. In the current state, __builtin_memcmp
will be converted to memcmp call which won't work for
bpf programs.

This patch added support for expanding __builtin_memcmp with
actual loads and compares up to currently maximum 128 total loads.
The implementation is identical to PowerPC.

Differential Revision: https://reviews.llvm.org/D122676
This commit is contained in:
Yonghong Song 2022-03-28 23:30:10 -07:00
parent 4d1010909f
commit 5898979387
3 changed files with 83 additions and 0 deletions

View file

@ -168,6 +168,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
MaxLoadsPerMemcmp = 0;
} else {
// inline memcpy() for kernel to see explicit copy
unsigned CommonMaxStores =
@ -176,6 +177,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
}
// CPU/Feature control

View file

@ -71,6 +71,15 @@ public:
Opd2Info, Opd1PropInfo,
Opd2PropInfo);
}
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const {
TTI::MemCmpExpansionOptions Options;
Options.LoadSizes = {8, 4, 2, 1};
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
return Options;
}
};
} // end namespace llvm

View file

@ -0,0 +1,72 @@
; RUN: llc -march=bpfel < %s | FileCheck %s
; RUN: llc -march=bpfel -mcpu=v3 < %s | FileCheck %s
;
; Source code:
; /* set aligned 4 to minimize the number of loads */
; struct build_id {
; unsigned char id[20];
; } __attribute__((aligned(4)));
;
; /* try to compute a local build_id */
; void bar1(void *);
;
; /* the global build_id to compare */
; struct build_id id2;
;
; int foo()
; {
; struct build_id id1;
;
; bar1(&id1);
; return __builtin_memcmp(&id1, &id2, sizeof(id1)) == 0;
; }
; Compilation flags:
; clang -target bpf -S -O2 t.c -emit-llvm
%struct.build_id = type { [20 x i8] }
@id2 = dso_local global %struct.build_id zeroinitializer, align 4
; Function Attrs: nounwind
define dso_local i32 @foo() local_unnamed_addr #0 {
entry:
%id11 = alloca [20 x i8], align 4
%id11.sub = getelementptr inbounds [20 x i8], [20 x i8]* %id11, i64 0, i64 0
call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %id11.sub) #4
call void @bar1(i8* noundef nonnull %id11.sub) #4
%call = call i32 @memcmp(i8* noundef nonnull dereferenceable(20) %id11.sub, i8* noundef nonnull dereferenceable(20) getelementptr inbounds (%struct.build_id, %struct.build_id* @id2, i64 0, i32 0, i64 0), i64 noundef 20) #4
%cmp = icmp eq i32 %call, 0
%conv = zext i1 %cmp to i32
call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %id11.sub) #4
ret i32 %conv
}
; CHECK: *(u32 *)(r1 + 0)
; CHECK: *(u32 *)(r10 - 20)
; CHECK: *(u32 *)(r10 - 12)
; CHECK: *(u32 *)(r1 + 8)
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
declare dso_local void @bar1(i8* noundef) local_unnamed_addr #2
; Function Attrs: argmemonly mustprogress nofree nounwind readonly willreturn
declare dso_local i32 @memcmp(i8* nocapture noundef, i8* nocapture noundef, i64 noundef) local_unnamed_addr #3
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #3 = { argmemonly mustprogress nofree nounwind readonly willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #4 = { nounwind }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git dea65874b2505f8f5e8e51fd8cad6908feb375ec)"}