From e8b0e7479cf5d745d4a40801668be92f876c13ae Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Sat, 26 Jul 2025 21:02:51 -0400 Subject: [PATCH 01/10] added opaque pointers & imported base types --- Project.toml | 2 +- src/VectorizationBase.jl | 14 ++++ src/llvm_intrin/binary_ops.jl | 1 - src/llvm_intrin/conflict.jl | 1 - src/llvm_intrin/integer_fma.jl | 1 - src/llvm_intrin/intrin_funcs.jl | 1 - src/llvm_intrin/masks.jl | 1 - src/llvm_intrin/memory_addr.jl | 87 +++++++++++---------- src/llvm_intrin/nonbroadcastingops.jl | 1 - src/llvm_intrin/unary_ops.jl | 1 - src/llvm_intrin/vbroadcast.jl | 5 +- src/llvm_intrin/vector_ops.jl | 1 - src/llvm_intrin/vfmaddsub.jl | 1 - src/llvm_types.jl | 11 ++- src/ranges.jl | 1 - src/special/exp.jl | 1 - src/special/log.jl | 1 - src/static.jl | 3 +- src/strided_pointers/cartesian_indexing.jl | 1 - src/strided_pointers/cse_stridemultiples.jl | 1 + src/strided_pointers/stridedpointers.jl | 3 +- src/vector_width.jl | 1 - src/vecunroll/mappedloadstore.jl | 1 - 23 files changed, 72 insertions(+), 69 deletions(-) diff --git a/Project.toml b/Project.toml index 5be8b427..aa7abee3 100644 --- a/Project.toml +++ b/Project.toml @@ -32,4 +32,4 @@ julia = "1.10" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Test"] \ No newline at end of file diff --git a/src/VectorizationBase.jl b/src/VectorizationBase.jl index 16b594e8..91c1ea3b 100644 --- a/src/VectorizationBase.jl +++ b/src/VectorizationBase.jl @@ -62,6 +62,20 @@ using HostCPUFeatures: nextpow2, fast_half +import Base: + Float16, + Float32, + Float64, + Int8, + Int16, + Int32, + Int64, + UInt8, + UInt16, + UInt32, + UInt64, + Bool + using SIMDTypes: Bit, FloatingTypes, diff --git a/src/llvm_intrin/binary_ops.jl b/src/llvm_intrin/binary_ops.jl index e7925222..fc7019fe 100644 --- a/src/llvm_intrin/binary_ops.jl +++ b/src/llvm_intrin/binary_ops.jl @@ -1,4 +1,3 @@ - function binary_op(op, W, @nospecialize(T)) ty = LLVM_TYPES[T] if isone(W) diff --git a/src/llvm_intrin/conflict.jl b/src/llvm_intrin/conflict.jl index 1b640e10..8f3b15ed 100644 --- a/src/llvm_intrin/conflict.jl +++ b/src/llvm_intrin/conflict.jl @@ -1,4 +1,3 @@ - function conflictquote(W::Int = 16, bits::Int = 32) @assert bits == 32 || bits == 64 s = bits == 32 ? 'd' : 'q' diff --git a/src/llvm_intrin/integer_fma.jl b/src/llvm_intrin/integer_fma.jl index 97aeee99..5b215694 100644 --- a/src/llvm_intrin/integer_fma.jl +++ b/src/llvm_intrin/integer_fma.jl @@ -1,4 +1,3 @@ - # This is experimental, as few arches support it, and I can't think of many uses other than floating point RNGs. @inline __ifmalo(v1, v2, v3) = diff --git a/src/llvm_intrin/intrin_funcs.jl b/src/llvm_intrin/intrin_funcs.jl index 8ca70c79..536b5b71 100644 --- a/src/llvm_intrin/intrin_funcs.jl +++ b/src/llvm_intrin/intrin_funcs.jl @@ -1,4 +1,3 @@ - @generated function saturated(::F, x::I, y::I) where {I<:IntegerTypesHW,F} typ = "i$(8sizeof(I))" s = I <: Signed ? 's' : 'u' diff --git a/src/llvm_intrin/masks.jl b/src/llvm_intrin/masks.jl index 40740923..46e3eed8 100644 --- a/src/llvm_intrin/masks.jl +++ b/src/llvm_intrin/masks.jl @@ -1,4 +1,3 @@ -# # We use these definitions because when we have other SIMD operations with masks # LLVM optimizes the masks better. function truncate_mask!(instrs, input, W, suffix, reverse_load::Bool = false) diff --git a/src/llvm_intrin/memory_addr.jl b/src/llvm_intrin/memory_addr.jl index 8ad9cf44..10d3bdc5 100644 --- a/src/llvm_intrin/memory_addr.jl +++ b/src/llvm_intrin/memory_addr.jl @@ -1,3 +1,4 @@ +# TODO - Review branching redundancy near the top of this code #################################################################################################### ###################################### Memory Addressing ########################################### #################################################################################################### @@ -168,7 +169,7 @@ function offset_ptr( if iszero(O) push!( instrs, - "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(index_gep_typ)*" + "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr" ) i += 1 else # !iszero(O) @@ -181,25 +182,25 @@ function offset_ptr( end push!( instrs, - "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(offset_gep_typ)*" + "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr" ) i += 1 push!( instrs, - "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), $(offset_gep_typ)* %ptr.$(i-1), i32 $(offset)" + "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), ptr %ptr.$(i-1), i32 $(offset)" ) i += 1 if forgep && iszero(M) && (iszero(X) || isone(X)) push!( instrs, - "%ptr.$(i) = ptrtoint $(offset_gep_typ)* %ptr.$(i-1) to $(JULIAPOINTERTYPE)" + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $(JULIAPOINTERTYPE)" ) i += 1 return instrs, i elseif offset_gep_typ != index_gep_typ push!( instrs, - "%ptr.$(i) = bitcast $(offset_gep_typ)* %ptr.$(i-1) to $(index_gep_typ)*" + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" ) i += 1 end @@ -218,19 +219,19 @@ function offset_ptr( end push!( instrs, - "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), $(index_gep_typ)* %ptr.$(i-1), <$W x i$(ibits)> %$(indname)" + "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), ptr %ptr.$(i-1), <$W x i$(ibits)> %$(indname)" ) i += 1 if forgep push!( instrs, - "%ptr.$(i) = ptrtoint <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" ) i += 1 elseif index_gep_typ != vtyp push!( instrs, - "%ptr.$(i) = bitcast <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $typ*>" + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x ptr>" ) i += 1 end @@ -278,7 +279,7 @@ function offset_ptr( end push!( instrs, - "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), $(index_gep_typ)* %ptr.$(i-1), i$(ibits) %$(indname)" + "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), ptr %ptr.$(i-1), i$(ibits) %$(indname)" ) i += 1 end @@ -293,19 +294,19 @@ function offset_ptr( if typ !== index_gep_typ push!( instrs, - "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(typ)*" + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" ) i += 1 end push!( instrs, - "%ptr.$(i) = getelementptr inbounds $(typ), $(typ)* %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>" + "%ptr.$(i) = getelementptr inbounds $(typ), ptr %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>" ) i += 1 if forgep push!( instrs, - "%ptr.$(i) = ptrtoint <$W x $typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" ) i += 1 end @@ -314,13 +315,13 @@ function offset_ptr( if forgep # if forgep, just return now push!( instrs, - "%ptr.$(i) = ptrtoint $(index_gep_typ)* %ptr.$(i-1) to $JULIAPOINTERTYPE" + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $JULIAPOINTERTYPE" ) i += 1 elseif index_gep_typ != vtyp push!( instrs, - "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(vtyp)*" + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" ) i += 1 end @@ -822,27 +823,27 @@ function vload_quote_llvmcall_core( suffix(W, T_sym) * '.' * ptr_suffix(W, T_sym) - decl *= "declare $loadinstr(<$W x $typ*>, i32, <$W x i1>, $vtyp)" + decl *= "declare $loadinstr(<$W x ptr>, i32, <$W x i1>, $vtyp)" m = mask ? m = "%mask.0" : llvmconst(W, "i1 1") passthrough = mask ? "zeroinitializer" : "undef" push!( instrs, - "%res = call $loadinstr(<$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" * + "%res = call $loadinstr(<$W x ptr> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" * LOAD_SCOPE_TBAA_FLAGS ) elseif mask suff = suffix(W, T_sym) loadinstr = "$vtyp @llvm.masked.load." * suff * ".p0" * suff - decl *= "declare $loadinstr($vtyp*, i32, <$W x i1>, $vtyp)" + decl *= "declare $loadinstr(ptr, i32, <$W x i1>, $vtyp)" push!( instrs, - "%res = call $loadinstr($vtyp* %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0, $vtyp zeroinitializer)" * + "%res = call $loadinstr(ptr %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0, $vtyp zeroinitializer)" * LOAD_SCOPE_TBAA_FLAGS ) else push!( instrs, - "%res = load $vtyp, $vtyp* %ptr.$(i-1), align $alignment" * + "%res = load $vtyp, ptr %ptr.$(i-1), align $alignment" * LOAD_SCOPE_TBAA_FLAGS ) end @@ -1254,33 +1255,33 @@ function vstore_quote( suffix(W, T_sym) * '.' * ptr_suffix(W, T_sym) - decl *= "declare $storeinstr($vtyp, <$W x $typ*>, i32, <$W x i1>)" + decl *= "declare $storeinstr($vtyp, <$W x ptr>, i32, <$W x i1>)" m = mask ? m = "%mask.0" : llvmconst(W, "i1 1") push!( instrs, - "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" * + "call $storeinstr($vtyp $(argtostore), <$W x ptr> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" * metadata ) # push!(instrs, "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)") elseif mask suff = suffix(W, T_sym) storeinstr = "void @llvm.masked.store." * suff * ".p0" * suff - decl *= "declare $storeinstr($vtyp, $vtyp*, i32, <$W x i1>)" + decl *= "declare $storeinstr($vtyp, ptr, i32, <$W x i1>)" push!( instrs, - "call $storeinstr($vtyp $(argtostore), $vtyp* %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0)" * + "call $storeinstr($vtyp $(argtostore), ptr %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0)" * metadata ) elseif nontemporal push!( instrs, - "store $vtyp $(argtostore), $vtyp* %ptr.$(i-1), align $alignment, !nontemporal !{i32 1}" * + "store $vtyp $(argtostore), ptr %ptr.$(i-1), align $alignment, !nontemporal !{i32 1}" * metadata ) else push!( instrs, - "store $vtyp $(argtostore), $vtyp* %ptr.$(i-1), align $alignment" * + "store $vtyp $(argtostore), ptr %ptr.$(i-1), align $alignment" * metadata ) end @@ -2170,10 +2171,10 @@ end "Prefetch intrinsic requires a read/write argument of 0, 1, but received $R." ) ) - decl = "declare void @llvm.prefetch(i8*, i32, i32, i32)" + decl = "declare void @llvm.prefetch(ptr, i32, i32, i32)" + # should I cast instrs = """ - %addr = inttoptr $JULIAPOINTERTYPE %0 to i8* - call void @llvm.prefetch(i8* %addr, i32 $R, i32 $L, i32 1) + call void @llvm.prefetch(ptr %0, i32 $R, i32 $L, i32 1) ret void """ llvmcall_expr( @@ -2227,9 +2228,11 @@ end # @inline prefetch2(x, i, j, oi, oj) = prefetch(gep(stridedpointer(x), (data(i) + data(oi) - 1, data(j) + data(oj) - 1)), Val{1}(), Val{0}()) @generated function lifetime_start!(ptr::Ptr{T}, ::Val{L}) where {L,T} - ptyp = LLVM_TYPES[T] - decl = "declare void @llvm.lifetime.start(i64, $ptyp* nocapture)" - instrs = "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptyp*\ncall void @llvm.lifetime.start(i64 $L, $ptyp* %ptr)\nret void" + decl = "declare void @llvm.lifetime.start(i64, ptr nocapture)" + instrs = """ + call void @llvm.lifetime.start(i64 $L, ptr %0) + ret void + """ llvmcall_expr( decl, instrs, @@ -2243,9 +2246,11 @@ end ) end @generated function lifetime_end!(ptr::Ptr{T}, ::Val{L}) where {L,T} - ptyp = LLVM_TYPES[T] - decl = "declare void @llvm.lifetime.end(i64, $ptyp* nocapture)" - instrs = "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptyp*\ncall void @llvm.lifetime.end(i64 $L, $ptyp* %ptr)\nret void" + decl = "declare void @llvm.lifetime.end(i64, ptr nocapture)" + instrs = """ + call void @llvm.lifetime.end(i64 $L, ptr %0) + ret void + """ llvmcall_expr( decl, instrs, @@ -2284,12 +2289,12 @@ end vtyp = "<$W x $typ>" mtyp_input = LLVM_TYPES[U] mtyp_trunc = "i$W" - instrs = String["%ptr = inttoptr $JULIAPOINTERTYPE %1 to $typ*"] + instrs = String["%ptr = bitcast $JULIAPOINTERTYPE %1 to ptr"] truncate_mask!(instrs, '2', W, 0) - decl = "declare void @llvm.masked.compressstore.$(suffix(W,T))($vtyp, $typ*, <$W x i1>)" + decl = "declare void @llvm.masked.compressstore.$(suffix(W,T))($vtyp, ptr, <$W x i1>)" push!( instrs, - "call void @llvm.masked.compressstore.$(suffix(W,T))($vtyp %0, $typ* %ptr, <$W x i1> %mask.0)\nret void" + "call void @llvm.masked.compressstore.$(suffix(W,T))($vtyp %0, ptr %ptr, <$W x i1> %mask.0)\nret void" ) llvmcall_expr( decl, @@ -2310,21 +2315,21 @@ end ) where {W,T<:NativeTypes,U<:Unsigned} typ = LLVM_TYPES[T] vtyp = "<$W x $typ>" - vptrtyp = "<$W x $typ*>" + vptrtyp = "<$W x ptr>" mtyp_input = LLVM_TYPES[U] mtyp_trunc = "i$W" instrs = String[] - push!(instrs, "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $typ*") + push!(instrs, "%ptr = bitcast $JULIAPOINTERTYPE %0 to ptr") if mtyp_input == mtyp_trunc push!(instrs, "%mask = bitcast $mtyp_input %1 to <$W x i1>") else push!(instrs, "%masktrunc = trunc $mtyp_input %1 to $mtyp_trunc") push!(instrs, "%mask = bitcast $mtyp_trunc %masktrunc to <$W x i1>") end - decl = "declare $vtyp @llvm.masked.expandload.$(suffix(W,T))($typ*, <$W x i1>, $vtyp)" + decl = "declare $vtyp @llvm.masked.expandload.$(suffix(W,T))(ptr, <$W x i1>, $vtyp)" push!( instrs, - "%res = call $vtyp @llvm.masked.expandload.$(suffix(W,T))($typ* %ptr, <$W x i1> %mask, $vtyp zeroinitializer)\nret $vtyp %res" + "%res = call $vtyp @llvm.masked.expandload.$(suffix(W,T))(ptr %ptr, <$W x i1> %mask, $vtyp zeroinitializer)\nret $vtyp %res" ) llvmcall_expr( decl, diff --git a/src/llvm_intrin/nonbroadcastingops.jl b/src/llvm_intrin/nonbroadcastingops.jl index 7bc699c5..3337d002 100644 --- a/src/llvm_intrin/nonbroadcastingops.jl +++ b/src/llvm_intrin/nonbroadcastingops.jl @@ -1,4 +1,3 @@ - @generated function addscalar(v::Vec{W,T}, s::T) where {W,T<:IntegerTypesHW} typ = "i$(8sizeof(T))" vtyp = "<$W x $typ>" diff --git a/src/llvm_intrin/unary_ops.jl b/src/llvm_intrin/unary_ops.jl index 03a561d1..35256f2b 100644 --- a/src/llvm_intrin/unary_ops.jl +++ b/src/llvm_intrin/unary_ops.jl @@ -1,4 +1,3 @@ - function sub_quote(W::Int, T::Symbol, fast::Bool)::Expr vtyp = vtype(W, T) instrs = "%res = fneg $(fast_flags(fast)) $vtyp %0\nret $vtyp %res" diff --git a/src/llvm_intrin/vbroadcast.jl b/src/llvm_intrin/vbroadcast.jl index 29cae454..14af038f 100644 --- a/src/llvm_intrin/vbroadcast.jl +++ b/src/llvm_intrin/vbroadcast.jl @@ -1,4 +1,3 @@ - @inline vzero(::Val{1}, ::Type{T}) where {T<:NativeTypes} = zero(T) @inline vzero(::StaticInt{1}, ::Type{T}) where {T<:NativeTypes} = zero(T) @inline _vzero(::StaticInt{W}, ::Type{Float16}, ::StaticInt{RS}) where {W,RS} = @@ -164,12 +163,10 @@ end ) where {W,T} isone(W) && return Expr(:block, Expr(:meta, :inline), :(vload(ptr))) typ = LLVM_TYPES[T] - ptyp = JULIAPOINTERTYPE vtyp = "<$W x $typ>" alignment = Base.datatype_alignment(T) instrs = """ - %ptr = inttoptr $ptyp %0 to $typ* - %res = load $typ, $typ* %ptr, align $alignment + %res = load $typ, ptr %0, align $alignment %ie = insertelement $vtyp undef, $typ %res, i32 0 %v = shufflevector $vtyp %ie, $vtyp undef, <$W x i32> zeroinitializer ret $vtyp %v diff --git a/src/llvm_intrin/vector_ops.jl b/src/llvm_intrin/vector_ops.jl index ab99a688..154ef5b9 100644 --- a/src/llvm_intrin/vector_ops.jl +++ b/src/llvm_intrin/vector_ops.jl @@ -1,4 +1,3 @@ - function shufflevector_instrs( W::Int, @nospecialize(T), diff --git a/src/llvm_intrin/vfmaddsub.jl b/src/llvm_intrin/vfmaddsub.jl index e1430001..770578f7 100644 --- a/src/llvm_intrin/vfmaddsub.jl +++ b/src/llvm_intrin/vfmaddsub.jl @@ -1,4 +1,3 @@ - @inline function vfmaddsub( x::AbstractSIMD{W}, y::AbstractSIMD{W}, diff --git a/src/llvm_types.jl b/src/llvm_types.jl index 266ceb2c..4e7c5aeb 100644 --- a/src/llvm_types.jl +++ b/src/llvm_types.jl @@ -114,7 +114,7 @@ function _get_alignment(W::Int, sym::Symbol)::Int end end -const JULIAPOINTERTYPE = 'i' * string(8sizeof(Int)) +const JULIAPOINTERTYPE = "ptr" vtype(W, typ::String) = (isone(abs(W)) ? typ : "<$W x $typ>")::String vtype(W, T::DataType) = vtype(W, LLVM_TYPES[T])::String @@ -132,11 +132,12 @@ append_julia_type!(x, Ws, Ts) = push_julia_type!(x, Ws[i], Ts[i]) end -ptr_suffix(T) = "p0" * suffix(T) -ptr_suffix(W, T) = suffix(W, ptr_suffix(T)) +# TODO determine if ptr_suffix is needed +ptr_suffix(T) = "p0" +ptr_suffix(W, T) = suffix(W, ptr_suffix(T)) # keeping this for now suffix(W::Int, s::String) = W == -1 ? s : 'v' * string(W) * s suffix(W::Int, T) = suffix(W, suffix(T)) -suffix(::Type{Ptr{T}}) where {T} = "p0" * suffix(T) +suffix(::Type{Ptr{T}}) where {T} = "p0" suffix_jlsym(W::Int, s::Symbol) = suffix(W, suffix(s)) function suffix(T::Symbol)::String if T === :Float64 @@ -169,6 +170,7 @@ end function llvmconst(W::Int, v::String)::String '<' * join((v for _ in Base.OneTo(W)), ", ") * '>' end + # function llvmtypedconst(T, val) # typ = LLVM_TYPES[T] # iszero(val) && return "$typ zeroinitializer" @@ -177,6 +179,7 @@ end # function llvmtypedconst(::Type{Bool}, val) # Bool(val) ? "i1 1" : "i1 zeroinitializer" # end + function _llvmcall_expr(ff, WR, R, argt) if WR ≤ 1 Expr(:call, :ccall, ff, :llvmcall, R, argt) diff --git a/src/ranges.jl b/src/ranges.jl index e4eed1ed..e286be75 100644 --- a/src/ranges.jl +++ b/src/ranges.jl @@ -1,4 +1,3 @@ - @generated function _vrange( ::Val{W}, ::Type{T}, diff --git a/src/special/exp.jl b/src/special/exp.jl index aa114709..a337a7bd 100644 --- a/src/special/exp.jl +++ b/src/special/exp.jl @@ -1,4 +1,3 @@ - # `_vscalef` for architectures without `vscalef`. # magic rounding constant: 1.5*2^52 Adding, then subtracting it from a float rounds it to an Int. MAGIC_ROUND_CONST(::Type{Float64}) = 6.755399441055744e15 diff --git a/src/special/log.jl b/src/special/log.jl index 18dc60c0..029bc5d2 100644 --- a/src/special/log.jl +++ b/src/special/log.jl @@ -1,4 +1,3 @@ - # log2(x) = vgetexp(x) + log2(vgetmant8(x)) @inline function vlog_v1( x1::VectorizationBase.AbstractSIMD{W,Float64} diff --git a/src/static.jl b/src/static.jl index f925b355..7cc0f4f0 100644 --- a/src/static.jl +++ b/src/static.jl @@ -1,6 +1,5 @@ #TODO: Document interface to support static size -# Define maybestaticsize, maybestaticlength, and maybestaticfirstindex - +# Define maybestaticsize, maybestaticlength, and maybestaticfirstindex @inline maybestaticfirst(a) = static_first(a) @inline maybestaticlast(a) = static_last(a) @inline maybestaticlength(a) = static_length(a) diff --git a/src/strided_pointers/cartesian_indexing.jl b/src/strided_pointers/cartesian_indexing.jl index 725b4da3..d3984423 100644 --- a/src/strided_pointers/cartesian_indexing.jl +++ b/src/strided_pointers/cartesian_indexing.jl @@ -1,4 +1,3 @@ - # Overloadable method, e.g to insert OffsetPrecalc's precalculated stride multiples @inline tdot(ptr::AbstractStridedPointer, ::Tuple{}, ::Tuple{}) = (pointer(ptr), Zero()) diff --git a/src/strided_pointers/cse_stridemultiples.jl b/src/strided_pointers/cse_stridemultiples.jl index f2aedb27..0a2f83ae 100644 --- a/src/strided_pointers/cse_stridemultiples.jl +++ b/src/strided_pointers/cse_stridemultiples.jl @@ -34,6 +34,7 @@ end getfield(sptr, :precalc) ) end + @inline function LayoutPointers.similar_with_offset( sptr::OffsetPrecalc, ptr::Ptr, diff --git a/src/strided_pointers/stridedpointers.jl b/src/strided_pointers/stridedpointers.jl index 72a91772..83806c72 100644 --- a/src/strided_pointers/stridedpointers.jl +++ b/src/strided_pointers/stridedpointers.jl @@ -1,4 +1,3 @@ - @inline vstore!(ptr::AbstractStridedPointer{T}, v::Number) where {T<:Number} = __vstore!( pointer(ptr), @@ -568,7 +567,7 @@ BenchmarkTools.Trial: function llvmptr_comp_quote(cmp, Tsym) pt = Expr(:curly, GlobalRef(Core, :LLVMPtr), Tsym, 0) - instrs = "%cmpi1 = icmp $cmp i8* %0, %1\n%cmpi8 = zext i1 %cmpi1 to i8\nret i8 %cmpi8" + instrs = "%cmpi1 = icmp $cmp ptr %0, %1\n%cmpi8 = zext i1 %cmpi1 to i8\nret i8 %cmpi8" Expr( :block, Expr(:meta, :inline), diff --git a/src/vector_width.jl b/src/vector_width.jl index 2bc84c3e..448feb3b 100644 --- a/src/vector_width.jl +++ b/src/vector_width.jl @@ -1,4 +1,3 @@ - # nextpow2(W) = vshl(one(W), vsub_fast(8sizeof(W), leading_zeros(vsub_fast(W, one(W))))) # @inline _pick_vector(::StaticInt{W}, ::Type{T}) where {W,T} = Vec{W,T} diff --git a/src/vecunroll/mappedloadstore.jl b/src/vecunroll/mappedloadstore.jl index ee5b5a9c..62fd999a 100644 --- a/src/vecunroll/mappedloadstore.jl +++ b/src/vecunroll/mappedloadstore.jl @@ -1,4 +1,3 @@ - @inline _maybefirst(x) = x @inline _maybefirst(x::VecUnroll) = first(data(x)) @inline _maybetail(x) = x From 0ef4d392843cd2fb9db4ceddc67303ee8297fa22 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Tue, 29 Jul 2025 00:41:41 -0400 Subject: [PATCH 02/10] added support for julia 1.6 --- src/llvm_intrin/memory_addr.jl | 449 ++++++++++++++++++------ src/llvm_intrin/vbroadcast.jl | 15 +- src/llvm_types.jl | 28 +- src/strided_pointers/stridedpointers.jl | 8 +- 4 files changed, 376 insertions(+), 124 deletions(-) diff --git a/src/llvm_intrin/memory_addr.jl b/src/llvm_intrin/memory_addr.jl index 10d3bdc5..e514d544 100644 --- a/src/llvm_intrin/memory_addr.jl +++ b/src/llvm_intrin/memory_addr.jl @@ -167,10 +167,17 @@ function offset_ptr( end # after this block, we will have a index_gep_typ pointer if iszero(O) - push!( - instrs, - "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr" - ) + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr" + ) + else + push!( + instrs, + "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(index_gep_typ)*" + ) + end i += 1 else # !iszero(O) if !iszero(O & (tzf - 1)) # then index_gep_typ works for the constant offset @@ -180,28 +187,56 @@ function offset_ptr( offset_gep_typ = index_gep_typ offset = O >> tz end - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr" + ) + else + push!( instrs, - "%ptr.$(i) = bitcast $(JULIAPOINTERTYPE) %0 to ptr" - ) + "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(offset_gep_typ)*" + ) + end i += 1 - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), ptr %ptr.$(i-1), i32 $(offset)" + ) + else + push!( instrs, - "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), ptr %ptr.$(i-1), i32 $(offset)" + "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), $(offset_gep_typ)* %ptr.$(i-1), i32 $(offset)" ) + end i += 1 if forgep && iszero(M) && (iszero(X) || isone(X)) - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $(JULIAPOINTERTYPE)" + ) + else + push!( instrs, - "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $(JULIAPOINTERTYPE)" - ) + "%ptr.$(i) = ptrtoint $(offset_gep_typ)* %ptr.$(i-1) to $(JULIAPOINTERTYPE)" + ) + end i += 1 return instrs, i elseif offset_gep_typ != index_gep_typ - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" + ) + else + push!( instrs, - "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" + "%ptr.$(i) = bitcast $(offset_gep_typ)* %ptr.$(i-1) to $(index_gep_typ)*" ) + end i += 1 end end @@ -217,22 +252,43 @@ function offset_ptr( "%$(indname) = mul nsw <$W x i$(ibits)> %$(indargname), $(constmul)" ) end - push!( - instrs, - "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), ptr %ptr.$(i-1), <$W x i$(ibits)> %$(indname)" - ) - i += 1 - if forgep + @static if USE_OPAQUE_PTR push!( instrs, - "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" + "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), ptr %ptr.$(i-1), <$W x i$(ibits)> %$(indname)" ) - i += 1 - elseif index_gep_typ != vtyp + else push!( instrs, - "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x ptr>" + "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), $(index_gep_typ)* %ptr.$(i-1), <$W x i$(ibits)> %$(indname)" ) + end + i += 1 + if forgep + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" + ) + else + push!( + instrs, + "%ptr.$(i) = ptrtoint <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" + ) + end + i += 1 + elseif index_gep_typ != vtyp + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x ptr>" + ) + else + push!( + instrs, + "%ptr.$(i) = bitcast <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $typ*>" + ) + end i += 1 end return instrs, i @@ -277,10 +333,17 @@ function offset_ptr( end # TODO: if X != 1 and X != 0, check if it is better to gep -> gep, or broadcast -> add -> gep end - push!( - instrs, - "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), ptr %ptr.$(i-1), i$(ibits) %$(indname)" - ) + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), ptr %ptr.$(i-1), i$(ibits) %$(indname)" + ) + else + push!( + instrs, + "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), $(index_gep_typ)* %ptr.$(i-1), i$(ibits) %$(indname)" + ) + end i += 1 end # ind_type === :Integer || ind_type === :StaticInt @@ -292,37 +355,72 @@ function offset_ptr( vityp = "i$(8vibytes)" vi = join((X * w for w ∈ 0:W-1), ", $vityp ") if typ !== index_gep_typ - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" + ) + else + push!( instrs, - "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" + "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(typ)*" ) + end i += 1 end - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = getelementptr inbounds $(typ), ptr %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>" + ) + else + push!( instrs, - "%ptr.$(i) = getelementptr inbounds $(typ), ptr %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>" - ) + "%ptr.$(i) = getelementptr inbounds $(typ), $(typ)* %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>" + ) + end i += 1 if forgep - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" + ) + else + push!( instrs, - "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" - ) + "%ptr.$(i) = ptrtoint <$W x $typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" + ) + end i += 1 end return instrs, i end if forgep # if forgep, just return now - push!( - instrs, - "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $JULIAPOINTERTYPE" - ) + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to $JULIAPOINTERTYPE" + ) + else + push!( + instrs, + "%ptr.$(i) = ptrtoint $(index_gep_typ)* %ptr.$(i-1) to $JULIAPOINTERTYPE" + ) + end i += 1 elseif index_gep_typ != vtyp - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" + ) + else + push!( instrs, - "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" - ) + "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(vtyp)*" + ) + end i += 1 end instrs, i @@ -818,34 +916,67 @@ function vload_quote_llvmcall_core( end end if grv - loadinstr = - "$vtyp @llvm.masked.gather." * - suffix(W, T_sym) * - '.' * - ptr_suffix(W, T_sym) - decl *= "declare $loadinstr(<$W x ptr>, i32, <$W x i1>, $vtyp)" + @static if USE_OPAQUE_PTR + loadinstr = + "$vtyp @llvm.masked.gather." * + suffix(W, T_sym) + decl *= "declare $loadinstr(<$W x ptr>, i32, <$W x i1>, $vtyp)" + else + loadinstr = "$vtyp @llvm.masked.gather." * + suffix(W, T_sym) * + '.' * + ptr_suffix(W, T_sym) + decl *= "declare $loadinstr(<$W x $typ*>, i32, <$W x i1>, $vtyp)" + end + m = mask ? m = "%mask.0" : llvmconst(W, "i1 1") passthrough = mask ? "zeroinitializer" : "undef" - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%res = call $loadinstr(<$W x ptr> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" * + LOAD_SCOPE_TBAA_FLAGS + ) + else + push!( instrs, - "%res = call $loadinstr(<$W x ptr> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" * + "%res = call $loadinstr(<$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" * LOAD_SCOPE_TBAA_FLAGS - ) + ) + end elseif mask suff = suffix(W, T_sym) - loadinstr = "$vtyp @llvm.masked.load." * suff * ".p0" * suff - decl *= "declare $loadinstr(ptr, i32, <$W x i1>, $vtyp)" - push!( - instrs, - "%res = call $loadinstr(ptr %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0, $vtyp zeroinitializer)" * - LOAD_SCOPE_TBAA_FLAGS - ) + if USE_OPAQUE_PTR + loadinstr = "$vtyp @llvm.masked.load." * suff + decl *= "declare $loadinstr(ptr, i32, <$W x i1>, $vtyp)" + push!( + instrs, + "%res = call $loadinstr(ptr %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0, $vtyp zeroinitializer)" * + LOAD_SCOPE_TBAA_FLAGS + ) + else + loadinstr = "$vtyp @llvm.masked.load." * suff * ".p0" * suff + decl *= "declare $loadinstr($vtyp*, i32, <$W x i1>, $vtyp)" + push!( + instrs, + "%res = call $loadinstr($vtyp* %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0, $vtyp zeroinitializer)" * + LOAD_SCOPE_TBAA_FLAGS + ) + end else - push!( + @static if USE_OPAQUE_PTR + push!( + instrs, + "%res = load $vtyp, ptr %ptr.$(i-1), align $alignment" * + LOAD_SCOPE_TBAA_FLAGS + ) + else + push!( instrs, - "%res = load $vtyp, ptr %ptr.$(i-1), align $alignment" * + "%res = load $vtyp, $vtyp* %ptr.$(i-1), align $alignment" * LOAD_SCOPE_TBAA_FLAGS - ) + ) + end end if isbit lret = string('i', max(8, nextpow2(W))) @@ -1250,40 +1381,81 @@ function vstore_quote( argtostore = "%1" end if grv - storeinstr = + @static if USE_OPAQUE_PTR + storeinstr = "void @llvm.masked.scatter." * - suffix(W, T_sym) * - '.' * - ptr_suffix(W, T_sym) - decl *= "declare $storeinstr($vtyp, <$W x ptr>, i32, <$W x i1>)" - m = mask ? m = "%mask.0" : llvmconst(W, "i1 1") - push!( + suffix(W, T_sym) + decl *= "declare $storeinstr($vtyp, <$W x ptr>, i32, <$W x i1>)" + else + storeinstr = + "void @llvm.masked.scatter." * + suffix(W, T_sym) * + '.' * + ptr_suffix(W, T_sym) + decl *= "declare $storeinstr($vtyp, <$W x $typ*>, i32, <$W x i1>)" + end + m = mask ? m = "%mask.0" : llvmconst(W, "i1 1") + @static if USE_OPAQUE_PTR + push!( + instrs, + "call $storeinstr($vtyp $(argtostore), <$W x ptr> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" * + metadata + ) + else + push!( instrs, - "call $storeinstr($vtyp $(argtostore), <$W x ptr> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" * + "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" * metadata - ) + ) + end # push!(instrs, "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)") elseif mask suff = suffix(W, T_sym) - storeinstr = "void @llvm.masked.store." * suff * ".p0" * suff - decl *= "declare $storeinstr($vtyp, ptr, i32, <$W x i1>)" + @static if USE_OPAQUE_PTR + storeinstr = "void @llvm.masked.store." * suff + decl *= "declare $storeinstr($vtyp, ptr, i32, <$W x i1>)" + push!( + instrs, + "call $storeinstr($vtyp $(argtostore), ptr %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0)" * + metadata + ) + else + storeinstr = "void @llvm.masked.store." * suff + decl *= "declare $storeinstr($vtyp, $vtyp*, i32, <$W x i1>)" push!( - instrs, - "call $storeinstr($vtyp $(argtostore), ptr %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0)" * - metadata - ) + instrs, + "call $storeinstr($vtyp $(argtostore), $vtyp* %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0)" * + metadata + ) + end elseif nontemporal - push!( - instrs, - "store $vtyp $(argtostore), ptr %ptr.$(i-1), align $alignment, !nontemporal !{i32 1}" * - metadata - ) + @static if USE_OPAQUE_PTR + push!( + instrs, + "store $vtyp $(argtostore), ptr %ptr.$(i-1), align $alignment, !nontemporal !{i32 1}" * + metadata + ) + else + push!( + instrs, + "store $vtyp $(argtostore), $vtyp* %ptr.$(i-1), align $alignment, !nontemporal !{i32 1}" * + metadata + ) + end else - push!( - instrs, - "store $vtyp $(argtostore), ptr %ptr.$(i-1), align $alignment" * - metadata - ) + @static if USE_OPAQUE_PTR + push!( + instrs, + "store $vtyp $(argtostore), ptr %ptr.$(i-1), align $alignment" * + metadata + ) + else + push!( + instrs, + "store $vtyp $(argtostore), $vtyp* %ptr.$(i-1), align $alignment" * + metadata + ) + end end push!(instrs, "ret void") ret = :Cvoid @@ -2171,12 +2343,21 @@ end "Prefetch intrinsic requires a read/write argument of 0, 1, but received $R." ) ) - decl = "declare void @llvm.prefetch(ptr, i32, i32, i32)" - # should I cast - instrs = """ + @static if USE_OPAQUE_PTR + decl = "declare void @llvm.prefetch(ptr, i32, i32, i32)" + instrs = """ call void @llvm.prefetch(ptr %0, i32 $R, i32 $L, i32 1) ret void - """ + """ + else + decl = "declare void @llvm.prefetch(i8*, i32, i32, i32)" + instrs = """ + %addr = inttoptr $JULIAPOINTERTYPE %0 to i8* + call void @llvm.prefetch(i8* %addr, i32 $R, i32 $L, i32 1) + ret void + """ + end + llvmcall_expr( decl, instrs, @@ -2228,11 +2409,17 @@ end # @inline prefetch2(x, i, j, oi, oj) = prefetch(gep(stridedpointer(x), (data(i) + data(oi) - 1, data(j) + data(oj) - 1)), Val{1}(), Val{0}()) @generated function lifetime_start!(ptr::Ptr{T}, ::Val{L}) where {L,T} - decl = "declare void @llvm.lifetime.start(i64, ptr nocapture)" - instrs = """ - call void @llvm.lifetime.start(i64 $L, ptr %0) - ret void - """ + @static if USE_OPAQUE_PTR + decl = "declare void @llvm.lifetime.start(i64, ptr nocapture)" + instrs = """ + call void @llvm.lifetime.start(i64 $L, ptr %0) + ret void + """ + else + ptyp = LLVM_TYPES[T] + decl = "declare void @llvm.lifetime.start(i64, $ptyp* nocapture)" + instrs = "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptyp*\ncall void @llvm.lifetime.start(i64 $L, $ptyp* %ptr)\nret void" + end llvmcall_expr( decl, instrs, @@ -2246,11 +2433,17 @@ end ) end @generated function lifetime_end!(ptr::Ptr{T}, ::Val{L}) where {L,T} - decl = "declare void @llvm.lifetime.end(i64, ptr nocapture)" - instrs = """ - call void @llvm.lifetime.end(i64 $L, ptr %0) - ret void - """ + @static if USE_OPAQUE_PTR + decl = "declare void @llvm.lifetime.end(i64, ptr nocapture)" + instrs = """ + call void @llvm.lifetime.end(i64 $L, ptr %0) + ret void + """ + else + ptyp = LLVM_TYPES[T] + decl = "declare void @llvm.lifetime.end(i64, $ptyp* nocapture)" + instrs = "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptyp*\ncall void @llvm.lifetime.end(i64 $L, $ptyp* %ptr)\nret void" + end llvmcall_expr( decl, instrs, @@ -2289,13 +2482,25 @@ end vtyp = "<$W x $typ>" mtyp_input = LLVM_TYPES[U] mtyp_trunc = "i$W" - instrs = String["%ptr = bitcast $JULIAPOINTERTYPE %1 to ptr"] + @static if USE_OPAQUE_PTR + instrs = String["%ptr = bitcast $JULIAPOINTERTYPE %1 to ptr"] + else + instrs = String["%ptr = inttoptr $JULIAPOINTERTYPE %1 to $typ*"] + end truncate_mask!(instrs, '2', W, 0) - decl = "declare void @llvm.masked.compressstore.$(suffix(W,T))($vtyp, ptr, <$W x i1>)" - push!( - instrs, - "call void @llvm.masked.compressstore.$(suffix(W,T))($vtyp %0, ptr %ptr, <$W x i1> %mask.0)\nret void" - ) + @static if USE_OPAQUE_PTR + decl = "declare void @llvm.masked.compressstore.$(suffix(W,T))($vtyp, ptr, <$W x i1>)" + push!( + instrs, + "call void @llvm.masked.compressstore.$(suffix(W,T))($vtyp %0, ptr %ptr, <$W x i1> %mask.0)\nret void" + ) + else + decl = "declare void @llvm.masked.compressstore.$(suffix(W,T))($vtyp, $typ*, <$W x i1>)" + push!( + instrs, + "call void @llvm.masked.compressstore.$(suffix(W,T))($vtyp %0, $typ* %ptr, <$W x i1> %mask.0)\nret void" + ) + end llvmcall_expr( decl, join(instrs, "\n"), @@ -2315,22 +2520,38 @@ end ) where {W,T<:NativeTypes,U<:Unsigned} typ = LLVM_TYPES[T] vtyp = "<$W x $typ>" - vptrtyp = "<$W x ptr>" + @static if USE_OPAQUE_PTR + vptrtyp = "<$W x ptr>" + else + vptrtyp = "<$W x $typ*>" + end mtyp_input = LLVM_TYPES[U] mtyp_trunc = "i$W" instrs = String[] - push!(instrs, "%ptr = bitcast $JULIAPOINTERTYPE %0 to ptr") + @static if USE_OPAQUE_PTR + push!(instrs, "%ptr = bitcast $JULIAPOINTERTYPE %0 to ptr") + else + push!(instrs, "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $typ*") + end if mtyp_input == mtyp_trunc push!(instrs, "%mask = bitcast $mtyp_input %1 to <$W x i1>") else push!(instrs, "%masktrunc = trunc $mtyp_input %1 to $mtyp_trunc") push!(instrs, "%mask = bitcast $mtyp_trunc %masktrunc to <$W x i1>") end - decl = "declare $vtyp @llvm.masked.expandload.$(suffix(W,T))(ptr, <$W x i1>, $vtyp)" - push!( - instrs, - "%res = call $vtyp @llvm.masked.expandload.$(suffix(W,T))(ptr %ptr, <$W x i1> %mask, $vtyp zeroinitializer)\nret $vtyp %res" - ) + @static if USE_OPAQUE_PTR + decl = "declare $vtyp @llvm.masked.expandload.$(suffix(W,T))(ptr, <$W x i1>, $vtyp)" + push!( + instrs, + "%res = call $vtyp @llvm.masked.expandload.$(suffix(W,T))(ptr %ptr, <$W x i1> %mask, $vtyp zeroinitializer)\nret $vtyp %res" + ) + else + decl = "declare $vtyp @llvm.masked.expandload.$(suffix(W,T))($typ*, <$W x i1>, $vtyp)" + push!( + instrs, + "%res = call $vtyp @llvm.masked.expandload.$(suffix(W,T))($typ* %ptr, <$W x i1> %mask, $vtyp zeroinitializer)\nret $vtyp %res" + ) + end llvmcall_expr( decl, join(instrs, "\n"), diff --git a/src/llvm_intrin/vbroadcast.jl b/src/llvm_intrin/vbroadcast.jl index 14af038f..facfdf25 100644 --- a/src/llvm_intrin/vbroadcast.jl +++ b/src/llvm_intrin/vbroadcast.jl @@ -164,13 +164,24 @@ end isone(W) && return Expr(:block, Expr(:meta, :inline), :(vload(ptr))) typ = LLVM_TYPES[T] vtyp = "<$W x $typ>" + ptyp = JULIAPOINTERTYPE alignment = Base.datatype_alignment(T) - instrs = """ + @static if USE_OPAQUE_PTR + instrs = """ %res = load $typ, ptr %0, align $alignment %ie = insertelement $vtyp undef, $typ %res, i32 0 %v = shufflevector $vtyp %ie, $vtyp undef, <$W x i32> zeroinitializer ret $vtyp %v - """ + """ + else + instrs = """ + %ptr = inttoptr $ptyp %0 to $typ* + %res = load $typ, $typ* %ptr, align $alignment + %ie = insertelement $vtyp undef, $typ %res, i32 0 + %v = shufflevector $vtyp %ie, $vtyp undef, <$W x i32> zeroinitializer + ret $vtyp %v + """ + end quote $(Expr(:meta, :inline)) Vec($LLVMCALL($instrs, _Vec{$W,$T}, Tuple{Ptr{$T}}, ptr)) diff --git a/src/llvm_types.jl b/src/llvm_types.jl index 4e7c5aeb..60409cda 100644 --- a/src/llvm_types.jl +++ b/src/llvm_types.jl @@ -114,7 +114,19 @@ function _get_alignment(W::Int, sym::Symbol)::Int end end -const JULIAPOINTERTYPE = "ptr" +""" +use opaque pointer +Ref: +- Switch LLVM codegen of Ptr{T} to an actual pointer type. + https://github.com/JuliaLang/julia/pull/53687 +""" +const USE_OPAQUE_PTR = VERSION >= v"1.12-DEV" + +@static if !USE_OPAQUE_PTR + const JULIAPOINTERTYPE = 'i' * string(8sizeof(Int)) +else + const JULIAPOINTERTYPE = "ptr" +end vtype(W, typ::String) = (isone(abs(W)) ? typ : "<$W x $typ>")::String vtype(W, T::DataType) = vtype(W, LLVM_TYPES[T])::String @@ -132,12 +144,8 @@ append_julia_type!(x, Ws, Ts) = push_julia_type!(x, Ws[i], Ts[i]) end -# TODO determine if ptr_suffix is needed -ptr_suffix(T) = "p0" -ptr_suffix(W, T) = suffix(W, ptr_suffix(T)) # keeping this for now +ptr_suffix(W, T) = suffix(W, ptr_suffix(T)) suffix(W::Int, s::String) = W == -1 ? s : 'v' * string(W) * s -suffix(W::Int, T) = suffix(W, suffix(T)) -suffix(::Type{Ptr{T}}) where {T} = "p0" suffix_jlsym(W::Int, s::Symbol) = suffix(W, suffix(s)) function suffix(T::Symbol)::String if T === :Float64 @@ -149,6 +157,14 @@ function suffix(T::Symbol)::String end end suffix(@nospecialize(T))::String = suffix(JULIA_TYPES[T]) +@static if !USE_OPAQUE_PTR + ptr_suffix(T) = "p0" * suffix(T) + suffix(::Type{Ptr{T}}) where {T} = "p0" * suffix(T) +else + ptr_suffix(T) = "p0" + suffix(::Type{Ptr{T}}) where {T} = "p0" +end +suffix(W::Int, T) = suffix(W, suffix(T)) # Type-dependent LLVM constants function llvmconst(T, val)::String diff --git a/src/strided_pointers/stridedpointers.jl b/src/strided_pointers/stridedpointers.jl index 83806c72..32cd1e12 100644 --- a/src/strided_pointers/stridedpointers.jl +++ b/src/strided_pointers/stridedpointers.jl @@ -567,8 +567,12 @@ BenchmarkTools.Trial: function llvmptr_comp_quote(cmp, Tsym) pt = Expr(:curly, GlobalRef(Core, :LLVMPtr), Tsym, 0) - instrs = "%cmpi1 = icmp $cmp ptr %0, %1\n%cmpi8 = zext i1 %cmpi1 to i8\nret i8 %cmpi8" - Expr( + @static if USE_OPAQUE_PTR + instrs = "%cmpi1 = icmp $cmp ptr %0, %1\n%cmpi8 = zext i1 %cmpi1 to i8\nret i8 %cmpi8" + else + instrs = "%cmpi1 = icmp $cmp i8* %0, %1\n%cmpi8 = zext i1 %cmpi1 to i8\nret i8 %cmpi8" + end + Expr( :block, Expr(:meta, :inline), :($(Base.llvmcall)($instrs, Bool, Tuple{$pt,$pt}, p1, p2)) From 3ff76ea59d6d97f711142bcadd40ba52d4268c53 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Fri, 1 Aug 2025 19:25:23 -0400 Subject: [PATCH 03/10] removed unused variable --- src/llvm_intrin/memory_addr.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/llvm_intrin/memory_addr.jl b/src/llvm_intrin/memory_addr.jl index e514d544..8042e9f4 100644 --- a/src/llvm_intrin/memory_addr.jl +++ b/src/llvm_intrin/memory_addr.jl @@ -2520,11 +2520,6 @@ end ) where {W,T<:NativeTypes,U<:Unsigned} typ = LLVM_TYPES[T] vtyp = "<$W x $typ>" - @static if USE_OPAQUE_PTR - vptrtyp = "<$W x ptr>" - else - vptrtyp = "<$W x $typ*>" - end mtyp_input = LLVM_TYPES[U] mtyp_trunc = "i$W" instrs = String[] From c2e98ffbc7f4d7c2e5cc0a11858a8d8f959c027c Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Fri, 1 Aug 2025 20:13:33 -0400 Subject: [PATCH 04/10] searchsortedlast now returns same vector element type as input vector --- src/special/misc.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/special/misc.jl b/src/special/misc.jl index 0e851008..ed1665c7 100644 --- a/src/special/misc.jl +++ b/src/special/misc.jl @@ -244,7 +244,7 @@ for TType in [:Integer, :(AbstractSIMDVector{W,<:Integer})] lo = ifelse(b, lo, m) st = lo < hi - u end - return lo + return convert(typeof(x), lo) end end end From 2804ed35ed47c00c6fc972f3574a9f02d8bb5a64 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Tue, 5 Aug 2025 23:03:59 -0400 Subject: [PATCH 05/10] tests: treating pirated types as own --- Project.toml | 1 + test/Project.toml | 2 ++ test/runtests.jl | 17 ++++++++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index aa7abee3..d0a21567 100644 --- a/Project.toml +++ b/Project.toml @@ -27,6 +27,7 @@ SIMDTypes = "0.1" Static = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1" StaticArrayInterface = "1" julia = "1.10" +Test = "<0.0.1, 1" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/Project.toml b/test/Project.toml index d7a7e2b3..30d4cd7c 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -3,5 +3,7 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +LayoutPointers = "10f19ff3-798f-405d-979b-55457f8fc047" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 7afe72e4..c9cde993 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,22 @@ include("testsetup.jl") println("Aqua.test_all") t0 = time_ns() deps_compat = VERSION <= v"1.8" || isempty(VERSION.prerelease) - Aqua.test_all(VectorizationBase; deps_compat = deps_compat) + + # TODO - Will need a code refactor to properly address these type piracies. + # Either: + # 1. Create type wrappers in VectorizationBase + # 2. Implement overloading upstream + # 3. Use package extensions (still buggy in current Julia LTS v1.10.10) + + pirated_types = [ + VectorizationBase.FastRange, + VectorizationBase.AbstractStridedPointer, + VectorizationBase.StridedBitPointer, + VectorizationBase.StaticInt, + VectorizationBase.AbstractSIMD, + VectorizationBase.Bit, + ] + Aqua.test_all(VectorizationBase; deps_compat = deps_compat, piracies=(treat_as_own = pirated_types,)) println("Aqua took $((time_ns() - t0)*1e-9) seconds") # @test isempty(detect_unbound_args(VectorizationBase)) # @test isempty(detect_ambiguities(VectorizationBase)) From 68f31d716a5c668285619f15f46cbb468a0579d7 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Tue, 5 Aug 2025 23:28:50 -0400 Subject: [PATCH 06/10] docs: removed strict and adjusted doctest --- docs/make.jl | 1 - src/VectorizationBase.jl | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 0383bc0c..15edee97 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,7 +11,6 @@ makedocs(; canonical = "https://JuliaSIMD.github.io/VectorizationBase.jl" ), pages = ["Home" => "index.md"], - strict = false ) deploydocs(; repo = "github.com/JuliaSIMD/VectorizationBase.jl") diff --git a/src/VectorizationBase.jl b/src/VectorizationBase.jl index 91c1ea3b..0e0fe0e5 100644 --- a/src/VectorizationBase.jl +++ b/src/VectorizationBase.jl @@ -181,7 +181,7 @@ julia> rgbs = [ B = Float32(i + 200) / 255 ) for i = 0:7:49 ] -8-element Vector{NamedTuple{(:R, :G, :B), Tuple{Float32, Float32, Float32}}}: +8-element Vector{@NamedTuple{R::Float32, G::Float32, B::Float32}}: (R = 0.0, G = 0.39215687, B = 0.78431374) (R = 0.02745098, G = 0.41960785, B = 0.8117647) (R = 0.05490196, G = 0.44705883, B = 0.8392157) From 794d07a9e0cc69deac8197cba607cdb33c1534f5 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski Date: Thu, 7 Aug 2025 14:47:48 -0400 Subject: [PATCH 07/10] ci: removed redundant file --- .github/workflows/ci-julia-nightly.yml | 46 -------------------------- 1 file changed, 46 deletions(-) delete mode 100644 .github/workflows/ci-julia-nightly.yml diff --git a/.github/workflows/ci-julia-nightly.yml b/.github/workflows/ci-julia-nightly.yml deleted file mode 100644 index 266ad2bf..00000000 --- a/.github/workflows/ci-julia-nightly.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: CI (Julia nightly) -on: - pull_request: - branches: - - master - push: - branches: - - master - tags: '*' -jobs: - test-julia-nightly: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - 'nightly' - os: - - ubuntu-latest - - macOS-latest - - windows-latest - arch: - - x64 - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: actions/cache@v4 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 - - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v3 - with: - file: lcov.info From 676b6688687c0e09b691ffda517148fdf45486e4 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Wed, 6 Aug 2025 22:27:09 -0400 Subject: [PATCH 08/10] bumped integration tests --- .github/workflows/Downstream.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml index 0de012e9..b92f576e 100644 --- a/.github/workflows/Downstream.yml +++ b/.github/workflows/Downstream.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - julia-version: [1,1.6] + julia-version: ['lts', '1', 'pre'] os: [ubuntu-latest] package: - {user: JuliaSIMD, repo: LoopVectorization.jl, group: Interface} From 814d181b4b791b684cae8f39c0c16c9ee42fd4ea Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sat, 16 Aug 2025 04:34:43 -0400 Subject: [PATCH 09/10] Update ci.yml --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b26fd298..f4792c41 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,7 +53,7 @@ jobs: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: - version: 'nightly' + version: 'pre' - run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")' shell: bash env: @@ -74,7 +74,7 @@ jobs: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: - version: 'nightly' + version: 'pre' - run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")' shell: bash env: From a74ebae09a2f2ea0285f5d0fab6fc3f28c95709d Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sat, 16 Aug 2025 04:58:42 -0400 Subject: [PATCH 10/10] Update cix86.yml --- .github/workflows/cix86.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cix86.yml b/.github/workflows/cix86.yml index 2df66c05..77b5dfe4 100644 --- a/.github/workflows/cix86.yml +++ b/.github/workflows/cix86.yml @@ -51,7 +51,7 @@ jobs: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: - version: 'nightly' + version: 'pre' - run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")' shell: bash env: @@ -72,7 +72,7 @@ jobs: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: - version: 'nightly' + version: 'pre' - run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")' shell: bash env: