diff --git a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp index 6e33c33c0ed75..50f3c9e0841f1 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include @@ -24,6 +25,39 @@ namespace sycl { inline namespace _V1 { + +namespace detail { +__SYCL_EXPORT std::shared_ptr +link_impl(const kernel_bundle *ObjectBundles, + size_t NumObjectBundles, const std::vector &Devs, + bool FastLink); + +template < + typename PropertyListT = ext::oneapi::experimental::empty_properties_t, + typename = std::enable_if_t< + ext::oneapi::experimental::detail::all_are_properties_of_v< + ext::oneapi::experimental::detail::link_props, PropertyListT>>> +kernel_bundle +link_common(const kernel_bundle *ObjectBundles, + size_t NumObjectBundles, const std::vector &Devs, + PropertyListT Props = {}) { + std::vector UniqueDevices = removeDuplicateDevices(Devs); + + bool UseFastLink = [&]() { + if constexpr (Props.template has_property< + ext::oneapi::experimental::fast_link>()) + return Props.template get_property() + .value; + return false; + }(); + + KernelBundleImplPtr Impl = + link_impl(ObjectBundles, NumObjectBundles, UniqueDevices, UseFastLink); + return createSyclObjFromImpl>( + std::move(Impl)); +} +} // namespace detail + namespace ext::oneapi::experimental { template @@ -77,6 +111,46 @@ get_kernel_bundle(const context &Ctxt, const std::filesystem::path &Filename, } #endif +template >> +kernel_bundle +link(const std::vector> &ObjectBundles, + const std::vector &Devs, PropertyListT Props = {}) { + return sycl::detail::link_common(ObjectBundles.data(), ObjectBundles.size(), + Devs, Props); +} + +template >> +kernel_bundle +link(const kernel_bundle &ObjectBundle, + const std::vector &Devs, PropertyListT Props = {}) { + return 
sycl::detail::link_common(&ObjectBundle, 1, Devs, Props); +} + +template >> +kernel_bundle +link(const std::vector> &ObjectBundles, + PropertyListT Props = {}) { + std::vector IntersectDevices = + sycl::detail::find_device_intersection(ObjectBundles); + return link(ObjectBundles, IntersectDevices, Props); +} + +template >> +kernel_bundle +link(const kernel_bundle &ObjectBundle, + PropertyListT Props = {}) { + return link(std::vector>{ObjectBundle}, + ObjectBundle.get_devices(), Props); +} + } // namespace ext::oneapi::experimental } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/experimental/syclbin_properties.hpp b/sycl/include/sycl/ext/oneapi/experimental/syclbin_properties.hpp new file mode 100644 index 0000000000000..3b74faeff9c8e --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/syclbin_properties.hpp @@ -0,0 +1,39 @@ +//==------------------------------------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { + +namespace detail { +struct link_props; +} // namespace detail + +namespace ext::oneapi::experimental { + +///////////////////////// +// PropertyT syclex::fast_link +///////////////////////// +struct fast_link + : detail::run_time_property_key { + fast_link(bool DoFastLink = true) : value(DoFastLink) {} + + bool value; +}; +using fast_link_key = fast_link; + +template <> +struct is_property_key_of + : std::true_type {}; +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index d68ec8884d93b..c2b790f5bc3ba 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -228,8 +228,9 @@ enum PropKind : uint32_t { InitialThreshold = 83, MaximumSize = 84, ZeroInit = 85, + FastLink = 86, // PropKindSize must always be the last value. 
- PropKindSize = 86, + PropKindSize = 87, }; template struct PropertyToKind { diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index ec3708a32cf63..3799ee3c71e20 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -137,6 +137,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include +#include #include #include #include diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 0c9222e9fa47b..f1e485409655f 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -61,6 +61,80 @@ inline bool checkAllDevicesHaveAspect(devices_range Devices, aspect Aspect) { [&Aspect](device_impl &Dev) { return Dev.has(Aspect); }); } +// Creates a link graph where the edges represent the relationship between +// imported and exported symbols in the provided images. +// The link graph takes a vector of images and a vector of vectors of integral +// values. The latter is the dependencies of the images in the former argument. +// Each vector of dependencies correspond 1:1 with the images in the device +// images, and the values in each of these vectors correspond to the index of +// each of the images it depends on. +inline LinkGraph +CreateLinkGraph(const std::vector &DevImages) { + // Create a map between exported symbols and their indices in the device + // images collection. 
+ std::map ExportMap; + for (size_t I = 0; I < DevImages.size(); ++I) { + device_image_impl &DevImageImpl = *getSyclObjImpl(DevImages[I]); + if (DevImageImpl.get_bin_image_ref() == nullptr) + continue; + for (const sycl_device_binary_property &ESProp : + DevImageImpl.get_bin_image_ref()->getExportedSymbols()) { + if (ExportMap.find(ESProp->Name) != ExportMap.end()) + throw sycl::exception(make_error_code(errc::invalid), + "Duplicate exported symbol \"" + + std::string{ESProp->Name} + + "\" found in binaries."); + ExportMap.emplace(ESProp->Name, I); + } + } + + // Create dependency mappings. + std::vector> Dependencies; + Dependencies.resize(DevImages.size()); + for (size_t I = 0; I < DevImages.size(); ++I) { + device_image_impl &DevImageImpl = *getSyclObjImpl(DevImages[I]); + if (DevImageImpl.get_bin_image_ref() == nullptr) + continue; + std::set DeviceImageDepsSet; + for (const sycl_device_binary_property &ISProp : + DevImageImpl.get_bin_image_ref()->getImportedSymbols()) { + auto ExportSymbolIt = ExportMap.find(ISProp->Name); + if (ExportSymbolIt == ExportMap.end()) + throw sycl::exception(make_error_code(errc::invalid), + "No exported symbol \"" + + std::string{ISProp->Name} + + "\" found in linked images."); + DeviceImageDepsSet.emplace(ExportSymbolIt->second); + } + Dependencies[I].insert(Dependencies[I].end(), + std::make_move_iterator(DeviceImageDepsSet.begin()), + std::make_move_iterator(DeviceImageDepsSet.end())); + } + return LinkGraph{DevImages, Dependencies}; +} + +inline void +ThrowIfConflictingKernels(const std::vector &DevImages) { + std::set> SeenKernelNames; + std::set> Conflicts; + for (const device_image_plain &DevImage : DevImages) { + const KernelNameSetT &KernelNames = + getSyclObjImpl(DevImage)->getKernelNames(); + std::vector Intersect; + std::set_intersection(SeenKernelNames.begin(), SeenKernelNames.end(), + KernelNames.begin(), KernelNames.end(), + std::inserter(Conflicts, Conflicts.begin())); + SeenKernelNames.insert(KernelNames.begin(), 
KernelNames.end()); + } + if (Conflicts.empty()) + return; + std::stringstream MsgS; + MsgS << "Conflicting kernel definitions: "; + for (const std::string_view &Conflict : Conflicts) + MsgS << " " << Conflict; + throw sycl::exception(make_error_code(errc::invalid), MsgS.str()); +} + namespace syclex = sycl::ext::oneapi::experimental; class kernel_impl; @@ -203,8 +277,9 @@ class kernel_bundle_impl // Matches sycl::link kernel_bundle_impl( - const std::vector> &ObjectBundles, - devices_range Devs, const property_list &PropList, private_tag) + sycl::span> &ObjectBundles, + devices_range Devs, const property_list &PropList, bool FastLink, + private_tag) : MDevices(Devs.to>()), MState(bundle_state::executable) { if (MDevices.empty()) @@ -263,97 +338,123 @@ class kernel_bundle_impl } } - // Collect all unique images. - std::vector DevImages; - { - std::set DevImagesSet; - std::unordered_set SeenBinImgs; - for (const kernel_bundle &ObjectBundle : - ObjectBundles) - for (device_image_impl &DevImg : - getSyclObjImpl(ObjectBundle)->device_images()) - if (OfflineDeviceImageSet.find(&DevImg) == - OfflineDeviceImageSet.end()) + std::map> DevImageLinkGraphs; + if (FastLink) { + // When doing fast-linking, we insert the suitable AOT binaries from the + // object bundles. This needs to be done per-device, as AOT binaries may + // not be compatible across different architectures. + for (device_impl &Dev : get_devices()) { + std::vector DevImages; + std::set DevImagesSet; + for (const kernel_bundle &ObjectBundle : + ObjectBundles) { + detail::kernel_bundle_impl &ObjectBundleImpl = + *getSyclObjImpl(ObjectBundle); + + // Firstly find all suitable AOT binaries, if the object bundle was + // made from SYCLBIN. + std::vector AOTBinaries = + ObjectBundleImpl.GetSYCLBINAOTBinaries(Dev); + + // The AOT binaries need to be brought into executable state. They + // are considered unique, so they are placed directly into the unique + // images list. 
+ DevImages.reserve(DevImages.size() + AOTBinaries.size()); + for (const detail::RTDeviceBinaryImage *Image : AOTBinaries) { + device_image_plain &AOTDevImg = + DevImages.emplace_back(device_image_impl::create( + Image, MContext, devices_range{Dev}, + bundle_state::executable, + /*KernelIDs=*/nullptr, Managed{}, + ImageOriginSYCLBIN)); + DevImgPlainWithDeps AOTDevImgWithDeps{AOTDevImg}; + ProgramManager::getInstance().bringSYCLDeviceImageToState( + AOTDevImgWithDeps, bundle_state::executable); + } + + // Record all the AOT exported symbols and kernels. + std::unordered_set AOTExportedSymbols; + std::unordered_set AOTKernelNames; + for (const RTDeviceBinaryImage *AOTBin : AOTBinaries) { + for (const sycl_device_binary_property &ESProp : + AOTBin->getExportedSymbols()) + AOTExportedSymbols.insert(ESProp->Name); + for (const sycl_device_binary_property &KNProp : + AOTBin->getKernelNames()) + AOTKernelNames.insert(KNProp->Name); + } + + for (device_image_impl &DevImg : ObjectBundleImpl.device_images()) { + // If the image is the same as one of the offline images, we can + // skip it. + if (OfflineDeviceImageSet.find(&DevImg) != + OfflineDeviceImageSet.end()) + continue; + + // Skip images whose exported symbols or kernels overlap with an AOT + // binary, as fast-linking prioritizes AOT binaries. This can happen + // if the same source files have been compiled to both a usable AOT + // and JIT binary. Images without a binary image cannot overlap. + const RTDeviceBinaryImage *BinImg = DevImg.get_bin_image_ref(); + bool OverlapsAOT = false; + if (BinImg != nullptr) { + for (const sycl_device_binary_property &ESProp : + BinImg->getExportedSymbols()) + OverlapsAOT |= AOTExportedSymbols.count(ESProp->Name) != 0; + for (const sycl_device_binary_property &KNProp : + BinImg->getKernelNames()) + OverlapsAOT |= AOTKernelNames.count(KNProp->Name) != 0; + } + // The skip must target this outer loop; a `continue` placed inside + // the symbol loops above would only advance those loops. + if (OverlapsAOT) + continue;
DevImagesSet.insert(&DevImg); - DevImages.reserve(DevImagesSet.size()); - for (auto It = DevImagesSet.begin(); It != DevImagesSet.end();) - DevImages.push_back(createSyclObjFromImpl( - *DevImagesSet.extract(It++).value())); - } + } + } - // Check for conflicting kernels in RTC kernel bundles. - { - std::set> SeenKernelNames; - std::set> Conflicts; - for (const device_image_plain &DevImage : DevImages) { - const KernelNameSetT &KernelNames = - getSyclObjImpl(DevImage)->getKernelNames(); - std::vector Intersect; - std::set_intersection(SeenKernelNames.begin(), SeenKernelNames.end(), - KernelNames.begin(), KernelNames.end(), - std::inserter(Conflicts, Conflicts.begin())); - SeenKernelNames.insert(KernelNames.begin(), KernelNames.end()); - } + DevImages.reserve(DevImages.size() + DevImagesSet.size()); + for (auto It = std::make_move_iterator(DevImagesSet.begin()), + End = std::make_move_iterator(DevImagesSet.end()); + It != End; ++It) + DevImages.push_back(createSyclObjFromImpl(**It)); - if (!Conflicts.empty()) { - std::stringstream MsgS; - MsgS << "Conflicting kernel definitions: "; - for (const std::string_view &Conflict : Conflicts) - MsgS << " " << Conflict; - throw sycl::exception(make_error_code(errc::invalid), MsgS.str()); - } - } + // Check for conflicting kernels in RTC kernel bundles. + ThrowIfConflictingKernels(DevImages); - // Create a map between exported symbols and their indices in the device - // images collection.
- std::map ExportMap; - for (size_t I = 0; I < DevImages.size(); ++I) { - device_image_impl &DevImageImpl = *getSyclObjImpl(DevImages[I]); - if (DevImageImpl.get_bin_image_ref() == nullptr) - continue; - for (const sycl_device_binary_property &ESProp : - DevImageImpl.get_bin_image_ref()->getExportedSymbols()) { - if (ExportMap.find(ESProp->Name) != ExportMap.end()) - throw sycl::exception(make_error_code(errc::invalid), - "Duplicate exported symbol \"" + - std::string{ESProp->Name} + - "\" found in binaries."); - ExportMap.emplace(ESProp->Name, I); + // Create and insert the corresponding link graph. + DevImageLinkGraphs.emplace(&Dev, CreateLinkGraph(DevImages)); } - } - - // Create dependency mappings. - std::vector> Dependencies; - Dependencies.resize(DevImages.size()); - for (size_t I = 0; I < DevImages.size(); ++I) { - device_image_impl &DevImageImpl = *getSyclObjImpl(DevImages[I]); - if (DevImageImpl.get_bin_image_ref() == nullptr) - continue; - std::set DeviceImageDepsSet; - for (const sycl_device_binary_property &ISProp : - DevImageImpl.get_bin_image_ref()->getImportedSymbols()) { - auto ExportSymbolIt = ExportMap.find(ISProp->Name); - if (ExportSymbolIt == ExportMap.end()) - throw sycl::exception(make_error_code(errc::invalid), - "No exported symbol \"" + - std::string{ISProp->Name} + - "\" found in linked images."); - DeviceImageDepsSet.emplace(ExportSymbolIt->second); + } else { + // Collect all unique images. 
+ std::vector DevImages; + { + std::set DevImagesSet; + for (const kernel_bundle &ObjectBundle : + ObjectBundles) + for (device_image_impl &DevImg : + getSyclObjImpl(ObjectBundle)->device_images()) + if (OfflineDeviceImageSet.find(&DevImg) == + OfflineDeviceImageSet.end()) + DevImagesSet.insert(&DevImg); + DevImages.reserve(DevImagesSet.size()); + for (auto It = DevImagesSet.begin(); It != DevImagesSet.end();) + DevImages.push_back(createSyclObjFromImpl( + *DevImagesSet.extract(It++).value())); } - Dependencies[I].insert(Dependencies[I].end(), DeviceImageDepsSet.begin(), - DeviceImageDepsSet.end()); - } - // Create a link graph and clone it for each device. - device_impl &FirstDevice = get_devices().front(); - std::map> DevImageLinkGraphs; - const auto &FirstGraph = - DevImageLinkGraphs - .emplace(&FirstDevice, - LinkGraph{DevImages, Dependencies}) - .first->second; - for (device_impl &Dev : get_devices()) - DevImageLinkGraphs.emplace(&Dev, FirstGraph.Clone()); + // Check for conflicting kernels in RTC kernel bundles. + ThrowIfConflictingKernels(DevImages); + + // Create a link graph and clone it for each device. + device_impl &FirstDevice = get_devices().front(); + const auto &FirstGraph = + DevImageLinkGraphs.emplace(&FirstDevice, CreateLinkGraph(DevImages)) + .first->second; + for (device_impl &Dev : get_devices()) + DevImageLinkGraphs.emplace(&Dev, FirstGraph.Clone()); + } // Poison the images based on whether the corresponding device supports it. for (auto &GraphIt : DevImageLinkGraphs) { @@ -369,9 +470,49 @@ class kernel_bundle_impl // Link based on the resulting graphs. 
for (auto &GraphIt : UnifiedGraphs) { + const std::vector &GraphDevs = GraphIt.first; + std::vector GraphImgs = + GraphIt.second.GetNodeValues(); + + sycl::span JITImgs{GraphImgs}; + sycl::span AOTImgs{}; + if (FastLink) { + std::sort( + GraphImgs.begin(), GraphImgs.end(), + [](const device_image_plain &LHS, const device_image_plain &RHS) { + // Sort by state: That leaves objects (JIT) at the beginning and + // executables (AOT) at the end. + return getSyclObjImpl(LHS)->get_state() < + getSyclObjImpl(RHS)->get_state(); + }); + auto AOTImgsBegin = + std::find_if(GraphImgs.begin(), GraphImgs.end(), + [](const device_image_plain &Img) { + return getSyclObjImpl(Img)->get_state() == + bundle_state::executable; + }); + size_t NumJITImgs = std::distance(GraphImgs.begin(), AOTImgsBegin); + JITImgs = sycl::span( + GraphImgs.data(), NumJITImgs); + AOTImgs = sycl::span( + GraphImgs.data() + NumJITImgs, GraphImgs.size() - NumJITImgs); + } + + // If there are AOT binaries, the link should allow unresolved symbols. std::vector LinkedResults = detail::ProgramManager::getInstance().link( - GraphIt.second.GetNodeValues(), GraphIt.first, PropList); + JITImgs, GraphDevs, PropList, + /*AllowUnresolvedSymbols=*/!AOTImgs.empty()); + + if (!AOTImgs.empty()) { + // In dynamic linking, AOT binaries count as results as well.
+ LinkedResults.insert(LinkedResults.end(), AOTImgs.begin(), + AOTImgs.end()); + sycl::span LinkedResultsSpan( + LinkedResults.data(), LinkedResults.size()); + detail::ProgramManager::getInstance().dynamicLink(LinkedResultsSpan); + } + MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), LinkedResults.end()); MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), @@ -390,9 +531,12 @@ class kernel_bundle_impl })) continue; + const std::vector &AllDevImgs = + DeviceImageWithDeps->getAll(); + sycl::span AllDevImgsSpan(AllDevImgs); std::vector LinkedResults = - detail::ProgramManager::getInstance().link( - DeviceImageWithDeps->getAll(), MDevices, PropList); + detail::ProgramManager::getInstance().link(AllDevImgsSpan, MDevices, + PropList); MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), LinkedResults.end()); MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), @@ -1076,6 +1220,23 @@ class kernel_bundle_impl MUniqueDeviceImages.erase(It, MUniqueDeviceImages.end()); } + std::vector + GetSYCLBINAOTBinaries(device_impl &Dev) { + if (MSYCLBINs.size() == 1) + return MSYCLBINs[0]->getNativeBinaryImages(Dev); + + std::vector Result; + for (auto &SYCLBIN : MSYCLBINs) { + std::vector NativeBinImgs = + SYCLBIN->getNativeBinaryImages(Dev); + Result.insert(Result.end(), + std::make_move_iterator(NativeBinImgs.begin()), + std::make_move_iterator(NativeBinImgs.end())); + } + + return Result; + } + context MContext; std::vector MDevices; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 1d39a89a4dd45..278b2395839ad 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2919,7 +2919,7 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, // Returns a merged device binary image, new set of kernel IDs and new // specialization constant data. 
static const RTDeviceBinaryImage * -mergeImageData(const std::vector &Imgs, +mergeImageData(sycl::span Imgs, std::vector &KernelIDs, std::vector &NewSpecConstBlob, device_image_impl::SpecConstMapT &NewSpecConstMap, @@ -2980,8 +2980,9 @@ mergeImageData(const std::vector &Imgs, } std::vector -ProgramManager::link(const std::vector &Imgs, - devices_range Devs, const property_list &PropList) { +ProgramManager::link(sycl::span Imgs, + devices_range Devs, const property_list &PropList, + bool AllowUnresolvedSymbols) { { auto NoAllowedPropertiesCheck = [](int) { return false; }; detail::PropertyValidator::checkPropsAndThrow( @@ -3008,11 +3009,17 @@ ProgramManager::link(const std::vector &Imgs, context_impl &ContextImpl = *getSyclObjImpl(Context); adapter_impl &Adapter = ContextImpl.getAdapter(); + // The flags start out empty, so the bit has to be OR-ed in; masking with + // &= would leave them at zero and the request would never be forwarded. + ur_exp_program_flags_t UrLinkFlags{}; + if (AllowUnresolvedSymbols) + UrLinkFlags |= UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS; + Managed LinkedProg{Adapter}; auto doLink = [&] { auto Res = Adapter.call_nocheck( ContextImpl.getHandleRef(), URDevices.size(), URDevices.data(), - ur_exp_program_flags_t{}, URPrograms.size(), URPrograms.data(), + UrLinkFlags, URPrograms.size(), URPrograms.data(), LinkOptionsStr.c_str(), &LinkedProg); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter.call_nocheck( @@ -3095,6 +3102,24 @@ ProgramManager::link(const std::vector &Imgs, std::move(MergedImageStorage)))}; } +// Dynamically links the UR programs of the given images using the context of +// the first image. An empty span is a no-op. +void ProgramManager::dynamicLink(sycl::span Imgs) { + if (Imgs.empty()) + return; + + std::vector URPrograms; + URPrograms.reserve(Imgs.size()); + for (const device_image_plain &Img : Imgs) + URPrograms.push_back(getSyclObjImpl(Img)->get_ur_program()); + + device_image_impl &FirstImgImpl = *getSyclObjImpl(Imgs[0]); + auto [URCtx, Adapter] = + get_ur_handles(*getSyclObjImpl(FirstImgImpl.get_context())); + Adapter->call(URCtx, URPrograms.size(), + URPrograms.data()); +} + // The function duplicates most of the code from existing getBuiltPIProgram.
// The differences are: // Different API - uses different objects to extract required info diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 73eca2cd86e0a..3d2f5a8974349 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -356,8 +357,11 @@ class ProgramManager { // Produces set of device images by convering input device images to object // the executable state std::vector - link(const std::vector &Imgs, devices_range Devs, - const property_list &PropList); + link(sycl::span Imgs, devices_range Devs, + const property_list &PropList, bool AllowUnresolvedSymbols = false); + + // Dynamically links images in executable state. + void dynamicLink(sycl::span Imgs); // Produces new device image by converting input device image to the // executable state diff --git a/sycl/source/detail/syclbin.cpp b/sycl/source/detail/syclbin.cpp index b95eb4c70e55e..55b9d150f92f7 100644 --- a/sycl/source/detail/syclbin.cpp +++ b/sycl/source/detail/syclbin.cpp @@ -453,6 +453,25 @@ SYCLBINBinaries::getBestCompatibleImages(devices_range Devs, return {Images.cbegin(), Images.cend()}; } +std::vector +SYCLBINBinaries::getNativeBinaryImages(device_impl &Dev) { + std::vector Images; + for (size_t I = 0; I < getNumAbstractModules(); ++I) { + const AbstractModuleDesc &AMDesc = AbstractModuleDescriptors[I]; + // Pick the first native binary, if any, that is compatible with Dev.
+ + const RTDeviceBinaryImage *CompatImagePtr = std::find_if( + AMDesc.NativeBinaries, AMDesc.NativeBinaries + AMDesc.NumNativeBinaries, + [&](const RTDeviceBinaryImage &Img) { + return doesDevSupportDeviceRequirements(Dev, Img) && + doesImageTargetMatchDevice(Img, Dev); + }); + if (CompatImagePtr != AMDesc.NativeBinaries + AMDesc.NumNativeBinaries) + Images.push_back(CompatImagePtr); + } + return Images; +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/syclbin.hpp b/sycl/source/detail/syclbin.hpp index 02b9ec8348c99..c9e20bbc4458a 100644 --- a/sycl/source/detail/syclbin.hpp +++ b/sycl/source/detail/syclbin.hpp @@ -128,6 +128,9 @@ struct SYCLBINBinaries { std::vector getBestCompatibleImages(devices_range Dev, bundle_state State); + std::vector + getNativeBinaryImages(device_impl &Dev); + uint8_t getState() const { PropertySet &GlobalMetadata = (*ParsedSYCLBIN diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 1d844dd9517d6..14d19ddacaa6b 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -311,7 +312,20 @@ compile_impl(const kernel_bundle &InputBundle, std::shared_ptr link_impl(const std::vector> &ObjectBundles, const std::vector &Devs, const property_list &PropList) { - return detail::kernel_bundle_impl::create(ObjectBundles, Devs, PropList); + sycl::span> ObjectBundlesView( + ObjectBundles.data(), ObjectBundles.size()); + return detail::kernel_bundle_impl::create(ObjectBundlesView, Devs, PropList, + /*FastLink=*/false); +} + +std::shared_ptr +link_impl(const kernel_bundle *ObjectBundles, + size_t NumObjectBundles, const std::vector &Devs, + bool FastLink) { + sycl::span> ObjectBundlesView( + ObjectBundles, NumObjectBundles); + return detail::kernel_bundle_impl::create(ObjectBundlesView, Devs, + property_list{}, FastLink); } std::shared_ptr diff --git 
a/sycl/test-e2e/SYCLBIN/Inputs/link.hpp b/sycl/test-e2e/SYCLBIN/Inputs/link.hpp index ce14379c4e19a..729171b43df25 100644 --- a/sycl/test-e2e/SYCLBIN/Inputs/link.hpp +++ b/sycl/test-e2e/SYCLBIN/Inputs/link.hpp @@ -4,6 +4,12 @@ namespace syclex = sycl::ext::oneapi::experimental; +#ifdef SYCLBIN_USE_FAST_LINK +static constexpr bool USE_FAST_LINK = true; +#else +static constexpr bool USE_FAST_LINK = false; +#endif + static constexpr size_t NUM = 10; int main(int argc, char *argv[]) { @@ -34,7 +40,8 @@ int main(int argc, char *argv[]) { #endif // Link the bundles. - auto KBExe = sycl::link({KBObj1, KBObj2}); + auto KBExe = syclexp::link( + {KBObj1, KBObj2}, syclexp::properties{syclexp::fast_link{USE_FAST_LINK}}); // TestKernel1 does not have any requirements, so should be there always. assert(KBExe.ext_oneapi_has_kernel("TestKernel1")); diff --git a/sycl/test-e2e/SYCLBIN/link_object_fast_link.cpp b/sycl/test-e2e/SYCLBIN/link_object_fast_link.cpp new file mode 100644 index 0000000000000..4b63fe930a95e --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_object_fast_link.cpp @@ -0,0 +1,21 @@ +// REQUIRES: aspect-usm_shared_allocations + +// -- Test for linking two SYCLBIN kernel_bundle. 
+ +// REQUIRES: target-spir + +// RUN: %clangxx --offload-new-driver -fsycl-rdc -fsyclbin=object %{syclbin_exec_opts} -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export_w_aot.syclbin +// RUN: %clangxx --offload-new-driver -fsycl-rdc -fsyclbin=object %{syclbin_exec_opts} -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import_w_aot.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/exporting_function.cpp -o %t.export.syclbin +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies %S/Inputs/importing_kernel.cpp -o %t.import.syclbin +// RUN: %{build} -o %t.out + +// RUN: %{run} %t.out %t.export.syclbin %t.import.syclbin +// RUN: %{run} %t.out %t.export_w_aot.syclbin %t.import_w_aot.syclbin +// RUN: %{run} %t.out %t.export.syclbin %t.import_w_aot.syclbin +// RUN: %{run} %t.out %t.export_w_aot.syclbin %t.import.syclbin + +#define SYCLBIN_OBJECT_STATE +#define SYCLBIN_USE_FAST_LINK + +#include "Inputs/link.hpp" diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index e42759431d374..461b6d2289944 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3385,6 +3385,7 @@ _ZN4sycl3_V16detail6OSUtil12getOSMemSizeEv _ZN4sycl3_V16detail6OSUtil16getCurrentDSODirB5cxx11Ev _ZN4sycl3_V16detail6OSUtil7makeDirEPKc _ZN4sycl3_V16detail9join_implERKSt6vectorISt10shared_ptrINS1_18kernel_bundle_implEESaIS5_EENS0_12bundle_stateE +_ZN4sycl3_V16detail9link_implEPKNS0_13kernel_bundleILNS0_12bundle_stateE1EEEmRKSt6vectorINS0_6deviceESaIS8_EEb _ZN4sycl3_V16detail9link_implERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EERKS2_INS0_6deviceESaISA_EERKNS0_13property_listE _ZN4sycl3_V16detail9modf_implENS1_9half_impl4halfEPS3_ _ZN4sycl3_V16detail9modf_implEdPd diff --git a/sycl/test/abi/sycl_symbols_windows.dump 
b/sycl/test/abi/sycl_symbols_windows.dump index 1db98f1b5cf9d..65d43051e566e 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4293,6 +4293,7 @@ ?lgamma_r_impl@detail@_V1@sycl@@YAMMPEAH@Z ?lgamma_r_impl@detail@_V1@sycl@@YANNPEAH@Z ?link_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z +?link_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@PEBV?$kernel_bundle@$00@23@_KAEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@_N@Z ?makeDir@OSUtil@detail@_V1@sycl@@SAHPEBD@Z ?make_context@detail@_V1@sycl@@YA?AVcontext@23@_KAEBV?$function@$$A6AXVexception_list@_V1@sycl@@@Z@std@@W4backend@23@_NAEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@6@@Z ?make_device@detail@_V1@sycl@@YA?AVdevice@23@_KW4backend@23@@Z