Merge branch 'refactor-mesh-position-generic' into refactor-mesh-corners-generic

This commit is contained in:
Hans Goudey 2023-01-04 20:27:39 -05:00
commit bb83e1ef48
269 changed files with 3666 additions and 2508 deletions

View File

@ -61,17 +61,17 @@ ContinuationIndentWidth: 4
# This tries to match Blender's style as much as possible. One
BreakBeforeBraces: Custom
BraceWrapping: {
AfterClass: 'false'
AfterControlStatement: 'false'
AfterEnum : 'false'
AfterFunction : 'true'
AfterNamespace : 'false'
AfterStruct : 'false'
AfterUnion : 'false'
BeforeCatch : 'true'
BeforeElse : 'true'
IndentBraces : 'false'
AfterObjCDeclaration: 'true'
AfterClass: 'false',
AfterControlStatement: 'false',
AfterEnum : 'false',
AfterFunction : 'true',
AfterNamespace : 'false',
AfterStruct : 'false',
AfterUnion : 'false',
BeforeCatch : 'true',
BeforeElse : 'true',
IndentBraces : 'false',
AfterObjCDeclaration: 'true',
}
# For switch statements, indent the cases.

View File

@ -501,12 +501,14 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_ONEAPI "Enable Cycles oneAPI compute support" OFF)
option(WITH_CYCLES_ONEAPI_BINARIES "Enable Ahead-Of-Time compilation for Cycles oneAPI device" OFF)
option(WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION "Switch target of oneAPI implementation from SYCL devices to Host Task (single thread on CPU). This option is only for debugging purposes." OFF)
# https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html
# acm-g10 is the target for the first Intel Arc Alchemist GPUs.
set(CYCLES_ONEAPI_SPIR64_GEN_DEVICES "acm-g10" CACHE STRING "oneAPI Intel GPU architectures to build binaries for")
set(CYCLES_ONEAPI_SYCL_TARGETS spir64 spir64_gen CACHE STRING "oneAPI targets to build AOT binaries for")
mark_as_advanced(WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
mark_as_advanced(CYCLES_ONEAPI_SPIR64_GEN_DEVICES)
mark_as_advanced(CYCLES_ONEAPI_SYCL_TARGETS)
endif()
@ -830,27 +832,17 @@ endif()
# enable boost for cycles, audaspace or i18n
# otherwise if the user disabled
set_and_warn_dependency(WITH_BOOST WITH_CYCLES OFF)
set_and_warn_dependency(WITH_BOOST WITH_INTERNATIONAL OFF)
set_and_warn_dependency(WITH_BOOST WITH_OPENVDB OFF)
set_and_warn_dependency(WITH_BOOST WITH_OPENCOLORIO OFF)
set_and_warn_dependency(WITH_BOOST WITH_QUADRIFLOW OFF)
set_and_warn_dependency(WITH_BOOST WITH_USD OFF)
set_and_warn_dependency(WITH_BOOST WITH_ALEMBIC OFF)
if(WITH_CYCLES)
set_and_warn_dependency(WITH_BOOST WITH_CYCLES_OSL OFF)
set_and_warn_dependency(WITH_PUGIXML WITH_CYCLES_OSL OFF)
endif()
set_and_warn_dependency(WITH_PUGIXML WITH_OPENIMAGEIO OFF)
if(WITH_BOOST AND NOT (WITH_CYCLES OR WITH_OPENIMAGEIO OR WITH_INTERNATIONAL OR
WITH_OPENVDB OR WITH_OPENCOLORIO OR WITH_USD OR WITH_ALEMBIC))
message(STATUS "No dependencies need 'WITH_BOOST' forcing WITH_BOOST=OFF")
set(WITH_BOOST OFF)
endif()
set_and_warn_dependency(WITH_TBB WITH_CYCLES OFF)
set_and_warn_dependency(WITH_TBB WITH_USD OFF)
set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE OFF)
set_and_warn_dependency(WITH_TBB WITH_OPENVDB OFF)
set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF)
@ -859,14 +851,10 @@ set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF)
# OpenVDB and OpenColorIO uses 'half' type from OpenEXR
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENVDB OFF)
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENCOLORIO OFF)
# Haru needs `TIFFFaxBlackCodes` & `TIFFFaxWhiteCodes` symbols from TIFF.
set_and_warn_dependency(WITH_IMAGE_TIFF WITH_HARU OFF)
# USD needs OpenSubDiv, since that is used by the Cycles Hydra render delegate.
set_and_warn_dependency(WITH_OPENSUBDIV WITH_USD OFF)
# auto enable openimageio for cycles
if(WITH_CYCLES)
set(WITH_OPENIMAGEIO ON)
@ -880,17 +868,6 @@ else()
set(WITH_CYCLES_OSL OFF)
endif()
# auto enable openimageio linking dependencies
if(WITH_OPENIMAGEIO)
set(WITH_IMAGE_OPENEXR ON)
set(WITH_IMAGE_TIFF ON)
endif()
# auto enable alembic linking dependencies
if(WITH_ALEMBIC)
set(WITH_IMAGE_OPENEXR ON)
endif()
# don't store paths to libs for portable distribution
if(WITH_INSTALL_PORTABLE)
set(CMAKE_SKIP_BUILD_RPATH TRUE)
@ -1093,14 +1070,6 @@ if(WITH_CYCLES)
"Configure OIIO or disable WITH_CYCLES"
)
endif()
if(NOT WITH_BOOST)
message(
FATAL_ERROR
"Cycles requires WITH_BOOST, the library may not have been found. "
"Configure BOOST or disable WITH_CYCLES"
)
endif()
if(WITH_CYCLES_OSL)
if(NOT WITH_LLVM)
message(
@ -2007,24 +1976,6 @@ if(0)
print_all_vars()
endif()
set(LIBDIR_STALE)
if(UNIX AND NOT APPLE)
# Only search for the path if it's found on the system.
if(EXISTS "../lib/linux_centos7_x86_64")
set(LIBDIR_STALE "/lib/linux_centos7_x86_64/")
endif()
endif()
if(LIBDIR_STALE)
print_cached_vars_containing_value(
"${LIBDIR_STALE}"
"\nWARNING: found cached references to old library paths!\n"
"\nIt is *strongly* recommended to reference updated library paths!\n"
)
endif()
unset(LIBDIR_STALE)
# Should be the last step of configuration.
if(POSTCONFIGURE_SCRIPT)
include(${POSTCONFIGURE_SCRIPT})

View File

@ -1209,43 +1209,6 @@ function(print_all_vars)
endforeach()
endfunction()
# Print a list of all cached variables whose *values* contain `contents`,
# bracketed by `msg_header` and `msg_footer`.
#
# Arguments:
#   contents   - substring to search for in each cached variable's value.
#   msg_header - message printed once before the list (only if matches exist).
#   msg_footer - message printed once after the list (only if matches exist).
#
# Prints nothing at all when no cached variable matches.
function(print_cached_vars_containing_value
  contents
  msg_header
  msg_footer
  )
  set(_list_info)
  set(_found)
  get_cmake_property(_vars VARIABLES)
  foreach(_var ${_vars})
    if(DEFINED CACHE{${_var}})
      # Skip "_" prefixed variables, these are used for internal book-keeping,
      # not under user control.
      string(FIND "${_var}" "_" _found)
      if(NOT (_found EQUAL 0))
        string(FIND "${${_var}}" "${contents}" _found)
        # `string(FIND)` returns -1 only when not found; an index of 0 is a
        # valid match, so it must not be excluded by a truthiness test.
        if(NOT (_found EQUAL -1))
          list(APPEND _list_info "${_var}=${${_var}}")
        endif()
      endif()
    endif()
  endforeach()
  unset(_var)
  unset(_vars)
  unset(_found)
  if(_list_info)
    message(${msg_header})
    foreach(_var ${_list_info})
      message(" * ${_var}")
    endforeach()
    message(${msg_footer})
  endif()
  unset(_list_info)
endfunction()
macro(openmp_delayload
projectname
)

View File

@ -86,16 +86,14 @@ endif()
if(WITH_USD)
find_package(USD REQUIRED)
add_bundled_libraries(usd/lib)
endif()
add_bundled_libraries(usd/lib)
if(WITH_MATERIALX)
find_package(MaterialX)
set_and_warn_library_found("MaterialX" MaterialX_FOUND WITH_MATERIALX)
if(WITH_MATERIALX)
add_bundled_libraries(materialx/lib)
endif()
endif()
add_bundled_libraries(materialx/lib)
if(WITH_VULKAN_BACKEND)
find_package(MoltenVK REQUIRED)
@ -117,8 +115,8 @@ endif()
if(WITH_OPENSUBDIV)
find_package(OpenSubdiv)
add_bundled_libraries(opensubdiv/lib)
endif()
add_bundled_libraries(opensubdiv/lib)
if(WITH_CODEC_SNDFILE)
find_package(SndFile)
@ -156,9 +154,9 @@ list(APPEND FREETYPE_LIBRARIES
if(WITH_IMAGE_OPENEXR)
find_package(OpenEXR)
add_bundled_libraries(openexr/lib)
add_bundled_libraries(imath/lib)
endif()
add_bundled_libraries(openexr/lib)
add_bundled_libraries(imath/lib)
if(WITH_CODEC_FFMPEG)
set(FFMPEG_ROOT_DIR ${LIBDIR}/ffmpeg)
@ -270,12 +268,11 @@ if(WITH_BOOST)
set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
set(BOOST_DEFINITIONS)
add_bundled_libraries(boost/lib)
mark_as_advanced(Boost_LIBRARIES)
mark_as_advanced(Boost_INCLUDE_DIRS)
unset(_boost_FIND_COMPONENTS)
endif()
add_bundled_libraries(boost/lib)
if(WITH_INTERNATIONAL OR WITH_CODEC_FFMPEG)
string(APPEND PLATFORM_LINKFLAGS " -liconv") # boost_locale and ffmpeg needs it !
@ -297,13 +294,13 @@ if(WITH_OPENIMAGEIO)
)
set(OPENIMAGEIO_DEFINITIONS "-DOIIO_STATIC_BUILD")
set(OPENIMAGEIO_IDIFF "${LIBDIR}/openimageio/bin/idiff")
add_bundled_libraries(openimageio/lib)
endif()
add_bundled_libraries(openimageio/lib)
if(WITH_OPENCOLORIO)
find_package(OpenColorIO 2.0.0 REQUIRED)
add_bundled_libraries(opencolorio/lib)
endif()
add_bundled_libraries(opencolorio/lib)
if(WITH_OPENVDB)
find_package(OpenVDB)
@ -314,8 +311,8 @@ if(WITH_OPENVDB)
unset(BLOSC_LIBRARIES CACHE)
endif()
set(OPENVDB_DEFINITIONS)
add_bundled_libraries(openvdb/lib)
endif()
add_bundled_libraries(openvdb/lib)
if(WITH_NANOVDB)
find_package(NanoVDB)
@ -363,8 +360,8 @@ endif()
if(WITH_TBB)
find_package(TBB REQUIRED)
add_bundled_libraries(tbb/lib)
endif()
add_bundled_libraries(tbb/lib)
if(WITH_POTRACE)
find_package(Potrace REQUIRED)
@ -382,9 +379,9 @@ if(WITH_OPENMP)
set(OpenMP_LIBRARY_DIR "${LIBDIR}/openmp/lib/")
set(OpenMP_LINKER_FLAGS "-L'${OpenMP_LIBRARY_DIR}' -lomp")
set(OpenMP_LIBRARY "${OpenMP_LIBRARY_DIR}/libomp.dylib")
add_bundled_libraries(openmp/lib)
endif()
endif()
add_bundled_libraries(openmp/lib)
if(WITH_XR_OPENXR)
find_package(XR_OpenXR_SDK REQUIRED)

View File

@ -3,6 +3,7 @@
# Auto update existing CMake caches for new libraries
# Clear cached variables whose name matches `pattern`.
function(unset_cache_variables pattern)
get_cmake_property(_cache_variables CACHE_VARIABLES)
foreach(_cache_variable ${_cache_variables})
@ -12,6 +13,30 @@ function(unset_cache_variables pattern)
endforeach()
endfunction()
# Clear (unset from the cache) every cached variable whose *value* contains
# `contents`, and print `msg` once via message(STATUS) if anything was cleared.
#
# Arguments:
#   contents - substring to search for in each cached variable's value.
#   msg      - status message printed once when at least one variable is unset.
#
# NOTE(review): the function name is misspelled ("varables_containting") but is
# kept as-is because call sites reference it; renaming is a wider change.
function(unset_cached_varables_containting contents msg)
  get_cmake_property(_cache_variables CACHE_VARIABLES)
  set(_found)
  set(_print_msg)
  foreach(_cache_variable ${_cache_variables})
    # Skip "_" prefixed variables, these are used for internal book-keeping,
    # not under user control.
    string(FIND "${_cache_variable}" "_" _found)
    if(NOT (_found EQUAL 0))
      string(FIND "${${_cache_variable}}" "${contents}" _found)
      # `string(FIND)` returns -1 only when not found; an index of 0 is a
      # valid match and must still trigger the unset.
      if(NOT (_found EQUAL -1))
        unset(${_cache_variable} CACHE)
        set(_print_msg ON)
      endif()
    endif()
  endforeach()
  if(_print_msg)
    message(STATUS ${msg})
  endif()
endfunction()
# Detect update from 3.1 to 3.2 libs.
if(UNIX AND
DEFINED OPENEXR_VERSION AND
@ -63,3 +88,13 @@ if(UNIX AND
unset_cache_variables("^TBB")
unset_cache_variables("^USD")
endif()
if(UNIX AND (NOT APPLE) AND LIBDIR AND (EXISTS ${LIBDIR}))
# Only search for the path if it's found on the system.
set(_libdir_stale "/lib/linux_centos7_x86_64/")
unset_cached_varables_containting(
"${_libdir_stale}"
"Auto clearing old ${_libdir_stale} paths from CMake configuration"
)
unset(_libdir_stale)
endif()

View File

@ -166,11 +166,9 @@ endif()
if(WITH_IMAGE_OPENEXR)
find_package_wrapper(OpenEXR) # our own module
set_and_warn_library_found("OpenEXR" OPENEXR_FOUND WITH_IMAGE_OPENEXR)
if(WITH_IMAGE_OPENEXR)
add_bundled_libraries(openexr/lib)
add_bundled_libraries(imath/lib)
endif()
endif()
add_bundled_libraries(openexr/lib)
add_bundled_libraries(imath/lib)
if(WITH_IMAGE_OPENJPEG)
find_package_wrapper(OpenJPEG)
@ -328,11 +326,8 @@ endif()
if(WITH_OPENVDB)
find_package(OpenVDB)
set_and_warn_library_found("OpenVDB" OPENVDB_FOUND WITH_OPENVDB)
if(WITH_OPENVDB)
add_bundled_libraries(openvdb/lib)
endif()
endif()
add_bundled_libraries(openvdb/lib)
if(WITH_NANOVDB)
find_package_wrapper(NanoVDB)
@ -351,18 +346,14 @@ endif()
if(WITH_USD)
find_package_wrapper(USD)
set_and_warn_library_found("USD" USD_FOUND WITH_USD)
if(WITH_USD)
add_bundled_libraries(usd/lib)
endif()
endif()
add_bundled_libraries(usd/lib)
if(WITH_MATERIALX)
find_package_wrapper(MaterialX)
set_and_warn_library_found("MaterialX" MaterialX_FOUND WITH_MATERIALX)
if(WITH_MATERIALX)
add_bundled_libraries(materialx/lib)
endif()
endif()
add_bundled_libraries(materialx/lib)
if(WITH_BOOST)
# uses in build instructions to override include and library variables
@ -418,9 +409,8 @@ if(WITH_BOOST)
find_package(IcuLinux)
list(APPEND BOOST_LIBRARIES ${ICU_LIBRARIES})
endif()
add_bundled_libraries(boost/lib)
endif()
add_bundled_libraries(boost/lib)
if(WITH_PUGIXML)
find_package_wrapper(PugiXML)
@ -455,21 +445,16 @@ if(WITH_OPENIMAGEIO)
endif()
set_and_warn_library_found("OPENIMAGEIO" OPENIMAGEIO_FOUND WITH_OPENIMAGEIO)
if(WITH_OPENIMAGEIO)
add_bundled_libraries(openimageio/lib)
endif()
endif()
add_bundled_libraries(openimageio/lib)
if(WITH_OPENCOLORIO)
find_package_wrapper(OpenColorIO 2.0.0)
set(OPENCOLORIO_DEFINITIONS)
set_and_warn_library_found("OpenColorIO" OPENCOLORIO_FOUND WITH_OPENCOLORIO)
if(WITH_OPENCOLORIO)
add_bundled_libraries(opencolorio/lib)
endif()
endif()
add_bundled_libraries(opencolorio/lib)
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
find_package(Embree 3.8.0 REQUIRED)
@ -510,18 +495,14 @@ if(WITH_OPENSUBDIV)
set(OPENSUBDIV_LIBPATH) # TODO, remove and reference the absolute path everywhere
set_and_warn_library_found("OpenSubdiv" OPENSUBDIV_FOUND WITH_OPENSUBDIV)
if(WITH_OPENSUBDIV)
add_bundled_libraries(opensubdiv/lib)
endif()
endif()
add_bundled_libraries(opensubdiv/lib)
if(WITH_TBB)
find_package_wrapper(TBB)
set_and_warn_library_found("TBB" TBB_FOUND WITH_TBB)
if(WITH_TBB)
add_bundled_libraries(tbb/lib)
endif()
endif()
add_bundled_libraries(tbb/lib)
if(WITH_XR_OPENXR)
find_package(XR_OpenXR_SDK)
@ -1013,18 +994,6 @@ endfunction()
configure_atomic_lib_if_needed()
# Handle library inter-dependencies.
# FIXME: find a better place to handle inter-library dependencies.
# This is done near the end of the file to ensure bundled libraries are not added multiple times.
if(WITH_USD)
if(NOT WITH_OPENIMAGEIO)
add_bundled_libraries(openimageio/lib)
endif()
if(NOT WITH_OPENVDB)
add_bundled_libraries(openvdb/lib)
endif()
endif()
if(PLATFORM_BUNDLED_LIBRARIES)
# For the installed Python module and installed Blender executable, we set the
# rpath to the relative path where the install step will copy the shared libraries.

View File

@ -1,12 +1,6 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Blender Foundation */
#include "blender/display_driver.h"
#include "device/device.h"
#include "util/log.h"
#include "util/math.h"
#include "GPU_context.h"
#include "GPU_immediate.h"
#include "GPU_shader.h"
@ -15,6 +9,12 @@
#include "RE_engine.h"
#include "blender/display_driver.h"
#include "device/device.h"
#include "util/log.h"
#include "util/math.h"
CCL_NAMESPACE_BEGIN
/* --------------------------------------------------------------------

View File

@ -163,6 +163,9 @@ if(WITH_CYCLES_DEVICE_METAL)
endif()
if(WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
add_definitions(-DWITH_ONEAPI_SYCL_HOST_TASK)
endif()
if(WITH_CYCLES_ONEAPI_BINARIES)
set(cycles_kernel_oneapi_lib_suffix "_aot")
else()

View File

@ -167,6 +167,17 @@ class Device {
return true;
}
/* Request cancellation of any long-running work. */
virtual void cancel()
{
}
/* Return true if device is ready for rendering, or report status if not. */
virtual bool is_ready(string & /*status*/) const
{
return true;
}
/* GPU device only functions.
* These may not be used on CPU or multi-devices. */

View File

@ -76,7 +76,20 @@ class MetalDevice : public Device {
bool use_metalrt = false;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
std::atomic_bool async_compile_and_load = false;
int device_id = 0;
static thread_mutex existing_devices_mutex;
static std::map<int, MetalDevice *> active_device_ids;
static bool is_device_cancelled(int device_id);
static MetalDevice *get_device_by_ID(int ID,
thread_scoped_lock &existing_devices_mutex_lock);
virtual bool is_ready(string &status) const override;
virtual void cancel() override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
@ -92,14 +105,12 @@ class MetalDevice : public Device {
bool use_adaptive_compilation();
bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);
void make_source(MetalPipelineType pso_type, const uint kernel_features);
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
void init_host_memory();
void load_texture_info();
void erase_allocation(device_memory &mem);
@ -112,7 +123,7 @@ class MetalDevice : public Device {
virtual void optimize_for_scene(Scene *scene) override;
bool compile_and_load(MetalPipelineType pso_type);
static void compile_and_load(int device_id, MetalPipelineType pso_type);
/* ------------------------------------------------------------------ */
/* low-level memory management */

View File

@ -13,10 +13,32 @@
# include "util/path.h"
# include "util/time.h"
# include <crt_externs.h>
CCL_NAMESPACE_BEGIN
class MetalDevice;
thread_mutex MetalDevice::existing_devices_mutex;
std::map<int, MetalDevice *> MetalDevice::active_device_ids;
/* Thread-safe device access for async work. Calling code must pass an appropriately scoped lock
* to existing_devices_mutex to safeguard against destruction of the returned instance. */
MetalDevice *MetalDevice::get_device_by_ID(int ID, thread_scoped_lock &existing_devices_mutex_lock)
{
auto it = active_device_ids.find(ID);
if (it != active_device_ids.end()) {
return it->second;
}
return nullptr;
}
bool MetalDevice::is_device_cancelled(int ID)
{
thread_scoped_lock lock(existing_devices_mutex);
return get_device_by_ID(ID, lock) == nullptr;
}
BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
{
return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
@ -40,6 +62,15 @@ void MetalDevice::set_error(const string &error)
MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
{
{
/* Assign an ID for this device which we can use to query whether async shader compilation
* requests are still relevant. */
thread_scoped_lock lock(existing_devices_mutex);
static int existing_devices_counter = 1;
device_id = existing_devices_counter++;
active_device_ids[device_id] = this;
}
mtlDevId = info.num;
/* select chosen device */
@ -57,7 +88,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
if (@available(macos 11.0, *)) {
if ([mtlDevice hasUnifiedMemory]) {
default_storage_mode = MTLResourceStorageModeShared;
init_host_memory();
}
}
@ -181,6 +211,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
MetalDevice::~MetalDevice()
{
/* Cancel any async shader compilations that are in flight. */
cancel();
/* This lock safeguards against destruction during use (see other uses of
* existing_devices_mutex). */
thread_scoped_lock lock(existing_devices_mutex);
for (auto &tex : texture_slot_map) {
if (tex) {
[tex release];
@ -326,21 +363,66 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
bool result = compile_and_load(PSO_GENERIC);
/* Only request generic kernels if they aren't cached in memory. */
if (make_source_and_check_if_compile_needed(PSO_GENERIC)) {
/* If needed, load them asynchronously in order to responsively message progress to the user. */
int this_device_id = this->device_id;
auto compile_kernels_fn = ^() {
compile_and_load(this_device_id, PSO_GENERIC);
};
reserve_local_memory(kernel_features);
return result;
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
compile_kernels_fn);
}
return true;
}
bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_type)
{
make_source(pso_type, kernel_features);
if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
/* We already have a full set of matching pipelines which are cached or queued. */
metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
return true;
if (this->source[pso_type].empty()) {
make_source(pso_type, kernel_features);
}
return MetalDeviceKernels::should_load_kernels(this, pso_type);
}
void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
{
/* Thread-safe front-end compilation. Typically the MSL->AIR compilation can take a few seconds,
* so we avoid blocking device teardown if the user cancels a render immediately.
*/
id<MTLDevice> mtlDevice;
string source;
MetalGPUVendor device_vendor;
/* Safely gather any state required for the MSL->AIR compilation. */
{
thread_scoped_lock lock(existing_devices_mutex);
/* Check whether the device still exists. */
MetalDevice *instance = get_device_by_ID(device_id, lock);
if (!instance) {
metal_printf("Ignoring %s compilation request - device no longer exists\n",
kernel_type_as_string(pso_type));
return;
}
if (!instance->make_source_and_check_if_compile_needed(pso_type)) {
/* We already have a full set of matching pipelines which are cached or queued. Return early
* to avoid redundant MTLLibrary compilation. */
metal_printf("Ignoring %s compilation request - kernels already requested\n",
kernel_type_as_string(pso_type));
return;
}
mtlDevice = instance->mtlDevice;
device_vendor = instance->device_vendor;
source = instance->source[pso_type];
}
/* Perform the actual compilation using our cached context. The MetalDevice can safely destruct
* in this time. */
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
@ -359,20 +441,15 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
source[pso_type]);
source);
}
const double starttime = time_dt();
NSError *error = NULL;
mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
options:options
error:&error];
if (!mtlLibrary[pso_type]) {
NSString *err = [error localizedDescription];
set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
@ -380,17 +457,21 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
[options release];
return MetalDeviceKernels::load(this, pso_type);
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
{
/* METAL_WIP - implement this */
}
void MetalDevice::init_host_memory()
{
/* METAL_WIP - implement this */
/* Save the compiled MTLLibrary and trigger the AIR->PSO builds (if the MetalDevice still
* exists). */
{
thread_scoped_lock lock(existing_devices_mutex);
if (MetalDevice *instance = get_device_by_ID(device_id, lock)) {
if (mtlLibrary) {
instance->mtlLibrary[pso_type] = mtlLibrary;
MetalDeviceKernels::load(instance, pso_type);
}
else {
NSString *err = [error localizedDescription];
instance->set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
}
}
}
void MetalDevice::load_texture_info()
@ -700,55 +781,74 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
return 0;
}
void MetalDevice::cancel()
{
/* Remove this device's ID from the list of active devices. Any pending compilation requests
* originating from this session will be cancelled. */
thread_scoped_lock lock(existing_devices_mutex);
if (device_id) {
active_device_ids.erase(device_id);
device_id = 0;
}
}
bool MetalDevice::is_ready(string &status) const
{
int num_loaded = MetalDeviceKernels::get_loaded_kernel_count(this, PSO_GENERIC);
if (num_loaded < DEVICE_KERNEL_NUM) {
status = string_printf("%d / %d render kernels loaded (may take a few minutes the first time)",
num_loaded,
DEVICE_KERNEL_NUM);
return false;
}
metal_printf("MetalDevice::is_ready(...) --> true\n");
return true;
}
void MetalDevice::optimize_for_scene(Scene *scene)
{
MetalPipelineType specialization_level = kernel_specialization_level;
if (specialization_level < PSO_SPECIALIZED_INTERSECT) {
return;
}
/* PSO_SPECIALIZED_INTERSECT kernels are fast to specialize, so we always load them
* synchronously. */
compile_and_load(PSO_SPECIALIZED_INTERSECT);
if (specialization_level < PSO_SPECIALIZED_SHADE) {
return;
}
if (!scene->params.background) {
/* Don't load PSO_SPECIALIZED_SHADE kernels during viewport rendering as they are slower to
* build. */
return;
/* In live viewport, don't specialize beyond intersection kernels for responsiveness. */
specialization_level = (MetalPipelineType)min(specialization_level, PSO_SPECIALIZED_INTERSECT);
}
/* PSO_SPECIALIZED_SHADE kernels are slower to specialize, so we load them asynchronously, and
* only if there isn't an existing load in flight.
*/
auto specialize_shade_fn = ^() {
compile_and_load(PSO_SPECIALIZED_SHADE);
async_compile_and_load = false;
/* For responsive rendering, specialize the kernels in the background, and only if there isn't an
* existing "optimize_for_scene" request in flight. */
int this_device_id = this->device_id;
auto specialize_kernels_fn = ^() {
for (int level = 1; level <= int(specialization_level); level++) {
compile_and_load(this_device_id, MetalPipelineType(level));
}
};
bool async_specialize_shade = true;
/* In normal use, we always compile the specialized kernels in the background. */
bool specialize_in_background = true;
/* Block if a per-kernel profiling is enabled (ensure steady rendering rate). */
if (getenv("CYCLES_METAL_PROFILING") != nullptr) {
async_specialize_shade = false;
specialize_in_background = false;
}
if (async_specialize_shade) {
if (!async_compile_and_load) {
async_compile_and_load = true;
/* Block during benchmark warm-up to ensure kernels are cached prior to the observed run. */
for (int i = 0; i < *_NSGetArgc(); i++) {
if (!strcmp((*_NSGetArgv())[i], "--warm-up")) {
specialize_in_background = false;
}
}
if (specialize_in_background) {
if (!MetalDeviceKernels::any_specialization_happening_now()) {
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
specialize_shade_fn);
specialize_kernels_fn);
}
else {
metal_printf(
"Async PSO_SPECIALIZED_SHADE load request already in progress - dropping request\n");
metal_printf("\"optimize_for_scene\" request already in flight - dropping request\n");
}
}
else {
specialize_shade_fn();
specialize_kernels_fn();
}
}

View File

@ -64,6 +64,8 @@ struct MetalKernelPipeline {
void compile();
int originating_device_id;
id<MTLLibrary> mtlLibrary = nil;
MetalPipelineType pso_type;
string source_md5;
@ -94,7 +96,9 @@ struct MetalKernelPipeline {
/* Cache of Metal kernels for each DeviceKernel. */
namespace MetalDeviceKernels {
bool should_load_kernels(MetalDevice *device, MetalPipelineType pso_type);
bool any_specialization_happening_now();
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, MetalPipelineType pso_type);
const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);

View File

@ -86,23 +86,17 @@ struct ShaderCache {
void load_kernel(DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
bool should_load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalDevice const *device,
MetalPipelineType pso_type);
void wait_for_all();
private:
friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice);
void compile_thread_func(int thread_index);
using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>;
struct PipelineRequest {
MetalKernelPipeline *pipeline = nullptr;
std::function<void(MetalKernelPipeline *)> completionHandler;
};
struct OccupancyTuningParameters {
int threads_per_threadgroup = 0;
int num_threads_per_block = 0;
@ -113,13 +107,15 @@ struct ShaderCache {
PipelineCollection pipelines[DEVICE_KERNEL_NUM];
id<MTLDevice> mtlDevice;
bool running = false;
static bool running;
std::condition_variable cond_var;
std::deque<PipelineRequest> request_queue;
std::deque<MetalKernelPipeline *> request_queue;
std::vector<std::thread> compile_threads;
std::atomic_int incomplete_requests = 0;
std::atomic_int incomplete_specialization_requests = 0;
};
bool ShaderCache::running = true;
std::mutex g_shaderCacheMutex;
std::map<id<MTLDevice>, unique_ptr<ShaderCache>> g_shaderCache;
@ -137,11 +133,25 @@ ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice)
ShaderCache::~ShaderCache()
{
metal_printf("ShaderCache shutting down with incomplete_requests = %d\n",
int(incomplete_requests));
running = false;
cond_var.notify_all();
int num_incomplete = int(incomplete_requests);
if (num_incomplete) {
/* Shutting down the app with incomplete shader compilation requests. Give 1 second's grace for
* clean shutdown. */
metal_printf("ShaderCache busy (incomplete_requests = %d)...\n", num_incomplete);
std::this_thread::sleep_for(std::chrono::seconds(1));
num_incomplete = int(incomplete_requests);
}
if (num_incomplete) {
metal_printf("ShaderCache still busy (incomplete_requests = %d). Terminating...\n",
num_incomplete);
std::terminate();
}
metal_printf("ShaderCache idle. Shutting down.\n");
for (auto &thread : compile_threads) {
thread.join();
}
@ -156,35 +166,69 @@ void ShaderCache::wait_for_all()
void ShaderCache::compile_thread_func(int thread_index)
{
while (1) {
while (running) {
/* wait for / acquire next request */
PipelineRequest request;
MetalKernelPipeline *pipeline;
{
thread_scoped_lock lock(cache_mutex);
cond_var.wait(lock, [&] { return !running || !request_queue.empty(); });
if (!running) {
break;
if (!running || request_queue.empty()) {
continue;
}
if (!request_queue.empty()) {
request = request_queue.front();
request_queue.pop_front();
}
pipeline = request_queue.front();
request_queue.pop_front();
}
/* service request */
if (request.pipeline) {
request.pipeline->compile();
incomplete_requests--;
/* Service the request. */
DeviceKernel device_kernel = pipeline->device_kernel;
MetalPipelineType pso_type = pipeline->pso_type;
if (MetalDevice::is_device_cancelled(pipeline->originating_device_id)) {
/* The originating MetalDevice is no longer active, so this request is obsolete. */
metal_printf("Cancelling compilation of %s (%s)\n",
device_kernel_as_string(device_kernel),
kernel_type_as_string(pso_type));
}
else {
/* Do the actual compilation. */
pipeline->compile();
thread_scoped_lock lock(cache_mutex);
auto &collection = pipelines[device_kernel];
/* Cache up to 3 kernel variants with the same pso_type in memory, purging oldest first. */
int max_entries_of_same_pso_type = 3;
for (int i = (int)collection.size() - 1; i >= 0; i--) {
if (collection[i]->pso_type == pso_type) {
max_entries_of_same_pso_type -= 1;
if (max_entries_of_same_pso_type == 0) {
metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
kernel_type_as_string(pso_type),
device_kernel_as_string(device_kernel));
collection.erase(collection.begin() + i);
break;
}
}
}
collection.push_back(unique_ptr<MetalKernelPipeline>(pipeline));
}
incomplete_requests--;
if (pso_type != PSO_GENERIC) {
incomplete_specialization_requests--;
}
}
}
bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalDevice const *device,
MetalPipelineType pso_type)
{
if (!running) {
return false;
}
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* Skip megakernel. */
return false;
@ -240,7 +284,6 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* create compiler threads on first run */
thread_scoped_lock lock(cache_mutex);
if (compile_threads.empty()) {
running = true;
for (int i = 0; i < max_mtlcompiler_threads; i++) {
compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
}
@ -252,53 +295,39 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
}
incomplete_requests++;
if (pso_type != PSO_GENERIC) {
incomplete_specialization_requests++;
}
PipelineRequest request;
request.pipeline = new MetalKernelPipeline;
memcpy(&request.pipeline->kernel_data_,
&device->launch_params.data,
sizeof(request.pipeline->kernel_data_));
request.pipeline->pso_type = pso_type;
request.pipeline->mtlDevice = mtlDevice;
request.pipeline->source_md5 = device->source_md5[pso_type];
request.pipeline->mtlLibrary = device->mtlLibrary[pso_type];
request.pipeline->device_kernel = device_kernel;
request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
MetalKernelPipeline *pipeline = new MetalKernelPipeline;
/* Keep track of the originating device's ID so that we can cancel requests if the device ceases
* to be active. */
pipeline->originating_device_id = device->device_id;
memcpy(&pipeline->kernel_data_, &device->launch_params.data, sizeof(pipeline->kernel_data_));
pipeline->pso_type = pso_type;
pipeline->mtlDevice = mtlDevice;
pipeline->source_md5 = device->source_md5[pso_type];
pipeline->mtlLibrary = device->mtlLibrary[pso_type];
pipeline->device_kernel = device_kernel;
pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
if (occupancy_tuning[device_kernel].threads_per_threadgroup) {
request.pipeline->threads_per_threadgroup =
pipeline->threads_per_threadgroup =
occupancy_tuning[device_kernel].threads_per_threadgroup;
request.pipeline->num_threads_per_block =
pipeline->num_threads_per_block =
occupancy_tuning[device_kernel].num_threads_per_block;
}
/* metalrt options */
request.pipeline->use_metalrt = device->use_metalrt;
request.pipeline->metalrt_features = device->use_metalrt ?
(device->kernel_features & METALRT_FEATURE_MASK) :
0;
pipeline->use_metalrt = device->use_metalrt;
pipeline->metalrt_features = device->use_metalrt ?
(device->kernel_features & METALRT_FEATURE_MASK) :
0;
{
thread_scoped_lock lock(cache_mutex);
auto &collection = pipelines[device_kernel];
/* Cache up to 3 kernel variants with the same pso_type, purging oldest first. */
int max_entries_of_same_pso_type = 3;
for (int i = (int)collection.size() - 1; i >= 0; i--) {
if (collection[i]->pso_type == pso_type) {
max_entries_of_same_pso_type -= 1;
if (max_entries_of_same_pso_type == 0) {
metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
kernel_type_as_string(pso_type),
device_kernel_as_string(device_kernel));
collection.erase(collection.begin() + i);
break;
}
}
}
collection.push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
request_queue.push_back(request);
request_queue.push_back(pipeline);
}
cond_var.notify_one();
}
@ -664,51 +693,61 @@ void MetalKernelPipeline::compile()
double starttime = time_dt();
MTLNewComputePipelineStateWithReflectionCompletionHandler completionHandler = ^(
id<MTLComputePipelineState> computePipelineState,
MTLComputePipelineReflection *reflection,
NSError *error) {
bool recreate_archive = false;
if (computePipelineState == nil && archive) {
/* Block on load to ensure we continue with a valid kernel function */
if (creating_new_archive) {
starttime = time_dt();
NSError *error;
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
error:&error]) {
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
device_kernel_as_string((DeviceKernel)device_kernel),
errStr ? [errStr UTF8String] : "nil");
computePipelineState = [mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
error:&error];
recreate_archive = true;
metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
}
}
double duration = time_dt() - starttime;
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
reflection:nullptr
error:&error];
if (computePipelineState == nil) {
NSString *errStr = [error localizedDescription];
error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
device_kernel_as_string((DeviceKernel)device_kernel));
error_str += (errStr ? [errStr UTF8String] : "nil");
metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
kernel_type_as_string(pso_type),
device_kernel,
device_kernel_as_string((DeviceKernel)device_kernel),
duration);
return;
}
bool recreate_archive = false;
if (pipeline == nil && archive) {
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
device_kernel_as_string((DeviceKernel)device_kernel),
errStr ? [errStr UTF8String] : "nil");
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
error:&error];
recreate_archive = true;
}
if (!num_threads_per_block) {
num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
computePipelineState.threadExecutionWidth);
num_threads_per_block = std::max(num_threads_per_block,
(int)computePipelineState.threadExecutionWidth);
}
double duration = time_dt() - starttime;
this->pipeline = computePipelineState;
if (pipeline == nil) {
NSString *errStr = [error localizedDescription];
error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
device_kernel_as_string((DeviceKernel)device_kernel));
error_str += (errStr ? [errStr UTF8String] : "nil");
metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
kernel_type_as_string(pso_type),
device_kernel,
device_kernel_as_string((DeviceKernel)device_kernel),
duration);
return;
}
if (@available(macOS 11.0, *)) {
if (!num_threads_per_block) {
num_threads_per_block = round_down(pipeline.maxTotalThreadsPerThreadgroup,
pipeline.threadExecutionWidth);
num_threads_per_block = std::max(num_threads_per_block,
(int)pipeline.threadExecutionWidth);
}
if (@available(macOS 11.0, *)) {
if (ShaderCache::running) {
if (creating_new_archive || recreate_archive) {
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
error:&error]) {
@ -720,24 +759,7 @@ void MetalKernelPipeline::compile()
}
}
}
};
/* Block on load to ensure we continue with a valid kernel function */
if (creating_new_archive) {
starttime = time_dt();
NSError *error;
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
error:&error]) {
NSString *errStr = [error localizedDescription];
metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
}
}
id<MTLComputePipelineState> pipeline = [mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
reflection:nullptr
error:&error];
completionHandler(pipeline, nullptr, error);
this->loaded = true;
[computePipelineStateDescriptor release];
@ -763,8 +785,6 @@ void MetalKernelPipeline::compile()
}
}
double duration = time_dt() - starttime;
if (!use_binary_archive) {
metal_printf("%16s | %2d | %-55s | %7.2fs\n",
kernel_type_as_string(pso_type),
@ -791,24 +811,46 @@ bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
shader_cache->load_kernel((DeviceKernel)i, device, pso_type);
}
shader_cache->wait_for_all();
metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
kernel_type_as_string(pso_type));
if (getenv("CYCLES_METAL_PROFILING")) {
shader_cache->wait_for_all();
metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
kernel_type_as_string(pso_type));
}
return true;
}
bool MetalDeviceKernels::should_load_kernels(MetalDevice *device, MetalPipelineType pso_type)
bool MetalDeviceKernels::any_specialization_happening_now()
{
auto shader_cache = get_shader_cache(device->mtlDevice);
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
/* Return true if any ShaderCaches have ongoing specialization requests (typically there will be
* only 1). */
thread_scoped_lock lock(g_shaderCacheMutex);
for (auto &it : g_shaderCache) {
if (it.second->incomplete_specialization_requests > 0) {
return true;
}
}
return false;
}
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,
MetalPipelineType pso_type)
{
auto shader_cache = get_shader_cache(device->mtlDevice);
int loaded_count = DEVICE_KERNEL_NUM;
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
loaded_count -= 1;
}
}
return loaded_count;
}
bool MetalDeviceKernels::should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type)
{
return get_loaded_kernel_count(device, pso_type) != DEVICE_KERNEL_NUM;
}
const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
DeviceKernel kernel)
{

View File

@ -702,6 +702,10 @@ bool MetalDeviceQueue::synchronize()
void MetalDeviceQueue::zero_to_device(device_memory &mem)
{
if (metal_device_->have_error()) {
return;
}
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
@ -729,6 +733,10 @@ void MetalDeviceQueue::zero_to_device(device_memory &mem)
void MetalDeviceQueue::copy_to_device(device_memory &mem)
{
if (metal_device_->have_error()) {
return;
}
if (mem.memory_size() == 0) {
return;
}
@ -771,6 +779,10 @@ void MetalDeviceQueue::copy_to_device(device_memory &mem)
void MetalDeviceQueue::copy_from_device(device_memory &mem)
{
if (metal_device_->have_error()) {
return;
}
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {

View File

@ -429,7 +429,12 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
queue->get_device().get_info<sycl::info::device::device_type>();
sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
(void)usm_type;
assert(usm_type == sycl::usm::alloc::device ||
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
const sycl::usm::alloc main_memory_type = sycl::usm::alloc::device;
# else
const sycl::usm::alloc main_memory_type = sycl::usm::alloc::host;
# endif
assert(usm_type == main_memory_type ||
(usm_type == sycl::usm::alloc::host &&
(allow_host || device_type == sycl::info::device_type::cpu)) ||
usm_type == sycl::usm::alloc::unknown);
@ -478,7 +483,11 @@ void *OneapiDevice::usm_alloc_device(SyclQueue *queue_, size_t memory_size)
{
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
return sycl::malloc_device(memory_size, *queue);
# else
return sycl::malloc_host(memory_size, *queue);
# endif
}
void OneapiDevice::usm_free(SyclQueue *queue_, void *usm_ptr)
@ -736,7 +745,11 @@ char *OneapiDevice::device_capabilities()
const std::vector<sycl::device> &oneapi_devices = available_devices();
for (const sycl::device &device : oneapi_devices) {
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
const std::string &name = device.get_info<sycl::info::device::name>();
# else
const std::string &name = "SYCL Host Task (Debug)";
# endif
capabilities << std::string("\t") << name << "\n";
# define WRITE_ATTR(attribute_name, attribute_variable) \
@ -813,7 +826,11 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
for (sycl::device &device : devices) {
const std::string &platform_name =
device.get_platform().get_info<sycl::info::platform::name>();
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
std::string name = device.get_info<sycl::info::device::name>();
# else
std::string name = "SYCL Host Task (Debug)";
# endif
std::string id = "ONEAPI_" + platform_name + "_" + name;
if (device.has(sycl::aspect::ext_intel_pci_address)) {
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());

View File

@ -390,6 +390,9 @@ void PathTrace::path_trace(RenderWork &render_work)
const int num_samples = render_work.path_trace.num_samples;
PathTraceWork *path_trace_work = path_trace_works_[i].get();
if (path_trace_work->get_device()->have_error()) {
return;
}
PathTraceWork::RenderStatistics statistics;
path_trace_work->render_samples(statistics,

View File

@ -752,6 +752,10 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
${SYCL_CPP_FLAGS}
)
if (WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_TASK)
endif()
# Set defaults for spir64 and spir64_gen options
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
@ -763,7 +767,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ")
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ")
if(WITH_CYCLES_ONEAPI_BINARIES)
# Host execution won't use GPU binaries, no need to compile them.
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
# AoT binaries aren't currently reused when calling sycl::build.
list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
# Iterate over all targest and their options

View File

@ -30,6 +30,16 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
ccl_global int *ccl_restrict num_indices,
IsActiveOp is_active_op)
{
# ifdef WITH_ONEAPI_SYCL_HOST_TASK
int write_index = 0;
for (int state_index = 0; state_index < num_states; state_index++) {
if (is_active_op(state_index))
indices[write_index++] = state_index;
}
*num_indices = write_index;
return;
# endif /* WITH_ONEAPI_SYCL_HOST_TASK */
const sycl::nd_item<1> &item_id = sycl::ext::oneapi::experimental::this_nd_item<1>();
const uint blocksize = item_id.get_local_range(0);

View File

@ -56,7 +56,8 @@
#define ccl_gpu_kernel(block_num_threads, thread_num_registers)
#define ccl_gpu_kernel_threads(block_num_threads)
#define ccl_gpu_kernel_signature(name, ...) \
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
# define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
@ -67,9 +68,37 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
[=](sycl::nd_item<1> item) {
#define ccl_gpu_kernel_postfix \
# define ccl_gpu_kernel_postfix \
}); \
}
#else
/* Additional anonymous lambda is required to handle all "return" statements in the kernel code */
# define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
sycl::handler &cgh, \
__VA_ARGS__) { \
(kg); \
(kernel_local_size); \
cgh.host_task( \
[=]() {\
for (size_t gid = (size_t)0; gid < kernel_global_size; gid++) { \
kg->nd_item_local_id_0 = 0; \
kg->nd_item_local_range_0 = 1; \
kg->nd_item_group_id_0 = gid; \
kg->nd_item_group_range_0 = kernel_global_size; \
kg->nd_item_global_id_0 = gid; \
kg->nd_item_global_range_0 = kernel_global_size; \
auto kernel = [=]() {
# define ccl_gpu_kernel_postfix \
}; \
kernel(); \
} \
}); \
}
#endif
#define ccl_gpu_kernel_call(x) ((ONEAPIKernelContext*)kg)->x
@ -83,23 +112,40 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
} ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg)
/* GPU thread, block, grid size and index */
#define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
#define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
#define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
#define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
#define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
#define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
#define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
# define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
# define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
# define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
# define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
# define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
# define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
/* GPU warp synchronization */
#define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier()
#define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space)
#ifdef __SYCL_DEVICE_ONLY__
#define ccl_gpu_ballot(predicate) (sycl::ext::oneapi::group_ballot(sycl::ext::oneapi::experimental::this_sub_group(), predicate).count())
# define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier()
# define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space)
# ifdef __SYCL_DEVICE_ONLY__
# define ccl_gpu_ballot(predicate) (sycl::ext::oneapi::group_ballot(sycl::ext::oneapi::experimental::this_sub_group(), predicate).count())
# else
# define ccl_gpu_ballot(predicate) (predicate ? 1 : 0)
# endif
#else
#define ccl_gpu_ballot(predicate) (predicate ? 1 : 0)
# define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0)
# define ccl_gpu_block_dim_x (kg->nd_item_local_range_0)
# define ccl_gpu_block_idx_x (kg->nd_item_group_id_0)
# define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0)
# define ccl_gpu_warp_size (1)
# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
# define ccl_gpu_global_id_x() (kg->nd_item_global_id_0)
# define ccl_gpu_global_size_x() (kg->nd_item_global_range_0)
# define ccl_gpu_syncthreads()
# define ccl_gpu_local_syncthreads()
# define ccl_gpu_ballot(predicate) (predicate ? 1 : 0)
#endif
/* Debug defines */

View File

@ -23,6 +23,15 @@ typedef struct KernelGlobalsGPU {
#undef KERNEL_DATA_ARRAY
IntegratorStateGPU *integrator_state;
const KernelData *__data;
#ifdef WITH_ONEAPI_SYCL_HOST_TASK
size_t nd_item_local_id_0;
size_t nd_item_local_range_0;
size_t nd_item_group_id_0;
size_t nd_item_group_range_0;
size_t nd_item_global_id_0;
size_t nd_item_global_range_0;
#endif
} KernelGlobalsGPU;
typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;

View File

@ -230,6 +230,12 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
/* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices,
* we extend work size to fit uniformity requirements. */
global_size = groups_count * local_size;
# ifdef WITH_ONEAPI_SYCL_HOST_TASK
/* Path array implementation is serial in case of SYCL Host Task execution. */
global_size = 1;
local_size = 1;
# endif
}
/* Let the compiler throw an error if there are any kernels missing in this implementation. */

View File

@ -113,14 +113,18 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
if (depth <= 1) {
size_t scanlinesize = width * components * sizeof(StorageType);
in->read_image(FileFormat,
in->read_image(0,
0,
0,
components,
FileFormat,
(uchar *)readpixels + (height - 1) * scanlinesize,
AutoStride,
-scanlinesize,
AutoStride);
}
else {
in->read_image(FileFormat, (uchar *)readpixels);
in->read_image(0, 0, 0, components, FileFormat, (uchar *)readpixels);
}
if (components > 4) {

View File

@ -439,9 +439,12 @@ bool DenoiseImage::read_previous_pixels(const DenoiseImageLayer &layer,
{
/* Load pixels from neighboring frames, and copy them into device buffer
* with channels reshuffled. */
size_t num_pixels = (size_t)width * (size_t)height;
const size_t num_pixels = (size_t)width * (size_t)height;
const int num_channels = in_previous->spec().nchannels;
array<float> neighbor_pixels(num_pixels * num_channels);
if (!in_previous->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) {
if (!in_previous->read_image(0, 0, 0, num_channels, TypeDesc::FLOAT, neighbor_pixels.data())) {
return false;
}
@ -491,7 +494,7 @@ bool DenoiseImage::load(const string &in_filepath, string &error)
/* Read all channels into buffer. Reading all channels at once is faster
* than individually due to interleaved EXR channel storage. */
if (!in->read_image(TypeDesc::FLOAT, pixels.data())) {
if (!in->read_image(0, 0, 0, num_channels, TypeDesc::FLOAT, pixels.data())) {
error = "Failed to read image: " + in_filepath;
return false;
}

View File

@ -401,8 +401,8 @@ static bool merge_pixels(const vector<MergeImage> &images,
* faster than individually due to interleaved EXR channel storage. */
array<float> pixels;
alloc_pixels(image.in->spec(), pixels);
if (!image.in->read_image(TypeDesc::FLOAT, pixels.data())) {
const int num_channels = image.in->spec().nchannels;
if (!image.in->read_image(0, 0, 0, num_channels, TypeDesc::FLOAT, pixels.data())) {
error = "Failed to read image: " + image.filepath;
return false;
}
@ -538,6 +538,7 @@ static void read_layer_samples(vector<MergeImage> &images,
/* Load the "Debug Sample Count" pass and add the samples to the layer's sample count. */
array<float> sample_count_buffer;
sample_count_buffer.resize(in_spec.width * in_spec.height);
image.in->read_image(0,
0,
layer.sample_pass_offset,

View File

@ -113,6 +113,9 @@ void Session::start()
void Session::cancel(bool quick)
{
/* Cancel any long running device operations (e.g. shader compilations). */
device->cancel();
/* Check if session thread is rendering. */
const bool rendering = is_session_thread_rendering();
@ -401,6 +404,16 @@ RenderWork Session::run_update_for_next_iteration()
path_trace_->load_kernels();
path_trace_->alloc_work_memory();
/* Wait for device to be ready (e.g. finish any background compilations). */
string device_status;
while (!device->is_ready(device_status)) {
progress.set_status(device_status);
if (progress.get_cancel()) {
break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
progress.add_skip_time(update_timer, params.background);
}

View File

@ -646,7 +646,8 @@ bool TileManager::read_full_buffer_from_disk(const string_view filename,
return false;
}
if (!in->read_image(TypeDesc::FLOAT, buffers->buffer.data())) {
const int num_channels = in->spec().nchannels;
if (!in->read_image(0, 0, 0, num_channels, TypeDesc::FLOAT, buffers->buffer.data())) {
LOG(ERROR) << "Error reading pixels from the tile file " << in->geterror();
return false;
}

View File

@ -1223,13 +1223,12 @@ static void gwl_registry_entry_update_all(GWL_Display *display, const int interf
continue;
}
GWL_RegisteryUpdate_Params params = {
.name = reg->name,
.interface_slot = reg->interface_slot,
.version = reg->version,
GWL_RegisteryUpdate_Params params{};
params.name = reg->name;
params.interface_slot = reg->interface_slot;
params.version = reg->version;
params.user_data = reg->user_data;
.user_data = reg->user_data,
};
handler->update_fn(display, &params);
}
}
@ -4535,18 +4534,7 @@ static void output_handle_scale(void *data, struct wl_output * /*wl_output*/, co
CLOG_INFO(LOG, 2, "scale");
GWL_Output *output = static_cast<GWL_Output *>(data);
output->scale = factor;
GHOST_WindowManager *window_manager = output->system->getWindowManager();
if (window_manager) {
for (GHOST_IWindow *iwin : window_manager->getWindows()) {
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(iwin);
const std::vector<GWL_Output *> &outputs = win->outputs();
if (std::find(outputs.begin(), outputs.end(), output) == outputs.cend()) {
continue;
}
win->outputs_changed_update_scale();
}
}
output->system->output_scale_update_maybe_leave(output, false);
}
static const struct wl_output_listener output_listener = {
@ -4736,11 +4724,24 @@ static void gwl_registry_wl_output_update(GWL_Display *display,
}
static void gwl_registry_wl_output_remove(GWL_Display *display,
void *user_data,
const bool /*on_exit*/)
const bool on_exit)
{
/* While windows & cursors hold references to outputs, there is no need to manually remove
* these references as the compositor will remove references via #wl_surface_listener.leave. */
* these references as the compositor will remove references via #wl_surface_listener.leave.
*
* WARNING: this is not the case for WLROOTS based compositors which have a (bug?)
* where surface leave events don't run. So `system->output_leave(..)` is needed
* until the issue is resolved in WLROOTS. */
GWL_Output *output = static_cast<GWL_Output *>(user_data);
if (!on_exit) {
/* Needed for WLROOTS, does nothing if surface leave callbacks have already run. */
output->system->output_scale_update_maybe_leave(output, true);
}
if (output->xdg_output) {
zxdg_output_v1_destroy(output->xdg_output);
}
wl_output_destroy(output->wl_output);
std::vector<GWL_Output *>::iterator iter = std::find(
display->outputs.begin(), display->outputs.end(), output);
@ -5176,11 +5177,10 @@ static void global_handle_add(void *data,
const GWL_RegistryEntry *registry_entry_prev = display->registry_entry;
/* The interface name that is ensured not to be freed. */
GWL_RegisteryAdd_Params params = {
.name = name,
.interface_slot = interface_slot,
.version = version,
};
GWL_RegisteryAdd_Params params{};
params.name = name;
params.interface_slot = interface_slot;
params.version = version;
handler->add_fn(display, &params);
@ -6762,6 +6762,49 @@ void GHOST_SystemWayland::window_surface_unref(const wl_surface *wl_surface)
#undef SURFACE_CLEAR_PTR
}
void GHOST_SystemWayland::output_scale_update_maybe_leave(GWL_Output *output, bool leave)
{
/* Update scale, optionally leaving the outputs beforehand. */
GHOST_WindowManager *window_manager = output->system->getWindowManager();
if (window_manager) {
for (GHOST_IWindow *iwin : window_manager->getWindows()) {
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(iwin);
const std::vector<GWL_Output *> &outputs = win->outputs();
bool found = leave ? win->outputs_leave(output) :
!(std::find(outputs.begin(), outputs.end(), output) == outputs.cend());
if (found) {
win->outputs_changed_update_scale();
}
}
}
for (GWL_Seat *seat : display_->seats) {
bool found;
found = leave ? seat->pointer.outputs.erase(output) : seat->pointer.outputs.count(output);
if (found) {
if (seat->cursor.wl_surface_cursor != nullptr) {
update_cursor_scale(
seat->cursor, seat->system->wl_shm(), &seat->pointer, seat->cursor.wl_surface_cursor);
}
}
found = leave ? seat->tablet.outputs.erase(output) : seat->tablet.outputs.count(output);
if (found) {
for (struct zwp_tablet_tool_v2 *zwp_tablet_tool_v2 : seat->tablet_tools) {
GWL_TabletTool *tablet_tool = static_cast<GWL_TabletTool *>(
zwp_tablet_tool_v2_get_user_data(zwp_tablet_tool_v2));
if (tablet_tool->wl_surface_cursor != nullptr) {
update_cursor_scale(seat->cursor,
seat->system->wl_shm(),
&seat->pointer,
tablet_tool->wl_surface_cursor);
}
}
}
}
}
bool GHOST_SystemWayland::window_cursor_grab_set(const GHOST_TGrabCursorMode mode,
const GHOST_TGrabCursorMode mode_current,
int32_t init_grab_xy[2],

View File

@ -194,6 +194,8 @@ class GHOST_SystemWayland : public GHOST_System {
/** Set this seat to be active. */
void seat_active_set(const struct GWL_Seat *seat);
void output_scale_update_maybe_leave(GWL_Output *output, bool leave);
/** Clear all references to this surface to prevent accessing NULL pointers. */
void window_surface_unref(const wl_surface *wl_surface);

View File

@ -1361,9 +1361,6 @@ GHOST_TSuccess GHOST_WindowWayland::notify_size()
* Functionality only used for the WAYLAND implementation.
* \{ */
/**
* Return true when the windows scale or DPI changes.
*/
bool GHOST_WindowWayland::outputs_changed_update_scale()
{
#ifdef USE_EVENT_BACKGROUND_THREAD

View File

@ -156,6 +156,9 @@ class GHOST_WindowWayland : public GHOST_Window {
bool outputs_enter(GWL_Output *output);
bool outputs_leave(GWL_Output *output);
/**
* Return true when the windows scale or DPI changes.
*/
bool outputs_changed_update_scale();
#ifdef USE_EVENT_BACKGROUND_THREAD

View File

@ -20,6 +20,7 @@ set(SRC
./intern/mallocn.c
./intern/mallocn_guarded_impl.c
./intern/mallocn_lockfree_impl.c
./intern/memory_usage.cc
MEM_guardedalloc.h
./intern/mallocn_inline.h

View File

@ -53,6 +53,9 @@ class MemLeakPrinter {
void MEM_init_memleak_detection()
{
/* Calling this ensures that the memory usage counters outlive the memory leak detection. */
memory_usage_init();
/**
* This variable is constructed when this function is first called. This should happen as soon as
* possible when the program starts.

View File

@ -89,6 +89,14 @@ void aligned_free(void *ptr);
extern bool leak_detector_has_run;
extern char free_after_leak_detection_message[];
void memory_usage_init(void);
void memory_usage_block_alloc(size_t size);
void memory_usage_block_free(size_t size);
size_t memory_usage_block_num(void);
size_t memory_usage_current(void);
size_t memory_usage_peak(void);
void memory_usage_peak_reset(void);
/* Prototypes for counted allocator functions */
size_t MEM_lockfree_allocN_len(const void *vmemh) ATTR_WARN_UNUSED_RESULT;
void MEM_lockfree_freeN(void *vmemh);

View File

@ -30,8 +30,6 @@ typedef struct MemHeadAligned {
size_t len;
} MemHeadAligned;
static unsigned int totblock = 0;
static size_t mem_in_use = 0, peak_mem = 0;
static bool malloc_debug_memset = false;
static void (*error_callback)(const char *) = NULL;
@ -46,18 +44,6 @@ enum {
#define MEMHEAD_IS_ALIGNED(memhead) ((memhead)->len & (size_t)MEMHEAD_ALIGN_FLAG)
#define MEMHEAD_LEN(memhead) ((memhead)->len & ~((size_t)(MEMHEAD_ALIGN_FLAG)))
/* Uncomment this to have proper peak counter. */
#define USE_ATOMIC_MAX
MEM_INLINE void update_maximum(size_t *maximum_value, size_t value)
{
#ifdef USE_ATOMIC_MAX
atomic_fetch_and_update_max_z(maximum_value, value);
#else
*maximum_value = value > *maximum_value ? value : *maximum_value;
#endif
}
#ifdef __GNUC__
__attribute__((format(printf, 1, 2)))
#endif
@ -103,8 +89,7 @@ void MEM_lockfree_freeN(void *vmemh)
MemHead *memh = MEMHEAD_FROM_PTR(vmemh);
size_t len = MEMHEAD_LEN(memh);
atomic_sub_and_fetch_u(&totblock, 1);
atomic_sub_and_fetch_z(&mem_in_use, len);
memory_usage_block_free(len);
if (UNLIKELY(malloc_debug_memset && len)) {
memset(memh + 1, 255, len);
@ -224,16 +209,14 @@ void *MEM_lockfree_callocN(size_t len, const char *str)
if (LIKELY(memh)) {
memh->len = len;
atomic_add_and_fetch_u(&totblock, 1);
atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
memory_usage_block_alloc(len);
return PTR_FROM_MEMHEAD(memh);
}
print_error("Calloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len),
str,
(uint)mem_in_use);
(uint)memory_usage_current());
return NULL;
}
@ -247,7 +230,7 @@ void *MEM_lockfree_calloc_arrayN(size_t len, size_t size, const char *str)
SIZET_ARG(len),
SIZET_ARG(size),
str,
(unsigned int)mem_in_use);
(unsigned int)memory_usage_current());
abort();
return NULL;
}
@ -269,16 +252,14 @@ void *MEM_lockfree_mallocN(size_t len, const char *str)
}
memh->len = len;
atomic_add_and_fetch_u(&totblock, 1);
atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
memory_usage_block_alloc(len);
return PTR_FROM_MEMHEAD(memh);
}
print_error("Malloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len),
str,
(uint)mem_in_use);
(uint)memory_usage_current());
return NULL;
}
@ -292,7 +273,7 @@ void *MEM_lockfree_malloc_arrayN(size_t len, size_t size, const char *str)
SIZET_ARG(len),
SIZET_ARG(size),
str,
(uint)mem_in_use);
(uint)memory_usage_current());
abort();
return NULL;
}
@ -340,16 +321,14 @@ void *MEM_lockfree_mallocN_aligned(size_t len, size_t alignment, const char *str
memh->len = len | (size_t)MEMHEAD_ALIGN_FLAG;
memh->alignment = (short)alignment;
atomic_add_and_fetch_u(&totblock, 1);
atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
memory_usage_block_alloc(len);
return PTR_FROM_MEMHEAD(memh);
}
print_error("Malloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len),
str,
(uint)mem_in_use);
(uint)memory_usage_current());
return NULL;
}
@ -369,8 +348,8 @@ void MEM_lockfree_callbackmemlist(void (*func)(void *))
void MEM_lockfree_printmemlist_stats(void)
{
printf("\ntotal memory len: %.3f MB\n", (double)mem_in_use / (double)(1024 * 1024));
printf("peak memory len: %.3f MB\n", (double)peak_mem / (double)(1024 * 1024));
printf("\ntotal memory len: %.3f MB\n", (double)memory_usage_current() / (double)(1024 * 1024));
printf("peak memory len: %.3f MB\n", (double)memory_usage_peak() / (double)(1024 * 1024));
printf(
"\nFor more detailed per-block statistics run Blender with memory debugging command line "
"argument.\n");
@ -398,23 +377,23 @@ void MEM_lockfree_set_memory_debug(void)
size_t MEM_lockfree_get_memory_in_use(void)
{
return mem_in_use;
return memory_usage_current();
}
uint MEM_lockfree_get_memory_blocks_in_use(void)
{
return totblock;
return (uint)memory_usage_block_num();
}
/* dummy */
void MEM_lockfree_reset_peak_memory(void)
{
peak_mem = mem_in_use;
memory_usage_peak_reset();
}
size_t MEM_lockfree_get_peak_memory(void)
{
return peak_mem;
return memory_usage_peak();
}
#ifndef NDEBUG

View File

@ -0,0 +1,258 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <algorithm>
#include <atomic>
#include <cassert>
#include <iostream>
#include <mutex>
#include <vector>
#include "MEM_guardedalloc.h"
#include "mallocn_intern.h"
#include "../../source/blender/blenlib/BLI_strict_flags.h"
namespace {

/**
 * This is stored per thread. Align to cache line size to avoid false sharing.
 */
struct alignas(64) Local {
  /** Helps to find bugs during program shutdown. */
  bool destructed = false;
  /**
   * This is the first created #Local and on the main thread. When the main local data is
   * destructed, we know that Blender is quitting and that we can't rely on thread locals being
   * available still.
   */
  bool is_main = false;
  /**
   * Number of bytes. This can be negative when e.g. one thread allocates a lot of memory, and
   * another frees it. It has to be an atomic, because it may be accessed by other threads when the
   * total memory usage is counted.
   */
  std::atomic<int64_t> mem_in_use = 0;
  /**
   * Number of allocated blocks. Can be negative and is atomic for the same reason as above.
   */
  std::atomic<int64_t> blocks_num = 0;
  /**
   * Amount of memory used when the peak was last updated. This is used so that we don't have to
   * update the peak memory usage after every memory allocation. Instead it's only updated when "a
   * lot" of new memory has been allocated. This makes the peak memory usage a little bit less
   * accurate, but it's still good enough for practical purposes.
   */
  std::atomic<int64_t> mem_in_use_during_peak_update = 0;

  Local();
  ~Local();
};

/**
 * This is a singleton that stores global data.
 */
struct Global {
  /**
   * Mutex that protects the vector below.
   */
  std::mutex locals_mutex;
  /**
   * All currently constructed #Local. This must only be accessed when the mutex above is
   * locked. Individual threads insert and remove themselves here.
   */
  std::vector<Local *> locals;
  /**
   * Number of bytes that are not tracked by #Local. This is necessary because when a thread exits,
   * its #Local data is freed. The memory counts stored there would be lost. The memory counts may
   * be non-zero during thread destruction, if the thread did an unequal amount of allocations and
   * frees (which is perfectly valid behavior as long as other threads have the responsibility to
   * free any memory that the thread allocated).
   *
   * To solve this, the memory counts are added to these global counters when the thread
   * exits. The global counters are also used when the entire process starts to exit, because the
   * #Local data of the main thread is already destructed when the leak detection happens (during
   * destruction of static variables which happens after destruction of threadlocals).
   */
  std::atomic<int64_t> mem_in_use_outside_locals = 0;
  /**
   * Number of blocks that are not tracked by #Local, for the same reason as above.
   */
  std::atomic<int64_t> blocks_num_outside_locals = 0;
  /**
   * Peak memory usage since the last reset.
   */
  std::atomic<size_t> peak = 0;
};

}  // namespace
/**
 * This is true for most of the lifetime of the program. Only when it starts exiting this becomes
 * false indicating that global counters should be used for correctness.
 */
static std::atomic<bool> use_local_counters = true;
/**
 * When a thread has allocated this amount of memory, the peak memory usage is updated. An
 * alternative would be to update the global peak memory after every allocation, but that would
 * cause much more overhead with little benefit.
 */
static constexpr int64_t peak_update_threshold = 1024 * 1024;
/** Lazily construct and return the process-wide #Global singleton. */
static Global &get_global()
{
  static Global global_data;
  return global_data;
}
/** Return the calling thread's #Local counters, constructing them on first use. */
static Local &get_local_data()
{
  static thread_local Local local_data;
  /* Catch use-after-destruction of the thread-local data. */
  assert(!local_data.destructed);
  return local_data;
}
/**
 * Register this thread's counter storage in the global list.
 */
Local::Local()
{
  Global &global = get_global();
  std::lock_guard guard{global.locals_mutex};
  /* The very first #Local ever registered belongs to the main thread, because it is created
   * through #memory_usage_init. */
  this->is_main = global.locals.empty();
  /* Register self in the global list. */
  global.locals.push_back(this);
}
/* Unregister this thread's counters and fold their values into the global fallback counters so
 * that no allocations are "forgotten" when the thread-local storage is freed. The statement order
 * below matters: counts are transferred before the main-thread flag flip and the `destructed`
 * marker are set. */
Local::~Local()
{
  Global &global = get_global();
  std::lock_guard lock{global.locals_mutex};
  /* Unregister self from the global list. */
  global.locals.erase(std::find(global.locals.begin(), global.locals.end(), this));
  /* Don't forget the memory counts stored locally. */
  global.blocks_num_outside_locals.fetch_add(this->blocks_num, std::memory_order_relaxed);
  global.mem_in_use_outside_locals.fetch_add(this->mem_in_use, std::memory_order_relaxed);
  if (this->is_main) {
    /* The main thread started shutting down. Use global counters from now on to avoid accessing
     * threadlocals after they have been destructed. */
    use_local_counters.store(false, std::memory_order_relaxed);
  }
  /* Helps to detect when thread locals are accidentally accessed after destruction. */
  this->destructed = true;
}
/**
 * Check if the current memory usage is higher than the peak and update it if yes.
 *
 * \note #memory_usage_current is called *before* #Global.locals_mutex is locked below, because it
 * locks that same (non-recursive) mutex itself; reordering the two would deadlock.
 */
static void update_global_peak()
{
  Global &global = get_global();
  /* Update peak. This load-max-store is not one atomic operation, so a concurrent update may be
   * lost; that is acceptable because the peak is approximate by design (see #memory_usage_peak). */
  global.peak = std::max<size_t>(global.peak, memory_usage_current());

  std::lock_guard lock{global.locals_mutex};
  for (Local *local : global.locals) {
    assert(!local->destructed);
    /* Updating this makes sure that the peak is not updated too often, which would degrade
     * performance. */
    local->mem_in_use_during_peak_update = local->mem_in_use.load(std::memory_order_relaxed);
  }
}
void memory_usage_init()
{
  /* Force construction of the static and thread-local variables on the main thread, so that the
   * first #Local created is the main thread's one. */
  (void)get_local_data();
}
/**
 * Account for a newly allocated block of `size` bytes.
 */
void memory_usage_block_alloc(const size_t size)
{
  if (LIKELY(use_local_counters.load(std::memory_order_relaxed))) {
    Local &local = get_local_data();
    /* Bump the thread-local counters. In the common case this causes no synchronization with
     * other threads, because every thread keeps its counters on a separate cache line. Contention
     * only happens while another thread sums up the total usage, which is rare compared to
     * allocations. */
    local.blocks_num.fetch_add(1, std::memory_order_relaxed);
    local.mem_in_use.fetch_add(int64_t(size), std::memory_order_relaxed);
    /* Once enough new memory has been allocated since the last peak update, refresh the peak. */
    if (local.mem_in_use - local.mem_in_use_during_peak_update > peak_update_threshold) {
      update_global_peak();
    }
    return;
  }
  /* Shutdown path: thread locals may already be destructed, account in the global counters. */
  Global &global = get_global();
  global.blocks_num_outside_locals.fetch_add(1, std::memory_order_relaxed);
  global.mem_in_use_outside_locals.fetch_add(int64_t(size), std::memory_order_relaxed);
}
/**
 * Account for a freed block of `size` bytes.
 *
 * Uses a relaxed load of #use_local_counters, matching #memory_usage_block_alloc; the original
 * implicit (seq_cst) load was both inconsistent with the alloc path and a needlessly strong
 * fence on every free.
 */
void memory_usage_block_free(const size_t size)
{
  if (LIKELY(use_local_counters.load(std::memory_order_relaxed))) {
    /* Decrease local memory counts. See comment in #memory_usage_block_alloc for details regarding
     * thread synchronization. */
    Local &local = get_local_data();
    local.mem_in_use.fetch_sub(int64_t(size), std::memory_order_relaxed);
    local.blocks_num.fetch_sub(1, std::memory_order_relaxed);
  }
  else {
    /* Shutdown path: thread locals may already be destructed, account in the global counters. */
    Global &global = get_global();
    global.blocks_num_outside_locals.fetch_sub(1, std::memory_order_relaxed);
    global.mem_in_use_outside_locals.fetch_sub(int64_t(size), std::memory_order_relaxed);
  }
}
size_t memory_usage_block_num()
{
Global &global = get_global();
std::lock_guard lock{global.locals_mutex};
/* Count the number of active blocks. */
int64_t blocks_num = global.blocks_num_outside_locals;
for (Local *local : global.locals) {
blocks_num += local->blocks_num;
}
return size_t(blocks_num);
}
size_t memory_usage_current()
{
Global &global = get_global();
std::lock_guard lock{global.locals_mutex};
/* Count the memory that's currently in use. */
int64_t mem_in_use = global.mem_in_use_outside_locals;
for (Local *local : global.locals) {
mem_in_use += local->mem_in_use;
}
return size_t(mem_in_use);
}
/**
 * Return the approximate peak memory usage since the last call to #memory_usage_peak_reset.
 * The value is approximate because the peak is only refreshed after a thread has allocated more
 * than #peak_update_threshold new bytes, so it can be underestimated by up to
 * `peak_update_threshold * #threads`. Allocations larger than the threshold always trigger an
 * update and are therefore always taken into account.
 */
size_t memory_usage_peak()
{
  /* Fold any not-yet-counted allocations into the peak before reporting it. */
  update_global_peak();
  return get_global().peak;
}
/** Restart peak tracking from the current total memory usage. */
void memory_usage_peak_reset()
{
  get_global().peak = memory_usage_current();
}

View File

@ -6,6 +6,7 @@
*/
#include <boost/locale.hpp>
#include <iostream>
#include <stdio.h>
#include "boost_locale_wrapper.h"

View File

@ -5050,30 +5050,33 @@ def km_sculpt(params):
# Expand
("sculpt.expand", {"type": 'A', "value": 'PRESS', "shift": True},
{"properties": [
("target", "MASK"),
("falloff_type", "GEODESIC"),
("invert", True),
("use_auto_mask", True),
("use_mask_preserve" , True)]}),
("target", "MASK"),
("falloff_type", "GEODESIC"),
("invert", True),
("use_auto_mask", True),
("use_mask_preserve", True),
]}),
("sculpt.expand", {"type": 'A', "value": 'PRESS', "shift": True, "alt": True},
{"properties": [
("target", "MASK"),
("falloff_type", "NORMALS"),
("invert", False),
("use_mask_preserve" , True)]}),
("target", "MASK"),
("falloff_type", "NORMALS"),
("invert", False),
("use_mask_preserve", True),
]}),
("sculpt.expand", {"type": 'W', "value": 'PRESS', "shift": True},
{"properties": [
("target", "FACE_SETS"),
("falloff_type", "GEODESIC"),
("invert", False),
("use_mask_preserve" , False),
("use_modify_active", False)]}),
("use_mask_preserve", False),
("use_modify_active", False),
]}),
("sculpt.expand", {"type": 'W', "value": 'PRESS', "shift": True, "alt": True},
{"properties": [
("target", "FACE_SETS"),
("falloff_type", "BOUNDARY_FACE_SET"),
("invert", False),
("use_mask_preserve" , False),
("use_mask_preserve", False),
("use_modify_active", True),
]}),
# Partial Visibility Show/hide

View File

@ -57,19 +57,19 @@ class MotionPathButtonsPanel:
# Update Selected.
col = layout.column(align=True)
row = col.row(align=True)
row.operator(f"{op_category}.paths_update", text="Update Path", icon=icon)
row.operator(f"{op_category}.paths_clear", text="", icon='X').only_selected = True
row.operator(op_category + ".paths_update", text="Update Path", icon=icon)
row.operator(op_category + ".paths_clear", text="", icon='X').only_selected = True
else:
# Calculate.
col = layout.column(align=True)
col.label(text="Nothing to show yet...", icon='ERROR')
col.operator(f"{op_category}.paths_calculate", text="Calculate...", icon=icon)
col.operator(op_category + ".paths_calculate", text="Calculate...", icon=icon)
# Update All & Clear All.
# Note that 'col' is from inside the preceding `if` or `else` block.
row = col.row(align=True)
row.operator("object.paths_update_visible", text="Update All Paths", icon='WORLD')
row.operator(f"{op_category}.paths_clear", text="", icon='X').only_selected = False
row.operator(op_category + ".paths_clear", text="", icon='X').only_selected = False
class MotionPathButtonsPanel_display:

View File

@ -318,7 +318,9 @@ class NODE_MT_node(Menu):
layout.separator()
layout.operator("node.clipboard_copy", text="Copy")
layout.operator("node.clipboard_paste", text="Paste")
row = layout.row()
row.operator_context = 'EXEC_DEFAULT'
row.operator("node.clipboard_paste", text="Paste")
layout.operator("node.duplicate_move")
layout.operator("node.duplicate_move_linked")
layout.operator("node.delete")

View File

@ -723,8 +723,18 @@ class VIEW3D_HT_header(Header):
row = layout.row(align=True)
domain = curves.selection_domain
row.operator("curves.set_selection_domain", text="", icon='CURVE_BEZCIRCLE', depress=(domain == 'POINT')).domain = 'POINT'
row.operator("curves.set_selection_domain", text="", icon='CURVE_PATH', depress=(domain == 'CURVE')).domain = 'CURVE'
row.operator(
"curves.set_selection_domain",
text="",
icon='CURVE_BEZCIRCLE',
depress=(domain == 'POINT'),
).domain = 'POINT'
row.operator(
"curves.set_selection_domain",
text="",
icon='CURVE_PATH',
depress=(domain == 'CURVE'),
).domain = 'CURVE'
# Grease Pencil
if obj and obj.type == 'GPENCIL' and context.gpencil_data:

View File

@ -25,7 +25,7 @@ extern "C" {
/* Blender file format version. */
#define BLENDER_FILE_VERSION BLENDER_VERSION
#define BLENDER_FILE_SUBVERSION 6
#define BLENDER_FILE_SUBVERSION 7
/* Minimum Blender version that supports reading file written with the current
* version. Older Blender versions will test this and show a warning if the file

View File

@ -287,11 +287,6 @@ class CurvesGeometry : public ::CurvesGeometry {
Span<float2> surface_uv_coords() const;
MutableSpan<float2> surface_uv_coords_for_write();
VArray<float> selection_point_float() const;
MutableSpan<float> selection_point_float_for_write();
VArray<float> selection_curve_float() const;
MutableSpan<float> selection_curve_float_for_write();
/**
* Calculate the largest and smallest position values, only including control points
* (rather than evaluated points). The existing values of `min` and `max` are taken into account.

View File

@ -713,6 +713,11 @@ bNode *node_copy_with_mapping(bNodeTree *dst_tree,
bNode *node_copy(bNodeTree *dst_tree, const bNode &src_node, int flag, bool use_unique);
/**
* Free the node itself.
*
* \note ID user reference-counting and changing the `nodes_by_id` vector are up to the caller.
*/
void node_free_node(bNodeTree *tree, bNode *node);
} // namespace blender::bke

View File

@ -169,7 +169,10 @@ class bNodeSocketRuntime : NonCopyable, NonMovable {
float locx = 0;
float locy = 0;
/* Runtime-only cache of the number of input links, for multi-input sockets. */
/**
* Runtime-only cache of the number of input links, for multi-input sockets,
* including dragged node links that aren't actually in the tree.
*/
short total_inputs = 0;
/** Only valid when #topology_cache_is_dirty is false. */
@ -652,6 +655,11 @@ inline bool bNodeLink::is_available() const
return this->fromsock->is_available() && this->tosock->is_available();
}
inline bool bNodeLink::is_used() const
{
return !this->is_muted() && this->is_available();
}
/** \} */
/* -------------------------------------------------------------------- */
@ -670,6 +678,20 @@ inline int bNodeSocket::index_in_tree() const
return this->runtime->index_in_all_sockets;
}
inline int bNodeSocket::index_in_all_inputs() const
{
BLI_assert(blender::bke::node_tree_runtime::topology_cache_is_available(*this));
BLI_assert(this->is_input());
return this->runtime->index_in_inout_sockets;
}
inline int bNodeSocket::index_in_all_outputs() const
{
BLI_assert(blender::bke::node_tree_runtime::topology_cache_is_available(*this));
BLI_assert(this->is_output());
return this->runtime->index_in_inout_sockets;
}
inline bool bNodeSocket::is_hidden() const
{
return (this->flag & SOCK_HIDDEN) != 0;

View File

@ -452,6 +452,7 @@ set(SRC
BKE_pbvh_pixels.hh
BKE_pointcache.h
BKE_pointcloud.h
BKE_pose_backup.h
BKE_preferences.h
BKE_report.h
BKE_rigidbody.h

View File

@ -108,7 +108,7 @@ static void action_copy_data(Main *UNUSED(bmain), ID *id_dst, const ID *id_src,
/* Duplicate F-Curve. */
/* XXX TODO: pass subdata flag?
* But surprisingly does not seem to be doing any ID refcounting... */
* But surprisingly does not seem to be doing any ID reference-counting. */
fcurve_dst = BKE_fcurve_copy(fcurve_src);
BLI_addtail(&action_dst->curves, fcurve_dst);

View File

@ -416,9 +416,9 @@ static void setup_app_data(bContext *C,
* means that we do not reset their user count, however we do increase that one when doing
* lib_link on local IDs using linked ones.
* There is no real way to predict amount of changes here, so we have to fully redo
* refcounting.
* Now that we re-use (and do not liblink in readfile.c) most local datablocks as well, we have
* to recompute refcount for all local IDs too. */
* reference-counting.
* Now that we re-use (and do not liblink in readfile.c) most local data-blocks as well,
* we have to recompute reference-counts for all local IDs too. */
BKE_main_id_refcount_recompute(bmain, false);
}

View File

@ -1464,9 +1464,9 @@ static bool cloth_build_springs(ClothModifierData *clmd, Mesh *mesh)
Cloth *cloth = clmd->clothObject;
ClothSpring *spring = nullptr, *tspring = nullptr, *tspring2 = nullptr;
uint struct_springs = 0, shear_springs = 0, bend_springs = 0, struct_springs_real = 0;
uint mvert_num = (uint)mesh->totvert;
uint mvert_num = uint(mesh->totvert);
uint numedges = uint(mesh->totedge);
uint numpolys = (uint)mesh->totpoly;
uint numpolys = uint(mesh->totpoly);
float shrink_factor;
const MEdge *medge = BKE_mesh_edges(mesh);
const MPoly *mpoly = BKE_mesh_polys(mesh);
@ -1652,7 +1652,7 @@ static bool cloth_build_springs(ClothModifierData *clmd, Mesh *mesh)
for (int i = 0; i < mvert_num; i++) {
if (cloth->verts[i].spring_count > 0) {
cloth->verts[i].avg_spring_len = cloth->verts[i].avg_spring_len * 0.49f /
(float(cloth->verts[i].spring_count));
float(cloth->verts[i].spring_count);
}
}

View File

@ -1511,8 +1511,8 @@ Depsgraph *CTX_data_expect_evaluated_depsgraph(const bContext *C)
{
Depsgraph *depsgraph = CTX_data_depsgraph_pointer(C);
/* TODO(sergey): Assert that the dependency graph is fully evaluated.
* Note that first the depsgraph and scene post-eval hooks needs to run extra round of updates
* first to make check here really reliable. */
* Note that first the depsgraph and scene post-evaluation hooks needs to run extra round of
* updates first to make check here really reliable. */
return depsgraph;
}

View File

@ -38,8 +38,6 @@ static const std::string ATTR_HANDLE_POSITION_RIGHT = "handle_right";
static const std::string ATTR_NURBS_ORDER = "nurbs_order";
static const std::string ATTR_NURBS_WEIGHT = "nurbs_weight";
static const std::string ATTR_NURBS_KNOTS_MODE = "knots_mode";
static const std::string ATTR_SELECTION_POINT_FLOAT = ".selection_point_float";
static const std::string ATTR_SELECTION_CURVE_FLOAT = ".selection_curve_float";
static const std::string ATTR_SURFACE_UV_COORDINATE = "surface_uv_coordinate";
/* -------------------------------------------------------------------- */
@ -433,26 +431,6 @@ MutableSpan<float2> CurvesGeometry::surface_uv_coords_for_write()
return get_mutable_attribute<float2>(*this, ATTR_DOMAIN_CURVE, ATTR_SURFACE_UV_COORDINATE);
}
VArray<float> CurvesGeometry::selection_point_float() const
{
return get_varray_attribute<float>(*this, ATTR_DOMAIN_POINT, ATTR_SELECTION_POINT_FLOAT, 1.0f);
}
MutableSpan<float> CurvesGeometry::selection_point_float_for_write()
{
return get_mutable_attribute<float>(*this, ATTR_DOMAIN_POINT, ATTR_SELECTION_POINT_FLOAT, 1.0f);
}
VArray<float> CurvesGeometry::selection_curve_float() const
{
return get_varray_attribute<float>(*this, ATTR_DOMAIN_CURVE, ATTR_SELECTION_CURVE_FLOAT, 1.0f);
}
MutableSpan<float> CurvesGeometry::selection_curve_float_for_write()
{
return get_mutable_attribute<float>(*this, ATTR_DOMAIN_CURVE, ATTR_SELECTION_CURVE_FLOAT, 1.0f);
}
/** \} */
/* -------------------------------------------------------------------- */

View File

@ -971,9 +971,11 @@ class VArrayImpl_For_VertexWeights final : public VMutableArrayImpl<float> {
void set_all(Span<float> src) override
{
for (const int64_t index : src.index_range()) {
this->set(index, src[index]);
}
threading::parallel_for(src.index_range(), 4096, [&](const IndexRange range) {
for (const int64_t i : range) {
this->set(i, src[i]);
}
});
}
void materialize(IndexMask mask, MutableSpan<float> r_span) const override
@ -981,14 +983,16 @@ class VArrayImpl_For_VertexWeights final : public VMutableArrayImpl<float> {
if (dverts_ == nullptr) {
return r_span.fill_indices(mask, 0.0f);
}
for (const int64_t index : mask) {
if (const MDeformWeight *weight = this->find_weight_at_index(index)) {
r_span[index] = weight->weight;
threading::parallel_for(mask.index_range(), 4096, [&](const IndexRange range) {
for (const int64_t i : mask.slice(range)) {
if (const MDeformWeight *weight = this->find_weight_at_index(i)) {
r_span[i] = weight->weight;
}
else {
r_span[i] = 0.0f;
}
}
else {
r_span[index] = 0.0f;
}
}
});
}
void materialize_to_uninitialized(IndexMask mask, MutableSpan<float> r_span) const override
@ -1091,15 +1095,18 @@ class VertexGroupsAttributeProvider final : public DynamicAttributesProvider {
return true;
}
for (MDeformVert &dvert : mesh->deform_verts_for_write()) {
MDeformWeight *weight = BKE_defvert_find_index(&dvert, index);
BKE_defvert_remove_group(&dvert, weight);
for (MDeformWeight &weight : MutableSpan(dvert.dw, dvert.totweight)) {
if (weight.def_nr > index) {
weight.def_nr--;
MutableSpan<MDeformVert> dverts = mesh->deform_verts_for_write();
threading::parallel_for(dverts.index_range(), 1024, [&](IndexRange range) {
for (MDeformVert &dvert : dverts.slice(range)) {
MDeformWeight *weight = BKE_defvert_find_index(&dvert, index);
BKE_defvert_remove_group(&dvert, weight);
for (MDeformWeight &weight : MutableSpan(dvert.dw, dvert.totweight)) {
if (weight.def_nr > index) {
weight.def_nr--;
}
}
}
}
});
return true;
}

View File

@ -732,7 +732,7 @@ static void cp_key(const int start,
if (flagflo) {
ktot += start * kd;
a = (int)floor(ktot);
a = int(floor(ktot));
if (a) {
ktot -= a;
k1 += a * key->elemsize;
@ -1078,7 +1078,7 @@ static void do_key(const int start,
if (flagdo & 1) {
if (flagflo & 1) {
k1tot += start * k1d;
a = (int)floor(k1tot);
a = int(floor(k1tot));
if (a) {
k1tot -= a;
k1 += a * key->elemsize;
@ -1091,7 +1091,7 @@ static void do_key(const int start,
if (flagdo & 2) {
if (flagflo & 2) {
k2tot += start * k2d;
a = (int)floor(k2tot);
a = int(floor(k2tot));
if (a) {
k2tot -= a;
k2 += a * key->elemsize;
@ -1104,7 +1104,7 @@ static void do_key(const int start,
if (flagdo & 4) {
if (flagflo & 4) {
k3tot += start * k3d;
a = (int)floor(k3tot);
a = int(floor(k3tot));
if (a) {
k3tot -= a;
k3 += a * key->elemsize;
@ -1117,7 +1117,7 @@ static void do_key(const int start,
if (flagdo & 8) {
if (flagflo & 8) {
k4tot += start * k4d;
a = (int)floor(k4tot);
a = int(floor(k4tot));
if (a) {
k4tot -= a;
k4 += a * key->elemsize;
@ -1660,7 +1660,7 @@ int BKE_keyblock_element_count(const Key *key)
size_t BKE_keyblock_element_calc_size_from_shape(const Key *key, const int shape_index)
{
return (size_t)BKE_keyblock_element_count_from_shape(key, shape_index) * key->elemsize;
return size_t(BKE_keyblock_element_count_from_shape(key, shape_index)) * key->elemsize;
}
size_t BKE_keyblock_element_calc_size(const Key *key)

View File

@ -1057,7 +1057,7 @@ static void layer_collection_objects_sync(ViewLayer *view_layer,
}
/* Holdout and indirect only */
if ((layer->flag & LAYER_COLLECTION_HOLDOUT)) {
if (layer->flag & LAYER_COLLECTION_HOLDOUT) {
base->flag_from_collection |= BASE_HOLDOUT;
}
if (layer->flag & LAYER_COLLECTION_INDIRECT_ONLY) {

View File

@ -322,8 +322,8 @@ void id_us_min(ID *id)
if (id->us <= limit) {
if (!ID_TYPE_IS_DEPRECATED(GS(id->name))) {
/* Do not assert on deprecated ID types, we cannot really ensure that their ID refcounting
* is valid... */
/* Do not assert on deprecated ID types, we cannot really ensure that their ID
* reference-counting is valid. */
CLOG_ERROR(&LOG,
"ID user decrement error: %s (from '%s'): %d <= %d",
id->name,

View File

@ -261,7 +261,7 @@ static bool library_foreach_ID_link(Main *bmain,
* (the node tree), but re-use those generated for the 'owner' ID (the material). */
if (inherit_data == NULL) {
data.cb_flag = ID_IS_LINKED(id) ? IDWALK_CB_INDIRECT_USAGE : 0;
/* When an ID is defined as not refcounting its ID usages, it should never do it. */
/* When an ID is defined as not reference-counting its ID usages, it should never do it. */
data.cb_flag_clear = (id->tag & LIB_TAG_NO_USER_REFCOUNT) ?
IDWALK_CB_USER | IDWALK_CB_USER_ONE :
0;

View File

@ -1046,7 +1046,7 @@ Mesh *BKE_mesh_new_from_object_to_bmain(Main *bmain,
* everything is only allowed to reference original data-blocks.
*
* Note that user-count updates has to be done *after* mesh has been transferred to Main database
* (since doing refcounting on non-Main IDs is forbidden). */
* (since doing reference-counting on non-Main IDs is forbidden). */
BKE_library_foreach_ID_link(
nullptr, &mesh->id, foreach_libblock_make_original_callback, nullptr, IDWALK_NOP);

View File

@ -928,7 +928,7 @@ static void loop_manifold_fan_around_vert_next(const Span<int> corner_verts,
const uint vert_fan_next = corner_verts[*r_mlfan_curr_index];
const MPoly &mpfan_next = polys[*r_mpfan_curr_index];
if ((vert_fan_orig == vert_fan_next && vert_fan_orig == mv_pivot_index) ||
(vert_fan_orig != vert_fan_next && vert_fan_orig != mv_pivot_index)) {
(!ELEM(vert_fan_orig, vert_fan_next, mv_pivot_index))) {
/* We need the previous loop, but current one is our vertex's loop. */
*r_mlfan_vert_index = *r_mlfan_curr_index;
if (--(*r_mlfan_curr_index) < mpfan_next.loopstart) {

View File

@ -1593,7 +1593,7 @@ void BKE_mesh_remap_calc_loops_from_mesh(const int mode,
}
}
if ((size_t)mp_dst->totloop > islands_res_buff_size) {
if (size_t(mp_dst->totloop) > islands_res_buff_size) {
islands_res_buff_size = size_t(mp_dst->totloop) + MREMAP_DEFAULT_BUFSIZE;
for (tindex = 0; tindex < num_trees; tindex++) {
islands_res[tindex] = static_cast<IslandResult *>(
@ -2256,7 +2256,7 @@ void BKE_mesh_remap_calc_polys_from_mesh(const int mode,
*/
RNG *rng = BLI_rng_new(0);
const size_t numpolys_src = (size_t)me_src->totpoly;
const size_t numpolys_src = size_t(me_src->totpoly);
/* Here it's simpler to just allocate for all polys :/ */
int *indices = static_cast<int *>(MEM_mallocN(sizeof(*indices) * numpolys_src, __func__));

View File

@ -2952,11 +2952,6 @@ void nodeRebuildIDVector(bNodeTree *node_tree)
namespace blender::bke {
/**
* Free the node itself.
*
* \note: ID user refcounting and changing the `nodes_by_id` vector are up to the caller.
*/
void node_free_node(bNodeTree *ntree, bNode *node)
{
/* since it is called while free database, node->id is undefined */
@ -3031,7 +3026,7 @@ void ntreeFreeLocalNode(bNodeTree *ntree, bNode *node)
void nodeRemoveNode(Main *bmain, bNodeTree *ntree, bNode *node, bool do_id_user)
{
/* This function is not for localized node trees, we do not want
* do to ID user refcounting and removal of animdation data then. */
* to do ID user reference-counting and removal of animation data then. */
BLI_assert((ntree->id.tag & LIB_TAG_LOCALIZED) == 0);
bool node_has_id = false;

View File

@ -2891,6 +2891,7 @@ void BKE_object_obdata_size_init(struct Object *ob, const float size)
case OB_LAMP: {
Light *lamp = (Light *)ob->data;
lamp->dist *= size;
lamp->radius *= size;
lamp->area_size *= size;
lamp->area_sizey *= size;
lamp->area_sizez *= size;

View File

@ -36,7 +36,7 @@ MeshUVVert *MeshPrimitive::get_other_uv_vertex(const MeshVertex *v1, const MeshV
BLI_assert(vertices[0].vertex == v1 || vertices[1].vertex == v1 || vertices[2].vertex == v1);
BLI_assert(vertices[0].vertex == v2 || vertices[1].vertex == v2 || vertices[2].vertex == v2);
for (MeshUVVert &uv_vertex : vertices) {
if (uv_vertex.vertex != v1 && uv_vertex.vertex != v2) {
if (!ELEM(uv_vertex.vertex, v1, v2)) {
return &uv_vertex;
}
}

View File

@ -112,4 +112,6 @@ inline void gather(const VArray<T> &src,
});
}
void invert_booleans(MutableSpan<bool> span);
} // namespace blender::array_utils

View File

@ -33,4 +33,13 @@ void gather(const GSpan src, const IndexMask indices, GMutableSpan dst, const in
gather(GVArray::ForSpan(src), indices, dst, grain_size);
}
/** Flip every boolean in the span in place, processing chunks in parallel. */
void invert_booleans(MutableSpan<bool> span)
{
  threading::parallel_for(span.index_range(), 4096, [&](const IndexRange range) {
    for (const int64_t index : range) {
      span[index] = !span[index];
    }
  });
}
} // namespace blender::array_utils

View File

@ -1782,12 +1782,6 @@ void blo_do_versions_280(FileData *fd, Library *UNUSED(lib), Main *bmain)
}
if (!MAIN_VERSION_ATLEAST(bmain, 280, 1)) {
if (!DNA_struct_elem_find(fd->filesdna, "Lamp", "float", "bleedexp")) {
for (Light *la = bmain->lights.first; la; la = la->id.next) {
la->bleedexp = 2.5f;
}
}
if (!DNA_struct_elem_find(fd->filesdna, "GPUDOFSettings", "float", "ratio")) {
for (Camera *ca = bmain->cameras.first; ca; ca = ca->id.next) {
ca->gpu_dof.ratio = 1.0f;
@ -1820,7 +1814,6 @@ void blo_do_versions_280(FileData *fd, Library *UNUSED(lib), Main *bmain)
for (Light *la = bmain->lights.first; la; la = la->id.next) {
la->contact_dist = 0.2f;
la->contact_bias = 0.03f;
la->contact_spread = 0.2f;
la->contact_thickness = 0.2f;
}
}

View File

@ -28,6 +28,7 @@
#include "DNA_curves_types.h"
#include "DNA_genfile.h"
#include "DNA_gpencil_modifier_types.h"
#include "DNA_light_types.h"
#include "DNA_lineart_types.h"
#include "DNA_listBase.h"
#include "DNA_mask_types.h"
@ -50,6 +51,7 @@
#include "BKE_collection.h"
#include "BKE_colortools.h"
#include "BKE_curve.h"
#include "BKE_curves.hh"
#include "BKE_data_transfer.h"
#include "BKE_deform.h"
#include "BKE_fcurve.h"
@ -3789,7 +3791,7 @@ void blo_do_versions_300(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (MovieClip *, clip, &bmain->movieclips) {
MovieTracking *tracking = &clip->tracking;
const float frame_center_x = (float(clip->lastsize[0])) / 2;
const float frame_center_x = float(clip->lastsize[0]) / 2;
const float frame_center_y = float(clip->lastsize[1]) / 2;
tracking->camera.principal_point[0] = (tracking->camera.principal_legacy[0] -
@ -3828,13 +3830,20 @@ void blo_do_versions_300(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (SpaceLink *, sl, &area->spacedata) {
if (sl->spacetype == SPACE_VIEW3D) {
View3D *v3d = (View3D *)sl;
v3d->overlay.flag |= (int)(V3D_OVERLAY_SCULPT_SHOW_MASK |
V3D_OVERLAY_SCULPT_SHOW_FACE_SETS);
v3d->overlay.flag |= int(V3D_OVERLAY_SCULPT_SHOW_MASK |
V3D_OVERLAY_SCULPT_SHOW_FACE_SETS);
}
}
}
}
}
if (!MAIN_VERSION_ATLEAST(bmain, 305, 7)) {
LISTBASE_FOREACH (Light *, light, &bmain->lights) {
light->radius = light->area_size;
}
}
/**
* Versioning code until next subversion bump goes here.
*
@ -3850,5 +3859,9 @@ void blo_do_versions_300(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (Curves *, curves_id, &bmain->hair_curves) {
curves_id->flag &= ~CV_SCULPT_SELECTION_ENABLED;
}
LISTBASE_FOREACH (Curves *, curves_id, &bmain->hair_curves) {
BKE_id_attribute_rename(&curves_id->id, ".selection_point_float", ".selection", nullptr);
BKE_id_attribute_rename(&curves_id->id, ".selection_curve_float", ".selection", nullptr);
}
}
}

View File

@ -919,7 +919,7 @@ static void bm_mesh_loops_calc_normals_for_vert_with_clnors(BMesh *bm,
BLI_linklist_prepend_alloca(&loops_of_vert, l_curr);
loops_of_vert_count += 1;
const uint index_test = (uint)BM_elem_index_get(l_curr);
const uint index_test = uint(BM_elem_index_get(l_curr));
if (index_best > index_test) {
index_best = index_test;
link_best = loops_of_vert;

View File

@ -104,7 +104,13 @@ void OutputFileNode::convert_to_operations(NodeConverter &converter,
char path[FILE_MAX];
/* combine file path for the input */
BLI_path_join(path, FILE_MAX, storage->base_path, sockdata->path);
if (sockdata->path[0]) {
BLI_path_join(path, FILE_MAX, storage->base_path, sockdata->path);
}
else {
BLI_strncpy(path, storage->base_path, FILE_MAX);
BLI_path_slash_ensure(path, FILE_MAX);
}
NodeOperation *output_operation = nullptr;

View File

@ -45,7 +45,7 @@ static void light_shape_parameters_set(EEVEE_Light *evli, const Light *la, const
evli->sizey = scale[1] / scale[2];
evli->spotsize = cosf(la->spotsize * 0.5f);
evli->spotblend = (1.0f - evli->spotsize) * la->spotblend;
evli->radius = max_ff(0.001f, la->area_size);
evli->radius = max_ff(0.001f, la->radius);
}
else if (la->type == LA_AREA) {
evli->sizex = max_ff(0.003f, la->area_size * scale[0] * 0.5f);
@ -62,7 +62,7 @@ static void light_shape_parameters_set(EEVEE_Light *evli, const Light *la, const
evli->radius = max_ff(0.001f, tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f));
}
else {
evli->radius = max_ff(0.001f, la->area_size);
evli->radius = max_ff(0.001f, la->radius);
}
}

View File

@ -178,7 +178,7 @@ void Light::shape_parameters_set(const ::Light *la, const float scale[3])
_area_size_x = tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f);
}
else {
_area_size_x = la->area_size;
_area_size_x = la->radius;
}
_area_size_y = _area_size_x = max_ff(0.001f, _area_size_x);
radius_squared = square_f(_area_size_x);

View File

@ -92,10 +92,10 @@ static void OVERLAY_engine_init(void *vedata)
}
if (ts->sculpt) {
if (!(v3d->overlay.flag & (int)V3D_OVERLAY_SCULPT_SHOW_FACE_SETS)) {
if (!(v3d->overlay.flag & int(V3D_OVERLAY_SCULPT_SHOW_FACE_SETS))) {
pd->overlay.sculpt_mode_face_sets_opacity = 0.0f;
}
if (!(v3d->overlay.flag & (int)V3D_OVERLAY_SCULPT_SHOW_MASK)) {
if (!(v3d->overlay.flag & int(V3D_OVERLAY_SCULPT_SHOW_MASK))) {
pd->overlay.sculpt_mode_mask_opacity = 0.0f;
}
}

View File

@ -637,7 +637,7 @@ void OVERLAY_light_cache_populate(OVERLAY_Data *vedata, Object *ob)
DRW_buffer_add_entry(cb->groundline, instdata.pos);
if (la->type == LA_LOCAL) {
instdata.area_size_x = instdata.area_size_y = la->area_size;
instdata.area_size_x = instdata.area_size_y = la->radius;
DRW_buffer_add_entry(cb->light_point, color, &instdata);
}
else if (la->type == LA_SUN) {
@ -661,7 +661,7 @@ void OVERLAY_light_cache_populate(OVERLAY_Data *vedata, Object *ob)
instdata.spot_blend = sqrtf((-a - c * a) / (c - c * a));
instdata.spot_cosine = a;
/* HACK: We pack the area size in alpha color. This is decoded by the shader. */
color[3] = -max_ff(la->area_size, FLT_MIN);
color[3] = -max_ff(la->radius, FLT_MIN);
DRW_buffer_add_entry(cb->light_spot, color, &instdata);
if ((la->mode & LA_SHOW_CONE) && !DRW_state_is_select()) {

View File

@ -10,6 +10,7 @@
#include "draw_cache_impl.h"
#include "overlay_private.hh"
#include "BKE_attribute.hh"
#include "BKE_curves.hh"
void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata)
@ -31,18 +32,11 @@ void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata)
static bool everything_selected(const Curves &curves_id)
{
const blender::bke::CurvesGeometry &curves = blender::bke::CurvesGeometry::wrap(
curves_id.geometry);
blender::VArray<float> selection;
switch (curves_id.selection_domain) {
case ATTR_DOMAIN_POINT:
selection = curves.selection_point_float();
break;
case ATTR_DOMAIN_CURVE:
selection = curves.selection_curve_float();
break;
}
return selection.is_single() && selection.get_internal_single() == 1.0f;
using namespace blender;
const bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry);
const VArray<bool> selection = curves.attributes().lookup_or_default<bool>(
".selection", ATTR_DOMAIN_POINT, true);
return selection.is_single() && selection.get_internal_single();
}
void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *object)
@ -56,12 +50,9 @@ void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *object)
}
/* Retrieve the location of the texture. */
const char *name = curves->selection_domain == ATTR_DOMAIN_POINT ? ".selection_point_float" :
".selection_curve_float";
bool is_point_domain;
GPUVertBuf **texture = DRW_curves_texture_for_evaluated_attribute(
curves, name, &is_point_domain);
curves, ".selection", &is_point_domain);
if (texture == nullptr) {
return;
}

View File

@ -11,6 +11,7 @@
#include "MEM_guardedalloc.h"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_listbase.h"
#include "BLI_math_base.h"
#include "BLI_math_vec_types.hh"
@ -334,17 +335,16 @@ static void curves_batch_cache_ensure_edit_points_data(const Curves &curves_id,
GPU_vertbuf_init_with_format(cache.edit_points_data, &format_data);
GPU_vertbuf_data_alloc(cache.edit_points_data, curves.points_num());
VArray<float> selection;
const VArray<bool> selection = curves.attributes().lookup_or_default<bool>(
".selection", eAttrDomain(curves_id.selection_domain), true);
switch (curves_id.selection_domain) {
case ATTR_DOMAIN_POINT:
selection = curves.selection_point_float();
for (const int point_i : selection.index_range()) {
const float point_selection = (selection[point_i] > 0.0f) ? 1.0f : 0.0f;
GPU_vertbuf_attr_set(cache.edit_points_data, color, point_i, &point_selection);
}
break;
case ATTR_DOMAIN_CURVE:
selection = curves.selection_curve_float();
for (const int curve_i : curves.curves_range()) {
const float curve_selection = (selection[curve_i] > 0.0f) ? 1.0f : 0.0f;
const IndexRange points = curves.points_for_curve(curve_i);

View File

@ -276,7 +276,7 @@ BLI_INLINE int32_t pack_rotation_aspect_hardness(float rot, float asp, float har
int32_t packed = 0;
/* Aspect uses 9 bits */
float asp_normalized = (asp > 1.0f) ? (1.0f / asp) : asp;
packed |= (int32_t)unit_float_to_uchar_clamp(asp_normalized);
packed |= int32_t(unit_float_to_uchar_clamp(asp_normalized));
/* Store if inversed in the 9th bit. */
if (asp > 1.0f) {
packed |= 1 << 8;
@ -284,13 +284,13 @@ BLI_INLINE int32_t pack_rotation_aspect_hardness(float rot, float asp, float har
/* Rotation uses 9 bits */
/* Rotation are in [-90°..90°] range, so we can encode the sign of the angle + the cosine
* because the cosine will always be positive. */
packed |= (int32_t)unit_float_to_uchar_clamp(cosf(rot)) << 9;
packed |= int32_t(unit_float_to_uchar_clamp(cosf(rot))) << 9;
/* Store sine sign in 9th bit. */
if (rot < 0.0f) {
packed |= 1 << 17;
}
/* Hardness uses 8 bits */
packed |= (int32_t)unit_float_to_uchar_clamp(hard) << 18;
packed |= int32_t(unit_float_to_uchar_clamp(hard)) << 18;
return packed;
}
@ -315,7 +315,7 @@ static void gpencil_buffer_add_point(GPUIndexBufBuilder *ibo,
/* Encode fill opacity defined by opacity modifier in vertex color alpha. If
* no opacity modifier, the value will be always 1.0f. The opacity factor can be any
* value between 0.0f and 2.0f */
col->fcol[3] = ((int)(col->fcol[3] * 10000.0f) * 10.0f) + gps->fill_opacity_fac;
col->fcol[3] = (int(col->fcol[3] * 10000.0f) * 10.0f) + gps->fill_opacity_fac;
vert->strength = (round_cap0) ? pt->strength : -pt->strength;
vert->u_stroke = pt->uv_fac;
@ -579,7 +579,7 @@ bGPDstroke *DRW_cache_gpencil_sbuffer_stroke_data_get(Object *ob)
gps->runtime.stroke_start = 0;
copy_v4_v4(gps->vert_color_fill, gpd->runtime.vert_color_fill);
/* Caps. */
gps->caps[0] = gps->caps[1] = (short)brush->gpencil_settings->caps_type;
gps->caps[0] = gps->caps[1] = short(brush->gpencil_settings->caps_type);
gpd->runtime.sbuffer_gps = gps;
}

View File

@ -66,13 +66,14 @@ typedef struct PoseBlendData {
/* For temp-loading the Action from the pose library. */
AssetTempIDConsumer *temp_id_consumer;
/* Blend factor, interval [0, 1] for interpolating between current and given pose. */
/* Blend factor, interval [-1, 1] for interpolating between current and given pose.
* Positive factors will blend in `act`, whereas negative factors will blend in `act_flipped`. */
float blend_factor;
struct PoseBackup *pose_backup;
Object *ob; /* Object to work on. */
bAction *act; /* Pose to blend into the current pose. */
bool free_action;
Object *ob; /* Object to work on. */
bAction *act; /* Pose to blend into the current pose. */
bAction *act_flipped; /* Flipped copy of `act`. */
Scene *scene; /* For auto-keying. */
ScrArea *area; /* For drawing status text. */
@ -83,12 +84,19 @@ typedef struct PoseBlendData {
char headerstr[UI_MAX_DRAW_STR];
} PoseBlendData;
static void poselib_blend_flip_pose(bContext *C, wmOperator *op);
/** Return the bAction that should be blended.
* This is either pbd->act or pbd->act_flipped, depending on the sign of the blend factor.
*/
static bAction *poselib_action_to_blend(PoseBlendData *pbd)
{
return (pbd->blend_factor >= 0) ? pbd->act : pbd->act_flipped;
}
/* Makes a copy of the current pose for restoration purposes - doesn't do constraints currently */
static void poselib_backup_posecopy(PoseBlendData *pbd)
{
pbd->pose_backup = BKE_pose_backup_create_selected_bones(pbd->ob, pbd->act);
const bAction *action = poselib_action_to_blend(pbd);
pbd->pose_backup = BKE_pose_backup_create_selected_bones(pbd->ob, action);
if (pbd->state == POSE_BLEND_INIT) {
/* Ready for blending now. */
@ -168,19 +176,32 @@ static void poselib_blend_apply(bContext *C, wmOperator *op)
/* Perform the actual blending. */
struct Depsgraph *depsgraph = CTX_data_depsgraph_pointer(C);
AnimationEvalContext anim_eval_context = BKE_animsys_eval_context_construct(depsgraph, 0.0f);
BKE_pose_apply_action_blend(pbd->ob, pbd->act, &anim_eval_context, pbd->blend_factor);
bAction *to_blend = poselib_action_to_blend(pbd);
BKE_pose_apply_action_blend(pbd->ob, to_blend, &anim_eval_context, fabs(pbd->blend_factor));
}
/* ---------------------------- */
static void poselib_blend_set_factor(PoseBlendData *pbd, const float new_factor)
{
pbd->blend_factor = CLAMPIS(new_factor, 0.0f, 1.0f);
const bool sign_changed = signf(new_factor) != signf(pbd->blend_factor);
if (sign_changed) {
/* The zero point was crossed, meaning that the pose will be flipped. This means the pose
* backup has to change, as it only contains the bones for one side. */
BKE_pose_backup_restore(pbd->pose_backup);
BKE_pose_backup_free(pbd->pose_backup);
}
pbd->blend_factor = CLAMPIS(new_factor, -1.0f, 1.0f);
pbd->needs_redraw = true;
if (sign_changed) {
poselib_backup_posecopy(pbd);
}
}
/* Return operator return value. */
static int poselib_blend_handle_event(bContext *C, wmOperator *op, const wmEvent *event)
static int poselib_blend_handle_event(bContext *UNUSED(C), wmOperator *op, const wmEvent *event)
{
PoseBlendData *pbd = op->customdata;
@ -226,10 +247,6 @@ static int poselib_blend_handle_event(bContext *C, wmOperator *op, const wmEvent
pbd->state = pbd->state == POSE_BLEND_BLENDING ? POSE_BLEND_ORIGINAL : POSE_BLEND_BLENDING;
pbd->needs_redraw = true;
break;
case EVT_FKEY:
poselib_blend_flip_pose(C, op);
break;
}
return OPERATOR_RUNNING_MODAL;
@ -280,30 +297,6 @@ static bAction *flip_pose(bContext *C, Object *ob, bAction *action)
return action_copy;
}
/* Flip the target pose the interactive blend operator is currently using. */
static void poselib_blend_flip_pose(bContext *C, wmOperator *op)
{
PoseBlendData *pbd = op->customdata;
bAction *old_action = pbd->act;
bAction *new_action = flip_pose(C, pbd->ob, old_action);
/* Before flipping over to the other side, this side needs to be restored. */
BKE_pose_backup_restore(pbd->pose_backup);
BKE_pose_backup_free(pbd->pose_backup);
pbd->pose_backup = NULL;
if (pbd->free_action) {
BKE_id_free(NULL, old_action);
}
pbd->free_action = true;
pbd->act = new_action;
pbd->needs_redraw = true;
/* Refresh the pose backup to use the flipped bones. */
poselib_backup_posecopy(pbd);
}
/* Return true on success, false if the context isn't suitable. */
static bool poselib_blend_init_data(bContext *C, wmOperator *op, const wmEvent *event)
{
@ -320,18 +313,21 @@ static bool poselib_blend_init_data(bContext *C, wmOperator *op, const wmEvent *
PoseBlendData *pbd;
op->customdata = pbd = MEM_callocN(sizeof(PoseBlendData), "PoseLib Preview Data");
bAction *action = poselib_blend_init_get_action(C, op);
if (action == NULL) {
pbd->act = poselib_blend_init_get_action(C, op);
if (pbd->act == NULL) {
return false;
}
/* Maybe flip the Action. */
/* Passing `flipped=True` is the same as flipping the sign of the blend factor. */
const bool apply_flipped = RNA_boolean_get(op->ptr, "flipped");
if (apply_flipped) {
action = flip_pose(C, ob, action);
pbd->free_action = true;
const float multiply_factor = apply_flipped ? -1.0f : 1.0f;
pbd->blend_factor = multiply_factor * RNA_float_get(op->ptr, "blend_factor");
/* Only construct the flipped pose if there is a chance it's actually needed. */
const bool is_interactive = (event != NULL);
if (is_interactive || pbd->blend_factor < 0) {
pbd->act_flipped = flip_pose(C, ob, pbd->act);
}
pbd->act = action;
/* Get the basic data. */
pbd->ob = ob;
@ -342,12 +338,12 @@ static bool poselib_blend_init_data(bContext *C, wmOperator *op, const wmEvent *
pbd->state = POSE_BLEND_INIT;
pbd->needs_redraw = true;
pbd->blend_factor = RNA_float_get(op->ptr, "blend_factor");
/* Just to avoid a clang-analyzer warning (false positive), it's set properly below. */
pbd->release_confirm_info.use_release_confirm = false;
/* Release confirm data. Only available if there's an event to work with. */
if (event != NULL) {
if (is_interactive) {
PropertyRNA *release_confirm_prop = RNA_struct_find_property(op->ptr, "release_confirm");
pbd->release_confirm_info.use_release_confirm = (release_confirm_prop != NULL) &&
RNA_property_boolean_get(op->ptr,
@ -356,10 +352,11 @@ static bool poselib_blend_init_data(bContext *C, wmOperator *op, const wmEvent *
ED_slider_init(pbd->slider, event);
ED_slider_factor_set(pbd->slider, pbd->blend_factor);
ED_slider_allow_overshoot_set(pbd->slider, false);
ED_slider_is_bidirectional_set(pbd->slider, true);
}
if (pbd->release_confirm_info.use_release_confirm) {
BLI_assert(event != NULL);
BLI_assert(is_interactive);
pbd->release_confirm_info.init_event_type = WM_userdef_event_type_from_keymap_type(
event->type);
}
@ -397,7 +394,8 @@ static void poselib_blend_cleanup(bContext *C, wmOperator *op)
poselib_keytag_pose(C, scene, pbd);
/* Ensure the redo panel has the actually-used value, instead of the initial value. */
RNA_float_set(op->ptr, "blend_factor", pbd->blend_factor);
RNA_float_set(op->ptr, "blend_factor", fabs(pbd->blend_factor));
RNA_boolean_set(op->ptr, "flipped", pbd->blend_factor < 0);
break;
}
@ -426,10 +424,8 @@ static void poselib_blend_free(wmOperator *op)
return;
}
if (pbd->free_action) {
/* Run before #poselib_tempload_exit to avoid any problems from indirectly
* referenced ID pointers. */
BKE_id_free(NULL, pbd->act);
if (pbd->act_flipped) {
BKE_id_free(NULL, pbd->act_flipped);
}
poselib_tempload_exit(pbd);
@ -489,11 +485,7 @@ static int poselib_blend_modal(bContext *C, wmOperator *op, const wmEvent *event
strcpy(tab_string, TIP_("[Tab] - Show blended pose"));
}
BLI_snprintf(status_string,
sizeof(status_string),
"[F] - Flip pose | %s | %s",
tab_string,
slider_string);
BLI_snprintf(status_string, sizeof(status_string), "%s | %s", tab_string, slider_string);
ED_workspace_status_text(C, status_string);
poselib_blend_apply(C, op);
@ -558,6 +550,8 @@ static bool poselib_blend_poll(bContext *C)
void POSELIB_OT_apply_pose_asset(wmOperatorType *ot)
{
PropertyRNA *prop;
/* Identifiers: */
ot->name = "Apply Pose Asset";
ot->idname = "POSELIB_OT_apply_pose_asset";
@ -574,17 +568,21 @@ void POSELIB_OT_apply_pose_asset(wmOperatorType *ot)
RNA_def_float_factor(ot->srna,
"blend_factor",
1.0f,
0.0f,
-1.0f,
1.0f,
"Blend Factor",
"Amount that the pose is applied on top of the existing poses",
0.0f,
"Amount that the pose is applied on top of the existing poses. A negative "
"value will apply the pose flipped over the X-axis",
-1.0f,
1.0f);
RNA_def_boolean(ot->srna,
"flipped",
false,
"Apply Flipped",
"When enabled, applies the pose flipped over the X-axis");
prop = RNA_def_boolean(
ot->srna,
"flipped",
false,
"Apply Flipped",
"When enabled, applies the pose flipped over the X-axis. This is the same as "
"passing a negative `blend_factor`");
RNA_def_property_flag(prop, PROP_SKIP_SAVE);
}
void POSELIB_OT_blend_pose_asset(wmOperatorType *ot)
@ -610,22 +608,26 @@ void POSELIB_OT_blend_pose_asset(wmOperatorType *ot)
prop = RNA_def_float_factor(ot->srna,
"blend_factor",
0.0f,
0.0f,
-1.0f,
1.0f,
"Blend Factor",
"Amount that the pose is applied on top of the existing poses",
0.0f,
"Amount that the pose is applied on top of the existing poses. A "
"negative value will apply the pose flipped over the X-axis",
-1.0f,
1.0f);
/* Blending should always start at 0%, and not at whatever percentage was last used. This RNA
* property just exists for symmetry with the Apply operator (and thus simplicity of the rest of
* the code, which can assume this property exists). */
RNA_def_property_flag(prop, PROP_SKIP_SAVE);
RNA_def_boolean(ot->srna,
"flipped",
false,
"Apply Flipped",
"When enabled, applies the pose flipped over the X-axis");
prop = RNA_def_boolean(ot->srna,
"flipped",
false,
"Apply Flipped",
"When enabled, applies the pose flipped over the X-axis. This is the "
"same as passing a negative `blend_factor`");
RNA_def_property_flag(prop, PROP_SKIP_SAVE);
prop = RNA_def_boolean(ot->srna,
"release_confirm",
false,

View File

@ -22,6 +22,7 @@ set(INC
set(SRC
intern/curves_add.cc
intern/curves_ops.cc
intern/curves_selection.cc
)
set(LIB

View File

@ -6,7 +6,9 @@
#include <atomic>
#include "BLI_array_utils.hh"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_index_mask_ops.hh"
#include "BLI_utildefines.h"
#include "BLI_vector_set.hh"
@ -748,7 +750,6 @@ static int curves_set_selection_domain_exec(bContext *C, wmOperator *op)
continue;
}
const eAttrDomain old_domain = eAttrDomain(curves_id->selection_domain);
curves_id->selection_domain = domain;
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id->geometry);
@ -756,18 +757,21 @@ static int curves_set_selection_domain_exec(bContext *C, wmOperator *op)
if (curves.points_num() == 0) {
continue;
}
if (old_domain == ATTR_DOMAIN_POINT && domain == ATTR_DOMAIN_CURVE) {
VArray<float> curve_selection = curves.adapt_domain(
curves.selection_point_float(), ATTR_DOMAIN_POINT, ATTR_DOMAIN_CURVE);
curve_selection.materialize(curves.selection_curve_float_for_write());
attributes.remove(".selection_point_float");
const GVArray src = attributes.lookup(".selection", domain);
if (src.is_empty()) {
continue;
}
else if (old_domain == ATTR_DOMAIN_CURVE && domain == ATTR_DOMAIN_POINT) {
VArray<float> point_selection = curves.adapt_domain(
curves.selection_curve_float(), ATTR_DOMAIN_CURVE, ATTR_DOMAIN_POINT);
point_selection.materialize(curves.selection_point_float_for_write());
attributes.remove(".selection_curve_float");
const CPPType &type = src.type();
void *dst = MEM_malloc_arrayN(attributes.domain_size(domain), type.size(), __func__);
src.materialize(dst);
attributes.remove(".selection");
if (!attributes.add(".selection",
domain,
bke::cpp_type_to_custom_data_type(type),
bke::AttributeInitMoveArray(dst))) {
MEM_freeN(dst);
}
/* Use #ID_RECALC_GEOMETRY instead of #ID_RECALC_SELECT because it is handled as a generic
@ -801,46 +805,54 @@ static void CURVES_OT_set_selection_domain(wmOperatorType *ot)
RNA_def_property_flag(prop, (PropertyFlag)(PROP_HIDDEN | PROP_SKIP_SAVE));
}
static bool varray_contains_nonzero(const VArray<float> &data)
static bool contains(const VArray<bool> &varray, const bool value)
{
bool contains_nonzero = false;
devirtualize_varray(data, [&](const auto array) {
for (const int i : data.index_range()) {
if (array[i] != 0.0f) {
contains_nonzero = true;
break;
}
}
});
return contains_nonzero;
const CommonVArrayInfo info = varray.common_info();
if (info.type == CommonVArrayInfo::Type::Single) {
return *static_cast<const bool *>(info.data) == value;
}
if (info.type == CommonVArrayInfo::Type::Span) {
const Span<bool> span(static_cast<const bool *>(info.data), varray.size());
return threading::parallel_reduce(
span.index_range(),
4096,
false,
[&](const IndexRange range, const bool init) {
return init || span.slice(range).contains(value);
},
[&](const bool a, const bool b) { return a || b; });
}
return threading::parallel_reduce(
varray.index_range(),
2048,
false,
[&](const IndexRange range, const bool init) {
if (init) {
return init;
}
/* Alternatively, this could use #materialize to retrieve many values at once. */
for (const int64_t i : range) {
if (varray[i] == value) {
return true;
}
}
return false;
},
[&](const bool a, const bool b) { return a || b; });
}
bool has_anything_selected(const Curves &curves_id)
{
const CurvesGeometry &curves = CurvesGeometry::wrap(curves_id.geometry);
switch (curves_id.selection_domain) {
case ATTR_DOMAIN_POINT:
return varray_contains_nonzero(curves.selection_point_float());
case ATTR_DOMAIN_CURVE:
return varray_contains_nonzero(curves.selection_curve_float());
}
BLI_assert_unreachable();
return false;
const VArray<bool> selection = curves.attributes().lookup<bool>(".selection");
return !selection || contains(selection, true);
}
static bool any_point_selected(const CurvesGeometry &curves)
static bool has_anything_selected(const Span<Curves *> curves_ids)
{
return varray_contains_nonzero(curves.selection_point_float());
}
static bool any_point_selected(const Span<Curves *> curves_ids)
{
for (const Curves *curves_id : curves_ids) {
if (any_point_selected(CurvesGeometry::wrap(curves_id->geometry))) {
return true;
}
}
return false;
return std::any_of(curves_ids.begin(), curves_ids.end(), [](const Curves *curves_id) {
return has_anything_selected(*curves_id);
});
}
namespace select_all {
@ -854,6 +866,16 @@ static void invert_selection(MutableSpan<float> selection)
});
}
static void invert_selection(GMutableSpan selection)
{
if (selection.type().is<bool>()) {
array_utils::invert_booleans(selection.typed<bool>());
}
else if (selection.type().is<float>()) {
invert_selection(selection.typed<float>());
}
}
static int select_all_exec(bContext *C, wmOperator *op)
{
int action = RNA_enum_get(op->ptr, "action");
@ -861,27 +883,34 @@ static int select_all_exec(bContext *C, wmOperator *op)
VectorSet<Curves *> unique_curves = get_unique_editable_curves(*C);
if (action == SEL_TOGGLE) {
action = any_point_selected(unique_curves) ? SEL_DESELECT : SEL_SELECT;
action = has_anything_selected(unique_curves) ? SEL_DESELECT : SEL_SELECT;
}
for (Curves *curves_id : unique_curves) {
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id->geometry);
bke::MutableAttributeAccessor attributes = curves.attributes_for_write();
if (action == SEL_SELECT) {
/* As an optimization, just remove the selection attributes when everything is selected. */
bke::MutableAttributeAccessor attributes = curves.attributes_for_write();
attributes.remove(".selection_point_float");
attributes.remove(".selection_curve_float");
attributes.remove(".selection");
}
else if (!attributes.contains(".selection")) {
BLI_assert(ELEM(action, SEL_INVERT, SEL_DESELECT));
/* If the attribute doesn't exist and it's either deleted or inverted, create
* it with nothing selected, since that means everything was selected before. */
attributes.add(".selection",
eAttrDomain(curves_id->selection_domain),
CD_PROP_BOOL,
bke::AttributeInitDefaultValue());
}
else {
MutableSpan<float> selection = curves_id->selection_domain == ATTR_DOMAIN_POINT ?
curves.selection_point_float_for_write() :
curves.selection_curve_float_for_write();
bke::GSpanAttributeWriter selection = attributes.lookup_for_write_span(".selection");
if (action == SEL_DESELECT) {
selection.fill(0.0f);
fill_selection_false(selection.span);
}
else if (action == SEL_INVERT) {
invert_selection(selection);
invert_selection(selection.span);
}
selection.finish();
}
/* Use #ID_RECALC_GEOMETRY instead of #ID_RECALC_SELECT because it is handled as a generic

View File

@ -0,0 +1,117 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup edcurves
*/
#include "BLI_index_mask_ops.hh"
#include "BKE_attribute.hh"
#include "BKE_curves.hh"
#include "ED_curves.h"
#include "ED_object.h"
namespace blender::ed::curves {
static IndexMask retrieve_selected_curves(const bke::CurvesGeometry &curves,
Vector<int64_t> &r_indices)
{
const IndexRange curves_range = curves.curves_range();
const bke::AttributeAccessor attributes = curves.attributes();
/* Interpolate from points to curves manually as a performance improvement, since we are only
* interested in whether any point in each curve is selected. Retrieve meta data since
* #lookup_or_default from the attribute API doesn't give the domain of the attribute. */
std::optional<bke::AttributeMetaData> meta_data = attributes.lookup_meta_data(".selection");
if (meta_data && meta_data->domain == ATTR_DOMAIN_POINT) {
/* Avoid the interpolation from interpolating the attribute to the
* curve domain by retrieving the point domain values directly. */
const VArray<bool> selection = attributes.lookup_or_default<bool>(
".selection", ATTR_DOMAIN_POINT, true);
if (selection.is_single()) {
return selection.get_internal_single() ? IndexMask(curves_range) : IndexMask();
}
return index_mask_ops::find_indices_based_on_predicate(
curves_range, 512, r_indices, [&](const int64_t curve_i) {
const IndexRange points = curves.points_for_curve(curve_i);
/* The curve is selected if any of its points are selected. */
Array<bool, 32> point_selection(points.size());
selection.materialize_compressed(points, point_selection);
return point_selection.as_span().contains(true);
});
}
const VArray<bool> selection = attributes.lookup_or_default<bool>(
".selection", ATTR_DOMAIN_CURVE, true);
return index_mask_ops::find_indices_from_virtual_array(curves_range, selection, 2048, r_indices);
}
IndexMask retrieve_selected_curves(const Curves &curves_id, Vector<int64_t> &r_indices)
{
const bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry);
return retrieve_selected_curves(curves, r_indices);
}
static IndexMask retrieve_selected_points(const bke::CurvesGeometry &curves,
Vector<int64_t> &r_indices)
{
return index_mask_ops::find_indices_from_virtual_array(
curves.points_range(),
curves.attributes().lookup_or_default<bool>(".selection", ATTR_DOMAIN_POINT, true),
2048,
r_indices);
}
IndexMask retrieve_selected_points(const Curves &curves_id, Vector<int64_t> &r_indices)
{
const bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry);
return retrieve_selected_points(curves, r_indices);
}
void ensure_selection_attribute(Curves &curves_id, const eCustomDataType create_type)
{
bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry);
bke::MutableAttributeAccessor attributes = curves.attributes_for_write();
if (attributes.contains(".selection")) {
return;
}
const eAttrDomain domain = eAttrDomain(curves_id.selection_domain);
const int domain_size = attributes.domain_size(domain);
switch (create_type) {
case CD_PROP_BOOL:
attributes.add(".selection",
domain,
CD_PROP_BOOL,
bke::AttributeInitVArray(VArray<bool>::ForSingle(true, domain_size)));
break;
case CD_PROP_FLOAT:
attributes.add(".selection",
domain,
CD_PROP_FLOAT,
bke::AttributeInitVArray(VArray<float>::ForSingle(1.0f, domain_size)));
break;
default:
BLI_assert_unreachable();
}
}
void fill_selection_false(GMutableSpan selection)
{
if (selection.type().is<bool>()) {
selection.typed<bool>().fill(false);
}
else if (selection.type().is<float>()) {
selection.typed<float>().fill(0.0f);
}
}
void fill_selection_true(GMutableSpan selection)
{
if (selection.type().is<bool>()) {
selection.typed<bool>().fill(true);
}
else if (selection.type().is<float>()) {
selection.typed<float>().fill(1.0f);
}
}
} // namespace blender::ed::curves

View File

@ -20,20 +20,69 @@ void ED_operatortypes_curves(void);
#ifdef __cplusplus
# include "BKE_curves.hh"
# include "BKE_attribute.hh"
# include "BLI_index_mask.hh"
# include "BLI_vector.hh"
# include "BLI_vector_set.hh"
# include "BKE_curves.hh"
namespace blender::ed::curves {
bke::CurvesGeometry primitive_random_sphere(int curves_size, int points_per_curve);
bool has_anything_selected(const Curves &curves_id);
VectorSet<Curves *> get_unique_editable_curves(const bContext &C);
void ensure_surface_deformation_node_exists(bContext &C, Object &curves_ob);
/* -------------------------------------------------------------------- */
/** \name Poll Functions
* \{ */
bool editable_curves_with_surface_poll(bContext *C);
bool curves_with_surface_poll(bContext *C);
bool editable_curves_poll(bContext *C);
bool curves_poll(bContext *C);
/** \} */
/* -------------------------------------------------------------------- */
/** \name Selection
*
* Selection on curves can be stored on either attribute domain: either per-curve or per-point. It
* can be stored with a float or boolean data-type. The boolean data-type is faster, smaller, and
* corresponds better to edit-mode selections, but the float data type is useful for soft selection
* (like masking) in sculpt mode.
*
* The attribute API is used to do the necessary type and domain conversions when necessary, and
* can handle most interaction with the selection attribute, but these functions implement some
* helpful utilities on top of that.
* \{ */
void fill_selection_false(GMutableSpan span);
void fill_selection_true(GMutableSpan span);
/**
* Return true if any element is selected, on either domain with either type.
*/
bool has_anything_selected(const Curves &curves_id);
/**
* Find curves that have any point selected (a selection factor greater than zero),
* or curves that have their own selection factor greater than zero.
*/
IndexMask retrieve_selected_curves(const Curves &curves_id, Vector<int64_t> &r_indices);
/**
* Find points that are selected (a selection factor greater than zero),
* or points in curves with a selection factor greater than zero).
*/
IndexMask retrieve_selected_points(const Curves &curves_id, Vector<int64_t> &r_indices);
/**
* If the ".selection" attribute doesn't exist, create it with the requested type (bool or float).
*/
void ensure_selection_attribute(Curves &curves_id, const eCustomDataType create_type);
/** \} */
} // namespace blender::ed::curves
#endif

View File

@ -17,26 +17,3 @@ void ED_operatortypes_sculpt_curves(void);
#ifdef __cplusplus
}
#endif
#ifdef __cplusplus
# include "BLI_index_mask.hh"
# include "BLI_vector.hh"
namespace blender::ed::sculpt_paint {
/**
* Find curves that have any point selected (a selection factor greater than zero),
* or curves that have their own selection factor greater than zero.
*/
IndexMask retrieve_selected_curves(const Curves &curves_id, Vector<int64_t> &r_indices);
/**
* Find points that are selected (a selection factor greater than zero),
* or points in curves with a selection factor greater than zero).
*/
IndexMask retrieve_selected_points(const Curves &curves_id, Vector<int64_t> &r_indices);
} // namespace blender::ed::sculpt_paint
#endif

View File

@ -78,15 +78,6 @@ void ED_node_draw_snap(
/* node_draw.cc */
/**
* Draw a single node socket at default size.
* \note this is only called from external code, internally #node_socket_draw_nested() is used for
* optimized drawing of multiple/all sockets of a node.
*/
void ED_node_socket_draw(struct bNodeSocket *sock,
const struct rcti *rect,
const float color[4],
float scale);
void ED_node_tree_update(const struct bContext *C);
void ED_node_tag_update_id(struct ID *id);

View File

@ -9,6 +9,7 @@ struct SpaceNode;
struct ARegion;
struct Main;
struct bNodeTree;
struct rcti;
namespace blender::ed::space_node {
@ -22,4 +23,11 @@ void node_insert_on_link_flags_set(SpaceNode &snode, const ARegion &region);
void node_insert_on_link_flags(Main &bmain, SpaceNode &snode);
void node_insert_on_link_flags_clear(bNodeTree &node_tree);
/**
* Draw a single node socket at default size.
* \note this is only called from external code, internally #node_socket_draw_nested() is used for
* optimized drawing of multiple/all sockets of a node.
*/
void node_socket_draw(bNodeSocket *sock, const rcti *rect, const float color[4], float scale);
} // namespace blender::ed::space_node

View File

@ -217,7 +217,8 @@ void ED_area_tag_refresh(ScrArea *area);
void ED_area_do_refresh(struct bContext *C, ScrArea *area);
struct AZone *ED_area_azones_update(ScrArea *area, const int mouse_xy[2]);
/**
* Use NULL to disable it.
* Show the given text in the area's header, instead of its regular contents.
* Use NULL to disable this and show the regular header contents again.
*/
void ED_area_status_text(ScrArea *area, const char *str);
/**

View File

@ -98,6 +98,9 @@ void ED_slider_factor_set(struct tSlider *slider, float factor);
bool ED_slider_allow_overshoot_get(struct tSlider *slider);
void ED_slider_allow_overshoot_set(struct tSlider *slider, bool value);
bool ED_slider_is_bidirectional_get(struct tSlider *slider);
void ED_slider_is_bidirectional_set(struct tSlider *slider, bool value);
/* ************** XXX OLD CRUFT WARNING ************* */
/**

View File

@ -175,8 +175,8 @@ static bool eyedropper_cryptomatte_sample_renderlayer_fl(RenderLayer *render_lay
if (STRPREFIX(render_pass->name, render_pass_name_prefix) &&
!STREQLEN(render_pass->name, render_pass_name_prefix, sizeof(render_pass->name))) {
BLI_assert(render_pass->channels == 4);
const int x = (int)(fpos[0] * render_pass->rectx);
const int y = (int)(fpos[1] * render_pass->recty);
const int x = int(fpos[0] * render_pass->rectx);
const int y = int(fpos[1] * render_pass->recty);
const int offset = 4 * (y * render_pass->rectx + x);
zero_v3(r_col);
r_col[0] = render_pass->rect[offset];

View File

@ -115,10 +115,10 @@ void UI_draw_roundbox_3ub_alpha(
const rctf *rect, bool filled, float rad, const uchar col[3], uchar alpha)
{
const float colv[4] = {
(float(col[0])) / 255.0f,
(float(col[1])) / 255.0f,
(float(col[2])) / 255.0f,
(float(alpha)) / 255.0f,
float(col[0]) / 255.0f,
float(col[1]) / 255.0f,
float(col[2]) / 255.0f,
float(alpha) / 255.0f,
};
UI_draw_roundbox_4fv_ex(rect, (filled) ? colv : nullptr, nullptr, 1.0f, colv, U.pixelsize, rad);
}
@ -1791,7 +1791,7 @@ void ui_draw_but_CURVEPROFILE(ARegion *region,
/* Create array of the positions of the table's points. */
float(*table_coords)[2] = static_cast<float(*)[2]>(
MEM_mallocN(sizeof(*table_coords) * tot_points, __func__));
for (uint i = 0; i < (uint)BKE_curveprofile_table_size(profile); i++) {
for (uint i = 0; i < uint(BKE_curveprofile_table_size(profile)); i++) {
/* Only add the points from the table here. */
table_coords[i][0] = pts[i].x;
table_coords[i][1] = pts[i].y;

View File

@ -9684,14 +9684,20 @@ static int ui_handle_view_items_hover(const wmEvent *event, const ARegion *regio
static int ui_handle_view_item_event(bContext *C,
const wmEvent *event,
ARegion *region,
uiBut *view_but)
uiBut *active_but,
ARegion *region)
{
BLI_assert(view_but->type == UI_BTYPE_VIEW_ITEM);
if (event->type == LEFTMOUSE) {
/* Only bother finding the active view item button if the active button isn't already a view
* item. */
uiBut *view_but = (active_but && active_but->type == UI_BTYPE_VIEW_ITEM) ?
active_but :
ui_view_item_find_mouse_over(region, event->xy);
/* Will free active button if there already is one. */
ui_handle_button_activate(C, region, view_but, BUTTON_ACTIVATE_OVER);
return ui_do_button(C, view_but->block, view_but, event);
if (view_but) {
ui_handle_button_activate(C, region, view_but, BUTTON_ACTIVATE_OVER);
return ui_do_button(C, view_but->block, view_but, event);
}
}
return WM_UI_HANDLER_CONTINUE;
@ -11302,10 +11308,7 @@ static int ui_region_handler(bContext *C, const wmEvent *event, void * /*userdat
* nested in the item (it's an overlapping layout). */
ui_handle_view_items_hover(event, region);
if (retval == WM_UI_HANDLER_CONTINUE) {
uiBut *view_item = ui_view_item_find_mouse_over(region, event->xy);
if (view_item) {
retval = ui_handle_view_item_event(C, event, region, view_item);
}
retval = ui_handle_view_item_event(C, event, but, region);
}
/* delayed apply callbacks */

View File

@ -837,7 +837,7 @@ static ImBuf *create_mono_icon_with_border(ImBuf *buf,
blend_color_interpolate_float(dest_rgba, orig_rgba, border_rgba, 1.0 - orig_rgba[3]);
linearrgb_to_srgb_v4(dest_srgb, dest_rgba);
const uint alpha_mask = (uint)(dest_srgb[3] * 255) << 24;
const uint alpha_mask = uint(dest_srgb[3] * 255) << 24;
const uint cpack = rgb_to_cpack(dest_srgb[0], dest_srgb[1], dest_srgb[2]) | alpha_mask;
result->rect[offset_write] = cpack;
}
@ -1549,11 +1549,11 @@ static void icon_draw_rect(float x,
/* preserve aspect ratio and center */
if (rw > rh) {
draw_w = w;
draw_h = (int)((float(rh) / float(rw)) * float(w));
draw_h = int((float(rh) / float(rw)) * float(w));
draw_y += (h - draw_h) / 2;
}
else if (rw < rh) {
draw_w = (int)((float(rw) / float(rh)) * float(h));
draw_w = int((float(rw) / float(rh)) * float(h));
draw_h = h;
draw_x += (w - draw_w) / 2;
}
@ -1772,7 +1772,7 @@ static void icon_draw_texture(float x,
sizeof(text_overlay->text),
text_color,
&params);
text_width = (float)UI_fontstyle_string_width(&fstyle_small, text_overlay->text) / UI_UNIT_X /
text_width = float(UI_fontstyle_string_width(&fstyle_small, text_overlay->text)) / UI_UNIT_X /
zoom_factor;
}
@ -1868,7 +1868,7 @@ static void icon_draw_size(float x,
}
/* scale width and height according to aspect */
int w = (int)(fdraw_size / aspect + 0.5f);
int w = int(fdraw_size / aspect + 0.5f);
int h = int(fdraw_size / aspect + 0.5f);
DrawInfo *di = icon_ensure_drawinfo(icon);

View File

@ -335,7 +335,7 @@ static int ui_text_icon_width_ex(uiLayout *layout,
const float aspect = layout->root->block->aspect;
const uiFontStyle *fstyle = UI_FSTYLE_WIDGET;
return UI_fontstyle_string_width_with_block_aspect(fstyle, name, aspect) +
(int)ceilf(unit_x * margin);
int(ceilf(unit_x * margin));
}
return unit_x * 10;
}
@ -4315,7 +4315,7 @@ static void ui_litem_grid_flow_compute(ListBase *items,
int item_w, item_h;
ui_item_size(item, &item_w, &item_h);
global_avg_w += (float)(item_w * item_w);
global_avg_w += float(item_w * item_w);
global_totweight_w += float(item_w);
global_max_h = max_ii(global_max_h, item_h);
@ -4361,8 +4361,8 @@ static void ui_litem_grid_flow_compute(ListBase *items,
/* Compute positions and sizes of all cells. */
if (results->cos_x_array != nullptr && results->widths_array != nullptr) {
/* We enlarge/narrow columns evenly to match available width. */
const float wfac = (float)(parameters->litem_w -
(parameters->tot_columns - 1) * parameters->space_x) /
const float wfac = float(parameters->litem_w -
(parameters->tot_columns - 1) * parameters->space_x) /
tot_w;
for (int col = 0; col < parameters->tot_columns; col++) {
@ -4382,7 +4382,7 @@ static void ui_litem_grid_flow_compute(ListBase *items,
(results->cos_x_array[col] - parameters->litem_x);
}
else {
results->widths_array[col] = (int)(avg_w[col] * wfac);
results->widths_array[col] = int(avg_w[col] * wfac);
}
}
}
@ -4460,10 +4460,10 @@ static void ui_litem_estimate_grid_flow(uiLayout *litem)
gflow->tot_columns = 1;
}
else {
gflow->tot_columns = min_ii(max_ii((int)(litem->w / avg_w), 1), gflow->tot_items);
gflow->tot_columns = min_ii(max_ii(int(litem->w / avg_w), 1), gflow->tot_items);
}
}
gflow->tot_rows = (int)ceilf(float(gflow->tot_items) / gflow->tot_columns);
gflow->tot_rows = int(ceilf(float(gflow->tot_items) / gflow->tot_columns));
/* Try to tweak number of columns and rows to get better filling of last column or row,
* and apply 'modulo' value to number of columns or rows.
@ -4479,9 +4479,9 @@ static void ui_litem_estimate_grid_flow(uiLayout *litem)
gflow->tot_columns = gflow->tot_columns - (gflow->tot_columns % modulo);
}
/* Find smallest number of columns conserving computed optimal number of rows. */
for (gflow->tot_rows = (int)ceilf(float(gflow->tot_items) / gflow->tot_columns);
for (gflow->tot_rows = int(ceilf(float(gflow->tot_items) / gflow->tot_columns));
(gflow->tot_columns - step) > 0 &&
(int)ceilf(float(gflow->tot_items) / (gflow->tot_columns - step)) <= gflow->tot_rows;
int(ceilf(float(gflow->tot_items) / (gflow->tot_columns - step))) <= gflow->tot_rows;
gflow->tot_columns -= step) {
/* pass */
}
@ -4493,9 +4493,9 @@ static void ui_litem_estimate_grid_flow(uiLayout *litem)
gflow->tot_items);
}
/* Find smallest number of rows conserving computed optimal number of columns. */
for (gflow->tot_columns = (int)ceilf(float(gflow->tot_items) / gflow->tot_rows);
for (gflow->tot_columns = int(ceilf(float(gflow->tot_items) / gflow->tot_rows));
(gflow->tot_rows - step) > 0 &&
(int)ceilf(float(gflow->tot_items) / (gflow->tot_rows - step)) <= gflow->tot_columns;
int(ceilf(float(gflow->tot_items) / (gflow->tot_rows - step))) <= gflow->tot_columns;
gflow->tot_rows -= step) {
/* pass */
}
@ -4505,8 +4505,8 @@ static void ui_litem_estimate_grid_flow(uiLayout *litem)
/* Set evenly-spaced axes size
* (quick optimization in case we have even columns and rows). */
if (gflow->even_columns && gflow->even_rows) {
litem->w = (int)(gflow->tot_columns * avg_w) + space_x * (gflow->tot_columns - 1);
litem->h = (int)(gflow->tot_rows * max_h) + space_y * (gflow->tot_rows - 1);
litem->w = int(gflow->tot_columns * avg_w) + space_x * (gflow->tot_columns - 1);
litem->h = int(gflow->tot_rows * max_h) + space_y * (gflow->tot_rows - 1);
return;
}
}
@ -4714,7 +4714,7 @@ static void ui_litem_layout_split(uiLayout *litem)
x += colw;
if (item->next) {
const float width = extra_pixel + (w - (int)(w * percentage)) / (float(tot) - 1);
const float width = extra_pixel + (w - int(w * percentage)) / (float(tot) - 1);
extra_pixel = width - int(width);
colw = int(width);
colw = MAX2(colw, 0);
@ -6135,7 +6135,7 @@ uiLayout *uiItemsAlertBox(uiBlock *block, const int size, const eAlertIcon icon)
const float icon_padding = 5.0f * U.dpi_fac;
/* Calculate the factor of the fixed icon column depending on the block width. */
const float split_factor = (float(icon_size) + icon_padding) /
(float)(dialog_width - style->columnspace);
float(dialog_width - style->columnspace);
uiLayout *block_layout = UI_block_layout(
block, UI_LAYOUT_VERTICAL, UI_LAYOUT_PANEL, 0, 0, dialog_width, 0, 0, style);

View File

@ -3183,7 +3183,7 @@ void uiTemplatePreview(uiLayout *layout,
if (!ui_preview) {
ui_preview = MEM_cnew<uiPreview>(__func__);
BLI_strncpy(ui_preview->preview_id, preview_id, sizeof(ui_preview->preview_id));
ui_preview->height = (short)(UI_UNIT_Y * 7.6f);
ui_preview->height = short(UI_UNIT_Y * 7.6f);
BLI_addtail(&region->ui_previews, ui_preview);
}
@ -3225,7 +3225,7 @@ void uiTemplatePreview(uiLayout *layout,
0,
0,
UI_UNIT_X * 10,
(short)(UI_UNIT_Y * 0.3f),
short(UI_UNIT_Y * 0.3f),
&ui_preview->height,
UI_UNIT_Y,
UI_UNIT_Y * 50.0f,
@ -4028,7 +4028,7 @@ void uiTemplateHistogram(uiLayout *layout, PointerRNA *ptr, const char *propname
0,
0,
UI_UNIT_X * 10,
(short)(UI_UNIT_Y * 0.3f),
short(UI_UNIT_Y * 0.3f),
&hist->height,
UI_UNIT_Y,
UI_UNIT_Y * 20.0f,
@ -4090,7 +4090,7 @@ void uiTemplateWaveform(uiLayout *layout, PointerRNA *ptr, const char *propname)
0,
0,
UI_UNIT_X * 10,
(short)(UI_UNIT_Y * 0.3f),
short(UI_UNIT_Y * 0.3f),
&scopes->wavefrm_height,
UI_UNIT_Y,
UI_UNIT_Y * 20.0f,
@ -4152,7 +4152,7 @@ void uiTemplateVectorscope(uiLayout *layout, PointerRNA *ptr, const char *propna
0,
0,
UI_UNIT_X * 10,
(short)(UI_UNIT_Y * 0.3f),
short(UI_UNIT_Y * 0.3f),
&scopes->vecscope_height,
UI_UNIT_Y,
UI_UNIT_Y * 20.0f,

View File

@ -26,7 +26,7 @@
#include "BLF_api.h"
#include "ED_node.h"
#include "ED_node.hh"
#include "UI_interface.h"
#include "UI_interface_icons.h"
@ -133,9 +133,9 @@ static const uiWidgetStateInfo STATE_INFO_NULL = {0};
static void color_blend_v3_v3(uchar cp[3], const uchar cpstate[3], const float fac)
{
if (fac != 0.0f) {
cp[0] = (int)((1.0f - fac) * cp[0] + fac * cpstate[0]);
cp[1] = (int)((1.0f - fac) * cp[1] + fac * cpstate[1]);
cp[2] = (int)((1.0f - fac) * cp[2] + fac * cpstate[2]);
cp[0] = int((1.0f - fac) * cp[0] + fac * cpstate[0]);
cp[1] = int((1.0f - fac) * cp[1] + fac * cpstate[1]);
cp[2] = int((1.0f - fac) * cp[2] + fac * cpstate[2]);
}
}
@ -880,7 +880,7 @@ static void shape_preset_init_trias_ex(uiWidgetTrias *tria,
const float minsize = ELEM(where, 'r', 'l') ? BLI_rcti_size_y(rect) : BLI_rcti_size_x(rect);
/* center position and size */
float centx = (float)rect->xmin + 0.4f * minsize;
float centx = float(rect->xmin) + 0.4f * minsize;
float centy = float(rect->ymin) + 0.5f * minsize;
tria->size = sizex = sizey = -0.5f * triasize * minsize;
@ -1448,8 +1448,8 @@ static void widget_draw_submenu_tria(const uiBut *but,
const uiWidgetColors *wcol)
{
const float aspect = but->block->aspect * U.inv_dpi_fac;
const int tria_height = (int)(ICON_DEFAULT_HEIGHT / aspect);
const int tria_width = (int)(ICON_DEFAULT_WIDTH / aspect) - 2 * U.pixelsize;
const int tria_height = int(ICON_DEFAULT_HEIGHT / aspect);
const int tria_width = int(ICON_DEFAULT_WIDTH / aspect) - 2 * U.pixelsize;
const int xs = rect->xmax - tria_width;
const int ys = (rect->ymin + rect->ymax - tria_height) / 2.0f;
@ -1507,7 +1507,7 @@ static void ui_text_clip_right_ex(const uiFontStyle *fstyle,
/* At least one character, so clip and add the ellipsis. */
memcpy(str + l_end, sep, sep_len + 1); /* +1 for trailing '\0'. */
if (r_final_len) {
*r_final_len = (size_t)(l_end) + sep_len;
*r_final_len = size_t(l_end) + sep_len;
}
}
else {
@ -1602,7 +1602,7 @@ float UI_text_clip_middle_ex(const uiFontStyle *fstyle,
memmove(str + l_end + sep_len, str + r_offset, r_len);
memcpy(str + l_end, sep, sep_len);
/* -1 to remove trailing '\0'! */
final_lpart_len = (size_t)(l_end + sep_len + r_len - 1);
final_lpart_len = size_t(l_end + sep_len + r_len - 1);
while (BLF_width(fstyle->uifont_id, str, max_len) > okwidth) {
/* This will happen because a lot of string width processing is done in integer pixels,
@ -1638,10 +1638,10 @@ static void ui_text_clip_middle(const uiFontStyle *fstyle, uiBut *but, const rct
/* No margin for labels! */
const int border = ELEM(but->type, UI_BTYPE_LABEL, UI_BTYPE_MENU, UI_BTYPE_POPOVER) ?
0 :
(int)(UI_TEXT_CLIP_MARGIN + 0.5f);
const float okwidth = (float)max_ii(BLI_rcti_size_x(rect) - border, 0);
int(UI_TEXT_CLIP_MARGIN + 0.5f);
const float okwidth = float(max_ii(BLI_rcti_size_x(rect) - border, 0));
const size_t max_len = sizeof(but->drawstr);
const float minwidth = (float)(UI_DPI_ICON_SIZE) / but->block->aspect * 2.0f;
const float minwidth = float(UI_DPI_ICON_SIZE) / but->block->aspect * 2.0f;
but->ofs = 0;
but->strwidth = UI_text_clip_middle_ex(fstyle, but->drawstr, okwidth, minwidth, max_len, '\0');
@ -1661,10 +1661,10 @@ static void ui_text_clip_middle_protect_right(const uiFontStyle *fstyle,
/* No margin for labels! */
const int border = ELEM(but->type, UI_BTYPE_LABEL, UI_BTYPE_MENU, UI_BTYPE_POPOVER) ?
0 :
(int)(UI_TEXT_CLIP_MARGIN + 0.5f);
const float okwidth = (float)max_ii(BLI_rcti_size_x(rect) - border, 0);
int(UI_TEXT_CLIP_MARGIN + 0.5f);
const float okwidth = float(max_ii(BLI_rcti_size_x(rect) - border, 0));
const size_t max_len = sizeof(but->drawstr);
const float minwidth = (float)(UI_DPI_ICON_SIZE) / but->block->aspect * 2.0f;
const float minwidth = float(UI_DPI_ICON_SIZE) / but->block->aspect * 2.0f;
but->ofs = 0;
but->strwidth = UI_text_clip_middle_ex(fstyle, but->drawstr, okwidth, minwidth, max_len, rsep);
@ -1675,7 +1675,7 @@ static void ui_text_clip_middle_protect_right(const uiFontStyle *fstyle,
*/
static void ui_text_clip_cursor(const uiFontStyle *fstyle, uiBut *but, const rcti *rect)
{
const int border = (int)(UI_TEXT_CLIP_MARGIN + 0.5f);
const int border = int(UI_TEXT_CLIP_MARGIN + 0.5f);
const int okwidth = max_ii(BLI_rcti_size_x(rect) - border, 0);
BLI_assert(but->editstr && but->pos >= 0);
@ -2119,7 +2119,7 @@ static void widget_draw_text(const uiFontStyle *fstyle,
for (int i = 0; i < ARRAY_SIZE(keys); i++) {
const char *drawstr_menu = strchr(drawstr_ofs, keys[i]);
if (drawstr_menu != nullptr && drawstr_menu < drawstr_end) {
ul_index = (int)(drawstr_menu - drawstr_ofs);
ul_index = int(drawstr_menu - drawstr_ofs);
break;
}
}
@ -2213,7 +2213,8 @@ static void widget_draw_node_link_socket(const uiWidgetColors *wcol,
UI_widgetbase_draw_cache_flush();
GPU_blend(GPU_BLEND_NONE);
ED_node_socket_draw(static_cast<bNodeSocket *>(but->custom_data), rect, col, scale);
blender::ed::space_node::node_socket_draw(
static_cast<bNodeSocket *>(but->custom_data), rect, col, scale);
}
else {
widget_draw_icon(but, ICON_LAYER_USED, alpha, rect, wcol->text);
@ -2814,7 +2815,7 @@ void ui_hsvcircle_vals_from_pos(
/* duplication of code... well, simple is better now */
const float centx = BLI_rcti_cent_x_fl(rect);
const float centy = BLI_rcti_cent_y_fl(rect);
const float radius = (float)min_ii(BLI_rcti_size_x(rect), BLI_rcti_size_y(rect)) / 2.0f;
const float radius = float(min_ii(BLI_rcti_size_x(rect), BLI_rcti_size_y(rect))) / 2.0f;
const float m_delta[2] = {mx - centx, my - centy};
const float dist_sq = len_squared_v2(m_delta);
@ -2828,7 +2829,7 @@ void ui_hsvcircle_pos_from_vals(
/* duplication of code... well, simple is better now */
const float centx = BLI_rcti_cent_x_fl(rect);
const float centy = BLI_rcti_cent_y_fl(rect);
const float radius = (float)min_ii(BLI_rcti_size_x(rect), BLI_rcti_size_y(rect)) / 2.0f;
const float radius = float(min_ii(BLI_rcti_size_x(rect), BLI_rcti_size_y(rect))) / 2.0f;
const float ang = 2.0f * float(M_PI) * hsv[0] + float(M_PI_2);
@ -2853,7 +2854,7 @@ static void ui_draw_but_HSVCIRCLE(uiBut *but, const uiWidgetColors *wcol, const
const float radstep = 2.0f * float(M_PI) / float(tot);
const float centx = BLI_rcti_cent_x_fl(rect);
const float centy = BLI_rcti_cent_y_fl(rect);
const float radius = (float)min_ii(BLI_rcti_size_x(rect), BLI_rcti_size_y(rect)) / 2.0f;
const float radius = float(min_ii(BLI_rcti_size_x(rect), BLI_rcti_size_y(rect))) / 2.0f;
ColorPicker *cpicker = static_cast<ColorPicker *>(but->custom_data);
float rgb[3], hsv[3], rgb_center[3];
@ -3086,7 +3087,7 @@ void ui_draw_gradient(const rcti *rect,
sx1 = rect->xmin + dx * BLI_rcti_size_x(rect);
sx2 = rect->xmin + dx_next * BLI_rcti_size_x(rect);
sy = rect->ymin;
dy = (float)BLI_rcti_size_y(rect) / 3.0f;
dy = float(BLI_rcti_size_y(rect)) / 3.0f;
for (a = 0; a < 3; a++, sy += dy) {
immAttr4f(col, col0[a][0], col0[a][1], col0[a][2], alpha);
@ -3551,7 +3552,7 @@ static void widget_scroll(uiBut *but,
const float /*zoom*/)
{
/* calculate slider part */
const float value = (float)ui_but_value_get(but);
const float value = float(ui_but_value_get(but));
const float size = max_ff((but->softmax + but->a1 - but->softmin), 2.0f);
@ -3726,7 +3727,7 @@ static void widget_numslider(uiBut *but,
rcti rect1 = *rect;
float factor, factor_ui;
float factor_discard = 1.0f; /* No discard. */
const float value = (float)ui_but_value_get(but);
const float value = float(ui_but_value_get(but));
const float softmin = but->softmin;
const float softmax = but->softmax;
const float softrange = softmax - softmin;
@ -3758,7 +3759,7 @@ static void widget_numslider(uiBut *but,
}
}
const float width = (float)BLI_rcti_size_x(rect);
const float width = float(BLI_rcti_size_x(rect));
factor_ui = factor * width;
/* The rectangle width needs to be at least twice the corner radius for the round corners
* to be drawn properly. */
@ -4979,9 +4980,9 @@ static void ui_draw_clip_tri(uiBlock *block, rcti *rect, uiWidgetType *wt)
float draw_color[4];
const uchar *color = wt->wcol.text;
draw_color[0] = (float(color[0])) / 255.0f;
draw_color[1] = (float(color[1])) / 255.0f;
draw_color[2] = (float(color[2])) / 255.0f;
draw_color[0] = float(color[0]) / 255.0f;
draw_color[1] = float(color[1]) / 255.0f;
draw_color[2] = float(color[2]) / 255.0f;
draw_color[3] = 1.0f;
if (block->flag & UI_BLOCK_CLIPTOP) {
@ -5129,7 +5130,7 @@ static void draw_disk_shaded(float start,
immBegin(GPU_PRIM_TRI_STRIP, subd * 2);
for (int i = 0; i < subd; i++) {
const float a = start + ((i) / (float)(subd - 1)) * angle;
const float a = start + ((i) / float(subd - 1)) * angle;
const float s = sinf(a);
const float c = cosf(a);
const float y1 = s * radius_int;
@ -5380,9 +5381,9 @@ void ui_draw_menu_item(const uiFontStyle *fstyle,
{
char drawstr[UI_MAX_DRAW_STR];
const float okwidth = (float)BLI_rcti_size_x(rect);
const float okwidth = float(BLI_rcti_size_x(rect));
const size_t max_len = sizeof(drawstr);
const float minwidth = (float)(UI_DPI_ICON_SIZE);
const float minwidth = float(UI_DPI_ICON_SIZE);
BLI_strncpy(drawstr, name, sizeof(drawstr));
if (drawstr[0]) {
@ -5431,7 +5432,7 @@ void ui_draw_menu_item(const uiFontStyle *fstyle,
char hint_drawstr[UI_MAX_DRAW_STR];
{
const size_t max_len = sizeof(hint_drawstr);
const float minwidth = (float)(UI_DPI_ICON_SIZE);
const float minwidth = float(UI_DPI_ICON_SIZE);
BLI_strncpy(hint_drawstr, cpoin + 1, sizeof(hint_drawstr));
if (hint_drawstr[0] && (max_hint_width < INT_MAX)) {
@ -5484,7 +5485,7 @@ void ui_draw_preview_item_stateless(const uiFontStyle *fstyle,
{
char drawstr[UI_MAX_DRAW_STR];
const float okwidth = (float)BLI_rcti_size_x(&trect);
const float okwidth = float(BLI_rcti_size_x(&trect));
const size_t max_len = sizeof(drawstr);
const float minwidth = float(UI_DPI_ICON_SIZE);

View File

@ -1141,51 +1141,51 @@ void UI_FontThemeColor(int fontid, int colorid)
float UI_GetThemeValuef(int colorid)
{
const uchar *cp = UI_ThemeGetColorPtr(g_theme_state.theme, g_theme_state.spacetype, colorid);
return (float(cp[0]));
return float(cp[0]);
}
int UI_GetThemeValue(int colorid)
{
const uchar *cp = UI_ThemeGetColorPtr(g_theme_state.theme, g_theme_state.spacetype, colorid);
return (int(cp[0]));
return int(cp[0]);
}
float UI_GetThemeValueTypef(int colorid, int spacetype)
{
const uchar *cp = UI_ThemeGetColorPtr(g_theme_state.theme, spacetype, colorid);
return (float(cp[0]));
return float(cp[0]);
}
int UI_GetThemeValueType(int colorid, int spacetype)
{
const uchar *cp = UI_ThemeGetColorPtr(g_theme_state.theme, spacetype, colorid);
return (int(cp[0]));
return int(cp[0]);
}
void UI_GetThemeColor3fv(int colorid, float col[3])
{
const uchar *cp = UI_ThemeGetColorPtr(g_theme_state.theme, g_theme_state.spacetype, colorid);
col[0] = (float(cp[0])) / 255.0f;
col[1] = (float(cp[1])) / 255.0f;
col[2] = (float(cp[2])) / 255.0f;
col[0] = float(cp[0]) / 255.0f;
col[1] = float(cp[1]) / 255.0f;
col[2] = float(cp[2]) / 255.0f;
}
void UI_GetThemeColor4fv(int colorid, float col[4])
{
const uchar *cp = UI_ThemeGetColorPtr(g_theme_state.theme, g_theme_state.spacetype, colorid);
col[0] = (float(cp[0])) / 255.0f;
col[1] = (float(cp[1])) / 255.0f;
col[2] = (float(cp[2])) / 255.0f;
col[3] = (float(cp[3])) / 255.0f;
col[0] = float(cp[0]) / 255.0f;
col[1] = float(cp[1]) / 255.0f;
col[2] = float(cp[2]) / 255.0f;
col[3] = float(cp[3]) / 255.0f;
}
void UI_GetThemeColorType4fv(int colorid, int spacetype, float col[4])
{
const uchar *cp = UI_ThemeGetColorPtr(g_theme_state.theme, spacetype, colorid);
col[0] = (float(cp[0])) / 255.0f;
col[1] = (float(cp[1])) / 255.0f;
col[2] = (float(cp[2])) / 255.0f;
col[3] = (float(cp[3])) / 255.0f;
col[0] = float(cp[0]) / 255.0f;
col[1] = float(cp[1]) / 255.0f;
col[2] = float(cp[2]) / 255.0f;
col[3] = float(cp[3]) / 255.0f;
}
void UI_GetThemeColorShade3fv(int colorid, int offset, float col[3])
@ -1361,9 +1361,9 @@ void UI_GetThemeColor4ubv(int colorid, uchar col[4])
void UI_GetThemeColorType3fv(int colorid, int spacetype, float col[3])
{
const uchar *cp = UI_ThemeGetColorPtr(g_theme_state.theme, spacetype, colorid);
col[0] = (float(cp[0])) / 255.0f;
col[1] = (float(cp[1])) / 255.0f;
col[2] = (float(cp[2])) / 255.0f;
col[0] = float(cp[0]) / 255.0f;
col[1] = float(cp[1]) / 255.0f;
col[2] = float(cp[2]) / 255.0f;
}
void UI_GetThemeColorType3ubv(int colorid, int spacetype, uchar col[3])

View File

@ -241,6 +241,13 @@ struct AddOperationExecutor {
const geometry::AddCurvesOnMeshOutputs add_outputs = geometry::add_curves_on_mesh(
*curves_orig_, add_inputs);
bke::MutableAttributeAccessor attributes = curves_orig_->attributes_for_write();
if (bke::GSpanAttributeWriter selection = attributes.lookup_for_write_span(".selection")) {
curves::fill_selection_true(selection.span.slice(selection.domain == ATTR_DOMAIN_POINT ?
add_outputs.new_points_range :
add_outputs.new_curves_range));
selection.finish();
}
if (add_outputs.uv_error) {
report_invalid_uv_map(stroke_extension.reports);

View File

@ -132,8 +132,9 @@ struct CombOperationExecutor {
transforms_ = CurvesSurfaceTransforms(*curves_ob_orig_, curves_id_orig_->surface);
point_factors_ = get_point_selection(*curves_id_orig_);
curve_selection_ = retrieve_selected_curves(*curves_id_orig_, selected_curve_indices_);
point_factors_ = curves_orig_->attributes().lookup_or_default<float>(
".selection", ATTR_DOMAIN_POINT, 1.0f);
curve_selection_ = curves::retrieve_selected_curves(*curves_id_orig_, selected_curve_indices_);
brush_pos_prev_re_ = self_->brush_pos_last_re_;
brush_pos_re_ = stroke_extension.mouse_position;

View File

@ -97,7 +97,7 @@ struct DeleteOperationExecutor {
curves_ = &CurvesGeometry::wrap(curves_id_->geometry);
selected_curve_indices_.clear();
curve_selection_ = retrieve_selected_curves(*curves_id_, selected_curve_indices_);
curve_selection_ = curves::retrieve_selected_curves(*curves_id_, selected_curve_indices_);
curves_sculpt_ = ctx_.scene->toolsettings->curves_sculpt;
brush_ = BKE_paint_brush_for_read(&curves_sculpt_->paint);

View File

@ -286,6 +286,13 @@ struct DensityAddOperationExecutor {
const geometry::AddCurvesOnMeshOutputs add_outputs = geometry::add_curves_on_mesh(
*curves_orig_, add_inputs);
bke::MutableAttributeAccessor attributes = curves_orig_->attributes_for_write();
if (bke::GSpanAttributeWriter selection = attributes.lookup_for_write_span(".selection")) {
curves::fill_selection_true(selection.span.slice(selection.domain == ATTR_DOMAIN_POINT ?
add_outputs.new_points_range :
add_outputs.new_curves_range));
selection.finish();
}
if (add_outputs.uv_error) {
report_invalid_uv_map(stroke_extension.reports);
@ -562,7 +569,7 @@ struct DensitySubtractOperationExecutor {
minimum_distance_ = brush_->curves_sculpt_settings->minimum_distance;
curve_selection_ = retrieve_selected_curves(*curves_id_, selected_curve_indices_);
curve_selection_ = curves::retrieve_selected_curves(*curves_id_, selected_curve_indices_);
transforms_ = CurvesSurfaceTransforms(*object_, curves_id_->surface);
const eBrushFalloffShape falloff_shape = static_cast<eBrushFalloffShape>(

View File

@ -280,8 +280,9 @@ struct CurvesEffectOperationExecutor {
return;
}
curve_selection_factors_ = get_curves_selection(*curves_id_);
curve_selection_ = retrieve_selected_curves(*curves_id_, selected_curve_indices_);
curve_selection_factors_ = curves_->attributes().lookup_or_default(
".selection", ATTR_DOMAIN_CURVE, 1.0f);
curve_selection_ = curves::retrieve_selected_curves(*curves_id_, selected_curve_indices_);
const CurvesSculpt &curves_sculpt = *ctx_.scene->toolsettings->curves_sculpt;
brush_ = BKE_paint_brush_for_read(&curves_sculpt.paint);

View File

@ -11,10 +11,11 @@
#include "BLI_vector.hh"
#include "BLI_virtual_array.hh"
#include "BKE_attribute.h"
#include "BKE_attribute.hh"
#include "BKE_crazyspace.hh"
#include "BKE_curves.hh"
#include "ED_curves.h"
#include "ED_curves_sculpt.h"
struct ARegion;
@ -92,15 +93,7 @@ std::optional<CurvesBrush3D> sample_curves_3d_brush(const Depsgraph &depsgraph,
Vector<float4x4> get_symmetry_brush_transforms(eCurvesSymmetryType symmetry);
/**
* Get the floating point selection on the curve domain, averaged from points if necessary.
*/
VArray<float> get_curves_selection(const Curves &curves_id);
/**
* Get the floating point selection on the curve domain, copied from curves if necessary.
*/
VArray<float> get_point_selection(const Curves &curves_id);
bke::SpanAttributeWriter<float> float_selection_ensure(Curves &curves_id);
/** See #move_last_point_and_resample. */
struct MoveAndResampleBuffers {

View File

@ -363,12 +363,14 @@ static int select_random_exec(bContext *C, wmOperator *op)
for (Curves *curves_id : unique_curves) {
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id->geometry);
const bool was_anything_selected = curves::has_anything_selected(*curves_id);
bke::SpanAttributeWriter<float> attribute = float_selection_ensure(*curves_id);
MutableSpan<float> selection = attribute.span;
if (!was_anything_selected) {
selection.fill(1.0f);
}
switch (curves_id->selection_domain) {
case ATTR_DOMAIN_POINT: {
MutableSpan<float> selection = curves.selection_point_float_for_write();
if (!was_anything_selected) {
selection.fill(1.0f);
}
if (partial) {
if (constant_per_curve) {
for (const int curve_i : curves.curves_range()) {
@ -408,10 +410,6 @@ static int select_random_exec(bContext *C, wmOperator *op)
break;
}
case ATTR_DOMAIN_CURVE: {
MutableSpan<float> selection = curves.selection_curve_float_for_write();
if (!was_anything_selected) {
selection.fill(1.0f);
}
if (partial) {
for (const int curve_i : curves.curves_range()) {
const float random_value = next_partial_random_value();
@ -429,9 +427,6 @@ static int select_random_exec(bContext *C, wmOperator *op)
break;
}
}
MutableSpan<float> selection = curves_id->selection_domain == ATTR_DOMAIN_POINT ?
curves.selection_point_float_for_write() :
curves.selection_curve_float_for_write();
const bool was_any_selected = std::any_of(
selection.begin(), selection.end(), [](const float v) { return v > 0.0f; });
if (was_any_selected) {
@ -445,6 +440,8 @@ static int select_random_exec(bContext *C, wmOperator *op)
}
}
attribute.finish();
/* Use #ID_RECALC_GEOMETRY instead of #ID_RECALC_SELECT because it is handled as a generic
* attribute for now. */
DEG_id_tag_update(&curves_id->id, ID_RECALC_GEOMETRY);
@ -541,22 +538,35 @@ static int select_end_exec(bContext *C, wmOperator *op)
for (Curves *curves_id : unique_curves) {
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id->geometry);
bke::MutableAttributeAccessor attributes = curves.attributes_for_write();
const bool was_anything_selected = curves::has_anything_selected(*curves_id);
MutableSpan<float> selection = curves.selection_point_float_for_write();
curves::ensure_selection_attribute(*curves_id, CD_PROP_BOOL);
bke::GSpanAttributeWriter selection = attributes.lookup_for_write_span(".selection");
if (!was_anything_selected) {
selection.fill(1.0f);
curves::fill_selection_true(selection.span);
}
threading::parallel_for(curves.curves_range(), 256, [&](const IndexRange range) {
for (const int curve_i : range) {
const IndexRange points = curves.points_for_curve(curve_i);
if (end_points) {
selection.slice(points.drop_back(amount)).fill(0.0f);
}
else {
selection.slice(points.drop_front(amount)).fill(0.0f);
}
selection.span.type().to_static_type_tag<bool, float>([&](auto type_tag) {
using T = typename decltype(type_tag)::type;
if constexpr (std::is_void_v<T>) {
BLI_assert_unreachable();
}
else {
MutableSpan<T> selection_typed = selection.span.typed<T>();
threading::parallel_for(curves.curves_range(), 256, [&](const IndexRange range) {
for (const int curve_i : range) {
const IndexRange points = curves.points_for_curve(curve_i);
if (end_points) {
selection_typed.slice(points.drop_back(amount)).fill(T(0));
}
else {
selection_typed.slice(points.drop_front(amount)).fill(T(0));
}
}
});
}
});
selection.finish();
/* Use #ID_RECALC_GEOMETRY instead of #ID_RECALC_SELECT because it is handled as a generic
* attribute for now. */
@ -592,12 +602,14 @@ namespace select_grow {
struct GrowOperatorDataPerCurve : NonCopyable, NonMovable {
Curves *curves_id;
Vector<int> selected_points;
Vector<int> unselected_points;
Vector<int64_t> selected_point_indices;
Vector<int64_t> unselected_point_indices;
IndexMask selected_points;
IndexMask unselected_points;
Array<float> distances_to_selected;
Array<float> distances_to_unselected;
Array<float> original_selection;
GArray<> original_selection;
float pixel_to_distance_factor;
};
@ -621,7 +633,7 @@ static void update_points_selection(const GrowOperatorDataPerCurve &data,
}
});
threading::parallel_for(data.selected_points.index_range(), 512, [&](const IndexRange range) {
for (const int point_i : data.selected_points.as_span().slice(range)) {
for (const int point_i : data.selected_points.slice(range)) {
points_selection[point_i] = 1.0f;
}
});
@ -637,7 +649,7 @@ static void update_points_selection(const GrowOperatorDataPerCurve &data,
});
threading::parallel_for(
data.unselected_points.index_range(), 512, [&](const IndexRange range) {
for (const int point_i : data.unselected_points.as_span().slice(range)) {
for (const int point_i : data.unselected_points.slice(range)) {
points_selection[point_i] = 0.0f;
}
});
@ -653,18 +665,19 @@ static int select_grow_update(bContext *C, wmOperator *op, const float mouse_dif
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id.geometry);
const float distance = curve_op_data->pixel_to_distance_factor * mouse_diff_x;
bke::SpanAttributeWriter<float> selection = float_selection_ensure(curves_id);
/* Grow or shrink selection based on precomputed distances. */
switch (curves_id.selection_domain) {
switch (selection.domain) {
case ATTR_DOMAIN_POINT: {
MutableSpan<float> points_selection = curves.selection_point_float_for_write();
update_points_selection(*curve_op_data, distance, points_selection);
update_points_selection(*curve_op_data, distance, selection.span);
break;
}
case ATTR_DOMAIN_CURVE: {
Array<float> new_points_selection(curves.points_num());
update_points_selection(*curve_op_data, distance, new_points_selection);
/* Propagate grown point selection to the curve selection. */
MutableSpan<float> curves_selection = curves.selection_curve_float_for_write();
MutableSpan<float> curves_selection = selection.span;
for (const int curve_i : curves.curves_range()) {
const IndexRange points = curves.points_for_curve(curve_i);
const Span<float> points_selection = new_points_selection.as_span().slice(points);
@ -674,8 +687,12 @@ static int select_grow_update(bContext *C, wmOperator *op, const float mouse_dif
}
break;
}
default:
BLI_assert_unreachable();
}
selection.finish();
/* Use #ID_RECALC_GEOMETRY instead of #ID_RECALC_SELECT because it is handled as a generic
* attribute for now. */
DEG_id_tag_update(&curves_id.id, ID_RECALC_GEOMETRY);
@ -685,57 +702,28 @@ static int select_grow_update(bContext *C, wmOperator *op, const float mouse_dif
return OPERATOR_FINISHED;
}
static void select_grow_invoke_per_curve(Curves &curves_id,
Object &curves_ob,
static void select_grow_invoke_per_curve(const Curves &curves_id,
const Object &curves_ob,
const ARegion &region,
const View3D &v3d,
const RegionView3D &rv3d,
GrowOperatorDataPerCurve &curve_op_data)
{
curve_op_data.curves_id = &curves_id;
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id.geometry);
const CurvesGeometry &curves = CurvesGeometry::wrap(curves_id.geometry);
const Span<float3> positions = curves.positions();
/* Find indices of selected and unselected points. */
switch (curves_id.selection_domain) {
case ATTR_DOMAIN_POINT: {
const VArray<float> points_selection = curves.selection_point_float();
curve_op_data.original_selection.reinitialize(points_selection.size());
points_selection.materialize(curve_op_data.original_selection);
for (const int point_i : points_selection.index_range()) {
const float point_selection = points_selection[point_i];
if (point_selection > 0.0f) {
curve_op_data.selected_points.append(point_i);
}
else {
curve_op_data.unselected_points.append(point_i);
}
}
break;
}
case ATTR_DOMAIN_CURVE: {
const VArray<float> curves_selection = curves.selection_curve_float();
curve_op_data.original_selection.reinitialize(curves_selection.size());
curves_selection.materialize(curve_op_data.original_selection);
for (const int curve_i : curves_selection.index_range()) {
const float curve_selection = curves_selection[curve_i];
const IndexRange points = curves.points_for_curve(curve_i);
if (curve_selection > 0.0f) {
for (const int point_i : points) {
curve_op_data.selected_points.append(point_i);
}
}
else {
for (const int point_i : points) {
curve_op_data.unselected_points.append(point_i);
}
}
}
break;
}
if (const bke::GAttributeReader original_selection = curves.attributes().lookup(".selection")) {
curve_op_data.original_selection = GArray<>(original_selection.varray.type(),
original_selection.varray.size());
original_selection.varray.materialize(curve_op_data.original_selection.data());
}
/* Find indices of selected and unselected points. */
curve_op_data.selected_points = curves::retrieve_selected_points(
curves_id, curve_op_data.selected_point_indices);
curve_op_data.unselected_points = curve_op_data.selected_points.invert(
curves.points_range(), curve_op_data.unselected_point_indices);
threading::parallel_invoke(
1024 < curve_op_data.selected_points.size() + curve_op_data.unselected_points.size(),
[&]() {
@ -838,6 +826,7 @@ static int select_grow_invoke(bContext *C, wmOperator *op, const wmEvent *event)
Curves &curves_id = *static_cast<Curves *>(active_ob->data);
auto curve_op_data = std::make_unique<GrowOperatorDataPerCurve>();
curve_op_data->curves_id = &curves_id;
select_grow_invoke_per_curve(curves_id, *active_ob, *region, *v3d, *rv3d, *curve_op_data);
op_data->per_curve.append(std::move(curve_op_data));
@ -865,17 +854,15 @@ static int select_grow_modal(bContext *C, wmOperator *op, const wmEvent *event)
for (std::unique_ptr<GrowOperatorDataPerCurve> &curve_op_data : op_data.per_curve) {
Curves &curves_id = *curve_op_data->curves_id;
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id.geometry);
switch (curves_id.selection_domain) {
case ATTR_DOMAIN_POINT: {
MutableSpan<float> points_selection = curves.selection_point_float_for_write();
points_selection.copy_from(curve_op_data->original_selection);
break;
}
case ATTR_DOMAIN_CURVE: {
MutableSpan<float> curves_seletion = curves.selection_curve_float_for_write();
curves_seletion.copy_from(curve_op_data->original_selection);
break;
}
bke::MutableAttributeAccessor attributes = curves.attributes_for_write();
attributes.remove(".selection");
if (!curve_op_data->original_selection.is_empty()) {
attributes.add(
".selection",
eAttrDomain(curves_id.selection_domain),
bke::cpp_type_to_custom_data_type(curve_op_data->original_selection.type()),
bke::AttributeInitVArray(GVArray::ForSpan(curve_op_data->original_selection)));
}
/* Use #ID_RECALC_GEOMETRY instead of #ID_RECALC_SELECT because it is handled as a generic

View File

@ -105,8 +105,9 @@ struct PinchOperationExecutor {
transforms_ = CurvesSurfaceTransforms(*object_, curves_id_->surface);
point_factors_ = get_point_selection(*curves_id_);
curve_selection_ = retrieve_selected_curves(*curves_id_, selected_curve_indices_);
point_factors_ = curves_->attributes().lookup_or_default<float>(
".selection", ATTR_DOMAIN_POINT, 1.0f);
curve_selection_ = curves::retrieve_selected_curves(*curves_id_, selected_curve_indices_);
brush_pos_re_ = stroke_extension.mouse_position;
const eBrushFalloffShape falloff_shape = static_cast<eBrushFalloffShape>(

Some files were not shown because too many files have changed in this diff Show More