From a7ad2cc27d6105b4e089411e4a582915f85d0aad Mon Sep 17 00:00:00 2001 From: Ilya Kurdyukov Date: Thu, 21 Sep 2023 17:13:27 +0700 Subject: [PATCH] blender-3.6.2 e2k support Using LCC 1.26.20 --- .../btSequentialImpulseConstraintSolver.cpp | 4 ++++ extern/cuew/include/cuew.h | 2 +- extern/fast_float/fast_float.h | 2 +- extern/hipew/include/hipew.h | 2 +- intern/cycles/util/defines.h | 2 +- intern/cycles/util/math_float3.h | 6 +++--- intern/cycles/util/math_float4.h | 2 +- intern/cycles/util/optimization.h | 8 ++++++++ intern/cycles/util/simd.h | 4 ++-- intern/cycles/util/system.cpp | 6 ++++++ intern/cycles/util/transform_inverse.h | 2 +- intern/libmv/libmv/build/build_config.h | 8 ++++++++ .../simple_pipeline/camera_intrinsics_impl.h | 6 ++++-- .../intern/geometry_component_curves.cc | 2 +- .../intern/geometry_component_instances.cc | 2 +- .../blenkernel/intern/geometry_component_mesh.cc | 2 +- .../intern/geometry_component_pointcloud.cc | 4 ++-- source/blender/blenkernel/intern/volume.cc | 3 +++ .../blenlib/BLI_enumerable_thread_specific.hh | 7 +++++++ source/blender/blenlib/BLI_hash.hh | 3 +++ source/blender/blenlib/BLI_math_matrix_types.hh | 16 ++++++++++++++++ source/blender/blenlib/BLI_memory_utils.hh | 1 + source/blender/blenlib/BLI_utildefines.h | 2 +- source/blender/blenlib/BLI_virtual_array.hh | 3 +++ .../interface_template_attribute_search.cc | 4 ++++ 25 files changed, 84 insertions(+), 19 deletions(-) diff --git a/extern/bullet2/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp b/extern/bullet2/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp index d2641c5..a0af3a4 100644 --- a/extern/bullet2/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp +++ b/extern/bullet2/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp @@ -115,7 +115,11 @@ static inline __m128 btSimdDot3(__m128 vec0, __m128 vec1) #define USE_FMA 1 #define USE_FMA3_INSTEAD_FMA4 1 +#ifdef __e2k__ +#define USE_SSE4_DOT 0 +#else #define USE_SSE4_DOT 1 +#endif #define SSE4_DP(a, b) _mm_dp_ps(a, b, 0x7f) #define SSE4_DP_FP(a, b) _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f)) diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h index 278fb11..ab32d0c 100644 --- a/extern/cuew/include/cuew.h +++ b/extern/cuew/include/cuew.h @@ -127,7 +127,7 @@ typedef uint32_t cuuint32_t; typedef uint64_t cuuint64_t; #endif -#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__) || defined(__ppc64__) || defined(__PPC64__) +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__) || defined(__ppc64__) || defined(__PPC64__) || defined(__LP64__) typedef unsigned long long CUdeviceptr; #else typedef unsigned int CUdeviceptr; diff --git a/extern/fast_float/fast_float.h b/extern/fast_float/fast_float.h index 479aecc..32337f9 100644 --- a/extern/fast_float/fast_float.h +++ b/extern/fast_float/fast_float.h @@ -2200,7 +2200,7 @@ struct pow5_tables { 298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL, }; #ifdef FASTFLOAT_64BIT_LIMB - constexpr static limb large_power_of_5[] = { + constexpr static limb large_power_of_5[5] = { 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, 10482974169319127550UL, 198276706040285095UL}; #else diff --git a/extern/hipew/include/hipew.h b/extern/hipew/include/hipew.h index f82654f..b9c3b7e 100644 --- a/extern/hipew/include/hipew.h +++ b/extern/hipew/include/hipew.h @@ -86,7 +86,7 @@ typedef uint32_t hipuint32_t; typedef uint64_t hipuint64_t; #endif -#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__) || defined(__ppc64__) || defined(__PPC64__) +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__) || defined(__ppc64__) || defined(__PPC64__) || defined(__LP64__) typedef unsigned long long hipDeviceptr_t; #else typedef unsigned int hipDeviceptr_t; diff --git a/intern/cycles/util/defines.h b/intern/cycles/util/defines.h index d5be14c..a217455 100644 --- a/intern/cycles/util/defines.h +++ b/intern/cycles/util/defines.h @@ -12,7 +12,7 @@ /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ - defined(_M_X64) || defined(__aarch64__) + defined(_M_X64) || defined(__aarch64__) || defined(__e2k__) # define __KERNEL_64_BIT__ #endif diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h index 7376240..629cc57 100644 --- a/intern/cycles/util/math_float3.h +++ b/intern/cycles/util/math_float3.h @@ -199,7 +199,7 @@ ccl_device_inline bool operator!=(const float3 a, const float3 b) ccl_device_inline float dot(const float3 a, const float3 b) { -# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) +# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) && !defined(__e2k__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); # else return a.x * b.x + a.y * b.y + a.z * b.z; @@ -219,7 +219,7 @@ ccl_device_inline float dot_xy(const float3 a, const float3 b) ccl_device_inline float len(const float3 a) { -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) && !defined(__e2k__) return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); #else return sqrtf(dot(a, a)); @@ -263,7 +263,7 @@ ccl_device_inline float3 cross(const float3 a, const float3 b) ccl_device_inline float3 normalize(const float3 a) { -# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) +# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) && !defined(__e2k__) __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); return float3(_mm_div_ps(a.m128, norm)); # else diff --git a/intern/cycles/util/math_float4.h b/intern/cycles/util/math_float4.h index 1f5f391..11a5ef6 100644 --- a/intern/cycles/util/math_float4.h +++ b/intern/cycles/util/math_float4.h @@ -363,7 +363,7 @@ ccl_device_inline float reduce_max(const float4 a) #if !defined(__KERNEL_METAL__) ccl_device_inline float dot(const float4 a, const float4 b) { -# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) +# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) && !defined(__e2k__) # if defined(__KERNEL_NEON__) __m128 t = vmulq_f32(a, b); return vaddvq_f32(t); diff --git a/intern/cycles/util/optimization.h b/intern/cycles/util/optimization.h index b6194dc..6be3940 100644 --- a/intern/cycles/util/optimization.h +++ b/intern/cycles/util/optimization.h @@ -48,6 +48,14 @@ # define __KERNEL_SSE3__ # define __KERNEL_SSE41__ +# elif defined(__e2k__) + +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSSE3__ +# define __KERNEL_SSE41__ + # endif #endif diff --git a/intern/cycles/util/simd.h b/intern/cycles/util/simd.h index 7da0a0f..5f9e6f2 100644 --- a/intern/cycles/util/simd.h +++ b/intern/cycles/util/simd.h @@ -20,7 +20,7 @@ # include "util/windows.h" #elif defined(_MSC_VER) # include -#elif (defined(__x86_64__) || defined(__i386__)) +#elif (defined(__x86_64__) || defined(__i386__) || defined(__e2k__)) # include #elif defined(__KERNEL_NEON__) # define SSE2NEON_PRECISE_MINMAX 1 @@ -224,7 +224,7 @@ type shuffle_neon(const type &a, const type &b) # endif #endif -#if defined(__LZCNT__) +#if defined(__LZCNT__) && !defined(__e2k__) # define _lzcnt_u32 __lzcnt32 # define _lzcnt_u64 __lzcnt64 #endif diff --git a/intern/cycles/util/system.cpp b/intern/cycles/util/system.cpp index c1c4963..d40e29a 100644 --- a/intern/cycles/util/system.cpp +++ b/intern/cycles/util/system.cpp @@ -215,6 +215,12 @@ bool system_cpu_support_avx2() CPUCapabilities &caps = system_cpu_capabilities(); return caps.avx2; } +#elif defined(__e2k__) +bool system_cpu_support_sse2() { return true; } +bool system_cpu_support_sse3() { return true; } +bool system_cpu_support_sse41() { return true; } +bool system_cpu_support_avx() { return false; } +bool system_cpu_support_avx2() { return false; } #else bool system_cpu_support_sse2() diff --git a/intern/cycles/util/transform_inverse.h b/intern/cycles/util/transform_inverse.h index 2faac57..69b359f 100644 --- a/intern/cycles/util/transform_inverse.h +++ b/intern/cycles/util/transform_inverse.h @@ -29,7 +29,7 @@ ccl_device_forceinline float3 transform_inverse_cross(const float3 a_, const flo ccl_device_forceinline float transform_inverse_dot(const float3 a_, const float3 b_) { -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) && !defined(__e2k__) const __m128 a = (const __m128 &)a_; const __m128 b = (const __m128 &)b_; return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); diff --git a/intern/libmv/libmv/build/build_config.h b/intern/libmv/libmv/build/build_config.h index 1d87660..d00de4d 100644 --- a/intern/libmv/libmv/build/build_config.h +++ b/intern/libmv/libmv/build/build_config.h @@ -308,6 +308,11 @@ # define ARCH_CPU_PPC64 1 # define ARCH_CPU_64_BITS 1 # define ARCH_CPU_LITTLE_ENDIAN 1 +#elif defined(__e2k__) +# define ARCH_CPU_E2K_FAMILY 1 +# define ARCH_CPU_E2K 1 +# define ARCH_CPU_64_BITS 1 +# define ARCH_CPU_LITTLE_ENDIAN 1 #elif defined(__ARMEL__) # define ARCH_CPU_ARM_FAMILY 1 # define ARCH_CPU_ARMEL 1 @@ -372,6 +377,9 @@ #if !defined(ARCH_CPU_MIPS_FAMILY) # define ARCH_CPU_MIPS_FAMILY 0 #endif +#if !defined(ARCH_CPU_E2K_FAMILY) +# define ARCH_CPU_E2K_FAMILY 0 +#endif #if !defined(ARCH_CPU_PPC64_FAMILY) # define ARCH_CPU_PPC64_FAMILY 0 #endif diff --git a/intern/libmv/libmv/simple_pipeline/camera_intrinsics_impl.h b/intern/libmv/libmv/simple_pipeline/camera_intrinsics_impl.h index c8c4700..633ee11 100644 --- a/intern/libmv/libmv/simple_pipeline/camera_intrinsics_impl.h +++ b/intern/libmv/libmv/simple_pipeline/camera_intrinsics_impl.h @@ -64,8 +64,9 @@ void LookupWarpGrid::Compute(const CameraIntrinsics& intrinsics, double aspx = (double)w / intrinsics.image_width(); double aspy = (double)h / intrinsics.image_height(); #if defined(_OPENMP) + int nthreads __attribute__((unused)) = threads_; # pragma omp parallel for schedule(static) \ - num_threads(threads_) if (threads_ > 1 && height > 100) + num_threads(nthreads) if (nthreads > 1 && height > 100) #endif for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { @@ -133,8 +134,9 @@ void LookupWarpGrid::Apply(const PixelType* input_buffer, int channels, PixelType* output_buffer) { #if defined(_OPENMP) + int nthreads __attribute__((unused)) = threads_; # pragma omp parallel for schedule(static) \ - num_threads(threads_) if (threads_ > 1 && height > 100) + num_threads(nthreads) if (nthreads > 1 && height > 100) #endif for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { diff --git a/source/blender/blenkernel/intern/geometry_component_curves.cc b/source/blender/blenkernel/intern/geometry_component_curves.cc index e20c130..954e4b7 100644 --- a/source/blender/blenkernel/intern/geometry_component_curves.cc +++ b/source/blender/blenkernel/intern/geometry_component_curves.cc @@ -565,9 +565,9 @@ static ComponentAttributeProviders create_attribute_providers_for_curve() /** \} */ +static const ComponentAttributeProviders providers = create_attribute_providers_for_curve(); static AttributeAccessorFunctions get_curves_accessor_functions() { - static const ComponentAttributeProviders providers = create_attribute_providers_for_curve(); AttributeAccessorFunctions fn = attribute_accessor_functions::accessor_functions_for_providers(); fn.domain_size = [](const void *owner, const eAttrDomain domain) { diff --git a/source/blender/blenkernel/intern/geometry_component_instances.cc b/source/blender/blenkernel/intern/geometry_component_instances.cc index 3af5d46..59044ef 100644 --- a/source/blender/blenkernel/intern/geometry_component_instances.cc +++ b/source/blender/blenkernel/intern/geometry_component_instances.cc @@ -210,9 +210,9 @@ static ComponentAttributeProviders create_attribute_providers_for_instances() return ComponentAttributeProviders({&position, &id}, {&instance_custom_data}); } +static const ComponentAttributeProviders providers = create_attribute_providers_for_instances(); static AttributeAccessorFunctions get_instances_accessor_functions() { - static const ComponentAttributeProviders providers = create_attribute_providers_for_instances(); AttributeAccessorFunctions fn = attribute_accessor_functions::accessor_functions_for_providers(); fn.domain_size = [](const void *owner, const eAttrDomain domain) { diff --git a/source/blender/blenkernel/intern/geometry_component_mesh.cc b/source/blender/blenkernel/intern/geometry_component_mesh.cc index 3a2cbe7..08f3903 100644 --- a/source/blender/blenkernel/intern/geometry_component_mesh.cc +++ b/source/blender/blenkernel/intern/geometry_component_mesh.cc @@ -1242,9 +1242,9 @@ static ComponentAttributeProviders create_attribute_providers_for_mesh() &face_custom_data}); } +static const ComponentAttributeProviders providers = create_attribute_providers_for_mesh(); static AttributeAccessorFunctions get_mesh_accessor_functions() { - static const ComponentAttributeProviders providers = create_attribute_providers_for_mesh(); AttributeAccessorFunctions fn = attribute_accessor_functions::accessor_functions_for_providers(); fn.domain_size = [](const void *owner, const eAttrDomain domain) { diff --git a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc index f9261fd..859feef 100644 --- a/source/blender/blenkernel/intern/geometry_component_pointcloud.cc +++ b/source/blender/blenkernel/intern/geometry_component_pointcloud.cc @@ -163,10 +163,10 @@ static ComponentAttributeProviders create_attribute_providers_for_point_cloud() return ComponentAttributeProviders({&position, &radius, &id}, {&point_custom_data}); } +static const ComponentAttributeProviders providers = + create_attribute_providers_for_point_cloud(); static AttributeAccessorFunctions get_pointcloud_accessor_functions() { - static const ComponentAttributeProviders providers = - create_attribute_providers_for_point_cloud(); AttributeAccessorFunctions fn = attribute_accessor_functions::accessor_functions_for_providers(); fn.domain_size = [](const void *owner, const eAttrDomain domain) { diff --git a/source/blender/blenkernel/intern/volume.cc b/source/blender/blenkernel/intern/volume.cc index 1102102..98f7043 100644 --- a/source/blender/blenkernel/intern/volume.cc +++ b/source/blender/blenkernel/intern/volume.cc @@ -1544,6 +1544,9 @@ struct CreateGridOp { else { return GridType::create(); } +#ifdef __EDG__ + return 0; +#endif } }; #endif diff --git a/source/blender/blenlib/BLI_enumerable_thread_specific.hh b/source/blender/blenlib/BLI_enumerable_thread_specific.hh index 1774230..f8ec93a 100644 --- a/source/blender/blenlib/BLI_enumerable_thread_specific.hh +++ b/source/blender/blenlib/BLI_enumerable_thread_specific.hh @@ -33,7 +33,14 @@ namespace blender::threading { #ifndef WITH_TBB namespace enumerable_thread_specific_utils { inline std::atomic next_id = 0; +#ifdef __EDG__ +extern thread_local int thread_id; +#ifdef EDG_THREAD_FIX +thread_local int thread_id = next_id.fetch_add(1, std::memory_order_relaxed); +#endif +#else inline thread_local int thread_id = next_id.fetch_add(1, std::memory_order_relaxed); +#endif } // namespace enumerable_thread_specific_utils #endif diff --git a/source/blender/blenlib/BLI_hash.hh b/source/blender/blenlib/BLI_hash.hh index 7be25b6..dcf9053 100644 --- a/source/blender/blenlib/BLI_hash.hh +++ b/source/blender/blenlib/BLI_hash.hh @@ -92,6 +92,9 @@ template struct DefaultHash { /* If this results in a compiler error, no hash function for the type has been found. */ return value.hash(); } +#ifdef __EDG__ + return 0; +#endif } template uint64_t operator()(const U &value) const diff --git a/source/blender/blenlib/BLI_math_matrix_types.hh b/source/blender/blenlib/BLI_math_matrix_types.hh index 46e098a..cfd75d8 100644 --- a/source/blender/blenlib/BLI_math_matrix_types.hh +++ b/source/blender/blenlib/BLI_math_matrix_types.hh @@ -427,7 +427,12 @@ struct alignas(Alignment) MatBase : public vec_struct_base, N /* This is the reference implementation. * Might be overloaded with vectorized / optimized code. */ row_type result(0); +#ifdef __EDG__ + const int nrow = NumRow; + unroll([&](auto c) { unroll([&](auto r) { result[c] += b[c][r] * a[r]; }); }); +#else unroll([&](auto c) { unroll([&](auto r) { result[c] += b[c][r] * a[r]; }); }); +#endif return result; } @@ -487,7 +492,14 @@ struct alignas(Alignment) MatBase : public vec_struct_base, N friend std::ostream &operator<<(std::ostream &stream, const MatBase &mat) { stream << "(\n"; +#ifdef __EDG__ + const int nrow = NumRow, ncol = NumCol; + unroll([&,nrow](auto i) { +#define NumRow nrow +#define NumCol ncol +#else unroll([&](auto i) { +#endif stream << "("; unroll([&](auto j) { /** NOTE: j and i are swapped to follow mathematical convention. */ @@ -500,6 +512,10 @@ struct alignas(Alignment) MatBase : public vec_struct_base, N if (i < NumCol - 1) { stream << ","; } +#ifdef __EDG__ +#undef NumRow +#undef NumCol +#endif stream << "\n"; }); stream << ")\n"; diff --git a/source/blender/blenlib/BLI_memory_utils.hh b/source/blender/blenlib/BLI_memory_utils.hh index 1eedd89..85cc6ac 100644 --- a/source/blender/blenlib/BLI_memory_utils.hh +++ b/source/blender/blenlib/BLI_memory_utils.hh @@ -10,6 +10,7 @@ #include #include #include +#include // std::byte #include "BLI_utildefines.h" #include "MEM_guardedalloc.h" diff --git a/source/blender/blenlib/BLI_utildefines.h b/source/blender/blenlib/BLI_utildefines.h index bfa5102..04e3ef9 100644 --- a/source/blender/blenlib/BLI_utildefines.h +++ b/source/blender/blenlib/BLI_utildefines.h @@ -493,7 +493,7 @@ extern "C" { ((void)0) /* assuming a static array */ -#if defined(__GNUC__) && !defined(__cplusplus) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#if defined(__GNUC__) && !defined(__cplusplus) && !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__EDG__) # define ARRAY_SIZE(arr) \ ((sizeof(struct { int isnt_array : ((const void *)&(arr) == &(arr)[0]); }) * 0) + \ (sizeof(arr) / sizeof(*(arr)))) diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh index 54a5ece..2529534 100644 --- a/source/blender/blenlib/BLI_virtual_array.hh +++ b/source/blender/blenlib/BLI_virtual_array.hh @@ -504,6 +504,9 @@ template struct VArrayAnyExtraInfo { BLI_assert_unreachable(); return {}; } +#ifdef __EDG__ + return {}; +#endif } }; diff --git a/source/blender/editors/interface/interface_template_attribute_search.cc b/source/blender/editors/interface/interface_template_attribute_search.cc index 2ac8e96..a0437d4 100644 --- a/source/blender/editors/interface/interface_template_attribute_search.cc +++ b/source/blender/editors/interface/interface_template_attribute_search.cc @@ -1,5 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifdef __EDG__ +#define EDG_THREAD_FIX +#endif + /** \file * \ingroup edinterface */ -- 2.34.1