1Source: https://github.com/intel/gmmlib/pull/13323From b406bb2f34d7f59a2b406e9911cb353140363d0a Mon Sep 17 00:00:00 20014From: Jianfeng Liu <liujianfeng1994@gmail.com>5Date: Thu, 5 Jun 2025 09:38:36 +08006Subject: [PATCH 1/2] Split common flags of GMMLIB_COMPILER_FLAGS_COMMON for7 multi architectures89---10 Source/GmmLib/Linux.cmake | 84 ++++++++++++---------------------------11 1 file changed, 26 insertions(+), 58 deletions(-)1213diff --git a/Source/GmmLib/Linux.cmake b/Source/GmmLib/Linux.cmake14index 87b74d89..f9398e65 10064415--- a/Source/GmmLib/Linux.cmake16+++ b/Source/GmmLib/Linux.cmake17@@ -19,46 +19,8 @@18 # OTHER DEALINGS IN THE SOFTWARE.1920 #this file should contain only compiler and linker flags21-if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")22- SET (GMMLIB_COMPILER_FLAGS_COMMON23- #general warnings24- #-Wall25- -Winit-self26- -Winvalid-pch27- -Wpointer-arith28- -Wno-unused29- -Wno-unknown-pragmas30- -Wno-comments31- -Wno-narrowing32- -Wno-overflow33- -Wno-parentheses34- -Wno-missing-braces35- -Wno-sign-compare36- -Werror=address37- -Werror=format-security38- -Werror=return-type3940- # General optimization options41- -march=${GMMLIB_MARCH}42- -finline-functions43- -fno-short-enums44- -Wa,--noexecstack45- -fno-strict-aliasing46- # Common defines47- -DUSE_NEON48- # Other common flags49- -fstack-protector50- -fdata-sections51- -ffunction-sections52- -fmessage-length=053- -fvisibility=hidden54- -fPIC55- -g56- )57-else()58- SET (GMMLIB_COMPILER_FLAGS_COMMON59- #general warnings60- -Wall61+SET (GMMLIB_COMPILER_FLAGS_COMMON62 -Winit-self63 -Winvalid-pch64 -Wpointer-arith65@@ -70,32 +32,16 @@ else()66 -Wno-parentheses67 -Wno-missing-braces68 -Wno-sign-compare69- -Wno-enum-compare70 -Werror=address71 -Werror=format-security72 -Werror=return-type7374 # General optimization options75 -march=${GMMLIB_MARCH}76- -mpopcnt77- -msse78- -msse279- -msse380- -mssse381- -msse482- -msse4.183- -msse4.284- -mfpmath=sse85 -finline-functions86 -fno-short-enums87 -Wa,--noexecstack88 -fno-strict-aliasing89- # Common defines90- -DUSE_MMX91- -DUSE_SSE92- -DUSE_SSE293- -DUSE_SSE394- -DUSE_SSSE395 # Other common flags96 -fstack-protector97 -fdata-sections98@@ -104,9 +50,31 @@ else()99 -fvisibility=hidden100 -fPIC101 -g102- # -m32 or -m64103- -m${GMMLIB_ARCH}104- )105+)106+107+if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")108+ list(APPEND GMMLIB_COMPILER_FLAGS_COMMON "-DUSE_NEON")109+else()110+ list (APPEND GMMLIB_COMPILER_FLAGS_COMMON111+ -Wall112+ -Wno-enum-compare113+ -mpopcnt114+ -msse115+ -msse2116+ -msse3117+ -mssse3118+ -msse4119+ -msse4.1120+ -msse4.2121+ -mfpmath=sse122+ -DUSE_MMX123+ -DUSE_SSE124+ -DUSE_SSE2125+ -DUSE_SSE3126+ -DUSE_SSSE3127+ # -m32 or -m64128+ -m${GMMLIB_ARCH}129+ )130 endif()131132 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")133134From e8556a5be0bf054aa712d78eb1b34cd05adb4ca7 Mon Sep 17 00:00:00 2001135From: Jianfeng Liu <liujianfeng1994@gmail.com>136Date: Thu, 5 Jun 2025 09:48:24 +0800137Subject: [PATCH 2/2] add loongarch64 build support138139---140 Source/GmmLib/CMakeLists.txt | 14 ++++++++++++++141 Source/GmmLib/Linux.cmake | 5 +++++142 .../GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c | 12 +++++++++++-143 3 files changed, 30 insertions(+), 1 deletion(-)144145diff --git a/Source/GmmLib/CMakeLists.txt b/Source/GmmLib/CMakeLists.txt146index 48d054ce..29a1e275 100644147--- a/Source/GmmLib/CMakeLists.txt148+++ b/Source/GmmLib/CMakeLists.txt149@@ -167,6 +167,8 @@ endif()150151 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")152 set(GMMLIB_MARCH "armv8-a+fp+simd")153+elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^loongarch")154+ set(GMMLIB_MARCH "la464")155 elseif("${GMMLIB_MARCH}" STREQUAL "")156 set(GMMLIB_MARCH "corei7")157 endif()158@@ -443,6 +445,18 @@ include_directories(BEFORE ${PROJECT_SOURCE_DIR})159160 if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")161 include_directories(${GFX_DEVELOPMENT_DIR}/third_party/sse2neon)162+elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^loongarch")163+ find_path(SIMDE_INCLUDE_DIR164+ NAMES simde/simde-common.h # A key SIMDE header165+ PATHS /usr/include /usr/local/include # Default paths166+ DOC "Path to SIMDE headers"167+ )168+ if(SIMDE_INCLUDE_DIR)169+ include_directories(${SIMDE_INCLUDE_DIR})170+ message(STATUS "Found SIMDE: ${SIMDE_INCLUDE_DIR}")171+ else()172+ message(FATAL_ERROR "SIMDE not found. Install it or set SIMDE_INCLUDE_DIR manually.")173+ endif()174 endif()175176 set(headers177diff --git a/Source/GmmLib/Linux.cmake b/Source/GmmLib/Linux.cmake178index f9398e65..62b29ba5 100644179--- a/Source/GmmLib/Linux.cmake180+++ b/Source/GmmLib/Linux.cmake181@@ -54,6 +54,8 @@ SET (GMMLIB_COMPILER_FLAGS_COMMON182183 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")184 list(APPEND GMMLIB_COMPILER_FLAGS_COMMON "-DUSE_NEON")185+elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^loongarch")186+ list(APPEND GMMLIB_COMPILER_FLAGS_COMMON "-Wno-attributes")187 else()188 list (APPEND GMMLIB_COMPILER_FLAGS_COMMON189 -Wall190@@ -145,6 +147,9 @@ endforeach()191 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")192 SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")193 SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}")194+elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^loongarch")195+ SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")196+ SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}")197 else()198 SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -m${GMMLIB_ARCH}")199 SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -m${GMMLIB_ARCH}")200diff --git a/Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c b/Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c201index e090fd61..36cc76af 100644202--- a/Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c203+++ b/Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c204@@ -375,6 +375,9 @@ extern void CpuSwizzleBlt(CPU_SWIZZLE_BLT_SURFACE *pDest, CPU_SWIZZLE_BLT_SURFAC205 #include <intrin.h>206 #elif defined(__ARM_ARCH)207 #include <sse2neon.h>208+#elif defined(__loongarch64)209+ #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES210+ #include <simde/x86/sse2.h>211 #elif((defined __clang__) ||(__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))212 #include <cpuid.h>213 #include <x86intrin.h>214@@ -749,6 +752,9 @@ void CpuSwizzleBlt( // #########################################################215 #elif(defined(__ARM_ARCH))216 #define MOVNTDQA_R(Reg, Src) ((Reg) = (Reg))217 StreamingLoadSupported = 0;218+ #elif(defined(__loongarch64))219+ #define MOVNTDQA_R(Reg, Src) ((Reg) = (Reg))220+ StreamingLoadSupported = 0;221 #elif((defined __clang__) || (__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))222 #define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))223 unsigned int eax, ebx, ecx, edx;224@@ -1148,7 +1154,11 @@ void CpuSwizzleBlt( // #########################################################225226 } // foreach(y)227228- _mm_sfence(); // Flush Non-Temporal Writes229+ #if(defined(__loongarch64))230+ __sync_synchronize();231+ #else232+ _mm_sfence(); // Flush Non-Temporal Writes233+ #endif234235 #if(_MSC_VER)236 #pragma warning(pop)