From 133a5a5a8cb1b7a4958edd4a5bc8a6d7b6c6649b Mon Sep 17 00:00:00 2001 From: barracuda156 Date: Mon, 17 Jul 2023 09:15:17 +0800 Subject: [PATCH 2/2] Add optimization opts for PowerPC --- CMakeLists.txt | 132 +++++++++++++++++++++++++++++---------------- src/CMakeLists.txt | 30 ++++++----- 2 files changed, 103 insertions(+), 59 deletions(-) diff --git CMakeLists.txt CMakeLists.txt index ecbd37e..92f6e15 100644 --- CMakeLists.txt +++ CMakeLists.txt @@ -2,7 +2,12 @@ # flags manually on cmd line # 2/ Should we standardise on just AVX? As machine we run on # may be different to machine we build on -set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version") + +if(APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "ppc|ppc64") + set(CMAKE_OSX_DEPLOYMENT_TARGET "10.5" CACHE STRING "Minimum OS X deployment version") +else() + set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version") +endif() cmake_minimum_required(VERSION 3.0) @@ -14,10 +19,23 @@ endif() project(LPCNet C) option(DISABLE_CPU_OPTIMIZATION "Disable CPU optimization discovery." OFF) + option(AVX2 "Enable AVX2 CPU optimizations." OFF) -option(AVX "Enable AVX CPU optimizations." ON) -option(SSE "Enable SSE CPU optimizations." ON) -option(NEON "Enable NEON CPU optimizations for RPi." ON) + +if(DISABLE_CPU_OPTIMIZATION) + option(AVX "Enable AVX CPU optimizations." OFF) + option(SSE "Enable SSE CPU optimizations." OFF) + option(NEON "Enable NEON CPU optimizations for RPi." OFF) + option(PPC_OPTS "Enable PowerPC CPU optimizations." OFF) +else() + if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc") + option(PPC_OPTS "Enable PowerPC CPU optimizations." ON) + else() + option(AVX "Enable AVX CPU optimizations." ON) + option(SSE "Enable SSE CPU optimizations." ON) + option(NEON "Enable NEON CPU optimizations for RPi." ON) + endif() +endif() include(GNUInstallDirs) mark_as_advanced(CLEAR @@ -26,9 +44,13 @@ mark_as_advanced(CLEAR CMAKE_INSTALL_LIBDIR ) -# Build universal ARM64 and x86_64 binaries on Mac. +# Build universal ARM64 and x86_64 binaries on Mac, unless on PowerPC. if(BUILD_OSX_UNIVERSAL) -set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64") + if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc") + set(CMAKE_OSX_ARCHITECTURES "ppc;ppc64") + else() + set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64") + endif() endif(BUILD_OSX_UNIVERSAL) # @@ -93,36 +115,46 @@ message(STATUS "Host system arch is: ${CMAKE_SYSTEM_PROCESSOR}") # Detection of available CPU optimizations if(NOT DISABLE_CPU_OPTIMIZATION) if(UNIX AND NOT APPLE) - message(STATUS "Looking for available CPU optimizations on Linux/BSD system...") - execute_process(COMMAND grep -c "avx2" /proc/cpuinfo - OUTPUT_VARIABLE AVX2_PRESENT) - execute_process(COMMAND grep -c "avx " /proc/cpuinfo - OUTPUT_VARIABLE AVX_PRESENT) - execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo - OUTPUT_VARIABLE SSE_PRESENT) - execute_process(COMMAND grep -c "neon" /proc/cpuinfo - OUTPUT_VARIABLE NEON_PRESENT) - elseif(APPLE) - if(BUILD_OSX_UNIVERSAL) - # Presume AVX and SSE are enabled on the x86 side. (AVX2 is not guaranteed depending - # on model.) The ARM side will auto-enable NEON optimizations by virtue of being aarch64. - set(AVX_PRESENT TRUE) - set(SSE_PRESENT TRUE) - set(NEON_PRESENT TRUE) + if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc") + message(STATUS "PowerPC processor detected.") + set(PPC_PRESENT TRUE) else() - # Under OSX we need to look through a few sysctl entries to determine what our CPU supports. - message(STATUS "Looking for available CPU optimizations on an OSX system...") - execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2 + message(STATUS "Looking for available CPU optimizations on Linux/BSD system...") + execute_process(COMMAND grep -c "avx2" /proc/cpuinfo OUTPUT_VARIABLE AVX2_PRESENT) - execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX + execute_process(COMMAND grep -c "avx " /proc/cpuinfo OUTPUT_VARIABLE AVX_PRESENT) - execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c SSE4.1 + execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo OUTPUT_VARIABLE SSE_PRESENT) + execute_process(COMMAND grep -c "neon" /proc/cpuinfo + OUTPUT_VARIABLE NEON_PRESENT) + endif() + elseif(APPLE) + if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc") + message(STATUS "PowerPC processor detected.") + set(PPC_PRESENT TRUE) + else() + if(BUILD_OSX_UNIVERSAL) + # Presume AVX and SSE are enabled on the x86 side. (AVX2 is not guaranteed depending + # on model.) The ARM side will auto-enable NEON optimizations by virtue of being aarch64. + set(AVX_PRESENT TRUE) + set(SSE_PRESENT TRUE) + set(NEON_PRESENT TRUE) + else() + # Under OSX we need to look through a few sysctl entries to determine what our CPU supports. + message(STATUS "Looking for available CPU optimizations on an OSX system...") + execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2 + OUTPUT_VARIABLE AVX2_PRESENT) + execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX + OUTPUT_VARIABLE AVX_PRESENT) + execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c SSE4.1 + OUTPUT_VARIABLE SSE_PRESENT) - # Unlike with the above, NEON *is* guaranteed if on ARM as there were never any ARM32 Macs - # available. We don't need any specific compiler flags for this, though. - set(NEON_PRESENT TRUE) - endif(BUILD_OSX_UNIVERSAL) + # Unlike with the above, NEON *is* guaranteed if on ARM as there were never any ARM32 Macs + # available. We don't need any specific compiler flags for this, though. + set(NEON_PRESENT TRUE) + endif(BUILD_OSX_UNIVERSAL) + endif() elseif(WIN32) message(STATUS "No detection capability on Windows, assuming AVX is available.") set(AVX_PRESENT TRUE) @@ -140,21 +172,29 @@ endif() set(LPCNET_C_PROC_FLAGS "") -if(${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0)) - message(STATUS "avx2 processor flags found or enabled.") - set(LPCNET_C_PROC_FLAGS -mavx2 -mfma) -elseif(${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0)) -# AVX2 machines will also match on AVX - message(STATUS "avx processor flags found or enabled.") - set(LPCNET_C_PROC_FLAGS -mavx) -elseif(${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0)) -# AVX and AVX2 machines will also match on SSE - message(STATUS "sse processor flags found or enabled.") - set(LPCNET_C_PROC_FLAGS -msse4.1) -elseif(${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0)) - # RPi / ARM 32bit - message(STATUS "neon processor flags found or enabled.") - set(LPCNET_C_PROC_FLAGS -mfpu=neon -march=armv8-a -mtune=cortex-a53) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc") + if(${PPC_OPTS} AND (${PPC_PRESENT} OR ${PPC_PRESENT} GREATER 0)) + # PowerPC + message(STATUS "PowerPC processor flags found or enabled.") + set(LPCNET_C_PROC_FLAGS "-mcpu=native -mtune=native") + endif() +else() + if(${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0)) + message(STATUS "avx2 processor flags found or enabled.") + set(LPCNET_C_PROC_FLAGS -mavx2 -mfma) + elseif(${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0)) + # AVX2 machines will also match on AVX + message(STATUS "avx processor flags found or enabled.") + set(LPCNET_C_PROC_FLAGS -mavx) + elseif(${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0)) + # AVX and AVX2 machines will also match on SSE + message(STATUS "sse processor flags found or enabled.") + set(LPCNET_C_PROC_FLAGS -msse4.1) + elseif(${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0)) + # RPi / ARM 32bit + message(STATUS "neon processor flags found or enabled.") + set(LPCNET_C_PROC_FLAGS -mfpu=neon -march=armv8-a -mtune=cortex-a53) + endif() endif() # grab latest NN model (or substitute your own) diff --git src/CMakeLists.txt src/CMakeLists.txt index 7dcb181..72f3765 100644 --- src/CMakeLists.txt +++ src/CMakeLists.txt @@ -72,20 +72,24 @@ target_link_libraries(dump_data lpcnetfreedv m) add_executable(test_lpcnet test_lpcnet.c nnet_rw.c) target_link_libraries(test_lpcnet lpcnetfreedv m) -if( - (${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0)) OR - (${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0)) OR - (${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0)) OR - (${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0)) OR - CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") - add_executable(test_vec test_vec.c) - target_link_libraries(test_vec m) - - if(LPCNET_C_PROC_FLAGS) - set_source_files_properties(test_vec.c PROPERTIES COMPILE_FLAGS ${LPCNET_C_PROC_FLAGS}) - endif(LPCNET_C_PROC_FLAGS) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc") + message(WARNING "Tests not implemented for PowerPC, not building test_vec.") else() - message(WARNING "No SSE/AVX/AVX2 CPU flags identified, not building test_vec.") + if( + (${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0)) OR + (${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0)) OR + (${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0)) OR + (${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0)) OR + CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + add_executable(test_vec test_vec.c) + target_link_libraries(test_vec m) + + if(LPCNET_C_PROC_FLAGS) + set_source_files_properties(test_vec.c PROPERTIES COMPILE_FLAGS ${LPCNET_C_PROC_FLAGS}) + endif(LPCNET_C_PROC_FLAGS) + else() + message(WARNING "No SSE/AVX/AVX2 CPU flags identified, not building test_vec.") + endif() endif() add_executable(quant_feat quant_feat.c)