From 346757a9ea7cf9fa7432fdf693797138ac36e6fe Mon Sep 17 00:00:00 2001 From: Jon Daniel Date: Tue, 19 May 2026 17:10:03 +0200 Subject: [PATCH 1/3] add x86_64 optimization to Makefile --- Makefile | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index e7a4bef25f..449ebfa02e 100644 --- a/Makefile +++ b/Makefile @@ -125,6 +125,12 @@ else endif export CROSS_COMPILING +ifneq ($(CROSS_COMPILING),1) + BASE_CFLAGS ?= -march=native +else + BASE_CFLAGS ?= +endif + ifndef DESTDIR DESTDIR=/usr/local/games/quake3 endif @@ -310,8 +316,6 @@ ifeq ($(ARCH),ppc64) HAVE_VM_COMPILED = true endif -BASE_CFLAGS = - ifeq ($(USE_SYSTEM_JPEG),1) BASE_CFLAGS += -DUSE_SYSTEM_JPEG endif @@ -428,7 +432,7 @@ ifdef MINGW ifeq ($(ARCH),x86_64) ARCHEXT = .x64 BASE_CFLAGS += -m64 - OPTIMIZE = -O2 -ffast-math + OPTIMIZE = -mfpmath=sse -O3 -ffast-math endif ifeq ($(ARCH),x86) BASE_CFLAGS += -m32 @@ -494,7 +498,7 @@ ifeq ($(COMPILE_PLATFORM),darwin) BASE_CFLAGS += -DMACOS_X - OPTIMIZE = -O2 -fvisibility=hidden + OPTIMIZE = -O3 -ftree-vectorize -fopenmp -fopenmp-simd -fvisibility=hidden SHLIBEXT = dylib SHLIBCFLAGS = -fPIC -fvisibility=hidden @@ -554,10 +558,11 @@ else BASE_CFLAGS += -I/usr/include -I/usr/local/include - OPTIMIZE = -O2 -fvisibility=hidden + OPTIMIZE = -O3 -ftree-vectorize -fopenmp -fopenmp-simd -fvisibility=hidden ifeq ($(ARCH),x86_64) ARCHEXT = .x64 + OPTIMIZE += -mfpmath=sse else ifeq ($(ARCH),x86) OPTIMIZE += -march=i586 -mtune=i686 @@ -627,7 +632,7 @@ else endif DEBUG_CFLAGS = $(BASE_CFLAGS) -DDEBUG -D_DEBUG -g -O0 - RELEASE_CFLAGS = $(BASE_CFLAGS) -DNDEBUG $(OPTIMIZE) + RELEASE_CFLAGS = $(BASE_CFLAGS) -DNDEBUG $(OPTIMIZE) -fdata-sections -ffunction-sections DEBUG_LDFLAGS = -rdynamic From 6669fd3ecd645eddd051f932296d156c29472f19 Mon Sep 17 00:00:00 2001 From: Jon Daniel Date: Thu, 21 May 2026 18:44:44 +0200 Subject: [PATCH 2/3] add vector type alignment and update inline - ensure ID_INLINE inlines everything - add vec(4,byte)
and reindent not dependent on ts - inline ColorBytes and move to q_shared.h - add avec(N,T) and use it for avec3_t/avec5_t - add clang support - add evec(N,T) SIMD vector extension type - use __typeof__ instead of typeof --- Makefile | 19 +++- code/qcommon/q_math.c | 22 ----- code/qcommon/q_platform.h | 10 ++- code/qcommon/q_shared.h | 179 ++++++++++++++++++++++++++------------ 4 files changed, 145 insertions(+), 85 deletions(-) diff --git a/Makefile b/Makefile index 449ebfa02e..470a4c81b1 100644 --- a/Makefile +++ b/Makefile @@ -424,6 +424,12 @@ ifdef MINGW $(error Cannot find a suitable cross compiler for $(PLATFORM)) endif + # Detect if CC is clang + COMPILER_VERSION := $(shell $(CC) --version) + ifneq '' '$(findstring clang,$(COMPILER_VERSION))' + BASE_CFLAGS += -std=gnu2x -Wno-gnu-alignof-expression + endif + BASE_CFLAGS += -Wall -Wimplicit -Wstrict-prototypes -DUSE_ICON -DMINGW=1 BASE_CFLAGS += -Wno-unused-result -fvisibility=hidden @@ -491,6 +497,11 @@ ifeq ($(COMPILE_PLATFORM),darwin) ############################################################################# # SETUP AND BUILD -- MACOS ############################################################################# + # Detect if CC is clang + COMPILER_VERSION := $(shell $(CC) --version) + ifneq '' '$(findstring clang,$(COMPILER_VERSION))' + BASE_CFLAGS += -std=gnu2x -Wno-gnu-alignof-expression + endif BASE_CFLAGS += -Wall -Wimplicit -Wstrict-prototypes -pipe @@ -498,7 +509,7 @@ ifeq ($(COMPILE_PLATFORM),darwin) BASE_CFLAGS += -DMACOS_X - OPTIMIZE = -O3 -ftree-vectorize -fopenmp -fopenmp-simd -fvisibility=hidden + OPTIMIZE = -O3 -ftree-vectorize -fopenmp-simd -fvisibility=hidden SHLIBEXT = dylib SHLIBCFLAGS = -fPIC -fvisibility=hidden @@ -560,6 +571,12 @@ else OPTIMIZE = -O3 -ftree-vectorize -fopenmp -fopenmp-simd -fvisibility=hidden + # Detect if CC is clang + COMPILER_VERSION := $(shell $(CC) --version) + ifneq '' '$(findstring clang,$(COMPILER_VERSION))' + BASE_CFLAGS += -std=gnu2x
-Wno-gnu-alignof-expression + endif + ifeq ($(ARCH),x86_64) ARCHEXT = .x64 OPTIMIZE += -mfpmath=sse diff --git a/code/qcommon/q_math.c b/code/qcommon/q_math.c index 6fca8e334b..329c397f4d 100644 --- a/code/qcommon/q_math.c +++ b/code/qcommon/q_math.c @@ -275,28 +275,6 @@ void ByteToDir( int b, vec3_t dir ) { VectorCopy (bytedirs[b], dir); } - -unsigned ColorBytes3 (float r, float g, float b) { - unsigned i; - - ( (byte *)&i )[0] = r * 255; - ( (byte *)&i )[1] = g * 255; - ( (byte *)&i )[2] = b * 255; - - return i; -} - -unsigned ColorBytes4 (float r, float g, float b, float a) { - unsigned i; - - ( (byte *)&i )[0] = r * 255; - ( (byte *)&i )[1] = g * 255; - ( (byte *)&i )[2] = b * 255; - ( (byte *)&i )[3] = a * 255; - - return i; -} - float NormalizeColor( const vec3_t in, vec3_t out ) { float max; diff --git a/code/qcommon/q_platform.h b/code/qcommon/q_platform.h index e4c1864033..b6dbd3fb78 100644 --- a/code/qcommon/q_platform.h +++ b/code/qcommon/q_platform.h @@ -49,15 +49,17 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #if defined( _MSC_VER ) && _MSC_VER >= 1400 // MSVC++ 8.0 at least #define OS_STRING "win_msvc" +#define ID_INLINE __inline #elif defined __MINGW32__ #define OS_STRING "win_mingw" +#define ID_INLINE __attribute__((always_inline,flatten)) inline #elif defined __MINGW64__ #define OS_STRING "win_mingw64" +#define ID_INLINE __attribute__((always_inline,flatten)) inline #else #error "Compiler not supported" #endif -#define ID_INLINE __inline #define PATH_SEP '\\' #define PATH_SEP_FOREIGN '/' #define DLL_EXT ".dll" @@ -156,7 +158,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #define OS_STRING "linux" -#define ID_INLINE inline +#define ID_INLINE __attribute__((always_inline,flatten)) inline #endif // __linux___ @@ -176,7 +178,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define OS_STRING "openbsd" #endif -#define ID_INLINE inline +#define ID_INLINE 
__attribute__((always_inline,flatten)) inline #if BYTE_ORDER == BIG_ENDIAN #define Q3_BIG_ENDIAN #else @@ -190,7 +192,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #ifdef __APPLE__ #define OS_STRING "macos" -#define ID_INLINE inline +#define ID_INLINE __attribute__((always_inline,flatten)) inline #undef DLL_EXT #define DLL_EXT ".dylib" diff --git a/code/qcommon/q_shared.h b/code/qcommon/q_shared.h index 5f818a0abc..9ed9a51e81 100644 --- a/code/qcommon/q_shared.h +++ b/code/qcommon/q_shared.h @@ -213,36 +213,36 @@ int Q_longjmp_c(void *, int); #define Q_longjmp longjmp #endif -typedef unsigned char byte; - -typedef enum { qfalse = 0, qtrue } qboolean; - -typedef union floatint_u -{ - int32_t i; - uint32_t u; - float f; - byte b[4]; -} -floatint_t; - -typedef union { - byte rgba[4]; - uint32_t u32; -} color4ub_t; - - -typedef int qhandle_t; -typedef int sfxHandle_t; -typedef int fileHandle_t; -typedef int clipHandle_t; - #define PAD(base, alignment) (((base)+(alignment)-1) & ~((alignment)-1)) #define PADLEN(base, alignment) (PAD((base), (alignment)) - (base)) #define PADP(base, alignment) ((void *) PAD((intptr_t) (base), (alignment))) -#ifdef __GNUC__ +#ifndef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) +#endif + +#ifndef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#endif + +#if __STDC_VERSION__ < 202311L +#ifndef _MSC_VER +#include <stdalign.h> /* pre-C23 source of alignas/alignof and the __align*_is_defined macros tested below */ +#endif +#if !__alignas_is_defined +#define alignas _Alignas +#define __alignas_is_defined 1 +#endif +#if !__alignof_is_defined +#define alignof _Alignof +#define __alignof_is_defined 1 +#endif +#endif + +#ifdef _MSC_VER +#define QALIGN(x) __declspec(align(x)) +#elif defined(__GNUC__) || defined(__clang__) #define QALIGN(x) __attribute__((aligned(x))) #else #define QALIGN(x) @@ -394,20 +394,85 @@ MATHLIB ============================================================== */ +#define BITOP_RUP01__(x) ( (x) | ( (x) >> 1)) +#define BITOP_RUP02__(x) (BITOP_RUP01__(x) | (BITOP_RUP01__(x) >> 2)) +#define BITOP_RUP04__(x)
(BITOP_RUP02__(x) | (BITOP_RUP02__(x) >> 4)) +#define BITOP_RUP08__(x) (BITOP_RUP04__(x) | (BITOP_RUP04__(x) >> 8)) +#define BITOP_RUP16__(x) (BITOP_RUP08__(x) | (BITOP_RUP08__(x) >> 16)) + +#define bitceil(x) (const uint32_t)(BITOP_RUP16__(((uint32_t)(x)) - 1) + 1) + +#define isarray(a) __builtin_choose_expr(__builtin_types_compatible_p(__typeof__((a)[0]) [], __typeof__((a))), true, false) +#define vec(N,T) QALIGN((N == bitceil(N) ? N : 1) * alignof(T)) __typeof__(__typeof__(T)[N]) /* power-of-two N: aligned to N * alignof(T); any other N: alignof(T) */ +#define avec(N,T) QALIGN(bitceil(N)) __typeof__(__typeof__(T)[N]) + +#ifdef __clang__ +#define evec(N,T) __attribute__((ext_vector_type(N))) __typeof__(T) +#elif defined(__GNUC__) +#define evec(N,T) __attribute__((vector_size(bitceil(N) * sizeof(T)))) __typeof__(T) /* vector_size takes a byte count, hence sizeof */ +#else +#define evec(N,T) avec(N,T) +#endif + +#define evec_load(n,v) \ +({ \ + evec(n,__typeof__((v)[0])) dst; \ + _Pragma("omp simd") \ + for(size_t i = 0; i < n; i++) \ + dst[i] = (v)[i]; \ + dst; \ +}) + +#define evec_store(n,dst,v) \ +({ \ + _Pragma("omp simd") \ + for(size_t i = 0; i < n; i++) \ + (dst)[i] = (v)[i]; \ + (dst); \ +}) typedef float vec_t; -typedef vec_t vec2_t[2]; -typedef vec_t vec3_t[3]; -typedef vec_t vec4_t[4]; -typedef vec_t vec5_t[5]; +typedef vec(2,vec_t) vec2_t; +typedef vec(3,vec_t) vec3_t; +typedef avec(3,vec_t) avec3_t; +typedef vec(4,vec_t) vec4_t; +typedef vec(5,vec_t) vec5_t; +typedef avec(5,vec_t) avec5_t; -typedef vec_t quat_t[4]; +typedef vec(4,vec_t) quat_t; typedef int fixed4_t; typedef int fixed8_t; typedef int fixed16_t; +typedef unsigned char byte; +typedef vec(4,byte) vec4ub_t; + +typedef enum { qfalse = 0, qtrue } qboolean; + +#pragma pack(push,1) +typedef union QALIGN(4) floatint_u +{ + int32_t i; + uint32_t u; + float f; + vec4ub_t b; +} floatint_t; +#pragma pack(pop) + +#pragma pack(push,1) +typedef union QALIGN(4) { + vec4ub_t rgba; + uint32_t u32; +} color4ub_t; +#pragma pack(pop) + +typedef int qhandle_t; +typedef int sfxHandle_t; +typedef int fileHandle_t; +typedef int
clipHandle_t; + #ifndef M_PI #define M_PI 3.14159265358979323846f // matches value in gcc v2 math.h #endif @@ -526,15 +591,14 @@ void ByteToDir( int b, vec3_t dir ); #if 1 -#define DotProduct(x,y) ((x)[0]*(y)[0]+(x)[1]*(y)[1]+(x)[2]*(y)[2]) -#define VectorSubtract(a,b,c) ((c)[0]=(a)[0]-(b)[0],(c)[1]=(a)[1]-(b)[1],(c)[2]=(a)[2]-(b)[2]) -#define VectorAdd(a,b,c) ((c)[0]=(a)[0]+(b)[0],(c)[1]=(a)[1]+(b)[1],(c)[2]=(a)[2]+(b)[2]) -#define VectorCopy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2]) -#define VectorScale(v, s, o) ((o)[0]=(v)[0]*(s),(o)[1]=(v)[1]*(s),(o)[2]=(v)[2]*(s)) -#define VectorMA(v, s, b, o) ((o)[0]=(v)[0]+(b)[0]*(s),(o)[1]=(v)[1]+(b)[1]*(s),(o)[2]=(v)[2]+(b)[2]*(s)) - -#define DotProduct4(a,b) ((a)[0]*(b)[0] + (a)[1]*(b)[1] + (a)[2]*(b)[2] + (a)[3]*(b)[3]) -#define VectorScale4(a,b,c) ((c)[0]=(a)[0]*(b),(c)[1]=(a)[1]*(b),(c)[2]=(a)[2]*(b),(c)[3]=(a)[3]*(b)) +#define DotProduct(x,y) ((x)[0]*(y)[0]+(x)[1]*(y)[1]+(x)[2]*(y)[2]) +#define DotProduct4(x,y) ((x)[0]*(y)[0]+(x)[1]*(y)[1]+(x)[2]*(y)[2]+(x)[3]*(y)[3]) +#define VectorSubtract(a,b,c) ((c)[0]=(a)[0]-(b)[0],(c)[1]=(a)[1]-(b)[1],(c)[2]=(a)[2]-(b)[2]) +#define VectorAdd(a,b,c) ((c)[0]=(a)[0]+(b)[0],(c)[1]=(a)[1]+(b)[1],(c)[2]=(a)[2]+(b)[2]) +#define VectorCopy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2]) +#define VectorScale(v, s, o) ((o)[0]=(v)[0]*(s),(o)[1]=(v)[1]*(s),(o)[2]=(v)[2]*(s)) +#define VectorScale4(a,b,c) ((c)[0]=(a)[0]*(b),(c)[1]=(a)[1]*(b),(c)[2]=(a)[2]*(b),(c)[3]=(a)[3]*(b)) +#define VectorMA(v, s, b, o) ((o)[0]=(v)[0]+(b)[0]*(s),(o)[1]=(v)[1]+(b)[1]*(s),(o)[2]=(v)[2]+(b)[2]*(s)) #else @@ -554,21 +618,21 @@ void ByteToDir( int b, vec3_t dir ); typedef struct { float v[3]; } vec3struct_t; -#define VectorCopy(a,b) (*(vec3struct_t *)b=*(vec3struct_t *)a) +#define VectorCopy(a,b) (*(vec3struct_t *)b=*(vec3struct_t *)a) #endif #endif -#define VectorClear(a) ((a)[0]=(a)[1]=(a)[2]=0) -#define VectorNegate(a,b) ((b)[0]=-(a)[0],(b)[1]=-(a)[1],(b)[2]=-(a)[2]) -#define VectorSet(v, x, y, z) 
((v)[0]=(x), (v)[1]=(y), (v)[2]=(z)) -#define Vector4Set(v,x,y,z,w) ((v)[0]=(x), (v)[1]=(y), (v)[2]=(z), v[3]=(w)) -#define Vector4Copy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2],(b)[3]=(a)[3]) +#define VectorClear(a) ((a)[0]=(a)[1]=(a)[2]=0) +#define VectorNegate(a,b) ((b)[0]=-(a)[0],(b)[1]=-(a)[1],(b)[2]=-(a)[2]) +#define VectorSet(v, x, y, z) ((v)[0]=(x), (v)[1]=(y), (v)[2]=(z)) +#define Vector4Set(v,x,y,z,w) ((v)[0]=(x), (v)[1]=(y), (v)[2]=(z), (v)[3]=(w)) +#define Vector4Copy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2],(b)[3]=(a)[3]) -#define Byte4Copy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2],(b)[3]=(a)[3]) +#define Byte4Copy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2],(b)[3]=(a)[3]) -#define QuatCopy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2],(b)[3]=(a)[3]) +#define QuatCopy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2],(b)[3]=(a)[3]) -#define SnapVector(v) {v[0]=((int)(v[0]));v[1]=((int)(v[1]));v[2]=((int)(v[2]));} +#define SnapVector(v) {v[0]=((int)(v[0]));v[1]=((int)(v[1]));v[2]=((int)(v[2]));} // just in case you don't want to use the macros vec_t _DotProduct( const vec3_t v1, const vec3_t v2 ); void _VectorSubtract( const vec3_t veca, const vec3_t vecb, vec3_t out ); @@ -577,8 +641,15 @@ void _VectorCopy( const vec3_t in, vec3_t out ); void _VectorScale( const vec3_t in, float scale, vec3_t out ); void _VectorMA( const vec3_t veca, float scale, const vec3_t vecb, vec3_t vecc ); -unsigned ColorBytes3 (float r, float g, float b); -unsigned ColorBytes4 (float r, float g, float b, float a); +static ID_INLINE uint32_t ColorBytes3 (float r, float g, float b) /* r,g,b scaled by 255 into bytes 0..2; byte 3 is zero */ +{ + return (color4ub_t){ .rgba = { (byte)(r * 255), (byte)(g * 255), (byte)(b * 255) } }.u32; /* pun through the union instead of casting a byte array to uint32_t* */ +} + +static ID_INLINE uint32_t ColorBytes4 (float r, float g, float b, float a) /* r,g,b,a scaled by 255 into bytes 0..3 */ +{ + return (color4ub_t){ .rgba = { (byte)(r * 255), (byte)(g * 255), (byte)(b * 255), (byte)(a * 255) } }.u32; /* same union pun as ColorBytes3 */ +} float NormalizeColor( const vec3_t in, vec3_t out ); @@ -717,14 +788,6 @@ void PerpendicularVector( vec3_t dst, const
vec3_t src ); int Q_isnan( float x ); float Q_atof( const char *str ); -#ifndef MAX -#define MAX(x,y) ((x)>(y)?(x):(y)) -#endif - -#ifndef MIN -#define MIN(x,y) ((x)<(y)?(x):(y)) -#endif - //============================================= float Com_Clamp( float min, float max, float value ); From 79494a01c5d55ccbb2335bf0650edaa780d4e5e5 Mon Sep 17 00:00:00 2001 From: Jon Daniel Date: Thu, 21 May 2026 19:08:51 +0200 Subject: [PATCH 3/3] use #define instead of typedef for _MSC_VER --- code/qcommon/q_shared.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/code/qcommon/q_shared.h b/code/qcommon/q_shared.h index 9ed9a51e81..710756dc44 100644 --- a/code/qcommon/q_shared.h +++ b/code/qcommon/q_shared.h @@ -433,6 +433,16 @@ MATHLIB }) typedef float vec_t; +#ifdef _MSC_VER +#define vec2_t vec(2,vec_t) +#define vec3_t vec(3,vec_t) +#define avec3_t avec(3,vec_t) +#define vec4_t vec(4,vec_t) +#define vec5_t vec(5,vec_t) +#define avec5_t avec(5,vec_t) + +#define quat_t vec(4,vec_t) +#else typedef vec(2,vec_t) vec2_t; typedef vec(3,vec_t) vec3_t; typedef avec(3,vec_t) avec3_t; @@ -441,13 +451,19 @@ typedef vec(5,vec_t) vec5_t; typedef avec(5,vec_t) avec5_t; typedef vec(4,vec_t) quat_t; +#endif typedef int fixed4_t; typedef int fixed8_t; typedef int fixed16_t; typedef unsigned char byte; + +#ifdef _MSC_VER +#define vec4ub_t vec(4,byte) +#else typedef vec(4,byte) vec4ub_t; +#endif typedef enum { qfalse = 0, qtrue } qboolean;