Author: Michael R. Crusoe <crusoe@debian.org>
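Description: Support non-x86 systems via libsimde-dev
 Include SIMDe's <simde/x86/sse2.h> (with native aliases enabled) instead of
 <emmintrin.h>/<immintrin.h>, so the SSE2 code path also compiles on
 architectures without native x86 intrinsics.
 .
 The SSE4.1, AVX and AVX2 RADULS objects are built only when the make
 variable SIMD is 1; otherwise only raduls_sse2.o is built, and without
 -msse2. The CPUID probing in cpu_info.cpp is compiled only for
 x86_64/i386; elsewhere the sse2 flag is simply set to true. In kmc.h,
 targets that are neither aarch64 nor x86_64 always use RadixSortMSD_SSE2.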
--- a/Makefile
+++ b/Makefile
@@ -107,11 +107,16 @@
 	RADULS_OBJS = \
 	$(KMC_MAIN_DIR)/raduls_neon.o
 else
+ifeq (1,$(SIMD))
 	RADULS_OBJS = \
 	$(KMC_MAIN_DIR)/raduls_sse2.o \
 	$(KMC_MAIN_DIR)/raduls_sse41.o \
 	$(KMC_MAIN_DIR)/raduls_avx2.o \
 	$(KMC_MAIN_DIR)/raduls_avx.o
+else
+	RADULS_OBJS = \
+	$(KMC_MAIN_DIR)/raduls_sse2.o
+endif
 endif
 endif
 
@@ -147,6 +152,7 @@
 $(KMC_CLI_OBJS) $(KMC_CORE_OBJS) $(KMC_DUMP_OBJS) $(KMC_API_OBJS) $(KFF_OBJS) $(KMC_TOOLS_OBJS): %.o: %.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
 
+ifeq (1,$(SIMD))
 $(KMC_MAIN_DIR)/raduls_sse2.o: $(KMC_MAIN_DIR)/raduls_sse2.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -msse2 -c $< -o $@
 $(KMC_MAIN_DIR)/raduls_sse41.o: $(KMC_MAIN_DIR)/raduls_sse41.cpp
@@ -155,6 +161,10 @@
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -mavx -c $< -o $@
 $(KMC_MAIN_DIR)/raduls_avx2.o: $(KMC_MAIN_DIR)/raduls_avx2.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -mavx2 -c $< -o $@
+else
+$(KMC_MAIN_DIR)/raduls_sse2.o: $(KMC_MAIN_DIR)/raduls_sse2.cpp
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
+endif
 
 $(KMC_MAIN_DIR)/raduls_neon.o: $(KMC_MAIN_DIR)/raduls_neon.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
--- a/kmc_core/cpu_info.cpp
+++ b/kmc_core/cpu_info.cpp
@@ -57,6 +57,7 @@
 #else
 	void cpuid(int *result, int function_id) const
 	{
+#if defined(__x86_64__) || defined(__i386__)
 #ifdef _MSC_VER
 		__cpuidex(result, function_id, 0);
 
@@ -72,10 +73,12 @@
 		__asm__("cpuid\n\t"
 			: "=a" (result[0]), "=b" (result[1]), "=c" (result[2]), "=d" (result[3]) : "0" (function_id), "c"(0));
 #endif  
+#endif
 	}
 
 	CpuInfoImpl()
 	{
+#if defined(__x86_64__) || defined(__i386__)
 		array<int, 4> cpui = { -1 };
 		cpuid(cpui.data(), 0);
 		int nIds_ = cpui[0];
@@ -109,6 +112,9 @@
 			std::bitset<32> EBX = data_[7][1];
 			avx2 = EBX[5];
 		}
+#else
+		sse2=true;
+#endif
 	}
 
 	const string& GetVendor() const
@@ -166,4 +172,4 @@
 bool CCpuInfo::AVX2_Enabled() { return cpu_info_impl.avx2; }
 bool CCpuInfo::NEON_Enabled() { return cpu_info_impl.neon; }
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- a/kmc_core/intr_copy.h
+++ b/kmc_core/intr_copy.h
@@ -18,8 +18,8 @@
 #if defined(__aarch64__)
 #include <arm_neon.h>
 #else
-#include <emmintrin.h>
-#include <immintrin.h>
+#define SIMDE_ENABLE_NATIVE_ALIASES
+#include <simde/x86/sse2.h>
 #endif
 
 
@@ -41,7 +41,7 @@
 #if defined(__aarch64__)
 		dest[i] = src[i];
 #else
-		_mm_stream_si64(dest + i, src[i]);
+		simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]);
 #endif
 }
 
@@ -59,7 +59,7 @@
 #if defined(__aarch64__)
 			dest[i] = src[i];
 #else
-			_mm_stream_si64(dest + i, src[i]);
+			simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]);
 #endif
 	}
 };
@@ -117,4 +117,4 @@
 
 #endif
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- a/kmc_core/kmc.h
+++ b/kmc_core/kmc.h
@@ -1530,7 +1530,7 @@
 #ifdef __aarch64__
 	sort_func = RadulsSort::RadixSortMSD_NEON<CKmer<SIZE>>;
 	CSmallSort<SIZE>::Adjust(384);
-#else
+#elif defined(__x86_64__)
 	auto proc_name = CCpuInfo::GetBrand();
 	bool is_intel = CCpuInfo::GetVendor() == "GenuineIntel";
 	bool at_least_avx = CCpuInfo::AVX_Enabled();
@@ -1557,6 +1557,8 @@
 		sort_func = RadixSort::RadixSortMSD<CKmer<SIZE>, SIZE>;
 		CSmallSort<SIZE>::Adjust(384);
 	}
+#else
+	sort_func = RadulsSort::RadixSortMSD_SSE2<CKmer<SIZE>>;
 #endif
 #endif
 
--- a/kmc_core/raduls.h
+++ b/kmc_core/raduls.h
@@ -24,7 +24,9 @@
 #ifndef __aarch64__
 	template<typename KMER_T>
 	void RadixSortMSD_SSE2(KMER_T* kmers, KMER_T* tmp, uint64 n_recs, uint32 byte, uint32 n_threads, CMemoryPool* pmm_radix_buf);
+#endif
 
+#if defined(__x86_64__) || defined(__i386__)
 	template<typename KMER_T>
 	void RadixSortMSD_SSE41(KMER_T* kmers, KMER_T* tmp, uint64 n_recs, uint32 byte, uint32 n_threads, CMemoryPool* pmm_radix_buf);
 
@@ -41,4 +43,4 @@
 
 #endif // RADULS_H
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- a/kmc_core/raduls_impl.h
+++ b/kmc_core/raduls_impl.h
@@ -759,10 +759,10 @@
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_AVX
 #elif defined(__SSE4_1__)
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_SSE41
-#elif defined(__SSE2__)
-#define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_SSE2
 #elif defined(__aarch64__)
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_NEON
+#else
+#define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_SSE2
 #endif
 
 
@@ -800,4 +800,4 @@
 
 #endif
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- a/kmc_core/splitter.cpp
+++ b/kmc_core/splitter.cpp
@@ -1043,4 +1043,4 @@
 template class CWSmallKSplitter<uint32>;
 template class CWSmallKSplitter<uint64>;
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
