[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[sc-dev] [PATCH] nova-simd: fixed frac implementation



Signed-off-by: Tim Blechmann <tim@xxxxxxxxxx>
---
 nova-simd/generator_round_generic.py |    3 ++-
 nova-simd/simd_round_generic.hpp     |    3 ++-
 nova-simd/simd_round_vec.hpp         |   28 ++++++++++++++++++++++++----
 3 files changed, 28 insertions(+), 6 deletions(-)


diff --git a/nova-simd/generator_round_generic.py b/nova-simd/generator_round_generic.py
index 32e11d8..60e66e3 100644
--- a/nova-simd/generator_round_generic.py
+++ b/nova-simd/generator_round_generic.py
@@ -39,7 +39,8 @@ inline float_type round(float_type f)
 template <typename float_type>
 inline float_type frac(float_type f)
 {
-    return f - std::floor(f + float_type(0.5));
+    float_type intpart;
+    return std::modf(f, &intpart);
 }
 
 } /* namespace detail */
diff --git a/nova-simd/simd_round_generic.hpp b/nova-simd/simd_round_generic.hpp
index 57ddf33..f65427d 100644
--- a/nova-simd/simd_round_generic.hpp
+++ b/nova-simd/simd_round_generic.hpp
@@ -37,7 +37,8 @@ inline float_type round(float_type f)
 template <typename float_type>
 inline float_type frac(float_type f)
 {
-    return f - std::floor(f + float_type(0.5));
+    float_type intpart;
+    return std::modf(f, &intpart);
 }
 
 } /* namespace detail */
diff --git a/nova-simd/simd_round_vec.hpp b/nova-simd/simd_round_vec.hpp
index 9dc5e06..bdc0fe9 100644
--- a/nova-simd/simd_round_vec.hpp
+++ b/nova-simd/simd_round_vec.hpp
@@ -81,8 +81,18 @@ inline void frac_vec_simd(float * out, const float * arg, unsigned int n)
         const vec_float4 * in1 = (const vec_float4*)(arg + 4);
         vec_float4 * out0 = (vec_float4*)out;
         vec_float4 * out1 = (vec_float4*)(out+4);
-        *out0 = *in0 - _roundf4(*in0);
-        *out1 = *in1 - _roundf4(*in1);
+
+        vec_int4 sign0      = _signf4(*in0);
+        vec_float4 abs_in0  = _fabsf4(*in0);
+        vec_float4 frac0 = *in0 - _floorf4(abs_in0);
+        VEC_OR(frac0, sign0);
+        *out0 = frac0;
+
+        vec_int4 sign1      = _signf4(*in1);
+        vec_float4 abs_in1  = _fabsf4(*in1);
+        vec_float4 frac1 = *in1 - _floorf4(abs_in1);
+        VEC_OR(frac1, sign1);
+        *out1 = frac1;
 
         out += 8;
         arg += 8;
@@ -99,8 +109,18 @@ inline void frac_vec_simd(float * out, const float * arg)
         const vec_float4 * in1 = (const vec_float4*)(arg + 4);
         vec_float4 * out0 = (vec_float4*)out;
         vec_float4 * out1 = (vec_float4*)(out+4);
-        *out0 = *in0 - _roundf4(*in0);
-        *out1 = *in1 - _roundf4(*in1);
+
+        vec_int4 sign0      = _signf4(*in0);
+        vec_float4 abs_in0  = _fabsf4(*in0);
+        vec_float4 frac0 = *in0 - _floorf4(abs_in0);
+        VEC_OR(frac0, sign0);
+        *out0 = frac0;
+
+        vec_int4 sign1      = _signf4(*in1);
+        vec_float4 abs_in1  = _fabsf4(*in1);
+        vec_float4 frac1 = *in1 - _floorf4(abs_in1);
+        VEC_OR(frac1, sign1);
+        *out1 = frac1;
 
         out += 8;
         arg += 8;

Attachment: signature.asc
Description: OpenPGP digital signature