Signed-off-by: Tim Blechmann <tim@xxxxxxxxxx>
---
 nova-simd/generator_round_generic.py |    3 ++-
 nova-simd/simd_round_generic.hpp     |    3 ++-
 nova-simd/simd_round_vec.hpp         |   28 ++++++++++++++++++++++++----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/nova-simd/generator_round_generic.py b/nova-simd/generator_round_generic.py
index 32e11d8..60e66e3 100644
--- a/nova-simd/generator_round_generic.py
+++ b/nova-simd/generator_round_generic.py
@@ -39,7 +39,8 @@ inline float_type round(float_type f)
 template <typename float_type>
 inline float_type frac(float_type f)
 {
-    return f - std::floor(f + float_type(0.5));
+    float_type intpart;
+    return std::modf(f, &intpart);
 }
 
 } /* namespace detail */
diff --git a/nova-simd/simd_round_generic.hpp b/nova-simd/simd_round_generic.hpp
index 57ddf33..f65427d 100644
--- a/nova-simd/simd_round_generic.hpp
+++ b/nova-simd/simd_round_generic.hpp
@@ -37,7 +37,8 @@ inline float_type round(float_type f)
 template <typename float_type>
 inline float_type frac(float_type f)
 {
-    return f - std::floor(f + float_type(0.5));
+    float_type intpart;
+    return std::modf(f, &intpart);
 }
 
 } /* namespace detail */
diff --git a/nova-simd/simd_round_vec.hpp b/nova-simd/simd_round_vec.hpp
index 9dc5e06..bdc0fe9 100644
--- a/nova-simd/simd_round_vec.hpp
+++ b/nova-simd/simd_round_vec.hpp
@@ -81,8 +81,18 @@ inline void frac_vec_simd(float * out, const float * arg, unsigned int n)
         const vec_float4 * in1 = (const vec_float4*)(arg + 4);
         vec_float4 * out0 = (vec_float4*)out;
         vec_float4 * out1 = (vec_float4*)(out+4);
-        *out0 = *in0 - _roundf4(*in0);
-        *out1 = *in1 - _roundf4(*in1);
+
+        vec_int4 sign0 = _signf4(*in0);
+        vec_float4 abs_in0 = _fabsf4(*in0);
+        vec_float4 frac0 = *in0 - _floorf4(abs_in0);
+        VEC_OR(frac0, sign0);
+        *out0 = frac0;
+
+        vec_int4 sign1 = _signf4(*in1);
+        vec_float4 abs_in1 = _fabsf4(*in1);
+        vec_float4 frac1 = *in1 - _floorf4(abs_in1);
+        VEC_OR(frac1, sign1);
+        *out1 = frac1;
 
         out += 8;
         arg += 8;
@@ -99,8 +109,18 @@ inline void frac_vec_simd(float * out, const float * arg)
         const vec_float4 * in1 = (const vec_float4*)(arg + 4);
         vec_float4 * out0 = (vec_float4*)out;
         vec_float4 * out1 = (vec_float4*)(out+4);
-        *out0 = *in0 - _roundf4(*in0);
-        *out1 = *in1 - _roundf4(*in1);
+
+        vec_int4 sign0 = _signf4(*in0);
+        vec_float4 abs_in0 = _fabsf4(*in0);
+        vec_float4 frac0 = *in0 - _floorf4(abs_in0);
+        VEC_OR(frac0, sign0);
+        *out0 = frac0;
+
+        vec_int4 sign1 = _signf4(*in1);
+        vec_float4 abs_in1 = _fabsf4(*in1);
+        vec_float4 frac1 = *in1 - _floorf4(abs_in1);
+        VEC_OR(frac1, sign1);
+        *out1 = frac1;
 
         out += 8;
         arg += 8;
Attachment:
signature.asc
Description: OpenPGP digital signature