Fix T98672: Noise texture shows incorrect behaviour for large scales

This was a floating point precision issue - or, to be more precise,
an issue with how Cycles split floats into the integer and fractional
parts for Perlin noise.
For coordinates below -2^24, the integer could be wrong, leading to
the fractional part being outside of the 0-1 range, which breaks all sorts
of other things. 2^24 sounds like a lot, but due to how the detail
octaves work, it's not that hard to reach when combined with a large
scale.

Since this code is originally based on OSL, I checked if they changed
it in the meantime, and sure enough, there's a fix for it upstream
(the link to that commit was not preserved in this copy).

So, this basically just ports over that change to Cycles.

The original code mentions being faster, but as pointed out in the
linked commit, the performance impact is actually irrelevant.

I also checked in a simple scene with eight Noise textures at
detail 15 (with >90% of render time being spent on the noise), and
the render time went from 13.06sec to 13.05sec. So, yeah, no issue.
This commit is contained in:
Lukas Stockner 2022-10-16 01:57:44 +02:00 committed by Philipp Oeser
parent 5ff62df238
commit 30774f01cd
Notes: blender-bot 2023-02-14 02:45:41 +01:00
Referenced by issue #100749, Blender LTS: Maintenance Task 3.3
Referenced by issue #98672, Noise texture shows incorrect behaviour for large scale values
3 changed files with 14 additions and 31 deletions

View File

@ -417,15 +417,11 @@ ccl_device_inline int floor_to_int(float f)
return float_to_int(floorf(f));
ccl_device_inline int quick_floor_to_int(float x)
return float_to_int(x) - ((x < 0) ? 1 : 0);
ccl_device_inline float floorfrac(float x, ccl_private int *i)
*i = quick_floor_to_int(x);
return x - *i;
float f = floorf(x);
*i = float_to_int(f);
return x - f;
ccl_device_inline int ceil_to_int(float f)

View File

@ -535,18 +535,6 @@ ccl_device_inline float3 pow(float3 v, float e)
return make_float3(powf(v.x, e), powf(v.y, e), powf(v.z, e));
ccl_device_inline int3 quick_floor_to_int3(const float3 a)
#ifdef __KERNEL_SSE__
int3 b = int3(_mm_cvttps_epi32(a.m128));
int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f))));
/* Unsaturated add 0xffffffff is the same as subtract -1. */
return b + isneg;
return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z));
ccl_device_inline bool isfinite_safe(float3 v)
return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z);

View File

@ -5,6 +5,8 @@
#ifndef __UTIL_SSEF_H__
#define __UTIL_SSEF_H__
#include <math.h>
#include "util/ssei.h"
@ -534,6 +536,12 @@ __forceinline const ssef ceil(const ssef &a)
return _mm_round_ps(a, _MM_FROUND_TO_POS_INF);
# endif
# else
/* Non-SSE4.1 fallback, needed for floorfrac. */
__forceinline const ssef floor(const ssef &a)
return _mm_set_ps(floorf(a.f[3]), floorf(a.f[2]), floorf(a.f[1]), floorf(a.f[0]));
# endif
__forceinline ssei truncatei(const ssef &a)
@ -541,20 +549,11 @@ __forceinline ssei truncatei(const ssef &a)
return _mm_cvttps_epi32(a.m128);
/* This is about 25% faster than straightforward floor to integer conversion
* due to better pipelining.
* Unsaturated add 0xffffffff (a < 0) is the same as subtract -1.
__forceinline ssei floori(const ssef &a)
return truncatei(a) + cast((a < 0.0f).m128);
__forceinline ssef floorfrac(const ssef &x, ssei *i)
*i = floori(x);
return x - ssef(*i);
ssef f = floor(x);
*i = truncatei(f);
return x - f;