Cycles: Deduplicate CPU kernel declaration and definition code

The main goal is to make editing kernel signatures easier and less prone to
errors such as forgetting to update one of the duplicated function signatures.

This will also make it easier to add new CPU architectures.

Reviewers: juicyfruit, dingto, lukasstockner97, brecht

Reviewed By: dingto, lukasstockner97, brecht

Differential Revision: https://developer.blender.org/D1703
This commit is contained in:
Sergey Sharybin 2015-12-30 17:54:02 +05:00
parent f320724195
commit 2b5d60eb2d
10 changed files with 239 additions and 359 deletions

View File

@ -63,6 +63,9 @@ set(SRC_HEADERS
kernel_types.h
kernel_volume.h
kernel_work_stealing.h
kernels/cpu/kernel_cpu.h
kernels/cpu/kernel_cpu_impl.h
)
set(SRC_CLOSURE_HEADERS

View File

@ -23,6 +23,10 @@
CCL_NAMESPACE_BEGIN
/* Helpers for building per-architecture kernel entry point names.
 *
 * KERNEL_FUNCTION_FULL_NAME(name) expands to kernel_<KERNEL_ARCH>_<name>,
 * e.g. kernel_cpu_sse2_path_trace when KERNEL_ARCH is defined as cpu_sse2.
 *
 * The intermediate KERNEL_NAME_EVAL level exists so that KERNEL_ARCH is
 * macro-expanded before KERNEL_NAME_JOIN pastes the tokens together: operands
 * of ## are not expanded on their own.
 */
#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z
#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name)
#define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
struct KernelGlobals;
KernelGlobals *kernel_globals_create();
@ -41,69 +45,33 @@ void kernel_tex_copy(KernelGlobals *kg,
InterpolationType interpolation=INTERPOLATION_LINEAR,
ExtensionType extension = EXTENSION_REPEAT);
void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int offset, int sample);
#define KERNEL_ARCH cpu
#include "kernels/cpu/kernel_cpu.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int offset, int sample);
#endif
# define KERNEL_ARCH cpu_sse2
# include "kernels/cpu/kernel_cpu.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int offset, int sample);
#endif
# define KERNEL_ARCH cpu_sse3
# include "kernels/cpu/kernel_cpu.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
void kernel_cpu_sse41_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int offset, int sample);
#endif
# define KERNEL_ARCH cpu_sse41
# include "kernels/cpu/kernel_cpu.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
void kernel_cpu_avx_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int offset, int sample);
#endif
# define KERNEL_ARCH cpu_avx
# include "kernels/cpu/kernel_cpu.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
void kernel_cpu_avx2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer,
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i, int offset, int sample);
#endif
# define KERNEL_ARCH cpu_avx2
# include "kernels/cpu/kernel_cpu.h"
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
CCL_NAMESPACE_END

View File

@ -16,15 +16,19 @@
/* CPU kernel entry points */
#include "kernel_compat_cpu.h"
/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */
#if defined(__x86_64__) || defined(_M_X64)
#define __KERNEL_SSE2__
#endif
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__)
/* do nothing */
#endif
#include "kernel.h"
#include "kernel_math.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
#include "kernel_path_branched.h"
#include "kernel_bake.h"
#define KERNEL_ARCH cpu
#include "kernel_cpu_impl.h"
CCL_NAMESPACE_BEGIN
@ -94,49 +98,4 @@ void kernel_tex_copy(KernelGlobals *kg,
assert(0);
}
/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */
#if defined(__x86_64__) || defined(_M_X64)
#define __KERNEL_SSE2__
#endif
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__)
/* do nothing */
#endif
/* Path Tracing */
/* Path tracing entry point of the CPU kernel.
 *
 * When __BRANCHED_PATH__ is defined and kernel_data.integrator.branched is
 * set, all arguments are forwarded to kernel_branched_path_trace(); otherwise
 * they are forwarded to kernel_path_trace().
 */
void kernel_cpu_path_trace(KernelGlobals *kg,
	float *buffer,
	unsigned int *rng_state,
	int sample,
	int x, int y,
	int offset,
	int stride)
{
#ifdef __BRANCHED_PATH__
	if(kernel_data.integrator.branched) {
		kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
		return;
	}
#endif
	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Film */
/* Film conversion entry point of the CPU kernel: passes every argument
 * through to kernel_film_convert_to_byte() unchanged. */
void kernel_cpu_convert_to_byte(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Film conversion entry point of the CPU kernel: passes every argument
 * through to kernel_film_convert_to_half_float() unchanged. */
void kernel_cpu_convert_to_half_float(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Shader Evaluation */
/* Shader evaluation entry point of the CPU kernel.
 *
 * Evaluation types at or above SHADER_EVAL_BAKE are handled by
 * kernel_bake_evaluate(); everything else goes to kernel_shader_evaluate(),
 * which does not receive `offset`.
 */
void kernel_cpu_shader(KernelGlobals *kg,
	uint4 *input,
	float4 *output,
	int type,
	int i,
	int offset,
	int sample)
{
	const ShaderEvalType eval_type = (ShaderEvalType)type;

	if(type < SHADER_EVAL_BAKE) {
		kernel_shader_evaluate(kg, input, output, eval_type, i, sample);
		return;
	}
	kernel_bake_evaluate(kg, input, output, eval_type, i, offset, sample);
}
CCL_NAMESPACE_END

View File

@ -30,58 +30,13 @@
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
#include "kernel_compat_cpu.h"
#include "kernel.h"
#include "kernel_math.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
/* Path Tracing */
/* Path tracing entry point of the AVX kernel.
 *
 * When __BRANCHED_PATH__ is defined and kernel_data.integrator.branched is
 * set, all arguments are forwarded to kernel_branched_path_trace(); otherwise
 * they are forwarded to kernel_path_trace().
 */
void kernel_cpu_avx_path_trace(KernelGlobals *kg,
	float *buffer,
	unsigned int *rng_state,
	int sample,
	int x, int y,
	int offset,
	int stride)
{
#ifdef __BRANCHED_PATH__
	if(kernel_data.integrator.branched) {
		kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
		return;
	}
#endif
	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Film */
/* Film conversion entry point of the AVX kernel: passes every argument
 * through to kernel_film_convert_to_byte() unchanged. */
void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Film conversion entry point of the AVX kernel: passes every argument
 * through to kernel_film_convert_to_half_float() unchanged. */
void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Shader Evaluate */
/* Shader evaluation entry point of the AVX kernel.
 *
 * Evaluation types at or above SHADER_EVAL_BAKE are handled by
 * kernel_bake_evaluate(); everything else goes to kernel_shader_evaluate(),
 * which does not receive `offset`.
 */
void kernel_cpu_avx_shader(KernelGlobals *kg,
	uint4 *input,
	float4 *output,
	int type,
	int i,
	int offset,
	int sample)
{
	const ShaderEvalType eval_type = (ShaderEvalType)type;

	if(type < SHADER_EVAL_BAKE) {
		kernel_shader_evaluate(kg, input, output, eval_type, i, sample);
		return;
	}
	kernel_bake_evaluate(kg, input, output, eval_type, i, offset, sample);
}
CCL_NAMESPACE_END
#else
# include "kernel.h"
# define KERNEL_ARCH cpu_avx
# include "kernel_cpu_impl.h"
#else /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */
/* Dummy symbol for builds where the AVX kernel is disabled: some linkers, in
 * combination with SCons, have trouble with a library that contains an empty
 * compilation unit, so make sure this one is never empty. */
void __dummy_function_cycles_avx(void);
void __dummy_function_cycles_avx(void) {}
#endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */

View File

@ -27,62 +27,17 @@
#define __KERNEL_AVX__
#define __KERNEL_AVX2__
#endif
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
#include "kernel_compat_cpu.h"
#include "kernel.h"
#include "kernel_math.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
/* Path Tracing */
/* Path tracing entry point of the AVX2 kernel.
 *
 * When __BRANCHED_PATH__ is defined and kernel_data.integrator.branched is
 * set, all arguments are forwarded to kernel_branched_path_trace(); otherwise
 * they are forwarded to kernel_path_trace().
 */
void kernel_cpu_avx2_path_trace(KernelGlobals *kg,
	float *buffer,
	unsigned int *rng_state,
	int sample,
	int x, int y,
	int offset,
	int stride)
{
#ifdef __BRANCHED_PATH__
	if(kernel_data.integrator.branched) {
		kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
		return;
	}
#endif
	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Film */
/* Film conversion entry point of the AVX2 kernel: passes every argument
 * through to kernel_film_convert_to_byte() unchanged. */
void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Film conversion entry point of the AVX2 kernel: passes every argument
 * through to kernel_film_convert_to_half_float() unchanged. */
void kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Shader Evaluate */
/* Shader evaluation entry point of the AVX2 kernel.
 *
 * Evaluation types at or above SHADER_EVAL_BAKE are handled by
 * kernel_bake_evaluate(); everything else goes to kernel_shader_evaluate(),
 * which does not receive `offset`.
 */
void kernel_cpu_avx2_shader(KernelGlobals *kg,
	uint4 *input,
	float4 *output,
	int type,
	int i,
	int offset,
	int sample)
{
	const ShaderEvalType eval_type = (ShaderEvalType)type;

	if(type < SHADER_EVAL_BAKE) {
		kernel_shader_evaluate(kg, input, output, eval_type, i, sample);
		return;
	}
	kernel_bake_evaluate(kg, input, output, eval_type, i, offset, sample);
}
CCL_NAMESPACE_END
#else
# include "kernel.h"
# define KERNEL_ARCH cpu_avx2
# include "kernel_cpu_impl.h"
#else /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
/* Dummy symbol for builds where the AVX2 kernel is disabled: some linkers, in
 * combination with SCons, have trouble with a library that contains an empty
 * compilation unit, so make sure this one is never empty. */
void __dummy_function_cycles_avx2(void);
void __dummy_function_cycles_avx2(void) {}
#endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */

View File

@ -0,0 +1,50 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Templated common declaration part of all CPU kernels. */
/* The includer must define KERNEL_ARCH before including this header; each
 * KERNEL_FUNCTION_FULL_NAME(name) below then declares kernel_<KERNEL_ARCH>_<name>.
 * kernel.h includes this header once per enabled architecture. */

/* Path tracing entry point. */
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
float *buffer,
unsigned int *rng_state,
int sample,
int x, int y,
int offset,
int stride);
/* Film conversion entry point writing into `rgba`
 * (see kernel_film_convert_to_byte()). */
void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
int x, int y,
int offset, int stride);
/* Film conversion entry point writing into `rgba`
 * (see kernel_film_convert_to_half_float()). */
void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
uchar4 *rgba,
float *buffer,
float sample_scale,
int x, int y,
int offset,
int stride);
/* Shader evaluation entry point; `type` is cast to ShaderEvalType by the
 * implementation. */
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
uint4 *input,
float4 *output,
int type,
int i,
int offset,
int sample);
/* Undefine so the includer can define KERNEL_ARCH again before the next
 * inclusion of this header (or of the implementation template). */
#undef KERNEL_ARCH

View File

@ -0,0 +1,126 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Templated common implementation part of all CPU kernels.
*
* The idea is that each architecture-specific .cpp file sets the optimization
* flags it needs and simply includes this file, instead of copying the actual
* implementation over.
*/
#include "kernel_compat_cpu.h"
#include "kernel_math.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
/* Path Tracing */
/* Defines kernel_<KERNEL_ARCH>_path_trace.
 *
 * When __BRANCHED_PATH__ is defined and kernel_data.integrator.branched is
 * set, all arguments are forwarded to kernel_branched_path_trace(); otherwise
 * they are forwarded to kernel_path_trace().
 */
void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg,
	float *buffer,
	unsigned int *rng_state,
	int sample,
	int x, int y,
	int offset,
	int stride)
{
#ifdef __BRANCHED_PATH__
	if(kernel_data.integrator.branched) {
		kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
		return;
	}
#endif
	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Film */
/* Defines kernel_<KERNEL_ARCH>_convert_to_byte: passes every argument through
 * to kernel_film_convert_to_byte() unchanged. */
void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Defines kernel_<KERNEL_ARCH>_convert_to_half_float: passes every argument
 * through to kernel_film_convert_to_half_float() unchanged. */
void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Shader Evaluate */
/* Defines kernel_<KERNEL_ARCH>_shader.
 *
 * Evaluation types at or above SHADER_EVAL_BAKE are handled by
 * kernel_bake_evaluate(); everything else goes to kernel_shader_evaluate(),
 * which does not receive `offset`.
 */
void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
	uint4 *input,
	float4 *output,
	int type,
	int i,
	int offset,
	int sample)
{
	const ShaderEvalType eval_type = (ShaderEvalType)type;

	if(type < SHADER_EVAL_BAKE) {
		kernel_shader_evaluate(kg, input, output, eval_type, i, sample);
		return;
	}
	kernel_bake_evaluate(kg, input, output, eval_type, i, offset, sample);
}
CCL_NAMESPACE_END

View File

@ -26,59 +26,13 @@
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
#include "kernel_compat_cpu.h"
#include "kernel.h"
#include "kernel_math.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
/* Path Tracing */
/* Path tracing entry point of the SSE2 kernel.
 *
 * When __BRANCHED_PATH__ is defined and kernel_data.integrator.branched is
 * set, all arguments are forwarded to kernel_branched_path_trace(); otherwise
 * they are forwarded to kernel_path_trace().
 */
void kernel_cpu_sse2_path_trace(KernelGlobals *kg,
	float *buffer,
	unsigned int *rng_state,
	int sample,
	int x, int y,
	int offset,
	int stride)
{
#ifdef __BRANCHED_PATH__
	if(kernel_data.integrator.branched) {
		kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
		return;
	}
#endif
	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Film */
/* Film conversion entry point of the SSE2 kernel: passes every argument
 * through to kernel_film_convert_to_byte() unchanged. */
void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Film conversion entry point of the SSE2 kernel: passes every argument
 * through to kernel_film_convert_to_half_float() unchanged. */
void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Shader Evaluate */
/* Shader evaluation entry point of the SSE2 kernel.
 *
 * Evaluation types at or above SHADER_EVAL_BAKE are handled by
 * kernel_bake_evaluate(); everything else goes to kernel_shader_evaluate(),
 * which does not receive `offset`.
 */
void kernel_cpu_sse2_shader(KernelGlobals *kg,
	uint4 *input,
	float4 *output,
	int type,
	int i,
	int offset,
	int sample)
{
	const ShaderEvalType eval_type = (ShaderEvalType)type;

	if(type < SHADER_EVAL_BAKE) {
		kernel_shader_evaluate(kg, input, output, eval_type, i, sample);
		return;
	}
	kernel_bake_evaluate(kg, input, output, eval_type, i, offset, sample);
}
CCL_NAMESPACE_END
#else
# include "kernel.h"
# define KERNEL_ARCH cpu_sse2
# include "kernel_cpu_impl.h"
#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */
/* Dummy symbol for builds where the SSE2 kernel is disabled: some linkers, in
 * combination with SCons, have trouble with a library that contains an empty
 * compilation unit, so make sure this one is never empty. */
void __dummy_function_cycles_sse2(void);
void __dummy_function_cycles_sse2(void) {}
#endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */

View File

@ -28,58 +28,13 @@
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
#include "kernel_compat_cpu.h"
#include "kernel.h"
#include "kernel_math.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
/* Path Tracing */
/* Path tracing entry point of the SSE3 kernel.
 *
 * When __BRANCHED_PATH__ is defined and kernel_data.integrator.branched is
 * set, all arguments are forwarded to kernel_branched_path_trace(); otherwise
 * they are forwarded to kernel_path_trace().
 */
void kernel_cpu_sse3_path_trace(KernelGlobals *kg,
	float *buffer,
	unsigned int *rng_state,
	int sample,
	int x, int y,
	int offset,
	int stride)
{
#ifdef __BRANCHED_PATH__
	if(kernel_data.integrator.branched) {
		kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
		return;
	}
#endif
	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Film */
/* Film conversion entry point of the SSE3 kernel: passes every argument
 * through to kernel_film_convert_to_byte() unchanged. */
void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Film conversion entry point of the SSE3 kernel: passes every argument
 * through to kernel_film_convert_to_half_float() unchanged. */
void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Shader Evaluate */
/* Shader evaluation entry point of the SSE3 kernel.
 *
 * Evaluation types at or above SHADER_EVAL_BAKE are handled by
 * kernel_bake_evaluate(); everything else goes to kernel_shader_evaluate(),
 * which does not receive `offset`.
 */
void kernel_cpu_sse3_shader(KernelGlobals *kg,
	uint4 *input,
	float4 *output,
	int type,
	int i,
	int offset,
	int sample)
{
	const ShaderEvalType eval_type = (ShaderEvalType)type;

	if(type < SHADER_EVAL_BAKE) {
		kernel_shader_evaluate(kg, input, output, eval_type, i, sample);
		return;
	}
	kernel_bake_evaluate(kg, input, output, eval_type, i, offset, sample);
}
CCL_NAMESPACE_END
#else
# include "kernel.h"
# define KERNEL_ARCH cpu_sse3
# include "kernel_cpu_impl.h"
#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */
/* Dummy symbol for builds where the SSE3 kernel is disabled: some linkers, in
 * combination with SCons, have trouble with a library that contains an empty
 * compilation unit, so make sure this one is never empty. */
void __dummy_function_cycles_sse3(void);
void __dummy_function_cycles_sse3(void) {}
#endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */

View File

@ -29,58 +29,13 @@
#include "util_optimization.h"
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
#include "kernel_compat_cpu.h"
#include "kernel.h"
#include "kernel_math.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
#include "kernel_path_branched.h"
#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
/* Path Tracing */
/* Path tracing entry point of the SSE4.1 kernel.
 *
 * When __BRANCHED_PATH__ is defined and kernel_data.integrator.branched is
 * set, all arguments are forwarded to kernel_branched_path_trace(); otherwise
 * they are forwarded to kernel_path_trace().
 */
void kernel_cpu_sse41_path_trace(KernelGlobals *kg,
	float *buffer,
	unsigned int *rng_state,
	int sample,
	int x, int y,
	int offset,
	int stride)
{
#ifdef __BRANCHED_PATH__
	if(kernel_data.integrator.branched) {
		kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
		return;
	}
#endif
	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Film */
/* Film conversion entry point of the SSE4.1 kernel: passes every argument
 * through to kernel_film_convert_to_byte() unchanged. */
void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Film conversion entry point of the SSE4.1 kernel: passes every argument
 * through to kernel_film_convert_to_half_float() unchanged. */
void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg,
	uchar4 *rgba,
	float *buffer,
	float sample_scale,
	int x, int y,
	int offset,
	int stride)
{
	kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
}
/* Shader Evaluate */
/* Shader evaluation entry point of the SSE4.1 kernel.
 *
 * Evaluation types at or above SHADER_EVAL_BAKE are handled by
 * kernel_bake_evaluate(); everything else goes to kernel_shader_evaluate(),
 * which does not receive `offset`.
 */
void kernel_cpu_sse41_shader(KernelGlobals *kg,
	uint4 *input,
	float4 *output,
	int type,
	int i,
	int offset,
	int sample)
{
	const ShaderEvalType eval_type = (ShaderEvalType)type;

	if(type < SHADER_EVAL_BAKE) {
		kernel_shader_evaluate(kg, input, output, eval_type, i, sample);
		return;
	}
	kernel_bake_evaluate(kg, input, output, eval_type, i, offset, sample);
}
CCL_NAMESPACE_END
#else
# include "kernel.h"
# define KERNEL_ARCH cpu_sse41
# include "kernel_cpu_impl.h"
#else /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */
/* Dummy symbol for builds where the SSE4.1 kernel is disabled: some linkers,
 * in combination with SCons, have trouble with a library that contains an
 * empty compilation unit, so make sure this one is never empty. */
void __dummy_function_cycles_sse41(void);
void __dummy_function_cycles_sse41(void) {}
#endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */