Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number of static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures of all kinds are stored in a single array. Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std::vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again. The limits for CUDA Fermi hardware still apply. Reviewers: tod_baudais, InsigMathK, dingto, #cycles Reviewed By: dingto, #cycles Subscribers: dingto, smellslikedonkey Differential Revision: https://developer.blender.org/D2650
This commit is contained in:
parent
a6b9bd023b
commit
ec25060a05
|
@ -19,6 +19,10 @@
|
|||
#ifndef __KERNEL_GLOBALS_H__
|
||||
#define __KERNEL_GLOBALS_H__
|
||||
|
||||
#ifdef __KERNEL_CPU__
|
||||
#include "util/util_vector.h"
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
|
||||
|
@ -38,12 +42,12 @@ struct Intersection;
|
|||
struct VolumeStep;
|
||||
|
||||
typedef struct KernelGlobals {
|
||||
texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_CPU];
|
||||
texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_CPU];
|
||||
texture_image_half4 texture_half4_images[TEX_NUM_HALF4_CPU];
|
||||
texture_image_float texture_float_images[TEX_NUM_FLOAT_CPU];
|
||||
texture_image_uchar texture_byte_images[TEX_NUM_BYTE_CPU];
|
||||
texture_image_half texture_half_images[TEX_NUM_HALF_CPU];
|
||||
vector<texture_image_uchar4> texture_byte4_images;
|
||||
vector<texture_image_float4> texture_float4_images;
|
||||
vector<texture_image_half4> texture_half4_images;
|
||||
vector<texture_image_float> texture_float_images;
|
||||
vector<texture_image_uchar> texture_byte_images;
|
||||
vector<texture_image_half> texture_half_images;
|
||||
|
||||
# define KERNEL_TEX(type, ttype, name) ttype name;
|
||||
# define KERNEL_IMAGE_TEX(type, ttype, name)
|
||||
|
|
|
@ -20,18 +20,19 @@
|
|||
|
||||
ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
|
||||
{
|
||||
const texture_type = kernel_tex_type(id);
|
||||
/* Float4 */
|
||||
if(id < TEX_START_BYTE4_OPENCL) {
|
||||
if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
|
||||
return kernel_tex_fetch(__tex_image_float4_packed, offset);
|
||||
}
|
||||
/* Byte4 */
|
||||
else if(id < TEX_START_FLOAT_OPENCL) {
|
||||
else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
|
||||
uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
|
||||
float f = 1.0f/255.0f;
|
||||
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
|
||||
}
|
||||
/* Float */
|
||||
else if(id < TEX_START_BYTE_OPENCL) {
|
||||
else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
|
||||
float f = kernel_tex_fetch(__tex_image_float_packed, offset);
|
||||
return make_float4(f, f, f, 1.0f);
|
||||
}
|
||||
|
|
|
@ -95,9 +95,12 @@ void kernel_tex_copy(KernelGlobals *kg,
|
|||
else if(strstr(name, "__tex_image_float4")) {
|
||||
texture_image_float4 *tex = NULL;
|
||||
int id = atoi(name + strlen("__tex_image_float4_"));
|
||||
int array_index = id;
|
||||
int array_index = kernel_tex_index(id);
|
||||
|
||||
if(array_index >= 0 && array_index < TEX_NUM_FLOAT4_CPU) {
|
||||
if(array_index >= 0) {
|
||||
if(array_index >= kg->texture_float4_images.size()) {
|
||||
kg->texture_float4_images.resize(array_index+1);
|
||||
}
|
||||
tex = &kg->texture_float4_images[array_index];
|
||||
}
|
||||
|
||||
|
@ -111,9 +114,12 @@ void kernel_tex_copy(KernelGlobals *kg,
|
|||
else if(strstr(name, "__tex_image_float")) {
|
||||
texture_image_float *tex = NULL;
|
||||
int id = atoi(name + strlen("__tex_image_float_"));
|
||||
int array_index = id - TEX_START_FLOAT_CPU;
|
||||
|
||||
if(array_index >= 0 && array_index < TEX_NUM_FLOAT_CPU) {
|
||||
int array_index = kernel_tex_index(id);
|
||||
|
||||
if(array_index >= 0) {
|
||||
if(array_index >= kg->texture_float_images.size()) {
|
||||
kg->texture_float_images.resize(array_index+1);
|
||||
}
|
||||
tex = &kg->texture_float_images[array_index];
|
||||
}
|
||||
|
||||
|
@ -127,9 +133,12 @@ void kernel_tex_copy(KernelGlobals *kg,
|
|||
else if(strstr(name, "__tex_image_byte4")) {
|
||||
texture_image_uchar4 *tex = NULL;
|
||||
int id = atoi(name + strlen("__tex_image_byte4_"));
|
||||
int array_index = id - TEX_START_BYTE4_CPU;
|
||||
|
||||
if(array_index >= 0 && array_index < TEX_NUM_BYTE4_CPU) {
|
||||
int array_index = kernel_tex_index(id);
|
||||
|
||||
if(array_index >= 0) {
|
||||
if(array_index >= kg->texture_byte4_images.size()) {
|
||||
kg->texture_byte4_images.resize(array_index+1);
|
||||
}
|
||||
tex = &kg->texture_byte4_images[array_index];
|
||||
}
|
||||
|
||||
|
@ -143,9 +152,12 @@ void kernel_tex_copy(KernelGlobals *kg,
|
|||
else if(strstr(name, "__tex_image_byte")) {
|
||||
texture_image_uchar *tex = NULL;
|
||||
int id = atoi(name + strlen("__tex_image_byte_"));
|
||||
int array_index = id - TEX_START_BYTE_CPU;
|
||||
|
||||
if(array_index >= 0 && array_index < TEX_NUM_BYTE_CPU) {
|
||||
int array_index = kernel_tex_index(id);
|
||||
|
||||
if(array_index >= 0) {
|
||||
if(array_index >= kg->texture_byte_images.size()) {
|
||||
kg->texture_byte_images.resize(array_index+1);
|
||||
}
|
||||
tex = &kg->texture_byte_images[array_index];
|
||||
}
|
||||
|
||||
|
@ -159,9 +171,12 @@ void kernel_tex_copy(KernelGlobals *kg,
|
|||
else if(strstr(name, "__tex_image_half4")) {
|
||||
texture_image_half4 *tex = NULL;
|
||||
int id = atoi(name + strlen("__tex_image_half4_"));
|
||||
int array_index = id - TEX_START_HALF4_CPU;
|
||||
|
||||
if(array_index >= 0 && array_index < TEX_NUM_HALF4_CPU) {
|
||||
int array_index = kernel_tex_index(id);
|
||||
|
||||
if(array_index >= 0) {
|
||||
if(array_index >= kg->texture_half4_images.size()) {
|
||||
kg->texture_half4_images.resize(array_index+1);
|
||||
}
|
||||
tex = &kg->texture_half4_images[array_index];
|
||||
}
|
||||
|
||||
|
@ -175,9 +190,12 @@ void kernel_tex_copy(KernelGlobals *kg,
|
|||
else if(strstr(name, "__tex_image_half")) {
|
||||
texture_image_half *tex = NULL;
|
||||
int id = atoi(name + strlen("__tex_image_half_"));
|
||||
int array_index = id - TEX_START_HALF_CPU;
|
||||
|
||||
if(array_index >= 0 && array_index < TEX_NUM_HALF_CPU) {
|
||||
int array_index = kernel_tex_index(id);
|
||||
|
||||
if(array_index >= 0) {
|
||||
if(array_index >= kg->texture_half_images.size()) {
|
||||
kg->texture_half_images.resize(array_index+1);
|
||||
}
|
||||
tex = &kg->texture_half_images[array_index];
|
||||
}
|
||||
|
||||
|
|
|
@ -23,51 +23,59 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y)
|
||||
{
|
||||
if(tex >= TEX_START_HALF_CPU)
|
||||
return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_BYTE_CPU)
|
||||
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_FLOAT_CPU)
|
||||
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y);
|
||||
else if(tex >= TEX_START_BYTE4_CPU)
|
||||
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y);
|
||||
else
|
||||
return kg->texture_float4_images[tex].interp(x, y);
|
||||
switch(kernel_tex_type(tex)) {
|
||||
case IMAGE_DATA_TYPE_HALF:
|
||||
return kg->texture_half_images[kernel_tex_index(tex)].interp(x, y);
|
||||
case IMAGE_DATA_TYPE_BYTE:
|
||||
return kg->texture_byte_images[kernel_tex_index(tex)].interp(x, y);
|
||||
case IMAGE_DATA_TYPE_FLOAT:
|
||||
return kg->texture_float_images[kernel_tex_index(tex)].interp(x, y);
|
||||
case IMAGE_DATA_TYPE_HALF4:
|
||||
return kg->texture_half4_images[kernel_tex_index(tex)].interp(x, y);
|
||||
case IMAGE_DATA_TYPE_BYTE4:
|
||||
return kg->texture_byte4_images[kernel_tex_index(tex)].interp(x, y);
|
||||
case IMAGE_DATA_TYPE_FLOAT4:
|
||||
default:
|
||||
return kg->texture_float4_images[kernel_tex_index(tex)].interp(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z)
|
||||
{
|
||||
if(tex >= TEX_START_HALF_CPU)
|
||||
return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_BYTE_CPU)
|
||||
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_FLOAT_CPU)
|
||||
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z);
|
||||
else if(tex >= TEX_START_BYTE4_CPU)
|
||||
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z);
|
||||
else
|
||||
return kg->texture_float4_images[tex].interp_3d(x, y, z);
|
||||
|
||||
switch(kernel_tex_type(tex)) {
|
||||
case IMAGE_DATA_TYPE_HALF:
|
||||
return kg->texture_half_images[kernel_tex_index(tex)].interp_3d(x, y, z);
|
||||
case IMAGE_DATA_TYPE_BYTE:
|
||||
return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d(x, y, z);
|
||||
case IMAGE_DATA_TYPE_FLOAT:
|
||||
return kg->texture_float_images[kernel_tex_index(tex)].interp_3d(x, y, z);
|
||||
case IMAGE_DATA_TYPE_HALF4:
|
||||
return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
|
||||
case IMAGE_DATA_TYPE_BYTE4:
|
||||
return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
|
||||
case IMAGE_DATA_TYPE_FLOAT4:
|
||||
default:
|
||||
return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
|
||||
{
|
||||
if(tex >= TEX_START_HALF_CPU)
|
||||
return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_BYTE_CPU)
|
||||
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_FLOAT_CPU)
|
||||
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_HALF4_CPU)
|
||||
return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else if(tex >= TEX_START_BYTE4_CPU)
|
||||
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation);
|
||||
else
|
||||
return kg->texture_float4_images[tex].interp_3d_ex(x, y, z, interpolation);
|
||||
switch(kernel_tex_type(tex)) {
|
||||
case IMAGE_DATA_TYPE_HALF:
|
||||
return kg->texture_half_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
|
||||
case IMAGE_DATA_TYPE_BYTE:
|
||||
return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
|
||||
case IMAGE_DATA_TYPE_FLOAT:
|
||||
return kg->texture_float_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
|
||||
case IMAGE_DATA_TYPE_HALF4:
|
||||
return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
|
||||
case IMAGE_DATA_TYPE_BYTE4:
|
||||
return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
|
||||
case IMAGE_DATA_TYPE_FLOAT4:
|
||||
default:
|
||||
return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -151,8 +151,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
|
|||
# else
|
||||
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
|
||||
/* float4, byte4 and half4 */
|
||||
if(id < TEX_START_FLOAT_CUDA_KEPLER)
|
||||
const int texture_type = kernel_tex_type(id);
|
||||
if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4) {
|
||||
r = kernel_tex_image_interp_float4(tex, x, y);
|
||||
}
|
||||
/* float, byte and half */
|
||||
else {
|
||||
float f = kernel_tex_image_interp_float(tex, x, y);
|
||||
|
@ -166,8 +168,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
|
|||
|
||||
if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
|
||||
r_ssef = r_ssef / ssef(alpha);
|
||||
if(id >= TEX_NUM_FLOAT4_IMAGES)
|
||||
const int texture_type = kernel_tex_type(id);
|
||||
if(texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
|
||||
r_ssef = min(r_ssef, ssef(1.0f));
|
||||
}
|
||||
r.w = alpha;
|
||||
}
|
||||
|
||||
|
@ -181,8 +185,9 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
|
|||
r.x *= invw;
|
||||
r.y *= invw;
|
||||
r.z *= invw;
|
||||
|
||||
if(id >= TEX_NUM_FLOAT4_IMAGES) {
|
||||
|
||||
const int texture_type = kernel_tex_type(id);
|
||||
if(texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
|
||||
r.x = min(r.x, 1.0f);
|
||||
r.y = min(r.y, 1.0f);
|
||||
r.z = min(r.z, 1.0f);
|
||||
|
|
|
@ -46,7 +46,7 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
|
|||
# if defined(__KERNEL_CUDA__)
|
||||
# if __CUDA_ARCH__ >= 300
|
||||
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
|
||||
if(id < TEX_START_HALF4_CUDA_KEPLER)
|
||||
if(kernel_tex_type(id) == IMAGE_DATA_TYPE_FLOAT4 || kernel_tex_type(id) == IMAGE_DATA_TYPE_BYTE4 || kernel_tex_type(id) == IMAGE_DATA_TYPE_HALF4)
|
||||
r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z);
|
||||
else {
|
||||
float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z);
|
||||
|
|
|
@ -49,54 +49,24 @@ ImageManager::ImageManager(const DeviceInfo& info)
|
|||
}
|
||||
|
||||
/* Set image limits */
|
||||
#define SET_TEX_IMAGES_LIMITS(ARCH) \
|
||||
{ \
|
||||
tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \
|
||||
}
|
||||
|
||||
if(device_type == DEVICE_CPU) {
|
||||
SET_TEX_IMAGES_LIMITS(CPU);
|
||||
}
|
||||
else if(device_type == DEVICE_CUDA) {
|
||||
if(info.has_bindless_textures) {
|
||||
SET_TEX_IMAGES_LIMITS(CUDA_KEPLER);
|
||||
}
|
||||
else {
|
||||
SET_TEX_IMAGES_LIMITS(CUDA);
|
||||
max_num_images = TEX_NUM_MAX;
|
||||
has_half_images = true;
|
||||
cuda_fermi_limits = false;
|
||||
|
||||
if(device_type == DEVICE_CUDA) {
|
||||
if(!info.has_bindless_textures) {
|
||||
/* CUDA Fermi hardware (SM 2.x) has a hard limit on the number of textures */
|
||||
cuda_fermi_limits = true;
|
||||
has_half_images = false;
|
||||
}
|
||||
}
|
||||
else if(device_type == DEVICE_OPENCL) {
|
||||
SET_TEX_IMAGES_LIMITS(OPENCL);
|
||||
has_half_images = false;
|
||||
}
|
||||
else {
|
||||
/* Should not happen. */
|
||||
tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0;
|
||||
tex_num_images[IMAGE_DATA_TYPE_HALF] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0;
|
||||
tex_start_images[IMAGE_DATA_TYPE_HALF] = 0;
|
||||
assert(0);
|
||||
|
||||
for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
|
||||
tex_num_images[type] = 0;
|
||||
}
|
||||
|
||||
#undef SET_TEX_IMAGES_LIMITS
|
||||
}
|
||||
|
||||
ImageManager::~ImageManager()
|
||||
|
@ -133,7 +103,7 @@ bool ImageManager::set_animation_frame_update(int frame)
|
|||
return false;
|
||||
}
|
||||
|
||||
ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filename,
|
||||
ImageDataType ImageManager::get_image_metadata(const string& filename,
|
||||
void *builtin_data,
|
||||
bool& is_linear)
|
||||
{
|
||||
|
@ -226,26 +196,42 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen
|
|||
}
|
||||
}
|
||||
|
||||
/* We use a consecutive slot counting scheme on the devices, in order
|
||||
* float4, byte4, half4, float, byte, half.
|
||||
/* The lower three bits of a device texture slot number indicate its type.
|
||||
* These functions convert the slot ids from ImageManager "images" ones
|
||||
* to device ones and vice versa. */
|
||||
* to device ones and vice versa.
|
||||
*
|
||||
* There are special cases for CUDA Fermi, since there we have only 90 image texture
|
||||
* slots available and should keep the flattened numbers in the 0-89 range.
|
||||
*/
|
||||
int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
|
||||
{
|
||||
return slot + tex_start_images[type];
|
||||
if(cuda_fermi_limits) {
|
||||
if(type == IMAGE_DATA_TYPE_BYTE4) {
|
||||
return slot + TEX_START_BYTE4_CUDA;
|
||||
}
|
||||
else {
|
||||
return slot;
|
||||
}
|
||||
}
|
||||
|
||||
return (slot << IMAGE_DATA_TYPE_SHIFT) | (type);
|
||||
}
|
||||
|
||||
int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *type)
|
||||
{
|
||||
for(int i = IMAGE_DATA_NUM_TYPES - 1; i >= 0; i--) {
|
||||
if(flat_slot >= tex_start_images[i]) {
|
||||
*type = (ImageDataType)i;
|
||||
return flat_slot - tex_start_images[i];
|
||||
if(cuda_fermi_limits) {
|
||||
if(flat_slot >= 4) {
|
||||
*type = IMAGE_DATA_TYPE_BYTE4;
|
||||
return flat_slot - TEX_START_BYTE4_CUDA;
|
||||
}
|
||||
else {
|
||||
*type = IMAGE_DATA_TYPE_FLOAT4;
|
||||
return flat_slot;
|
||||
}
|
||||
}
|
||||
|
||||
/* Should not happen. */
|
||||
return flat_slot;
|
||||
*type = (ImageDataType)(flat_slot & IMAGE_DATA_TYPE_MASK);
|
||||
return flat_slot >> IMAGE_DATA_TYPE_SHIFT;
|
||||
}
|
||||
|
||||
string ImageManager::name_from_type(int type)
|
||||
|
@ -299,13 +285,16 @@ int ImageManager::add_image(const string& filename,
|
|||
is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4);
|
||||
|
||||
/* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
|
||||
if((type == IMAGE_DATA_TYPE_FLOAT ||
|
||||
type == IMAGE_DATA_TYPE_HALF4 ||
|
||||
type == IMAGE_DATA_TYPE_HALF) &&
|
||||
tex_num_images[type] == 0) {
|
||||
if(type == IMAGE_DATA_TYPE_HALF4 && !has_half_images) {
|
||||
type = IMAGE_DATA_TYPE_FLOAT4;
|
||||
} else if(type == IMAGE_DATA_TYPE_HALF && !has_half_images) {
|
||||
type = IMAGE_DATA_TYPE_FLOAT;
|
||||
}
|
||||
|
||||
if(type == IMAGE_DATA_TYPE_FLOAT && cuda_fermi_limits) {
|
||||
type = IMAGE_DATA_TYPE_FLOAT4;
|
||||
}
|
||||
if(type == IMAGE_DATA_TYPE_BYTE && tex_num_images[type] == 0) {
|
||||
else if(type == IMAGE_DATA_TYPE_BYTE && cuda_fermi_limits) {
|
||||
type = IMAGE_DATA_TYPE_BYTE4;
|
||||
}
|
||||
|
||||
|
@ -338,14 +327,30 @@ int ImageManager::add_image(const string& filename,
|
|||
break;
|
||||
}
|
||||
|
||||
if(slot == images[type].size()) {
|
||||
/* Max images limit reached. */
|
||||
if(images[type].size() == tex_num_images[type]) {
|
||||
/* Count if we're over the limit */
|
||||
if(cuda_fermi_limits) {
|
||||
if(tex_num_images[IMAGE_DATA_TYPE_BYTE4] == TEX_NUM_BYTE4_CUDA
|
||||
|| tex_num_images[IMAGE_DATA_TYPE_FLOAT4] == TEX_NUM_FLOAT4_CUDA)
|
||||
{
|
||||
printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n",
|
||||
name_from_type(type).c_str(), tex_num_images[type], filename.c_str());
|
||||
name_from_type(type).c_str(), tex_num_images[type], filename.c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
/* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */
|
||||
int tex_count = 0;
|
||||
for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
|
||||
tex_count += tex_num_images[type];
|
||||
}
|
||||
if(tex_count > max_num_images) {
|
||||
printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n",
|
||||
max_num_images, filename.c_str());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(slot == images[type].size()) {
|
||||
images[type].resize(images[type].size() + 1);
|
||||
}
|
||||
|
||||
|
@ -362,6 +367,8 @@ int ImageManager::add_image(const string& filename,
|
|||
img->use_alpha = use_alpha;
|
||||
|
||||
images[type][slot] = img;
|
||||
|
||||
++tex_num_images[type];
|
||||
|
||||
need_update = true;
|
||||
|
||||
|
@ -666,16 +673,12 @@ void ImageManager::device_load_image(Device *device,
|
|||
/* Slot assignment */
|
||||
int flat_slot = type_index_to_flattened_slot(slot, type);
|
||||
|
||||
string name;
|
||||
if(flat_slot >= 100)
|
||||
name = string_printf("__tex_image_%s_%d", name_from_type(type).c_str(), flat_slot);
|
||||
else if(flat_slot >= 10)
|
||||
name = string_printf("__tex_image_%s_0%d", name_from_type(type).c_str(), flat_slot);
|
||||
else
|
||||
name = string_printf("__tex_image_%s_00%d", name_from_type(type).c_str(), flat_slot);
|
||||
string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
|
||||
|
||||
if(type == IMAGE_DATA_TYPE_FLOAT4) {
|
||||
device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
|
||||
if(dscene->tex_float4_image[slot] == NULL)
|
||||
dscene->tex_float4_image[slot] = new device_vector<float4>();
|
||||
device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
|
@ -705,7 +708,9 @@ void ImageManager::device_load_image(Device *device,
|
|||
}
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_FLOAT) {
|
||||
device_vector<float>& tex_img = dscene->tex_float_image[slot];
|
||||
if(dscene->tex_float_image[slot] == NULL)
|
||||
dscene->tex_float_image[slot] = new device_vector<float>();
|
||||
device_vector<float>& tex_img = *dscene->tex_float_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
|
@ -732,7 +737,9 @@ void ImageManager::device_load_image(Device *device,
|
|||
}
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_BYTE4) {
|
||||
device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
|
||||
if(dscene->tex_byte4_image[slot] == NULL)
|
||||
dscene->tex_byte4_image[slot] = new device_vector<uchar4>();
|
||||
device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
|
@ -762,7 +769,9 @@ void ImageManager::device_load_image(Device *device,
|
|||
}
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_BYTE){
|
||||
device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
|
||||
if(dscene->tex_byte_image[slot] == NULL)
|
||||
dscene->tex_byte_image[slot] = new device_vector<uchar>();
|
||||
device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
|
@ -788,7 +797,9 @@ void ImageManager::device_load_image(Device *device,
|
|||
}
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_HALF4){
|
||||
device_vector<half4>& tex_img = dscene->tex_half4_image[slot];
|
||||
if(dscene->tex_half4_image[slot] == NULL)
|
||||
dscene->tex_half4_image[slot] = new device_vector<half4>();
|
||||
device_vector<half4>& tex_img = *dscene->tex_half4_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
|
@ -817,7 +828,9 @@ void ImageManager::device_load_image(Device *device,
|
|||
}
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_HALF){
|
||||
device_vector<half>& tex_img = dscene->tex_half_image[slot];
|
||||
if(dscene->tex_half_image[slot] == NULL)
|
||||
dscene->tex_half_image[slot] = new device_vector<half>();
|
||||
device_vector<half>& tex_img = *dscene->tex_half_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
|
@ -857,69 +870,50 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD
|
|||
((OSL::TextureSystem*)osl_texture_system)->invalidate(filename);
|
||||
#endif
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_FLOAT4) {
|
||||
device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
device->tex_free(tex_img);
|
||||
else {
|
||||
device_memory *tex_img = NULL;
|
||||
switch(type) {
|
||||
case IMAGE_DATA_TYPE_FLOAT4:
|
||||
tex_img = dscene->tex_float4_image[slot];
|
||||
dscene->tex_float4_image[slot] = NULL;
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_FLOAT:
|
||||
tex_img = dscene->tex_float_image[slot];
|
||||
dscene->tex_float_image[slot] = NULL;
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_BYTE:
|
||||
tex_img = dscene->tex_byte_image[slot];
|
||||
dscene->tex_byte_image[slot]= NULL;
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_BYTE4:
|
||||
tex_img = dscene->tex_byte4_image[slot];
|
||||
dscene->tex_byte4_image[slot]= NULL;
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_HALF:
|
||||
tex_img = dscene->tex_half_image[slot];
|
||||
dscene->tex_half_image[slot]= NULL;
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_HALF4:
|
||||
tex_img = dscene->tex_half4_image[slot];
|
||||
dscene->tex_half4_image[slot]= NULL;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
tex_img = NULL;
|
||||
}
|
||||
if(tex_img) {
|
||||
if(tex_img->device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
device->tex_free(*tex_img);
|
||||
}
|
||||
|
||||
tex_img.clear();
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_FLOAT) {
|
||||
device_vector<float>& tex_img = dscene->tex_float_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
device->tex_free(tex_img);
|
||||
delete tex_img;
|
||||
}
|
||||
|
||||
tex_img.clear();
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_BYTE4) {
|
||||
device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
device->tex_free(tex_img);
|
||||
}
|
||||
|
||||
tex_img.clear();
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_BYTE){
|
||||
device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
device->tex_free(tex_img);
|
||||
}
|
||||
|
||||
tex_img.clear();
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_HALF4){
|
||||
device_vector<half4>& tex_img = dscene->tex_half4_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
device->tex_free(tex_img);
|
||||
}
|
||||
|
||||
tex_img.clear();
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_HALF){
|
||||
device_vector<half>& tex_img = dscene->tex_half_image[slot];
|
||||
|
||||
if(tex_img.device_pointer) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
device->tex_free(tex_img);
|
||||
}
|
||||
|
||||
tex_img.clear();
|
||||
}
|
||||
|
||||
delete images[type][slot];
|
||||
images[type][slot] = NULL;
|
||||
--tex_num_images[type];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -934,6 +928,32 @@ void ImageManager::device_update(Device *device,
|
|||
TaskPool pool;
|
||||
|
||||
for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
|
||||
switch(type) {
|
||||
case IMAGE_DATA_TYPE_BYTE4:
|
||||
if(dscene->tex_byte4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE4])
|
||||
dscene->tex_byte4_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE4]);
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_FLOAT4:
|
||||
if(dscene->tex_float4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT4])
|
||||
dscene->tex_float4_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT4]);
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_BYTE:
|
||||
if(dscene->tex_byte_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE])
|
||||
dscene->tex_byte_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE]);
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_FLOAT:
|
||||
if(dscene->tex_float_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT])
|
||||
dscene->tex_float_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT]);
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_HALF4:
|
||||
if(dscene->tex_half4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF4])
|
||||
dscene->tex_half4_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF4]);
|
||||
break;
|
||||
case IMAGE_DATA_TYPE_HALF:
|
||||
if(dscene->tex_half_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF])
|
||||
dscene->tex_half_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF]);
|
||||
break;
|
||||
}
|
||||
for(size_t slot = 0; slot < images[type].size(); slot++) {
|
||||
if(!images[type][slot])
|
||||
continue;
|
||||
|
@ -1029,7 +1049,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||
if(!images[type][slot])
|
||||
continue;
|
||||
|
||||
device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
|
||||
device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
|
||||
size += tex_img.size();
|
||||
}
|
||||
|
||||
|
@ -1039,7 +1059,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||
if(!images[type][slot])
|
||||
continue;
|
||||
|
||||
device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
|
||||
device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
|
||||
|
||||
uint8_t options = pack_image_options(type, slot);
|
||||
|
||||
|
@ -1059,7 +1079,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||
if(!images[type][slot])
|
||||
continue;
|
||||
|
||||
device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
|
||||
device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
|
||||
size += tex_img.size();
|
||||
}
|
||||
|
||||
|
@ -1069,7 +1089,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||
if(!images[type][slot])
|
||||
continue;
|
||||
|
||||
device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
|
||||
device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
|
||||
|
||||
/* todo: support 3D textures, only CPU for now */
|
||||
|
||||
|
@ -1091,7 +1111,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||
if(!images[type][slot])
|
||||
continue;
|
||||
|
||||
device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
|
||||
device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
|
||||
size += tex_img.size();
|
||||
}
|
||||
|
||||
|
@ -1101,7 +1121,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||
if(!images[type][slot])
|
||||
continue;
|
||||
|
||||
device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
|
||||
device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
|
||||
|
||||
uint8_t options = pack_image_options(type, slot);
|
||||
|
||||
|
@ -1121,7 +1141,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||
if(!images[type][slot])
|
||||
continue;
|
||||
|
||||
device_vector<float>& tex_img = dscene->tex_float_image[slot];
|
||||
device_vector<float>& tex_img = *dscene->tex_float_image[slot];
|
||||
size += tex_img.size();
|
||||
}
|
||||
|
||||
|
@ -1131,7 +1151,7 @@ void ImageManager::device_pack_images(Device *device,
|
|||
if(!images[type][slot])
|
||||
continue;
|
||||
|
||||
device_vector<float>& tex_img = dscene->tex_float_image[slot];
|
||||
device_vector<float>& tex_img = *dscene->tex_float_image[slot];
|
||||
|
||||
/* todo: support 3D textures, only CPU for now */
|
||||
|
||||
|
@ -1200,6 +1220,13 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
|
|||
}
|
||||
images[type].clear();
|
||||
}
|
||||
|
||||
dscene->tex_byte4_image.clear();
|
||||
dscene->tex_byte_image.clear();
|
||||
dscene->tex_float4_image.clear();
|
||||
dscene->tex_float_image.clear();
|
||||
dscene->tex_half4_image.clear();
|
||||
dscene->tex_half_image.clear();
|
||||
|
||||
device->tex_free(dscene->tex_image_byte4_packed);
|
||||
device->tex_free(dscene->tex_image_float4_packed);
|
||||
|
|
|
@ -37,17 +37,6 @@ public:
|
|||
explicit ImageManager(const DeviceInfo& info);
|
||||
~ImageManager();
|
||||
|
||||
enum ImageDataType {
|
||||
IMAGE_DATA_TYPE_FLOAT4 = 0,
|
||||
IMAGE_DATA_TYPE_BYTE4 = 1,
|
||||
IMAGE_DATA_TYPE_HALF4 = 2,
|
||||
IMAGE_DATA_TYPE_FLOAT = 3,
|
||||
IMAGE_DATA_TYPE_BYTE = 4,
|
||||
IMAGE_DATA_TYPE_HALF = 5,
|
||||
|
||||
IMAGE_DATA_NUM_TYPES
|
||||
};
|
||||
|
||||
int add_image(const string& filename,
|
||||
void *builtin_data,
|
||||
bool animated,
|
||||
|
@ -124,7 +113,9 @@ public:
|
|||
|
||||
private:
|
||||
int tex_num_images[IMAGE_DATA_NUM_TYPES];
|
||||
int tex_start_images[IMAGE_DATA_NUM_TYPES];
|
||||
int max_num_images;
|
||||
bool has_half_images;
|
||||
bool cuda_fermi_limits;
|
||||
|
||||
thread_mutex device_mutex;
|
||||
int animation_frame;
|
||||
|
|
|
@ -364,9 +364,9 @@ void ImageTextureNode::compile(OSLCompiler& compiler)
|
|||
image_manager = compiler.image_manager;
|
||||
if(is_float == -1) {
|
||||
if(builtin_data == NULL) {
|
||||
ImageManager::ImageDataType type;
|
||||
ImageDataType type;
|
||||
type = image_manager->get_image_metadata(filename.string(), NULL, is_linear);
|
||||
if(type == ImageManager::IMAGE_DATA_TYPE_FLOAT || type == ImageManager::IMAGE_DATA_TYPE_FLOAT4)
|
||||
if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
|
||||
is_float = 1;
|
||||
}
|
||||
else {
|
||||
|
@ -553,9 +553,9 @@ void EnvironmentTextureNode::compile(OSLCompiler& compiler)
|
|||
image_manager = compiler.image_manager;
|
||||
if(is_float == -1) {
|
||||
if(builtin_data == NULL) {
|
||||
ImageManager::ImageDataType type;
|
||||
ImageDataType type;
|
||||
type = image_manager->get_image_metadata(filename.string(), NULL, is_linear);
|
||||
if(type == ImageManager::IMAGE_DATA_TYPE_FLOAT || type == ImageManager::IMAGE_DATA_TYPE_FLOAT4)
|
||||
if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
|
||||
is_float = 1;
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -114,13 +114,13 @@ public:
|
|||
device_vector<uint> sobol_directions;
|
||||
|
||||
/* cpu images */
|
||||
device_vector<uchar4> tex_byte4_image[TEX_NUM_BYTE4_CPU];
|
||||
device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_CPU];
|
||||
device_vector<float> tex_float_image[TEX_NUM_FLOAT_CPU];
|
||||
device_vector<uchar> tex_byte_image[TEX_NUM_BYTE_CPU];
|
||||
device_vector<half4> tex_half4_image[TEX_NUM_HALF4_CPU];
|
||||
device_vector<half> tex_half_image[TEX_NUM_HALF_CPU];
|
||||
|
||||
std::vector<device_vector<uchar4>* > tex_byte4_image;
|
||||
std::vector<device_vector<float4>* > tex_float4_image;
|
||||
std::vector<device_vector<float>* > tex_float_image;
|
||||
std::vector<device_vector<uchar>* > tex_byte_image;
|
||||
std::vector<device_vector<half4>* > tex_half4_image;
|
||||
std::vector<device_vector<half>* > tex_half_image;
|
||||
|
||||
/* opencl images */
|
||||
device_vector<uchar4> tex_image_byte4_packed;
|
||||
device_vector<float4> tex_image_float4_packed;
|
||||
|
|
|
@ -21,62 +21,22 @@ CCL_NAMESPACE_BEGIN
|
|||
|
||||
/* Texture limits on devices. */
|
||||
|
||||
/* CPU */
|
||||
#define TEX_NUM_FLOAT4_CPU 1024
|
||||
#define TEX_NUM_BYTE4_CPU 1024
|
||||
#define TEX_NUM_HALF4_CPU 1024
|
||||
#define TEX_NUM_FLOAT_CPU 1024
|
||||
#define TEX_NUM_BYTE_CPU 1024
|
||||
#define TEX_NUM_HALF_CPU 1024
|
||||
#define TEX_START_FLOAT4_CPU 0
|
||||
#define TEX_START_BYTE4_CPU TEX_NUM_FLOAT4_CPU
|
||||
#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU)
|
||||
#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU)
|
||||
#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU)
|
||||
#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU)
|
||||
|
||||
/* CUDA (Geforce 4xx and 5xx) */
|
||||
#define TEX_NUM_FLOAT4_CUDA 5
|
||||
#define TEX_NUM_BYTE4_CUDA 85
|
||||
#define TEX_NUM_HALF4_CUDA 0
|
||||
#define TEX_NUM_FLOAT_CUDA 0
|
||||
#define TEX_NUM_BYTE_CUDA 0
|
||||
#define TEX_NUM_HALF_CUDA 0
|
||||
#define TEX_START_FLOAT4_CUDA 0
|
||||
#define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA
|
||||
#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
|
||||
#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
|
||||
#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
|
||||
#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
|
||||
|
||||
/* CUDA (Kepler, Geforce 6xx and above) */
|
||||
#define TEX_NUM_FLOAT4_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_BYTE4_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_HALF4_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_FLOAT_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_BYTE_CUDA_KEPLER 1024
|
||||
#define TEX_NUM_HALF_CUDA_KEPLER 1024
|
||||
#define TEX_START_FLOAT4_CUDA_KEPLER 0
|
||||
#define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER
|
||||
#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
|
||||
#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER)
|
||||
#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER)
|
||||
#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER)
|
||||
|
||||
/* OpenCL */
|
||||
#define TEX_NUM_FLOAT4_OPENCL 1024
|
||||
#define TEX_NUM_BYTE4_OPENCL 1024
|
||||
#define TEX_NUM_HALF4_OPENCL 0
|
||||
#define TEX_NUM_FLOAT_OPENCL 1024
|
||||
#define TEX_NUM_BYTE_OPENCL 1024
|
||||
#define TEX_NUM_HALF_OPENCL 0
|
||||
#define TEX_START_FLOAT4_OPENCL 0
|
||||
#define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL
|
||||
#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL)
|
||||
#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL)
|
||||
#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL)
|
||||
#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL)
|
||||
#define TEX_NUM_FLOAT4_CUDA 5
|
||||
#define TEX_NUM_BYTE4_CUDA 84
|
||||
#define TEX_NUM_HALF4_CUDA 0
|
||||
#define TEX_NUM_FLOAT_CUDA 0
|
||||
#define TEX_NUM_BYTE_CUDA 0
|
||||
#define TEX_NUM_HALF_CUDA 0
|
||||
#define TEX_START_FLOAT4_CUDA 0
|
||||
#define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA
|
||||
#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
|
||||
#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
|
||||
#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
|
||||
#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
|
||||
|
||||
/* Any architecture other than old CUDA cards */
|
||||
#define TEX_NUM_MAX (INT_MAX >> 4)
|
||||
|
||||
/* Color to use when textures are not found. */
|
||||
#define TEX_IMAGE_MISSING_R 1
|
||||
|
@ -84,6 +44,14 @@ CCL_NAMESPACE_BEGIN
|
|||
#define TEX_IMAGE_MISSING_B 1
|
||||
#define TEX_IMAGE_MISSING_A 1
|
||||
|
||||
/* Decode a flat texture slot id into its image data type and its index.
 *
 * On CUDA devices below compute capability 3.0 the slot id is used directly
 * as the index, and the type is FLOAT4 for ids below TEX_START_BYTE4_CUDA
 * and BYTE4 otherwise.
 *
 * Everywhere else the type lives in the low IMAGE_DATA_TYPE_SHIFT bits of the
 * id and the index in the remaining upper bits.
 *
 * The macro argument is parenthesized so that expressions such as
 * kernel_tex_type(a | b) are evaluated as a whole before masking/shifting.
 */
#if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300)
#  define kernel_tex_type(tex) ((tex) < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4)
#  define kernel_tex_index(tex) (tex)
#else
#  define kernel_tex_type(tex) ((tex) & IMAGE_DATA_TYPE_MASK)
#  define kernel_tex_index(tex) ((tex) >> IMAGE_DATA_TYPE_SHIFT)
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TEXTURE_H__ */
|
||||
|
|
|
@ -154,6 +154,25 @@ enum InterpolationType {
|
|||
INTERPOLATION_NUM_TYPES,
|
||||
};
|
||||
|
||||
/* Texture types
 *
 * The type is stored in the lower bits of a flat texture index, so every
 * enumerator value must fit in IMAGE_DATA_TYPE_SHIFT bits. The bit mask is
 * derived from the shift below so the two constants cannot drift out of sync.
 */

enum ImageDataType {
	IMAGE_DATA_TYPE_FLOAT4 = 0,
	IMAGE_DATA_TYPE_BYTE4 = 1,
	IMAGE_DATA_TYPE_HALF4 = 2,
	IMAGE_DATA_TYPE_FLOAT = 3,
	IMAGE_DATA_TYPE_BYTE = 4,
	IMAGE_DATA_TYPE_HALF = 5,

	/* Number of types above; not a valid type itself. */
	IMAGE_DATA_NUM_TYPES
};

/* Number of low bits of a flat index that hold the ImageDataType. */
#define IMAGE_DATA_TYPE_SHIFT 3
/* Mask selecting exactly those low bits: (1 << shift) - 1, i.e. 0x7. */
#define IMAGE_DATA_TYPE_MASK ((1 << IMAGE_DATA_TYPE_SHIFT) - 1)
|
||||
|
||||
/* Extension types for textures.
|
||||
*
|
||||
* Defines how the image is extrapolated past its original bounds.
|
||||
|
|
Loading…
Reference in New Issue