Cycles: reduce triangle memory usage with packed_float3

Depends on D13243

Differential Revision: https://developer.blender.org/D13244
This commit is contained in:
Brecht Van Lommel 2021-11-16 14:03:59 +01:00
parent 9937d5379c
commit 063ad8635e
20 changed files with 237 additions and 157 deletions

View File

@ -159,9 +159,9 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
/* Record geometric normal. */
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0));
const float3 tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1));
const float3 tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0);
const float3 tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1);
const float3 tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
/* Continue tracing (without this the trace call would return after the first hit). */

View File

@ -106,9 +106,9 @@ ccl_device Transform primitive_attribute_matrix(KernelGlobals kg,
{
Transform tfm;
tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
tfm.x = kernel_tex_fetch(__attributes_float4, desc.offset + 0);
tfm.y = kernel_tex_fetch(__attributes_float4, desc.offset + 1);
tfm.z = kernel_tex_fetch(__attributes_float4, desc.offset + 2);
return tfm;
}

View File

@ -126,8 +126,8 @@ ccl_device float3 curve_attribute_float3(KernelGlobals kg,
int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
int k1 = k0 + 1;
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
float3 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0);
float3 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1);
# ifdef __RAY_DIFFERENTIALS__
if (dx)
@ -149,7 +149,7 @@ ccl_device float3 curve_attribute_float3(KernelGlobals kg,
if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim :
desc.offset;
return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset));
return kernel_tex_fetch(__attributes_float3, offset);
}
else {
return make_float3(0.0f, 0.0f, 0.0f);
@ -168,8 +168,8 @@ ccl_device float4 curve_attribute_float4(KernelGlobals kg,
int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
int k1 = k0 + 1;
float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0);
float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1);
float4 f0 = kernel_tex_fetch(__attributes_float4, desc.offset + k0);
float4 f1 = kernel_tex_fetch(__attributes_float4, desc.offset + k1);
# ifdef __RAY_DIFFERENTIALS__
if (dx)
@ -191,7 +191,7 @@ ccl_device float4 curve_attribute_float4(KernelGlobals kg,
if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim :
desc.offset;
return kernel_tex_fetch(__attributes_float3, offset);
return kernel_tex_fetch(__attributes_float4, offset);
}
else {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);

View File

@ -48,8 +48,8 @@ ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals kg,
offset += step * numkeys;
keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
keys[0] = kernel_tex_fetch(__attributes_float4, offset + k0);
keys[1] = kernel_tex_fetch(__attributes_float4, offset + k1);
}
}
@ -106,10 +106,10 @@ ccl_device_inline void motion_curve_keys_for_step(KernelGlobals kg,
offset += step * numkeys;
keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2);
keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3);
keys[0] = kernel_tex_fetch(__attributes_float4, offset + k0);
keys[1] = kernel_tex_fetch(__attributes_float4, offset + k1);
keys[2] = kernel_tex_fetch(__attributes_float4, offset + k2);
keys[3] = kernel_tex_fetch(__attributes_float4, offset + k3);
}
}

View File

@ -43,9 +43,9 @@ ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg,
{
if (step == numsteps) {
/* center step: regular vertex location */
verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
verts[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0);
verts[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1);
verts[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2);
}
else {
/* center step not store in this array */
@ -54,9 +54,9 @@ ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg,
offset += step * numverts;
verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
verts[0] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x);
verts[1] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y);
verts[2] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z);
}
}
@ -70,9 +70,9 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg,
{
if (step == numsteps) {
/* center step: regular vertex location */
normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
normals[0] = kernel_tex_fetch(__tri_vnormal, tri_vindex.x);
normals[1] = kernel_tex_fetch(__tri_vnormal, tri_vindex.y);
normals[2] = kernel_tex_fetch(__tri_vnormal, tri_vindex.z);
}
else {
/* center step is not stored in this array */
@ -81,9 +81,9 @@ ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg,
offset += step * numverts;
normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
normals[0] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x);
normals[1] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y);
normals[2] = kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z);
}
}

View File

@ -380,7 +380,7 @@ ccl_device float3 patch_eval_float3(KernelGlobals kg,
*dv = make_float3(0.0f, 0.0f, 0.0f);
for (int i = 0; i < num_control; i++) {
float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i]));
float3 v = kernel_tex_fetch(__attributes_float3, offset + indices[i]);
val += v * weights[i];
if (du)
@ -417,7 +417,7 @@ ccl_device float4 patch_eval_float4(KernelGlobals kg,
*dv = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int i = 0; i < num_control; i++) {
float4 v = kernel_tex_fetch(__attributes_float3, offset + indices[i]);
float4 v = kernel_tex_fetch(__attributes_float4, offset + indices[i]);
val += v * weights[i];
if (du)

View File

@ -284,18 +284,33 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals kg,
int numverts, numkeys;
object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
/* lookup attributes */
motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE) ? numverts : numkeys;
motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL);
#ifdef __HAIR__
if (is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
object_position_transform(kg, sd, &motion_pre);
object_position_transform(kg, sd, &motion_post);
if (is_curve_primitive) {
motion_pre = float4_to_float3(curve_attribute_float4(kg, sd, desc, NULL, NULL));
desc.offset += numkeys;
motion_post = float4_to_float3(curve_attribute_float4(kg, sd, desc, NULL, NULL));
/* Curve */
if ((sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
object_position_transform(kg, sd, &motion_pre);
object_position_transform(kg, sd, &motion_post);
}
}
else
#endif
if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
/* Triangle */
if (subd_triangle_patch(kg, sd) == ~0) {
motion_pre = triangle_attribute_float3(kg, sd, desc, NULL, NULL);
desc.offset += numverts;
motion_post = triangle_attribute_float3(kg, sd, desc, NULL, NULL);
}
else {
motion_pre = subd_triangle_attribute_float3(kg, sd, desc, NULL, NULL);
desc.offset += numverts;
motion_post = subd_triangle_attribute_float3(kg, sd, desc, NULL, NULL);
}
}
}
/* object motion. note that depending on the mesh having motion vectors, this

View File

@ -443,8 +443,8 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
if (dy)
*dy = make_float3(0.0f, 0.0f, 0.0f);
return float4_to_float3(
kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch)));
return kernel_tex_fetch(__attributes_float3,
desc.offset + subd_triangle_patch_face(kg, patch));
}
else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
float2 uv[3];
@ -452,10 +452,10 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
uint4 v = subd_triangle_patch_indices(kg, patch);
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x));
float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y));
float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z));
float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w));
float3 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + v.x);
float3 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + v.y);
float3 f2 = kernel_tex_fetch(__attributes_float3, desc.offset + v.z);
float3 f3 = kernel_tex_fetch(__attributes_float3, desc.offset + v.w);
if (subd_triangle_patch_num_corners(kg, patch) != 4) {
f1 = (f1 + f0) * 0.5f;
@ -484,10 +484,10 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
float3 f0, f1, f2, f3;
f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset));
f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset));
f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset));
f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset));
f0 = kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset);
f1 = kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset);
f2 = kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset);
f3 = kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset);
if (subd_triangle_patch_num_corners(kg, patch) != 4) {
f1 = (f1 + f0) * 0.5f;
@ -513,7 +513,7 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
if (dy)
*dy = make_float3(0.0f, 0.0f, 0.0f);
return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset));
return kernel_tex_fetch(__attributes_float3, desc.offset);
}
else {
if (dx)
@ -590,7 +590,7 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
if (dy)
*dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
return kernel_tex_fetch(__attributes_float3,
return kernel_tex_fetch(__attributes_float4,
desc.offset + subd_triangle_patch_face(kg, patch));
}
else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
@ -599,10 +599,10 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
uint4 v = subd_triangle_patch_indices(kg, patch);
float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + v.x);
float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + v.y);
float4 f2 = kernel_tex_fetch(__attributes_float3, desc.offset + v.z);
float4 f3 = kernel_tex_fetch(__attributes_float3, desc.offset + v.w);
float4 f0 = kernel_tex_fetch(__attributes_float4, desc.offset + v.x);
float4 f1 = kernel_tex_fetch(__attributes_float4, desc.offset + v.y);
float4 f2 = kernel_tex_fetch(__attributes_float4, desc.offset + v.z);
float4 f3 = kernel_tex_fetch(__attributes_float4, desc.offset + v.w);
if (subd_triangle_patch_num_corners(kg, patch) != 4) {
f1 = (f1 + f0) * 0.5f;
@ -642,10 +642,10 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset)));
}
else {
f0 = kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset);
f1 = kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset);
f2 = kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset);
f3 = kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset);
f0 = kernel_tex_fetch(__attributes_float4, corners[0] + desc.offset);
f1 = kernel_tex_fetch(__attributes_float4, corners[1] + desc.offset);
f2 = kernel_tex_fetch(__attributes_float4, corners[2] + desc.offset);
f3 = kernel_tex_fetch(__attributes_float4, corners[3] + desc.offset);
}
if (subd_triangle_patch_num_corners(kg, patch) != 4) {
@ -672,7 +672,7 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
if (dy)
*dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
return kernel_tex_fetch(__attributes_float3, desc.offset);
return kernel_tex_fetch(__attributes_float4, desc.offset);
}
else {
if (dx)

View File

@ -29,9 +29,9 @@ ccl_device_inline float3 triangle_normal(KernelGlobals kg, ccl_private ShaderDat
{
/* load triangle vertices */
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
const float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
const float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
const float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
const float3 v0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0);
const float3 v1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1);
const float3 v2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2);
/* return normal */
if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
@ -54,9 +54,9 @@ ccl_device_inline void triangle_point_normal(KernelGlobals kg,
{
/* load triangle vertices */
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
float3 v0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0);
float3 v1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1);
float3 v2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2);
/* compute point */
float t = 1.0f - u - v;
*P = (u * v0 + v * v1 + t * v2);
@ -78,9 +78,9 @@ ccl_device_inline void triangle_point_normal(KernelGlobals kg,
ccl_device_inline void triangle_vertices(KernelGlobals kg, int prim, float3 P[3])
{
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
P[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0);
P[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1);
P[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2);
}
/* Triangle vertex locations and vertex normals */
@ -91,12 +91,12 @@ ccl_device_inline void triangle_vertices_and_normals(KernelGlobals kg,
float3 N[3])
{
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
N[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
N[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
N[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
P[0] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0);
P[1] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1);
P[2] = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2);
N[0] = kernel_tex_fetch(__tri_vnormal, tri_vindex.x);
N[1] = kernel_tex_fetch(__tri_vnormal, tri_vindex.y);
N[2] = kernel_tex_fetch(__tri_vnormal, tri_vindex.z);
}
/* Interpolate smooth vertex normal from vertices */
@ -106,9 +106,9 @@ triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v)
{
/* load triangle vertices */
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
float3 n0 = kernel_tex_fetch(__tri_vnormal, tri_vindex.x);
float3 n1 = kernel_tex_fetch(__tri_vnormal, tri_vindex.y);
float3 n2 = kernel_tex_fetch(__tri_vnormal, tri_vindex.z);
float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1);
@ -120,9 +120,9 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized(
{
/* load triangle vertices */
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
float3 n0 = kernel_tex_fetch(__tri_vnormal, tri_vindex.x);
float3 n1 = kernel_tex_fetch(__tri_vnormal, tri_vindex.y);
float3 n2 = kernel_tex_fetch(__tri_vnormal, tri_vindex.z);
/* ensure that the normals are in object space */
if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) {
@ -145,9 +145,9 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals kg,
{
/* fetch triangle vertex coordinates */
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
const float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
const float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
const float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
const float3 p0 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 0);
const float3 p1 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 1);
const float3 p2 = kernel_tex_fetch(__tri_verts, tri_vindex.w + 2);
/* compute derivatives of P w.r.t. uv */
*dPdu = (p0 - p2);
@ -267,15 +267,15 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals kg,
if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) {
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x));
f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y));
f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z));
f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x);
f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y);
f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z);
}
else {
const int tri = desc.offset + sd->prim * 3;
f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
f0 = kernel_tex_fetch(__attributes_float3, tri + 0);
f1 = kernel_tex_fetch(__attributes_float3, tri + 1);
f2 = kernel_tex_fetch(__attributes_float3, tri + 2);
}
#ifdef __RAY_DIFFERENTIALS__
@ -298,7 +298,7 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals kg,
if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim :
desc.offset;
return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset));
return kernel_tex_fetch(__attributes_float3, offset);
}
else {
return make_float3(0.0f, 0.0f, 0.0f);
@ -318,16 +318,16 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals kg,
if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) {
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x);
f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y);
f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z);
f0 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.x);
f1 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.y);
f2 = kernel_tex_fetch(__attributes_float4, desc.offset + tri_vindex.z);
}
else {
const int tri = desc.offset + sd->prim * 3;
if (desc.element == ATTR_ELEMENT_CORNER) {
f0 = kernel_tex_fetch(__attributes_float3, tri + 0);
f1 = kernel_tex_fetch(__attributes_float3, tri + 1);
f2 = kernel_tex_fetch(__attributes_float3, tri + 2);
f0 = kernel_tex_fetch(__attributes_float4, tri + 0);
f1 = kernel_tex_fetch(__attributes_float4, tri + 1);
f2 = kernel_tex_fetch(__attributes_float4, tri + 2);
}
else {
f0 = color_srgb_to_linear_v4(
@ -359,7 +359,7 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals kg,
if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim :
desc.offset;
return kernel_tex_fetch(__attributes_float3, offset);
return kernel_tex_fetch(__attributes_float4, offset);
}
else {
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);

View File

@ -40,7 +40,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex];
#else
const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
#endif
@ -51,9 +51,9 @@ ccl_device_inline bool triangle_intersect(KernelGlobals kg,
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
ssef_verts,
#else
float4_to_float3(tri_a),
float4_to_float3(tri_b),
float4_to_float3(tri_c),
tri_a,
tri_b,
tri_c,
#endif
&u,
&v,
@ -109,9 +109,9 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex];
# else
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)),
tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)),
tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
# endif
float t, u, v;
if (!ray_triangle_intersect(P,
@ -179,9 +179,9 @@ ccl_device_inline bool triangle_intersect_local(KernelGlobals kg,
/* Record geometric normal. */
# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)),
tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)),
tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
const float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
# endif
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
@ -223,9 +223,9 @@ ccl_device_inline float3 triangle_refine(KernelGlobals kg,
P = P + D * t;
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w;
const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
const packed_float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
@ -280,9 +280,9 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals kg,
# ifdef __INTERSECTION_REFINE__
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w;
const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
const packed_float3 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);

View File

@ -75,7 +75,7 @@ ccl_device float4 volume_attribute_float4(KernelGlobals kg,
const AttributeDescriptor desc)
{
if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
return kernel_tex_fetch(__attributes_float3, desc.offset);
return kernel_tex_fetch(__attributes_float4, desc.offset);
}
else if (desc.element == ATTR_ELEMENT_VOXEL) {
/* todo: optimize this so we don't have to transform both here and in

View File

@ -40,11 +40,11 @@ KERNEL_TEX(DecomposedTransform, __camera_motion)
/* triangles */
KERNEL_TEX(uint, __tri_shader)
KERNEL_TEX(float4, __tri_vnormal)
KERNEL_TEX(packed_float3, __tri_vnormal)
KERNEL_TEX(uint4, __tri_vindex)
KERNEL_TEX(uint, __tri_patch)
KERNEL_TEX(float2, __tri_patch_uv)
KERNEL_TEX(float4, __tri_verts)
KERNEL_TEX(packed_float3, __tri_verts)
/* curves */
KERNEL_TEX(KernelCurve, __curves)
@ -58,7 +58,8 @@ KERNEL_TEX(uint, __patches)
KERNEL_TEX(uint4, __attributes_map)
KERNEL_TEX(float, __attributes_float)
KERNEL_TEX(float2, __attributes_float2)
KERNEL_TEX(float4, __attributes_float3)
KERNEL_TEX(packed_float3, __attributes_float3)
KERNEL_TEX(float4, __attributes_float4)
KERNEL_TEX(uchar4, __attributes_uchar4)
/* lights */

View File

@ -404,6 +404,10 @@ AttrKernelDataType Attribute::kernel_type(const Attribute &attr)
return AttrKernelDataType::FLOAT2;
}
if (attr.type == TypeFloat4 || attr.type == TypeRGBA || attr.type == TypeDesc::TypeMatrix) {
return AttrKernelDataType::FLOAT4;
}
return AttrKernelDataType::FLOAT3;
}
@ -585,7 +589,7 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE);
break;
case ATTR_STD_MOTION_VERTEX_POSITION:
attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE_KEY_MOTION);
attr = add(name, TypeDesc::TypeFloat4, ATTR_ELEMENT_CURVE_KEY_MOTION);
break;
case ATTR_STD_CURVE_INTERCEPT:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CURVE_KEY);

View File

@ -47,12 +47,7 @@ struct Transform;
*
* The values of this enumeration are also used as flags to detect changes in AttributeSet. */
enum AttrKernelDataType {
FLOAT = 0,
FLOAT2 = 1,
FLOAT3 = 2,
UCHAR4 = 3,
};
enum AttrKernelDataType { FLOAT = 0, FLOAT2 = 1, FLOAT3 = 2, FLOAT4 = 3, UCHAR4 = 4, NUM = 5 };
/* Attribute
*

View File

@ -551,6 +551,7 @@ static void update_attribute_element_size(Geometry *geom,
size_t *attr_float_size,
size_t *attr_float2_size,
size_t *attr_float3_size,
size_t *attr_float4_size,
size_t *attr_uchar4_size)
{
if (mattr) {
@ -569,7 +570,10 @@ static void update_attribute_element_size(Geometry *geom,
*attr_float2_size += size;
}
else if (mattr->type == TypeDesc::TypeMatrix) {
*attr_float3_size += size * 4;
*attr_float4_size += size * 4;
}
else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) {
*attr_float4_size += size;
}
else {
*attr_float3_size += size;
@ -582,8 +586,10 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom,
size_t &attr_float_offset,
device_vector<float2> &attr_float2,
size_t &attr_float2_offset,
device_vector<float4> &attr_float3,
device_vector<packed_float3> &attr_float3,
size_t &attr_float3_offset,
device_vector<float4> &attr_float4,
size_t &attr_float4_offset,
device_vector<uchar4> &attr_uchar4,
size_t &attr_uchar4_offset,
Attribute *mattr,
@ -646,18 +652,30 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom,
}
else if (mattr->type == TypeDesc::TypeMatrix) {
Transform *tfm = mattr->data_transform();
offset = attr_float3_offset;
offset = attr_float4_offset;
assert(attr_float3.size() >= offset + size * 3);
assert(attr_float4.size() >= offset + size * 3);
if (mattr->modified) {
for (size_t k = 0; k < size * 3; k++) {
attr_float3[offset + k] = (&tfm->x)[k];
attr_float4[offset + k] = (&tfm->x)[k];
}
}
attr_float3_offset += size * 3;
attr_float4_offset += size * 3;
}
else if (mattr->type == TypeFloat4 || mattr->type == TypeRGBA) {
float4 *data = mattr->data_float4();
offset = attr_float4_offset;
assert(attr_float4.size() >= offset + size);
if (mattr->modified) {
for (size_t k = 0; k < size; k++) {
attr_float4[offset + k] = data[k];
}
}
attr_float4_offset += size;
}
else {
float4 *data = mattr->data_float4();
float3 *data = mattr->data_float3();
offset = attr_float3_offset;
assert(attr_float3.size() >= offset + size);
@ -783,6 +801,7 @@ void GeometryManager::device_update_attributes(Device *device,
size_t attr_float_size = 0;
size_t attr_float2_size = 0;
size_t attr_float3_size = 0;
size_t attr_float4_size = 0;
size_t attr_uchar4_size = 0;
for (size_t i = 0; i < scene->geometry.size(); i++) {
@ -797,6 +816,7 @@ void GeometryManager::device_update_attributes(Device *device,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
if (geom->is_mesh()) {
@ -809,6 +829,7 @@ void GeometryManager::device_update_attributes(Device *device,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
}
}
@ -824,6 +845,7 @@ void GeometryManager::device_update_attributes(Device *device,
&attr_float_size,
&attr_float2_size,
&attr_float3_size,
&attr_float4_size,
&attr_uchar4_size);
}
}
@ -831,19 +853,22 @@ void GeometryManager::device_update_attributes(Device *device,
dscene->attributes_float.alloc(attr_float_size);
dscene->attributes_float2.alloc(attr_float2_size);
dscene->attributes_float3.alloc(attr_float3_size);
dscene->attributes_float4.alloc(attr_float4_size);
dscene->attributes_uchar4.alloc(attr_uchar4_size);
/* The order of those flags needs to match that of AttrKernelDataType. */
const bool attributes_need_realloc[4] = {
const bool attributes_need_realloc[AttrKernelDataType::NUM] = {
dscene->attributes_float.need_realloc(),
dscene->attributes_float2.need_realloc(),
dscene->attributes_float3.need_realloc(),
dscene->attributes_float4.need_realloc(),
dscene->attributes_uchar4.need_realloc(),
};
size_t attr_float_offset = 0;
size_t attr_float2_offset = 0;
size_t attr_float3_offset = 0;
size_t attr_float4_offset = 0;
size_t attr_uchar4_offset = 0;
/* Fill in attributes. */
@ -868,6 +893,8 @@ void GeometryManager::device_update_attributes(Device *device,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
attr,
@ -891,6 +918,8 @@ void GeometryManager::device_update_attributes(Device *device,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
subd_attr,
@ -923,6 +952,8 @@ void GeometryManager::device_update_attributes(Device *device,
attr_float2_offset,
dscene->attributes_float3,
attr_float3_offset,
dscene->attributes_float4,
attr_float4_offset,
dscene->attributes_uchar4,
attr_uchar4_offset,
attr,
@ -954,6 +985,7 @@ void GeometryManager::device_update_attributes(Device *device,
dscene->attributes_float.copy_to_device_if_modified();
dscene->attributes_float2.copy_to_device_if_modified();
dscene->attributes_float3.copy_to_device_if_modified();
dscene->attributes_float4.copy_to_device_if_modified();
dscene->attributes_uchar4.copy_to_device_if_modified();
if (progress.get_cancel())
@ -1080,9 +1112,9 @@ void GeometryManager::device_update_mesh(Device *,
/* normals */
progress.set_status("Updating Mesh", "Computing normals");
float4 *tri_verts = dscene->tri_verts.alloc(tri_size * 3);
packed_float3 *tri_verts = dscene->tri_verts.alloc(tri_size * 3);
uint *tri_shader = dscene->tri_shader.alloc(tri_size);
float4 *vnormal = dscene->tri_vnormal.alloc(vert_size);
packed_float3 *vnormal = dscene->tri_vnormal.alloc(vert_size);
uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
uint *tri_patch = dscene->tri_patch.alloc(tri_size);
float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
@ -1293,18 +1325,21 @@ enum {
ATTR_FLOAT_MODIFIED = (1 << 2),
ATTR_FLOAT2_MODIFIED = (1 << 3),
ATTR_FLOAT3_MODIFIED = (1 << 4),
ATTR_UCHAR4_MODIFIED = (1 << 5),
ATTR_FLOAT4_MODIFIED = (1 << 5),
ATTR_UCHAR4_MODIFIED = (1 << 6),
CURVE_DATA_NEED_REALLOC = (1 << 6),
MESH_DATA_NEED_REALLOC = (1 << 7),
CURVE_DATA_NEED_REALLOC = (1 << 7),
MESH_DATA_NEED_REALLOC = (1 << 8),
ATTR_FLOAT_NEEDS_REALLOC = (1 << 8),
ATTR_FLOAT2_NEEDS_REALLOC = (1 << 9),
ATTR_FLOAT3_NEEDS_REALLOC = (1 << 10),
ATTR_UCHAR4_NEEDS_REALLOC = (1 << 11),
ATTR_FLOAT_NEEDS_REALLOC = (1 << 9),
ATTR_FLOAT2_NEEDS_REALLOC = (1 << 10),
ATTR_FLOAT3_NEEDS_REALLOC = (1 << 11),
ATTR_FLOAT4_NEEDS_REALLOC = (1 << 12),
ATTR_UCHAR4_NEEDS_REALLOC = (1 << 13),
ATTRS_NEED_REALLOC = (ATTR_FLOAT_NEEDS_REALLOC | ATTR_FLOAT2_NEEDS_REALLOC |
ATTR_FLOAT3_NEEDS_REALLOC | ATTR_UCHAR4_NEEDS_REALLOC),
ATTR_FLOAT3_NEEDS_REALLOC | ATTR_FLOAT4_NEEDS_REALLOC |
ATTR_UCHAR4_NEEDS_REALLOC),
DEVICE_MESH_DATA_NEEDS_REALLOC = (MESH_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC),
DEVICE_CURVE_DATA_NEEDS_REALLOC = (CURVE_DATA_NEED_REALLOC | ATTRS_NEED_REALLOC),
};
@ -1332,10 +1367,17 @@ static void update_device_flags_attribute(uint32_t &device_update_flags,
device_update_flags |= ATTR_FLOAT3_MODIFIED;
break;
}
case AttrKernelDataType::FLOAT4: {
device_update_flags |= ATTR_FLOAT4_MODIFIED;
break;
}
case AttrKernelDataType::UCHAR4: {
device_update_flags |= ATTR_UCHAR4_MODIFIED;
break;
}
case AttrKernelDataType::NUM: {
break;
}
}
}
}
@ -1352,6 +1394,9 @@ static void update_attribute_realloc_flags(uint32_t &device_update_flags,
if (attributes.modified(AttrKernelDataType::FLOAT3)) {
device_update_flags |= ATTR_FLOAT3_NEEDS_REALLOC;
}
if (attributes.modified(AttrKernelDataType::FLOAT4)) {
device_update_flags |= ATTR_FLOAT4_NEEDS_REALLOC;
}
if (attributes.modified(AttrKernelDataType::UCHAR4)) {
device_update_flags |= ATTR_UCHAR4_NEEDS_REALLOC;
}
@ -1553,6 +1598,14 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro
dscene->attributes_float3.tag_modified();
}
if (device_update_flags & ATTR_FLOAT4_NEEDS_REALLOC) {
dscene->attributes_map.tag_realloc();
dscene->attributes_float4.tag_realloc();
}
else if (device_update_flags & ATTR_FLOAT4_MODIFIED) {
dscene->attributes_float4.tag_modified();
}
if (device_update_flags & ATTR_UCHAR4_NEEDS_REALLOC) {
dscene->attributes_map.tag_realloc();
dscene->attributes_uchar4.tag_realloc();
@ -2014,6 +2067,7 @@ void GeometryManager::device_update(Device *device,
dscene->attributes_float.clear_modified();
dscene->attributes_float2.clear_modified();
dscene->attributes_float3.clear_modified();
dscene->attributes_float4.clear_modified();
dscene->attributes_uchar4.clear_modified();
}
@ -2041,6 +2095,7 @@ void GeometryManager::device_free(Device *device, DeviceScene *dscene, bool forc
dscene->attributes_float.free_if_need_realloc(force_free);
dscene->attributes_float2.free_if_need_realloc(force_free);
dscene->attributes_float3.free_if_need_realloc(force_free);
dscene->attributes_float4.free_if_need_realloc(force_free);
dscene->attributes_uchar4.free_if_need_realloc(force_free);
/* Signal for shaders like displacement not to do ray tracing. */

View File

@ -257,8 +257,10 @@ class GeometryManager {
size_t &attr_float_offset,
device_vector<float2> &attr_float2,
size_t &attr_float2_offset,
device_vector<float4> &attr_float3,
device_vector<packed_float3> &attr_float3,
size_t &attr_float3_offset,
device_vector<float4> &attr_float4,
size_t &attr_float4_offset,
device_vector<uchar4> &attr_uchar4,
size_t &attr_uchar4_offset,
Attribute *mattr,

View File

@ -707,7 +707,7 @@ void Mesh::pack_shaders(Scene *scene, uint *tri_shader)
}
}
void Mesh::pack_normals(float4 *vnormal)
void Mesh::pack_normals(packed_float3 *vnormal)
{
Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL);
if (attr_vN == NULL) {
@ -727,11 +727,14 @@ void Mesh::pack_normals(float4 *vnormal)
if (do_transform)
vNi = safe_normalize(transform_direction(&ntfm, vNi));
vnormal[i] = make_float4(vNi.x, vNi.y, vNi.z, 0.0f);
vnormal[i] = make_float3(vNi.x, vNi.y, vNi.z);
}
}
void Mesh::pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, float2 *tri_patch_uv)
void Mesh::pack_verts(packed_float3 *tri_verts,
uint4 *tri_vindex,
uint *tri_patch,
float2 *tri_patch_uv)
{
size_t verts_size = verts.size();
@ -752,9 +755,9 @@ void Mesh::pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, flo
tri_patch[i] = (!get_num_subd_faces()) ? -1 : (triangle_patch[i] * 8 + patch_offset);
tri_verts[i * 3] = float3_to_float4(verts[t.v[0]]);
tri_verts[i * 3 + 1] = float3_to_float4(verts[t.v[1]]);
tri_verts[i * 3 + 2] = float3_to_float4(verts[t.v[2]]);
tri_verts[i * 3] = verts[t.v[0]];
tri_verts[i * 3 + 1] = verts[t.v[1]];
tri_verts[i * 3 + 2] = verts[t.v[2]];
}
}

View File

@ -223,8 +223,11 @@ class Mesh : public Geometry {
void get_uv_tiles(ustring map, unordered_set<int> &tiles) override;
void pack_shaders(Scene *scene, uint *shader);
void pack_normals(float4 *vnormal);
void pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, float2 *tri_patch_uv);
void pack_normals(packed_float3 *vnormal);
void pack_verts(packed_float3 *tri_verts,
uint4 *tri_vindex,
uint *tri_patch,
float2 *tri_patch_uv);
void pack_patches(uint *patch_data);
PrimitiveType primitive_type() const override;

View File

@ -74,6 +74,7 @@ DeviceScene::DeviceScene(Device *device)
attributes_float(device, "__attributes_float", MEM_GLOBAL),
attributes_float2(device, "__attributes_float2", MEM_GLOBAL),
attributes_float3(device, "__attributes_float3", MEM_GLOBAL),
attributes_float4(device, "__attributes_float4", MEM_GLOBAL),
attributes_uchar4(device, "__attributes_uchar4", MEM_GLOBAL),
light_distribution(device, "__light_distribution", MEM_GLOBAL),
lights(device, "__lights", MEM_GLOBAL),

View File

@ -81,9 +81,9 @@ class DeviceScene {
device_vector<float2> prim_time;
/* mesh */
device_vector<float4> tri_verts;
device_vector<packed_float3> tri_verts;
device_vector<uint> tri_shader;
device_vector<float4> tri_vnormal;
device_vector<packed_float3> tri_vnormal;
device_vector<uint4> tri_vindex;
device_vector<uint> tri_patch;
device_vector<float2> tri_patch_uv;
@ -108,7 +108,8 @@ class DeviceScene {
device_vector<uint4> attributes_map;
device_vector<float> attributes_float;
device_vector<float2> attributes_float2;
device_vector<float4> attributes_float3;
device_vector<packed_float3> attributes_float3;
device_vector<float4> attributes_float4;
device_vector<uchar4> attributes_uchar4;
/* lights */