Still a mess but performance and visual quality improved

This commit is contained in:
Matthew Gordon 2025-01-10 16:05:21 -04:00
parent 103cd1bdc2
commit e1aab75ab8
2 changed files with 62 additions and 91 deletions

View File

@ -33,6 +33,11 @@ fn vs_main(
return result; return result;
} }
// Added by this commit (these rows exist only in the new column of the diff).
// Four 2-D offsets whose components are the same +/-0.125 and +/-0.375
// coefficients, in the same order, as the commented-out ray_directions[0..3]
// assignments in fs_solid (x multiplies ray_dx, y multiplies ray_dy there).
// NOTE(review): nothing in the visible hunks references RGSS_WIGGLE; the new
// fs_solid loop rebuilds the same four offsets with select() on the loop
// index instead. Confirm whether this constant is used elsewhere.
const RGSS_WIGGLE = array(vec2(0.125, 0.375),
vec2(-0.125, -0.375),
vec2(0.375, 0.125),
vec2(-0.375, -0.125));
@fragment @fragment
fn fs_solid(vertex: VertexOutput) -> @location(0) vec4<f32> { fn fs_solid(vertex: VertexOutput) -> @location(0) vec4<f32> {
var ray: Ray; var ray: Ray;
@ -41,74 +46,60 @@ fn fs_solid(vertex: VertexOutput) -> @location(0) vec4<f32> {
// Spread rays into RGSS antialiasing pattern. // Spread rays into RGSS antialiasing pattern.
let ray_dx = dpdy(ray.direction); let ray_dx = dpdy(ray.direction);
let ray_dy = dpdy(ray.direction); let ray_dy = dpdy(ray.direction);
var ray_directions: array<vec3<f32>,4>; //var ray_directions: array<vec3<f32>,4>;
ray_directions[0] = ray.direction + 0.125*ray_dx + 0.375*ray_dy; //ray_directions[0] = ray.direction + 0.125*ray_dx + 0.375*ray_dy;
ray_directions[1] = ray.direction - 0.125*ray_dx - 0.375*ray_dy; //ray_directions[1] = ray.direction - 0.125*ray_dx - 0.375*ray_dy;
ray_directions[2] = ray.direction + 0.375*ray_dx + 0.125*ray_dy; //ray_directions[2] = ray.direction + 0.375*ray_dx + 0.125*ray_dy;
ray_directions[3] = ray.direction - 0.375*ray_dx - 0.125*ray_dy; //ray_directions[3] = ray.direction - 0.375*ray_dx - 0.125*ray_dy;
// Possibly these ray directions could be stored in a matrix and we could // Possibly these ray directions could be stored in a matrix and we could
// evaluate them all at once instead of looping. // evaluate them all at once instead of looping.
let sun_direction = vec3<f32>(0.761904762, 0.380952381, 0.19047619);
var color_accumulator = vec4<f32>(0); var color_accumulator = vec4<f32>(0);
for(var i=0; i<1; i++) { let rgss_value = vec2(0.125, 0.375);
//ray.direction = ray_directions[i]; var wiggled_ray = ray;
for(var i=0; i<4; i++) {
let rgss_wiggle = select(rgss_value, rgss_value.yx, vec2(bool(i&2))) * select(1.0, -1.0, bool(i&1));
wiggled_ray.direction = ray.direction + rgss_wiggle.x * ray_dx + rgss_wiggle.y * ray_dy;
var root_node: BoundingNode; var root_node: BoundingNode;
root_node.index = vec2<u32>(0); root_node.index = vec2<u32>(0);
root_node.level = textureNumLevels(dembvh_texture) - 1; root_node.level = textureNumLevels(dembvh_texture) - 1;
var hit_index = vec2<u32>(0);
var hit_location: vec3<f32>; var hit_location: vec3<f32>;
var hit_normal: vec3<f32>; var hit_normal: vec3<f32>;
if intersect_ray_with_node(dem_texture, if intersect_ray_with_node(wiggled_ray, root_node, &hit_location, &hit_normal) {
dembvh_texture,
uniforms.dem_min_corner,
uniforms.dem_cell_size,
uniforms.dem_z_range,
ray,
root_node,
&hit_index,
&hit_location,
&hit_normal) {
let sun_direction_samples = array<vec3<f32>,7>(vec3<f32>(1.0, 0.5, 0.25),
vec3<f32>(1.0, 0.33, 0.25),
vec3<f32>(1.1, 0.3, 0.25),
vec3<f32>(1.1, 0.33, 0.25),
vec3<f32>(1.0, 0.27, 0.25),
vec3<f32>(0.9, 0.3, 0.25),
vec3<f32>(0.9, 0.27, 0.25));
var shadow_value = 0.0f; var shadow_value = 0.0f;
var shadow_ray :Ray; var shadow_ray :Ray;
shadow_ray.origin = hit_location + hit_normal * 0.1; shadow_ray.origin = hit_location + hit_normal * 0.1;
for(var i=0; i<1; i++) { for(var j=0; i<8; i++) {
let shadow_sample = 0.02 * (vec3(1.0, 0.0, 0.0) * f32(bool((i*4+j)&1))
+ vec3(0.0, 1.0, 0.0) * f32(bool((i*4+j)&2))
+ vec3(-1.0, 0.0, 0.0) * f32(bool((i*4+j)&4))
+ vec3(0.0, -1.0, 0.0) * f32(bool((i*4+j)&8))
+ vec3(0.0, 0.0, 1.0) * f32(bool((i*4+j)&16)));
// Calculate light // Calculate light
let sun_direction = let sun_direction =
(uniforms.camera_to_world_matrix (uniforms.camera_to_world_matrix
* vec4<f32>(normalize(sun_direction_samples[i]), 0.0)).xyz; * vec4<f32>(sun_direction + shadow_sample, 0.0)).xyz;
shadow_ray.direction = sun_direction; shadow_ray.direction = sun_direction;
var dummy0: vec2<u32>; var dummy0: vec3<f32>;
var dummy1: vec3<f32>; var dummy1: vec3<f32>;
var dummy2: vec3<f32>;
shadow_value += shadow_value +=
select(1.0, 0.0, intersect_ray_with_node(dem_texture, select(0.25, 0.0, intersect_ray_with_node(shadow_ray,
dembvh_texture,
uniforms.dem_min_corner,
uniforms.dem_cell_size,
uniforms.dem_z_range,
shadow_ray,
root_node, root_node,
&dummy0, &dummy0,
&dummy1, &dummy1));
&dummy2));
} }
let sun_direction = let sun_direction =
(uniforms.camera_to_world_matrix (uniforms.camera_to_world_matrix
* vec4<f32>(normalize(sun_direction_samples[0]), 0.0)).xyz; * vec4<f32>(sun_direction, 0.0)).xyz;
let lambertian_value = dot(hit_normal, sun_direction); let lambertian_value = dot(hit_normal, sun_direction);
let ambient_strength = 0.25; let ambient_strength = 0.25;
let l = ambient_strength + (1.0 - ambient_strength) * lambertian_value * shadow_value; let l = ambient_strength + (1.0 - ambient_strength) * lambertian_value * shadow_value;
color_accumulator += vec4<f32>(vec3<f32>(l), 1.0); color_accumulator += 0.25 * vec4<f32>(vec3<f32>(l), 1.0);
} }
} }
if color_accumulator.a == 0.0 { if color_accumulator.a == 0.0 {

View File

@ -34,41 +34,39 @@ fn intersect_ray_with_aabb_optimized(ray_origin: vec3<f32>, inv_ray_direction: v
return select(-1.0, max(t_min, 0.0f), t_min <= t_max && t_max > 0.0); return select(-1.0, max(t_min, 0.0f), t_min <= t_max && t_max > 0.0);
} }
// Side-by-side diff: left column is the previous revision, right column is
// this commit.
// Returns the minimum XY corner of a bounding node, computed as
//   dem_min_corner.xy + dem_cell_size * node.index * 2^node.level.
// Old revision: dem_min_corner and dem_cell_size are parameters.
// New revision: the only parameter is `node`; both values are read from the
// module-scope `uniforms` binding instead.
fn get_xy_min_for_node(dem_min_corner: vec2<f32>, fn get_xy_min_for_node(node: BoundingNode) -> vec2<f32> {
dem_cell_size: vec2<f32>, return uniforms.dem_min_corner.xy + uniforms.dem_cell_size * vec2<f32>(node.index) * exp2(f32(node.level));
node: BoundingNode) -> vec2<f32> {
return dem_min_corner.xy + dem_cell_size * vec2<f32>(node.index) * exp2(f32(node.level));
} }
fn get_xy_max_for_node(dem_min_corner: vec2<f32>, fn get_xy_max_for_node(node: BoundingNode) -> vec2<f32> {
dem_cell_size: vec2<f32>, return uniforms.dem_min_corner.xy + uniforms.dem_cell_size * vec2<f32>(node.index + 1) * exp2(f32(node.level));
node: BoundingNode) -> vec2<f32> {
return dem_min_corner.xy + dem_cell_size * vec2<f32>(node.index + 1) * exp2(f32(node.level));
} }
// Side-by-side diff: left column is the previous revision, right column is
// this commit.
// Fixed-capacity (64 entries) stack of bounding nodes.
// Old revision: each entry is a full BoundingNode.
// New revision: each entry is a single u32 (push_node_stack/pop_node_stack
// pack and unpack the node's index and level into that word).
struct NodeStack {
stack: array<BoundingNode,64>, stack: array<u32,64>,
// Number of live entries; push writes at stack[count] and then increments,
// pop decrements and then reads stack[count].
count: u32 count: u32
} }
// Side-by-side diff: left column is the previous revision, right column is
// this commit.
// Pushes `node` onto `stack` at slot `count`, then increments `count`.
// Old revision: stores the BoundingNode as-is.
// New revision: packs the node into one u32 before storing:
//   bits 19..31  node.index.x (masked with 0x1fff, 13 bits)
//   bits  6..18  node.index.y (masked with 0x1fff, 13 bits)
//   bits  0..5   node.level   (masked with 0x3f,    6 bits)
// Index components above 8191 or levels above 63 lose their high bits.
// NOTE(review): neither revision checks `count` against the 64-entry
// capacity of NodeStack.stack before writing.
fn push_node_stack(stack: ptr<function,NodeStack>, node: BoundingNode) { fn push_node_stack(stack: ptr<function,NodeStack>, node: BoundingNode) {
(*stack).stack[(*stack).count] = node; var packed_node = (node.index.x & 0x1fff) << 19;
packed_node += (node.index.y & 0x1fff) << 6;
packed_node += node.level & 0x3f;
(*stack).stack[(*stack).count] = packed_node;
(*stack).count++; (*stack).count++;
} }
// Side-by-side diff: left column is the previous revision, right column is
// this commit.
// Pops the top entry: decrements `count`, then reads stack[count].
// Old revision: returns the stored BoundingNode directly.
// New revision: reads the packed u32 and rebuilds the BoundingNode using the
// inverse of the layout written by push_node_stack:
//   index.x = bits 19..31, index.y = bits 6..18, level = bits 0..5.
// NOTE(review): there is no guard for count == 0 here; the traversal loop
// visible in intersect_ray_with_node only pops while node_stack.count > 0.
fn pop_node_stack(stack: ptr<function,NodeStack>) -> BoundingNode { fn pop_node_stack(stack: ptr<function,NodeStack>) -> BoundingNode {
(*stack).count--; (*stack).count--;
return (*stack).stack[(*stack).count]; let packed_node = (*stack).stack[(*stack).count];
var node: BoundingNode;
node.index.x = (packed_node >> 19) & 0x1fff;
node.index.y = (packed_node >> 6) & 0x1fff;
node.level = packed_node & 0x3f;
return node;
} }
fn intersect_ray_with_node(dem_texture: texture_2d<u32>, fn intersect_ray_with_node(ray: Ray,
tree_texture: texture_2d<u32>,
dem_min_corner: vec2<f32>,
dem_cell_size: vec2<f32>,
dem_z_range: vec2<f32>,
ray: Ray,
root_node: BoundingNode, root_node: BoundingNode,
hit_cell: ptr<function, vec2<u32>>,
hit_location: ptr<function, vec3<f32>>, hit_location: ptr<function, vec3<f32>>,
hit_normal: ptr<function, vec3<f32>>) -> bool { hit_normal: ptr<function, vec3<f32>>) -> bool {
let inv_ray_direction = invert_ray_direction(ray.direction); let inv_ray_direction = invert_ray_direction(ray.direction);
@ -78,40 +76,26 @@ fn intersect_ray_with_node(dem_texture: texture_2d<u32>,
var closest_hit_distance = 1.0e+30f; var closest_hit_distance = 1.0e+30f;
while node_stack.count > 0 { while node_stack.count > 0 {
let node = pop_node_stack(&node_stack); let node = pop_node_stack(&node_stack);
let minmax_z = textureLoad(tree_texture, node.index, i32(node.level)).rg; let minmax_z = textureLoad(dembvh_texture, node.index, i32(node.level)).rg;
if minmax_z.r == 0 { if minmax_z.r == 0 {
return false; return false;
} }
let min_z = scale_u16(minmax_z.r, dem_z_range); let min_z = scale_u16(minmax_z.r, uniforms.dem_z_range);
let max_z = scale_u16(minmax_z.g, dem_z_range); let max_z = scale_u16(minmax_z.g, uniforms.dem_z_range);
var aabb: AABB ; var aabb: AABB ;
aabb.min_corner = vec3<f32>(get_xy_min_for_node(dem_min_corner, aabb.min_corner = vec3<f32>(get_xy_min_for_node(node), min_z);
dem_cell_size, aabb.max_corner = vec3<f32>(get_xy_max_for_node(node), max_z);
node),
min_z);
aabb.max_corner = vec3<f32>(get_xy_max_for_node(dem_min_corner,
dem_cell_size,
node),
max_z);
let hit_distance = intersect_ray_with_aabb_optimized(ray.origin, inv_ray_direction, aabb); let hit_distance = intersect_ray_with_aabb_optimized(ray.origin, inv_ray_direction, aabb);
if hit_distance >= 0.0 { if hit_distance >= 0.0 {
if node.level == 0 { if node.level == 0 {
var location: vec3<f32>; var location: vec3<f32>;
var normal: vec3<f32>; var normal: vec3<f32>;
if hit_distance < closest_hit_distance { if hit_distance < closest_hit_distance {
if intersect_ray_with_grid_cell(ray, if intersect_ray_with_grid_cell(ray, node.index, &location, &normal)
node.index,
dem_texture,
dem_min_corner,
dem_cell_size,
dem_z_range,
&location,
&normal)
{ {
// Node bounding boxes are non-overlapping, so we don't need // Node bounding boxes are non-overlapping, so we don't need
// to calculate a more precise hit_distance // to calculate a more precise hit_distance
closest_hit_distance = hit_distance; closest_hit_distance = hit_distance;
*hit_cell = node.index;
*hit_location = location; *hit_location = location;
*hit_normal = normal; *hit_normal = normal;
} }
@ -136,10 +120,6 @@ fn intersect_ray_with_node(dem_texture: texture_2d<u32>,
fn intersect_ray_with_grid_cell(ray: Ray, fn intersect_ray_with_grid_cell(ray: Ray,
cell_index: vec2<u32>, cell_index: vec2<u32>,
dem_texture: texture_2d<u32>,
dem_min_corner: vec2<f32>,
dem_cell_size: vec2<f32>,
dem_z_range: vec2<f32>,
location: ptr<function, vec3<f32>>, location: ptr<function, vec3<f32>>,
normal: ptr<function, vec3<f32>>) -> bool { normal: ptr<function, vec3<f32>>) -> bool {
//Get z-values of cell corners //Get z-values of cell corners
@ -148,16 +128,16 @@ fn intersect_ray_with_grid_cell(ray: Ray,
let v01 = textureLoad(dem_texture, cell_index + vec2<u32>(0,1), 0).r; let v01 = textureLoad(dem_texture, cell_index + vec2<u32>(0,1), 0).r;
let v10 = textureLoad(dem_texture, cell_index + vec2<u32>(1,0), 0).r; let v10 = textureLoad(dem_texture, cell_index + vec2<u32>(1,0), 0).r;
let v11 = textureLoad(dem_texture, cell_index + vec2<u32>(1,1), 0).r; let v11 = textureLoad(dem_texture, cell_index + vec2<u32>(1,1), 0).r;
let z00 = scale_u16(v00, dem_z_range); let z00 = scale_u16(v00, uniforms.dem_z_range);
let z01 = scale_u16(v01, dem_z_range); let z01 = scale_u16(v01, uniforms.dem_z_range);
let z10 = scale_u16(v10, dem_z_range); let z10 = scale_u16(v10, uniforms.dem_z_range);
let z11 = scale_u16(v11, dem_z_range); let z11 = scale_u16(v11, uniforms.dem_z_range);
// Calculate xyz of cell corners // Calculate xyz of cell corners
let xy00 = dem_min_corner.xy + dem_cell_size * vec2<f32>(cell_index); let xy00 = uniforms.dem_min_corner.xy + uniforms.dem_cell_size * vec2<f32>(cell_index);
let p00 = vec3<f32>(xy00, z00); let p00 = vec3<f32>(xy00, z00);
let p01 = vec3<f32>(xy00 + vec2<f32>(0, 1) * dem_cell_size, z01); let p01 = vec3<f32>(xy00 + vec2<f32>(0, 1) * uniforms.dem_cell_size, z01);
let p10 = vec3<f32>(xy00 + vec2<f32>(1, 0) * dem_cell_size, z10); let p10 = vec3<f32>(xy00 + vec2<f32>(1, 0) * uniforms.dem_cell_size, z10);
let p11 = vec3<f32>(xy00 + vec2<f32>(1, 1) * dem_cell_size, z11); let p11 = vec3<f32>(xy00 + vec2<f32>(1, 1) * uniforms.dem_cell_size, z11);
// Intersect ray with the plane of each triangle and then take the // Intersect ray with the plane of each triangle and then take the
// point that's inside its triangle. // point that's inside its triangle.