Still a mess but performance and visual quality improved

This commit is contained in:
Matthew Gordon 2025-01-10 16:05:21 -04:00
parent 103cd1bdc2
commit e1aab75ab8
2 changed files with 62 additions and 91 deletions

View File

@ -33,6 +33,11 @@ fn vs_main(
return result;
}
// Sample offsets for 4-tap rotated-grid supersampling (RGSS).
// Each entry is an (x, y) pair of weights; they are the same four pairs that
// fs_solid applies to ray_dx / ray_dy when spreading the primary ray.
const RGSS_WIGGLE = array<vec2<f32>, 4>(
    vec2<f32>( 0.125,  0.375),
    vec2<f32>(-0.125, -0.375),
    vec2<f32>( 0.375,  0.125),
    vec2<f32>(-0.375, -0.125)
);
@fragment
fn fs_solid(vertex: VertexOutput) -> @location(0) vec4<f32> {
var ray: Ray;
@ -41,74 +46,60 @@ fn fs_solid(vertex: VertexOutput) -> @location(0) vec4<f32> {
// Spread rays into RGSS antialiasing pattern.
let ray_dx = dpdy(ray.direction);
let ray_dy = dpdy(ray.direction);
var ray_directions: array<vec3<f32>,4>;
ray_directions[0] = ray.direction + 0.125*ray_dx + 0.375*ray_dy;
ray_directions[1] = ray.direction - 0.125*ray_dx - 0.375*ray_dy;
ray_directions[2] = ray.direction + 0.375*ray_dx + 0.125*ray_dy;
ray_directions[3] = ray.direction - 0.375*ray_dx - 0.125*ray_dy;
//var ray_directions: array<vec3<f32>,4>;
//ray_directions[0] = ray.direction + 0.125*ray_dx + 0.375*ray_dy;
//ray_directions[1] = ray.direction - 0.125*ray_dx - 0.375*ray_dy;
//ray_directions[2] = ray.direction + 0.375*ray_dx + 0.125*ray_dy;
//ray_directions[3] = ray.direction - 0.375*ray_dx - 0.125*ray_dy;
// Possibly these ray directions could be stored in a matrix and we could
// evaluate them all at once instead of looping.
let sun_direction = vec3<f32>(0.761904762, 0.380952381, 0.19047619);
var color_accumulator = vec4<f32>(0);
for(var i=0; i<1; i++) {
//ray.direction = ray_directions[i];
let rgss_value = vec2(0.125, 0.375);
var wiggled_ray = ray;
for(var i=0; i<4; i++) {
let rgss_wiggle = select(rgss_value, rgss_value.yx, vec2(bool(i&2))) * select(1.0, -1.0, bool(i&1));
wiggled_ray.direction = ray.direction + rgss_wiggle.x * ray_dx + rgss_wiggle.y * ray_dy;
var root_node: BoundingNode;
root_node.index = vec2<u32>(0);
root_node.level = textureNumLevels(dembvh_texture) - 1;
var hit_index = vec2<u32>(0);
var hit_location: vec3<f32>;
var hit_normal: vec3<f32>;
if intersect_ray_with_node(dem_texture,
dembvh_texture,
uniforms.dem_min_corner,
uniforms.dem_cell_size,
uniforms.dem_z_range,
ray,
root_node,
&hit_index,
&hit_location,
&hit_normal) {
let sun_direction_samples = array<vec3<f32>,7>(vec3<f32>(1.0, 0.5, 0.25),
vec3<f32>(1.0, 0.33, 0.25),
vec3<f32>(1.1, 0.3, 0.25),
vec3<f32>(1.1, 0.33, 0.25),
vec3<f32>(1.0, 0.27, 0.25),
vec3<f32>(0.9, 0.3, 0.25),
vec3<f32>(0.9, 0.27, 0.25));
if intersect_ray_with_node(wiggled_ray, root_node, &hit_location, &hit_normal) {
var shadow_value = 0.0f;
var shadow_ray :Ray;
shadow_ray.origin = hit_location + hit_normal * 0.1;
for(var i=0; i<1; i++) {
for(var j=0; i<8; i++) {
let shadow_sample = 0.02 * (vec3(1.0, 0.0, 0.0) * f32(bool((i*4+j)&1))
+ vec3(0.0, 1.0, 0.0) * f32(bool((i*4+j)&2))
+ vec3(-1.0, 0.0, 0.0) * f32(bool((i*4+j)&4))
+ vec3(0.0, -1.0, 0.0) * f32(bool((i*4+j)&8))
+ vec3(0.0, 0.0, 1.0) * f32(bool((i*4+j)&16)));
// Calculate light
let sun_direction =
(uniforms.camera_to_world_matrix
* vec4<f32>(normalize(sun_direction_samples[i]), 0.0)).xyz;
* vec4<f32>(sun_direction + shadow_sample, 0.0)).xyz;
shadow_ray.direction = sun_direction;
var dummy0: vec2<u32>;
var dummy0: vec3<f32>;
var dummy1: vec3<f32>;
var dummy2: vec3<f32>;
shadow_value +=
select(1.0, 0.0, intersect_ray_with_node(dem_texture,
dembvh_texture,
uniforms.dem_min_corner,
uniforms.dem_cell_size,
uniforms.dem_z_range,
shadow_ray,
select(0.25, 0.0, intersect_ray_with_node(shadow_ray,
root_node,
&dummy0,
&dummy1,
&dummy2));
&dummy1));
}
let sun_direction =
(uniforms.camera_to_world_matrix
* vec4<f32>(normalize(sun_direction_samples[0]), 0.0)).xyz;
* vec4<f32>(sun_direction, 0.0)).xyz;
let lambertian_value = dot(hit_normal, sun_direction);
let ambient_strength = 0.25;
let l = ambient_strength + (1.0 - ambient_strength) * lambertian_value * shadow_value;
color_accumulator += vec4<f32>(vec3<f32>(l), 1.0);
color_accumulator += 0.25 * vec4<f32>(vec3<f32>(l), 1.0);
}
}
if color_accumulator.a == 0.0 {

View File

@ -34,41 +34,39 @@ fn intersect_ray_with_aabb_optimized(ray_origin: vec3<f32>, inv_ray_direction: v
return select(-1.0, max(t_min, 0.0f), t_min <= t_max && t_max > 0.0);
}
// Returns the XY position of the minimum corner of `node`.
//
// The node's integer index is scaled by the DEM cell size and by
// exp2(node.level) (a node at level L covers 2^L cells per axis), then offset
// from the DEM's minimum corner. Both dem_min_corner and dem_cell_size are
// read from the module-scope `uniforms` binding.
fn get_xy_min_for_node(node: BoundingNode) -> vec2<f32> {
    let cells_per_node = exp2(f32(node.level));
    return uniforms.dem_min_corner.xy
        + uniforms.dem_cell_size * vec2<f32>(node.index) * cells_per_node;
}
// Returns the XY position of the maximum corner of `node`.
//
// Identical to the minimum-corner computation except that the node index is
// advanced by one in each axis first, so the result is the minimum corner of
// the diagonally adjacent node at the same level. Both dem_min_corner and
// dem_cell_size are read from the module-scope `uniforms` binding.
fn get_xy_max_for_node(node: BoundingNode) -> vec2<f32> {
    let cells_per_node = exp2(f32(node.level));
    return uniforms.dem_min_corner.xy
        + uniforms.dem_cell_size * vec2<f32>(node.index + 1) * cells_per_node;
}
// Fixed-capacity LIFO of bounding nodes.
// Each entry is a node packed into a single u32 by push_node_stack and
// unpacked again by pop_node_stack.
struct NodeStack {
    // Packed node words; only the first `count` entries are live.
    stack: array<u32, 64>,
    // Number of entries currently on the stack.
    count: u32
}
// Pushes `node` onto `stack`, packed into a single u32.
//
// Bit layout (most significant first):
//   bits 31..19  node.index.x  (13 bits)
//   bits 18..6   node.index.y  (13 bits)
//   bits  5..0   node.level    ( 6 bits)
// Values wider than their field are truncated by the masks.
//
// No capacity check is performed here; the caller is responsible for never
// having more than 64 entries live at once.
fn push_node_stack(stack: ptr<function,NodeStack>, node: BoundingNode) {
    // The three fields occupy disjoint bit ranges, so OR-ing them together
    // is equivalent to summing them.
    let packed_node = ((node.index.x & 0x1fff) << 19)
                    | ((node.index.y & 0x1fff) << 6)
                    | (node.level & 0x3f);
    (*stack).stack[(*stack).count] = packed_node;
    (*stack).count++;
}
// Removes the top entry from `stack` and returns it as a BoundingNode.
//
// The stored u32 is unpacked with the inverse of the push_node_stack layout:
//   bits 31..19 -> index.x, bits 18..6 -> index.y, bits 5..0 -> level.
//
// Precondition: (*stack).count > 0. `count` is a u32, so popping an empty
// stack would wrap the counter instead of failing.
fn pop_node_stack(stack: ptr<function,NodeStack>) -> BoundingNode {
    (*stack).count--;
    let packed_node = (*stack).stack[(*stack).count];

    var node: BoundingNode;
    node.index.x = (packed_node >> 19) & 0x1fff;
    node.index.y = (packed_node >> 6) & 0x1fff;
    node.level = packed_node & 0x3f;
    return node;
}
fn intersect_ray_with_node(dem_texture: texture_2d<u32>,
tree_texture: texture_2d<u32>,
dem_min_corner: vec2<f32>,
dem_cell_size: vec2<f32>,
dem_z_range: vec2<f32>,
ray: Ray,
fn intersect_ray_with_node(ray: Ray,
root_node: BoundingNode,
hit_cell: ptr<function, vec2<u32>>,
hit_location: ptr<function, vec3<f32>>,
hit_normal: ptr<function, vec3<f32>>) -> bool {
let inv_ray_direction = invert_ray_direction(ray.direction);
@ -78,40 +76,26 @@ fn intersect_ray_with_node(dem_texture: texture_2d<u32>,
var closest_hit_distance = 1.0e+30f;
while node_stack.count > 0 {
let node = pop_node_stack(&node_stack);
let minmax_z = textureLoad(tree_texture, node.index, i32(node.level)).rg;
let minmax_z = textureLoad(dembvh_texture, node.index, i32(node.level)).rg;
if minmax_z.r == 0 {
return false;
}
let min_z = scale_u16(minmax_z.r, dem_z_range);
let max_z = scale_u16(minmax_z.g, dem_z_range);
let min_z = scale_u16(minmax_z.r, uniforms.dem_z_range);
let max_z = scale_u16(minmax_z.g, uniforms.dem_z_range);
var aabb: AABB ;
aabb.min_corner = vec3<f32>(get_xy_min_for_node(dem_min_corner,
dem_cell_size,
node),
min_z);
aabb.max_corner = vec3<f32>(get_xy_max_for_node(dem_min_corner,
dem_cell_size,
node),
max_z);
aabb.min_corner = vec3<f32>(get_xy_min_for_node(node), min_z);
aabb.max_corner = vec3<f32>(get_xy_max_for_node(node), max_z);
let hit_distance = intersect_ray_with_aabb_optimized(ray.origin, inv_ray_direction, aabb);
if hit_distance >= 0.0 {
if node.level == 0 {
var location: vec3<f32>;
var normal: vec3<f32>;
if hit_distance < closest_hit_distance {
if intersect_ray_with_grid_cell(ray,
node.index,
dem_texture,
dem_min_corner,
dem_cell_size,
dem_z_range,
&location,
&normal)
if intersect_ray_with_grid_cell(ray, node.index, &location, &normal)
{
// Node bounding boxes are non-overlapping, so we don't need
// to calculate a more precise hit_distance
closest_hit_distance = hit_distance;
*hit_cell = node.index;
*hit_location = location;
*hit_normal = normal;
}
@ -136,10 +120,6 @@ fn intersect_ray_with_node(dem_texture: texture_2d<u32>,
fn intersect_ray_with_grid_cell(ray: Ray,
cell_index: vec2<u32>,
dem_texture: texture_2d<u32>,
dem_min_corner: vec2<f32>,
dem_cell_size: vec2<f32>,
dem_z_range: vec2<f32>,
location: ptr<function, vec3<f32>>,
normal: ptr<function, vec3<f32>>) -> bool {
//Get z-values of cell corners
@ -148,16 +128,16 @@ fn intersect_ray_with_grid_cell(ray: Ray,
let v01 = textureLoad(dem_texture, cell_index + vec2<u32>(0,1), 0).r;
let v10 = textureLoad(dem_texture, cell_index + vec2<u32>(1,0), 0).r;
let v11 = textureLoad(dem_texture, cell_index + vec2<u32>(1,1), 0).r;
let z00 = scale_u16(v00, dem_z_range);
let z01 = scale_u16(v01, dem_z_range);
let z10 = scale_u16(v10, dem_z_range);
let z11 = scale_u16(v11, dem_z_range);
let z00 = scale_u16(v00, uniforms.dem_z_range);
let z01 = scale_u16(v01, uniforms.dem_z_range);
let z10 = scale_u16(v10, uniforms.dem_z_range);
let z11 = scale_u16(v11, uniforms.dem_z_range);
// Calculate xyz of cell corners
let xy00 = dem_min_corner.xy + dem_cell_size * vec2<f32>(cell_index);
let xy00 = uniforms.dem_min_corner.xy + uniforms.dem_cell_size * vec2<f32>(cell_index);
let p00 = vec3<f32>(xy00, z00);
let p01 = vec3<f32>(xy00 + vec2<f32>(0, 1) * dem_cell_size, z01);
let p10 = vec3<f32>(xy00 + vec2<f32>(1, 0) * dem_cell_size, z10);
let p11 = vec3<f32>(xy00 + vec2<f32>(1, 1) * dem_cell_size, z11);
let p01 = vec3<f32>(xy00 + vec2<f32>(0, 1) * uniforms.dem_cell_size, z01);
let p10 = vec3<f32>(xy00 + vec2<f32>(1, 0) * uniforms.dem_cell_size, z10);
let p11 = vec3<f32>(xy00 + vec2<f32>(1, 1) * uniforms.dem_cell_size, z11);
// Intersect ray with the plane of each triangle and then take the
// point that's inside its triangle.