Minor refactor to beable to easily switch number of dimensions
This commit is contained in:
parent
40a1be1c78
commit
95af7079b6
@ -14,8 +14,7 @@ You're on your own with figuring out how to install cuda...but in theory once th
|
|||||||
```
|
```
|
||||||
cmake -S ./ -B build/
|
cmake -S ./ -B build/
|
||||||
cmake --build build/
|
cmake --build build/
|
||||||
cd build/
|
./build/src/particle_sim_exe
|
||||||
./src/particle_sim_exe
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Current application runs 10,000 particles at 50fps on an RTX3060. The gif below only appears to be moving slow because the time steps between renders is much lower then in the python example. The frame is actually higher here then it was in python.
|
Current application runs 10,000 particles at 50fps on an RTX3060. The gif below only appears to be moving slow because the time steps between renders is much lower then in the python example. The frame is actually higher here then it was in python.
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
void custom_kernel_process_ray_collision(int size, float * norms, float * direction, float * vel, float * pos, int array_stride, float * line_vec);
|
void custom_kernel_process_ray_collision(int size, float * norms, float * direction, float * vel, float * pos, int array_stride, float * line_vec, int num_dim);
|
||||||
|
@ -14,8 +14,10 @@ class universe
|
|||||||
int num_of_particles;
|
int num_of_particles;
|
||||||
int array_size;
|
int array_size;
|
||||||
cublasHandle_t cublas_handle;
|
cublasHandle_t cublas_handle;
|
||||||
float * device_ray_position;
|
float * device_boundary_position;
|
||||||
float * device_ray_direction;
|
float * device_boundary_direction;
|
||||||
|
float * host_boundary_position;
|
||||||
|
float * host_boundary_direction;
|
||||||
float * device_position;
|
float * device_position;
|
||||||
float * device_velocity;
|
float * device_velocity;
|
||||||
float * device_acceleration;
|
float * device_acceleration;
|
||||||
@ -26,7 +28,7 @@ class universe
|
|||||||
float * host_acceleration;
|
float * host_acceleration;
|
||||||
curandGenerator_t gen;
|
curandGenerator_t gen;
|
||||||
void process_collisions_with_other_particles(void);
|
void process_collisions_with_other_particles(void);
|
||||||
void process_collisions_with_rays(void);
|
void process_collisions_with_boundary(void);
|
||||||
void process_particles(void);
|
void process_particles(void);
|
||||||
/* data3d */
|
/* data3d */
|
||||||
public:
|
public:
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
SDL_Surface *bitmapSurface[4]= {};
|
SDL_Surface *bitmapSurface[4]= {};
|
||||||
SDL_Texture * bitmapTexture[4] = {};
|
SDL_Texture * bitmapTexture[4] = {};
|
||||||
static std::string color_paths[] = {"blue", "green", "red", "yellow"};
|
static std::string color_paths[] = {"blue", "green", "red", "yellow"};
|
||||||
static std::string sprite_path = "../sdl_sprites/";
|
static std::string sprite_path = "sdl_sprites/";
|
||||||
|
|
||||||
void init_textures(SDL_Renderer *in_renderer)
|
void init_textures(SDL_Renderer *in_renderer)
|
||||||
{
|
{
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
#include <cstddef>
|
||||||
|
#include <cstdlib>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -13,26 +15,11 @@
|
|||||||
#include "util.hpp"
|
#include "util.hpp"
|
||||||
|
|
||||||
#define DELTA_TIME 0.005
|
#define DELTA_TIME 0.005
|
||||||
|
#define NUM_DIMENSIONS 2
|
||||||
|
|
||||||
|
|
||||||
void universe::process_collisions_with_rays(void)
|
void universe::process_collisions_with_boundary(void)
|
||||||
{
|
{
|
||||||
float temp_directions[][3] =
|
|
||||||
{
|
|
||||||
{1.0,0.0,0.0},
|
|
||||||
{0.0,1.0,0.0},
|
|
||||||
{1.0,0.0,0.0},
|
|
||||||
{0.0,1.0,0.0}
|
|
||||||
};
|
|
||||||
|
|
||||||
float temp_positions[][3] =
|
|
||||||
{
|
|
||||||
{0.0,0.0,0.0},
|
|
||||||
{0.0,0.0,0.0},
|
|
||||||
{800.0,800.0,800.0},
|
|
||||||
{800.0,800.0,800.0}
|
|
||||||
};
|
|
||||||
|
|
||||||
float alpha = 1.0;
|
float alpha = 1.0;
|
||||||
float beta = 0.0;
|
float beta = 0.0;
|
||||||
|
|
||||||
@ -43,57 +30,53 @@ void universe::process_collisions_with_rays(void)
|
|||||||
long(num_of_particles*sizeof(float))));
|
long(num_of_particles*sizeof(float))));
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaMallocManaged(&(dev_direction_vec),
|
CHECK_CUDA_ERROR(cudaMallocManaged(&(dev_direction_vec),
|
||||||
long(num_of_particles*3*sizeof(float))));
|
long(num_of_particles*NUM_DIMENSIONS*sizeof(float))));
|
||||||
|
|
||||||
for(int i = 0; i < 4; i++)
|
for(int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
//Fill scratchpad A with vec1 = line_position - particle_position
|
//Fill scratchpad A with vec1 = line_position - particle_position
|
||||||
for (int j = 0; j < 3; j++)
|
for (int j = 0; j < NUM_DIMENSIONS; j++)
|
||||||
{
|
{
|
||||||
CHECK_CUDA_ERROR(nppsSubCRev_32f(&device_position[array_size * j],
|
CHECK_CUDA_ERROR(nppsSubCRev_32f(&device_position[array_size * j],
|
||||||
temp_positions[i][j], &device_scratch_padA[num_of_particles * j],
|
host_boundary_position[i*NUM_DIMENSIONS+j], &device_scratch_padA[num_of_particles * j],
|
||||||
num_of_particles));
|
num_of_particles));
|
||||||
}
|
}
|
||||||
alpha = 1.0;
|
alpha = 1.0;
|
||||||
beta = 0.0;
|
beta = 0.0;
|
||||||
CHECK_CUDA_ERROR(cublasSgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
|
CHECK_CUDA_ERROR(cublasSgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
|
||||||
num_of_particles, 1, 3, &alpha, device_scratch_padA,
|
num_of_particles, 1, NUM_DIMENSIONS, &alpha, device_scratch_padA,
|
||||||
num_of_particles, &device_ray_direction[i * 3], 3, &beta,
|
num_of_particles, &device_boundary_direction[i * NUM_DIMENSIONS], NUM_DIMENSIONS, &beta,
|
||||||
dev_dot_product_result, num_of_particles));
|
dev_dot_product_result, num_of_particles));
|
||||||
|
|
||||||
//Use dot product to find projections onto the line directions
|
//Use dot product to find projections onto the line directions
|
||||||
CHECK_CUDA_ERROR(cublasSgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T,
|
CHECK_CUDA_ERROR(cublasSgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T,
|
||||||
num_of_particles, 3, 1, &alpha, dev_dot_product_result,
|
num_of_particles, NUM_DIMENSIONS, 1, &alpha, dev_dot_product_result,
|
||||||
num_of_particles, &device_ray_direction[i * 3], 3, &beta,
|
num_of_particles, &device_boundary_direction[i * NUM_DIMENSIONS], NUM_DIMENSIONS, &beta,
|
||||||
device_scratch_padA, num_of_particles));
|
device_scratch_padA, num_of_particles));
|
||||||
|
|
||||||
//calc norm
|
//calc norm
|
||||||
CHECK_CUDA_ERROR(nppsSqr_32f(device_scratch_padA, dev_direction_vec,
|
CHECK_CUDA_ERROR(nppsSqr_32f(device_scratch_padA, dev_direction_vec,
|
||||||
num_of_particles * 3));
|
num_of_particles * NUM_DIMENSIONS));
|
||||||
|
|
||||||
|
for(int index = 1; index < NUM_DIMENSIONS; index++)
|
||||||
|
{
|
||||||
CHECK_CUDA_ERROR(cublasSaxpy(cublas_handle, num_of_particles, &alpha,
|
CHECK_CUDA_ERROR(cublasSaxpy(cublas_handle, num_of_particles, &alpha,
|
||||||
&dev_direction_vec[num_of_particles * 1], 1,
|
&dev_direction_vec[num_of_particles * index], 1,
|
||||||
dev_direction_vec, 1));
|
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cublasSaxpy(cublas_handle, num_of_particles, &alpha,
|
|
||||||
&dev_direction_vec[num_of_particles * 2], 1,
|
|
||||||
dev_direction_vec, 1));
|
dev_direction_vec, 1));
|
||||||
|
}
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(nppsSqrt_32f_I(dev_direction_vec, num_of_particles));
|
CHECK_CUDA_ERROR(nppsSqrt_32f_I(dev_direction_vec, num_of_particles));
|
||||||
|
|
||||||
//Normalize vector
|
//Normalize vector
|
||||||
CHECK_CUDA_ERROR(nppsDiv_32f_I(dev_direction_vec, device_scratch_padA,
|
for(int index = 0; index < NUM_DIMENSIONS; index++)
|
||||||
num_of_particles));
|
{
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(nppsDiv_32f_I(dev_direction_vec,
|
CHECK_CUDA_ERROR(nppsDiv_32f_I(dev_direction_vec,
|
||||||
&device_scratch_padA[1 * num_of_particles], num_of_particles));
|
&device_scratch_padA[index * num_of_particles], num_of_particles));
|
||||||
|
}
|
||||||
CHECK_CUDA_ERROR(nppsDiv_32f_I(dev_direction_vec,
|
|
||||||
&device_scratch_padA[2 * num_of_particles], num_of_particles));
|
|
||||||
|
|
||||||
custom_kernel_process_ray_collision(num_of_particles, dev_direction_vec,
|
custom_kernel_process_ray_collision(num_of_particles, dev_direction_vec,
|
||||||
device_scratch_padA, device_velocity, device_position,
|
device_scratch_padA, device_velocity, device_position,
|
||||||
array_size, &device_ray_direction[i*3]);
|
array_size, &device_boundary_direction[i*NUM_DIMENSIONS], NUM_DIMENSIONS);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,15 +97,15 @@ void universe::process_particles(void)
|
|||||||
|
|
||||||
// // Update the positions of the particles:
|
// // Update the positions of the particles:
|
||||||
// // self.positions[counti] = self.velocities[counti] * DELTA_TIME + self.positions[counti]
|
// // self.positions[counti] = self.velocities[counti] * DELTA_TIME + self.positions[counti]
|
||||||
CHECK_CUDA_ERROR(cublasSaxpy(cublas_handle, array_size*3, &delta,
|
CHECK_CUDA_ERROR(cublasSaxpy(cublas_handle, array_size*NUM_DIMENSIONS, &delta,
|
||||||
device_velocity, 1, device_position, 1));
|
device_velocity, 1, device_position, 1));
|
||||||
|
|
||||||
// Update the velocities of the particles:
|
// Update the velocities of the particles:
|
||||||
// self.velocities[counti] = self.accelerations[counti] * DELTA_TIME + self.velocities[counti]
|
// self.velocities[counti] = self.accelerations[counti] * DELTA_TIME + self.velocities[counti]
|
||||||
CHECK_CUDA_ERROR(cublasSaxpy(cublas_handle, array_size*3, &delta,
|
CHECK_CUDA_ERROR(cublasSaxpy(cublas_handle, array_size*NUM_DIMENSIONS, &delta,
|
||||||
device_acceleration, 1, device_velocity, 1));
|
device_acceleration, 1, device_velocity, 1));
|
||||||
|
|
||||||
process_collisions_with_rays();
|
process_collisions_with_boundary();
|
||||||
|
|
||||||
process_collisions_with_other_particles();
|
process_collisions_with_other_particles();
|
||||||
}
|
}
|
||||||
@ -130,7 +113,7 @@ void universe::process_particles(void)
|
|||||||
void universe::get_positions(float ** in_out_buffer, int * size, int * stride)
|
void universe::get_positions(float ** in_out_buffer, int * size, int * stride)
|
||||||
{
|
{
|
||||||
CHECK_CUDA_ERROR(cudaMemcpy(host_position, device_position,
|
CHECK_CUDA_ERROR(cudaMemcpy(host_position, device_position,
|
||||||
long(array_size * 3 * sizeof(float)), cudaMemcpyDeviceToHost));
|
long(array_size * NUM_DIMENSIONS * sizeof(float)), cudaMemcpyDeviceToHost));
|
||||||
|
|
||||||
*in_out_buffer = host_position;
|
*in_out_buffer = host_position;
|
||||||
*size = num_of_particles;
|
*size = num_of_particles;
|
||||||
@ -148,49 +131,64 @@ curandCreateGenerator(&gen,
|
|||||||
array_size = size;
|
array_size = size;
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_position),
|
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_position),
|
||||||
long(array_size*3*sizeof(float))));
|
long(array_size*NUM_DIMENSIONS*sizeof(float))));
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_velocity),
|
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_velocity),
|
||||||
long(array_size*3*sizeof(float))));
|
long(array_size*NUM_DIMENSIONS*sizeof(float))));
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_acceleration),
|
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_acceleration),
|
||||||
long(array_size*3*sizeof(float))));
|
long(array_size*NUM_DIMENSIONS*sizeof(float))));
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_scratch_padA),
|
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_scratch_padA),
|
||||||
long(array_size*3*sizeof(float))));
|
long(array_size*NUM_DIMENSIONS*sizeof(float))));
|
||||||
|
|
||||||
host_position = (float *)malloc(long(array_size*3*sizeof(float)));
|
host_position = (float *)malloc(long(array_size*NUM_DIMENSIONS*sizeof(float)));
|
||||||
host_velocity = (float *)malloc(long(array_size*3*sizeof(float)));
|
host_velocity = (float *)malloc(long(array_size*NUM_DIMENSIONS*sizeof(float)));
|
||||||
host_acceleration = (float *)malloc(long(array_size*3*sizeof(float)));
|
host_acceleration = (float *)malloc(long(array_size*NUM_DIMENSIONS*sizeof(float)));
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(curandGenerateUniform(gen, device_position, array_size*2));
|
CHECK_CUDA_ERROR(curandGenerateUniform(gen, device_position, array_size*NUM_DIMENSIONS));
|
||||||
CHECK_CUDA_ERROR(nppsAddC_32f_I(20.0, device_position, array_size*3));
|
CHECK_CUDA_ERROR(nppsAddC_32f_I(20.0, device_position, array_size*NUM_DIMENSIONS));
|
||||||
CHECK_CUDA_ERROR(curandGenerateUniform(gen, device_velocity, array_size*2));
|
CHECK_CUDA_ERROR(curandGenerateUniform(gen, device_velocity, array_size*NUM_DIMENSIONS));
|
||||||
// CHECK_CUDA_ERROR(curandGenerateUniform(gen, device_acceleration, num_of_particles*3));
|
// CHECK_CUDA_ERROR(curandGenerateUniform(gen, device_acceleration, num_of_particles*NUM_DIMENSIONS));
|
||||||
|
|
||||||
//Add rays that can collied with particles to the universe
|
//Add rays that can collied with particles to the universe
|
||||||
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_ray_position),
|
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_boundary_position),
|
||||||
long(4*3*sizeof(float))));
|
long(4*NUM_DIMENSIONS*sizeof(float))));
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_ray_direction),
|
CHECK_CUDA_ERROR(cudaMallocManaged(&(device_boundary_direction),
|
||||||
long(4*3*sizeof(float))));
|
long(4*NUM_DIMENSIONS*sizeof(float))));
|
||||||
|
|
||||||
float temp_positions[] = {0.0,0.0,0.0,0.0,0.0,0.0,800.0,800.0,800.0,800.0,800.0,800.0};
|
float boundary_direction[4*NUM_DIMENSIONS] = {0};
|
||||||
float temp_directions[] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0};
|
float boundary_point[4*NUM_DIMENSIONS] = {0};
|
||||||
|
for(size_t i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
boundary_direction[i*NUM_DIMENSIONS+(i%2)] = 1;
|
||||||
|
if(i >= 2)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < NUM_DIMENSIONS; j++)
|
||||||
|
{
|
||||||
|
boundary_point[i*NUM_DIMENSIONS+j] = 800;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaMemcpy(device_ray_position, temp_positions,
|
host_boundary_position = (float *)malloc(sizeof(boundary_point));
|
||||||
long(sizeof(temp_positions)), cudaMemcpyHostToDevice));
|
host_boundary_direction = (float *)malloc(sizeof(boundary_direction));
|
||||||
|
memcpy(host_boundary_position, boundary_point, sizeof(boundary_point));
|
||||||
|
memcpy(host_boundary_direction, boundary_direction, sizeof(boundary_direction));
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaMemcpy(device_ray_direction, temp_directions,
|
|
||||||
long(sizeof(temp_directions)), cudaMemcpyHostToDevice));
|
CHECK_CUDA_ERROR(cudaMemcpy(device_boundary_position, boundary_point,
|
||||||
|
long(sizeof(boundary_point)), cudaMemcpyHostToDevice));
|
||||||
|
|
||||||
|
CHECK_CUDA_ERROR(cudaMemcpy(device_boundary_direction, boundary_direction,
|
||||||
|
long(sizeof(boundary_direction)), cudaMemcpyHostToDevice));
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
universe::~universe()
|
universe::~universe()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
CHECK_CUDA_ERROR(cudaFree(device_position));
|
CHECK_CUDA_ERROR(cudaFree(device_position));
|
||||||
CHECK_CUDA_ERROR(cudaFree(device_velocity));
|
CHECK_CUDA_ERROR(cudaFree(device_velocity));
|
||||||
CHECK_CUDA_ERROR(cudaFree(device_acceleration));
|
CHECK_CUDA_ERROR(cudaFree(device_acceleration));
|
||||||
@ -199,6 +197,8 @@ universe::~universe()
|
|||||||
free(host_position);
|
free(host_position);
|
||||||
free(host_velocity);
|
free(host_velocity);
|
||||||
free(host_acceleration);
|
free(host_acceleration);
|
||||||
|
free(host_boundary_direction);
|
||||||
|
free(host_boundary_position);
|
||||||
CHECK_CUDA_ERROR(cublasDestroy(cublas_handle));
|
CHECK_CUDA_ERROR(cublasDestroy(cublas_handle));
|
||||||
|
|
||||||
|
|
||||||
@ -207,7 +207,6 @@ universe::~universe()
|
|||||||
|
|
||||||
void universe::process(void)
|
void universe::process(void)
|
||||||
{
|
{
|
||||||
|
|
||||||
//First we need to process gravity
|
//First we need to process gravity
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
#include "stdio.h"
|
#include "stdio.h"
|
||||||
#define BLOCK_SIZE 32
|
#define BLOCK_SIZE 32
|
||||||
|
|
||||||
__global__ static void process_ray_collision(int size, float * norms, float * direction, float * vel, float * pos, int array_stride, float * line_vec)
|
__global__ static void process_ray_collision(int size, float * norms, float * direction, float * vel, float * pos, int array_stride, float * line_vec, int num_dim)
|
||||||
{
|
{
|
||||||
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int stride = blockDim.x * gridDim.x;
|
int stride = blockDim.x * gridDim.x;
|
||||||
@ -12,17 +12,17 @@ __global__ static void process_ray_collision(int size, float * norms, float * di
|
|||||||
{
|
{
|
||||||
float speed_norm = 0.0;
|
float speed_norm = 0.0;
|
||||||
float dot_product = 0.0;
|
float dot_product = 0.0;
|
||||||
for(int k = 0; k<3; k++)
|
for(int k = 0; k<num_dim; k++)
|
||||||
{
|
{
|
||||||
dot_product += vel[k*array_stride+i]*line_vec[k];
|
dot_product += vel[k*array_stride+i]*line_vec[k];
|
||||||
}
|
}
|
||||||
for(int k = 0; k<3; k++)
|
for(int k = 0; k<num_dim; k++)
|
||||||
{
|
{
|
||||||
float temp = (dot_product*(line_vec[k]));
|
float temp = (dot_product*(line_vec[k]));
|
||||||
speed_norm += pow(temp, 2);
|
speed_norm += pow(temp, 2);
|
||||||
}
|
}
|
||||||
speed_norm = sqrt(speed_norm)*2.0;
|
speed_norm = sqrt(speed_norm)*2.0;
|
||||||
for(int k = 0; k<3; k++)
|
for(int k = 0; k<num_dim; k++)
|
||||||
{
|
{
|
||||||
vel[k*array_stride+i] = vel[k*array_stride+i] - speed_norm*direction[k*size+i];
|
vel[k*array_stride+i] = vel[k*array_stride+i] - speed_norm*direction[k*size+i];
|
||||||
pos[k*array_stride+i] = pos[k*array_stride+i] - direction[k*size+i]*(5.0-norms[i]);
|
pos[k*array_stride+i] = pos[k*array_stride+i] - direction[k*size+i]*(5.0-norms[i]);
|
||||||
@ -31,9 +31,8 @@ __global__ static void process_ray_collision(int size, float * norms, float * di
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void custom_kernel_process_ray_collision(int size, float * norms, float * direction, float * vel, float * pos, int array_stride, float * line_vec, int num_dim)
|
||||||
void custom_kernel_process_ray_collision(int size, float * norms, float * direction, float * vel, float * pos, int array_stride, float * line_vec)
|
|
||||||
{
|
{
|
||||||
int numBlocks = (size + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
int numBlocks = (size + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||||
process_ray_collision<<<numBlocks,BLOCK_SIZE>>>(size, norms, direction, vel, pos, array_stride, line_vec);
|
process_ray_collision<<<numBlocks,BLOCK_SIZE>>>(size, norms, direction, vel, pos, array_stride, line_vec, num_dim);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user