# -*- coding: utf-8 -*-
import copy
import math
from timeit import default_timer as timer
import numba as nb
import numpy as np
import open3d as o3d
import scipy.stats
from matplotlib import cm
from numba import cuda, float32, jit, njit, guvectorize, prange
from numpy.linalg import norm
from scipy.spatial import distance
import lyceanem.base_types as base_types
from ..utility import math_functions as math_functions
import lyceanem.electromagnetics.empropagation as EM
EPSILON = 1e-6 # how close to zero do we consider zero? example used 1e-7
# # A numpy record array (like a struct) to record triangle
# point_data = np.dtype([
# # conductivity, permittivity and permiability
# #free space values should be
# #permittivity of free space 8.8541878176e-12F/m
# #permeability of free space 1.25663706212e-6H/m
# ('permittivity', 'c8'), ('permeability', 'c8'),
# #electric or magnetic current sources? E if True
# ('Electric', '?'),
# ], align=True)
# point_t = from_dtype(point_data) # Create a type that numba can recognize!
#
# # A numpy record array (like a struct) to record triangle
# triangle_data = np.dtype([
# # v0 data
# ('v0x', 'f4'), ('v0y', 'f4'), ('v0z', 'f4'),
# # v1 data
# ('v1x', 'f4'), ('v1y', 'f4'), ('v1z', 'f4'),
# # v2 data
# ('v2x', 'f4'), ('v2y', 'f4'), ('v2z', 'f4'),
# # normal vector
# #('normx', 'f4'), ('normy', 'f4'), ('normz', 'f4'),
# # ('reflection', np.float64),
# # ('diffuse_c', np.float64),
# # ('specular_c', np.float64),
# ], align=True)
# triangle_t = from_dtype(triangle_data) # Create a type that numba can recognize!
#
# # ray class, to hold the ray origin, direction, and eventuall other data.
# ray_data=np.dtype([
# #origin data
# ('ox','f4'),('oy','f4'),('oz','f4'),
# #direction vector
# ('dx','f4'),('dy','f4'),('dz','f4'),
# #target
# #direction vector
# #('tx','f4'),('ty','f4'),('tz','f4'),
# #distance traveled
# ('dist','f4'),
# #intersection
# ('intersect','?'),
# ],align=True)
# ray_t = from_dtype(ray_data) # Create a type that numba can recognize!
# # We can use that type in our device functions and later the kernel!
#
# scattering_point = np.dtype([
# #position data
# ('px', 'f4'), ('py', 'f4'), ('pz', 'f4'),
# #velocity
# ('vx', 'f4'), ('vy', 'f4'), ('vz', 'f4'),
# #normal
# ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'),
# #weights
# ('ex','c8'),('ey','c8'),('ez','c8'),
# # conductivity, permittivity and permiability
# #free space values should be
# #permittivity of free space 8.8541878176e-12F/m
# #permeability of free space 1.25663706212e-6H/m
# ('permittivity', 'c8'), ('permeability', 'c8'),
# #electric or magnetic current sources? E if True
# ('Electric', '?'),
# ], align=True)
# scattering_t = from_dtype(scattering_point) # Create a type that numba can recognize!
#
# @njit
# def cart2pol(x, y):
# rho = np.sqrt(x ** 2 + y ** 2)
# phi = np.arctan2(y, x)
# return (rho, phi)
#
#
# @njit
# def pol2cart(rho, phi):
# x = rho * np.cos(phi)
# y = rho * np.sin(phi)
# return (x, y)
#
#
# @njit
# def cart2sph(x, y, z):
# # radians
# hxy = np.hypot(x, y)
# r = np.hypot(hxy, z)
# el = np.arctan2(z, hxy)
# az = np.arctan2(y, x)
# return az, el, r
#
#
# @njit
# def sph2cart(az, el, r):
# # radians
# rcos_theta = r * np.cos(el)
# x = rcos_theta * np.cos(az)
# y = rcos_theta * np.sin(az)
# z = r * np.sin(el)
# return x, y, z
#
#
# @njit
# def calc_normals(T):
# # calculate triangle norm
# e1x = T["v1x"] - T["v0x"]
# e1y = T["v1y"] - T["v0y"]
# e1z = T["v1z"] - T["v0z"]
#
# e2x = T["v2x"] - T["v0x"]
# e2y = T["v2y"] - T["v0y"]
# e2z = T["v2z"] - T["v0z"]
#
# dirx = e1y * e2z - e1z * e2y
# diry = e1z * e2x - e1x * e2z
# dirz = e1x * e2y - e1y * e2x
#
# normconst = math.sqrt(dirx ** 2 + diry ** 2 + dirz ** 2)
#
# T["normx"] = dirx / normconst
# T["normy"] = diry / normconst
# T["normz"] = dirz / normconst
#
# return T
#
#
# @guvectorize(
# [(float32[:], float32[:], float32[:], float32)],
# "(n),(n)->(n),()",
# target="parallel",
# )
# def fast_calc_dv(source, target, dv, normconst):
# dirx = target[0] - source[0]
# diry = target[1] - source[1]
# dirz = target[2] - source[2]
# normconst = math.sqrt(dirx ** 2 + diry ** 2 + dirz ** 2)
# dv = np.array([dirx, diry, dirz]) / normconst
#
#
# @njit
# def calc_dv(source, target):
# dirx = target[0] - source[0]
# diry = target[1] - source[1]
# dirz = target[2] - source[2]
# normconst = np.sqrt(dirx ** 2 + diry ** 2 + dirz ** 2)
# dv = np.array([dirx, diry, dirz]) / normconst
# return dv[0], dv[1], dv[2], normconst
# @njit
# def calc_dv_norm(source,target,direction,length):
# length[:,0]=np.sqrt((target[:,0]-source[:,0])**2+(target[:,1]-source[:,1])**2+(target[:,2]-source[:,2])**2)
# direction=(target-source)/length
# return direction, length
@cuda.jit(device=True)
def dot(ax1, ay1, az1, ax2, ay2, az2):
result = ax1 * ax2 + ay1 * ay2 + az1 * az2
return result
@cuda.jit(device=True)
def cross(ax1, ay1, az1, ax2, ay2, az2):
rx = ay1 * az2 - az1 * ay2
ry = az1 * ax2 - ax1 * az2
rz = ax1 * ay2 - ay1 * ax2
return rx, ry, rz
# @cuda.jit(boolean,float32(ray_t, triangle_t), device=True, inline=False)
@cuda.jit(device=True, inline=False)
def hit(ray, triangle):
""" Compute compute whether the defined ray will intersect with the defined triangle
using the Möller–Trumbore ray-triangle intersection algorithm
"""
# find edge vectors
e1x = triangle.v1x - triangle.v0x
e1y = triangle.v1y - triangle.v0y
e1z = triangle.v1z - triangle.v0z
e2x = triangle.v2x - triangle.v0x
e2y = triangle.v2y - triangle.v0y
e2z = triangle.v2z - triangle.v0z
# calculate determinant cross product of DV and E2
pvecx, pvecy, pvecz = cross(ray.dx, ray.dy, ray.dz, e2x, e2y, e2z)
# pvecx=ray.dy*e2z-ray.dz*e2y
# pvecy=ray.dz*e2x-ray.dx*e2z
# pvecz=ray.dx*e2y-ray.dy*e2x
# determinant is dot product of edge 1 and pvec
A = dot(pvecx, pvecy, pvecz, e1x, e1y, e1z)
# if A is near zero, then ray lies in the plane of the triangle
if -EPSILON < A < EPSILON:
# print('miss')
return False, math.inf
# if A is less than zero, then the ray is coming from behind the triangle
# cull backface triangles
if A < 0:
return False, math.inf
# calculate distance from vertice 0 to ray origin
tvecx = ray.ox - triangle.v0x # s
tvecy = ray.oy - triangle.v0y # s
tvecz = ray.oz - triangle.v0z # s
# inverse determinant and calculate bounds of triangle
F = 1.0 / A
U = F * dot(tvecx, tvecy, tvecz, pvecx, pvecy, pvecz)
# U=F*(tvecx*pvecx+tvecy*pvecy+tvecz*pvecz)
# print('U',U)
if U < 0.0 or U > (1.0):
# in U coordinates, if U is less than 0 or greater than 1, it is outside the triangle
# print('miss')
return False, math.inf
# cross product of tvec and E1
qx, qy, qz = cross(tvecx, tvecy, tvecz, e1x, e1y, e1z)
# qx=tvecy*e1z-tvecz*e1y
# qy=tvecz*e1x-tvecx*e1z
# qz=tvecx*e1y-tvecy*e1x
V = F * dot(ray.dx, ray.dy, ray.dz, qx, qy, qz)
# print('V,V+U',V,V+U)
# V=F*(ray.dx*qx+ray.dy*qy+ray.dz*qz)
if V < 0.0 or (U + V) > (1.0):
# in UV coordinates, intersection is within triangle
# print('miss')
return False, math.inf
# intersect_distance = F*(e2x*qx + e2y*qy + e2z*qz)
intersect_distance = F * dot(e2x, e2y, e2z, qx, qy, qz)
# print('dist',intersect_distance)
# if (intersect_distance>EPSILON and intersect_distance<(1-EPSILON)):
if (intersect_distance > (2 * EPSILON)) and (
intersect_distance < (ray.dist - (2 * EPSILON))
):
# intersection on triangle
# print('hit')
return True, intersect_distance
return False, math.inf
@cuda.jit
def integratedkernal(ray_index, point_information, environment, ray_flag):
cu_ray_num = cuda.grid(1) # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
if cu_ray_num < ray_index.shape[0]:
# there are rays to process but must create and launch on the spot, then if succesful go to the next one until the index has been traversed
# print(ray_index[cu_ray_num,0],ray_index[cu_ray_num,1],ray_index[cu_ray_num,2])
# ray_components[cu_ray_num,:]=0.0
# print(scattering_matrix.shape[0],scattering_matrix.shape[1])
for i in range(ray_index.shape[1] - 1):
# print('integrated ray',cu_ray_num)
if ray_index[cu_ray_num, i + 1] != 0:
# #print(i,cu_ray_num,network_index[cu_ray_num,i],network_index[cu_ray_num,i+1])
# #convert source point field to ray
ray = cuda.local.array(shape=(1), dtype=base_types.ray_t)
point1 = point_information[ray_index[cu_ray_num, i] - 1]
point2 = point_information[ray_index[cu_ray_num, i + 1] - 1]
ray[0]["ox"] = point1["px"]
ray[0]["oy"] = point1["py"]
ray[0]["oz"] = point1["pz"]
normconst = math.sqrt(
(point2["px"] - point1["px"]) ** 2
+ (point2["py"] - point1["py"]) ** 2
+ (point2["pz"] - point1["pz"]) ** 2
)
ray[0]["dx"] = (point2["px"] - point1["px"]) / normconst
ray[0]["dy"] = (point2["py"] - point1["py"]) / normconst
ray[0]["dz"] = (point2["pz"] - point1["pz"]) / normconst
ray[0]["dist"] = normconst
ray[0]["intersect"] = False
# ray=rayprep(point1,point2,ray)
# print('integrated ray',cu_ray_num,i,normconst)
for tri_inc in range(len(environment)):
hit_bool, dist_temp = hit(ray[0], environment[tri_inc])
if hit_bool and (dist_temp < ray[0]["dist"]):
# hit something, so break out of loop and null index the first entry of the ray_index
ray[0]["dist"] = dist_temp
ray[0]["intersect"] = True
# if ray[0]['intersect']:
# break
ray_flag[cu_ray_num] = ray[0]["intersect"]
# print('integrated',cu_ray_num,i)
@cuda.jit
def kernel1D(rays, environment, ray_flag, distances):
cu_ray_num = cuda.grid(1) # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
# threadIdx.y + ( blockIdx.y * blockDim.y )
# margin=1e-5
dist_min = math.inf
intersect_check = False
max_tri_num = len(environment)
i = 0 # emulate a C-style for-loop, exposing the idx increment logic
while i < max_tri_num:
hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
i += 1
if hit_bool and (dist_temp < dist_min):
# hit something
intersect_check = True
dist_min = dist_temp
ray_flag[cu_ray_num] = intersect_check
distances[cu_ray_num] = dist_min
@cuda.jit
def kernel1Dv2(rays, environment):
cu_ray_num = cuda.grid(1) # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
# threadIdx.y + ( blockIdx.y * blockDim.y )
# margin=1e-5
# dist_min=math.inf
# intersect_check=False
max_tri_num = len(environment)
i = 0 # emulate a C-style for-loop, exposing the idx increment logic
while i < max_tri_num:
hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
i += 1
# if hit_bool:
# print(cu_ray_num)
# if (cu_ray_num==5):
# #if (i==58):
# hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
# print(i,dist_temp)
if hit_bool and (dist_temp < rays[cu_ray_num]["dist"]):
# hit something
# print(cu_ray_num,i,dist_temp,'hit')
# dist_min=dist_temp
rays[cu_ray_num]["dist"] = dist_temp
rays[cu_ray_num]["intersect"] = True
# ray_flag[cu_ray_num]=True
# if rays[cu_ray_num]['intersect']==False:
# print(cu_ray_num,i,dist_temp,'miss')
nb.cuda.syncthreads()
# if (rays[cu_ray_num]['intersect']==True):
# print(cu_ray_num,rays[cu_ray_num]['dist'])
# if (cu_ray_num==1):
# print(i)
@cuda.jit
def kernel1Dv3(rays, environment):
cu_ray_num = cuda.grid(1) # alias for threadIdx.x + ( blockIdx.x * blockDim.x ),
if (
cu_ray_num < rays.shape[0]
): # threadIdx.y + ( blockIdx.y * blockDim.y )
# margin=1e-5
# dist_min=math.inf
# intersect_check=False
max_tri_num = len(environment)
i = 0 # emulate a C-style for-loop, exposing the idx increment logic
while i < max_tri_num:
hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
i += 1
# if hit_bool:
# print(cu_ray_num)
# if (cu_ray_num==5):
# #if (i==58):
# hit_bool, dist_temp = hit(rays[cu_ray_num], environment[i])
# print(i,dist_temp)
if hit_bool and (dist_temp < rays[cu_ray_num]["dist"]):
# hit something
# print(cu_ray_num,i,dist_temp,'hit')
# dist_min=dist_temp
# print('conventional',cu_ray_num,i,dist_temp,rays[cu_ray_num]['dist'])
rays[cu_ray_num]["dist"] = dist_temp
rays[cu_ray_num]["intersect"] = True
# ray_flag[cu_ray_num]=True
# print('conventional',cu_ray_num,dist_temp)
nb.cuda.syncthreads()
def integratedRaycaster(ray_index, scattering_points, environment_local):
start = timer()
prep_dt = timer() - start
raystart = timer()
# Create a container for the pixel RGBA information of our image
chunk_size = 2 ** 11
threads_in_block = 1024
# for idx in range(len(triangle_chunk)-1):
d_environment = cuda.device_array(len(environment_local), dtype=base_types.triangle_t)
d_ray_index = cuda.device_array(
(ray_index.shape[0], ray_index.shape[1]), dtype=np.int32
)
d_ray_index = cuda.to_device(ray_index)
ray_flags = np.full(ray_index.shape[0], False, dtype=np.bool)
d_ray_flag = cuda.device_array(ray_index.shape[0], dtype=np.bool)
d_ray_flag = cuda.to_device(ray_flags)
cuda.to_device(environment_local, to=d_environment)
d_point_information = cuda.device_array(
scattering_points.shape[0], dtype=base_types.scattering_t
)
d_point_information = cuda.to_device(scattering_points)
grids = math.ceil(ray_index.shape[0] / threads_in_block)
threads = threads_in_block
# Execute the kernel
# cuda.profile_start()
# kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
integratedkernal[grids, threads](
d_ray_index, d_point_information, d_environment, d_ray_flag
)
# cuda.profile_stop()
# distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
propagation_index = d_ray_index.copy_to_host()
ray_flags = d_ray_flag.copy_to_host()
kernel_dt = timer() - raystart
start = timer()
mem_dt = timer() - start
# deallocate memory on gpu
#ctx = cuda.current_context()
#deallocs = ctx.deallocations
#deallocs.clear()
# final_index=np.delete(propagation_index,np.where(propagation_index[:,0]==0),axis=0)
final_index = np.delete(ray_index, ray_flags, axis=0)
return final_index
[docs]def visiblespace(
source_coords,
source_normals,
environment,
vertex_area=0,
az_range=np.linspace(-180.0, 180.0, 19),
elev_range=np.linspace(-90.0, 90.0, 19),
shell_range=0.5,
):
"""
Visiblespace generates a matrix stack of visible space for each element, indexed by source_coordinate.
Parameters
--------------
source_coords : n by 3 numpy array of floats
xyz coordinates of the sources
source_normals : n by 3 numpy array of floats
normal vectors for each source point
environment : :class:`lyceanem.base_classes.triangles`
blocking environment
vertex_area : float or array of floats
the area associated with each source point, defaults to 0, but can also be specified for each source
az_range : array of float
array of azimuth planes in degrees
elev_range : array of float
array of elevation points in degrees
shell_range: float
radius of point cloud shell
Returns
-------------------
visible_patterns : m by l by n array of floats
3D antenna patterns
resultant_pcd : open3d pointcloud
colour data to scale the points fractional visibility from the source aperture
"""
azaz, elel = np.meshgrid(az_range, elev_range)
sourcenum = len(source_coords)
sinks = np.zeros((len(np.ravel(azaz)), 3), dtype=np.float32)
sinks[:, 0], sinks[:, 1], sinks[:, 2] = azeltocart(
np.ravel(azaz), np.ravel(elel), 1e9
)
sinknum = len(sinks)
#
# initial_index=np.full((len(source_coords),2),np.nan,dtype=np.int32)
# initial_index[:,0]=np.arange(len(source_coords))
# pointingindex=np.reshape(np.arange(0,len(sinks)),(len(az_range),len(elev_range)))
# need to create farfield sinks in az,elev coordinates, then convert to xyz sink coordinates, and generate index
# missed_points,hit_points,missed_index,hit_index,shadow_rays=chunkingRaycaster1D(source_coords,sinks,np.zeros((1,3),dtype=np.float32),initial_index,environment,1,terminate_flag=True)
hit_index, _ = workchunkingv2(
source_coords, sinks, np.empty((0, 3), dtype=np.float32), environment, 1
)
unified_model = np.append(
source_coords.astype(np.float32), sinks.astype(np.float32), axis=0
)
# filtered_network2,final_network2,filtered_index2,final_index2,shadow_rays
directions = np.zeros((len(hit_index), 3), dtype=np.float32)
norm_length = np.zeros((len(hit_index), 1), dtype=np.float32)
hit_directions, hit_norms = math_functions.calc_dv_norm(
unified_model[hit_index[:, 0].astype(int) - 1, :],
unified_model[hit_index[:, 1].astype(int) - 1, :],
directions,
norm_length,
)
# angles=Angular_distance(hit_directions,source_normals[hit_index[:,0].astype(int)-1,:].astype(np.float32))
angles = np.zeros((hit_directions.shape[0], 1), dtype=np.float32)
angles = angle_pop(
hit_directions,
source_normals[hit_index[:, 0].astype(int) - 1, :].astype(np.float32),
angles,
)
# angles[np.isnan(angles)]=0
# visible_patterns=quickpatterncreator(az_range,elev_range,source_coords,angles,vertex_area,hit_index)
if len(vertex_area) == 1:
if vertex_area == 0:
portion = np.zeros((len(angles), 1), dtype=np.float32)
portion[:] = 1
else:
portion = vertex_area * np.abs(np.cos(angles))
portion[portion < 0] = 0
else:
portion = np.ravel(vertex_area[hit_index[:, 0].astype(int) - 1]) * np.abs(
np.ravel(np.cos(angles))
)
portion[np.where(np.abs(angles) > (np.pi / 2))[0]] = 0.0
visible_patterns = np.empty((len(az_range) * len(elev_range)), dtype=np.float32)
visible_patterns[:] = 0
visible_patterns = patternsort(
visible_patterns, sourcenum, sinknum, portion, hit_index
).reshape(len(elev_range), len(az_range))
shell_coords = np.zeros((len(np.ravel(azaz)), 3), dtype=np.float32)
shell_coords[:, 0], shell_coords[:, 1], shell_coords[:, 2] = azeltocart(
np.ravel(azaz), np.ravel(elel), shell_range
)
maxarea = np.nanmax(np.nanmax(visible_patterns))
resultant_pcd = patterntocloud(
np.reshape(visible_patterns, (len(az_range) * len(elev_range), 1)),
shell_coords,
maxarea,
)
return visible_patterns, resultant_pcd
@njit(cache=True, nogil=True)
def unit_vector(vector):
""" Returns the unit vector of the vector. """
return vector / np.linalg.norm(vector)
@njit(cache=True, nogil=True)
def angle(vector1, vector2):
""" Returns the angle in radians between given vectors"""
v1_u = unit_vector(vector1)
v2_u = unit_vector(vector2)
minor = np.linalg.det(np.stack((v1_u[-2:], v2_u[-2:])))
if minor == 0:
sign = 1
else:
sign = -np.sign(minor)
dot_p = np.dot(v1_u, v2_u)
dot_p = min(max(dot_p, -1.0), 1.0)
return sign * np.arccos(dot_p)
@njit(parallel=True)
def angle_pop(hit_directions, source_normals, angles):
# parallised angle calculation
for angle_inc in prange(len(angles)):
angles[angle_inc] = angle(
hit_directions[angle_inc, :], source_normals[angle_inc, :]
)
return angles
@njit(parallel=True)
def patternsort(visible_patterns, sourcenum, sinknum, portion, hit_index):
# faster method of summing the array contributions to each farfield point
for n in range(sinknum):
visible_patterns[n] = np.sum(portion[hit_index[:, 1] - 1 - sourcenum == n])
return visible_patterns
def patterncreator(az_range, elev_range, source_coords, pointingindex, hit_index):
# creates visibility pattern from azimuth and elevation ranges
visible_patterns = np.empty(
(len(az_range), len(elev_range), len(source_coords)), dtype=np.int32
)
visible_patterns[:, :, :] = 0
for n in range(len(az_range)):
for m in range(len(elev_range)):
for element in range(len(source_coords)):
if np.any(
np.all(
np.asarray(
[
hit_index[:, 0] == element,
hit_index[:, 1] == pointingindex[n, m],
]
),
axis=0,
)
):
visible_patterns[n, m, element] = 1
return visible_patterns
def quickpatterncreator(
az_range, elev_range, source_coords, angles, vertex_area, hit_index
):
# creates visibility pattern from azimuth and elevation ranges
visible_patterns = np.empty((len(az_range), len(elev_range)), dtype=np.float32)
visible_patterns[:, :] = 0
pointingindex = np.reshape(
np.arange(0, len(az_range) * len(elev_range)), (len(az_range), len(elev_range))
)
sourcenum = len(source_coords)
if vertex_area == 0:
for n in range(len(az_range)):
for m in range(len(elev_range)):
visible_patterns[n, m] = np.sum(
hit_index[:, 1] - sourcenum - 1 == pointingindex[n, m]
)
else:
portion = vertex_area * np.cos(angles)
portion[portion < 0] = 0
for n in range(len(az_range)):
for m in range(len(elev_range)):
visible_patterns[n, m] = np.sum(
portion[hit_index[:, 1] - sourcenum - 1 == pointingindex[n, m]]
)
return visible_patterns
# @njit
def patterntocloud(pattern_data, shell_coords, maxarea):
# takes the pattern_data and shell_coordinates, and creates an open3d point cloud based upon the data.
point_cloud = points2pointcloud(shell_coords)
points, elements = np.shape(pattern_data)
# normdata
viridis = cm.get_cmap("viridis", 40)
visible_elements = np.sum(pattern_data / maxarea, axis=1)
np_colors = viridis(visible_elements)
point_cloud.colors = o3d.utility.Vector3dVector(np_colors[:, 0:3])
return point_cloud
def azeltocart(az_data, el_data, radius):
# convert from az,el and radius data to xyz
x_data = radius * np.cos(np.deg2rad(el_data)) * np.cos(np.deg2rad(az_data))
y_data = radius * np.cos(np.deg2rad(el_data)) * np.sin(np.deg2rad(az_data))
z_data = radius * np.sin(np.deg2rad(el_data))
return x_data, y_data, z_data
def convertTriangles(triangle_object):
"""
convert o3d triangle object to ray tracer triangle class
"""
if triangle_object == None:
triangles = np.empty(0, dtype=base_types.triangle_t)
else:
vertices = np.asarray(triangle_object.vertices)
tri_index = np.asarray(triangle_object.triangles)
normals = np.asarray(triangle_object.triangle_normals)
triangles = np.empty(len(tri_index), dtype=base_types.triangle_t)
for idx in range(len(tri_index)):
triangles[idx]["v0x"] = np.single(vertices[tri_index[idx, 0], 0])
triangles[idx]["v0y"] = np.single(vertices[tri_index[idx, 0], 1])
triangles[idx]["v0z"] = np.single(vertices[tri_index[idx, 0], 2])
triangles[idx]["v1x"] = np.single(vertices[tri_index[idx, 1], 0])
triangles[idx]["v1y"] = np.single(vertices[tri_index[idx, 1], 1])
triangles[idx]["v1z"] = np.single(vertices[tri_index[idx, 1], 2])
triangles[idx]["v2x"] = np.single(vertices[tri_index[idx, 2], 0])
triangles[idx]["v2y"] = np.single(vertices[tri_index[idx, 2], 1])
triangles[idx]["v2z"] = np.single(vertices[tri_index[idx, 2], 2])
# triangles[idx]['normx']=np.single(normals[idx,0])
# triangles[idx]['normy']=np.single(normals[idx,1])
# triangles[idx]['normz']=np.single(normals[idx,2])
return triangles
def pick_points(pcd):
"""
test function based on open3d example to pick points, can be used as the basis of a selection function to pick vertices.
"""
print("")
print("1) Please pick at least three correspondences using [shift + left click]")
print(" Press [shift + right click] to undo point picking")
print("2) Afther picking points, press q for close the window")
vis = o3d.visualization.VisualizerWithEditing()
vis.create_window()
vis.add_geometry(pcd)
vis.run() # user picks points
vis.destroy_window()
print("")
return vis.get_picked_points()
def points2pointcloud(xyz):
"""
turns numpy array of xyz data into a o3d format point cloud
Parameters
----------
xyz : TYPE
DESCRIPTION.
Returns
-------
pcd : point cloud format
"""
pcd = o3d.geometry.PointCloud()
if xyz.shape[1] == 3:
pcd.points = o3d.utility.Vector3dVector(xyz)
else:
reshaped = xyz.reshape((int(len(xyz.ravel()) / 3), 3))
pcd.points = o3d.utility.Vector3dVector(reshaped)
return pcd
@guvectorize([(float32[:, :], float32[:])], "(m,n)->(m)", target="parallel")
def PathLength(partial_network, path_length):
"""
for each row in the partial_network, calculate the distance travelled from the origin to the sink point
#
Parameters
----------
partial_network : n by (3*m) array of coordinates
#
Returns
-------
path_lengths : n by 1 array of path lengths between each point.
#
"""
if partial_network.shape[1] == 6:
for i in range(partial_network.shape[0]):
path_length[i] = np.sqrt(
(partial_network[i, 3] - partial_network[i, 0]) ** 2
+ (partial_network[i, 4] - partial_network[i, 1]) ** 2
+ (partial_network[i, 5] - partial_network[i, 2]) ** 2
)
elif partial_network.shape[1] == 9:
for i in range(partial_network.shape[0]):
path_length[i] = np.sqrt(
(partial_network[i, 3] - partial_network[i, 0]) ** 2
+ (partial_network[i, 4] - partial_network[i, 1]) ** 2
+ (partial_network[i, 5] - partial_network[i, 2]) ** 2
) + np.sqrt(
(partial_network[i, 6] - partial_network[i, 3]) ** 2
+ (partial_network[i, 7] - partial_network[i, 4]) ** 2
+ (partial_network[i, 8] - partial_network[i, 5]) ** 2
)
elif partial_network.shape[1] == 12:
for i in range(partial_network.shape[0]):
path_length[i] = (
np.sqrt(
(partial_network[i, 3] - partial_network[i, 0]) ** 2
+ (partial_network[i, 4] - partial_network[i, 1]) ** 2
+ (partial_network[i, 5] - partial_network[i, 2]) ** 2
)
+ np.sqrt(
(partial_network[i, 6] - partial_network[i, 3]) ** 2
+ (partial_network[i, 7] - partial_network[i, 4]) ** 2
+ (partial_network[i, 8] - partial_network[i, 5]) ** 2
)
+ np.sqrt(
(partial_network[i, 9] - partial_network[i, 6]) ** 2
+ (partial_network[i, 10] - partial_network[i, 7]) ** 2
+ (partial_network[i, 11] - partial_network[i, 8]) ** 2
)
)
# @guvectorize([(int32[:,:], float32[:,:], float32[:,:], float32[:,:], float32, complex64[:,:])], '(m,n)->(m,n)',target='parallel')
@njit(parallel=True, nogil=True, cache=True)
def ScatteringNetworkGenerator(
network_index,
scattering_points,
scattering_normals,
scattering_weights,
point_information,
scattering_coefficient,
wavelength,
local_channel,
):
"""
for each row in the partial_network, calculate the distance travelled from the origin to the sink point
#
Parameters
----------
partial_network : n by (3*m) array of coordinates
#
Returns
-------
local_channel : n by 3 array of electric field vectors at each sink
#
"""
if network_index.shape[1] == 2:
for i in prange(network_index.shape[0]):
local_channel[i, :] = EM.losChannel(
scattering_points[network_index[i, 0] - 1, :],
scattering_normals[network_index[i, 0] - 1, :],
scattering_weights[network_index[i, 0] - 1, :],
point_information[network_index[i, 0] - 1],
scattering_points[network_index[i, 1] - 1, :],
scattering_normals[network_index[i, 1] - 1, :],
scattering_weights[network_index[i, 1] - 1, :],
point_information[network_index[i, 1] - 1],
scattering_coefficient,
wavelength,
)
elif network_index.shape[1] == 3:
for i in prange(network_index.shape[0]):
local_channel[i, :] = EM.losplus1Channel(
scattering_points[network_index[i, 0] - 1, :],
scattering_normals[network_index[i, 0] - 1, :],
scattering_weights[network_index[i, 0] - 1, :],
point_information[network_index[i, 0] - 1],
scattering_points[network_index[i, 1] - 1, :],
scattering_normals[network_index[i, 1] - 1, :],
scattering_weights[network_index[i, 1] - 1, :],
point_information[network_index[i, 1] - 1],
scattering_points[network_index[i, 2] - 1, :],
scattering_normals[network_index[i, 2] - 1, :],
scattering_weights[network_index[i, 2] - 1, :],
point_information[network_index[i, 2] - 1],
scattering_coefficient,
wavelength,
)
elif network_index.shape[1] == 4:
for i in prange(network_index.shape[0]):
local_channel[i, :] = EM.losplus2Channel(
scattering_points[network_index[i, 0] - 1, :],
scattering_normals[network_index[i, 0] - 1, :],
scattering_weights[network_index[i, 0] - 1, :],
point_information[network_index[i, 0] - 1],
scattering_points[network_index[i, 1] - 1, :],
scattering_normals[network_index[i, 1] - 1, :],
scattering_weights[network_index[i, 1] - 1, :],
point_information[network_index[i, 1] - 1],
scattering_points[network_index[i, 2] - 1, :],
scattering_normals[network_index[i, 2] - 1, :],
scattering_weights[network_index[i, 2] - 1, :],
point_information[network_index[i, 2] - 1],
scattering_points[network_index[i, 3] - 1, :],
scattering_normals[network_index[i, 3] - 1, :],
scattering_weights[network_index[i, 3] - 1, :],
point_information[network_index[i, 3] - 1],
scattering_coefficient,
wavelength,
)
return local_channel
@njit(parallel=True, nogil=True, cache=True)
def ScatteringNetworkGeneratorTime(
network_index,
scattering_points,
scattering_normals,
scattering_weights,
point_information,
wavelength,
local_channel,
times,
):
"""
for each row in the partial_network, calculate the distance travelled from the origin to the sink point
#
Parameters
----------
partial_network : n by (3*m) array of coordinates
#
Returns
-------
local_channel : n by 3 array of electric field vectors at each sink
#
"""
if network_index.shape[1] == 2:
for i in prange(network_index.shape[0]):
local_channel[i, :], times[i] = EM.losChannelv2(
scattering_points[network_index[i, 0] - 1, :],
scattering_normals[network_index[i, 0] - 1, :],
scattering_weights[network_index[i, 0] - 1, :],
point_information[network_index[i, 0] - 1],
scattering_points[network_index[i, 1] - 1, :],
scattering_normals[network_index[i, 1] - 1, :],
scattering_weights[network_index[i, 1] - 1, :],
point_information[network_index[i, 1] - 1],
wavelength,
)
elif network_index.shape[1] == 3:
for i in prange(network_index.shape[0]):
local_channel[i, :], times[i] = EM.losplus1Channelv2(
scattering_points[network_index[i, 0] - 1, :],
scattering_normals[network_index[i, 0] - 1, :],
scattering_weights[network_index[i, 0] - 1, :],
point_information[network_index[i, 0] - 1],
scattering_points[network_index[i, 1] - 1, :],
scattering_normals[network_index[i, 1] - 1, :],
scattering_weights[network_index[i, 1] - 1, :],
point_information[network_index[i, 1] - 1],
scattering_points[network_index[i, 2] - 1, :],
scattering_normals[network_index[i, 2] - 1, :],
scattering_weights[network_index[i, 2] - 1, :],
point_information[network_index[i, 2] - 1],
wavelength,
)
elif network_index.shape[1] == 4:
for i in prange(network_index.shape[0]):
local_channel[i, :], times[i] = EM.losplus2Channelv2(
scattering_points[network_index[i, 0] - 1, :],
scattering_normals[network_index[i, 0] - 1, :],
scattering_weights[network_index[i, 0] - 1, :],
point_information[network_index[i, 0] - 1],
scattering_points[network_index[i, 1] - 1, :],
scattering_normals[network_index[i, 1] - 1, :],
scattering_weights[network_index[i, 1] - 1, :],
point_information[network_index[i, 1] - 1],
scattering_points[network_index[i, 2] - 1, :],
scattering_normals[network_index[i, 2] - 1, :],
scattering_weights[network_index[i, 2] - 1, :],
point_information[network_index[i, 2] - 1],
scattering_points[network_index[i, 3] - 1, :],
scattering_normals[network_index[i, 3] - 1, :],
scattering_weights[network_index[i, 3] - 1, :],
point_information[network_index[i, 3] - 1],
wavelength,
)
return local_channel, times
@njit(parallel=True)
def ScatteringNetworkGeneratorv2(
network_index,
scattering_points,
scattering_normals,
scattering_weights,
point_information,
wavelength,
local_channel,
):
"""
for each row in the partial_network, calculate the distance travelled from the origin to the sink point
#
Parameters
----------
partial_network : n by (3*m) array of coordinates
#
Returns
-------
local_channel : n by 3 array of electric field vectors at each sink
#
"""
if network_index.shape[1] == 2:
local_channel = EM.losChannel(
scattering_points[network_index[0] - 1],
scattering_normals[network_index[0] - 1],
scattering_weights[network_index[0] - 1],
point_information[network_index[0] - 1],
scattering_points[network_index[1] - 1],
scattering_normals[network_index[1] - 1],
scattering_weights[network_index[1] - 1],
point_information[network_index[1] - 1],
wavelength,
)
elif network_index.shape[1] == 3:
local_channel = EM.losplus1Channel(
scattering_points[network_index[0] - 1],
scattering_normals[network_index[0] - 1],
scattering_weights[network_index[0] - 1],
point_information[network_index[0] - 1],
scattering_points[network_index[1] - 1],
scattering_normals[network_index[1] - 1],
scattering_weights[network_index[1] - 1],
point_information[network_index[1] - 1],
scattering_points[network_index[2] - 1],
scattering_normals[network_index[2] - 1],
scattering_weights[network_index[2] - 1],
point_information[network_index[2] - 1],
wavelength,
)
elif network_index.shape[1] == 4:
local_channel = EM.losplus2Channel(
scattering_points[network_index[0] - 1],
scattering_normals[network_index[0] - 1],
scattering_weights[network_index[0] - 1],
point_information[network_index[0] - 1],
scattering_points[network_index[1] - 1],
scattering_normals[network_index[1] - 1],
scattering_weights[network_index[1] - 1],
point_information[network_index[1] - 1],
scattering_points[network_index[2] - 1],
scattering_normals[network_index[2] - 1],
scattering_weights[network_index[2] - 1],
point_information[network_index[2] - 1],
scattering_points[network_index[3] - 1],
scattering_normals[network_index[3] - 1],
scattering_weights[network_index[3] - 1],
point_information[network_index[3] - 1],
wavelength,
)
return local_channel
def PoyntingVector(partial_network):
"""
for each row in the partial_network, calculate the distance travelled from the origin to the sink point
#
Parameters
----------
partial_network : n by 6 array of coordinates
#
Returns
-------
pointing_vectors : n by 3 array of path lengths between each point.
#
"""
pointing_vectors = np.zeros((partial_network.shape[0], 3), dtype=np.float32)
pointing_mags = np.zeros((partial_network.shape[0], 1), dtype=np.float32)
for i in range(partial_network.shape[0]):
pointing_vectors[i, :] = partial_network[i, -3:] - partial_network[i, -6:-3]
pointing_mags[i] = np.sqrt(
pointing_vectors[i, 0] ** 2
+ pointing_vectors[i, 1] ** 2
+ pointing_vectors[i, 2] ** 2
)
pointing_vectors = pointing_vectors / pointing_mags
return pointing_vectors
def AngleofArrivalVectors(partial_network):
"""
calculate the vector from the receiving point to the incoming ray, to calculate angle of arrival correctly
#
Parameters
----------
partial_network : n by 6 array of coordinates
#
Returns
-------
pointing_vectors : n by 3 array of path lengths between each point.
#
"""
pointing_vectors = np.zeros((partial_network.shape[0], 3), dtype=np.float32)
pointing_mags = np.zeros((partial_network.shape[0], 1), dtype=np.float32)
for i in range(partial_network.shape[0]):
pointing_vectors[i, :] = partial_network[i, -6:-3] - partial_network[i, -3:]
pointing_mags[i] = np.sqrt(
pointing_vectors[i, 0] ** 2
+ pointing_vectors[i, 1] ** 2
+ pointing_vectors[i, 2] ** 2
)
pointing_vectors = pointing_vectors / pointing_mags
return pointing_vectors
def CalculatePoyntingVectors(
total_network,
wavelength,
scattering_index,
ideal_vector=np.asarray([[1, 0, 0]]),
az_bins=np.linspace(-np.pi, np.pi, 19),
el_bins=np.linspace(-np.pi / 2.0, np.pi / 2.0, 19),
time_bins=np.linspace(-1e-9, 1.9e-8, 20),
impulse=False,
aoa=True,
):
"""
Takes the total network generated using the raycasting process, and calculates the angle of arrival spectrum, delay spectrum, and angular standard deviation as a measure of `farfield ness' of the arriving waves'
Parameters
----------
total_network : array of n*(m*3) tuples
the coordinates of each interaction point
wavelength : float
wavelength of interest, currently a single value (SI units)
scattering_index : n*2 array of integers
the source and sink index of each ray
ideal_vector : 1*3 array
the direction vector of the `ideal' incoming ray
time bins : 1d numpy array of floats
default is 20 bins, but this should always be set by the user to a sampling rate sufficent for twice the expected highest frequency
Returns
-------
aoa_spectrum : array of size sinknum*len(theta_bins)
angle of arrival spectrum
impulse_response :
travel_times :
"""
max_path_divergence = 2.0
if len(total_network) == 0:
sourcenum = np.int(np.nanmax(scattering_index[:, 0])) + 1
sinknum = np.int(np.nanmax(scattering_index[:, 1])) + 1
aoa_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32)
delay_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32)
else:
wave_vector = (2 * np.pi) / wavelength
sourcenum = np.int(np.nanmax(scattering_index[:, 0])) + 1
sinknum = np.int(np.nanmax(scattering_index[:, 1])) + 1
scatterdepth = int((len(total_network[0, :]) - 3) / 3)
impulse_response = np.zeros((len(time_bins), sinknum), dtype="complex")
angle_response = np.zeros((len(az_bins), sinknum), dtype="complex")
aoa_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32)
delay_spectrum = np.zeros((sinknum, az_bins.shape[0]), dtype=np.float32)
az_std = np.zeros((sinknum), dtype=np.float32)
averaged_response = np.zeros((sinknum, 1), dtype="complex")
phase_std = np.zeros((sinknum), dtype=np.float32)
total_pointing_vectors = np.empty((0, 3), dtype=np.float32)
paths = np.empty((0, 1), dtype=np.float32).ravel()
for idx in range(scatterdepth):
if idx == (scatterdepth - 1):
# indexing
if scatterdepth == 1:
start_row = 0
end_row = len(total_network[:, 1]) - 1
else:
start_row = (
np.max(
np.where((np.isnan(total_network[:, 6 + ((idx - 1) * 3)])))
)
+ 1
)
end_row = len(total_network[:, 1]) - 1
end_col = (idx * 3) + 6
# engine here
paths = np.append(
paths,
PathLength(total_network[start_row : end_row + 1, :end_col]),
axis=0,
)
total_pointing_vectors = np.append(
total_pointing_vectors,
AngleofArrivalVectors(
total_network[start_row : end_row + 1, :end_col]
),
axis=0,
)
elif idx == 0:
# indexing
start_row = 0
end_row = np.max(np.where((np.isnan(total_network[:, 6 + (idx * 3)]))))
end_col = (idx * 3) + 6
# engine here
paths = np.append(
paths,
PathLength(total_network[start_row : end_row + 1, :end_col]),
axis=0,
)
total_pointing_vectors = np.append(
total_pointing_vectors,
AngleofArrivalVectors(
total_network[start_row : end_row + 1, :end_col]
),
axis=0,
)
else:
# indexing
start_row = (
np.max(np.where((np.isnan(total_network[:, 3 + (idx * 3)])))) + 1
)
end_row = np.max(
np.where((np.isnan(total_network[:, 6 + ((idx) * 3)])))
)
end_col = (idx * 3) + 6
# engine here
paths = np.append(
paths,
PathLength(total_network[start_row : end_row + 1, :end_col]),
axis=0,
)
total_pointing_vectors = np.append(
total_pointing_vectors,
AngleofArrivalVectors(
total_network[start_row : end_row + 1, :end_col]
),
axis=0,
)
#
# scatter_map indexing
path_loss = wavelength / (4 * np.pi * paths)
ray_components = path_loss * (np.exp(paths * wave_vector * 1j))
phase_components = paths * wave_vector
angular_dist = Angular_distance(ideal_vector, total_pointing_vectors)
travel_times = paths / scipy.constants.c
for sink_index in range(sinknum):
sink_indexing = np.where((scattering_index[:, 1] == sink_index))
if impulse:
impulse_response = ImpulseStack(
impulse_response,
ray_components,
sink_index,
sink_indexing,
travel_times,
time_bins,
)
# impulse_bin_indexing=np.digitize(travel_times[sink_indexing],time_bins+travel_times[np.argmax(ray_components)],right=True)
# for time_index in range(len(ray_components[sink_indexing])):
# impulse_response[impulse_bin_indexing[time_index],sink_index]=impulse_response[impulse_bin_indexing[time_index],sink_index]+ray_components[sink_indexing][time_index]
if aoa:
angle_response = angleofarrivalStack(
angle_response,
ray_components,
sink_index,
sink_indexing,
angular_dist,
az_bins,
)
# angle_bin_indexing=np.digitize(angular_dist[sink_indexing],az_bins,right=True)
# for angle_index in range(len(ray_components[sink_indexing])):
# angle_response[angle_bin_indexing[angle_index],sink_index]=angle_response[angle_bin_indexing[angle_index],sink_index]+ray_components[sink_indexing][angle_index]
return angle_response, impulse_response, travel_times[np.argmax(ray_components)]
@njit(parallel=True)
def ImpulseStack(
impulse_response, ray_components, sink_index, sink_indexing, travel_times, time_bins
):
# attempt to impove the efficiency of the digitize action
impulse_bin_indexing = np.digitize(
travel_times[sink_indexing],
time_bins + travel_times[np.argmax(ray_components)],
right=True,
)
for time_index in prange(len(ray_components[sink_indexing])):
impulse_response[impulse_bin_indexing[time_index], sink_index] = (
impulse_response[impulse_bin_indexing[time_index], sink_index]
+ ray_components[sink_indexing][time_index]
)
return impulse_response
@njit(parallel=True)
def angleofarrivalStack(
angle_response, ray_components, sink_index, sink_indexing, angular_dist, az_bins
):
# improved efficiency digitizatoin
angle_bin_indexing = np.digitize(angular_dist[sink_indexing], az_bins, right=True)
for angle_index in prange(len(ray_components[sink_indexing])):
angle_response[angle_bin_indexing[angle_index], sink_index] = (
angle_response[angle_bin_indexing[angle_index], sink_index]
+ ray_components[sink_indexing][angle_index]
)
return angle_response
@njit(parallel=True)
def Angular_distance(center_vector, pointing_vectors):
if len(center_vector) == 1:
angle_lists = np.zeros((len(pointing_vectors), 1), dtype=np.float32)
for i in range(len(pointing_vectors)):
angle_lists[i] = np.arccos(np.dot(center_vector, pointing_vectors[i, :]))
# angle_lists[i]=np.arctan2(pointing_vectors[i,1],pointing_vectors[i,0])
else:
angle_lists = np.zeros((len(pointing_vectors), 1), dtype=np.float32)
for i in range(len(pointing_vectors)):
angle_lists[i] = np.arccos(
np.dot(center_vector[i, :], pointing_vectors[i, :])
)
# angle_lists[i]=np.arctan2(pointing_vectors[i,1],pointing_vectors[i,0])
return angle_lists
@jit(nopython=True)
def weighted_avg_and_std(values, weights):
"""
Return the weighted average and standard deviation
Parameters
----------
values : TYPE
DESCRIPTION.
weights : TYPE
DESCRIPTION.
Returns
-------
None.
"""
average = np.average(values, weights=weights)
# Fast and numerically precise:
variance = np.average((values - average) ** 2, weights=weights)
return (average, math.sqrt(variance))
# @jit
def VectorNetworkProcessEM(
sourcenum,
sinknum,
unified_points,
unified_normals,
unified_weights,
point_information,
scattering_index,
scattering_coefficient,
wavelength,
):
"""
A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly.
#
Parameters
----------
total_network : array of n*(m*3) tuples, the coordinates of each interaction point
wavelength : scaler
wavelength of interest, currently a single value (SI units)
scattering_index : n*2 array of integers, the source and sink index of each ray
#
Returns
-------
scatter_map : source_size*sink_size array
#
"""
if scattering_index.shape[0] == 0:
scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex")
else:
wave_vector = (2 * np.pi) / wavelength
scatterdepth = scattering_index.shape[1] - 1
scatter_map = np.zeros((sourcenum, sinknum, 3, scatterdepth), dtype="complex")
for idx in range(scatterdepth):
if idx == (scatterdepth - 1):
ray_analysis_index = scattering_index[
~np.equal(scattering_index[:, idx + 1], 0), :
]
ray_components = np.zeros(
(ray_analysis_index.shape[0], 3), dtype=np.complex64
)
ray_components = ScatteringNetworkGenerator(
ray_analysis_index,
unified_points,
unified_normals,
unified_weights,
point_information,
scattering_coefficient,
wavelength,
ray_components,
)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index[:, [0, -1]]
elif idx == 0:
ray_analysis_index = scattering_index[
np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2
]
ray_components = np.zeros(
(ray_analysis_index.shape[0], 3), dtype=np.complex64
)
ray_components = ScatteringNetworkGenerator(
ray_analysis_index,
unified_points,
unified_normals,
unified_weights,
point_information,
scattering_coefficient,
wavelength,
ray_components,
)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index
else:
ray_analysis_index = scattering_index[
~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2
]
ray_components = np.zeros(
(ray_analysis_index.shape[0], 3), dtype=np.complex64
)
ray_components = ScatteringNetworkGenerator(
ray_analysis_index,
unified_points,
unified_normals,
unified_weights,
point_information,
scattering_coefficient,
wavelength,
ray_components,
)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index[:, [0, -1]]
#
# scatter_map indexing
# for source_index in range(sourcenum):
# for sink_index in range(sinknum):
# scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)])
scatter_map = scatter_net_sortEMtest(
sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx
)
return scatter_map
def VectorNetworkProcessTimeEM(
sourcenum,
sinknum,
unified_points,
unified_normals,
unified_weights,
point_information,
scattering_index,
wavelength,
):
"""
A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly.
#
Parameters
----------
total_network : array of n*(m*3) tuples, the coordinates of each interaction point
wavelength : scaler
wavelength of interest, currently a single value (SI units)
scattering_index : n*2 array of integers, the source and sink index of each ray
#
Returns
-------
scatter_map : source_size*sink_size array
#
"""
if scattering_index.shape[0] == 0:
scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex")
else:
time_index = np.linspace(1e-8, 2e-6, 100000)
wave_vector = (2 * np.pi) / wavelength
scatterdepth = scattering_index.shape[1] - 1
scatter_map = np.zeros(
(sourcenum, sinknum, 3, len(time_index), scatterdepth), dtype="complex"
)
for idx in range(scatterdepth):
if idx == (scatterdepth - 1):
ray_analysis_index = scattering_index[
~np.equal(scattering_index[:, idx + 1], 0), :
]
ray_components = np.zeros(
(ray_analysis_index.shape[0], 3), dtype=np.complex64
)
time_components = np.zeros(
(ray_analysis_index.shape[0]), dtype=np.float32
)
ray_components, time_components = ScatteringNetworkGeneratorTime(
ray_analysis_index,
unified_points,
unified_normals,
unified_weights,
point_information,
wavelength,
ray_components,
time_components,
)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index[:, [0, -1]]
elif idx == 0:
ray_analysis_index = scattering_index[
np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2
]
ray_components = np.zeros(
(ray_analysis_index.shape[0], 3), dtype=np.complex64
)
time_components = np.zeros(
(ray_analysis_index.shape[0]), dtype=np.float32
)
ray_components, time_components = ScatteringNetworkGeneratorTime(
ray_analysis_index,
unified_points,
unified_normals,
unified_weights,
point_information,
wavelength,
ray_components,
time_components,
)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index
else:
ray_analysis_index = scattering_index[
~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2
]
ray_components = np.zeros(
(ray_analysis_index.shape[0], 3), dtype=np.complex64
)
time_components = np.zeros(
(ray_analysis_index.shape[0]), dtype=np.float32
)
ray_components, time_components = ScatteringNetworkGeneratorTime(
ray_analysis_index,
unified_points,
unified_normals,
unified_weights,
point_information,
wavelength,
ray_components,
time_components,
)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index[:, [0, -1]]
#
# scatter_map indexing
# for source_index in range(sourcenum):
# for sink_index in range(sinknum):
# scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)])
scatter_map = scatter_net_sortTimeEM(
sourcenum,
sinknum,
scatter_map,
depth_slice,
ray_components,
time_components,
time_index,
idx,
)
return scatter_map
def VectorNetworkProcessEMv2(
sourcenum,
sinknum,
unified_points,
unified_normals,
unified_weights,
point_information,
scattering_index,
wavelength,
):
"""
A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly.
#
Parameters
----------
total_network : array of n*(m*3) tuples, the coordinates of each interaction point
wavelength : scaler
wavelength of interest, currently a single value (SI units)
scattering_index : n*2 array of integers, the source and sink index of each ray
#
Returns
-------
scatter_map : source_size*sink_size array
#
"""
if scattering_index.shape[0] == 0:
scatter_map = np.zeros((sourcenum, sinknum, 3), dtype="complex")
else:
wave_vector = (2 * np.pi) / wavelength
scatterdepth = scattering_index.shape[1] - 1
scatter_map = np.zeros((sourcenum, sinknum, 3, scatterdepth), dtype="complex")
ray_components = np.zeros((scattering_index.shape[0], 3), dtype=np.complex64)
ray_components = EM.EMGPUWrapper(
scattering_index, point_information, wavelength
)
for idx in range(scatterdepth):
if idx == (scatterdepth - 1):
ray_analysis_index = scattering_index[
~np.equal(scattering_index[:, idx + 1], 0), :
]
temp_components = ray_components[
~np.equal(scattering_index[:, idx + 1], 0), :
]
# ray_components=ScatteringNetworkGenerator(ray_analysis_index,unified_points,unified_normals,unified_weights,point_information,wavelength,ray_components)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index[:, [0, -1]]
elif idx == 0:
ray_analysis_index = scattering_index[
np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2
]
temp_components = ray_components[
~np.equal(scattering_index[:, idx + 2], 0), :
]
# ray_components=np.zeros((ray_analysis_index.shape[0],3),dtype=np.complex64)
# ray_components=EM.EMGPUWrapper(ray_analysis_index,point_information,wavelength)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index
else:
ray_analysis_index = scattering_index[
~np.equal(scattering_index[:, idx + 1], 0), 0 : idx + 2
]
temp_components = ray_components[
~np.equal(scattering_index[:, idx + 1], 0), :
]
# ray_components=EM.EMGPUWrapper(ray_analysis_index,point_information,wavelength)
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index[:, [0, -1]]
#
# scatter_map indexing
# for source_index in range(sourcenum):
# for sink_index in range(sinknum):
# scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)])
scatter_map = scatter_net_sortEM(
sourcenum, sinknum, scatter_map, depth_slice, temp_components, idx
)
return scatter_map
# @jit
def VectorNetworkProcessv2(
sources, sinks, scattering_points, scattering_index, wavelength
):
"""
A more efficienct version of BaseNetworkProcess, using the vectorised path length function to process the total_network array more efficienctly.
#
Parameters
----------
total_network : array of n*(m*3) tuples, the coordinates of each interaction point
wavelength : scaler
wavelength of interest, currently a single value (SI units)
scattering_index : n*2 array of integers, the source and sink index of each ray
#
Returns
-------
scatter_map : source_size*sink_size array
#
"""
if scattering_index.shape[0] == 0:
sourcenum = np.int(np.nanmax(scattering_index[:, 0])) + 1
sinknum = np.int(np.nanmax(scattering_index[:, 1])) + 1
scatter_map = np.zeros((sourcenum, sinknum, 1), dtype="complex")
else:
unified_model = np.append(
np.append(sources.astype(np.float32), sinks.astype(np.float32), axis=0),
scattering_points.astype(np.float32),
axis=0,
)
wave_vector = (2 * np.pi) / wavelength
sourcenum = sources.shape[0]
sinknum = sinks.shape[0]
scatterdepth = scattering_index.shape[1] - 1
scatter_map = np.zeros((sourcenum, sinknum, scatterdepth), dtype="complex")
for idx in range(scatterdepth):
if idx == (scatterdepth - 1):
ray_analysis_index = scattering_index[
~np.equal(scattering_index[:, idx + 1], 0), :
]
if idx == 0:
paths = PathLength(
np.append(
unified_model[ray_analysis_index[:, 0], :] - 1,
unified_model[ray_analysis_index[:, 1] - 1, :],
axis=1,
)
)
elif idx == 1:
paths = PathLength(
np.append(
np.append(
unified_model[ray_analysis_index[:, 0] - 1, :],
unified_model[ray_analysis_index[:, 1] - 1, :],
axis=1,
),
unified_model[ray_analysis_index[:, 2] - 1, :],
axis=1,
)
)
elif idx == 2:
paths = PathLength(
np.append(
np.append(
np.append(
unified_model[ray_analysis_index[:, 0] - 1, :],
unified_model[ray_analysis_index[:, 1] - 1, :],
axis=1,
),
unified_model[ray_analysis_index[:, 2] - 1, :],
axis=1,
),
unified_model[ray_analysis_index[:, 3] - 1, :],
axis=1,
)
)
# path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j)
path_loss = wavelength / (4 * np.pi * paths)
ray_components = path_loss * (np.exp(paths * wave_vector * 1j))
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index[:, [0, -1]]
elif idx == 0:
ray_analysis_index = scattering_index[
np.equal(scattering_index[:, idx + 2], 0), 0 : idx + 2
]
paths = PathLength(
np.append(
unified_model[ray_analysis_index[:, 0] - 1, :],
unified_model[ray_analysis_index[:, 1] - 1, :],
axis=1,
)
)
# path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j)
path_loss = wavelength / (4 * np.pi * paths)
ray_components = path_loss * (np.exp(paths * wave_vector * 1j))
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index
else:
ray_analysis_index = scattering_index[
~np.equal(scattering_index[:, idx + 1], 0), :
]
if idx == 0:
paths = PathLength(
np.append(
unified_model[ray_analysis_index[:, 0], :] - 1,
unified_model[ray_analysis_index[:, 1] - 1, :],
axis=1,
)
)
elif idx == 1:
paths = PathLength(
np.append(
np.append(
unified_model[ray_analysis_index[:, 0] - 1, :],
unified_model[ray_analysis_index[:, 1] - 1, :],
axis=1,
),
unified_model[ray_analysis_index[:, 2] - 1, :],
axis=1,
)
)
elif idx == 2:
paths = PathLength(
np.append(
np.append(
np.append(
unified_model[ray_analysis_index[:, 0] - 1, :],
unified_model[ray_analysis_index[:, 1] - 1, :],
axis=1,
),
unified_model[ray_analysis_index[:, 2] - 1, :],
axis=1,
),
unified_model[ray_analysis_index[:, 3] - 1, :],
axis=1,
)
)
# path_loss=((wavelength/(4*np.pi*paths))**2)*np.exp(paths*wave_vector*1j)
path_loss = wavelength / (4 * np.pi * paths)
ray_components = path_loss * (np.exp(paths * wave_vector * 1j))
# path_loss=((1j*wave_vector)/(4*np.pi))*(np.exp(paths*wave_vector*1j)/paths)
depth_slice = ray_analysis_index[:, [0, -1]]
#
# scatter_map indexing
# for source_index in range(sourcenum):
# for sink_index in range(sinknum):
# scatter_map[source_index,sink_index,idx]=np.sum(ray_components[(depth_slice[:,0]-1==0)&(depth_slice[:,1]-sourcenum-1==0)])
scatter_map = scatter_net_sorttest(
sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx
)
return scatter_map
@njit(parallel=True)
def scatter_net_sort(sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx):
# simplified scattering sort to create a scattering network using Numbas prange and loop unrolling.
for sink_index in range(sinknum):
for source_index in range(sourcenum):
scatter_map[source_index, sink_index, idx] = np.nansum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index)
]
)
return scatter_map
def scatter_net_sorttest(
sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx
):
# simplified scattering sort to create a scattering network using Numbas prange and loop unrolling.
for sink_index in range(sinknum):
for source_index in range(sourcenum):
scatter_map[source_index, sink_index, idx] = np.nansum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index)
]
)
return scatter_map
def scatter_net_sortEMtest(
sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx
):
# simplified scattering sort to create a scattering network using Numbas prange and loop unrolling.
for sink_index in range(sinknum):
for source_index in range(sourcenum):
scatter_map[source_index, sink_index, 0, idx] = np.nansum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
0,
]
)
scatter_map[source_index, sink_index, 1, idx] = np.nansum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
1,
]
)
scatter_map[source_index, sink_index, 2, idx] = np.nansum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
2,
]
)
return scatter_map
@njit(parallel=True)
def scatter_net_sortEM(
sourcenum, sinknum, scatter_map, depth_slice, ray_components, idx
):
# simplified scattering sort to create a scattering network using Numbas prange and loop unrolling.
for sink_index in range(sinknum):
for source_index in range(sourcenum):
scatter_map[source_index, sink_index, 0, idx] = np.sum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
0,
]
)
scatter_map[source_index, sink_index, 1, idx] = np.sum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
1,
]
)
scatter_map[source_index, sink_index, 2, idx] = np.sum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
2,
]
)
return scatter_map
# @njit(parallel=True)
def scatter_net_sortTimeEM(
sourcenum,
sinknum,
scatter_map,
depth_slice,
ray_components,
time_components,
time_index,
idx,
):
# simplified scattering sort to create a scattering network using Numbas prange and loop unrolling.
for sink_index in range(sinknum):
for source_index in range(sourcenum):
time_address = np.digitize(
time_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index)
],
time_index,
)
scatter_map[source_index, sink_index, 0, time_address, idx] = np.sum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
0,
]
)
scatter_map[source_index, sink_index, 1, time_address, idx] = np.sum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
1,
]
)
scatter_map[source_index, sink_index, 2, time_address, idx] = np.sum(
ray_components[
(depth_slice[:, 0] - 1 == source_index)
& (depth_slice[:, 1] - sourcenum - 1 == sink_index),
2,
]
)
return scatter_map
def BaseNetworkProcess(total_network, wavelength, scattering_index):
# take the total_network matrix, and work out the path length of each connection, and hence superposition
wave_vector = (2 * np.pi) / wavelength
sourcenum = np.int(np.nanmax(scattering_index[:, 0])) + 1
sinknum = np.int(np.nanmax(scattering_index[:, 1])) + 1
scatterdepth = int((len(total_network[1, :]) - 3) / 3)
scatter_map = np.zeros((sourcenum, sinknum, scatterdepth), dtype="complex")
depth_num = 0
for idx in range(scatterdepth):
if idx == (scatterdepth - 1):
if scatterdepth == 1:
start_row = 0
end_row = len(total_network[:, 1]) - 1
else:
start_row = (
np.max(np.where((np.isnan(total_network[:, 6 + ((idx - 1) * 3)]))))
+ 1
)
end_row = len(total_network[:, 1]) - 1
elif idx == 0:
start_row = 0
end_row = np.max(np.where((np.isnan(total_network[:, 6 + (idx * 3)]))))
else:
start_row = (
np.max(np.where((np.isnan(total_network[:, 3 + (idx * 3)])))) + 1
)
end_row = np.max(np.where((np.isnan(total_network[:, 6 + ((idx) * 3)]))))
for idr in range(start_row, end_row + 1):
source_index = np.int(scattering_index[idr, 0])
sink_index = np.int(scattering_index[idr, 1])
path_distance = np.zeros((1))
if idx == 0:
path_distance = (
distance.euclidean(
total_network[idr, np.asarray([0, 1, 2]) + ((0) * 3)],
total_network[idr, np.asarray([3, 4, 5]) + ((0) * 3)],
)
+ path_distance
)
else:
for idy in range(idx + 1):
path_distance = (
distance.euclidean(
total_network[idr, np.asarray([0, 1, 2]) + ((idy) * 3)],
total_network[idr, np.asarray([3, 4, 5]) + ((idy) * 3)],
)
+ path_distance
)
path_loss = (4 * np.pi * path_distance) / wavelength
scatter_map[source_index, sink_index, idx] = (1 / path_loss) * np.exp(
-path_distance * wave_vector * 1j
) + scatter_map[source_index, sink_index, idx]
return scatter_map
def charge_rays_environment1Dv2(sources, sinks, environment_points, point_indexing):
"""
Generate Ray Payload from numpy arrays of sources, sinks, and environment points, in a 1 dimensional array to match the point_indexing demands
Parameters
----------
sources : TYPE
DESCRIPTION.
sinks : TYPE
DESCRIPTION.
environment_points : numpy array of xyz
points for scattering in environment
point_indexing : (n*m) by 2 array, 0 has the source index, and 1 has the sink index, otherwise nans'
Returns
-------
temp_ray_payload : array of ray_t type
ray payload to be sent to GPU
"""
unified_model = np.append(
np.append(sources, sinks, axis=0), environment_points, axis=0
)
temp_ray_payload = np.empty(point_indexing.shape[0], dtype=base_types.ray_t)
local_sources = unified_model[point_indexing[:, 0] - 1, :]
directions = np.zeros(
(len(unified_model[point_indexing[:, 1] - 1, :]), 3), dtype=np.float32
)
norm_length = np.zeros(
(len(unified_model[point_indexing[:, 1] - 1, :]), 1), dtype=np.float32
)
directions, norm_length = math_functions.calc_dv_norm(
unified_model[point_indexing[:, -2] - 1, :],
unified_model[point_indexing[:, -1] - 1, :],
directions,
norm_length,
)
temp_ray_payload[:]["ox"] = unified_model[point_indexing[:, 0] - 1, 0]
temp_ray_payload[:]["oy"] = unified_model[point_indexing[:, 0] - 1, 1]
temp_ray_payload[:]["oz"] = unified_model[point_indexing[:, 0] - 1, 2]
temp_ray_payload[:]["dx"] = directions[:, 0]
temp_ray_payload[:]["dy"] = directions[:, 1]
temp_ray_payload[:]["dz"] = directions[:, 2]
# temp_ray_payload[:]['tx']=unified_model[point_indexing[:,1]-1,0]
# temp_ray_payload[:]['ty']=unified_model[point_indexing[:,1]-1,1]
# temp_ray_payload[:]['tz']=unified_model[point_indexing[:,1]-1,2]
temp_ray_payload[:]["dist"] = norm_length[:, 0]
temp_ray_payload[:]["intersect"] = False
return temp_ray_payload
def rayHits1Dv2(ray_payload, point_indexing, sink_index):
"""
filtering process for each raycasting stage, seprating rays which hit sinks from non-terminating rays.
Parameters
----------
ray_payload : TYPE
DESCRIPTION.
point_indexing : TYPE
DESCRIPTION.
scatter_inc : TYPE, optional
DESCRIPTION. The default is 1.
Returns
-------
filtered_network : TYPE
DESCRIPTION.
final_network : TYPE
DESCRIPTION.
filtered_index : TYPE
DESCRIPTION.
final_index : TYPE
DESCRIPTION.
"""
hitmap = ~ray_payload[:]["intersect"]
final_index = point_indexing[
np.all(np.array([hitmap, np.isin(point_indexing[:, -1], sink_index)]), axis=0),
:,
]
filtered_index = point_indexing[
np.all(np.array([hitmap, ~np.isin(point_indexing[:, -1], sink_index)]), axis=0),
:,
]
return filtered_index, final_index
def launchRaycaster1Dv2(
sources, sinks, scattering_points, io_indexing, environment_local
):
# cuda.select_device(0)
start = timer()
first_ray_payload = charge_rays_environment1Dv2(
sources, sinks, scattering_points, io_indexing
)
prep_dt = timer() - start
raystart = timer()
ray_num = len(first_ray_payload)
tri_num = len(environment_local)
max_tris = 2 ** 20
triangle_chunk = np.linspace(
0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32
)
chunk_size = 2 ** 10
threads_in_block = 1024
ray_chunks = np.linspace(
0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32
)
# for idx in range(len(triangle_chunk)-1):
d_environment = cuda.device_array(len(environment_local), dtype=base_types.triangle_t)
cuda.to_device(environment_local, to=d_environment)
for n in range(len(ray_chunks) - 1):
chunk_payload = first_ray_payload[ray_chunks[n] : ray_chunks[n + 1]]
chunk_ray_size = len(chunk_payload)
d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=base_types.ray_t)
cuda.to_device(chunk_payload, to=d_chunk_payload)
#
# ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
# ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
# distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
# distance_temp[:]=math.inf
# dist_list=cuda.to_device(distance_temp)
# d_ray_flag=cuda.to_device(ray_temp)
# Here, we choose the granularity of the threading on our device. We want
# to try to cover the entire workload of rays and targets with simulatenous threads, so we'll
# choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads
grids = math.ceil(chunk_ray_size / threads_in_block)
threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block)
# Execute the kernel
# cuda.profile_start()
# kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
kernel1Dv2[grids, threads](d_chunk_payload, d_environment)
# cuda.profile_stop()
# distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
first_ray_payload[
ray_chunks[n] : ray_chunks[n + 1]
] = d_chunk_payload.copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
# cuda.close()
kernel_dt = timer() - raystart
start = timer()
RAYS_CAST = ray_num
sink_index = np.arange(
sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
).reshape(sinks.shape[0], 1)
filtered_index, final_index = rayHits1Dv2(
first_ray_payload, io_indexing, sink_index
)
mem_dt = timer() - start
# print("First Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt) )
return filtered_index, final_index, RAYS_CAST
def launchRaycaster1Dv3(
sources, sinks, scattering_points, io_indexing, environment_local
):
# cuda.select_device(0)
start = timer()
first_ray_payload = charge_rays_environment1Dv2(
sources, sinks, scattering_points, io_indexing
)
prep_dt = timer() - start
raystart = timer()
ray_num = len(first_ray_payload)
tri_num = len(environment_local)
# print('Launch Raycaster Triangles ', len(environment_local))
max_tris = 2 ** 20
triangle_chunk = np.linspace(
0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32
)
chunk_size = 2 ** 18
threads_in_block = 256
# ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32)
# for idx in range(len(triangle_chunk)-1):
d_environment = cuda.device_array(len(environment_local), dtype=base_types.triangle_t)
cuda.to_device(environment_local, to=d_environment)
d_chunk_payload = cuda.device_array([ray_num], dtype=base_types.ray_t)
cuda.to_device(first_ray_payload, to=d_chunk_payload)
#
# ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
# ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
# distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
# distance_temp[:]=math.inf
# dist_list=cuda.to_device(distance_temp)
# d_ray_flag=cuda.to_device(ray_temp)
# Here, we choose the granularity of the threading on our device. We want
# to try to cover the entire workload of rays and targets with simulatenous threads, so we'll
# choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads
grids = math.ceil(ray_num / threads_in_block)
threads = threads_in_block
# Execute the kernel
# cuda.profile_start()
# kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
kernel1Dv3[grids, threads](d_chunk_payload, d_environment)
# cuda.profile_stop()
# distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
first_ray_payload = d_chunk_payload.copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
# cuda.close()
kernel_dt = timer() - raystart
start = timer()
RAYS_CAST = ray_num
sink_index = np.arange(
sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
).reshape(sinks.shape[0], 1)
filtered_index, final_index = rayHits1Dv2(
first_ray_payload, io_indexing, sink_index
)
mem_dt = timer() - start
# deallocate memory on gpu
#ctx = cuda.current_context()
#deallocs = ctx.deallocations
#deallocs.clear()
# print("First Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt) )
return filtered_index, final_index, RAYS_CAST
def chunkingRaycaster1Dv2(
sources, sinks, scattering_points, filtered_index, environment_local, terminate_flag
):
# cuda.select_device(0)
start = timer()
sink_index = np.arange(
sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
).reshape(sinks.shape[0], 1)
scattering_point_index = np.arange(
np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0]
).reshape(scattering_points.shape[0], 1)
if not terminate_flag:
target_indexing = create_model_index(
filtered_index, sink_index, scattering_point_index
)
else:
target_indexing = create_model_index(
filtered_index, sink_index, np.empty((0, 0), dtype=np.int32)
) # only target rays at sinks
second_ray_payload = charge_rays_environment1Dv2(
sources, sinks, scattering_points, target_indexing
)
prep_dt = timer() - start
raystart = timer()
ray_num = len(second_ray_payload)
tri_num = len(environment_local)
max_tris = 2 ** 18
triangle_chunk = np.linspace(
0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32
)
# Create a container for the pixel RGBA information of our image
chunk_size = 2 ** 10
threads_in_block = 1024
ray_chunks = np.linspace(
0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32
)
# for idx in range(len(triangle_chunk)-1):
d_environment = cuda.device_array(len(environment_local), dtype=base_types.triangle_t)
cuda.to_device(environment_local, to=d_environment)
for n in range(len(ray_chunks) - 1):
chunk_payload = second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]]
chunk_ray_size = len(chunk_payload)
# distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
# distance_temp[:]=math.inf
# ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
# ray_temp[:]=False
# dist_list=cuda.to_device(distance_temp)
# d_ray_flag=cuda.to_device(ray_temp)
# d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=ray_t)
d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=base_types.ray_t)
cuda.to_device(chunk_payload, to=d_chunk_payload)
# Here, we choose the granularity of the threading on our device. We want
# to try to cover the entire workload of rays and targets with simulatenous threads, so we'll
# choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads
grids = math.ceil(chunk_ray_size / threads_in_block)
threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block)
# Execute the kernel
# cuda.profile_start()
kernel1Dv2[grids, threads](d_chunk_payload, d_environment)
# cuda.profile_stop()
# distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
# second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host()
second_ray_payload[
ray_chunks[n] : ray_chunks[n + 1]
] = d_chunk_payload.copy_to_host()
# cuda.close()
kernel_dt = timer() - raystart
start = timer()
RAYS_CAST = ray_num
filtered_index2, final_index2 = rayHits1Dv2(
second_ray_payload, target_indexing, sink_index
)
mem_dt = timer() - start
# print("Second Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt) )
return filtered_index2, final_index2, RAYS_CAST
def chunkingRaycaster1Dv3(
sources, sinks, scattering_points, filtered_index, environment_local, terminate_flag
):
start = timer()
free_mem, total_mem = cuda.current_context().get_memory_info()
max_mem = np.ceil(free_mem * 0.5).astype(np.int64)
ray_limit = (
np.floor(np.floor((max_mem - environment_local.nbytes) / base_types.ray_t.size) / 1e7)
* 1e7
).astype(np.int64)
sink_index = np.arange(
sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
).reshape(sinks.shape[0], 1)
scattering_point_index = np.arange(
np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0]
).reshape(scattering_points.shape[0], 1)
prep_dt = timer() - start
raystart = timer()
RAYS_CAST = 0
if not terminate_flag:
target_indexing = create_model_index(
filtered_index, sink_index, scattering_point_index
)
else:
if (filtered_index.shape[0]*sink_index.shape[0])<ray_limit:
target_indexing = create_model_index(
filtered_index, sink_index, np.empty((0, 0), dtype=np.int32)
) # only target rays at sinks
else:
# the rays must fit in GPU memory, aim for no more than 80% utilisation
# establish memory limits
sub_filtered_index = np.array_split(
filtered_index, np.ceil(filtered_index.shape[0]*sink_index.shape[0] / ray_limit).astype(int)
)
chunknum = len(sub_filtered_index)
filtered_index2 = np.empty((0, filtered_index.shape[1]+1), dtype=np.int32)
final_index2 = np.empty((0, filtered_index.shape[1]+1), dtype=np.int32)
# print('chunking total of ',target_indexing.shape[0],' rays in ',chunknum,' batches')
for chunkindex in range(chunknum):
sub_target=create_model_index(
sub_filtered_index[chunkindex], sink_index, np.empty((0, 0), dtype=np.int32)
)
# cycle the raycaster over the sub arrays
threads_in_block = 256
second_ray_payload = charge_rays_environment1Dv2(
sources, sinks, scattering_points, sub_target
)
# ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32)
# for idx in range(len(triangle_chunk)-1):
d_environment = cuda.device_array(
len(environment_local), dtype=base_types.triangle_t
)
cuda.to_device(environment_local, to=d_environment)
d_chunk_payload = cuda.device_array(
[second_ray_payload.shape[0]], dtype=base_types.ray_t
)
cuda.to_device(second_ray_payload, to=d_chunk_payload)
#
# ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
# ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
# distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
# distance_temp[:]=math.inf
# dist_list=cuda.to_device(distance_temp)
# d_ray_flag=cuda.to_device(ray_temp)
# Here, we choose the granularity of the threading on our device. We want
# to try to cover the entire workload of rays and targets with simulatenous threads, so we'll
# choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads
grids = math.ceil(second_ray_payload.shape[0] / threads_in_block)
threads = threads_in_block
# Execute the kernel
# cuda.profile_start()
# kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
kernel1Dv3[grids, threads](d_chunk_payload, d_environment)
# cuda.profile_stop()
# distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
second_ray_payload = d_chunk_payload.copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
# cuda.close()
kernel_dt = timer() - raystart
start = timer()
RAYS_CAST += second_ray_payload.shape[0]
temp_filtered_index2, temp_final_index2 = rayHits1Dv2(
second_ray_payload, sub_target, sink_index
)
mem_dt = timer() - start
filtered_index2 = np.append(filtered_index2, temp_filtered_index2, axis=0)
final_index2 = np.append(final_index2, temp_final_index2, axis=0)
# deallocate memory on gpu
# ctx = cuda.current_context()
# ctx.reset()
if target_indexing.shape[0] >= ray_limit:
# need to split the array and process seperatly
sub_target = np.array_split(
target_indexing, np.ceil(target_indexing.shape[0] / ray_limit).astype(int)
)
chunknum = len(sub_target)
filtered_index2 = np.empty((0, target_indexing.shape[1]), dtype=np.int32)
final_index2 = np.empty((0, target_indexing.shape[1]), dtype=np.int32)
# print('chunking total of ',target_indexing.shape[0],' rays in ',chunknum,' batches')
for chunkindex in range(chunknum):
# cycle the raycaster over the sub arrays
threads_in_block = 256
second_ray_payload = charge_rays_environment1Dv2(
sources, sinks, scattering_points, sub_target[chunkindex]
)
# ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32)
# for idx in range(len(triangle_chunk)-1):
d_environment = cuda.device_array(
len(environment_local), dtype=base_types.triangle_t
)
cuda.to_device(environment_local, to=d_environment)
d_chunk_payload = cuda.device_array(
[second_ray_payload.shape[0]], dtype=base_types.ray_t
)
cuda.to_device(second_ray_payload, to=d_chunk_payload)
#
# ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
# ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
# distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
# distance_temp[:]=math.inf
# dist_list=cuda.to_device(distance_temp)
# d_ray_flag=cuda.to_device(ray_temp)
# Here, we choose the granularity of the threading on our device. We want
# to try to cover the entire workload of rays and targets with simulatenous threads, so we'll
# choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads
grids = math.ceil(second_ray_payload.shape[0] / threads_in_block)
threads = threads_in_block
# Execute the kernel
# cuda.profile_start()
# kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
kernel1Dv3[grids, threads](d_chunk_payload, d_environment)
# cuda.profile_stop()
# distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
second_ray_payload = d_chunk_payload.copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
# cuda.close()
kernel_dt = timer() - raystart
start = timer()
RAYS_CAST += second_ray_payload.shape[0]
temp_filtered_index2, temp_final_index2 = rayHits1Dv2(
second_ray_payload, sub_target[chunkindex], sink_index
)
mem_dt = timer() - start
filtered_index2 = np.append(filtered_index2, temp_filtered_index2, axis=0)
final_index2 = np.append(final_index2, temp_final_index2, axis=0)
# deallocate memory on gpu
#ctx = cuda.current_context()
#ctx.reset()
else:
second_ray_payload = charge_rays_environment1Dv2(
sources, sinks, scattering_points, target_indexing
)
# print('Chunking Raycaster Triangles ', len(environment_local))
# max_tris=2**18
# triangle_chunk=np.linspace(0,tri_num,math.ceil(tri_num/max_tris)+1,dtype=np.int32)
# chunk_size=2**11
threads_in_block = 256
# ray_chunks=np.linspace(0,ray_num,math.ceil(ray_num/chunk_size)+1,dtype=np.int32)
# for idx in range(len(triangle_chunk)-1):
d_environment = cuda.device_array(len(environment_local), dtype=base_types.triangle_t)
cuda.to_device(environment_local, to=d_environment)
d_chunk_payload = cuda.device_array(
[second_ray_payload.shape[0]], dtype=base_types.ray_t
)
cuda.to_device(second_ray_payload, to=d_chunk_payload)
#
# ray_temp=np.empty((chunk_ray_size),dtype=np.bool)
# ray_temp[:]=first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']
# distance_temp=np.empty((chunk_ray_size),dtype=np.float32)
# distance_temp[:]=math.inf
# dist_list=cuda.to_device(distance_temp)
# d_ray_flag=cuda.to_device(ray_temp)
# Here, we choose the granularity of the threading on our device. We want
# to try to cover the entire workload of rays and targets with simulatenous threads, so we'll
# choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads
grids = math.ceil(second_ray_payload.shape[0] / threads_in_block)
threads = threads_in_block
# Execute the kernel
# cuda.profile_start()
# kernel1Dv2[grids, threads](d_chunk_payload,d_environment,d_ray_flag)
kernel1Dv3[grids, threads](d_chunk_payload, d_environment)
# cuda.profile_stop()
# distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
second_ray_payload = d_chunk_payload.copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['intersect']=d_chunk_payload['intersect'].copy_to_host()
# first_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=d_chunk_payload['dist'].copy_to_host()
# cuda.close()
kernel_dt = timer() - raystart
start = timer()
RAYS_CAST += second_ray_payload.shape[0]
filtered_index2, final_index2 = rayHits1Dv2(
second_ray_payload, target_indexing, sink_index
)
mem_dt = timer() - start
# deallocate memory on gpu
#ctx = cuda.current_context()
#deallocs = ctx.deallocations
#deallocs.clear()
# print("Second Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(prep_dt,kernel_dt,mem_dt) )
return filtered_index2, final_index2, RAYS_CAST
def charge_shadow_rays1D(sources, sinks, point_indexing, target_indexing):
"""
Generate Ray Payload from numpy arrays of sources, sinks, and environment points, in a 1 dimensional array
Parameters
----------
sources : TYPE
DESCRIPTION.
sinks : TYPE
DESCRIPTION.
environment_points : numpy array of xyz
points for scattering in environment
point_indexing : (n*m) by 2 array, 0 has the source index, and 1 has the sink index, otherwise nans'
Returns
-------
temp_ray_payload : array of ray_t type
ray payload to be sent to GPU
"""
idx = 0
targets = sinks
target_num = len(targets)
source_num, origin_width = np.shape(sources)
origins = np.zeros((source_num * target_num, origin_width), dtype=np.float32)
source_sink_index = np.full((source_num * target_num, 2), np.nan, dtype=np.int32)
temp_ray_payload = np.empty([source_num * target_num], dtype=base_types.ray_t)
temp_ray_payload, origins, source_sink_index = ray_charge_core_final_vector(
temp_ray_payload,
origins,
source_sink_index,
sources,
targets,
point_indexing,
target_indexing,
)
return temp_ray_payload, origins, source_sink_index
def ray_charge_core_final_vector(
temp_ray_payload,
origins,
source_sink_index,
sources,
targets,
point_indexing,
target_indexing,
):
# using array allocation
# idx=0
target_num = len(targets)
source_num, origin_width = np.shape(sources)
directions = np.zeros((len(targets), 3), dtype=np.float32)
local_sources = np.zeros((len(targets), origin_width), dtype=np.float32)
norm_length = np.zeros((len(targets), 1), dtype=np.float32)
source_chunking = np.linspace(
0,
len(temp_ray_payload),
math.ceil(len(temp_ray_payload) / target_num) + 1,
dtype=np.int32,
)
for n in range(len(source_chunking) - 1):
local_sources[:, -origin_width:] = sources[n, :]
directions, norm_length = math_functions.calc_dv_norm(
local_sources[:, -3:], targets, directions, norm_length
)
temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
"ox"
] = local_sources[:, -3]
temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
"oy"
] = local_sources[:, -2]
temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
"oz"
] = local_sources[:, -1]
temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
"dx"
] = directions[:, 0]
temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
"dy"
] = directions[:, 1]
temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
"dz"
] = directions[:, 2]
# temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['tx']=targets[:,0]
# temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['ty']=targets[:,1]
# temp_ray_payload[source_chunking[n]:source_chunking[n+1]]['tz']=targets[:,2]
temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
"dist"
] = norm_length[:, 0]
temp_ray_payload[source_chunking[n] : source_chunking[n + 1]][
"intersect"
] = False
origins[source_chunking[n] : source_chunking[n + 1], -origin_width:] = sources[
n, :
]
source_sink_index[
source_chunking[n] : source_chunking[n + 1], 0
] = point_indexing[n, 0]
source_sink_index[
source_chunking[n] : source_chunking[n + 1], 1
] = target_indexing.ravel()
return temp_ray_payload, origins, source_sink_index
def shadowRaycaster(filtered_network, sinks, filtered_index, environment_local):
start = timer()
target_indexing = np.full((len(sinks), 1), np.nan, dtype=np.float32).ravel()
target_indexing[:] = range(len(sinks))
second_ray_payload, second_origin, io_index2 = charge_shadow_rays1D(
filtered_network, sinks, filtered_index, target_indexing
)
prep_dt = timer() - start
raystart = timer()
ray_num = len(second_ray_payload)
tri_num = len(environment_local)
max_tris = 2 ** 16
triangle_chunk = np.linspace(
0, tri_num, math.ceil(tri_num / max_tris) + 1, dtype=np.int32
)
# Create a container for the pixel RGBA information of our image
chunk_size = 2 ** 21
threads_in_block = 1024
ray_chunks = np.linspace(
0, ray_num, math.ceil(ray_num / chunk_size) + 1, dtype=np.int32
)
# sync_stream=cuda.stream()
for idx in range(len(triangle_chunk) - 1):
d_environment = cuda.device_array(
len(environment_local[triangle_chunk[idx] : triangle_chunk[idx + 1]]),
dtype=base_types.triangle_t,
)
cuda.to_device(
environment_local[triangle_chunk[idx] : triangle_chunk[idx + 1]],
to=d_environment,
)
for n in range(len(ray_chunks) - 1):
chunk_payload = second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]]
chunk_ray_size = len(chunk_payload)
distance_temp = np.empty((chunk_ray_size), dtype=np.float32)
distance_temp[:] = math.inf
ray_temp = np.empty((chunk_ray_size), dtype=np.bool)
ray_temp[:] = False
dist_list = cuda.to_device(distance_temp)
d_ray_flag = cuda.to_device(ray_temp)
# d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=ray_t)
d_chunk_payload = cuda.device_array([chunk_ray_size], dtype=base_types.ray_t)
cuda.to_device(chunk_payload, to=d_chunk_payload)
# Here, we choose the granularity of the threading on our device. We want
# to try to cover the entire workload of rays and targets with simulatenous threads, so we'll
# choose a grid of (source_num/16. target_num/16) blocks, each with (16, 16) threads
grids = math.ceil(chunk_ray_size / threads_in_block)
threads = min(ray_chunks[1] - ray_chunks[0], threads_in_block)
# Execute the kernel
# cuda.profile_start()
kernel1D[grids, threads](
d_chunk_payload, d_environment, d_ray_flag, dist_list
)
# second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]=d_chunk_payload.copy_to_host()
second_ray_payload[ray_chunks[n] : ray_chunks[n + 1]][
"intersect"
] = d_ray_flag.copy_to_host()
# second_ray_payload[ray_chunks[n]:ray_chunks[n+1]]['dist']=dist_list.copy_to_host()
# cuda.profile_stop()
# distmap[source_chunks[n]:source_chunks[n+1],target_chunks[m]:target_chunks[m+1]] = d_distmap_chunked.copy_to_host()
# sync_stream.synchronize()
kernel_dt = timer() - raystart
menstart = timer()
RAYS_CAST = ray_num
filtered_network2, final_network2, filtered_index2, final_index2 = rayHits1D(
second_ray_payload, io_index2, scatter_inc=1, origins_local=second_origin
)
mem_dt = timer() - menstart
print(
"Stage: Prep {:3.1f} s, Raycasting {:3.1f} s, Path Processing {:3.1f} s".format(
prep_dt, kernel_dt, mem_dt
)
)
return filtered_network2, final_network2, filtered_index2, final_index2, RAYS_CAST
def rayHits1D(
ray_payload, point_indexing, scatter_inc=1, origins_local=None, initial_network=None
):
"""
filtering process for each raycasting stage, seprating rays which hit sinks from non-terminating rays.
----------
ray_payload : TYPE
DESCRIPTION.
point_indexing : TYPE
DESCRIPTION.
scatter_inc : TYPE, optional
DESCRIPTION. The default is 1.
origins_local : TYPE, optional
DESCRIPTION. The default is None.
initial_network : TYPE, optional
DESCRIPTION. The default is None.
Returns
-------
filtered_network : TYPE
DESCRIPTION.
final_network : TYPE
DESCRIPTION.
filtered_index : TYPE
DESCRIPTION.
final_index : TYPE
DESCRIPTION.
"""
hitmap = ~ray_payload[:]["intersect"]
filter_vector = np.ravel(
np.nonzero(np.all(np.asarray([hitmap, np.isnan(point_indexing[:, 1])]), axis=0))
)
output_vector = np.ravel(
np.nonzero(
np.all(np.asarray([hitmap, ~np.isnan(point_indexing[:, 1])]), axis=0)
)
)
#
final_network = np.zeros(
(np.max(np.shape(output_vector)), 3 + (scatter_inc * 3)), dtype=np.float32
)
final_network[:, 0:-3] = origins_local[output_vector, :]
final_network[:, -3] = ray_payload[output_vector]["tx"]
final_network[:, -2] = ray_payload[output_vector]["ty"]
final_network[:, -1] = ray_payload[output_vector]["tz"]
#
filtered_network = np.zeros(
(np.max(np.shape(filter_vector)), 3 + (scatter_inc * 3)), dtype=np.float32
)
filtered_network[:, 0:-3] = origins_local[filter_vector, :]
filtered_network[:, -3] = ray_payload[filter_vector]["tx"]
filtered_network[:, -2] = ray_payload[filter_vector]["ty"]
filtered_network[:, -1] = ray_payload[filter_vector]["tz"]
#
# final_network=hit_network[output_vector,:]
# filtered_network=np.delete(hit_network,output_vector,axis=0)
final_index = point_indexing[output_vector, :]
filtered_index = point_indexing[filter_vector, :]
return filtered_network, final_network, filtered_index, final_index
def rotate_view(vis):
ctr = vis.get_view_control()
ctr.rotate(2.0, 0.0)
return False
# @njit
def create_model_index(source_index, sink_index, scattering_point_index):
"""
Create an index for the model environment, first sources, then sinks, the scattering points in an uninteruppted chain
Thus
Parameters
----------
sources : TYPE
DESCRIPTION.
sinks : TYPE
DESCRIPTION.
scattering_points : TYPE
DESCRIPTION.
Returns
-------
io_indexing : TYPE
DESCRIPTION.
"""
io_indexing = np.zeros(
(
source_index.shape[0]
* (sink_index.shape[0] + scattering_point_index.shape[0]),
source_index.shape[1] + 1,
),
dtype=np.int32,
)
idx = 0
for n in np.arange(0, source_index.shape[0]):
if scattering_point_index.shape[0] == 0:
io_indexing[
np.arange(
idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0])
),
-1,
] = sink_index.ravel()
else:
io_indexing[
np.arange(
idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0])
),
-1,
] = np.append(sink_index, scattering_point_index, axis=0).ravel()
io_indexing[
np.arange(
idx, idx + (sink_index.shape[0] + scattering_point_index.shape[0])
),
0 : source_index.shape[1],
] = source_index[n, :]
idx += sink_index.shape[0] + scattering_point_index.shape[0]
# then boolean check and filter to ensure there are no loops, aiming next ray at originating point
return io_indexing[np.not_equal(io_indexing[:, -2], io_indexing[:, -1]), :]
[docs]def workchunkingv2(
sources, sinks, scattering_points, environment, max_scatter, line_of_sight=True
):
"""
Raycasting index creation and assignment to raycaster, upper bound is around 4.7e8 rays at a time, there is already chunking to prevent overflow of the GPU memory and timeouts
Parameters
----------
sources : n*3 numpy array of float
sinks : m*3 numpy array of float
scattering_points : o*3 numpy array of float
environment : numpy array of triangle_t
max_scatter : int
line_of_sight : boolean
Returns
---------
full_index : 2D numpy array of ints
the index for all successful rays cast from source coordinates, to any scattering points, to the sink point for each entry
RAYS_CAST : int
the number of rays cast in this launch.
"""
# temp function to chunk the number of rays to prevent creation of ray arrays to large for memory
# print('WorkChunking Triangles ',len(environment))
RAYS_CAST=0
raycasting_timestamp = timer()
ray_estimate = (
sources.shape[0] * (sinks.shape[0] + scattering_points.shape[0])
) + (
sources.shape[0] * (scattering_points.shape[0] * sinks.shape[0] * (max_scatter))
)
# print("Total of {:3.1f} rays required".format(ray_estimate))
# establish memory limits
free_mem, total_mem = cuda.current_context().get_memory_info()
max_mem = np.ceil(free_mem * 0.8).astype(np.int64)
ray_limit = (
np.floor(np.floor((max_mem - environment.nbytes) / base_types.ray_t.size) / 1e7) * 1e7
).astype(np.int64)
# establish index boundaries
source_index = np.arange(1, sources.shape[0] + 1).reshape(
sources.shape[0], 1
) # index starts at 1, leaving 0 as a null
sink_index = np.arange(
sources.shape[0] + 1, sources.shape[0] + 1 + sinks.shape[0]
).reshape(sinks.shape[0], 1)
scattering_point_index = np.arange(
np.max(sink_index) + 1, np.max(sink_index) + 1 + scattering_points.shape[0]
).reshape(scattering_points.shape[0], 1)
# implement chunking
if max_scatter == 1:
full_index = np.empty((0, 2), dtype=np.int32)
io_indexing = create_model_index(
source_index, sink_index, scattering_point_index
)
if io_indexing.shape[0] >= ray_limit:
# need to split the array and process seperatly
sub_io = np.array_split(
io_indexing, np.ceil(io_indexing.shape[0] / ray_limit).astype(int)
)
chunknum = len(sub_io)
for chunkindex in range(chunknum):
_, temp_index, first_wave_Rays = launchRaycaster1Dv3(
sources,
sinks,
scattering_points,
sub_io[chunkindex],
copy.deepcopy(environment),
)
full_index = np.append(full_index, temp_index, axis=0)
RAYS_CAST += first_wave_Rays
else:
_, temp_index, first_wave_Rays = launchRaycaster1Dv3(
sources,
sinks,
scattering_points,
io_indexing,
copy.deepcopy(environment),
)
full_index = np.append(full_index, temp_index, axis=0)
RAYS_CAST = first_wave_Rays
elif max_scatter == 2:
full_index = np.empty((0, 3), dtype=np.int32)
if line_of_sight:
io_indexing = create_model_index(
source_index, sink_index, scattering_point_index
)
else:
# drop line of sight rays from queue
io_indexing = create_model_index(
source_index, np.empty((0, 1)), scattering_point_index
)
if io_indexing.shape[0] >= ray_limit:
# need to split the array and process separately
sub_io = np.array_split(
io_indexing, np.ceil(io_indexing.shape[0] / ray_limit).astype(int)
)
chunknum = len(sub_io)
final_index = np.empty((0, 3), dtype=np.int32)
RAYS_CAST = 0
for chunkindex in range(chunknum):
(
temp_filtered_index,
temp_final_index,
first_wave_Rays,
) = launchRaycaster1Dv3(
sources,
sinks,
scattering_points,
sub_io[chunkindex],
copy.deepcopy(environment),
)
# filtered_index = np.append(filtered_index, temp_filtered_index, axis=0)
# final_index = np.append(final_index, temp_final_index, axis=0)
if temp_filtered_index.shape[0] == 0:
temp_filtered_index2 = np.empty((0, 3), dtype=np.int32)
temp_final_index2 = np.empty((0, 3), dtype=np.int32)
second_wave_rays = 0
else:
(
temp_filtered_index2,
temp_final_index2,
second_wave_rays,
) = chunkingRaycaster1Dv3(
sources,
sinks,
scattering_points,
temp_filtered_index,
environment,
terminate_flag=True,
)
RAYS_CAST += first_wave_Rays + second_wave_rays
# create combined path network
host_size = np.shape(temp_final_index)
host_padding = np.zeros((host_size[0], 1), dtype=np.int32)
temp_full_index = np.append(
np.append(temp_final_index, host_padding, axis=1),
temp_final_index2,
axis=0,
)
full_index = np.append(full_index, temp_full_index, axis=0)
else:
filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3(
sources,
sinks,
scattering_points,
io_indexing,
copy.deepcopy(environment),
)
if filtered_index.shape[0] == 0:
filtered_index2 = np.empty((0, 3), dtype=np.int32)
final_index2 = np.empty((0, 3), dtype=np.int32)
second_wave_rays = 0
else:
filtered_index2, final_index2, second_wave_rays = chunkingRaycaster1Dv3(
sources,
sinks,
scattering_points,
filtered_index,
environment,
terminate_flag=True,
)
# cuda.profile_stop()
# print('WorkChunkingMaxScatter2 Triangles ', len(environment), 'Filtered Rays Stage 2', len(filtered_index2), 'Final Rays Stage 2',len(final_index2))
start = timer()
# filtered_network2,final_network2,filtered_index2,final_index2=rayHits(second_ray_payload,distmap2,io_index2,scatter_inc=2,origins_local=second_origin)
RAYS_CAST = first_wave_Rays + second_wave_rays
# create combined path network
host_size = np.shape(final_index)
host_padding = np.zeros((host_size[0], 1), dtype=np.int32)
temp_full_index = np.append(
np.append(final_index, host_padding, axis=1), final_index2, axis=0
)
full_index = np.append(full_index, temp_full_index, axis=0)
elif max_scatter == 3:
full_index = np.empty((0, 4), dtype=np.int32)
chunknum = np.minimum(sources.shape[0], np.int(np.ceil(ray_estimate / 2e8)))
chunknum = np.maximum(2, chunknum)
source_chunking = np.linspace(0, sources.shape[0], chunknum, dtype=np.int32)
for chunkindex in range(source_chunking.size - 1):
if line_of_sight:
io_indexing = create_model_index(
source_index[
source_chunking[chunkindex] : source_chunking[chunkindex + 1]
],
sink_index,
scattering_point_index,
)
filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3(
sources, sinks, scattering_points, io_indexing, environment
)
else:
# drop line of sight rays from queue
io_indexing = create_model_index(
source_index[
source_chunking[chunkindex] : source_chunking[chunkindex + 1]
],
np.empty((0, 1)),
scattering_point_index,
)
filtered_index, final_index, first_wave_Rays = launchRaycaster1Dv3(
sources, sinks, scattering_points, io_indexing, environment
)
filtered_index2, final_index2, second_wave_rays = chunkingRaycaster1Dv3(
sources, sinks, scattering_points, filtered_index, environment, False
)
if filtered_index2.shape[0] * sinks.shape[0] > 2e8:
temp_chunks = np.int(
np.ceil((filtered_index2.shape[0] * sinks.shape[0]) / 2e8)
)
temp_chunking = np.linspace(
0, filtered_index2.shape[0], temp_chunks, dtype=np.int32
)
temp_index = np.empty((0, 4), dtype=np.int32)
for temp_chunkindex in range(temp_chunking.shape[0] - 1):
(
filtered_index3,
final_index3_part,
third_wave_rays,
) = chunkingRaycaster1Dv3(
sources,
sinks,
scattering_points,
filtered_index2[
temp_chunking[temp_chunkindex] : temp_chunking[
temp_chunkindex + 1
],
:,
],
environment,
True,
)
temp_index = np.append(temp_index, final_index3_part, axis=0)
final_index3 = temp_index
else:
filtered_index3, final_index3, third_wave_rays = chunkingRaycaster1Dv3(
sources,
sinks,
scattering_points,
filtered_index2,
environment,
True,
)
# cuda.profile_stop()
start = timer()
# filtered_network2,final_network2,filtered_index2,final_index2=rayHits(second_ray_payload,distmap2,io_index2,scatter_inc=2,origins_local=second_origin)
RAYS_CAST = first_wave_Rays + second_wave_rays + third_wave_rays
# create combined path network
host_size = np.shape(final_index)
host_nans = np.zeros((host_size[0], 1), dtype=np.int32)
temp_total = np.append(
np.append(final_index, host_nans, axis=1), final_index2, axis=0
)
host_size2 = np.shape(temp_total)
host_nans2 = np.zeros((host_size2[0], 1), dtype=np.int32)
full_index_temp = np.append(
np.append(temp_total, host_nans2, axis=1), final_index3, axis=0
)
full_index = np.append(full_index, full_index_temp, axis=0)
# process for scattering matrix
raycastingduration = raycasting_timestamp - timer()
# print("Raycasting Duration: {:3.1f} s, Total Rays: {:3.1f}".format(raycastingduration,RAYS_CAST) )
return full_index, RAYS_CAST
def create_scatter_index(
source_index, sink_index, scattering_point_index, scattering_mask
):
# create target index for given scattering depth
target_index = np.empty((0, np.max(scattering_mask) + 2), dtype=np.int32)
# temp_index=create_model_index(source_index, sink_index, scattering_point_index)
# stop_index=temp_index[np.isin(temp_index[:,-1],sink_index),:]
# start_index=temp_index[~np.isin(temp_index[:,-1],sink_index),:]
# final_index=create_model_index(start_index, sink_index, scattering_point_index)
# target_index=np.append(np.append(stop_index,np.zeros((stop_index.shape[0],1),dtype=np.int32),axis=1),final_index,axis=0)
for launch_round in range(np.max(scattering_mask) + 1):
# create list of allowed points which always has all sinks.
if launch_round == 0:
allowed_illuminators = source_index
# allowed_points=np.append(np.where(scattering_mask>=(launch_round+1))[0]+1,(np.where(scattering_mask==0)[0]+1))
# allowed_points=copy.deepcopy(sink_index).ravel()
allowed_scatter_points = (
np.where(scattering_mask >= (launch_round + 1))[0] + 1
)
temp_index = create_model_index(
allowed_illuminators,
sink_index,
allowed_scatter_points.reshape(allowed_scatter_points.shape[0], 1),
)
target_index = np.append(
target_index,
np.append(
temp_index,
np.zeros(
(
temp_index.shape[0],
target_index.shape[1] - temp_index.shape[1],
),
dtype=np.int32,
),
axis=1,
),
axis=0,
)
else:
allowed_illuminators = (
np.where(scattering_mask >= (launch_round + 1))[0] + 1
)
# allowed_points=np.append(np.where(scattering_mask>=(launch_round+1))[0]+1,(np.where(scattering_mask==0)[0]+1))
allowed_scatter_points = scattering_point_index
propagate_index = np.where(
target_index[:, launch_round] > np.max(sink_index)
)[0]
drain_trim = np.delete(
propagate_index,
np.isin(
target_index[propagate_index, launch_round], allowed_illuminators
),
)
drain_temp = create_model_index(
target_index[drain_trim, 0 : launch_round + 1],
sink_index,
np.empty((0)),
)
illuminator_index = propagate_index[
np.isin(
target_index[propagate_index, launch_round], allowed_illuminators
)
]
forward_temp = create_model_index(
target_index[illuminator_index, 0 : launch_round + 1],
sink_index,
allowed_scatter_points.reshape(allowed_scatter_points.shape[0], 1),
)
trimmed_index = np.delete(target_index, propagate_index, axis=0)
target_index = np.append(
trimmed_index,
np.append(
np.append(drain_temp, forward_temp, axis=0),
np.zeros(
(
drain_temp.shape[0] + forward_temp.shape[0],
target_index.shape[1] - drain_temp.shape[1],
),
dtype=np.int32,
),
axis=1,
),
axis=0,
)
# so far this
return target_index
def integratedraycastersetup(
sources, sinks, scattering_points, environment, scattering_mask
):
# temp function to chunk the number of rays to prevent creation of ray arrays to large for memory
raycasting_timestamp = timer()
max_scatter = np.max(scattering_mask)
if max_scatter == 0:
ray_estimate = sources * sinks
elif max_scatter == 1:
ray_estimate = (
sources * (sinks + scattering_points.shape[0])
+ sources * scattering_points.shape[0] * sinks
)
elif max_scatter == 2:
ray_estimate = (
sources * (sinks + scattering_points.shape[0])
+ sources * scattering_points.shape[0] * sinks
+ sources
* scattering_points.shape[0]
* (scattering_points.shape[0])
* sinks
)
# print("Total of {:3.1f} rays required".format(ray_estimate))
# establish index boundaries
source_index = np.arange(1, sources + 1).reshape(
sources, 1
) # index starts at 1, leaving 0 as a null
sink_index = np.arange(sources + 1, sources + 1 + sinks).reshape(sinks, 1)
scattering_point_index = np.arange(
np.max(sink_index) + 1,
np.max(sink_index) + 1 + (scattering_points.shape[0] - sources - sinks),
).reshape((scattering_points.shape[0] - sources - sinks), 1)
# implement chunking
full_index = np.empty((0, np.max(scattering_mask) + 2), dtype=np.int32)
chunknum = np.minimum(sources, np.int(np.ceil(ray_estimate / 2e8)))
chunknum = np.maximum(2, chunknum)
source_chunking = np.linspace(0, sources, chunknum, dtype=np.int32)
for chunkindex in range(source_chunking.size - 1):
io_indexing = create_scatter_index(
source_index[source_chunking[chunkindex] : source_chunking[chunkindex + 1]],
sink_index,
scattering_point_index,
scattering_mask,
)
# io_indexing=create_model_index(source_index[source_chunking[chunkindex]:source_chunking[chunkindex+1]],sink_index,np.empty((0,1),dtype=np.int32))
full_index = np.append(
full_index,
integratedRaycaster(io_indexing, scattering_points, environment),
axis=0,
)
# full_index=create_scatter_index(source_index,sink_index,scattering_point_index,scattering_mask)
raycastingduration = timer() - raycasting_timestamp
# print("Raycasting Duration: {:3.1f} s, Total Rays: {:3.1f}".format(raycastingduration,full_index.shape[0]) )
return full_index, io_indexing
if __name__ == "__main__":
print("this is a library")