def iou(box0, box1):
    # box0: [x, y, z, d], where (x, y, z) is the center of the (cubic) bounding box and d is its side length (the diameter)
    r0 = box0[3] / 2  # half of the side length
    s0 = box0[:3] - r0  # lower corner of the box, i.e. (x, y, z) -> (x - r0, y - r0, z - r0)
    e0 = box0[:3] + r0  # upper corner of the box, i.e. (x, y, z) -> (x + r0, y + r0, z + r0)
r1 = box1[3] / 2
s1 = box1[:3] - r1
e1 = box1[:3] + r1
overlap = []
    for i in range(len(s0)):
        overlap.append(max(0, min(e0[i], e1[i]) - max(s0[i], s1[i])))
        # Same min/max trick as in 2D IoU: along each axis the overlap length is
        # min(upper edges) - max(lower edges), clipped to 0 when the boxes do not overlap.
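        # A quick 1D example of this trick: intervals [0, 4] and [2, 6] give
        # overlap = min(4, 6) - max(0, 2) = 2, while the disjoint intervals [0, 1] and [3, 4]
        # give min(1, 4) - max(0, 3) = -2, which max(0, .) clips to 0.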
intersection = overlap[0] * overlap[1] * overlap[2]
    union = box0[3] * box0[3] * box0[3] + box1[3] * box1[3] * box1[3] - intersection  # cube volumes (side length cubed) minus the double-counted intersection
return intersection / union # return IoU
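# Quick sanity check for `iou` (not in the original file; it assumes the boxes are numpy
# arrays, e.g. with `import numpy as np` at the top of the file): two 4x4x4 cubes whose
# centers are 2 apart along x share a 2*4*4 = 32 slab, so
#   iou(np.array([0., 0., 0., 4.]), np.array([2., 0., 0., 4.]))
# should return 32 / (64 + 64 - 32) = 1/3.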
class GetPBB(object):
"""Class for Getting the Predicted Bounding Boxes?
"""
def __init__(self, config):
        self.stride = config['stride']  # if `stride=1` and there are 4 anchors, we get 4 anchor boxes at every voxel location.
        self.anchors = np.asarray(config['anchors'])  # anchor sizes (box side lengths); len(anchors) = number of anchors per voxel.
    def __call__(self, output, thresh=-3, ismask=False):
        # `output` has the shape [Z, H, W, num_anchors_per_voxel, 5], where the 5 elements in the
        # last dimension are `confidence`, `(x - xa) / ra`, `(y - ya) / ra`, `(z - za) / ra` and
        # `log(r / ra)`, i.e. anchor-relative regression targets. A large confidence means the
        # network believes there is an object inside the box.
stride = self.stride
anchors = self.anchors
output = np.copy(output)
        offset = (float(stride) - 1) / 2  # centre of the first stride-sized cell, in input-voxel coordinates
output_size = output.shape
        # The pre-defined anchor centres form a regular grid over (z, h, w) in input-voxel coordinates
        oz = np.arange(offset, offset + stride * (output_size[0] - 1) + 1, stride)  # anchor z positions; shape [num_z,]. E.g. with offset=0, stride=1: oz = [0, 1, 2, ..., output_size[0] - 1].
        oh = np.arange(offset, offset + stride * (output_size[1] - 1) + 1, stride)  # anchor h positions; shape [num_h,]
        ow = np.arange(offset, offset + stride * (output_size[2] - 1) + 1, stride)  # anchor w positions; shape [num_w,]
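        # Concrete example (stride=4 is only an assumed value here): offset = (4 - 1) / 2 = 1.5,
        # and with output_size[0] = 4 we get oz = np.arange(1.5, 14.5, 4) = [1.5, 5.5, 9.5, 13.5],
        # i.e. the centres of consecutive 4x4x4 blocks of input voxels along z.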
output[:, :, :, :, 1] = oz.reshape((-1, 1, 1, 1)) + output[:, :, :, :, 1] * anchors.reshape((1, 1, 1, -1))
output[:, :, :, :, 2] = oh.reshape((1, -1, 1, 1)) + output[:, :, :, :, 2] * anchors.reshape((1, 1, 1, -1))
output[:, :, :, :, 3] = ow.reshape((1, 1, -1, 1)) + output[:, :, :, :, 3] * anchors.reshape((1, 1, 1, -1))
output[:, :, :, :, 4] = np.exp(output[:, :, :, :, 4]) * anchors.reshape((1, 1, 1, -1))
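        # Decoding illustration (the numbers are made up, and an anchor size of 30 is only an
        # assumed example): for that anchor at grid centre z = 9.5, raw outputs dz = 0.1 and
        # dlog = 0.3 decode to z = 9.5 + 0.1 * 30 = 12.5 and size = exp(0.3) * 30 ~= 40.5;
        # the last line above is what converts the log-scale size back to voxel units.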
        mask = output[..., 0] > thresh  # confidence thresholding (not non-maximum suppression): keep only candidates whose confidence exceeds `thresh`.
        xx, yy, zz, aa = np.where(mask)  # the variable names are arbitrary; with the [Z, H, W, num_anchors] layout above they are the z, h, w and anchor indices of the kept candidates.
        output = output[xx, yy, zz, aa]  # after thresholding, `output` has shape [num_kept, 5],
        # where the last dimension stores the confidence and the decoded x, y, z, r.
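        # Indexing illustration: if `mask` has shape [Z, H, W, A] and exactly two entries are True,
        # np.where(mask) returns four length-2 index arrays, and output[xx, yy, zz, aa] gathers the
        # corresponding rows along the last dimension, giving a [2, 5] array.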
if ismask:
return output,[xx,yy,zz,aa]
else:
            return output  # [num_kept, 5]: each row is (confidence, x, y, z, r) for one surviving candidate box.
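# Minimal usage sketch (not part of the original file; stride=4, anchors=[10., 30., 60.] and the
# output shape below are only assumed example values, and the detector output is replaced by
# random numbers just to exercise the code):
if __name__ == "__main__":
    config = {'stride': 4, 'anchors': [10.0, 30.0, 60.0]}
    get_pbb = GetPBB(config)
    # a fake detector output with shape [Z, H, W, num_anchors_per_voxel, 5]
    fake_output = np.random.randn(24, 24, 24, 3, 5).astype(np.float32)
    pbb = get_pbb(fake_output, thresh=-3)  # -> [num_kept, 5]: confidence, x, y, z, r
    pbb_masked, idx = get_pbb(fake_output, thresh=-3, ismask=True)  # also returns the kept indices
    print(pbb.shape, len(idx))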