def iou(box0, box1):
    # box0: [x, y, z, d], where (x, y, z) is the center of the (cubic) bounding box and d is its side length (the diameter)
    r0 = box0[3] / 2  # half of the side length
    s0 = box0[:3] - r0  # lower corner of the box, i.e. (x, y, z) -> (x - r0, y - r0, z - r0)
    e0 = box0[:3] + r0  # upper corner of the box, i.e. (x, y, z) -> (x + r0, y + r0, z + r0)
r1 = box1[3] / 2
s1 = box1[:3] - r1
e1 = box1[:3] + r1
overlap = []
    for i in range(len(s0)):
        overlap.append(max(0, min(e0[i], e1[i]) - max(s0[i], s1[i])))
        # Same min/max trick as in 2D IoU: along each axis the overlap length is
        # min(upper edges) - max(lower edges), clipped to 0 when the boxes do not overlap.
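        # A quick 1D example of this trick: intervals [0, 4] and [2, 6] give
        # overlap = min(4, 6) - max(0, 2) = 2, while the disjoint intervals [0, 1] and [3, 4]
        # give min(1, 4) - max(0, 3) = -2, which max(0, .) clips to 0.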
intersection = overlap[0] * overlap[1] * overlap[2]
    union = box0[3] * box0[3] * box0[3] + box1[3] * box1[3] * box1[3] - intersection  # cube volumes (side length cubed) minus the double-counted intersection
return intersection / union # return IoU
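# Quick sanity check for `iou` (not in the original file; it assumes the boxes are numpy
# arrays, e.g. with `import numpy as np` at the top of the file): two 4x4x4 cubes whose
# centers are 2 apart along x share a 2*4*4 = 32 slab, so
#   iou(np.array([0., 0., 0., 4.]), np.array([2., 0., 0., 4.]))
# should return 32 / (64 + 64 - 32) = 1/3.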
class GetPBB(object):
"""Class for Getting the Predicted Bounding Boxes?
"""
def __init__(self, config):
        self.stride = config['stride']  # if `stride=1` and there are 4 anchors, we get 4 anchor boxes at every voxel location.
        self.anchors = np.asarray(config['anchors'])  # anchor sizes (box side lengths); len(anchors) = number of anchors per voxel.
    def __call__(self, output, thresh=-3, ismask=False):
        # `output` has the shape [Z, H, W, num_anchors_per_voxel, 5], where the 5 elements in the
        # last dimension are `confidence`, `(x - xa) / ra`, `(y - ya) / ra`, `(z - za) / ra` and
        # `log(r / ra)`, i.e. anchor-relative regression targets. A large confidence means the
        # network believes there is an object inside the box.
stride = self.stride
anchors = self.anchors
output = np.copy(output)
        offset = (float(stride) - 1) / 2  # centre of the first stride-sized cell, in input-voxel coordinates
output_size = output.shape
        # The pre-defined anchor centres form a regular grid over (z, h, w) in input-voxel coordinates
        oz = np.arange(offset, offset + stride * (output_size[0] - 1) + 1, stride)  # anchor z positions; shape [num_z,]. E.g. with offset=0, stride=1: oz = [0, 1, 2, ..., output_size[0] - 1].
        oh = np.arange(offset, offset + stride * (output_size[1] - 1) + 1, stride)  # anchor h positions; shape [num_h,]
        ow = np.arange(offset, offset + stride * (output_size[2] - 1) + 1, stride)  # anchor w positions; shape [num_w,]
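        # Concrete example (stride=4 is only an assumed value here): offset = (4 - 1) / 2 = 1.5,
        # and with output_size[0] = 4 we get oz = np.arange(1.5, 14.5, 4) = [1.5, 5.5, 9.5, 13.5],
        # i.e. the centres of consecutive 4x4x4 blocks of input voxels along z.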
output[:, :, :, :, 1] = oz.reshape((-1, 1, 1, 1)) + output[:, :, :, :, 1] * anchors.reshape((1, 1, 1, -1))
output[:, :, :, :, 2] = oh.reshape((1, -1, 1, 1)) + output[:, :, :, :, 2] * anchors.reshape((1, 1, 1, -1))
output[:, :, :, :, 3] = ow.reshape((1, 1, -1, 1)) + output[:, :, :, :, 3] * anchors.reshape((1, 1, 1, -1))
output[:, :, :, :, 4] = np.exp(output[:, :, :, :, 4]) * anchors.reshape((1, 1, 1, -1))
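        # Decoding illustration (the numbers are made up, and an anchor size of 30 is only an
        # assumed example): for that anchor at grid centre z = 9.5, raw outputs dz = 0.1 and
        # dlog = 0.3 decode to z = 9.5 + 0.1 * 30 = 12.5 and size = exp(0.3) * 30 ~= 40.5;
        # the last line above is what converts the log-scale size back to voxel units.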
        mask = output[..., 0] > thresh  # confidence thresholding (not non-maximum suppression): keep only candidates whose confidence exceeds `thresh`.
        xx, yy, zz, aa = np.where(mask)  # the variable names are arbitrary; with the [Z, H, W, num_anchors] layout above they are the z, h, w and anchor indices of the kept candidates.
        output = output[xx, yy, zz, aa]  # after thresholding, `output` has shape [num_kept, 5],
        # where the last dimension stores the confidence and the decoded x, y, z, r.
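        # Indexing illustration: if `mask` has shape [Z, H, W, A] and exactly two entries are True,
        # np.where(mask) returns four length-2 index arrays, and output[xx, yy, zz, aa] gathers the
        # corresponding rows along the last dimension, giving a [2, 5] array.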
if ismask:
return output,[xx,yy,zz,aa]
else:
            return output  # [num_kept, 5]: each row is (confidence, x, y, z, r) for one surviving candidate box.
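# Minimal usage sketch (not part of the original file; stride=4, anchors=[10., 30., 60.] and the
# output shape below are only assumed example values, and the detector output is replaced by
# random numbers just to exercise the code):
if __name__ == "__main__":
    config = {'stride': 4, 'anchors': [10.0, 30.0, 60.0]}
    get_pbb = GetPBB(config)
    # a fake detector output with shape [Z, H, W, num_anchors_per_voxel, 5]
    fake_output = np.random.randn(24, 24, 24, 3, 5).astype(np.float32)
    pbb = get_pbb(fake_output, thresh=-3)  # -> [num_kept, 5]: confidence, x, y, z, r
    pbb_masked, idx = get_pbb(fake_output, thresh=-3, ismask=True)  # also returns the kept indices
    print(pbb.shape, len(idx))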