Python source code examples: config.cfg.output_shape()
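The examples below are taken from open-source human pose estimation projects. In all of them, cfg.output_shape holds the (height, width) of the network's output heatmap. A minimal stand-in config with hypothetical values (each project ships its own config module) could look like this:

# Minimal stand-in for the cfg object referenced in the examples below
# (the values are hypothetical; every project defines its own config).
class Config:
    output_shape = (64, 48)    # (height, width) of the output heatmap
    input_shape = (256, 192)   # (height, width) of the network input
    data_shape = (256, 192)    # alternative name used by some projects
    depth_dim = 64             # depth resolution of the 3D heatmap (Example 1)
    num_kps = 17               # number of keypoints
    nr_skeleton = 17           # number of keypoints (alternative name)

cfg = Config()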

Example 1
def soft_argmax(heatmaps, joint_num):

    # flatten each joint's 3D volume and turn it into a probability distribution
    heatmaps = heatmaps.reshape((-1, joint_num, cfg.depth_dim*cfg.output_shape[0]*cfg.output_shape[1]))
    heatmaps = F.softmax(heatmaps, 2)
    heatmaps = heatmaps.reshape((-1, joint_num, cfg.depth_dim, cfg.output_shape[0], cfg.output_shape[1]))

    # marginal distributions along the x, y and z axes
    accu_x = heatmaps.sum(dim=(2,3))
    accu_y = heatmaps.sum(dim=(2,4))
    accu_z = heatmaps.sum(dim=(3,4))

    # expected coordinate: weighted sum over positions 1..size on the heatmap's device
    accu_x = accu_x * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[1]+1).type(torch.cuda.FloatTensor), devices=[accu_x.device.index])[0]
    accu_y = accu_y * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[0]+1).type(torch.cuda.FloatTensor), devices=[accu_y.device.index])[0]
    accu_z = accu_z * torch.cuda.comm.broadcast(torch.arange(1,cfg.depth_dim+1).type(torch.cuda.FloatTensor), devices=[accu_z.device.index])[0]

    # shift back to 0-based coordinates
    accu_x = accu_x.sum(dim=2, keepdim=True) - 1
    accu_y = accu_y.sum(dim=2, keepdim=True) - 1
    accu_z = accu_z.sum(dim=2, keepdim=True) - 1

    coord_out = torch.cat((accu_x, accu_y, accu_z), dim=2)

    return coord_out
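Example 1 requires a CUDA device because of torch.cuda.comm.broadcast. A CPU-only sketch of the same soft-argmax computation, with made-up toy sizes, can be used to sanity-check shapes without a GPU:

import torch
import torch.nn.functional as F

def soft_argmax_cpu(heatmaps, joint_num, depth_dim=8, out_h=8, out_w=8):
    # heatmaps: (N, joint_num, depth_dim*out_h*out_w) raw scores
    probs = F.softmax(heatmaps.reshape(-1, joint_num, depth_dim * out_h * out_w), 2)
    probs = probs.reshape(-1, joint_num, depth_dim, out_h, out_w)
    # 0-based arange is equivalent to the 1-based arange followed by "- 1" above
    x = (probs.sum(dim=(2, 3)) * torch.arange(out_w, dtype=torch.float32)).sum(dim=2, keepdim=True)
    y = (probs.sum(dim=(2, 4)) * torch.arange(out_h, dtype=torch.float32)).sum(dim=2, keepdim=True)
    z = (probs.sum(dim=(3, 4)) * torch.arange(depth_dim, dtype=torch.float32)).sum(dim=2, keepdim=True)
    return torch.cat((x, y, z), dim=2)

coords = soft_argmax_cpu(torch.randn(2, 18, 8 * 8 * 8), joint_num=18)
print(coords.shape)  # torch.Size([2, 18, 3])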
Example 2
def create_refine_net(blocks, is_training, trainable=True):
    initializer = tf.contrib.layers.xavier_initializer()
    bottleneck = resnet_v1.bottleneck
    refine_fms = []
    for i, block in enumerate(blocks):
        # deeper pyramid levels pass through more bottleneck blocks
        mid_fm = block
        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            for j in range(i):
                mid_fm = bottleneck(mid_fm, 256, 128, stride=1, scope='res{}/refine_conv{}'.format(2+i, j)) # no projection
        # bring every level to the common heatmap resolution before concatenation
        mid_fm = tf.image.resize_bilinear(mid_fm, (cfg.output_shape[0], cfg.output_shape[1]),
            name='upsample_conv/res{}'.format(2+i))
        refine_fms.append(mid_fm)
    refine_fm = tf.concat(refine_fms, axis=3)
    with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
        refine_fm = bottleneck(refine_fm, 256, 128, stride=1, scope='final_bottleneck')
        # one heatmap channel per keypoint
        res = slim.conv2d(refine_fm, cfg.nr_skeleton, [3, 3],
            trainable=trainable, weights_initializer=initializer,
            padding='SAME', activation_fn=None,
            scope='refine_out')
    return res
Example 3
def render_gaussian_heatmap(self, coord, output_shape, sigma, valid=None):

        # pixel-coordinate grids over the output heatmap
        x = [i for i in range(output_shape[1])]
        y = [i for i in range(output_shape[0])]
        xx,yy = tf.meshgrid(x,y)
        xx = tf.reshape(tf.to_float(xx), (1,*output_shape,1))
        yy = tf.reshape(tf.to_float(yy), (1,*output_shape,1))

        # rescale keypoint coordinates from input-image space to heatmap space
        x = tf.reshape(coord[:,:,0],[-1,1,1,cfg.num_kps]) / cfg.input_shape[1] * output_shape[1]
        y = tf.reshape(coord[:,:,1],[-1,1,1,cfg.num_kps]) / cfg.input_shape[0] * output_shape[0]

        # un-normalized Gaussian centered on each keypoint
        heatmap = tf.exp(-(((xx-x)/tf.to_float(sigma))**2)/tf.to_float(2) -(((yy-y)/tf.to_float(sigma))**2)/tf.to_float(2))

        if valid is not None:
            # zero out heatmaps of invisible/invalid keypoints
            valid_mask = tf.reshape(valid, [-1, 1, 1, cfg.num_kps])
            heatmap = heatmap * valid_mask

        return heatmap * 255.
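For reference, the same Gaussian-rendering math for a single keypoint in plain NumPy (toy values; the TensorFlow version above renders a whole batch of keypoints at once):

import numpy as np

output_shape = (64, 48)     # (height, width) of the heatmap
input_shape = (256, 192)    # (height, width) of the network input
sigma = 2.0
coord = (96.0, 128.0)       # keypoint (x, y) in input-image coordinates

xx, yy = np.meshgrid(np.arange(output_shape[1]), np.arange(output_shape[0]))
x = coord[0] / input_shape[1] * output_shape[1]   # rescale to heatmap coordinates
y = coord[1] / input_shape[0] * output_shape[0]
heatmap = np.exp(-((xx - x) / sigma) ** 2 / 2 - ((yy - y) / sigma) ** 2 / 2) * 255.
print(heatmap.shape, heatmap.max())   # (64, 48) 255.0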
Example 4
def create_global_net(blocks, is_training, trainable=True):
    global_fms = []
    global_outs = []
    last_fm = None
    initializer = tf.contrib.layers.xavier_initializer()
    # FPN-style top-down pathway: walk the backbone features from deepest to shallowest
    for i, block in enumerate(reversed(blocks)):
        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            lateral = slim.conv2d(block, 256, [1, 1],
                trainable=trainable, weights_initializer=initializer,
                padding='SAME', activation_fn=tf.nn.relu,
                scope='lateral/res{}'.format(5-i))

        if last_fm is not None:
            # upsample the previous (deeper) feature map and merge it with the lateral one
            sz = tf.shape(lateral)
            upsample = tf.image.resize_bilinear(last_fm, (sz[1], sz[2]),
                name='upsample/res{}'.format(5-i))
            upsample = slim.conv2d(upsample, 256, [1, 1],
                trainable=trainable, weights_initializer=initializer,
                padding='SAME', activation_fn=None,
                scope='merge/res{}'.format(5-i))
            last_fm = upsample + lateral
        else:
            last_fm = lateral

        with slim.arg_scope(resnet_arg_scope(bn_is_training=is_training)):
            tmp = slim.conv2d(last_fm, 256, [1, 1],
                trainable=trainable, weights_initializer=initializer,
                padding='SAME', activation_fn=tf.nn.relu,
                scope='tmp/res{}'.format(5-i))
            out = slim.conv2d(tmp, cfg.nr_skeleton, [3, 3],
                trainable=trainable, weights_initializer=initializer,
                padding='SAME', activation_fn=None,
                scope='pyramid/res{}'.format(5-i))
        global_fms.append(last_fm)
        # every pyramid output is resized to the common heatmap resolution
        global_outs.append(tf.image.resize_bilinear(out, (cfg.output_shape[0], cfg.output_shape[1])))
    global_fms.reverse()
    global_outs.reverse()
    return global_fms, global_outs
Example 5
def joints_heatmap_gen(data, label, tar_size=cfg.output_shape, ori_size=cfg.data_shape, points=cfg.nr_skeleton,
                       return_valid=False, gaussian_kernel=cfg.gaussain_kernel):
    if return_valid:
        valid = np.ones((len(data), points), dtype=np.float32)
    ret = np.zeros((len(data), points, tar_size[0], tar_size[1]), dtype='float32')
    for i in range(len(ret)):
        for j in range(points):
            # label[i] is a flat [x0, y0, x1, y1, ...] array: j << 1 is x, j << 1 | 1 is y
            if label[i][j << 1] < 0 or label[i][j << 1 | 1] < 0:
                continue
            # clamp to the original image size, then place a single peak on the target heatmap
            label[i][j << 1 | 1] = min(label[i][j << 1 | 1], ori_size[0] - 1)
            label[i][j << 1] = min(label[i][j << 1], ori_size[1] - 1)
            ret[i][j][int(label[i][j << 1 | 1] * tar_size[0] / ori_size[0])][
                int(label[i][j << 1] * tar_size[1] / ori_size[1])] = 1
    # blur each peak into a Gaussian blob
    for i in range(len(ret)):
        for j in range(points):
            ret[i, j] = cv2.GaussianBlur(ret[i, j], gaussian_kernel, 0)
    # rescale every non-empty heatmap so its peak is 255; empty ones are marked invalid
    for i in range(len(ret)):
        for j in range(cfg.nr_skeleton):
            am = np.amax(ret[i][j])
            if am <= 1e-8:
                if return_valid:
                    valid[i][j] = 0.
                continue
            ret[i][j] /= am / 255
    if return_valid:
        return ret, valid
    else:
        return ret
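The indexing above assumes each label row is a flat [x0, y0, x1, y1, ...] array, so j << 1 addresses the x coordinate of joint j and j << 1 | 1 its y coordinate; a toy illustration with made-up values:

label_row = [10.0, 20.0, 30.0, 40.0]   # two keypoints: (10, 20) and (30, 40)
for j in range(2):
    print(j, label_row[j << 1], label_row[j << 1 | 1])   # j, x_j, y_j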
Example 6
def forward(self, x, k_value):
        # x,y
        xy = self.deconv_layers(x)
        xy = self.xy_layer(xy)
        xy = xy.view(-1,1,cfg.output_shape[0]*cfg.output_shape[1])
        xy = F.softmax(xy,2)
        xy = xy.view(-1,1,cfg.output_shape[0],cfg.output_shape[1])

        hm_x = xy.sum(dim=(2))
        hm_y = xy.sum(dim=(3))

        coord_x = hm_x * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[1]+1).type(torch.cuda.FloatTensor), devices=[hm_x.device.index])[0]
        coord_y = hm_y * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[0]+1).type(torch.cuda.FloatTensor), devices=[hm_y.device.index])[0]
        
        coord_x = coord_x.sum(dim=2) - 1
        coord_y = coord_y.sum(dim=2) - 1

        # z
        img_feat = torch.mean(x.view(x.size(0), x.size(1), x.size(2)*x.size(3)), dim=2) # global average pooling
        img_feat = torch.unsqueeze(img_feat, 2)
        img_feat = torch.unsqueeze(img_feat, 3)
        gamma = self.depth_layer(img_feat)
        gamma = gamma.view(-1,1)
        depth = gamma * k_value.view(-1,1)

        coord = torch.cat((coord_x, coord_y, depth), dim=1)
        return coord 
Example 7
def evaluate(self, preds, result_dir):
        
        print('Evaluation start...')
        pred_save = []

        gts = self.data
        sample_num = len(preds)
        for n in range(sample_num):
            
            gt = gts[n]
            image_id = gt['image_id']
            f = gt['f']
            c = gt['c']
            bbox = gt['bbox'].tolist()
            score = gt['score']
            
            # restore coordinates to original space
            pred_root = preds[n].copy()
            pred_root[0] = pred_root[0] / cfg.output_shape[1] * bbox[2] + bbox[0]
            pred_root[1] = pred_root[1] / cfg.output_shape[0] * bbox[3] + bbox[1]

            # back project to camera coordinate system
            pred_root = pixel2cam(pred_root[None,:], f, c)[0]

            pred_save.append({'image_id': image_id, 'root_cam': pred_root.tolist(), 'bbox': bbox, 'score': score})
        
        output_path = osp.join(result_dir, 'bbox_root_mupots_output.json')
        with open(output_path, 'w') as f:
            json.dump(pred_save, f)
        print("Test result is saved at " + output_path)

        calculate_score(output_path, self.annot_path, 250) 
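The restoration step above simply rescales a heatmap-space prediction into the original bounding box; a small worked example with made-up numbers:

output_shape = (64, 48)             # (height, width) of the heatmap
bbox = [100.0, 50.0, 200.0, 300.0]  # (x, y, width, height) in the original image
x_hm, y_hm = 32.0, 24.0             # predicted root position on the heatmap
x_img = x_hm / output_shape[1] * bbox[2] + bbox[0]   # 32/48 * 200 + 100 = 233.33
y_img = y_hm / output_shape[0] * bbox[3] + bbox[1]   # 24/64 * 300 + 50  = 162.5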
Example 8
def evaluate(self, preds, result_dir):
        
        print('Evaluation start...')
        gts = self.data
        sample_num = len(preds)
        pred_save = []
        for n in range(sample_num):
            
            gt = gts[n]
            image_id = gt['image_id']
            f = gt['f']
            c = gt['c']
            bbox = gt['bbox'].tolist()
            
            # restore coordinates to original space
            pred_root = preds[n].copy()
            pred_root[0] = pred_root[0] / cfg.output_shape[1] * bbox[2] + bbox[0]
            pred_root[1] = pred_root[1] / cfg.output_shape[0] * bbox[3] + bbox[1]

            # back project to camera coordinate system
            pred_root = pixel2cam(pred_root[None,:], f, c)[0]

            pred_save.append({'image_id': image_id, 'root_cam': pred_root.tolist(), 'bbox': bbox})
        
        output_path = osp.join(result_dir, 'bbox_root_coco_output.json')
        with open(output_path, 'w') as f:
            json.dump(pred_save, f)
        print("Testing result is saved at " + output_path) 
Example 9
def evaluate(self, preds, result_dir):
        print('Evaluation start...')
        gts = self.data
        assert len(gts) == len(preds)
        sample_num = len(gts)
 
        pred_save = []
        errors = np.zeros((sample_num,3))
        for n in range(sample_num):
            gt = gts[n]
            f = gt['f']
            c = gt['c']
            bbox = gt['bbox']

            pred_root_coord = preds[n]
            pred_root_coord[0] = pred_root_coord[0] / cfg.output_shape[1] * bbox[2] + bbox[0]
            pred_root_coord[1] = pred_root_coord[1] / cfg.output_shape[0] * bbox[3] + bbox[1]
            pred_root_coord = pixel2cam(pred_root_coord[None,:], f, c)

            # error calculate
            pred_root_coord = pred_root_coord.reshape(3)
            gt_root_coord = gt['root_cam'].reshape(3)
            errors[n] = (pred_root_coord - gt_root_coord)**2

            # prediction save
            img_id = gt['img_id']
            ann_id = gt['ann_id']
            pred_root_coord = pred_root_coord.reshape(3)
            pred_save.append({'image_id': img_id, 'ann_id': ann_id, 'bbox': bbox.tolist(), 'root_cam': pred_root_coord.tolist()})
       
        err_x = np.mean(np.sqrt(errors[:,0]))
        err_y = np.mean(np.sqrt(errors[:,1]))
        err_z = np.mean(np.sqrt(errors[:,2]))
        err_total = np.mean(np.sqrt(np.sum(errors,1)))
        print('MRPE >> x: ' + str(err_x) + ' y: ' + str(err_y) + ' z: ' + str(err_z) + ' total: ' + str(err_total)) # error print (meter)

        output_path = osp.join(result_dir, 'rootnet_pw3d_output.json')
        with open(output_path, 'w') as f:
            json.dump(pred_save, f)
        print("Test result is saved at " + output_path) 
Example 10
def render_gaussian_heatmap(self, coord, output_shape, sigma):

        x = [i for i in range(output_shape[1])]
        y = [i for i in range(output_shape[0])]
        xx,yy = tf.meshgrid(x,y)
        xx = tf.reshape(tf.to_float(xx), (1,*output_shape,1))
        yy = tf.reshape(tf.to_float(yy), (1,*output_shape,1))

        x = tf.floor(tf.reshape(coord[:,:,0],[-1,1,1,cfg.nr_skeleton]) / cfg.data_shape[1] * output_shape[1] + 0.5)
        y = tf.floor(tf.reshape(coord[:,:,1],[-1,1,1,cfg.nr_skeleton]) / cfg.data_shape[0] * output_shape[0] + 0.5)

        heatmap = tf.exp(-(((xx-x)/tf.to_float(sigma))**2)/tf.to_float(2) -(((yy-y)/tf.to_float(sigma))**2)/tf.to_float(2))
        return heatmap * 255. 
Example 11
def make_network(self, is_train):
        if is_train:
            image = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.data_shape, 3])

            label15 = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.output_shape, cfg.nr_skeleton])
            label11 = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.output_shape, cfg.nr_skeleton])
            label9 = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.output_shape, cfg.nr_skeleton])
            label7 = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.output_shape, cfg.nr_skeleton])
            valids = tf.placeholder(tf.float32, shape=[cfg.batch_size, cfg.nr_skeleton])
            labels = [label15, label11, label9, label7]
            self.set_inputs(image, label15, label11, label9, label7, valids)
        else:
            image = tf.placeholder(tf.float32, shape=[None, *cfg.data_shape, 3])
            self.set_inputs(image)

        mobilenet_v1_fms, endpoints = mobilenet_v1_base(image)

        heatmap_outs = self.head_net(mobilenet_v1_fms, is_train)

        # make loss
        if is_train:
            def ohkm(loss, top_k):
                ohkm_loss = 0.
                for i in range(cfg.batch_size):
                    sub_loss = loss[i]
                    topk_val, topk_idx = tf.nn.top_k(sub_loss, k=top_k, sorted=False, name='ohkm{}'.format(i))
                    tmp_loss = tf.gather(sub_loss, topk_idx, name='ohkm_loss{}'.format(i)) # can be ignore ???
                    ohkm_loss += tf.reduce_sum(tmp_loss) / top_k
                ohkm_loss /= cfg.batch_size
                return ohkm_loss

            label = label7 * tf.to_float(tf.greater(tf.reshape(valids, (-1, 1, 1, cfg.nr_skeleton)), 0.1))
            loss = tf.reduce_mean(tf.square(heatmap_outs - label))

            self.add_tower_summary('loss', loss)
            self.set_loss(loss)
        else:
            self.set_outputs(heatmap_outs) 
Example 12
def extract_coordinate(self, heatmap_outs):
        shape = heatmap_outs.get_shape().as_list()
        batch_size = tf.shape(heatmap_outs)[0]
        height = shape[1]
        width = shape[2]
        output_shape = (height, width)
        
        # coordinate extract from output heatmap
        y = [i for i in range(output_shape[0])]
        x = [i for i in range(output_shape[1])]
        xx, yy = tf.meshgrid(x, y)
        xx = tf.to_float(xx) + 1
        yy = tf.to_float(yy) + 1
        
        heatmap_outs = tf.reshape(tf.transpose(heatmap_outs, [0, 3, 1, 2]), [batch_size, cfg.num_kps, -1])
        heatmap_outs = tf.nn.softmax(heatmap_outs)
        heatmap_outs = tf.transpose(tf.reshape(heatmap_outs, [batch_size, cfg.num_kps, output_shape[0], output_shape[1]]), [0, 2, 3, 1])

        x_out = tf.reduce_sum(tf.multiply(heatmap_outs, tf.tile(tf.reshape(xx,[1, output_shape[0], output_shape[1], 1]), [batch_size, 1, 1, cfg.num_kps])), [1,2])
        y_out = tf.reduce_sum(tf.multiply(heatmap_outs, tf.tile(tf.reshape(yy,[1, output_shape[0], output_shape[1], 1]), [batch_size, 1, 1, cfg.num_kps])), [1,2])
        coord_out = tf.concat([tf.reshape(x_out, [batch_size, cfg.num_kps, 1])\
            ,tf.reshape(y_out, [batch_size, cfg.num_kps, 1])]\
                    , axis=2)
        coord_out = coord_out - 1

        coord_out = coord_out / output_shape[0] * cfg.input_shape[0]

        return coord_out 
Example 13
def main(args):
    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained = True)
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion1 = torch.nn.MSELoss().cuda() # for Global loss
    criterion2 = torch.nn.MSELoss(reduce=False).cuda() # for refine loss
    optimizer = torch.optim.Adam(model.parameters(),
                                lr = cfg.lr,
                                weight_decay=cfg.weight_decay)
    
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            # fall back to a fresh logger so the training loop below still works
            logger = Logger(join(args.checkpoint, 'log.txt'))
            logger.set_names(['Epoch', 'LR', 'Train Loss'])
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    print('    Total params: %.2fMB' % (sum(p.numel() for p in model.parameters())/(1024*1024)*4))

    train_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg),
        batch_size=cfg.batch_size*args.num_gpus, shuffle=True,
        num_workers=args.workers, pin_memory=True) 

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr)) 

        # train for one epoch
        train_loss = train(train_loader, model, [criterion1, criterion2], optimizer)
        print('train_loss: ',train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    logger.close()