logo
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Readme
Files and versions

118 lines
4.5 KiB

"""
We load Linjie's features from: datasets/npy_per_img_id.visual_grounding_coco_gt
Each feature is named as: visual_grounding_coco_000000581857.npz
containing {norm_bb, features, conf, soft_labels}
The order of extracted bbox and features should align with ann_ids for each
img_id.
We save this order for the use of REFER dataloader.
"""
import time
import pickle
import numpy as np
from pprint import pprint
from tqdm import tqdm
import json
import os.path as osp
import argparse
def recover_ann_ids(denorm_bb, raw_bb, raw_ann_ids):
    """Re-order annotation ids so they follow the detector's box order.

    For each detector box, find the ground-truth box with the smallest
    L1 distance and take its annotation id. Ties keep the first match,
    mirroring the original strict-less-than scan.

    Inputs:
    - denorm_bb   : (n, 4) [xywh], denormalized boxes from the BUTD detector.
    - raw_bb      : (n, 4) [xywh], ground-truth boxes (REFER order).
    - raw_ann_ids : list of n annotation ids aligned with raw_bb.
    Return:
    - ordered_ann_ids : list of n annotation ids, ordered by denorm_bb.
    """
    assert denorm_bb.shape[0] == raw_bb.shape[0]
    ordered_ann_ids = []
    for ref_bb in denorm_bb:
        # L1 distance from this detector box to every ground-truth box.
        # np.argmin always yields a valid index (the old 1e5 threshold
        # could leave the index unset if all errors were huge).
        errs = np.abs(ref_bb - raw_bb).sum(axis=1)
        ordered_ann_ids.append(raw_ann_ids[int(np.argmin(errs))])
    return ordered_ann_ids
def main(args):
    """Build and save the img_id -> [ann_id] mapping aligned with BUTD boxes.

    For every image appearing in refcoco, refcoco+ and refcocog, load its
    pre-extracted feature file (norm_bb), denormalize the boxes, match them
    against the ground-truth annotation boxes, and record the annotation ids
    in detector-box order. The result is written as JSON to
    ``args.output_dir/iid_to_ann_ids.json``.

    Args:
        args: argparse.Namespace with ``refer_dir``, ``feats_dir`` and
            ``output_dir`` attributes.
    """
    import os
    tic = time.time()  # NOTE(review): never read afterwards; kept for parity.
    iid_to_ann_ids = {}
    # Images whose matched ann_id set differs from the raw set, i.e. images
    # that likely contain duplicated (identical) bounding boxes.
    warning_img_ids = set()
    for dataset in ['refcoco', 'refcoco+', 'refcocog']:
        print('Checking %s...' % dataset)
        # Use a context manager so the file handle is always closed.
        with open(osp.join(args.refer_dir, dataset, 'instances.json')) as f:
            instances = json.load(f)
        Anns, Imgs, iid_to_raw_ann_ids = {}, {}, {}
        for ann in instances['annotations']:
            Anns[ann['id']] = ann
            iid_to_raw_ann_ids[ann['image_id']] = iid_to_raw_ann_ids.get(
                ann['image_id'], []) + [ann['id']]
        for img in instances['images']:
            Imgs[img['id']] = img
        # Make iid_to_ann_ids for this dataset.
        img_ids = list(Imgs.keys())
        for img_id in tqdm(img_ids):
            if img_id in iid_to_ann_ids:
                # COCO images are shared across the three REFER datasets;
                # skip images already processed for an earlier dataset.
                continue
            raw_ann_ids = iid_to_raw_ann_ids[img_id]
            # Ground-truth boxes in raw annotation order, (n, 4) xywh.
            raw_gd_bb = np.array([Anns[ann_id]['bbox']
                                  for ann_id in raw_ann_ids])
            # Denormalize the detector boxes back to pixel coordinates.
            im_width = Imgs[img_id]['width']
            im_height = Imgs[img_id]['height']
            img_feat = np.load(osp.join(
                args.feats_dir,
                f'visual_grounding_coco_gt_{int(img_id):012}.npz'))
            norm_bb = img_feat['norm_bb']
            x1, x2 = norm_bb[:, 0] * im_width, norm_bb[:, 2] * im_width
            y1, y2 = norm_bb[:, 1] * im_height, norm_bb[:, 3] * im_height
            w, h = norm_bb[:, 4] * im_width, norm_bb[:, 5] * im_height
            denorm_bb = np.stack([x1, y1, w, h], axis=1)  # (n, 4) xywh
            # Re-order ann_ids to follow the detector's box order.
            ordered_ann_ids = recover_ann_ids(denorm_bb, raw_gd_bb,
                                              raw_ann_ids)
            # Sanity-check: every matched box must be (nearly) identical
            # to the corresponding ground-truth box.
            ordered_gd_bb = np.array([Anns[ann_id]['bbox']
                                      for ann_id in ordered_ann_ids])
            for i in range(denorm_bb.shape[0]):
                assert np.sum(np.abs(denorm_bb[i] - ordered_gd_bb[i])) < 0.01, \
                    '%s, %s' % (denorm_bb[i], ordered_gd_bb[i])
            # Duplicated boxes can map two detector boxes to one ann_id.
            if set(ordered_ann_ids) != set(raw_ann_ids):
                print('Please check img_id[%s]' % img_id)
                warning_img_ids.add(img_id)
            # Same number of ids in and out.
            assert len(ordered_ann_ids) == len(raw_ann_ids)
            iid_to_ann_ids[img_id] = ordered_ann_ids
    print('%s images contain duplicated bounding boxes.'
          % len(warning_img_ids))
    pprint(list(warning_img_ids))
    # Save; make sure the output directory exists first.
    os.makedirs(args.output_dir, exist_ok=True)
    output_file = osp.join(args.output_dir, 'iid_to_ann_ids.json')
    with open(output_file, 'w') as f:
        json.dump({'iid_to_ann_ids': iid_to_ann_ids}, f)
    print('%s iid_to_ann_ids saved in %s.' % (len(iid_to_ann_ids),
                                              output_file))
if __name__ == '__main__':
    # Command-line entry point: where to read REFER data and BUTD features,
    # and where to write the resulting img_id -> [ann_id] index.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--refer_dir', default='datasets/refer',
        help='folder saving all downloaded refer datasets')
    cli.add_argument(
        '--feats_dir', default='datasets/npy_per_img_id/visual_grounding_coco_gt',
        help='folder saving butd features.')
    cli.add_argument(
        '--output_dir', default='index',
        help='output folder saving img_id --> [ann_id]')
    main(cli.parse_args())