logo
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Readme
Files and versions

68 lines
1.7 KiB

"""
mcan vg annotation image id is COCO, need to map back to VG
"""
import json
ANNOTATION = '/ssd2/yenchun/ANNOTATIONS'
# karpathy 5k test split
TEST_5K = f'{ANNOTATION}/Image-Text-Matching/coco_test.json'
VG_QUESTION = f'{ANNOTATION}/VQA/VG_questions.json'
VG_ANSWER = f'{ANNOTATION}/VQA/VG_annotations.json'
VG_IMG_META = f'{ANNOTATION}/VQA/image_data.json'
def _get_img_id(img_name):
img_name = img_name[:-4]
id_ = int(img_name.split('_')[-1])
return id_
def _get_test_ids():
data = json.load(open(TEST_5K))
ids = {_get_img_id(d['filename']) for d in data}
return ids
def _get_coco2vg():
data = json.load(open(VG_IMG_META))
coco2vg = {d['coco_id']: d['image_id'] for d in data}
return coco2vg
def filter_data(data, test_ids):
filtered = (d for d in data if d['image_id'] not in test_ids)
return filtered
def map_data(data, coco2vg):
def gen_mapped():
for d in data:
coco_id = d['image_id']
d['image_id'] = coco2vg[coco_id]
yield d
return gen_mapped()
def main():
test_ids = _get_test_ids()
coco2vg = _get_coco2vg()
# process questions
questions = json.load(open(VG_QUESTION))['questions']
mapped_qs = list(map_data(filter_data(questions, test_ids), coco2vg))
qname = f'{VG_QUESTION}.mapped'
json.dump({'questions': mapped_qs}, open(qname, 'w'))
del questions, mapped_qs
# process answers
answers = json.load(open(VG_ANSWER))['annotations']
mapped_as = list(map_data(filter_data(answers, test_ids), coco2vg))
aname = f'{VG_ANSWER}.mapped'
json.dump({'annotations': mapped_as}, open(aname, 'w'))
if __name__ == '__main__':
main()