lightningdot
copied
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Readme
Files and versions
67 lines
1.7 KiB
67 lines
1.7 KiB
"""
|
|
The MCAN VG annotations store COCO image ids in `image_id`; this script
maps them back to VG image ids.
|
|
"""
|
|
import json
|
|
|
|
|
|
# Root directory holding every annotation file referenced below.
ANNOTATION = '/ssd2/yenchun/ANNOTATIONS'

# Karpathy 5k test split.
TEST_5K = f'{ANNOTATION}/Image-Text-Matching/coco_test.json'

# VG VQA question / answer files, plus the VG image metadata file that is
# read for its `coco_id` and `image_id` fields.
VG_QUESTION = f'{ANNOTATION}/VQA/VG_questions.json'
VG_ANSWER = f'{ANNOTATION}/VQA/VG_annotations.json'
VG_IMG_META = f'{ANNOTATION}/VQA/image_data.json'
|
|
|
|
|
|
def _get_img_id(img_name):
|
|
img_name = img_name[:-4]
|
|
id_ = int(img_name.split('_')[-1])
|
|
return id_
|
|
|
|
|
|
def _get_test_ids():
    """Load the image ids listed in the Karpathy 5k test split file.

    Reads the JSON file at ``TEST_5K`` and parses the ``'filename'`` field
    of every entry with ``_get_img_id``.

    Returns:
        A set of int image ids.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(TEST_5K) as f:
        data = json.load(f)
    return {_get_img_id(d['filename']) for d in data}
|
|
|
|
|
|
def _get_coco2vg():
    """Build the lookup table from COCO image id to VG image id.

    Reads the JSON file at ``VG_IMG_META`` and maps every entry's
    ``'coco_id'`` to its ``'image_id'``.

    Returns:
        A dict ``{coco_id: image_id}``.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(VG_IMG_META) as f:
        data = json.load(f)
    return {d['coco_id']: d['image_id'] for d in data}
|
|
|
|
|
|
def filter_data(data, test_ids):
    """Lazily drop every record whose ``'image_id'`` is in `test_ids`.

    Args:
        data: iterable of dict records, each with an ``'image_id'`` key.
        test_ids: container of image ids to exclude.

    Returns:
        A generator yielding the remaining records in their original order.
    """
    def _keep(record):
        return record['image_id'] not in test_ids

    return (record for record in data if _keep(record))
|
|
|
|
|
|
def map_data(data, coco2vg):
    """Lazily rewrite each record's ``'image_id'`` through `coco2vg`.

    Records are modified in place and the same dict objects are yielded.

    Args:
        data: iterable of dict records, each with an ``'image_id'`` key.
        coco2vg: mapping from the current image id to the replacement id.

    Yields:
        Each record after its ``'image_id'`` has been replaced.

    Raises:
        KeyError: (during iteration) if a record's image id is not a key of
            `coco2vg`.
    """
    for record in data:
        record['image_id'] = coco2vg[record['image_id']]
        yield record
|
|
|
|
|
|
def _remap_file(path, key, test_ids, coco2vg):
    """Filter and remap the records stored under `key` in JSON file `path`.

    Records whose image id is in `test_ids` are dropped, the remaining ones
    get their image id mapped through `coco2vg`, and the result is written
    to ``f'{path}.mapped'`` as ``{key: [records...]}``.
    """
    with open(path) as f:
        records = json.load(f)[key]
    mapped = list(map_data(filter_data(records, test_ids), coco2vg))
    # Close the output explicitly so the data is flushed to disk before
    # returning, instead of relying on garbage collection.
    with open(f'{path}.mapped', 'w') as f:
        json.dump({key: mapped}, f)


def main():
    """Write ``.mapped`` copies of the VG question and answer files.

    Records belonging to the Karpathy 5k test split are removed and the
    image ids of the remaining ones are converted with the COCO -> VG table.
    """
    test_ids = _get_test_ids()
    coco2vg = _get_coco2vg()

    # Each file is handled in its own call so its records go out of scope
    # before the next file is loaded.
    _remap_file(VG_QUESTION, 'questions', test_ids, coco2vg)
    _remap_file(VG_ANSWER, 'annotations', test_ids, coco2vg)
|
|
|
|
|
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|