logo
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Readme
Files and versions

44 lines
1.3 KiB

# Tokenizer name passed to prepro.py via --bert; it also selects the suffix
# used in the output DB file names.
TOKER='bert-base-cased'
# Directory that receives the generated text DBs (prefix of every --output).
TXT_DB='datasets/TXT_DB_v3'
# Root directory of the annotation files.
ANNOTATIONS='datasets'
# Referring-expression annotations are read from $ANNOTATIONS/refer.
RE_ANN="$ANNOTATIONS/refer"

# Map the tokenizer name to the suffix embedded in output DB file names.
case "$TOKER" in
  'bert-large-cased') SUFFIX='large-cased' ;;
  'bert-base-cased') SUFFIX='base-cased' ;;
  *)
    # Abort here: continuing with SUFFIX unset would produce output names
    # ending in "_.db".
    echo "invalid tokenizer specified" >&2
    exit 1
    ;;
esac
# refcoco, refcoco+
# Run prepro.py once per (dataset, split) pair. SPLIT is used only in the
# --output file name; the same three annotation files are passed for every
# split of a dataset.
for DATASET in 'refcoco' 'refcoco+'; do
  for SPLIT in 'train' 'val' 'testA' 'testB'; do
    # Every expansion is quoted so a path containing spaces or glob
    # characters still reaches prepro.py as a single argument.
    python prepro.py --task re --bert "$TOKER" \
      --annotations "$RE_ANN/$DATASET/refs(unc).p" \
        "$RE_ANN/$DATASET/instances.json" \
        index/iid_to_ann_ids.json \
      --output "$TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db"
  done
done
# refcocog
# Same invocation as for refcoco/refcoco+, but with the 'refs(umd).p'
# annotation file and the splits train/val/test. SPLIT is used only in the
# --output file name.
DATASET='refcocog'
for SPLIT in 'train' 'val' 'test'; do
  # Every expansion is quoted so a path containing spaces or glob characters
  # still reaches prepro.py as a single argument.
  python prepro.py --task re --bert "$TOKER" \
    --annotations "$RE_ANN/$DATASET/refs(umd).p" \
      "$RE_ANN/$DATASET/instances.json" \
      index/iid_to_ann_ids.json \
    --output "$TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db"
done
# DATASET='refcoco'
# SPLIT='train'
# python prepro.py --task re --bert $TOKER \
# --annotations $RE_ANN/${DATASET}/'refs(unc).p' \
# $RE_ANN/${DATASET}/instances.json \
# index/iid_to_ann_ids.json \
# --output $TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db