lightningdot
copied
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Readme
Files and versions
44 lines
1.3 KiB
44 lines
1.3 KiB
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Tokenizer name; passed to prepro.py through --bert.
TOKER='bert-base-cased'
# Directory prefix of every --output *.db path produced below.
TXT_DB='datasets/TXT_DB_v3'
# Root of the annotation files; the RE annotations are read from its
# refer/ subdirectory.
ANNOTATIONS='datasets'
RE_ANN="${ANNOTATIONS}/refer"

#######################################
# Map a tokenizer name to the suffix used in the output DB file names.
# Arguments: $1 - tokenizer name
# Outputs:   the suffix on stdout; an error message on stderr when the
#            tokenizer name is not recognised
# Returns:   0 on success, 1 for an unrecognised tokenizer
#######################################
tokenizer_suffix() {
  case "$1" in
    'bert-large-cased')
      printf '%s\n' 'large-cased'
      ;;
    'bert-base-cased')
      printf '%s\n' 'base-cased'
      ;;
    *)
      echo "invalid tokenizer specified" >&2
      return 1
      ;;
  esac
}

# Stop here on an unknown tokenizer: continuing would leave SUFFIX empty and
# every output path would end in "_.db".
SUFFIX=$(tokenizer_suffix "${TOKER}") || exit 1

# refcoco, refcoco+
# One prepro.py run per (dataset, split) pair. Both datasets take their
# referring expressions from 'refs(unc).p'. All expansions are quoted so the
# paths reach prepro.py as single arguments regardless of their content.
for DATASET in 'refcoco' 'refcoco+'; do
  for SPLIT in 'train' 'val' 'testA' 'testB'; do
    python prepro.py --task re --bert "${TOKER}" \
      --annotations "${RE_ANN}/${DATASET}/refs(unc).p" \
                    "${RE_ANN}/${DATASET}/instances.json" \
                    index/iid_to_ann_ids.json \
      --output "${TXT_DB}/${DATASET}_${SPLIT}_${SUFFIX}.db"
  done
done

# refcocog
# One prepro.py run per split. Unlike the refcoco/refcoco+ runs, this dataset
# reads 'refs(umd).p' and has a single 'test' split. All expansions are quoted
# so the paths reach prepro.py as single arguments.
DATASET='refcocog'
for SPLIT in 'train' 'val' 'test'; do
  python prepro.py --task re --bert "${TOKER}" \
    --annotations "${RE_ANN}/${DATASET}/refs(umd).p" \
                  "${RE_ANN}/${DATASET}/instances.json" \
                  index/iid_to_ann_ids.json \
    --output "${TXT_DB}/${DATASET}_${SPLIT}_${SUFFIX}.db"
done

# Commented-out single invocation (refcoco / train):
# DATASET='refcoco'
# SPLIT='train'
# python prepro.py --task re --bert $TOKER \
#     --annotations $RE_ANN/${DATASET}/'refs(unc).p' \
#                   $RE_ANN/${DATASET}/instances.json \
#                   index/iid_to_ann_ids.json \
#     --output $TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db