# lightningdot/uniter_model/scripts/prepro_re.sh


								# Tokenizer name forwarded to prepro.py through --bert.
								TOKER='bert-base-cased'
								# Directory prefix used for the --output database paths.
								TXT_DB='datasets/TXT_DB_v3'
								# Root of the annotation files; the referring-expression data is read
								# from its refer/ subdirectory.
								ANNOTATIONS='datasets'
								RE_ANN="$ANNOTATIONS/refer"

								# tokenizer_suffix NAME
								#   Prints the output-file suffix that corresponds to tokenizer NAME.
								#   Returns 1 and reports on stderr when NAME is not a supported tokenizer.
								tokenizer_suffix() {
								    case "$1" in
								        bert-large-cased) printf '%s\n' 'large-cased' ;;
								        bert-base-cased) printf '%s\n' 'base-cased' ;;
								        *)
								            # stderr, so the message is not captured as a suffix by $(...).
								            echo "invalid tokenizer specified" >&2
								            return 1
								            ;;
								    esac
								}

								# Stop here on an unsupported tokenizer: continuing would leave SUFFIX
								# empty and produce output names ending in "_.db".
								SUFFIX=$(tokenizer_suffix "$TOKER") || exit 1


								# run_re_prepro DATASET REFS_FILE SPLIT
								#   Runs prepro.py once for the referring-expression task, reading
								#   REFS_FILE and instances.json from $RE_ANN/DATASET and writing to
								#   $TXT_DB/DATASET_SPLIT_$SUFFIX.db.
								#   Reads the globals TOKER, RE_ANN, TXT_DB and SUFFIX.
								#   Returns the exit status of the python invocation.
								#   NOTE(review): SPLIT only appears in the output file name; no split
								#   argument is passed to prepro.py here — confirm prepro.py derives the
								#   split from the output path.
								run_re_prepro() {
								    local dataset=$1 refs_file=$2 split=$3
								    # Continuation lines must be contiguous: a blank line after a
								    # trailing backslash ends the command early.
								    python prepro.py --task re --bert "$TOKER" \
								        --annotations "$RE_ANN/$dataset/$refs_file" \
								            "$RE_ANN/$dataset/instances.json" \
								            index/iid_to_ann_ids.json \
								        --output "$TXT_DB/${dataset}_${split}_$SUFFIX.db"
								}

								# refcoco, refcoco+ (UNC reference file; train/val/testA/testB)
								for DATASET in 'refcoco' 'refcoco+'; do
								    for SPLIT in 'train' 'val' 'testA' 'testB'; do
								        run_re_prepro "$DATASET" 'refs(unc).p' "$SPLIT"
								    done
								done

								# refcocog (UMD reference file; train/val/test)
								DATASET='refcocog'
								for SPLIT in 'train' 'val' 'test'; do
								    run_re_prepro "$DATASET" 'refs(umd).p' "$SPLIT"
								done


								# DATASET='refcoco'

								# SPLIT='train'

								# python prepro.py --task re --bert $TOKER \

								#     --annotations $RE_ANN/${DATASET}/'refs(unc).p' \

								#         $RE_ANN/${DATASET}/instances.json \

								#         index/iid_to_ann_ids.json \

								#     --output $TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db