# lightningdot/uniter_model/scripts/prepro_re.sh

# Tokenizer name handed to prepro.py through --bert; it also selects the
# suffix of the generated DB names (see SUFFIX below).
TOKER='bert-base-cased'
# Directory prefix of every --output path written by this script.
TXT_DB='datasets/TXT_DB_v3'

# Root of the annotation files and the sub-directory used for the 're' task.
ANNOTATIONS='datasets'
RE_ANN="$ANNOTATIONS/refer"

# Map the tokenizer name to the suffix embedded in the output DB names.
case "$TOKER" in
    'bert-large-cased')
        SUFFIX='large-cased'
        ;;
    'bert-base-cased')
        SUFFIX='base-cased'
        ;;
    *)
        # Abort instead of continuing: with SUFFIX unset every output would be
        # named "<dataset>_<split>_.db".
        echo "invalid tokenizer specified" >&2
        exit 1
        ;;
esac

# refcoco, refcoco+
# Runs prepro.py once per (dataset, split) pair. Both datasets read the
# 'refs(unc).p' annotation file, and each run writes
# $TXT_DB/<dataset>_<split>_$SUFFIX.db.
# NOTE(review): SPLIT only appears in the --output name; confirm that
# prepro.py derives the split to process from that path.
for DATASET in 'refcoco' 'refcoco+'; do
    for SPLIT in 'train' 'val' 'testA' 'testB'; do
        # All expansions are quoted so the paths are passed as single
        # arguments regardless of word splitting or globbing.
        python prepro.py --task re --bert "$TOKER" \
            --annotations "$RE_ANN/$DATASET/refs(unc).p" \
                "$RE_ANN/$DATASET/instances.json" \
                index/iid_to_ann_ids.json \
            --output "$TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db"
    done
done

# refcocog
# Runs prepro.py once per split. This dataset reads the 'refs(umd).p'
# annotation file and has a single 'test' split in this loop; each run writes
# $TXT_DB/refcocog_<split>_$SUFFIX.db.
# NOTE(review): SPLIT only appears in the --output name; confirm that
# prepro.py derives the split to process from that path.
DATASET='refcocog'
for SPLIT in 'train' 'val' 'test'; do
    # All expansions are quoted so the paths are passed as single arguments
    # regardless of word splitting or globbing.
    python prepro.py --task re --bert "$TOKER" \
        --annotations "$RE_ANN/$DATASET/refs(umd).p" \
            "$RE_ANN/$DATASET/instances.json" \
            index/iid_to_ann_ids.json \
        --output "$TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db"
done


# DATASET='refcoco'
# SPLIT='train'
# python prepro.py --task re --bert $TOKER \
#     --annotations $RE_ANN/${DATASET}/'refs(unc).p' \
#         $RE_ANN/${DATASET}/instances.json \
#         index/iid_to_ann_ids.json \
#     --output $TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db
# update the operator. Signed-off-by: wxywb <xy.wang@zilliz.com> 2 years ago
			`TOKER='bert-base-cased'`
			`TXT_DB='datasets/TXT_DB_v3'`

			`ANNOTATIONS='datasets'`
			`RE_ANN=$ANNOTATIONS/refer`

			`if [ $TOKER = 'bert-large-cased' ]; then`
			`SUFFIX='large-cased'`
			`elif [ $TOKER = 'bert-base-cased' ]; then`
			`SUFFIX='base-cased'`
			`else`
			`echo "invalid tokenizer specified"`
			`# exit(1)`
			`fi`

			`# refcoco, refcoco+`
			`for DATASET in 'refcoco' 'refcoco+'; do`
			`for SPLIT in 'train' 'val' 'testA' 'testB'; do`
			`python prepro.py --task re --bert $TOKER \`
			`--annotations $RE_ANN/${DATASET}/'refs(unc).p' \`
			`$RE_ANN/${DATASET}/instances.json \`
			`index/iid_to_ann_ids.json \`
			`--output $TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db`
			`done`
			`done`

			`# refcocog`
			`DATASET='refcocog'`
			`for SPLIT in 'train' 'val' 'test'; do`
			`python prepro.py --task re --bert $TOKER \`
			`--annotations $RE_ANN/${DATASET}/'refs(umd).p' \`
			`$RE_ANN/${DATASET}/instances.json \`
			`index/iid_to_ann_ids.json \`
			`--output $TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db`
			`done`


			`# DATASET='refcoco'`
			`# SPLIT='train'`
			`# python prepro.py --task re --bert $TOKER \`
			`# --annotations $RE_ANN/${DATASET}/'refs(unc).p' \`
			`# $RE_ANN/${DATASET}/instances.json \`
			`# index/iid_to_ann_ids.json \`
			`# --output $TXT_DB/${DATASET}_${SPLIT}_$SUFFIX.db`