# lightningdot — dataset interfaces (copied; original file: 283 lines, 9.4 KiB)
"""
|
|
Dataset interfaces
|
|
"""
|
|
from collections import defaultdict
|
|
from contextlib import contextmanager
|
|
import io
|
|
import json
|
|
import lmdb
|
|
from os.path import exists
|
|
|
|
import numpy as np
|
|
import torch
|
|
from torch.utils.data import Dataset, ConcatDataset
|
|
from tqdm import tqdm
|
|
from lz4.frame import compress, decompress
|
|
|
|
import msgpack
|
|
import msgpack_numpy
|
|
msgpack_numpy.patch()
|
|
|
|
|
|
def _fp16_to_fp32(feat_dict):
|
|
out = {k: arr.astype(np.float32)
|
|
if arr.dtype == np.float16 else arr
|
|
for k, arr in feat_dict.items()}
|
|
return out
|
|
|
|
|
|
def compute_num_bb(confs, conf_th, min_bb, max_bb):
    """Count detections with confidence above *conf_th*, clamped to [min_bb, max_bb]."""
    above_th = (confs > conf_th).sum()
    return min(max_bb, max(min_bb, above_th))
class DetectFeatLmdb(object):
    """Read-only LMDB of pre-extracted image detection features.

    Each record maps an image file name to a feature dump holding (at least)
    'features', 'norm_bb' and 'conf' numpy arrays.  Dumps are stored either
    as compressed npz blobs (compress=True) or msgpack payloads
    (compress=False).
    """

    def __init__(self, img_dir, conf_th=0.2, max_bb=100, min_bb=10, num_bb=36,
                 compress=True):
        self.img_dir = img_dir
        # bug fix: _compute_nbb reads these attributes, but they were never
        # stored, so opening a db without a pre-computed nbb json crashed
        # with AttributeError
        self.conf_th = conf_th
        self.max_bb = max_bb
        self.min_bb = min_bb
        if conf_th == -1:
            # fixed number of bounding boxes for every image
            db_name = f'feat_numbb{num_bb}'
            self.name2nbb = defaultdict(lambda: num_bb)
        else:
            db_name = f'feat_th{conf_th}_max{max_bb}_min{min_bb}'
            nbb = f'nbb_th{conf_th}_max{max_bb}_min{min_bb}.json'
            if not exists(f'{img_dir}/{nbb}'):
                # nbb is not pre-computed; derive it from the db after opening
                self.name2nbb = None
            else:
                self.name2nbb = json.load(open(f'{img_dir}/{nbb}'))
        self.compress = compress
        if compress:
            db_name += '_compressed'

        if self.name2nbb is None:
            db_name = 'all_compressed' if compress else 'all'
        # only read ahead on single node training
        # NOTE(review): _check_distributed is not defined in this file —
        # confirm it is provided elsewhere in the package
        self.env = lmdb.open(f'{img_dir}/{db_name}',
                             readonly=True, create=False,
                             readahead=not _check_distributed())
        self.txn = self.env.begin(buffers=True)
        if self.name2nbb is None:
            self.name2nbb = self._compute_nbb()

    def _compute_nbb(self):
        """Scan every record and compute the per-image number of boxes."""
        name2nbb = {}
        fnames = json.loads(self.txn.get(key=b'__keys__').decode('utf-8'))
        for fname in tqdm(fnames, desc='reading images'):
            dump = self.txn.get(fname.encode('utf-8'))
            if self.compress:
                with io.BytesIO(dump) as reader:
                    img_dump = np.load(reader, allow_pickle=True)
                    confs = img_dump['conf']
            else:
                img_dump = msgpack.loads(dump, raw=False)
                confs = img_dump['conf']
            name2nbb[fname] = compute_num_bb(confs, self.conf_th,
                                             self.min_bb, self.max_bb)
        return name2nbb

    def __del__(self):
        # guard: __init__ may have raised before self.env was assigned
        if hasattr(self, 'env'):
            self.env.close()

    def get_dump(self, file_name):
        """Return the full fp32 feature dict for *file_name*, truncated to nbb boxes."""
        # hack for MRC
        dump = self.txn.get(file_name.encode('utf-8'))
        nbb = self.name2nbb[file_name]
        if self.compress:
            with io.BytesIO(dump) as reader:
                img_dump = np.load(reader, allow_pickle=True)
                img_dump = _fp16_to_fp32(img_dump)
        else:
            img_dump = msgpack.loads(dump, raw=False)
            img_dump = _fp16_to_fp32(img_dump)
        img_dump = {k: arr[:nbb, ...] for k, arr in img_dump.items()}
        return img_dump

    def __getitem__(self, file_name):
        """Return (features, norm_bb) float32 tensors truncated to nbb boxes."""
        dump = self.txn.get(file_name.encode('utf-8'))
        nbb = self.name2nbb[file_name]
        if self.compress:
            with io.BytesIO(dump) as reader:
                img_dump = np.load(reader, allow_pickle=True)
                img_dump = {'features': img_dump['features'],
                            'norm_bb': img_dump['norm_bb']}
        else:
            img_dump = msgpack.loads(dump, raw=False)
        img_feat = torch.tensor(img_dump['features'][:nbb, :]).float()
        img_bb = torch.tensor(img_dump['norm_bb'][:nbb, :]).float()
        return img_feat, img_bb

    def __contains__(self, file_name):
        return self.txn.get(file_name.encode('utf-8')) is not None
@contextmanager
def open_lmdb(db_dir, readonly=False):
    """Context manager yielding a TxtLmdb; closes (and commits) it on exit."""
    txt_db = TxtLmdb(db_dir, readonly)
    try:
        yield txt_db
    finally:
        # drop the reference so TxtLmdb.__del__ commits pending writes
        # and closes the environment
        del txt_db
class TxtLmdb(object):
    """LMDB key-value store with msgpack-serialized, lz4-compressed values.

    readonly=True opens an existing db for training; readonly=False creates
    (or opens) a db for preprocessing and buffers writes in a transaction.
    """

    def __init__(self, db_dir, readonly=True):
        self.readonly = readonly
        if readonly:
            # training
            # NOTE(review): _check_distributed is not defined in this file —
            # confirm it is provided elsewhere in the package
            self.env = lmdb.open(db_dir,
                                 readonly=True, create=False,
                                 readahead=not _check_distributed())
            self.txn = self.env.begin(buffers=True)
            self.write_cnt = None
        else:
            # prepro
            self.env = lmdb.open(db_dir, readonly=False, create=True,
                                 map_size=4 * 1024**4)  # 4 TiB address space
            self.txn = self.env.begin(write=True)
            self.write_cnt = 0

    def __del__(self):
        # robustness fix: if __init__ raised before attributes were set,
        # the old code produced a secondary AttributeError during gc
        if getattr(self, 'write_cnt', None):
            self.txn.commit()
        if hasattr(self, 'env'):
            self.env.close()

    def __getitem__(self, key):
        """Decompress and deserialize the value stored under *key*."""
        return msgpack.loads(decompress(self.txn.get(key.encode('utf-8'))),
                             raw=False)

    def __setitem__(self, key, value):
        """Serialize, compress and stage *value*; commits every 1000 writes."""
        # NOTE: not thread safe
        if self.readonly:
            raise ValueError('readonly text DB')
        ret = self.txn.put(key.encode('utf-8'),
                           compress(msgpack.dumps(value, use_bin_type=True)))
        self.write_cnt += 1
        if self.write_cnt % 1000 == 0:
            self.txn.commit()
            self.txn = self.env.begin(write=True)
            self.write_cnt = 0
        return ret
def get_ids_and_lens(db):
    """Return (lens, ids): per-example token lengths and ids from a TxtTokLmdb."""
    assert isinstance(db, TxtTokLmdb)
    ids = list(db.ids)
    lens = [db.id2len[id_] for id_ in ids]
    return lens, ids
class DetectFeatTxtTokDataset(Dataset):
    """Pairs a tokenized-text LMDB with an image detection-feature LMDB."""

    def __init__(self, txt_db, img_db):
        assert isinstance(txt_db, TxtTokLmdb)
        assert isinstance(img_db, DetectFeatLmdb)
        self.txt_db = txt_db
        self.img_db = img_db
        txt_lens, self.ids = get_ids_and_lens(txt_db)

        txt2img = txt_db.txt2img
        # per-example length = #text tokens + #image boxes (for length batching)
        self.lens = []
        for txt_len, id_ in zip(txt_lens, self.ids):
            img_name = txt2img[id_]
            self.lens.append(txt_len + self.img_db.name2nbb[img_name])

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, i):
        return self.txt_db[self.ids[i]]

    def _get_img_feat(self, fname):
        """Fetch (features, boxes-with-area, num_boxes) for one image."""
        feats, bb = self.img_db[fname]
        # append w*h; assumes norm_bb columns 4 and 5 hold width/height — TODO confirm
        area = bb[:, 4:5] * bb[:, 5:]
        full_bb = torch.cat([bb, area], dim=-1)
        return feats, full_bb, feats.size(0)
class ConcatDatasetWithLens(ConcatDataset):
    """ConcatDataset that also concatenates per-example `lens` for length batching."""

    def __init__(self, datasets):
        super().__init__(datasets)
        self.lens = [length for dataset in datasets for length in dataset.lens]

    def __getattr__(self, name):
        # any attribute not found the normal way is assumed to be a method
        # to fan out over every child dataset
        return self._run_method_on_all_dsets(name)

    def _run_method_on_all_dsets(self, name):
        def run_all(*args, **kwargs):
            results = []
            for dataset in self.datasets:
                method = dataset.__getattribute__(name)
                results.append(method(*args, **kwargs))
            return results
        return run_all
def pad_tensors(tensors, lens=None, pad=0):
    """Pad a batch of [T_i, hid] tensors into one [B, max_T, hid] tensor.

    Args:
        tensors: sequence of tensors sharing the trailing hidden dim.
        lens: optional per-tensor lengths; defaults to each tensor's size(0).
        pad: fill value for padded positions (0 keeps the zeros() default).

    Returns:
        New tensor of shape [len(tensors), max(lens), hid], same dtype as input.
    """
    if lens is None:
        lens = [t.size(0) for t in tensors]
    max_len = max(lens)
    bs = len(tensors)
    hid = tensors[0].size(-1)
    dtype = tensors[0].dtype
    output = torch.zeros(bs, max_len, hid, dtype=dtype)
    if pad:
        # only needed for non-zero padding; zeros() already covers pad == 0
        output.fill_(pad)
    # idiom fix: dropped the deprecated `.data` accesses — direct indexed
    # assignment is equivalent here (output is freshly created, no autograd)
    for i, (t, l) in enumerate(zip(tensors, lens)):
        output[i, :l, ...] = t
    return output
def get_gather_index(txt_lens, num_bbs, batch_size, max_len, out_size):
    """Identity gather index: every row is simply [0, 1, ..., out_size-1].

    txt_lens / batch_size / max_len are accepted only for signature
    compatibility with the UNITER variant and are intentionally unused;
    the number of rows follows len(num_bbs).
    """
    base = torch.arange(0, out_size, dtype=torch.long).unsqueeze(0)
    return base.repeat(len(num_bbs), 1)
def get_gather_index_uniter(txt_lens, num_bbs, batch_size, max_len, out_size):
    """UNITER-style gather index: text positions first, image positions after.

    Row i starts as [0..out_size-1]; the slice [tl, tl+nbb) is overwritten
    with [max_len, max_len+nbb) so image features (stored from offset
    max_len) follow the i-th example's tl text tokens.
    """
    assert len(txt_lens) == len(num_bbs) == batch_size
    rows = []
    for tl, nbb in zip(txt_lens, num_bbs):
        row = torch.arange(0, out_size, dtype=torch.long)
        row[tl:tl + nbb] = torch.arange(max_len, max_len + nbb,
                                        dtype=torch.long)
        rows.append(row)
    return torch.stack(rows, dim=0)
def get_gather_index_img(txt_lens, num_bbs, batch_size, max_len, out_size):
    """Image-first gather index: boxes (stored from offset max_len) precede text.

    Row i is [max_len..max_len+nbb) followed by [0..tl); any remaining
    positions stay 0.
    """
    gather_index = torch.zeros(batch_size, out_size, dtype=torch.long)
    for i, (tl, nbb) in enumerate(zip(txt_lens, num_bbs)):
        img_pos = torch.arange(max_len, max_len + nbb, dtype=torch.long)
        txt_pos = torch.arange(0, tl, dtype=torch.long)
        gather_index[i, :nbb] = img_pos
        gather_index[i, nbb:nbb + tl] = txt_pos
    return gather_index
class ImageLmdbGroup(object):
    """Lazily opens and caches one DetectFeatLmdb per image directory."""

    def __init__(self, conf_th, max_bb, min_bb, num_bb, compress):
        self.path2imgdb = {}  # path -> opened DetectFeatLmdb
        self.conf_th = conf_th
        self.max_bb = max_bb
        self.min_bb = min_bb
        self.num_bb = num_bb
        self.compress = compress

    def __getitem__(self, path):
        """Return the cached DetectFeatLmdb for *path*, opening it on first use."""
        img_db = self.path2imgdb.get(path, None)
        if img_db is None:
            img_db = DetectFeatLmdb(path, self.conf_th, self.max_bb,
                                    self.min_bb, self.num_bb, self.compress)
            # bug fix: the opened db was never stored, so the cache never
            # hit and every lookup reopened the LMDB environment
            self.path2imgdb[path] = img_db
        return img_db