lightningdot/uniter_model/data/loader.py


								"""

								A meta data loader for sampling from different datasets / training tasks

								A prefetch loader to speed up data loading

								"""

								import random


								import torch

								from torch.utils.data import DataLoader


								from uniter_model.utils.distributed import any_broadcast


								class MetaLoader(object):
								    """Wrap multiple data loaders and sample batches from them by task.

								    Args:
								        loaders: dict mapping a task name to either a ``DataLoader``
								            (sampling ratio 1) or a ``(loader, ratio)`` tuple. ``ratio`` is
								            the number of times the task name is put into the sampling
								            pool, so it must be an integer.
								        accum_steps: a new task is drawn only every ``accum_steps`` yielded
								            batches; the batches in between reuse the previous task.
								        distributed: if true, the drawn task is passed through
								            ``any_broadcast(task, 0)`` before use.

								    Raises:
								        ValueError: if a value of ``loaders`` is neither a ``DataLoader``
								            nor a tuple.
								    """

								    def __init__(self, loaders, accum_steps=1, distributed=False):
								        assert isinstance(loaders, dict)
								        self.name2loader = {}
								        self.name2iter = {}
								        self.sampling_pools = []
								        for name, spec in loaders.items():
								            if isinstance(spec, tuple):
								                loader, ratio = spec
								            elif isinstance(spec, DataLoader):
								                loader, ratio = spec, 1
								            else:
								                raise ValueError(
								                    "loader for task {!r} must be a DataLoader or a "
								                    "(loader, ratio) tuple, got {}".format(
								                        name, type(spec).__name__))
								            self.name2loader[name] = loader
								            self.name2iter[name] = iter(loader)
								            # a task appears `ratio` times in the pool, so a uniform draw
								            # from the pool samples tasks proportionally to their ratios
								            self.sampling_pools.extend([name] * ratio)

								        self.accum_steps = accum_steps
								        self.distributed = distributed
								        # number of batches yielded so far (kept across __iter__ calls)
								        self.step = 0

								    def __iter__(self):
								        """Yield ``(task, batch)`` pairs; this iterator runs indefinitely.

								        Raises:
								            RuntimeError: if a freshly restarted loader yields no batch.
								        """
								        task = self.sampling_pools[0]
								        while True:
								            if self.step % self.accum_steps == 0:
								                task = random.choice(self.sampling_pools)
								                if self.distributed:
								                    # make sure all processes are training the same task
								                    task = any_broadcast(task, 0)
								            self.step += 1
								            task_iter = self.name2iter[task]
								            try:
								                batch = next(task_iter)
								            except StopIteration:
								                # the loader is exhausted: restart it and keep going
								                task_iter = iter(self.name2loader[task])
								                try:
								                    batch = next(task_iter)
								                except StopIteration:
								                    # A StopIteration escaping a generator body is turned
								                    # into an opaque RuntimeError by the interpreter;
								                    # raise the same type with a message naming the task.
								                    raise RuntimeError(
								                        "data loader for task {!r} yields no "
								                        "batches".format(task))
								                self.name2iter[task] = task_iter

								            yield task, batch


								def move_to_cuda(batch):
								    """Return ``batch`` with every contained tensor moved via ``.cuda()``.

								    Lists, tuples and dicts are rebuilt recursively (as plain ``list``,
								    ``tuple`` and ``dict`` objects); tensors are transferred with
								    ``non_blocking=True``; any other object is returned unchanged.
								    """
								    if isinstance(batch, torch.Tensor):
								        return batch.cuda(non_blocking=True)
								    if isinstance(batch, list):
								        return [move_to_cuda(item) for item in batch]
								    if isinstance(batch, tuple):
								        return tuple(map(move_to_cuda, batch))
								    if isinstance(batch, dict):
								        return {key: move_to_cuda(value) for key, value in batch.items()}
								    # not a tensor and not a supported container: hand it back as-is
								    return batch


								def record_cuda_stream(batch):
								    """Call ``record_stream`` with the current CUDA stream on every tensor.

								    Walks lists, tuples and dict values recursively; objects of any other
								    type are ignored. Returns ``None``.
								    """
								    if isinstance(batch, torch.Tensor):
								        batch.record_stream(torch.cuda.current_stream())
								        return
								    if isinstance(batch, (list, tuple)):
								        children = batch
								    elif isinstance(batch, dict):
								        children = batch.values()
								    else:
								        # nothing to record for non-tensor leaves
								        return
								    for child in children:
								        record_cuda_stream(child)


								class PrefetchLoader(object):
								    """
								    overlap compute and cuda data transfer
								    (copied and then modified from nvidia apex)

								    Wraps an iterable ``loader``. While the consumer works on one batch,
								    the next one is already fetched from the loader and moved to CUDA
								    inside a dedicated side stream. Attributes not found on this object
								    are looked up on the wrapped loader.
								    """

								    def __init__(self, loader):
								        self.loader = loader
								        # side stream in which the host-to-device copies are issued
								        self.stream = torch.cuda.Stream()

								    def __iter__(self):
								        """Yield the loader's batches after moving them to CUDA.

								        Iteration ends when the loader is exhausted. A ``None`` batch
								        produced by the loader also ends it, because ``None`` is the
								        end-of-data sentinel used between ``preload`` and ``next``.
								        """
								        loader_it = iter(self.loader)
								        self.preload(loader_it)
								        batch = self.next(loader_it)
								        while batch is not None:
								            yield batch
								            batch = self.next(loader_it)

								    def __len__(self):
								        return len(self.loader)

								    def preload(self, it):
								        """Fetch the next batch from ``it`` into ``self.batch``.

								        ``self.batch`` is set to ``None`` when ``it`` is exhausted;
								        otherwise the batch is moved to CUDA inside the side stream.
								        """
								        try:
								            self.batch = next(it)
								        except StopIteration:
								            self.batch = None
								            return
								        # NOTE: if record_stream() doesn't work, another option is to
								        # create the device tensors on the main stream, make the side
								        # stream wait for the main stream, and copy_() into them here.
								        with torch.cuda.stream(self.stream):
								            self.batch = move_to_cuda(self.batch)

								    def next(self, it):
								        """Return the preloaded batch and start loading the following one.

								        Returns ``None`` once ``it`` has been exhausted.
								        """
								        # the copy issued in the side stream must finish before the
								        # current stream is allowed to use the batch
								        torch.cuda.current_stream().wait_stream(self.stream)
								        batch = self.batch
								        if batch is not None:
								            # the tensors were created in the side stream; register their
								            # use by the current stream
								            record_cuda_stream(batch)
								        self.preload(it)
								        return batch

								    def __getattr__(self, name):
								        """Delegate lookups of missing attributes to the wrapped loader.

								        Raises:
								            AttributeError: if ``loader`` itself is not set yet, or the
								                wrapped loader has no attribute ``name``.
								        """
								        # __getattr__ only runs when normal lookup failed. If 'loader' is
								        # the missing name (bare instance, e.g. while being copied or
								        # unpickled), reading self.loader below would re-enter this
								        # method forever.
								        if name == 'loader':
								            raise AttributeError(name)
								        # getattr() (unlike calling __getattribute__ directly) also
								        # honours a __getattr__ fallback defined by the wrapped object
								        return getattr(self.loader, name)