StreamDataset

class StreamDataset[source]

Base class for all datasets that represent an iterable of data samples; every such dataset should subclass it. This form of dataset is particularly useful when the data comes from a stream. All subclasses should override __iter__(), which returns an iterator over the samples in the dataset.

Returns

An iterable Dataset.

Return type

Dataset

Examples

from megengine.data.dataset import StreamDataset
from megengine.data.dataloader import DataLoader, get_worker_info
from megengine.data.sampler import StreamSampler

class MyStream(StreamDataset):
    """Example stream dataset that serves one finite stream per worker.

    ``self.data`` holds three independent iterators; ``__iter__`` picks the
    one at the position given by the current worker's ``idx``.
    """

    def __init__(self):
        # Three separate source iterators, selected by index in __iter__.
        self.data = [iter([1, 2, 3]), iter([4, 5, 6]), iter([7, 8, 9])]

    def __iter__(self):
        """Yield the samples of the stream selected by the worker's index."""
        # NOTE(review): assumes this is called inside a DataLoader worker so
        # that get_worker_info() returns an object exposing ``idx`` - confirm
        # the behavior when iterated from the main process.
        worker_info = get_worker_info()
        # ``yield from`` ends the generator cleanly once the source iterator
        # is exhausted. A bare ``next()`` inside ``while True`` would let
        # StopIteration escape the generator body, which PEP 479 converts
        # into a RuntimeError.
        yield from self.data[worker_info.idx]

# Build the pieces separately so each argument is easy to read.
stream_dataset = MyStream()
stream_sampler = StreamSampler(batch_size=2)

# Three workers with parallel streaming enabled.
dataloader = DataLoader(
    dataset=stream_dataset,
    sampler=stream_sampler,
    num_workers=3,
    parallel_stream=True,
)

# Print every batch the loader produces.
for batch in dataloader:
    print(batch)