FFCV

Source code for ffcv.fields.bytes

from typing import Callable, TYPE_CHECKING, Tuple, Type
from dataclasses import replace

import numpy as np

from .base import Field, ARG_TYPE
from ..pipeline.operation import Operation
from ..pipeline.state import State
from ..pipeline.compiler import Compiler
from ..pipeline.allocation_query import AllocationQuery
from ..libffcv import memcpy


[docs]class BytesDecoder(Operation):
[docs] def declare_state_and_memory(self, previous_state: State) -> Tuple[State, AllocationQuery]: max_size = self.metadata['size'].max() my_shape = (max_size,) return ( replace(previous_state, jit_mode=True, shape=my_shape, dtype='<u1'), AllocationQuery(my_shape, dtype='<u1') )
[docs] def generate_code(self) -> Callable: mem_read = self.memory_read my_memcpy = Compiler.compile(memcpy) my_range = Compiler.get_iterator() def decoder(batch_indices, destination, metadata, storage_state): for dest_ix in my_range(batch_indices.shape[0]): source_ix = batch_indices[dest_ix] data = mem_read(metadata[source_ix]['ptr'], storage_state) my_memcpy(data, destination[dest_ix]) return destination return decoder
[docs]class BytesField(Field): """ A subclass of :class:`~ffcv.fields.Field` supporting variable-length byte arrays. Intended for use with data such as text or raw data which may not have a fixed size. Data is written sequentially while saving pointers and read by pointer lookup. The writer expects to be passed a 1D uint8 numpy array of variable length for each sample. """ def __init__(self): pass @property def metadata_type(self) -> np.dtype: return np.dtype([ ('ptr', '<u8'), ('size', '<u8') ])
[docs] @staticmethod def from_binary(binary: ARG_TYPE) -> Field: return BytesField()
[docs] def to_binary(self) -> ARG_TYPE: return np.zeros(1, dtype=ARG_TYPE)[0]
[docs] def encode(self, destination, field, malloc): ptr, buffer = malloc(field.size) buffer[:] = field destination['ptr'] = ptr destination['size'] = field.size
[docs] def get_decoder_class(self) -> Type[Operation]: return BytesDecoder