FFCV

Source code for ffcv.fields.ndarray

from typing import Callable, TYPE_CHECKING, Tuple, Type
import warnings
import json
from dataclasses import replace

import numpy as np
import torch as ch

from .base import Field, ARG_TYPE
from ..pipeline.operation import Operation
from ..pipeline.state import State
from ..pipeline.compiler import Compiler
from ..pipeline.allocation_query import AllocationQuery
from ..libffcv import memcpy

if TYPE_CHECKING:
    from ..memory_managers.base import MemoryManager

class NDArrayDecoder(Operation):
    """Default decoder for :class:`~ffcv.fields.NDArrayField`.

    For each requested sample, the generated code reads the sample's bytes
    through ``self.memory_read`` and copies them into the matching row of the
    destination buffer.
    """

    def declare_state_and_memory(self, previous_state: State) -> Tuple[State, AllocationQuery]:
        """Describe the output of this operation and the memory it needs.

        Parameters
        ----------
        previous_state : State
            State produced by the preceding stage of the pipeline.

        Returns
        -------
        Tuple[State, AllocationQuery]
            A copy of ``previous_state`` with ``jit_mode`` enabled and the
            shape/dtype replaced by those of the field, together with an
            allocation request using the same shape and dtype.
        """
        field = self.field
        next_state = replace(
            previous_state,
            jit_mode=True,
            shape=field.shape,
            dtype=field.dtype,
        )
        allocation = AllocationQuery(field.shape, field.dtype)
        return next_state, allocation

    def generate_code(self) -> Callable:
        """Build the function that decodes a batch of samples.

        Returns
        -------
        Callable
            ``decoder(indices, destination, metadata, storage_state)``, which
            fills ``destination`` row by row and returns it.
        """
        # Resolve the helpers once, outside the generated function, so that
        # the closure only captures plain local names.
        iterate = Compiler.get_iterator()
        read_sample = self.memory_read
        copy_bytes = Compiler.compile(memcpy)

        def decoder(indices, destination, metadata, storage_state):
            for slot in iterate(indices.shape[0]):
                # The metadata entry of the sample is what memory_read uses
                # to locate the sample's bytes.
                pointer = metadata[indices[slot]]
                raw = read_sample(pointer, storage_state)
                # View the destination row as raw bytes so the copy does not
                # depend on the element type.
                copy_bytes(raw, destination[slot].view(np.uint8))
            return destination

        return decoder
# Fixed-size header describing an NDArrayField: it is written by
# NDArrayField.to_binary and parsed back by NDArrayField.from_binary.
NDArrayArgsType = np.dtype([
    # One slot per dimension; 32 is the max number of dimensions for numpy.
    ('shape', '<u8', (32,)),
    # Length of the dtype description.
    ('type_length', '<u8'),
])
class NDArrayField(Field):
    """A subclass of :class:`~ffcv.fields.Field` supporting
    multi-dimensional fixed size matrices of any numpy type.

    Parameters
    ----------
    dtype : np.dtype
        Element type of the stored arrays. Anything accepted by
        ``np.dtype(...)`` (e.g. ``np.float32`` or ``'<f4'``) is normalised to
        a dtype instance.
    shape : Tuple[int, ...]
        Shape shared by every sample of the field.
    """

    def __init__(self, dtype: np.dtype, shape: Tuple[int, ...]):
        # Normalise first: ``.itemsize`` and ``.descr`` (used by to_binary)
        # are only meaningful on dtype instances, not on scalar types.
        dtype = np.dtype(dtype)
        self.dtype = dtype
        self.shape = shape
        # Force an integer product: np.prod(()) would otherwise be a float,
        # and a byte count should be a plain Python int.
        self.element_size = dtype.itemsize * int(np.prod(shape, dtype=np.int64))
        if dtype == np.uint16:
            warnings.warn("Pytorch currently doesn't support uint16, "
                          "we recommend storing as int16 and reinterpret "
                          "your data later in your pipeline")

    @property
    def metadata_type(self) -> np.dtype:
        """Dtype of the per-sample metadata: one little-endian ``uint64``.

        ``encode`` stores the first value returned by ``malloc`` in it.
        """
        return np.dtype('<u8')

    @staticmethod
    def from_binary(binary: ARG_TYPE) -> Field:
        """Rebuild a field from the bytes produced by :meth:`to_binary`.

        Parameters
        ----------
        binary : ARG_TYPE
            Byte buffer starting with an ``NDArrayArgsType`` header followed
            by the JSON-encoded dtype description.

        Returns
        -------
        Field
            An :class:`NDArrayField` with the decoded dtype and shape.

        Notes
        -----
        Trailing dimensions equal to 0 are indistinguishable from unused
        header slots and are therefore dropped.
        """
        header_size = NDArrayArgsType.itemsize
        header = binary[:header_size].view(NDArrayArgsType)[0]
        type_length = int(header['type_length'])
        type_bytes = binary[header_size:header_size + type_length]
        type_desc = json.loads(type_bytes.tobytes().decode('ascii'))
        # JSON turns the (name, format) tuples of ``dtype.descr`` into lists;
        # np.dtype needs tuples again.
        type_desc = [tuple(x) for x in type_desc]
        assert len(type_desc) == 1, \
            "expected a dtype description with exactly one entry"
        dtype = np.dtype(type_desc)['f0']
        # Use Python ints rather than np.uint64 scalars for the shape.
        shape = [int(dim) for dim in header['shape']]
        # Strip the zero padding; guard against an all-zero header (e.g. a
        # field with shape ()) which would otherwise pop from an empty list.
        while shape and shape[-1] == 0:
            shape.pop()
        return NDArrayField(dtype, tuple(shape))

    def to_binary(self) -> ARG_TYPE:
        """Serialise the dtype and shape of this field.

        Returns
        -------
        ARG_TYPE
            A zero-padded record holding an ``NDArrayArgsType`` header
            followed by the ASCII JSON encoding of ``self.dtype.descr``.

        Raises
        ------
        ValueError
            If the shape has more dimensions than the header can hold.
        """
        result = np.zeros(1, dtype=ARG_TYPE)[0]
        header = np.zeros(1, dtype=NDArrayArgsType)
        dims = np.array(self.shape).astype('<u8')
        max_dims = NDArrayArgsType['shape'].shape[0]
        if len(dims) > max_dims:
            raise ValueError(
                f"NDArrayField supports at most {max_dims} dimensions, "
                f"got {len(dims)}")
        header['shape'][0][:len(dims)] = dims
        encoded_type = json.dumps(self.dtype.descr)
        encoded_type = np.frombuffer(encoded_type.encode('ascii'), dtype='<u1')
        header['type_length'][0] = len(encoded_type)
        to_write = np.concatenate([header.view('<u1'), encoded_type])
        result[0][:to_write.shape[0]] = to_write
        return result

    def encode(self, destination, field, malloc):
        """Write one sample.

        Parameters
        ----------
        destination
            Metadata slot of the sample; its first entry receives the first
            value returned by ``malloc``.
        field : np.ndarray
            The array to store.
        malloc : Callable
            Called with the number of bytes needed; returns a pair whose
            second item is the writable byte region for the sample.
        """
        destination[0], data_region = malloc(self.element_size)
        # Flatten, then reinterpret as bytes so any element type can be copied.
        data_region[:] = field.reshape(-1).view('<u1')

    def get_decoder_class(self) -> Type[Operation]:
        """Return the default decoder for this field, :class:`NDArrayDecoder`."""
        return NDArrayDecoder
class TorchTensorField(NDArrayField):
    """A subclass of :class:`~ffcv.fields.Field` supporting
    multi-dimensional fixed size matrices of any torch type.

    Parameters
    ----------
    dtype : ch.dtype
        Torch element type of the stored tensors; it is converted to the
        matching numpy dtype for storage.
    shape : Tuple[int, ...]
        Shape shared by every sample of the field.
    """

    def __init__(self, dtype: ch.dtype, shape: Tuple[int, ...]):
        # Let torch pick the numpy equivalent by converting an empty tensor.
        # The parent constructor is what sets ``self.dtype`` / ``self.shape``,
        # so nothing is stored here beforehand.
        numpy_dtype = ch.zeros(0, dtype=dtype).numpy().dtype
        super().__init__(numpy_dtype, shape)

    def encode(self, destination, field, malloc):
        """Write one sample given as a torch tensor.

        The tensor is converted to a numpy array and handed to
        :meth:`NDArrayField.encode`; parameters are the same as there.
        """
        # ``Tensor.numpy()`` refuses tensors that require grad or live on a
        # non-CPU device; detach and move to CPU first (both are no-ops for
        # plain CPU tensors).
        field = field.detach().cpu().numpy()
        return super().encode(destination, field, malloc)