cpdbench.dataset.CPD2DFromFileDataset

 1from numpy import ndarray, memmap
 2
 3from cpdbench.dataset.CPDDataset import CPDDataset
 4
 5
 6class CPD2DFromFileDataset(CPDDataset):
 7    """Implementation of CPDDataset where the data source is large numpy array saved as file via memmap.
 8    With this implementation the framework can use very large datasets which are not completely loaded
 9    into the main memory. Instead, numpy will lazy load all needed data points.
10    """
11
12    def __init__(self, file_path: str, dtype: str, ground_truths: list[int], validation_amount=-1):
13        """Constructor
14        :param file_path: The absolute or relative path to numpy file.
15        :param dtype: The data type in which the numpy array was saved.
16        :param ground_truths: The ground truth changepoints as integer list.
17        """
18        self.file_path = file_path
19        self.dtype = dtype
20        self._array = None
21        self._ground_truths = ground_truths
22        self._validation_amount = validation_amount
23
24    def init(self) -> None:
25        self._array = memmap(self.file_path, self.dtype, mode='r')
26        if self._validation_amount == -1:
27            self._validation_array = self._array[:]
28        else:
29            self._validation_array = self._array[0:self._validation_amount]
30        validation_array_length = self._validation_array.shape[0]
31        self._validation_ground_truths = [el for el in self._ground_truths if el < validation_array_length]
32
33    def get_signal(self) -> tuple[ndarray, list[int]]:
34        return self._array, self._ground_truths
35
36    def get_validation_preview(self) -> tuple[ndarray, list[int]]:
37        return self._validation_array, self._validation_ground_truths
class CPD2DFromFileDataset(cpdbench.dataset.CPDDataset.CPDDataset):
 7class CPD2DFromFileDataset(CPDDataset):
 8    """Implementation of CPDDataset where the data source is large numpy array saved as file via memmap.
 9    With this implementation the framework can use very large datasets which are not completely loaded
10    into the main memory. Instead, numpy will lazy load all needed data points.
11    """
12
13    def __init__(self, file_path: str, dtype: str, ground_truths: list[int], validation_amount=-1):
14        """Constructor
15        :param file_path: The absolute or relative path to numpy file.
16        :param dtype: The data type in which the numpy array was saved.
17        :param ground_truths: The ground truth changepoints as integer list.
18        """
19        self.file_path = file_path
20        self.dtype = dtype
21        self._array = None
22        self._ground_truths = ground_truths
23        self._validation_amount = validation_amount
24
25    def init(self) -> None:
26        self._array = memmap(self.file_path, self.dtype, mode='r')
27        if self._validation_amount == -1:
28            self._validation_array = self._array[:]
29        else:
30            self._validation_array = self._array[0:self._validation_amount]
31        validation_array_length = self._validation_array.shape[0]
32        self._validation_ground_truths = [el for el in self._ground_truths if el < validation_array_length]
33
34    def get_signal(self) -> tuple[ndarray, list[int]]:
35        return self._array, self._ground_truths
36
37    def get_validation_preview(self) -> tuple[ndarray, list[int]]:
38        return self._validation_array, self._validation_ground_truths

Implementation of CPDDataset where the data source is a large numpy array that is stored in a file and accessed via memmap. With this implementation, the framework can use very large datasets that are not completely loaded into main memory. Instead, numpy lazily loads the data points that are needed.

CPD2DFromFileDataset( file_path: str, dtype: str, ground_truths: list, validation_amount=-1)
13    def __init__(self, file_path: str, dtype: str, ground_truths: list[int], validation_amount=-1):
14        """Constructor
15        :param file_path: The absolute or relative path to numpy file.
16        :param dtype: The data type in which the numpy array was saved.
17        :param ground_truths: The ground truth changepoints as integer list.
18        """
19        self.file_path = file_path
20        self.dtype = dtype
21        self._array = None
22        self._ground_truths = ground_truths
23        self._validation_amount = validation_amount

Constructor

Parameters
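  • file_path: The absolute or relative path to the numpy file.
  • dtype: The data type in which the numpy array was saved.
  • ground_truths: The ground truth changepoints as an integer list.
  • validation_amount: The number of leading entries used for the validation preview; the default -1 uses the complete array.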
file_path
dtype
def init(self) -> None:
25    def init(self) -> None:
26        self._array = memmap(self.file_path, self.dtype, mode='r')
27        if self._validation_amount == -1:
28            self._validation_array = self._array[:]
29        else:
30            self._validation_array = self._array[0:self._validation_amount]
31        validation_array_length = self._validation_array.shape[0]
32        self._validation_ground_truths = [el for el in self._ground_truths if el < validation_array_length]

Initialization method to prepare the dataset. Examples: Open a file, open a db connection etc.

def get_signal(self) -> tuple[numpy.ndarray, list[int]]:
34    def get_signal(self) -> tuple[ndarray, list[int]]:
35        return self._array, self._ground_truths

Returns the time series as a numpy array, together with the ground truth changepoints.

Returns

A tuple of a 2D ndarray containing the time series (time x feature) and the list of ground truth changepoints.

def get_validation_preview(self) -> tuple[numpy.ndarray, list[int]]:
37    def get_validation_preview(self) -> tuple[ndarray, list[int]]:
38        return self._validation_array, self._validation_ground_truths

Return a smaller part of the complete signal for fast runtime validation.

Returns

A tuple of a 2D ndarray containing the preview part of the time series (time x feature) and the ground truth changepoints that fall within it.