cpdbench.dataset.CPD2DFromFileDataset
from numpy import ndarray, memmap

from cpdbench.dataset.CPDDataset import CPDDataset


class CPD2DFromFileDataset(CPDDataset):
    """Implementation of CPDDataset where the data source is a large numpy array saved to a file via memmap.

    With this implementation the framework can use very large datasets which are not completely loaded
    into main memory. Instead, numpy lazily loads the data points that are actually accessed.
    """

    def __init__(self, file_path: str, dtype: str, ground_truths: list[int], validation_amount: int = -1,
                 shape=None):
        """Constructor

        Only stores the configuration; the file itself is opened in init().

        :param file_path: The absolute or relative path to the numpy file.
        :param dtype: The data type in which the numpy array was saved.
        :param ground_truths: The ground truth changepoints as a list of integers.
        :param validation_amount: Number of leading entries (along the first axis) used for the
            validation preview. The default -1 uses the complete signal.
        :param shape: Optional shape forwarded to numpy.memmap. With the default None numpy maps the
            whole file as a one-dimensional array; pass the shape of the stored array,
            e.g. (n_samples, n_features), to get a 2D view.
        """
        self.file_path = file_path
        self.dtype = dtype
        self.shape = shape
        self._ground_truths = ground_truths
        self._validation_amount = validation_amount
        # Filled by init(). Defined here so that both getters behave the same way
        # (returning None placeholders) instead of raising AttributeError before init() ran.
        self._array = None
        self._validation_array = None
        self._validation_ground_truths = None

    def init(self) -> None:
        """Open the file as a read-only memory map and derive the validation preview from it."""
        self._array = memmap(self.file_path, self.dtype, mode='r', shape=self.shape)
        if self._validation_amount == -1:
            # Sentinel: the preview is a view on the complete signal.
            self._validation_array = self._array[:]
        else:
            self._validation_array = self._array[0:self._validation_amount]
        validation_array_length = self._validation_array.shape[0]
        # Keep only the changepoints that lie inside the preview.
        self._validation_ground_truths = [el for el in self._ground_truths if el < validation_array_length]

    def get_signal(self) -> tuple[ndarray, list[int]]:
        """Return the memory-mapped signal together with all ground truth changepoints."""
        return self._array, self._ground_truths

    def get_validation_preview(self) -> tuple[ndarray, list[int]]:
        """Return the validation preview together with the ground truth changepoints inside it."""
        return self._validation_array, self._validation_ground_truths
class CPD2DFromFileDataset(CPDDataset):
    """Implementation of CPDDataset where the data source is a large numpy array saved to a file via memmap.

    With this implementation the framework can use very large datasets which are not completely loaded
    into main memory. Instead, numpy lazily loads the data points that are actually accessed.
    """

    def __init__(self, file_path: str, dtype: str, ground_truths: list[int], validation_amount: int = -1,
                 shape=None):
        """Constructor

        Only stores the configuration; the file itself is opened in init().

        :param file_path: The absolute or relative path to the numpy file.
        :param dtype: The data type in which the numpy array was saved.
        :param ground_truths: The ground truth changepoints as a list of integers.
        :param validation_amount: Number of leading entries (along the first axis) used for the
            validation preview. The default -1 uses the complete signal.
        :param shape: Optional shape forwarded to numpy.memmap. With the default None numpy maps the
            whole file as a one-dimensional array; pass the shape of the stored array,
            e.g. (n_samples, n_features), to get a 2D view.
        """
        self.file_path = file_path
        self.dtype = dtype
        self.shape = shape
        self._ground_truths = ground_truths
        self._validation_amount = validation_amount
        # Filled by init(). Defined here so that both getters behave the same way
        # (returning None placeholders) instead of raising AttributeError before init() ran.
        self._array = None
        self._validation_array = None
        self._validation_ground_truths = None

    def init(self) -> None:
        """Open the file as a read-only memory map and derive the validation preview from it."""
        self._array = memmap(self.file_path, self.dtype, mode='r', shape=self.shape)
        if self._validation_amount == -1:
            # Sentinel: the preview is a view on the complete signal.
            self._validation_array = self._array[:]
        else:
            self._validation_array = self._array[0:self._validation_amount]
        validation_array_length = self._validation_array.shape[0]
        # Keep only the changepoints that lie inside the preview.
        self._validation_ground_truths = [el for el in self._ground_truths if el < validation_array_length]

    def get_signal(self) -> tuple[ndarray, list[int]]:
        """Return the memory-mapped signal together with all ground truth changepoints."""
        return self._array, self._ground_truths

    def get_validation_preview(self) -> tuple[ndarray, list[int]]:
        """Return the validation preview together with the ground truth changepoints inside it."""
        return self._validation_array, self._validation_ground_truths
Implementation of CPDDataset where the data source is a large numpy array saved to a file via memmap. With this implementation the framework can use very large datasets which are not completely loaded into main memory. Instead, numpy will lazily load all needed data points.
CPD2DFromFileDataset( file_path: str, dtype: str, ground_truths: list, validation_amount=-1)
def __init__(self, file_path: str, dtype: str, ground_truths: list[int], validation_amount: int = -1):
    """Constructor

    Only stores the configuration; no file is opened here.

    :param file_path: The absolute or relative path to the numpy file.
    :param dtype: The data type in which the numpy array was saved.
    :param ground_truths: The ground truth changepoints as a list of integers.
    :param validation_amount: Number of leading entries (along the first axis) used for the
        validation preview. The default -1 uses the complete signal.
    """
    self.file_path = file_path
    self.dtype = dtype
    self._ground_truths = ground_truths
    self._validation_amount = validation_amount
    # Filled by init(). Defined here so that both getters behave the same way
    # (returning None placeholders) instead of raising AttributeError before init() ran.
    self._array = None
    self._validation_array = None
    self._validation_ground_truths = None
Constructor
Parameters
- file_path: The absolute or relative path to the numpy file.
- dtype: The data type in which the numpy array was saved.
- ground_truths: The ground truth changepoints as a list of integers.
def
init(self) -> None:
def init(self) -> None:
    """Open the file as a read-only memory map and build the validation preview.

    The preview is a slice along the first axis of the mapped array; only the
    ground truth changepoints smaller than the preview length are kept for it.
    """
    signal = memmap(self.file_path, self.dtype, mode='r')
    self._array = signal

    # -1 is the sentinel for "no limit": the preview then spans the complete signal.
    preview_end = None if self._validation_amount == -1 else self._validation_amount
    preview = signal[0:preview_end]
    self._validation_array = preview

    preview_length = preview.shape[0]
    kept_changepoints = []
    for changepoint in self._ground_truths:
        if changepoint < preview_length:
            kept_changepoints.append(changepoint)
    self._validation_ground_truths = kept_changepoints
Initialization method to prepare the dataset. Examples: Open a file, open a db connection etc.
def
get_signal(self) -> tuple[numpy.ndarray, list[int]]:
Returns the timeseries as a numpy array, together with the ground truth changepoints.
Returns
A 2D ndarray containing the timeseries (time x feature)
def
get_validation_preview(self) -> tuple[numpy.ndarray, list[int]]:
def get_validation_preview(self) -> tuple[ndarray, list[int]]:
    """Return the validation preview and the ground truth changepoints stored for it."""
    preview_signal = self._validation_array
    preview_changepoints = self._validation_ground_truths
    return preview_signal, preview_changepoints
Return a smaller part of the complete signal, together with the ground truth changepoints that fall inside it, for fast runtime validation.
Returns
A 2D ndarray containing the timeseries (time x feature)