"""Dataset classes for optimising with BoTorch."""
from __future__ import annotations
from typing import Tuple, Type, TypeVar, Union
import copy
import numpy as np
import torch
# Type variable bound to BayesDataset. It is used to annotate the `load` classmethod
# (`cls: Type[T]` -> `T`), so that calling `load` on a subclass is typed as returning
# that subclass rather than the base class.
T = TypeVar('T', bound='BayesDataset')
# [docs]  (documentation link marker, apparently left over from HTML extraction; not code)
class Standardiser:
    """Standardisation transformation.

    Stores per-component statistics of a data set (mean and standard deviation along
    the second-to-last dimension) together with a boolean mask selecting the
    components whose relative spread exceeds ``std_tol``.

    Attributes
    ----------
    mean : torch.Tensor or None
        Mean of the fitted data along dimension -2 (None until `fit` is called).
    stds : torch.Tensor or None
        Standard deviation of the fitted data along dimension -2.
    mask : torch.Tensor or None
        Boolean mask, True for components with ``|std / mean(|data|)| > std_tol``.
    inv_mask : torch.Tensor or None
        Logical negation of `mask`.
    num_components : int or None
        Number of True entries in `mask`.
    std_tol : float
        Relative tolerance used to build `mask`.
    """

    def __init__(self, std_tol: float = 1e-6) -> None:
        # Every statistic stays unset until fit() is called.
        self.mean: Union[torch.Tensor, None] = None
        self.stds: Union[torch.Tensor, None] = None
        self.mask: Union[torch.Tensor, None] = None
        self.inv_mask: Union[torch.Tensor, None] = None
        self.num_components: Union[int, None] = None
        self.std_tol = std_tol

    def fit(self, data: torch.Tensor) -> None:
        """Fit the standardisation transformation to the given data.

        Parameters
        ----------
        data : torch.Tensor
            Data to fit the standardisation transformation to. Statistics are reduced
            along dimension -2 (presumably the observation axis -- confirm with callers).
        """
        self.mean = torch.mean(data, dim=-2)
        # With a single observation the sample standard deviation is undefined,
        # so it is taken to be zero instead of calling torch.std.
        if data.shape[-2] > 1:
            self.stds = torch.std(data, dim=-2)
        else:
            self.stds = torch.zeros_like(self.mean)
        # Spread relative to the average magnitude of each component. For an all-zero
        # component this is 0/0 = NaN, which compares False and is therefore masked out.
        y_abs_avg = torch.mean(torch.abs(data), dim=-2)
        self.mask = torch.abs(self.stds / y_abs_avg) > self.std_tol
        self.inv_mask = ~self.mask  # pylint: disable=invalid-unary-operand-type
        # Stored as a plain int (as annotated) so it carries no device and can be used
        # directly for shapes and slicing.
        self.num_components = int(torch.count_nonzero(self.mask))

    def to(self, device: str) -> Standardiser:
        """Move the standardiser to a given device.

        Parameters
        ----------
        device : str
            Device to move the standardiser to.

        Returns
        -------
        Standardiser
            A copy of this standardiser with its tensors on the new device.
            The original instance is left untouched.
        """
        new_standardiser = copy.deepcopy(self)
        # An unfitted standardiser holds no tensors, so there is nothing to move.
        if self.mean is not None:
            new_standardiser.mean = self.mean.to(device)
            new_standardiser.stds = self.stds.to(device)
            new_standardiser.mask = self.mask.to(device)
            # inv_mask must follow the other tensors, otherwise it would stay on the
            # original device and break any later indexing on the new one.
            new_standardiser.inv_mask = self.inv_mask.to(device)
        return new_standardiser
# [docs]  (documentation link marker, apparently left over from HTML extraction; not code)
class PCA:
    """Principal Component Analysis transformation.

    Attributes
    ----------
    variance : float
        Fraction of the total variance that may be discarded: components are kept
        until the cumulative explained-variance fraction reaches ``1 - variance``.
    standardiser : Standardiser
        Standardisation applied to the data before the decomposition.
    num_components : int or None
        Number of principal components retained (None until `fit` is called).
    transformation : torch.Tensor or None
        Matrix whose columns are the retained eigenvectors, ordered by decreasing
        eigenvalue (None until `fit` is called).
    """

    def __init__(self, variance: float) -> None:
        self.variance = variance
        self.standardiser = Standardiser()
        self.num_components: Union[int, None] = None
        self.transformation: Union[torch.Tensor, None] = None

    def fit(self, data: torch.Tensor, covariances: torch.Tensor) -> None:
        """Fit the PCA transformation to the given data.

        Parameters
        ----------
        data : torch.Tensor
            Data to fit the PCA transformation to.
        covariances : torch.Tensor
            Covariances of the data to fit the PCA transformation to. They are
            averaged over dimension 0 after standardisation.
        """
        # Standardise data
        # NOTE(review): Standardiser.transform is not defined in the visible part of
        # this file; it is assumed to return (standardised data, standardised
        # covariances) -- confirm against its definition.
        self.standardiser.fit(data)
        data_std, covariances_std = self.standardiser.transform(data, covariances)
        # Compute the joint covariance matrix
        # Refer to Eq.(4) of https://doi.org/10.1109/TVCG.2019.2934812 for this
        cov = torch.cov(data_std.T) + torch.mean(covariances_std, dim=0)
        # eigh returns eigenvalues in ascending order: re-sort by decreasing variance
        vals, vecs = torch.linalg.eigh(cov)  # pylint: disable=not-callable
        order = torch.argsort(vals, descending=True)
        vals = vals[order]
        vecs = vecs[:, order]
        # Cumulative fraction of the variance explained by the leading components
        cumsum = torch.cumsum(vals / vals.sum(), dim=0)
        # First position at which the cumulative fraction reaches the target, plus one
        # to turn the index into a count.
        count = int(torch.searchsorted(cumsum, 1.0 - self.variance)) + 1
        # If round-off keeps the cumulative sum below the target, searchsorted returns
        # len(cumsum) and the count would overshoot: never report more components than
        # there are eigenvectors.
        self.num_components = min(count, cumsum.shape[0])
        self.transformation = vecs[:, :self.num_components]

    def to(self, device: str) -> PCA:
        """Move the PCA to a given device.

        Parameters
        ----------
        device : str
            Device to move the PCA to.

        Returns
        -------
        PCA
            A copy of this PCA with its tensors on the new device.
            The original instance is left untouched.
        """
        new_pca = copy.deepcopy(self)
        new_pca.standardiser = self.standardiser.to(device)
        # An unfitted PCA has no transformation matrix to move.
        if self.transformation is not None:
            new_pca.transformation = self.transformation.to(device)
        return new_pca
# [docs]  (documentation link marker, apparently left over from HTML extraction; not code)
class BayesDataset:
    """Dataset class for multi-outcome data.

    Observations are accumulated row by row with `push`. After each push the
    standardisation (and, when enabled, PCA) statistics are refitted and the dataset
    is optionally written to disk.

    Parameters
    ----------
    n_dim : int
        Number of parameters per observation (columns of `params`).
    n_outputs : int
        Number of outputs per observation (columns of `values`).
    export : str, optional
        File the dataset is saved to after every push. Nothing is saved when None.
    dtype : torch.dtype
        Data type of all stored tensors.
    std_tol : float
        Tolerance forwarded to the `Standardiser`.
    pca_variance : float, optional
        Variance setting forwarded to `PCA`. PCA is disabled when None.
    device : str
        Device on which the tensors are stored.
    """

    def __init__(
        self,
        n_dim: int,
        n_outputs: int,
        export: Union[str, None] = None,
        dtype: torch.dtype = torch.float64,
        std_tol: float = 1e-6,
        pca_variance: Union[float, None] = None,
        device: str = "cpu",
    ) -> None:
        self.dtype = dtype
        self.n_points = 0
        self.n_dim = n_dim
        self.n_outputs = n_outputs
        # Storage starts empty (zero rows) and grows by one row per push
        self.params = torch.empty((0, n_dim), dtype=dtype, device=device)
        self.values = torch.empty((0, n_outputs), dtype=dtype, device=device)
        self.variances = torch.empty((0, n_outputs), dtype=dtype, device=device)
        self.covariances = torch.empty((0, n_outputs, n_outputs), dtype=dtype, device=device)
        self.objectives = torch.empty((0,), dtype=dtype, device=device)
        self.export = export
        self.std_tol = std_tol
        self.pca_variance = pca_variance
        self.standardiser = Standardiser(std_tol)
        self.pca = PCA(pca_variance) if pca_variance is not None else None
        self.device = device

    def update_stats(self) -> None:
        """Update the statistics of the dataset.

        Refits the PCA (when enabled and more than one observation is stored) and
        then the standardiser, the latter on the PCA-transformed values if PCA ran.
        """
        values = torch.clone(self.values)
        # Update PCA transformation
        if self.pca_variance is not None and values.shape[0] > 1:
            self.pca.fit(values, self.covariances)
            # NOTE(review): PCA.transform is not defined in the visible part of this
            # file; confirm its signature against its definition.
            values = self.pca.transform(self.values)
        # Update standardisation transformation
        self.standardiser.fit(values)

    def numel_latent_space(self) -> int:
        """Return the number of components of the latent space.

        Returns
        -------
        int
            Number of components of the latent space: the PCA component count when
            PCA is active (enabled and more than one observation stored), otherwise
            the standardiser component count. None is passed through unchanged if
            the corresponding transform has not been fitted yet.
        """
        if self.pca_variance is not None and self.values.shape[0] > 1:
            count = self.pca.num_components
        else:
            count = self.standardiser.num_components
        # The transforms may hold the count as a 0-dim tensor or as a plain int;
        # normalise it so that the annotated return type is honoured.
        return count if count is None else int(count)

    @classmethod
    def load(cls: Type[T], filename: str) -> T:
        """Load data from a given input file.

        Parameters
        ----------
        filename : str
            Path to the file to read from.

        Returns
        -------
        BayesDataset
            Dataset loaded from the file.
        """
        # SECURITY: weights_only=False unpickles arbitrary Python objects, which can
        # execute code. Only load dataset files that come from a trusted source.
        data: T = torch.load(filename, weights_only=False)
        # Rebuild standardiser and PCA
        data.standardiser = Standardiser(data.std_tol)
        data.pca = PCA(data.pca_variance) if data.pca_variance is not None else None
        data.update_stats()
        # Send to the correct device before returning
        return data.to(data.device)

    def save(self, filename: str) -> None:
        """Save all dataset data to a file.

        The whole object is serialised with `torch.save`.

        Parameters
        ----------
        filename : str
            Output file path.
        """
        torch.save(self, filename)

    def push(
        self,
        params: np.ndarray,
        results: np.ndarray,
        covariance: np.ndarray,
        objective: Union[float, None],
    ) -> None:
        """Add a point to the dataset.

        Parameters
        ----------
        params : np.ndarray
            Parameter values for this observation.
        results : np.ndarray
            Output values for this observation.
        covariance : np.ndarray
            Covariance matrix of the outputs for this observation. Its diagonal is
            stored separately as the per-output variances.
        objective : float, optional
            Objective value for this observation. When None, nothing is appended to
            `objectives`, which then holds fewer entries than `params`.
        """
        # Convert to tensors with the dataset's dtype and device
        torch_params = torch.tensor(params, dtype=self.dtype, device=self.device)
        torch_value = torch.tensor(results, dtype=self.dtype, device=self.device)
        torch_covariance = torch.tensor(covariance, dtype=self.dtype, device=self.device)
        torch_variance = torch.diagonal(torch_covariance)
        # Append the new observation as one extra row of each storage tensor
        self.params = torch.cat([self.params, torch_params.unsqueeze(0)], dim=0)
        self.values = torch.cat([self.values, torch_value.unsqueeze(0)], dim=0)
        self.variances = torch.cat([self.variances, torch_variance.unsqueeze(0)], dim=0)
        self.covariances = torch.cat([self.covariances, torch_covariance.unsqueeze(0)], dim=0)
        if objective is not None:
            torch_objective = torch.tensor([objective], dtype=self.dtype, device=self.device)
            self.objectives = torch.cat([self.objectives, torch_objective], dim=0)
        self.n_points += 1
        self.update_stats()
        # Update the dataset file after every push
        if self.export is not None:
            self.save(self.export)

    def min(self) -> Tuple[np.ndarray, np.ndarray]:
        """Return the minimum objective value of the dataset.

        The index of the smallest entry of `objectives` is used to select the row of
        `params`, so both must be aligned (one objective per pushed point). At least
        one objective must have been pushed: `torch.argmin` fails on an empty tensor.

        Returns
        -------
        Tuple[np.ndarray, np.ndarray]
            Parameters and objective value for the minimum point.
        """
        idx = torch.argmin(self.objectives)
        return self.params[idx, :].cpu().numpy(), self.objectives[idx].cpu().numpy()

    def to(self, device: str) -> BayesDataset:
        """Move the dataset to a given device.

        Parameters
        ----------
        device : str
            Device to move the dataset to.

        Returns
        -------
        BayesDataset
            A copy of the dataset with its tensors and transforms on the new device.
            The original instance is left untouched.
        """
        new_dataset = copy.deepcopy(self)
        new_dataset.params = self.params.to(device)
        new_dataset.values = self.values.to(device)
        new_dataset.variances = self.variances.to(device)
        new_dataset.covariances = self.covariances.to(device)
        new_dataset.objectives = self.objectives.to(device)
        new_dataset.standardiser = self.standardiser.to(device)
        if self.pca is not None:
            new_dataset.pca = self.pca.to(device)
        new_dataset.device = device
        return new_dataset