API

AbstractGP

AbstractGP(seqs, num_tasks, default_task, solo_task, scale, lengthscales, noise, factor_task_kernel, rank_factor_task_kernel, noise_task_kernel, device, tfs_scale, tfs_lengthscales, tfs_noise, tfs_factor_task_kernel, tfs_noise_task_kernel, requires_grad_scale, requires_grad_lengthscales, requires_grad_noise, requires_grad_factor_task_kernel, requires_grad_noise_task_kernel, shape_batch, shape_scale, shape_lengthscales, shape_noise, shape_factor_task_kernel, shape_noise_task_kernel, derivatives, derivatives_coeffs, adaptive_nugget)

Bases: Module

Source code in fastgps/abstract_gp.py
def __init__(self,
        seqs,
        num_tasks,
        default_task,
        solo_task,
        scale,
        lengthscales,
        noise,
        factor_task_kernel,
        rank_factor_task_kernel,
        noise_task_kernel,
        device,
        tfs_scale,
        tfs_lengthscales,
        tfs_noise,
        tfs_factor_task_kernel,
        tfs_noise_task_kernel,
        requires_grad_scale, 
        requires_grad_lengthscales, 
        requires_grad_noise,
        requires_grad_factor_task_kernel,
        requires_grad_noise_task_kernel,
        shape_batch,
        shape_scale, 
        shape_lengthscales,
        shape_noise,
        shape_factor_task_kernel, 
        shape_noise_task_kernel,
        derivatives,
        derivatives_coeffs,
        adaptive_nugget,
    ):
    super().__init__()
    assert torch.get_default_dtype()==torch.float64, "fast transforms do not work without torch.float64 precision" 
    assert isinstance(num_tasks,int) and num_tasks>0
    self.num_tasks = num_tasks
    self.default_task = default_task
    self.solo_task = solo_task
    self.device = torch.device(device)
    assert isinstance(seqs,np.ndarray) and seqs.shape==(self.num_tasks,)
    self.d = seqs[0].d
    assert all(seqs[i].d==self.d for i in range(self.num_tasks))
    self.seqs = seqs
    self.n = torch.zeros(self.num_tasks,dtype=int,device=self.device)
    self.m = -1*torch.ones(self.num_tasks,dtype=int,device=self.device)
    # derivatives
    if derivatives is not None or derivatives_coeffs is not None:
        rank_factor_task_kernel = 1
        tfs_noise_task_kernel = (lambda x: x, lambda x: x)
        noise_task_kernel = 0.
    if derivatives is None: derivatives = [torch.zeros((1,self.d),dtype=torch.int64,device=self.device) for i in range(self.num_tasks)]
    if isinstance(derivatives,torch.Tensor): derivatives = [derivatives]
    assert isinstance(derivatives,list) and len(derivatives)==self.num_tasks
    derivatives = [deriv[None,:] if deriv.ndim==1 else deriv for deriv in derivatives]
    assert all((derivatives[i].ndim==2 and derivatives[i].size(1)==self.d) for i in range(self.num_tasks))
    self.derivatives = derivatives
    if derivatives_coeffs is None: derivatives_coeffs = [torch.ones(len(self.derivatives[i]),device=self.device) for i in range(self.num_tasks)]
    assert isinstance(derivatives_coeffs,list) and len(derivatives_coeffs)==self.num_tasks
    assert all(derivatives_coeffs[i].ndim==1 and len(derivatives_coeffs[i])==len(self.derivatives[i]) for i in range(self.num_tasks))
    self.derivatives_coeffs = derivatives_coeffs
    # shape_batch 
    if isinstance(shape_batch,(list,tuple)): shape_batch = torch.Size(shape_batch)
    assert isinstance(shape_batch,torch.Size)
    self.shape_batch = shape_batch
    self.ndim_batch = len(self.shape_batch)
    # scale
    assert np.isscalar(scale) or isinstance(scale,torch.Tensor), "scale must be a scalar or torch.Tensor"
    if isinstance(scale,torch.Tensor): shape_scale = scale.shape
    if isinstance(shape_scale,(list,tuple)): shape_scale = torch.Size(shape_scale)
    assert isinstance(shape_scale,torch.Size) and shape_scale[-1]==1
    if len(shape_scale)>1: assert shape_scale[:-1]==shape_batch[-(len(shape_scale)-1):]
    if np.isscalar(scale): scale = scale*torch.ones(shape_scale,device=self.device)
    assert (scale>0).all(), "scale must be positive"
    assert len(tfs_scale)==2 and callable(tfs_scale[0]) and callable(tfs_scale[1]), "tfs_scale should be a tuple of two callables, the transform and inverse transform"
    self.tf_scale = tfs_scale[1]
    self.raw_scale = torch.nn.Parameter(tfs_scale[0](scale),requires_grad=requires_grad_scale)
    # lengthscales
    assert np.isscalar(lengthscales) or isinstance(lengthscales,torch.Tensor), "lengthscales must be a scalar or torch.Tensor"
    if isinstance(lengthscales,torch.Tensor): shape_lengthscales = lengthscales.shape 
    if shape_lengthscales is None: shape_lengthscales = torch.Size([self.d])
    if isinstance(shape_lengthscales,(list,tuple)): shape_lengthscales = torch.Size(shape_lengthscales)
    assert isinstance(shape_lengthscales,torch.Size) and (shape_lengthscales[-1]==self.d or shape_lengthscales[-1]==1)
    if len(shape_lengthscales)>1: assert shape_lengthscales[:-1]==shape_batch[-(len(shape_lengthscales)-1):]
    if np.isscalar(lengthscales): lengthscales = lengthscales*torch.ones(shape_lengthscales,device=self.device)
    assert (lengthscales>0).all(), "lengthscales must be positive"
    assert len(tfs_lengthscales)==2 and callable(tfs_lengthscales[0]) and callable(tfs_lengthscales[1]), "tfs_lengthscales should be a tuple of two callables, the transform and inverse transform"
    self.tf_lengthscales = tfs_lengthscales[1]
    self.raw_lengthscales = torch.nn.Parameter(tfs_lengthscales[0](lengthscales),requires_grad=requires_grad_lengthscales)
    # noise
    assert np.isscalar(noise) or isinstance(noise,torch.Tensor), "noise must be a scalar or torch.Tensor"
    if isinstance(noise,torch.Tensor): shape_noise = noise.shape
    if isinstance(shape_noise,(list,tuple)): shape_noise = torch.Size(shape_noise)
    assert isinstance(shape_noise,torch.Size) and shape_noise[-1]==1
    if len(shape_noise)>1: assert shape_noise[:-1]==shape_batch[-(len(shape_noise)-1):]
    if np.isscalar(noise): noise = noise*torch.ones(shape_noise,device=self.device)
    assert (noise>0).all(), "noise must be positive"
    assert len(tfs_noise)==2 and callable(tfs_noise[0]) and callable(tfs_noise[1]), "tfs_noise should be a tuple of two callables, the transform and inverse transform"
    self.tf_noise = tfs_noise[1]
    self.raw_noise = torch.nn.Parameter(tfs_noise[0](noise),requires_grad=requires_grad_noise)
    # factor_task_kernel
    assert np.isscalar(factor_task_kernel) or isinstance(factor_task_kernel,torch.Tensor), "factor_task_kernel must be a scalar or torch.Tensor"
    if isinstance(factor_task_kernel,torch.Tensor): shape_factor_task_kernel = factor_task_kernel.shape
    if shape_factor_task_kernel is None:
        if rank_factor_task_kernel is None: rank_factor_task_kernel = 0 if self.num_tasks==1 else 1 
        assert isinstance(rank_factor_task_kernel,int) and 0<=rank_factor_task_kernel<=self.num_tasks
        shape_factor_task_kernel = torch.Size([self.num_tasks,rank_factor_task_kernel])
    if isinstance(shape_factor_task_kernel,(list,tuple)): shape_factor_task_kernel = torch.Size(shape_factor_task_kernel)
    assert isinstance(shape_factor_task_kernel,torch.Size) and 0<=shape_factor_task_kernel[-1]<=self.num_tasks and shape_factor_task_kernel[-2]==self.num_tasks
    if len(shape_factor_task_kernel)>2: assert shape_factor_task_kernel[:-2]==shape_batch[-(len(shape_factor_task_kernel)-2):]
    if np.isscalar(factor_task_kernel): factor_task_kernel = factor_task_kernel*torch.ones(shape_factor_task_kernel,device=self.device)
    assert len(tfs_factor_task_kernel)==2 and callable(tfs_factor_task_kernel[0]) and callable(tfs_factor_task_kernel[1]), "tfs_factor_task_kernel should be a tuple of two callables, the transform and inverse transform"
    self.tf_factor_task_kernel = tfs_factor_task_kernel[1]
    if requires_grad_factor_task_kernel is None: requires_grad_factor_task_kernel = self.num_tasks>1
    self.raw_factor_task_kernel = torch.nn.Parameter(tfs_factor_task_kernel[0](factor_task_kernel),requires_grad=requires_grad_factor_task_kernel)
    # noise_task_kernel
    assert np.isscalar(noise_task_kernel) or isinstance(noise_task_kernel,torch.Tensor), "noise_task_kernel must be a scalar or torch.Tensor"
    if isinstance(noise_task_kernel,torch.Tensor): shape_noise_task_kernel = noise_task_kernel.shape 
    if shape_noise_task_kernel is None: shape_noise_task_kernel = torch.Size([self.num_tasks])
    if isinstance(shape_noise_task_kernel,(list,tuple)): shape_noise_task_kernel = torch.Size(shape_noise_task_kernel)
    assert isinstance(shape_noise_task_kernel,torch.Size) and (shape_noise_task_kernel[-1]==self.num_tasks or shape_noise_task_kernel[-1]==1)
    if len(shape_noise_task_kernel)>1: assert shape_noise_task_kernel[:-1]==shape_batch[-(len(shape_noise_task_kernel)-1):]
    if np.isscalar(noise_task_kernel): noise_task_kernel = noise_task_kernel*torch.ones(shape_noise_task_kernel,device=self.device)
    assert (noise_task_kernel>=0).all(), "noise_task_kernel must be non-negative"
    assert len(tfs_noise_task_kernel)==2 and callable(tfs_noise_task_kernel[0]) and callable(tfs_noise_task_kernel[1]), "tfs_noise_task_kernel should be a tuple of two callables, the transform and inverse transform"
    self.tf_noise_task_kernel = tfs_noise_task_kernel[1]
    if requires_grad_noise_task_kernel is None: requires_grad_noise_task_kernel = self.num_tasks>1
    self.raw_noise_task_kernel = torch.nn.Parameter(tfs_noise_task_kernel[0](noise_task_kernel),requires_grad=requires_grad_noise_task_kernel)
    # storage and dynamic caches
    self._y = [torch.empty(0,device=self.device) for l in range(self.num_tasks)]
    self.xxb_seqs = np.array([_XXbSeq(self,self.seqs[i]) for i in range(self.num_tasks)],dtype=object)
    self.coeffs_cache = _CoeffsCache(self)
    self.task_cov_cache = _TaskCovCache(self)
    self.inv_log_det_cache_dict = {}
    # derivative multitask setting checks 
    if any((self.derivatives[i]>0).any() or (self.derivatives_coeffs[i]!=1).any() for i in range(self.num_tasks)):
        self.raw_noise_task_kernel.requires_grad_(False)
        self.raw_factor_task_kernel.requires_grad_(False)
        assert (self.gram_matrix_tasks==1).all()
    self.adaptive_nugget = adaptive_nugget

scale property

scale

Kernel scale parameter.

lengthscales property

lengthscales

Kernel lengthscale parameter.

noise property

noise

Noise parameter.

factor_task_kernel property

factor_task_kernel

Factor for the task kernel parameter.

noise_task_kernel property

noise_task_kernel

Noise for the task kernel parameter.

gram_matrix_tasks property

gram_matrix_tasks

Gram matrix for the task kernel.

coeffs property

coeffs

Coefficients \(\mathsf{K}^{-1} \boldsymbol{y}\).

x property

x

Current sampling locations. A torch.Tensor for single task problems. A list for multitask problems.

y property

y

Current sampling values. A torch.Tensor for single task problems. A list for multitask problems.
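
For example (model name gp assumed), once data has been added:

```python
# single-task model: gp.x and gp.y are tensors of sampling locations and values
print(gp.x.shape, gp.y.shape)
# multitask model: gp.x and gp.y are lists with one tensor per task
```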

fit

fit(loss_metric='MLL', iterations=5000, lr=None, optimizer=None, stop_crit_improvement_threshold=0.05, stop_crit_wait_iterations=10, store_hists=False, store_loss_hist=False, store_scale_hist=False, store_lengthscales_hist=False, store_noise_hist=False, store_task_kernel_hist=False, verbose=5, verbose_indent=4, masks=None, cv_weights=1)

Parameters:

    loss_metric (str): either "MLL" (Marginal Log Likelihood), "CV" (Cross Validation), or "GCV" (Generalized CV). Default: 'MLL'
    iterations (int): number of optimization iterations. Default: 5000
    lr (float): learning rate for the default optimizer. Default: None
    optimizer (torch.optim.Optimizer): optimizer, defaults to torch.optim.Rprop(self.parameters(),lr=lr). Default: None
    stop_crit_improvement_threshold (float): stop fitting when the maximum number of iterations is reached or the best loss is not reduced by stop_crit_improvement_threshold for stop_crit_wait_iterations iterations. Default: 0.05
    stop_crit_wait_iterations (int): number of iterations to wait for an improved loss before early stopping; see stop_crit_improvement_threshold. Default: 10
    store_hists (bool): if True, store all histories; otherwise specify individually with the following arguments. Default: False
    store_loss_hist (bool): if True, store and return iteration data for the loss. Default: False
    store_scale_hist (bool): if True, store and return iteration data for the kernel scale parameter. Default: False
    store_lengthscales_hist (bool): if True, store and return iteration data for the kernel lengthscale parameters. Default: False
    store_noise_hist (bool): if True, store and return iteration data for the noise. Default: False
    store_task_kernel_hist (bool): if True, store and return iteration data for the task kernel. Default: False
    verbose (int): log every verbose iterations; set to 0 for silent mode. Default: 5
    verbose_indent (int): size of the indent applied when logging, helpful when logging multiple models. Default: 4
    masks (torch.Tensor): only optimize outputs corresponding to y[...,*masks]. Default: None
    cv_weights (Union[str,torch.Tensor]): weights for cross validation. Default: 1

Returns:

    data (dict): iteration data which, depending on storage arguments, may include keys in
        ["loss_hist","scale_hist","lengthscales_hist","noise_hist","task_kernel_hist"]

Source code in fastgps/abstract_gp.py
def fit(self,
    loss_metric:str = "MLL",
    iterations:int = 5000,
    lr:float = None,
    optimizer:torch.optim.Optimizer = None,
    stop_crit_improvement_threshold:float = 5e-2,
    stop_crit_wait_iterations:int = 10,
    store_hists:bool = False,
    store_loss_hist:bool = False, 
    store_scale_hist:bool = False, 
    store_lengthscales_hist:bool = False,
    store_noise_hist:bool = False,
    store_task_kernel_hist:bool = False,
    verbose:int = 5,
    verbose_indent:int = 4,
    masks:torch.Tensor = None,
    cv_weights:torch.Tensor = 1,
    ):
    """
    Args:
        loss_metric (str): either "MLL" (Marginal Log Likelihood) or "CV" (Cross Validation) or "GCV" (Generalized CV)
        iterations (int): number of optimization iterations
        lr (float): learning rate for default optimizer
        optimizer (torch.optim.Optimizer): optimizer defaulted to `torch.optim.Rprop(self.parameters(),lr=lr)`
        stop_crit_improvement_threshold (float): stop fitting when the maximum number of iterations is reached or the best loss is not reduced by `stop_crit_improvement_threshold` for `stop_crit_wait_iterations` iterations
        stop_crit_wait_iterations (int): number of iterations to wait for improved loss before early stopping, see the argument description for `stop_crit_improvement_threshold`
        store_hists (bool): if True then store all hists, otherwise specify individually with the following arguments 
        store_loss_hist (bool): if `True`, store and return iteration data for loss
        store_scale_hist (bool): if `True`, store and return iteration data for the kernel scale parameter
        store_lengthscales_hist (bool): if `True`, store and return iteration data for the kernel lengthscale parameters
        store_noise_hist (bool): if `True`, store and return iteration data for noise
        store_task_kernel_hist (bool): if `True`, store and return iteration data for the task kernel
        verbose (int): log every `verbose` iterations, set to `0` for silent mode
        verbose_indent (int): size of the indent to be applied when logging, helpful for logging multiple models
        masks (torch.Tensor): only optimize outputs corresponding to `y[...,*masks]`
        cv_weights (Union[str,torch.Tensor]): weights for cross validation

    Returns:
        data (dict): iteration data which, depending on storage arguments, may include keys in 
            ```python
            ["loss_hist","scale_hist","lengthscales_hist","noise_hist","task_kernel_hist"]
            ```
    """
    assert isinstance(loss_metric,str) and loss_metric.upper() in ["MLL","GCV","CV"] 
    assert (self.n>0).any(), "cannot fit without data"
    assert isinstance(iterations,int) and iterations>=0
    if optimizer is None:
        optimizer = self.get_default_optimizer(lr)
    assert isinstance(optimizer,torch.optim.Optimizer)
    assert isinstance(store_hists,bool), "require bool store_hists"
    assert isinstance(store_loss_hist,bool), "require bool store_loss_hist" 
    assert isinstance(store_scale_hist,bool), "require bool store_scale_hist" 
    assert isinstance(store_lengthscales_hist,bool), "require bool store_lengthscales_hist" 
    assert isinstance(store_noise_hist,bool), "require bool store_noise_hist"
    assert isinstance(store_task_kernel_hist,bool), "require bool store_task_kernel_hist"
    assert (isinstance(verbose,int) or isinstance(verbose,bool)) and verbose>=0, "require verbose is a non-negative int"
    assert isinstance(verbose_indent,int) and verbose_indent>=0, "require verbose_indent is a non-negative int"
    assert np.isscalar(stop_crit_improvement_threshold) and 0<stop_crit_improvement_threshold, "require stop_crit_improvement_threshold is a positive float"
    assert isinstance(stop_crit_wait_iterations,int) and stop_crit_wait_iterations>0
    assert masks is None or (isinstance(masks,torch.Tensor))
    loss_metric = loss_metric.upper()
    logtol = np.log(1+stop_crit_improvement_threshold)
    store_loss_hist = store_hists or store_loss_hist
    store_scale_hist = store_hists or (store_scale_hist and self.raw_scale.requires_grad)
    store_lengthscales_hist = store_hists or (store_lengthscales_hist and self.raw_lengthscales.requires_grad)
    store_noise_hist = store_hists or (store_noise_hist and self.raw_noise.requires_grad)
    store_task_kernel_hist = store_hists or (store_task_kernel_hist and (self.raw_factor_task_kernel.requires_grad or self.raw_noise_task_kernel.requires_grad))
    if store_loss_hist: loss_hist = torch.empty(iterations+1)
    if store_scale_hist: scale_hist = torch.empty(torch.Size([iterations+1])+self.raw_scale.shape)
    if store_lengthscales_hist: lengthscales_hist = torch.empty(torch.Size([iterations+1])+self.raw_lengthscales.shape)
    if store_noise_hist: noise_hist = torch.empty(torch.Size([iterations+1])+self.raw_noise.shape)
    if store_task_kernel_hist: task_kernel_hist = torch.empty(torch.Size([iterations+1])+self.gram_matrix_tasks.shape)
    if masks is not None:
        masks = torch.atleast_2d(masks)
        assert masks.ndim==2
        assert len(masks)<=len(self.shape_batch)
        d_out = torch.empty(self.shape_batch)[...,*masks].numel()
    else:
        d_out = int(torch.tensor(self.shape_batch).prod())
    if verbose:
        _s = "%16s | %-10s | %-10s | %-10s"%("iter of %.1e"%iterations,"loss","term1","term2")
        print(" "*verbose_indent+_s)
        print(" "*verbose_indent+"~"*len(_s))
    mll_const = d_out*self.n.sum()*np.log(2*np.pi)
    stop_crit_best_loss = torch.inf 
    stop_crit_save_loss = torch.inf 
    stop_crit_iterations_without_improvement_loss = 0
    os.environ["FASTGP_FORCE_RECOMPILE"] = "True"
    inv_log_det_cache = self.get_inv_log_det_cache()
    for i in range(iterations+1):
        if loss_metric=="GCV":
            numer,denom = inv_log_det_cache.get_gcv_numer_denom()
            if masks is None:
                term1 = numer 
                term2 = denom
            else:
                term1 = numer[...,*masks,:]
                term2 = denom.expand(list(self.shape_batch)+[1])[...,*masks,:]
            loss = (term1/term2).sum()
            metric_val = loss
        elif loss_metric=="MLL":
            norm_term,logdet = inv_log_det_cache.get_norm_term_logdet_term()
            if masks is None:
                term1 = norm_term.sum()
                term2 = d_out/torch.tensor(logdet.shape).prod()*logdet.sum()
            else:
                term1 = norm_term[...,*masks,0].sum()
                term2 = logdet.expand(list(self.shape_batch)+[1])[...,*masks,0].sum()
            loss = 1/2*(term1+term2+mll_const)
            metric_val = -loss
        elif loss_metric=="CV":
            inv_diag = inv_log_det_cache.get_inv_diag()
            term1 = term2 = torch.nan*torch.ones(1)
            del os.environ["FASTGP_FORCE_RECOMPILE"]
            coeffs = self.coeffs # avoid repeated Gram matrix inverse computations
            os.environ["FASTGP_FORCE_RECOMPILE"] = "True"
            squared_sums = ((coeffs/inv_diag)**2*cv_weights).sum(-1,keepdim=True)
            if masks is None:
                loss = squared_sums.sum()
            else:
                loss = squared_sums[...,*masks,0].sum()
        else:
            assert False, "loss_metric parsing implementation error"
        if loss.item()<stop_crit_best_loss:
            stop_crit_best_loss = loss.item()
            best_params = {param[0]:param[1].data.clone() for param in self.named_parameters()}
        if (stop_crit_save_loss-loss.item())>logtol:
            stop_crit_iterations_without_improvement_loss = 0
            stop_crit_save_loss = stop_crit_best_loss
        else:
            stop_crit_iterations_without_improvement_loss += 1
        break_condition = i==iterations or stop_crit_iterations_without_improvement_loss==stop_crit_wait_iterations
        if store_loss_hist: loss_hist[i] = metric_val.item()
        if store_scale_hist: scale_hist[i] = self.scale.detach().to(scale_hist.device)
        if store_lengthscales_hist: lengthscales_hist[i] = self.lengthscales.detach().to(lengthscales_hist.device)
        if store_noise_hist: noise_hist[i] = self.noise.detach().to(noise_hist.device)
        if store_task_kernel_hist: task_kernel_hist[i] = self.gram_matrix_tasks.detach().to(task_kernel_hist.device)
        if verbose and (i%verbose==0 or break_condition):
            _s = "%16.2e | %-10.2e | %-10.2e | %-10.2e"%(i,loss.item(),term1.item() if term1.numel()==1 else torch.nan,term2.item() if term2.numel()==1 else torch.nan)
            print(" "*verbose_indent+_s)
        if break_condition: break
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    for pname,pdata in best_params.items():
        setattr(self,pname,torch.nn.Parameter(pdata,requires_grad=getattr(self,pname).requires_grad))
    del os.environ["FASTGP_FORCE_RECOMPILE"]
    data = {"iterations":i}
    if store_loss_hist: data["loss_hist"] = loss_hist[:(i+1)]
    if store_scale_hist: data["scale_hist"] = scale_hist[:(i+1)]
    if store_lengthscales_hist: data["lengthscales_hist"] = lengthscales_hist[:(i+1)]
    if store_noise_hist: data["noise_hist"] = noise_hist[:(i+1)]
    if store_task_kernel_hist: data["task_kernel_hist"] = task_kernel_hist[:(i+1)]
    return data
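
A minimal usage sketch of fit (the model name sgp and the iteration count are assumptions), storing a couple of histories; the returned keys follow the list above:

```python
data = sgp.fit(
    loss_metric="MLL",            # or "GCV" / "CV"
    iterations=200,
    store_loss_hist=True,
    store_lengthscales_hist=True,
    verbose=0,
)
print(data["iterations"])              # iterations actually run; early stopping may end before 200
print(data["loss_hist"])               # loss at each completed iteration
print(data["lengthscales_hist"].shape)
```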

get_x_next

get_x_next(n, task=None)

Get the next sampling locations.

Parameters:

    n (Union[int,torch.Tensor]): maximum sample index per task. Required.
    task (Union[int,torch.Tensor]): task index. Default: None

Returns:

    x_next (Union[torch.Tensor,List]): next samples in the sequence

Source code in fastgps/abstract_gp.py
def get_x_next(self, n:Union[int,torch.Tensor], task:Union[int,torch.Tensor]=None):
    """
    Get the next sampling locations. 

    Args:
        n (Union[int,torch.Tensor]): maximum sample index per task
        task (Union[int,torch.Tensor]): task index

    Returns:
        x_next (Union[torch.Tensor,List]): next samples in the sequence
    """
    if isinstance(n,(int,np.int64)): n = torch.tensor([n],dtype=int,device=self.device) 
    if isinstance(n,list): n = torch.tensor(n,dtype=int)
    if task is None: task = self.default_task
    inttask = isinstance(task,int)
    if inttask: task = torch.tensor([task],dtype=int)
    if isinstance(task,list): task = torch.tensor(task,dtype=int)
    assert isinstance(n,torch.Tensor) and isinstance(task,torch.Tensor) and n.ndim==task.ndim==1 and len(n)==len(task)
    assert (n>=self.n[task]).all() and torch.logical_or(n==0,n&(n-1)==0).all(), "maximum sequence index must be a power of 2 greater than the current number of samples"
    x_next = [self.xxb_seqs[l][self.n[l]:n[i]][0] for i,l in enumerate(task)]
    return x_next[0] if inttask else x_next
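
A typical acquisition loop, sketched under the assumption of a single-task model gp and a user-supplied objective f; the assertions above require each requested n to be a power of 2 at least the current sample count:

```python
n = 2**4
for _ in range(3):
    x_next = gp.get_x_next(n)   # points in the sequence from the current count up to index n
    y_next = f(x_next)          # evaluate the objective at the new points
    gp.add_y_next(y_next)
    gp.fit(verbose=0)
    n *= 2                      # stay on powers of 2
```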

add_y_next

add_y_next(y_next, task=None)

Add samples to the GP.

Parameters:

    y_next (Union[torch.Tensor,List]): new function evaluations at the next sampling locations. Required.
    task (Union[int,torch.Tensor]): task index. Default: None

Source code in fastgps/abstract_gp.py
def add_y_next(self, y_next:Union[torch.Tensor,List], task:Union[int,torch.Tensor]=None):
    """
    Add samples to the GP. 

    Args:
        y_next (Union[torch.Tensor,List]): new function evaluations at next sampling locations
        task (Union[int,torch.Tensor]): task index
    """
    if isinstance(y_next,torch.Tensor): y_next = [y_next]
    if task is None: task = self.default_task
    if isinstance(task,int): task = torch.tensor([task],dtype=int)
    if isinstance(task,list): task = torch.tensor(task,dtype=int)
    assert isinstance(y_next,list) and isinstance(task,torch.Tensor) and task.ndim==1 and len(y_next)==len(task)
    assert all(y_next[i].shape[:-1]==self.shape_batch for i in range(len(y_next)))
    for i,l in enumerate(task):
        self._y[l] = torch.cat([self._y[l],y_next[i]],-1)
    self.n = torch.tensor([self._y[i].size(-1) for i in range(self.num_tasks)],dtype=int,device=self.device)
    assert torch.logical_or(self.n==0,(self.n&(self.n-1)==0)).all(), "total samples must be power of 2"
    self.m = torch.where(self.n==0,-1,torch.log2(self.n)).to(int)
    for key in list(self.inv_log_det_cache_dict.keys()):
        if (torch.tensor(key)<self.n.cpu()).any():
            del self.inv_log_det_cache_dict[key]
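
For multitask problems, both get_x_next and add_y_next accept lists aligned with a task index list or tensor; a hypothetical two-task sketch (f0 and f1 are assumed task functions):

```python
x0_next = gp.get_x_next(2**4, task=0)
x1_next = gp.get_x_next(2**3, task=1)
gp.add_y_next([f0(x0_next), f1(x1_next)], task=[0, 1])
```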

post_mean

post_mean(x, task=None, eval=True)

Posterior mean.

Parameters:

    x (torch.Tensor[N,d]): sampling locations. Required.
    task (Union[int,torch.Tensor[T]]): task index. Default: None
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True

Returns:

    pmean (torch.Tensor[...,T,N]): posterior mean

Source code in fastgps/abstract_gp.py
def post_mean(self, x:torch.Tensor, task:Union[int,torch.Tensor]=None, eval:bool=True):
    """
    Posterior mean. 

    Args:
        x (torch.Tensor[N,d]): sampling locations
        task (Union[int,torch.Tensor[T]]): task index
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`

    Returns:
        pmean (torch.Tensor[...,T,N]): posterior mean
    """
    coeffs = self.coeffs
    kmat_tasks = self.gram_matrix_tasks
    if eval:
        incoming_grad_enabled = torch.is_grad_enabled()
        torch.set_grad_enabled(False)
    assert x.ndim==2 and x.size(1)==self.d, "x must be a torch.Tensor with shape (-1,d)"
    if task is None: task = self.default_task
    inttask = isinstance(task,int)
    if inttask: task = torch.tensor([task],dtype=int)
    if isinstance(task,list): task = torch.tensor(task,dtype=int)
    assert task.ndim==1 and (task>=0).all() and (task<self.num_tasks).all()
    kmat = torch.cat([torch.cat([kmat_tasks[...,task[l0],l1,None,None]*self._kernel(x[:,None,:],self.get_xb(l1)[None,:,:],self.derivatives[task[l0]],self.derivatives[l1],self.derivatives_coeffs[task[l0]],self.derivatives_coeffs[l1]) for l1 in range(self.num_tasks)],dim=-1)[...,None,:,:] for l0 in range(len(task))],dim=-3)
    #pmean = (kmat*coeffs[...,None,None,:]).sum(-1)
    pmean = torch.einsum("...i,...i->...",kmat,coeffs[...,None,None,:])
    if eval:
        torch.set_grad_enabled(incoming_grad_enabled)
    return pmean[...,0,:] if inttask else pmean
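
A shape-oriented sketch (inputs assumed): with an integer task the task axis is squeezed away, while a list of tasks keeps a leading task dimension:

```python
x = torch.rand((100, gp.d))
pmean = gp.post_mean(x)                  # single-task / default task: shape (..., 100)
pmean_mt = gp.post_mean(x, task=[0, 1])  # shape (..., 2, 100)
```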

post_var

post_var(x, task=None, n=None, eval=True)

Posterior variance.

Parameters:

    x (torch.Tensor[N,d]): sampling locations. Required.
    task (Union[int,torch.Tensor[T]]): task indices. Default: None
    n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior variance. Default: None
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True

Returns:

    pvar (torch.Tensor[T,N]): posterior variance

Source code in fastgps/abstract_gp.py
def post_var(self, x:torch.Tensor, task:Union[int,torch.Tensor]=None, n:Union[int,torch.Tensor]=None, eval:bool=True):
    """
    Posterior variance.

    Args:
        x (torch.Tensor[N,d]): sampling locations
        task (Union[int,torch.Tensor[T]]): task indices
        n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior variance.
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`

    Returns:
        pvar (torch.Tensor[T,N]): posterior variance
    """
    if n is None: n = self.n
    if isinstance(n,int): n = torch.tensor([n],dtype=int,device=self.device)
    assert isinstance(n,torch.Tensor) and (n&(n-1)==0).all() and (n>=self.n).all(), "require n are all power of two greater than or equal to self.n"
    assert x.ndim==2 and x.size(1)==self.d, "x must be a torch.Tensor with shape (-1,d)"
    kmat_tasks = self.gram_matrix_tasks
    if eval:
        incoming_grad_enabled = torch.is_grad_enabled()
        torch.set_grad_enabled(False)
    if task is None: task = self.default_task
    inttask = isinstance(task,int)
    if inttask: task = torch.tensor([task],dtype=int)
    if isinstance(task,list): task = torch.tensor(task,dtype=int)
    assert task.ndim==1 and (task>=0).all() and (task<self.num_tasks).all()
    kmat_new = torch.cat([kmat_tasks[...,task[l0],task[l0],None,None]*self._kernel(x,x,self.derivatives[task[l0]],self.derivatives[task[l0]],self.derivatives_coeffs[task[l0]],self.derivatives_coeffs[task[l0]])[...,None,:] for l0 in range(len(task))],dim=-2)
    kmat = torch.cat([torch.cat([kmat_tasks[...,task[l0],l1,None,None]*self._kernel(x[:,None,:],self.get_xb(l1,n=n[l1])[None,:,:],self.derivatives[task[l0]],self.derivatives[l1],self.derivatives_coeffs[task[l0]],self.derivatives_coeffs[l1]) for l1 in range(self.num_tasks)],dim=-1)[...,None,:,:] for l0 in range(len(task))],dim=-3)
    kmat_perm = torch.permute(kmat,[-3,-2]+[i for i in range(kmat.ndim-3)]+[-1])
    t_perm = self.get_inv_log_det_cache(n).gram_matrix_solve(kmat_perm)
    t = torch.permute(t_perm,[2+i for i in range(t_perm.ndim-3)]+[0,1,-1])
    diag = kmat_new-(t*kmat).sum(-1)
    diag[diag<0] = 0 
    if eval:
        torch.set_grad_enabled(incoming_grad_enabled)
    return diag[...,0,:] if inttask else diag
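
Because n may exceed the current sample size, post_var can anticipate the variance after future observations, as the StandardGP doctest below verifies; a sketch with assumed names:

```python
pvar_now = gp.post_var(x)               # variance conditioned on the current data
pvar_future = gp.post_var(x, n=2*gp.n)  # variance assuming each task's sample count doubles
```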

post_cov

post_cov(x0, x1, task0=None, task1=None, n=None, eval=True)

Posterior covariance.

Parameters:

    x0 (torch.Tensor[N,d]): left sampling locations. Required.
    x1 (torch.Tensor[M,d]): right sampling locations. Required.
    task0 (Union[int,torch.Tensor[T1]]): left task index. Default: None
    task1 (Union[int,torch.Tensor[T2]]): right task index. Default: None
    n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior covariance. Default: None
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True

Returns:

    pcov (torch.Tensor[T1,T2,N,M]): posterior covariance matrix

Source code in fastgps/abstract_gp.py
def post_cov(self, x0:torch.Tensor, x1:torch.Tensor, task0:Union[int,torch.Tensor]=None, task1:Union[int,torch.Tensor]=None, n:Union[int,torch.Tensor]=None, eval:bool=True):
    """
    Posterior covariance. 

    Args:
        x0 (torch.Tensor[N,d]): left sampling locations
        x1 (torch.Tensor[M,d]): right sampling locations
        task0 (Union[int,torch.Tensor[T1]]): left task index
        task1 (Union[int,torch.Tensor[T2]]): right task index
        n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior covariance.
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`

    Returns:
        pcov (torch.Tensor[T1,T2,N,M]): posterior covariance matrix
    """
    if n is None: n = self.n
    if isinstance(n,int): n = torch.tensor([n],dtype=int,device=self.device)
    assert isinstance(n,torch.Tensor) and (n&(n-1)==0).all() and (n>=self.n).all(), "require n are all power of two greater than or equal to self.n"
    assert x0.ndim==2 and x0.size(1)==self.d, "x0 must be a torch.Tensor with shape (-1,d)"
    assert x1.ndim==2 and x1.size(1)==self.d, "x1 must be a torch.Tensor with shape (-1,d)"
    kmat_tasks = self.gram_matrix_tasks
    if eval:
        incoming_grad_enabled = torch.is_grad_enabled()
        torch.set_grad_enabled(False)
    if task0 is None: task0 = self.default_task
    inttask0 = isinstance(task0,int)
    if inttask0: task0 = torch.tensor([task0],dtype=int)
    if isinstance(task0,list): task0 = torch.tensor(task0,dtype=int)
    assert task0.ndim==1 and (task0>=0).all() and (task0<self.num_tasks).all()
    if task1 is None: task1 = self.default_task
    inttask1 = isinstance(task1,int)
    if inttask1: task1 = torch.tensor([task1],dtype=int)
    if isinstance(task1,list): task1 = torch.tensor(task1,dtype=int)
    assert task1.ndim==1 and (task1>=0).all() and (task1<self.num_tasks).all()
    equal = torch.equal(x0,x1) and torch.equal(task0,task1)
    kmat_new = torch.cat([torch.cat([kmat_tasks[...,task0[l0],task1[l1],None,None,None,None]*self._kernel(x0[:,None,:],x1[None,:,:],self.derivatives[task0[l0]],self.derivatives[task1[l1]],self.derivatives_coeffs[task0[l0]],self.derivatives_coeffs[task1[l1]])[...,None,None,:,:] for l1 in range(len(task1))],dim=-3) for l0 in range(len(task0))],dim=-4)
    kmat1 = torch.cat([torch.cat([kmat_tasks[...,task0[l0],l1,None,None]*self._kernel(x0[:,None,:],self.get_xb(l1,n=n[l1])[None,:,:],self.derivatives[task0[l0]],self.derivatives[l1],self.derivatives_coeffs[task0[l0]],self.derivatives_coeffs[l1]) for l1 in range(self.num_tasks)],dim=-1)[...,None,:,:] for l0 in range(len(task0))],dim=-3)
    kmat2 = kmat1 if equal else torch.cat([torch.cat([kmat_tasks[...,task1[l0],l1,None,None]*self._kernel(x1[:,None,:],self.get_xb(l1,n=n[l1])[None,:,:],self.derivatives[task1[l0]],self.derivatives[l1],self.derivatives_coeffs[task1[l0]],self.derivatives_coeffs[l1]) for l1 in range(self.num_tasks)],dim=-1)[...,None,:,:] for l0 in range(len(task1))],dim=-3)
    kmat2_perm = torch.permute(kmat2,[-3,-2]+[i for i in range(kmat2.ndim-3)]+[-1])
    t_perm = self.get_inv_log_det_cache(n).gram_matrix_solve(kmat2_perm)
    t = torch.permute(t_perm,[2+i for i in range(t_perm.ndim-3)]+[0,1,-1])
    kmat = kmat_new-(kmat1[...,:,None,:,None,:]*t[...,None,:,None,:,:]).sum(-1)
    if equal:
        tmesh,nmesh = torch.meshgrid(torch.arange(kmat.size(0),device=self.device),torch.arange(x0.size(0),device=x0.device),indexing="ij")            
        tidx,nidx = tmesh.ravel(),nmesh.ravel()
        diag = kmat[...,tidx,tidx,nidx,nidx]
        diag[diag<0] = 0 
        kmat[...,tidx,tidx,nidx,nidx] = diag 
    if eval:
        torch.set_grad_enabled(incoming_grad_enabled)
    if inttask0 and inttask1:
        return kmat[...,0,0,:,:]
    elif inttask0 and not inttask1:
        return kmat[...,0,:,:,:]
    elif not inttask0 and inttask1:
        return kmat[...,:,0,:,:]
    else: # not inttask0 and not inttask1
        return kmat
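
A sketch of the returned shapes (inputs assumed): integer task indices squeeze the corresponding task axes, while list or tensor task indices keep them:

```python
x0 = torch.rand((100, gp.d))
x1 = torch.rand((50, gp.d))
pcov = gp.post_cov(x0, x1)                                 # default task: shape (..., 100, 50)
pcov_mt = gp.post_cov(x0, x1, task0=[0, 1], task1=[0, 1])  # shape (..., 2, 2, 100, 50)
```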

post_error

post_error(x, task=None, n=None, confidence=0.99, eval=True)

Posterior error.

Parameters:

    x (torch.Tensor[N,d]): sampling locations. Required.
    task (Union[int,torch.Tensor[T]]): task indices. Default: None
    n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior variance. Default: None
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True
    confidence (float): confidence level in \((0,1)\) for the credible interval. Default: 0.99

Returns:

    pvar (torch.Tensor[T]): posterior variance
    quantile (np.float64): scipy.stats.norm.ppf(1-(1-confidence)/2)
    perror (torch.Tensor[T]): posterior error

Source code in fastgps/abstract_gp.py
def post_error(self, x:torch.Tensor, task:Union[int,torch.Tensor]=None, n:Union[int,torch.Tensor]=None, confidence:float=0.99, eval:bool=True):
    """
    Posterior error. 

    Args:
        x (torch.Tensor[N,d]): sampling locations
        task (Union[int,torch.Tensor[T]]): task indices
        n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior variance.
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`
        confidence (float): confidence level in $(0,1)$ for the credible interval

    Returns:
        pvar (torch.Tensor[T]): posterior variance
        quantile (np.float64):
            ```python
            scipy.stats.norm.ppf(1-(1-confidence)/2)
            ```
        perror (torch.Tensor[T]): posterior error
    """
    assert np.isscalar(confidence) and 0<confidence<1, "confidence must be between 0 and 1"
    q = scipy.stats.norm.ppf(1-(1-confidence)/2)
    pvar = self.post_var(x,task=task,n=n,eval=eval,)
    pstd = torch.sqrt(pvar)
    perror = q*pstd
    return pvar,q,perror

post_ci

post_ci(x, task=None, confidence=0.99, eval=True)

Posterior credible interval.

Parameters:

    x (torch.Tensor[N,d]): sampling locations. Required.
    task (Union[int,torch.Tensor[T]]): task indices. Default: None
    confidence (float): confidence level in \((0,1)\) for the credible interval. Default: 0.99
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True

Returns:

    pmean (torch.Tensor[...,T,N]): posterior mean
    pvar (torch.Tensor[T,N]): posterior variance
    quantile (np.float64): scipy.stats.norm.ppf(1-(1-confidence)/2)
    pci_low (torch.Tensor[...,T,N]): posterior credible interval lower bound
    pci_high (torch.Tensor[...,T,N]): posterior credible interval upper bound

Source code in fastgps/abstract_gp.py
def post_ci(self, x:torch.Tensor, task:Union[int,torch.Tensor]=None, confidence:float=0.99, eval:bool=True):
    """
    Posterior credible interval.

    Args:
        x (torch.Tensor[N,d]): sampling locations
        task (Union[int,torch.Tensor[T]]): task indices
        confidence (float): confidence level in $(0,1)$ for the credible interval
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`

    Returns:
        pmean (torch.Tensor[...,T,N]): posterior mean
        pvar (torch.Tensor[T,N]): posterior variance 
        quantile (np.float64):
            ```python
            scipy.stats.norm.ppf(1-(1-confidence)/2)
            ```
        pci_low (torch.Tensor[...,T,N]): posterior credible interval lower bound
        pci_high (torch.Tensor[...,T,N]): posterior credible interval upper bound
    """
    assert np.isscalar(confidence) and 0<confidence<1, "confidence must be between 0 and 1"
    q = scipy.stats.norm.ppf(1-(1-confidence)/2)
    pmean = self.post_mean(x,task=task,eval=eval)
    pvar,q,perror = self.post_error(x,task=task,confidence=confidence,eval=eval)
    pci_low = pmean-perror
    pci_high = pmean+perror
    return pmean,pvar,q,pci_low,pci_high
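
A pointwise credible-interval sketch mirroring the StandardGP doctest below; q is the two-sided standard normal quantile for the requested confidence level:

```python
pmean, pvar, q, ci_low, ci_high = gp.post_ci(x, confidence=0.99)
width = ci_high - ci_low   # elementwise width of the 99% credible interval
```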

post_cubature_mean

post_cubature_mean(task=None, eval=True)

Posterior cubature mean.

Parameters:

    task (Union[int,torch.Tensor[T]]): task indices. Default: None
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True

Returns:

    pcmean (torch.Tensor[...,T]): posterior cubature mean

Source code in fastgps/abstract_gp.py
def post_cubature_mean(self, task:Union[int,torch.Tensor]=None, eval:bool=True):
    """
    Posterior cubature mean. 

    Args:
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`
        task (Union[int,torch.Tensor[T]]): task indices

    Returns:
        pcmean (torch.Tensor[...,T]): posterior cubature mean
    """
    raise NotImplementedError()

post_cubature_var

post_cubature_var(task=None, n=None, eval=True)

Posterior cubature variance.

Parameters:

    task (Union[int,torch.Tensor[T]]): task indices. Default: None
    n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior cubature variance. Default: None
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True

Returns:

    pcvar (torch.Tensor[T]): posterior cubature variance

Source code in fastgps/abstract_gp.py
def post_cubature_var(self, task:Union[int,torch.Tensor]=None, n:Union[int,torch.Tensor]=None, eval:bool=True):
    """
    Posterior cubature variance. 

    Args:
        task (Union[int,torch.Tensor[T]]): task indices
        n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior cubature variance.
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`

    Returns:
        pcvar (torch.Tensor[T]): posterior cubature variance
    """
    raise NotImplementedError()

post_cubature_cov

post_cubature_cov(task0=None, task1=None, n=None, eval=True)

Posterior cubature covariance.

Parameters:

    task0 (Union[int,torch.Tensor[T1]]): task indices. Default: None
    task1 (Union[int,torch.Tensor[T2]]): task indices. Default: None
    n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior cubature covariance. Default: None
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True

Returns:

    pcvar (torch.Tensor[T1,T2]): posterior cubature covariance

Source code in fastgps/abstract_gp.py
def post_cubature_cov(self, task0:Union[int,torch.Tensor]=None, task1:Union[int,torch.Tensor]=None, n:Union[int,torch.Tensor]=None, eval:bool=True):
    """
    Posterior cubature covariance. 

    Args:
        task0 (Union[int,torch.Tensor[T1]]): task indices
        task1 (Union[int,torch.Tensor[T2]]): task indices
        n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior cubature covariance.
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`

    Returns:
        pcvar (torch.Tensor[T1,T2]): posterior cubature covariance
    """
    raise NotImplementedError()

post_cubature_error

post_cubature_error(task=None, n=None, confidence=0.99, eval=True)

Posterior cubature error.

Parameters:

    task (Union[int,torch.Tensor[T]]): task indices. Default: None
    n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior cubature variance. Default: None
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True
    confidence (float): confidence level in \((0,1)\) for the credible interval. Default: 0.99

Returns:

    pcvar (torch.Tensor[T]): posterior cubature variance
    quantile (np.float64): scipy.stats.norm.ppf(1-(1-confidence)/2)
    pcerror (torch.Tensor[T]): posterior cubature error

Source code in fastgps/abstract_gp.py
def post_cubature_error(self, task:Union[int,torch.Tensor]=None, n:Union[int,torch.Tensor]=None, confidence:float=0.99, eval:bool=True):
    """
    Posterior cubature error. 

    Args:
        task (Union[int,torch.Tensor[T]]): task indices
        n (Union[int,torch.Tensor[num_tasks]]): number of points at which to evaluate the posterior cubature variance.
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`
        confidence (float): confidence level in $(0,1)$ for the credible interval

    Returns:
        pcvar (torch.Tensor[T]): posterior cubature variance
        quantile (np.float64):
            ```python
            scipy.stats.norm.ppf(1-(1-confidence)/2)
            ```
        pcerror (torch.Tensor[T]): posterior cubature error
    """
    assert np.isscalar(confidence) and 0<confidence<1, "confidence must be between 0 and 1"
    q = scipy.stats.norm.ppf(1-(1-confidence)/2)
    pcvar = self.post_cubature_var(task=task,n=n,eval=eval)
    pcstd = torch.sqrt(pcvar)
    pcerror = q*pcstd
    return pcvar,q,pcerror

post_cubature_ci

post_cubature_ci(task=None, confidence=0.99, eval=True)

Posterior cubature credible interval.

Parameters:

    task (Union[int,torch.Tensor[T]]): task indices. Default: None
    confidence (float): confidence level in \((0,1)\) for the credible interval. Default: 0.99
    eval (bool): if True, disable gradients, otherwise use torch.is_grad_enabled(). Default: True

Returns:

    pcmean (torch.Tensor[...,T]): posterior cubature mean
    pcvar (torch.Tensor[T]): posterior cubature variance
    quantile (np.float64): scipy.stats.norm.ppf(1-(1-confidence)/2)
    pcci_low (torch.Tensor[...,T]): posterior cubature credible interval lower bound
    pcci_high (torch.Tensor[...,T]): posterior cubature credible interval upper bound

Source code in fastgps/abstract_gp.py
def post_cubature_ci(self, task:Union[int,torch.Tensor]=None, confidence:float=0.99, eval:bool=True):
    """
    Posterior cubature credible interval.

    Args:
        task (Union[int,torch.Tensor[T]]): task indices
        confidence (float): confidence level in $(0,1)$ for the credible interval
        eval (bool): if `True`, disable gradients, otherwise use `torch.is_grad_enabled()`

    Returns:
        pcmean (torch.Tensor[...,T]): posterior cubature mean
        pcvar (torch.Tensor[T]): posterior cubature variance
        quantile (np.float64):
            ```python
            scipy.stats.norm.ppf(1-(1-confidence)/2)
            ```
        pcci_low (torch.Tensor[...,T]): posterior cubature credible interval lower bound
        pcci_high (torch.Tensor[...,T]): posterior cubature credible interval upper bound
    """
    assert np.isscalar(confidence) and 0<confidence<1, "confidence must be between 0 and 1"
    q = scipy.stats.norm.ppf(1-(1-confidence)/2)
    pcmean = self.post_cubature_mean(task=task,eval=eval) 
    pcvar,q,pcerror = self.post_cubature_error(task=task,confidence=confidence,eval=eval)
    pcci_low = pcmean-pcerror
    pcci_high = pcmean+pcerror
    return pcmean,pcvar,q,pcci_low,pcci_high
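
Concrete subclasses such as StandardGP implement the cubature mean and variance, so the credible interval for the integral can be read off directly, as in the doctest below:

```python
pcmean, pcvar, q, pcci_low, pcci_high = sgp.post_cubature_ci(confidence=0.99)
print(pcmean)                 # posterior cubature mean
print(pcci_low, pcci_high)    # 99% credible interval for the integral
```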

StandardGP

StandardGP(seqs, num_tasks=None, seed_for_seq=None, scale=1.0, lengthscales=1.0, noise=0.0001, factor_task_kernel=1.0, rank_factor_task_kernel=None, noise_task_kernel=1.0, device='cpu', tfs_scale=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_lengthscales=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_noise=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_factor_task_kernel=(lambda x: x, lambda x: x), tfs_noise_task_kernel=(lambda x: torch.log(x), lambda x: torch.exp(x)), requires_grad_scale=True, requires_grad_lengthscales=True, requires_grad_noise=False, requires_grad_factor_task_kernel=None, requires_grad_noise_task_kernel=None, shape_batch=torch.Size([]), shape_scale=torch.Size([1]), shape_lengthscales=None, shape_noise=torch.Size([1]), shape_factor_task_kernel=None, shape_noise_task_kernel=None, derivatives=None, derivatives_coeffs=None, kernel_class='Gaussian', adaptive_nugget=True)

Bases: AbstractGP

Standard Gaussian process regression

Examples:

>>> torch.set_default_dtype(torch.float64)
>>> def f_ackley(x, a=20, b=0.2, c=2*np.pi, scaling=32.768):
...     # https://www.sfu.ca/~ssurjano/ackley.html
...     assert x.ndim==2
...     x = 2*scaling*x-scaling
...     t1 = a*torch.exp(-b*torch.sqrt(torch.mean(x**2,1)))
...     t2 = torch.exp(torch.mean(torch.cos(c*x),1))
...     t3 = a+np.exp(1)
...     y = -t1-t2+t3
...     return y
>>> n = 2**6
>>> d = 2
>>> sgp = StandardGP(qmcpy.DigitalNetB2(dimension=d,seed=7))
>>> x_next = sgp.get_x_next(n)
>>> y_next = f_ackley(x_next)
>>> sgp.add_y_next(y_next)
>>> rng = torch.Generator().manual_seed(17)
>>> x = torch.rand((2**7,d),generator=rng)
>>> y = f_ackley(x)
>>> pmean = sgp.post_mean(x)
>>> pmean.shape
torch.Size([128])
>>> torch.linalg.norm(y-pmean)/torch.linalg.norm(y)
tensor(0.0771)
>>> torch.linalg.norm(sgp.post_mean(sgp.x)-sgp.y)/torch.linalg.norm(y)
tensor(0.0559)
>>> data = sgp.fit(verbose=0)
>>> list(data.keys())
['iterations']
>>> torch.linalg.norm(y-sgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0562)
>>> z = torch.rand((2**8,d),generator=rng)
>>> pcov = sgp.post_cov(x,z)
>>> pcov.shape
torch.Size([128, 256])
>>> pcov = sgp.post_cov(x,x)
>>> pcov.shape
torch.Size([128, 128])
>>> assert (pcov.diagonal()>=0).all()
>>> pvar = sgp.post_var(x)
>>> pvar.shape
torch.Size([128])
>>> assert torch.allclose(pcov.diagonal(),pvar)
>>> pmean,pstd,q,ci_low,ci_high = sgp.post_ci(x,confidence=0.99)
>>> ci_low.shape
torch.Size([128])
>>> ci_high.shape
torch.Size([128])
>>> sgp.post_cubature_mean()
tensor(20.0279)
>>> sgp.post_cubature_var()
tensor(0.0043)
>>> pcmean,pcvar,q,pcci_low,pcci_high = sgp.post_cubature_ci(confidence=0.99)
>>> pcci_low
tensor(19.8582)
>>> pcci_high
tensor(20.1976)
>>> pcov_future = sgp.post_cov(x,z,n=2*n)
>>> pvar_future = sgp.post_var(x,n=2*n)
>>> pcvar_future = sgp.post_cubature_var(n=2*n)
>>> x_next = sgp.get_x_next(2*n)
>>> y_next = f_ackley(x_next)
>>> sgp.add_y_next(y_next)
>>> torch.linalg.norm(y-sgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.1060)
>>> assert torch.allclose(sgp.post_cov(x,z),pcov_future)
>>> assert torch.allclose(sgp.post_var(x),pvar_future)
>>> assert torch.allclose(sgp.post_cubature_var(),pcvar_future)
>>> data = sgp.fit(verbose=False)
>>> torch.linalg.norm(y-sgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0740)
>>> x_next = sgp.get_x_next(4*n)
>>> y_next = f_ackley(x_next)
>>> sgp.add_y_next(y_next)
>>> torch.linalg.norm(y-sgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0722)
>>> data = sgp.fit(verbose=False)
>>> torch.linalg.norm(y-sgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0516)
>>> pcov_16n = sgp.post_cov(x,z,n=16*n)
>>> pvar_16n = sgp.post_var(x,n=16*n)
>>> pcvar_16n = sgp.post_cubature_var(n=16*n)
>>> x_next = sgp.get_x_next(16*n)
>>> y_next = f_ackley(x_next)
>>> sgp.add_y_next(y_next)
>>> assert torch.allclose(sgp.post_cov(x,z),pcov_16n)
>>> assert torch.allclose(sgp.post_var(x),pvar_16n)
>>> assert torch.allclose(sgp.post_cubature_var(),pcvar_16n)

Parameters:

    seqs (Union[int,qmcpy.DiscreteDistribution,List]): list of sequence generators. If an int d is passed in we use
        [qmcpy.DigitalNetB2(d,seed=seed) for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)]
        See the qmcpy.DiscreteDistribution docs for more info. Required.
    num_tasks (int): number of tasks. Default: None
    seed_for_seq (int): seed used for digital net randomization. Default: None
    scale (float): kernel global scaling parameter. Default: 1.0
    lengthscales (Union[torch.Tensor[d],float]): vector of kernel lengthscales. If a scalar is passed in then lengthscales is set to a constant vector. Default: 1.0
    noise (float): positive noise variance i.e. nugget term. Default: 0.0001
    factor_task_kernel (Union[torch.Tensor[num_tasks,rank_factor_task_kernel],int]): for \(F\) the factor_task_kernel the task kernel is \(FF^T + \text{diag}(\boldsymbol{v})\) where rank_factor_task_kernel<=num_tasks and \(\boldsymbol{v}\) is the noise_task_kernel. Default: 1.0
    rank_factor_task_kernel (int): see the description of factor_task_kernel above. Defaults to 0 for single-task problems and 1 for multi-task problems. Default: None
    noise_task_kernel (Union[torch.Tensor[num_tasks],float]): see the description of factor_task_kernel above. Default: 1.0
    device (torch.device): torch device, which is required to support torch.float64. Default: 'cpu'
    tfs_scale (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform. Default: (lambda x: log(x), lambda x: exp(x))
    tfs_lengthscales (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform. Default: (lambda x: log(x), lambda x: exp(x))
    tfs_noise (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform. Default: (lambda x: log(x), lambda x: exp(x))
    tfs_factor_task_kernel (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform. Default: (lambda x: x, lambda x: x)
    tfs_noise_task_kernel (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform. Default: (lambda x: log(x), lambda x: exp(x))
    requires_grad_scale (bool): whether or not to optimize the scale parameter. Default: True
    requires_grad_lengthscales (bool): whether or not to optimize the lengthscale parameters. Default: True
    requires_grad_noise (bool): whether or not to optimize the noise parameter. Default: False
    requires_grad_factor_task_kernel (bool): whether or not to optimize the factor for the task kernel. Default: None
    requires_grad_noise_task_kernel (bool): whether or not to optimize the noise for the task kernel. Default: None
    shape_batch (torch.Size): shape of the batch output for each task. Default: torch.Size([])
    shape_scale (torch.Size): shape of the scale parameter. Default: torch.Size([1])
    shape_lengthscales (torch.Size): shape of the lengthscales parameter, defaults to torch.Size([d]) where d is the dimension. Default: None
    shape_noise (torch.Size): shape of the noise parameter. Default: torch.Size([1])
    shape_factor_task_kernel (torch.Size): shape of the factor for the task kernel, defaults to torch.Size([num_tasks,r]) where r is the rank, see the description of factor_task_kernel. Default: None
    shape_noise_task_kernel (torch.Size): shape of the noise for the task kernel, defaults to torch.Size([num_tasks]). Default: None
    derivatives (list): list of derivative orders, e.g. to include a function and its gradient set
        derivatives = [torch.zeros(d,dtype=int)]+[ej for ej in torch.eye(d,dtype=int)]
        (a construction sketch follows this list). Default: None
    derivatives_coeffs (list): list of derivative coefficients where if derivatives[k].shape==(p,d) then we should have derivatives_coeffs[k].shape==(p,). Default: None
    adaptive_nugget (bool): if True, use the adaptive nugget which modifies noises based on trace ratios. Default: True
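
A hedged construction sketch of the derivatives option above: the function and its \(d\) first partial derivatives are treated as \(1+d\) tasks, using the integer shortcut for seqs (all names here are assumptions):

```python
d = 2
derivatives = [torch.zeros(d, dtype=int)] + [ej for ej in torch.eye(d, dtype=int)]
sgp_grad = StandardGP(
    d,                   # int shortcut: one randomized digital net per task
    num_tasks=1 + d,     # one task for the function plus one per partial derivative
    seed_for_seq=7,
    derivatives=derivatives,
)
```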
Source code in fastgps/standard_gp.py
def __init__(self,
        seqs:Union[qmcpy.IIDStdUniform,int],
        num_tasks:int = None,
        seed_for_seq:int = None,
        scale:float = 1., 
        lengthscales:Union[torch.Tensor,float] = 1., 
        noise:float = 1e-4,
        factor_task_kernel:Union[torch.Tensor,int] = 1.,
        rank_factor_task_kernel:int = None,
        noise_task_kernel:Union[torch.Tensor,float] = 1.,
        device:torch.device = "cpu",
        tfs_scale:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_lengthscales:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_noise:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_factor_task_kernel:Tuple[callable,callable] = ((lambda x: x, lambda x: x)),#((lambda x: x**(1/3)),(lambda x: x**3)),
        tfs_noise_task_kernel:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        requires_grad_scale:bool = True, 
        requires_grad_lengthscales:bool = True, 
        requires_grad_noise:bool = False, 
        requires_grad_factor_task_kernel:bool = None,
        requires_grad_noise_task_kernel:bool = None,
        shape_batch:torch.Size = torch.Size([]),
        shape_scale:torch.Size = torch.Size([1]), 
        shape_lengthscales:torch.Size = None,
        shape_noise:torch.Size = torch.Size([1]),
        shape_factor_task_kernel:torch.Size = None, 
        shape_noise_task_kernel:torch.Size = None,
        derivatives:list = None,
        derivatives_coeffs:list = None,
        kernel_class:str = "Gaussian",
        adaptive_nugget:bool = True,
        ):
    """
    Args:
        seqs (Union[int,qmcpy.DiscreteDistribution,List]): list of sequence generators. If an int `d` is passed in we use 
            ```python
            [qmcpy.DigitalNetB2(d,seed=seed) for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)]
            ```
            See the <a href="https://qmcpy.readthedocs.io/en/latest/algorithms.html#discrete-distribution-class" target="_blank">`qmcpy.DiscreteDistribution` docs</a> for more info. 
        num_tasks (int): number of tasks 
        seed_for_seq (int): seed used for digital net randomization
        scale (float): kernel global scaling parameter
        lengthscales (Union[torch.Tensor[d],float]): vector of kernel lengthscales. 
            If a scalar is passed in then `lengthscales` is set to a constant vector. 
        noise (float): positive noise variance i.e. nugget term
        factor_task_kernel (Union[Tensor[num_tasks,rank_factor_task_kernel],int]): for $F$ the `factor_task_kernel` the task kernel is $FF^T + \\text{diag}(\\boldsymbol{v})$ 
            where `rank_factor_task_kernel<=num_tasks` and $\\boldsymbol{v}$ is the `noise_task_kernel`.
        rank_factor_task_kernel (int): see the description of `factor_task_kernel` above. Defaults to 0 for single task problems and 1 for multi task problems.
        noise_task_kernel (Union[torch.Tensor[num_tasks],float]): see the description of `factor_task_kernel` above 
        device (torch.device): torch device which is required to support `torch.float64`
        tfs_scale (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_lengthscales (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_noise (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_factor_task_kernel (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_noise_task_kernel (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        requires_grad_scale (bool): whether or not to optimize the scale parameter
        requires_grad_lengthscales (bool): whether or not to optimize lengthscale parameters
        requires_grad_noise (bool): whether or not to optimize the noise parameter
        requires_grad_factor_task_kernel (bool): whether or not to optimize the factor for the task kernel
        requires_grad_noise_task_kernel (bool): whether or not to optimize the noise for the task kernel
        shape_batch (torch.Size): shape of the batch output for each task
        shape_scale (torch.Size): shape of the scale parameter, defaults to `torch.Size([1])`
        shape_lengthscales (torch.Size): shape of the lengthscales parameter, defaults to `torch.Size([d])` where `d` is the dimension
        shape_noise (torch.Size): shape of the noise parameter, defaults to `torch.Size([1])`
        shape_factor_task_kernel (torch.Size): shape of the factor for the task kernel, defaults to `torch.Size([num_tasks,r])` where `r` is the rank, see the description of `factor_task_kernel`
        shape_noise_task_kernel (torch.Size): shape of the noise for the task kernel, defaults to `torch.Size([num_tasks])`
        derivatives (list): list of derivative orders e.g. to include a function and its gradient set 
            ```python
            derivatives = [torch.zeros(d,dtype=int)]+[ej for ej in torch.eye(d,dtype=int)]
            ```
        derivatives_coeffs (list): list of derivative coefficients where if `derivatives[k].shape==(p,d)` then we should have `derivatives_coeffs[k].shape==(p,)`
        adaptive_nugget (bool): if True, use the adaptive nugget which modifies noises based on trace ratios.  
    """
    if num_tasks is None: 
        solo_task = True
        default_task = 0 
        num_tasks = 1
    else:
        assert isinstance(num_tasks,int) and num_tasks>0
        solo_task = False
        default_task = torch.arange(num_tasks)
    if isinstance(seqs,int):
        seqs = np.array([qmcpy.DigitalNetB2(seqs,seed=seed) for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)],dtype=object)
    if isinstance(seqs,qmcpy.DiscreteDistribution):
        seqs = np.array([seqs],dtype=object)
    if isinstance(seqs,list):
        seqs = np.array(seqs,dtype=object)
    assert seqs.shape==(num_tasks,), "seqs should be a length num_tasks=%d list"%num_tasks
    assert all(seqs[i].replications==1 for i in range(num_tasks)), "each seq should have only 1 replication"
    kernel_class = kernel_class.lower()
    assert kernel_class in ["gaussian"]
    self.kernel_class = kernel_class
    super().__init__(
        seqs,
        num_tasks,
        default_task,
        solo_task,
        scale,
        lengthscales,
        noise,
        factor_task_kernel,
        rank_factor_task_kernel,
        noise_task_kernel,
        device,
        tfs_scale,
        tfs_lengthscales,
        tfs_noise,
        tfs_factor_task_kernel,
        tfs_noise_task_kernel,
        requires_grad_scale,
        requires_grad_lengthscales,
        requires_grad_noise,
        requires_grad_factor_task_kernel,
        requires_grad_noise_task_kernel,
        shape_batch,
        shape_scale, 
        shape_lengthscales,
        shape_noise,
        shape_factor_task_kernel, 
        shape_noise_task_kernel,
        derivatives,
        derivatives_coeffs,
        adaptive_nugget,
    )
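
As a shape check for the derivatives and derivatives_coeffs arguments described above, here is a minimal sketch in plain torch (illustrative values only): with d = 2, one task observes plain function values and a second observes the combination f_x0 + 2*f_x1 of first partial derivatives, so derivatives[k] has shape (p, d) and derivatives_coeffs[k] has shape (p,).

```python
import torch

d = 2
derivatives = [
    torch.zeros((1, d), dtype=int),             # task 0: zeroth-order derivative, shape (1, d)
    torch.tensor([[1, 0], [0, 1]], dtype=int),  # task 1: first partials in x0 and x1, shape (2, d)
]
derivatives_coeffs = [
    torch.ones(1),                              # task 0 coefficients, shape (1,)
    torch.tensor([1.0, 2.0]),                   # task 1: f_x0 + 2*f_x1, shape (2,)
]
```

These lists would be paired with num_tasks=2 in the constructor; the values are purely illustrative.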

AbstractFastGP

AbstractFastGP(alpha, ft, ift, *args, **kwargs)

Bases: AbstractGP

Source code in fastgps/abstract_fast_gp.py
def __init__(self,
        alpha,
        ft,
        ift,
        *args,
        **kwargs
    ):
    super().__init__(*args,**kwargs)
    # alpha
    assert (np.isscalar(alpha) and alpha%1==0) or (isinstance(alpha,torch.Tensor) and alpha.shape==(self.d,)), "alpha should be an int or a torch.Tensor of length d"
    if np.isscalar(alpha):
        alpha = int(alpha)*torch.ones(self.d,dtype=int,device=self.device)
    self.alpha = alpha
    # fast transforms 
    self.ft_unstable = ft
    self.ift_unstable = ift
    # storage and dynamic caches
    self.k1parts_seq = np.array([[_K1PartsSeq(self,self.xxb_seqs[l0],self.xxb_seqs[l1],self.derivatives[l0],self.derivatives[l1]) if l1>=l0 else None for l1 in range(self.num_tasks)] for l0 in range(self.num_tasks)],dtype=object)
    self.lam_caches = np.array([[_LamCaches(self,l0,l1,self.derivatives[l0],self.derivatives[l1],self.derivatives_coeffs[l0],self.derivatives_coeffs[l1]) if l1>=l0 else None for l1 in range(self.num_tasks)] for l0 in range(self.num_tasks)],dtype=object)
    self.ytilde_cache = np.array([_YtildeCache(self,i) for i in range(self.num_tasks)],dtype=object)
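
The alpha handling above accepts either a scalar smoothness or a per-dimension tensor of length d; a scalar is broadcast internally. A tiny sketch of the two equivalent forms in plain torch (illustrative d):

```python
import torch

d = 3
alpha_scalar = 2
alpha_vector = torch.tensor([2, 2, 2])  # equivalent per-dimension form, shape (d,)
# the scalar form is expanded to int(alpha)*torch.ones(d,dtype=int), as in __init__ above
assert torch.equal(alpha_vector, int(alpha_scalar) * torch.ones(d, dtype=int))
```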

ft

ft(x)

One dimensional fast transform along the last dimension. For FastGPLattice this is the orthonormal Fast Fourier Transform (FFT). For FastGPDigitalNetB2 this is the orthonormal Fast Walsh Hadamard Transform (FWHT).

Parameters:

Name Type Description Default
x Tensor

inputs to be transformed along the last dimension. Requires n = x.size(-1) to be a power of 2.

required

Returns:

Name Type Description
y Tensor

transformed inputs with the same shape as x

Source code in fastgps/abstract_fast_gp.py
def ft(self, x):
    """
    One dimensional fast transform along the last dimension. 
        For `FastGPLattice` this is the orthonormal Fast Fourier Transform (FFT). 
        For `FastGPDigitalNetB2` this is the orthonormal Fast Walsh Hadamard Transform (FWHT). 

    Args: 
        x (torch.Tensor): inputs to be transformed along the last dimension. Requires `n = x.size(-1)` to be a power of 2. 

    Returns: 
        y (torch.Tensor): transformed inputs with the same shape as `x` 
    """
    xmean = x.mean(-1)
    y = self.ft_unstable(x-xmean[...,None])
    y[...,0] += xmean*np.sqrt(x.size(-1))
    return y

ift

ift(x)

One dimensional inverse fast transform along the last dimension. For FastGPLattice this is the orthonormal Inverse Fast Fourier Transform (IFFT). For FastGPDigitalNetB2 this is the orthonormal Fast Walsh Hadamard Transform (FWHT).

Parameters:

Name Type Description Default
x Tensor

inputs to be transformed along the last dimension. Requires n = x.size(-1) to be a power of 2.

required

Returns:

Name Type Description
y Tensor

transformed inputs with the same shape as x

Source code in fastgps/abstract_fast_gp.py
def ift(self, x):
    """
    One dimensional inverse fast transform along the last dimension. 
        For `FastGPLattice` this is the orthonormal Inverse Fast Fourier Transform (IFFT). 
        For `FastGPDigitalNetB2` this is the orthonormal Fast Walsh Hadamard Transform (FWHT). 

    Args: 
        x (torch.Tensor): inputs to be transformed along the last dimension. Requires `n = x.size(-1)` to be a power of 2. 

    Returns: 
        y (torch.Tensor): transformed inputs with the same shape as `x` 
    """
    xmean = x.mean(-1)
    y = self.ift_unstable(x-xmean[...,None])
    y[...,0] += xmean*np.sqrt(x.size(-1))
    return y
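
Because the transform for FastGPDigitalNetB2 is the orthonormal FWHT, ift applies the same map and is its own inverse, so a round trip should recover the input exactly. A minimal sketch, assuming FastGPDigitalNetB2 is exported at the package level as in the examples below:

```python
import torch
import qmcpy
from fastgps import FastGPDigitalNetB2  # assumed package-level export

torch.set_default_dtype(torch.float64)  # required precision for the fast transforms
fgp = FastGPDigitalNetB2(qmcpy.DigitalNetB2(dimension=2, seed=7))

x = torch.rand(2**4)                    # x.size(-1) must be a power of 2
y = fgp.ft(x)                           # orthonormal fast Walsh-Hadamard transform
assert torch.allclose(fgp.ift(y), x)    # ift undoes ft
```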

FastGPLattice

FastGPLattice(seqs, num_tasks=None, seed_for_seq=None, alpha=2, scale=1.0, lengthscales=1.0, noise=1e-08, factor_task_kernel=1.0, rank_factor_task_kernel=None, noise_task_kernel=1.0, device='cpu', tfs_scale=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_lengthscales=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_noise=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_factor_task_kernel=(lambda x: x, lambda x: x), tfs_noise_task_kernel=(lambda x: torch.log(x), lambda x: torch.exp(x)), requires_grad_scale=True, requires_grad_lengthscales=True, requires_grad_noise=False, requires_grad_factor_task_kernel=None, requires_grad_noise_task_kernel=None, shape_batch=torch.Size([]), shape_scale=torch.Size([1]), shape_lengthscales=None, shape_noise=torch.Size([1]), shape_factor_task_kernel=None, shape_noise_task_kernel=None, derivatives=None, derivatives_coeffs=None, compile_fts=False, compile_fts_kwargs={}, adaptive_nugget=False)

Bases: AbstractFastGP

Fast Gaussian process regression using lattice points and shift invariant kernels

Examples:

>>> torch.set_default_dtype(torch.float64)
>>> def f_ackley(x, a=20, b=0.2, c=2*np.pi, scaling=32.768):
...     # https://www.sfu.ca/~ssurjano/ackley.html
...     assert x.ndim==2
...     x = 2*scaling*x-scaling
...     t1 = a*torch.exp(-b*torch.sqrt(torch.mean(x**2,1)))
...     t2 = torch.exp(torch.mean(torch.cos(c*x),1))
...     t3 = a+np.exp(1)
...     y = -t1-t2+t3
...     return y
>>> n = 2**10
>>> d = 2
>>> fgp = FastGPLattice(seqs = qmcpy.Lattice(dimension=d,seed=7))
>>> x_next = fgp.get_x_next(n)
>>> y_next = f_ackley(x_next)
>>> fgp.add_y_next(y_next)
>>> rng = torch.Generator().manual_seed(17)
>>> x = torch.rand((2**7,d),generator=rng)
>>> y = f_ackley(x)
>>> pmean = fgp.post_mean(x)
>>> pmean.shape
torch.Size([128])
>>> torch.linalg.norm(y-pmean)/torch.linalg.norm(y)
tensor(0.0348)
>>> assert torch.allclose(fgp.post_mean(fgp.x),fgp.y,atol=1e-3)
>>> fgp.post_cubature_mean()
tensor(20.1842)
>>> fgp.post_cubature_var()
tensor(7.0015e-09)
>>> data = fgp.fit(verbose=0)
>>> list(data.keys())
['iterations']
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0361)
>>> z = torch.rand((2**8,d),generator=rng)
>>> pcov = fgp.post_cov(x,z)
>>> pcov.shape
torch.Size([128, 256])
>>> pcov = fgp.post_cov(x,x)
>>> pcov.shape
torch.Size([128, 128])
>>> assert (pcov.diagonal()>=0).all()
>>> pvar = fgp.post_var(x)
>>> pvar.shape
torch.Size([128])
>>> assert torch.allclose(pcov.diagonal(),pvar)
>>> pmean,pstd,q,ci_low,ci_high = fgp.post_ci(x,confidence=0.99)
>>> ci_low.shape
torch.Size([128])
>>> ci_high.shape
torch.Size([128])
>>> fgp.post_cubature_mean()
tensor(20.1842)
>>> fgp.post_cubature_var()
tensor(3.1129e-06)
>>> pcmean,pcvar,q,pcci_low,pcci_high = fgp.post_cubature_ci(confidence=0.99)
>>> pcci_low
tensor(20.1797)
>>> pcci_high
tensor(20.1888)
>>> pcov_future = fgp.post_cov(x,z,n=2*n)
>>> pvar_future = fgp.post_var(x,n=2*n)
>>> pcvar_future = fgp.post_cubature_var(n=2*n)
>>> x_next = fgp.get_x_next(2*n)
>>> y_next = f_ackley(x_next)
>>> fgp.add_y_next(y_next)
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0304)
>>> assert torch.allclose(fgp.post_cov(x,z),pcov_future)
>>> assert torch.allclose(fgp.post_var(x),pvar_future)
>>> assert torch.allclose(fgp.post_cubature_var(),pcvar_future)
>>> data = fgp.fit(verbose=False)
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0274)
>>> x_next = fgp.get_x_next(4*n)
>>> y_next = f_ackley(x_next)
>>> fgp.add_y_next(y_next)
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0277)
>>> data = fgp.fit(verbose=False)
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0276)
>>> pcov_16n = fgp.post_cov(x,z,n=16*n)
>>> pvar_16n = fgp.post_var(x,n=16*n)
>>> pcvar_16n = fgp.post_cubature_var(n=16*n)
>>> x_next = fgp.get_x_next(16*n)
>>> y_next = f_ackley(x_next)
>>> fgp.add_y_next(y_next)
>>> assert torch.allclose(fgp.post_cov(x,z),pcov_16n)
>>> assert torch.allclose(fgp.post_var(x),pvar_16n)
>>> assert torch.allclose(fgp.post_cubature_var(),pcvar_16n)

Parameters:

Name Type Description Default
seqs [int, Lattice, List]

list of lattice sequence generators with order="NATURAL" and randomize in ["FALSE","SHIFT"]. If an int d is passed in we use

[qmcpy.Lattice(d,seed=seed,randomize="SHIFT") for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)]
See the qmcpy.Lattice docs for more info

required
num_tasks int

number of tasks

None
seed_for_seq int

seed used for lattice randomization

None
alpha int

smoothness parameter

2
scale float

kernel global scaling parameter

1.0
lengthscales Union[Tensor[d], float]

vector of kernel lengthscales. If a scalar is passed in then lengthscales is set to a constant vector.

1.0
noise float

positive noise variance i.e. nugget term

1e-08
factor_task_kernel Union[Tensor[num_tasks, rank_factor_task_kernel], int]

for \(F\) the factor_task_kernel the task kernel is \(FF^T + \text{diag}(\boldsymbol{v})\) where rank_factor_task_kernel<=num_tasks and \(\boldsymbol{v}\) is the noise_task_kernel.

1.0
rank_factor_task_kernel int

see the description of factor_task_kernel above. Defaults to 0 for single task problems and 1 for multi task problems.

None
noise_task_kernel Union[Tensor[num_tasks], float]

see the description of factor_task_kernel above

1.0
device device

torch device which is required to support torch.float64

'cpu'
tfs_scale Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: log(x), lambda x: exp(x))
tfs_lengthscales Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: log(x), lambda x: exp(x))
tfs_noise Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: log(x), lambda x: exp(x))
tfs_factor_task_kernel Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: x, lambda x: x)
tfs_noise_task_kernel Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: log(x), lambda x: exp(x))
requires_grad_scale bool

whether or not to optimize the scale parameter

True
requires_grad_lengthscales bool

whether or not to optimize lengthscale parameters

True
requires_grad_noise bool

whether or not to optimize the noise parameter

False
requires_grad_factor_task_kernel bool

whether or not to optimize the factor for the task kernel

None
requires_grad_noise_task_kernel bool

whether or not to optimize the noise for the task kernel

None
shape_batch Size

shape of the batch output for each task

Size([])
shape_scale Size

shape of the scale parameter, defaults to torch.Size([1])

Size([1])
shape_lengthscales Size

shape of the lengthscales parameter, defaults to torch.Size([d]) where d is the dimension

None
shape_noise Size

shape of the noise parameter, defaults to torch.Size([1])

Size([1])
shape_factor_task_kernel Size

shape of the factor for the task kernel, defaults to torch.Size([num_tasks,r]) where r is the rank, see the description of factor_task_kernel

None
shape_noise_task_kernel Size

shape of the noise for the task kernel, defaults to torch.Size([num_tasks])

None
derivatives list

list of derivative orders e.g. to include a function and its gradient set

derivatives = [torch.zeros(d,dtype=int)]+[ej for ej in torch.eye(d,dtype=int)]

None
derivatives_coeffs list

list of derivative coefficients where if derivatives[k].shape==(p,d) then we should have derivatives_coeffs[k].shape==(p,)

None
compile_fts bool

if True, use torch.compile(qmcpy.fftbr_torch,**compile_fts_kwargs) and torch.compile(qmcpy.ifftbr_torch,**compile_fts_kwargs), otherwise use the uncompiled versions

False
compile_fts_kwargs dict

keyword arguments to torch.compile, see the compile_fts argument

{}
adaptive_nugget bool

if True, use the adaptive nugget which modifies noises based on trace ratios.

False
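
A minimal construction sketch combining a few of the options documented above (illustrative values only); when an int is passed for seqs the randomly shifted lattices are built internally, one per task:

```python
import torch
from fastgps import FastGPLattice  # assumed package-level export, as in the examples above

torch.set_default_dtype(torch.float64)
fgp = FastGPLattice(
    seqs=2,                                  # int d: shifted lattices built internally
    seed_for_seq=7,                          # seed for the lattice randomization
    alpha=2,                                 # smoothness of the shift invariant kernel
    lengthscales=torch.tensor([0.5, 1.0]),   # one lengthscale per dimension
    noise=1e-8,
    requires_grad_noise=True,                # also optimize the nugget when fitting
)
```
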
Source code in fastgps/fast_gp_lattice.py
def __init__(self,
        seqs:qmcpy.Lattice,
        num_tasks:int = None,
        seed_for_seq:int = None,
        alpha:int = 2,
        scale:float = 1., 
        lengthscales:Union[torch.Tensor,float] = 1., 
        noise:float = 1e-8, 
        factor_task_kernel:Union[torch.Tensor,int] = 1., 
        rank_factor_task_kernel:int = None,
        noise_task_kernel:Union[torch.Tensor,float] = 1.,
        device:torch.device = "cpu",
        tfs_scale:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_lengthscales:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_noise:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_factor_task_kernel:Tuple[callable,callable] = ((lambda x: x, lambda x: x)),#((lambda x: x**(1/3)),(lambda x: x**3)),
        tfs_noise_task_kernel:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        requires_grad_scale:bool = True, 
        requires_grad_lengthscales:bool = True, 
        requires_grad_noise:bool = False, 
        requires_grad_factor_task_kernel:bool = None,
        requires_grad_noise_task_kernel:bool = None,
        shape_batch:torch.Size = torch.Size([]),
        shape_scale:torch.Size = torch.Size([1]), 
        shape_lengthscales:torch.Size = None,
        shape_noise:torch.Size = torch.Size([1]),
        shape_factor_task_kernel:torch.Size = None, 
        shape_noise_task_kernel:torch.Size = None,
        derivatives:list = None,
        derivatives_coeffs:list = None,
        compile_fts:bool = False,
        compile_fts_kwargs:dict = {},
        adaptive_nugget:bool = False,
        ):
    """
    Args:
        seqs ([int,qmcpy.Lattice,List]): list of lattice sequence generators
            with order="NATURAL" and randomize in `["FALSE","SHIFT"]`. If an int `d` is passed in we use 
            ```python
            [qmcpy.Lattice(d,seed=seed,randomize="SHIFT") for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)]
            ```
            See the <a href="https://qmcpy.readthedocs.io/en/latest/algorithms.html#module-qmcpy.discrete_distribution.lattice.lattice" target="_blank">`qmcpy.Lattice` docs</a> for more info
        num_tasks (int): number of tasks
        seed_for_seq (int): seed used for lattice randomization
        alpha (int): smoothness parameter
        scale (float): kernel global scaling parameter
        lengthscales (Union[torch.Tensor[d],float]): vector of kernel lengthscales. 
            If a scalar is passed in then `lengthscales` is set to a constant vector. 
        noise (float): positive noise variance i.e. nugget term
        factor_task_kernel (Union[Tensor[num_tasks,rank_factor_task_kernel],int]): for $F$ the `factor_task_kernel` the task kernel is $FF^T + \\text{diag}(\\boldsymbol{v})$ 
            where `rank_factor_task_kernel<=num_tasks` and $\\boldsymbol{v}$ is the `noise_task_kernel`.
        rank_factor_task_kernel (int): see the description of `factor_task_kernel` above. Defaults to 0 for single task problems and 1 for multi task problems.
        noise_task_kernel (Union[torch.Tensor[num_tasks],float]): see the description of `factor_task_kernel` above
        device (torch.device): torch device which is required to support `torch.float64`
        tfs_scale (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_lengthscales (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_noise (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_factor_task_kernel (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_noise_task_kernel (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        requires_grad_scale (bool): whether or not to optimize the scale parameter
        requires_grad_lengthscales (bool): whether or not to optimize lengthscale parameters
        requires_grad_noise (bool): whether or not to optimize the noise parameter
        requires_grad_factor_task_kernel (bool): whether or not to optimize the factor for the task kernel
        requires_grad_noise_task_kernel (bool): whether or not to optimize the noise for the task kernel
        shape_batch (torch.Size): shape of the batch output for each task
        shape_scale (torch.Size): shape of the scale parameter, defaults to `torch.Size([1])`
        shape_lengthscales (torch.Size): shape of the lengthscales parameter, defaults to `torch.Size([d])` where `d` is the dimension
        shape_noise (torch.Size): shape of the noise parameter, defaults to `torch.Size([1])`
        shape_factor_task_kernel (torch.Size): shape of the factor for the task kernel, defaults to `torch.Size([num_tasks,r])` where `r` is the rank, see the description of `factor_task_kernel`
        shape_noise_task_kernel (torch.Size): shape of the noise for the task kernel, defaults to `torch.Size([num_tasks])`
        derivatives (list): list of derivative orders e.g. to include a function and its gradient set 
            ```python
            derivatives = [torch.zeros(d,dtype=int)]+[ej for ej in torch.eye(d,dtype=int)]
            ```
        derivatives_coeffs (list): list of derivative coefficients where if `derivatives[k].shape==(p,d)` then we should have `derivatives_coeffs[k].shape==(p,)`
        compile_fts (bool): if `True`, use `torch.compile(qmcpy.fftbr_torch,**compile_fts_kwargs)` and `torch.compile(qmcpy.ifftbr_torch,**compile_fts_kwargs)`, otherwise use the uncompiled versions
        compile_fts_kwargs (dict): keyword arguments to `torch.compile`, see the `compile_fts` argument
        adaptive_nugget (bool): if True, use the adaptive nugget which modifies noises based on trace ratios.  
    """
    assert isinstance(alpha,int) and alpha in qmcpy.kernel_methods.util.shift_invar_ops.BERNOULLIPOLYSDICT.keys(), "alpha must be in %s"%list(qmcpy.kernel_methods.util.shift_invar_ops.BERNOULLIPOLYSDICT.keys())
    if num_tasks is None: 
        solo_task = True
        default_task = 0 
        num_tasks = 1
    else:
        assert isinstance(num_tasks,int) and num_tasks>0
        solo_task = False
        default_task = torch.arange(num_tasks)
    if isinstance(seqs,int):
        seqs = np.array([qmcpy.Lattice(seqs,seed=seed,randomize="SHIFT") for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)],dtype=object)
    if isinstance(seqs,qmcpy.Lattice):
        seqs = np.array([seqs],dtype=object)
    if isinstance(seqs,list):
        seqs = np.array(seqs,dtype=object)
    assert seqs.shape==(num_tasks,), "seqs should be a length num_tasks=%d list"%num_tasks
    assert all(isinstance(seqs[i],qmcpy.Lattice) for i in range(num_tasks)), "each seq should be a qmcpy.Lattice instance"
    assert all(seqs[i].order=="NATURAL" for i in range(num_tasks)), "each seq should be in 'NATURAL' order"
    assert all(seqs[i].replications==1 for i in range(num_tasks)), "each seq should have only 1 replication"
    assert all(seqs[i].randomize in ['FALSE','SHIFT'] for i in range(num_tasks)), "each seq should have randomize in ['FALSE','SHIFT']"
    ft = torch.compile(qmcpy.fftbr_torch,**compile_fts_kwargs) if compile_fts else qmcpy.fftbr_torch
    ift = torch.compile(qmcpy.ifftbr_torch,**compile_fts_kwargs) if compile_fts else qmcpy.ifftbr_torch
    super().__init__(
        alpha,
        ft,
        ift,
        seqs,
        num_tasks,
        default_task,
        solo_task,
        scale,
        lengthscales,
        noise,
        factor_task_kernel,
        rank_factor_task_kernel,
        noise_task_kernel,
        device,
        tfs_scale,
        tfs_lengthscales,
        tfs_noise,
        tfs_factor_task_kernel,
        tfs_noise_task_kernel,
        requires_grad_scale,
        requires_grad_lengthscales,
        requires_grad_noise,
        requires_grad_factor_task_kernel,
        requires_grad_noise_task_kernel,
        shape_batch,
        shape_scale, 
        shape_lengthscales,
        shape_noise,
        shape_factor_task_kernel, 
        shape_noise_task_kernel,
        derivatives,
        derivatives_coeffs,
        adaptive_nugget,
    )
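
Since compile_fts simply wraps the qmcpy transforms in torch.compile as shown above, enabling it is a one-flag change; a minimal sketch (any speedup depends on your torch build; the kwargs shown are ordinary torch.compile options):

```python
import torch
import qmcpy
from fastgps import FastGPLattice  # assumed package-level export

torch.set_default_dtype(torch.float64)
fgp = FastGPLattice(
    qmcpy.Lattice(dimension=2, seed=7),
    compile_fts=True,                                # torch.compile the fftbr/ifftbr transforms
    compile_fts_kwargs={"mode": "reduce-overhead"},  # forwarded to torch.compile
)
```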

FastGPDigitalNetB2

FastGPDigitalNetB2(seqs, num_tasks=None, seed_for_seq=None, alpha=2, scale=1.0, lengthscales=1.0, noise=1e-16, factor_task_kernel=1.0, rank_factor_task_kernel=None, noise_task_kernel=1.0, device='cpu', tfs_scale=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_lengthscales=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_noise=(lambda x: torch.log(x), lambda x: torch.exp(x)), tfs_factor_task_kernel=(lambda x: x, lambda x: x), tfs_noise_task_kernel=(lambda x: torch.log(x), lambda x: torch.exp(x)), requires_grad_scale=True, requires_grad_lengthscales=True, requires_grad_noise=False, requires_grad_factor_task_kernel=None, requires_grad_noise_task_kernel=None, shape_batch=torch.Size([]), shape_scale=torch.Size([1]), shape_lengthscales=None, shape_noise=torch.Size([1]), shape_factor_task_kernel=None, shape_noise_task_kernel=None, derivatives=None, derivatives_coeffs=None, compile_fts=False, compile_fts_kwargs={}, adaptive_nugget=False)

Bases: AbstractFastGP

Fast Gaussian process regression using digitally shifted digital nets paired with digitally shift invariant kernels

Examples:

>>> torch.set_default_dtype(torch.float64)
>>> def f_ackley(x, a=20, b=0.2, c=2*np.pi, scaling=32.768):
...     # https://www.sfu.ca/~ssurjano/ackley.html
...     assert x.ndim==2
...     x = 2*scaling*x-scaling
...     t1 = a*torch.exp(-b*torch.sqrt(torch.mean(x**2,1)))
...     t2 = torch.exp(torch.mean(torch.cos(c*x),1))
...     t3 = a+np.exp(1)
...     y = -t1-t2+t3
...     return y
>>> n = 2**10
>>> d = 2
>>> fgp = FastGPDigitalNetB2(qmcpy.DigitalNetB2(dimension=d,seed=7))
>>> x_next = fgp.get_x_next(n)
>>> y_next = f_ackley(x_next)
>>> fgp.add_y_next(y_next)
>>> rng = torch.Generator().manual_seed(17)
>>> x = torch.rand((2**7,d),generator=rng)
>>> y = f_ackley(x)
>>> pmean = fgp.post_mean(x)
>>> pmean.shape
torch.Size([128])
>>> torch.linalg.norm(y-pmean)/torch.linalg.norm(y)
tensor(0.0336)
>>> assert torch.allclose(fgp.post_mean(fgp.x),fgp.y)
>>> data = fgp.fit(verbose=0)
>>> list(data.keys())
['iterations']
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0355)
>>> z = torch.rand((2**8,d),generator=rng)
>>> pcov = fgp.post_cov(x,z)
>>> pcov.shape
torch.Size([128, 256])
>>> pcov = fgp.post_cov(x,x)
>>> pcov.shape
torch.Size([128, 128])
>>> assert (pcov.diagonal()>=0).all()
>>> pvar = fgp.post_var(x)
>>> pvar.shape
torch.Size([128])
>>> assert torch.allclose(pcov.diagonal(),pvar)
>>> pmean,pstd,q,ci_low,ci_high = fgp.post_ci(x,confidence=0.99)
>>> ci_low.shape
torch.Size([128])
>>> ci_high.shape
torch.Size([128])
>>> fgp.post_cubature_mean()
tensor(20.1896)
>>> fgp.post_cubature_var()
tensor(0.0002)
>>> pcmean,pcvar,q,pcci_low,pcci_high = fgp.post_cubature_ci(confidence=0.99)
>>> pcci_low
tensor(20.1564)
>>> pcci_high
tensor(20.2228)
>>> pcov_future = fgp.post_cov(x,z,n=2*n)
>>> pvar_future = fgp.post_var(x,n=2*n)
>>> pcvar_future = fgp.post_cubature_var(n=2*n)
>>> x_next = fgp.get_x_next(2*n)
>>> y_next = f_ackley(x_next)
>>> fgp.add_y_next(y_next)
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0258)
>>> assert torch.allclose(fgp.post_cov(x,z),pcov_future)
>>> assert torch.allclose(fgp.post_var(x),pvar_future)
>>> assert torch.allclose(fgp.post_cubature_var(),pcvar_future)
>>> data = fgp.fit(verbose=False)
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0259)
>>> x_next = fgp.get_x_next(4*n)
>>> y_next = f_ackley(x_next)
>>> fgp.add_y_next(y_next)
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0191)
>>> data = fgp.fit(verbose=False)
>>> torch.linalg.norm(y-fgp.post_mean(x))/torch.linalg.norm(y)
tensor(0.0187)
>>> pcov_16n = fgp.post_cov(x,z,n=16*n)
>>> pvar_16n = fgp.post_var(x,n=16*n)
>>> pcvar_16n = fgp.post_cubature_var(n=16*n)
>>> x_next = fgp.get_x_next(16*n)
>>> y_next = f_ackley(x_next)
>>> fgp.add_y_next(y_next)
>>> assert torch.allclose(fgp.post_cov(x,z),pcov_16n)
>>> assert torch.allclose(fgp.post_var(x),pvar_16n)
>>> assert torch.allclose(fgp.post_cubature_var(),pcvar_16n)

Parameters:

Name Type Description Default
seqs Union[int,qmcpy.DigitalNetB2,List]

list of digital sequence generators in base \(b=2\) with order="NATURAL" and randomize in ["FALSE","DS"]. If an int d is passed in we use

[qmcpy.DigitalNetB2(d,seed=seed,randomize="DS") for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)]
See the qmcpy.DigitalNetB2 docs for more info. If num_tasks==1 then randomize may be in ["FALSE","DS","LMS","LMS_DS"].

required
num_tasks int

number of tasks

None
seed_for_seq int

seed used for digital net randomization

None
alpha int

smoothness parameter

2
scale float

kernel global scaling parameter

1.0
lengthscales Union[Tensor[d], float]

vector of kernel lengthscales. If a scalar is passed in then lengthscales is set to a constant vector.

1.0
noise float

positive noise variance i.e. nugget term

1e-16
factor_task_kernel Union[Tensor[num_tasks, rank_factor_task_kernel], int]

for \(F\) the factor_task_kernel the task kernel is \(FF^T + \text{diag}(\boldsymbol{v})\) where rank_factor_task_kernel<=num_tasks and \(\boldsymbol{v}\) is the noise_task_kernel.

1.0
rank_factor_task_kernel int

see the description of factor_task_kernel above. Defaults to 0 for single task problems and 1 for multi task problems.

None
noise_task_kernel Union[Tensor[num_tasks], float]

see the description of factor_task_kernel above

1.0
device device

torch device which is required to support torch.float64

'cpu'
tfs_scale Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: log(x), lambda x: exp(x))
tfs_lengthscales Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: log(x), lambda x: exp(x))
tfs_noise Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: log(x), lambda x: exp(x))
tfs_factor_task_kernel Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: x, lambda x: x)
tfs_noise_task_kernel Tuple[callable, callable]

the first argument transforms to the raw value to be optimized, the second applies the inverse transform

(lambda x: log(x), lambda x: exp(x))
requires_grad_scale bool

whether or not to optimize the scale parameter

True
requires_grad_lengthscales bool

whether or not to optimize lengthscale parameters

True
requires_grad_noise bool

whether or not to optimize the noise parameter

False
requires_grad_factor_task_kernel bool

whether or not to optimize the factor for the task kernel

None
requires_grad_noise_task_kernel bool

whether or not to optimize the noise for the task kernel

None
shape_batch Size

shape of the batch output for each task

Size([])
shape_scale Size

shape of the scale parameter, defaults to torch.Size([1])

Size([1])
shape_lengthscales Size

shape of the lengthscales parameter, defaults to torch.Size([d]) where d is the dimension

None
shape_noise Size

shape of the noise parameter, defaults to torch.Size([1])

Size([1])
shape_factor_task_kernel Size

shape of the factor for the task kernel, defaults to torch.Size([num_tasks,r]) where r is the rank, see the description of factor_task_kernel

None
shape_noise_task_kernel Size

shape of the noise for the task kernel, defaults to torch.Size([num_tasks])

None
derivatives list

list of derivative orders e.g. to include a function and its gradient set

derivatives = [torch.zeros(d,dtype=int)]+[ej for ej in torch.eye(d,dtype=int)]

None
derivatives_coeffs list

list of derivative coefficients where if derivatives[k].shape==(p,d) then we should have derivatives_coeffs[k].shape==(p,)

None
compile_fts bool

if True, use torch.compile(qmcpy.fwht_torch,**compile_fts_kwargs), otherwise use the uncompiled version

False
compile_fts_kwargs dict

keyword arguments to torch.compile, see the compile_fts argument

{}
adaptive_nugget bool

if True, use the adaptive nugget which modifies noises based on trace ratios.

False
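
A minimal multi-task construction sketch using the int shortcut for seqs described above (illustrative values only); one digitally shifted net is built per task internally:

```python
import torch
from fastgps import FastGPDigitalNetB2  # assumed package-level export

torch.set_default_dtype(torch.float64)
fgp = FastGPDigitalNetB2(
    seqs=3,           # int d: one digitally shifted net per task built internally
    num_tasks=2,
    seed_for_seq=11,  # seed for the digital shift randomization
)
```
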
Source code in fastgps/fast_gp_digital_net_b2.py
def __init__(self,
        seqs:Union[qmcpy.DigitalNetB2,int],
        num_tasks:int = None,
        seed_for_seq:int = None,
        alpha:int = 2,
        scale:float = 1., 
        lengthscales:Union[torch.Tensor,float] = 1., 
        noise:float = 1e-16,
        factor_task_kernel:Union[torch.Tensor,int] = 1.,
        rank_factor_task_kernel:int = None,
        noise_task_kernel:Union[torch.Tensor,float] = 1.,
        device:torch.device = "cpu",
        tfs_scale:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_lengthscales:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_noise:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        tfs_factor_task_kernel:Tuple[callable,callable] = ((lambda x: x, lambda x: x)),#((lambda x: x**(1/3)),(lambda x: x**3)),
        tfs_noise_task_kernel:Tuple[callable,callable] = ((lambda x: torch.log(x)),(lambda x: torch.exp(x))),
        requires_grad_scale:bool = True, 
        requires_grad_lengthscales:bool = True, 
        requires_grad_noise:bool = False, 
        requires_grad_factor_task_kernel:bool = None,
        requires_grad_noise_task_kernel:bool = None,
        shape_batch:torch.Size = torch.Size([]),
        shape_scale:torch.Size = torch.Size([1]), 
        shape_lengthscales:torch.Size = None,
        shape_noise:torch.Size = torch.Size([1]),
        shape_factor_task_kernel:torch.Size = None, 
        shape_noise_task_kernel:torch.Size = None,
        derivatives:list = None,
        derivatives_coeffs:list = None,
        compile_fts:bool = False,
        compile_fts_kwargs: dict = {},
        adaptive_nugget:bool = False,
        ):
    """
    Args:
        seqs (Union[int,qmcpy.DigitalNetB2,List]): list of digital sequence generators in base $b=2$ 
            with order="NATURAL" and randomize in `["FALSE","DS"]`. If an int `d` is passed in we use 
            ```python
            [qmcpy.DigitalNetB2(d,seed=seed,randomize="DS") for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)]
            ```
            See the <a href="https://qmcpy.readthedocs.io/en/latest/algorithms.html#module-qmcpy.discrete_distribution.digital_net_b2.digital_net_b2" target="_blank">`qmcpy.DigitalNetB2` docs</a> for more info. 
            If `num_tasks==1` then randomize may be in `["FALSE","DS","LMS","LMS_DS"]`. 
        num_tasks (int): number of tasks 
        seed_for_seq (int): seed used for digital net randomization
        alpha (int): smoothness parameter
        scale (float): kernel global scaling parameter
        lengthscales (Union[torch.Tensor[d],float]): vector of kernel lengthscales. 
            If a scalar is passed in then `lengthscales` is set to a constant vector. 
        noise (float): positive noise variance i.e. nugget term
        factor_task_kernel (Union[Tensor[num_tasks,rank_factor_task_kernel],int]): for $F$ the `factor_task_kernel` the task kernel is $FF^T + \\text{diag}(\\boldsymbol{v})$ 
            where `rank_factor_task_kernel<=num_tasks` and $\\boldsymbol{v}$ is the `noise_task_kernel`.
        rank_factor_task_kernel (int): see the description of `factor_task_kernel` above. Defaults to 0 for single task problems and 1 for multi task problems.
        noise_task_kernel (Union[torch.Tensor[num_tasks],float]): see the description of `factor_task_kernel` above 
        device (torch.device): torch device which is required to support `torch.float64`
        tfs_scale (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_lengthscales (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_noise (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_factor_task_kernel (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        tfs_noise_task_kernel (Tuple[callable,callable]): the first argument transforms to the raw value to be optimized, the second applies the inverse transform
        requires_grad_scale (bool): whether or not to optimize the scale parameter
        requires_grad_lengthscales (bool): whether or not to optimize lengthscale parameters
        requires_grad_noise (bool): whether or not to optimize the noise parameter
        requires_grad_factor_task_kernel (bool): whether or not to optimize the factor for the task kernel
        requires_grad_noise_task_kernel (bool): whether or not to optimize the noise for the task kernel
        shape_batch (torch.Size): shape of the batch output for each task
        shape_scale (torch.Size): shape of the scale parameter, defaults to `torch.Size([1])`
        shape_lengthscales (torch.Size): shape of the lengthscales parameter, defaults to `torch.Size([d])` where `d` is the dimension
        shape_noise (torch.Size): shape of the noise parameter, defaults to `torch.Size([1])`
        shape_factor_task_kernel (torch.Size): shape of the factor for the task kernel, defaults to `torch.Size([num_tasks,r])` where `r` is the rank, see the description of `factor_task_kernel`
        shape_noise_task_kernel (torch.Size): shape of the noise for the task kernel, defaults to `torch.Size([num_tasks])`
        derivatives (list): list of derivative orders e.g. to include a function and its gradient set 
            ```python
            derivatives = [torch.zeros(d,dtype=int)]+[ej for ej in torch.eye(d,dtype=int)]
            ```
        derivatives_coeffs (list): list of derivative coefficients where if `derivatives[k].shape==(p,d)` then we should have `derivatives_coeffs[k].shape==(p,)`
        compile_fts (bool): if `True`, use `torch.compile(qmcpy.fwht_torch,**compile_fts_kwargs)`, otherwise use the uncompiled version
        compile_fts_kwargs (dict): keyword arguments to `torch.compile`, see the `compile_fts` argument
        adaptive_nugget (bool): if True, use the adaptive nugget which modifies noises based on trace ratios.  
    """
    assert isinstance(alpha,int) and alpha in qmcpy.kernel_methods.util.dig_shift_invar_ops.WEIGHTEDWALSHFUNCSPOS.keys(), "alpha must be in %s"%list(qmcpy.kernel_methods.util.dig_shift_invar_ops.WEIGHTEDWALSHFUNCSPOS.keys())
    if num_tasks is None: 
        solo_task = True
        default_task = 0 
        num_tasks = 1
    else:
        assert isinstance(num_tasks,int) and num_tasks>0
        solo_task = False
        default_task = torch.arange(num_tasks)
    if isinstance(seqs,int):
        seqs = np.array([qmcpy.DigitalNetB2(seqs,seed=seed,randomize="DS") for seed in np.random.SeedSequence(seed_for_seq).spawn(num_tasks)],dtype=object)
    if isinstance(seqs,qmcpy.DigitalNetB2):
        seqs = np.array([seqs],dtype=object)
    if isinstance(seqs,list):
        seqs = np.array(seqs,dtype=object)
    assert seqs.shape==(num_tasks,), "seqs should be a length num_tasks=%d list"%num_tasks
    assert all(isinstance(seqs[i],qmcpy.DigitalNetB2) for i in range(num_tasks)), "each seq should be a qmcpy.DigitalNetB2 instance"
    assert all(seqs[i].order=="NATURAL" for i in range(num_tasks)), "each seq should be in 'NATURAL' order"
    assert all(seqs[i].replications==1 for i in range(num_tasks)), "each seq should have only 1 replication"
    assert all(seqs[i].t_lms<64 for i in range(num_tasks)), "each seq must have t_lms<64"
    if num_tasks==1:
        assert seqs[0].randomize in ['FALSE','DS','LMS','LMS_DS'], "seq should have randomize in ['FALSE','DS','LMS','LMS_DS']"
    else:
        assert all(seqs[i].randomize in ['FALSE','DS'] for i in range(num_tasks)), "each seq should have randomize in ['FALSE','DS']"
    assert all(seqs[i].t_lms==seqs[0].t_lms for i in range(num_tasks)), "all seqs should have the same t_lms"
    self.t = seqs[0].t_lms
    ift = ft = torch.compile(qmcpy.fwht_torch,**compile_fts_kwargs) if compile_fts else qmcpy.fwht_torch
    super().__init__(
        alpha,
        ft,
        ift,
        seqs,
        num_tasks,
        default_task,
        solo_task,
        scale,
        lengthscales,
        noise,
        factor_task_kernel,
        rank_factor_task_kernel,
        noise_task_kernel,
        device,
        tfs_scale,
        tfs_lengthscales,
        tfs_noise,
        tfs_factor_task_kernel,
        tfs_noise_task_kernel,
        requires_grad_scale,
        requires_grad_lengthscales,
        requires_grad_noise,
        requires_grad_factor_task_kernel,
        requires_grad_noise_task_kernel,
        shape_batch,
        shape_scale, 
        shape_lengthscales,
        shape_noise,
        shape_factor_task_kernel, 
        shape_noise_task_kernel,
        derivatives,
        derivatives_coeffs,
        adaptive_nugget,
    )
    assert (self.alpha<=4).all() and (self.alpha>=2).all()