Multivariate: Discrete and Categorical

Import libraries

[1]:
import numpy as np
import xarray as xr
import synthia as syn
import pyvinecopulib as pv

Generate dummy data

[2]:
# Two toy samples. Variable 'a' holds three integers per sample (second
# dimension 'foo'); variable 'b' holds one string label per sample.
values_a = np.array([[0, 1, 2], [3, 4, 5]])
labels_b = np.array(['red', 'green'])

ds_true = xr.Dataset(
    data_vars={
        'a': (('sample', 'foo'), values_a),
        # A bare string names the single dimension of this 1-D variable.
        'b': ('sample', labels_b),
    }
)

ds_true
[2]:
<xarray.Dataset>
Dimensions:  (sample: 2, foo: 3)
Dimensions without coordinates: sample, foo
Data variables:
    a        (sample, foo) int64 0 1 2 3 4 5
    b        (sample) <U5 'red' 'green'

Fit and generate new samples

[3]:
# Fit controls for the vine copula: only Gaussian pair-copulas are allowed,
# with the truncation level set to 1 and its automatic selection switched off.
vine_controls = pv.FitControlsVinecop(
    family_set=[pv.gaussian],
    trunc_lvl=1,
    select_trunc_lvl=False,
)
vine_copula = syn.VineCopula(controls=vine_controls)

# Both data variables are declared as 'cat' for the fit.
variable_types = {'a': 'cat', 'b': 'cat'}

generator = syn.CopulaDataGenerator(verbose=False)
generator.fit(ds_true, types=variable_types, copula=vine_copula)

# Draw 10 synthetic samples.
# NOTE(review): no seed is set in this cell, so the sampled values
# presumably differ from run to run — confirm if reproducibility matters.
ds_synth = generator.generate(10)

ds_synth
[3]:
<xarray.Dataset>
Dimensions:  (sample: 10, foo: 3)
Dimensions without coordinates: sample, foo
Data variables:
    a        (sample, foo) int64 3 1 2 0 4 2 0 1 2 3 4 ... 1 2 0 1 5 0 1 2 3 1 2
    b        (sample) <U5 'green' 'green' 'green' ... 'green' 'green' 'red'
[ ]: