# create_dataset.py
# Loads field snapshots from .npz archives, resamples them onto a 720 x 1440
# grid, and writes train / test / out_of_sample HDF5 files.
import numpy as np
from time import time
import tqdm

# Load the five input fields. Each .npz archive stores its array under the
# 'fields' key. np.load returns an NpzFile that holds an open file handle, so
# each archive is opened in a `with` block and closed as soon as the array
# has been read into memory (the extracted arrays stay valid afterwards).
with np.load('Density.npz') as data_load_D:
    D = data_load_D['fields']

with np.load('P.npz') as data_load_P:
    P = data_load_P['fields']

with np.load('T.npz') as data_load_T:
    T = data_load_T['fields']

with np.load('U.npz') as data_load_U:
    U = data_load_U['fields']

with np.load('V.npz') as data_load_V:
    V = data_load_V['fields']

# Resample every snapshot of every field onto a common 720 x 1440 grid using
# a bivariate spline (RectBivariateSpline with its default settings).
from scipy import interpolate

# Source and target grids in normalized [0, 1] coordinates. The source grid is
# sized from T and reused for all fields.
# NOTE(review): assumes D, P, U and V share T's spatial shape - confirm.
x = np.linspace(0, 1, T.shape[1])
y = np.linspace(0, 1, T.shape[2])
x_new = np.linspace(0, 1, 720)
y_new = np.linspace(0, 1, 1440)

# Pre-allocate float32 outputs; spline results are cast to float32 on
# assignment into these arrays.
D_new = np.zeros((D.shape[0], x_new.size, y_new.size), dtype=np.float32)
P_new = np.zeros((P.shape[0], x_new.size, y_new.size), dtype=np.float32)
T_new = np.zeros((T.shape[0], x_new.size, y_new.size), dtype=np.float32)
U_new = np.zeros((U.shape[0], x_new.size, y_new.size), dtype=np.float32)
V_new = np.zeros((V.shape[0], x_new.size, y_new.size), dtype=np.float32)

# `import tqdm` binds the module, so the progress-bar class is `tqdm.tqdm`;
# calling the module itself raises "TypeError: 'module' object is not callable".
# NOTE(review): the snapshot count is taken from D and applied to every field;
# assumes all five fields have the same leading dimension - confirm.
for i in tqdm.tqdm(range(D.shape[0])):
    for src, dst in ((D, D_new), (P, P_new), (T, T_new), (U, U_new), (V, V_new)):
        fi = interpolate.RectBivariateSpline(x, y, src[i])
        dst[i] = fi(x_new, y_new)


# Fold the leading axis of each resampled field into groups of 6 consecutive
# entries (shape becomes (-1, 6, H, W)), then join the five fields along
# axis 1 in the order D, P, T, U, V.
grouped = [
    field.reshape((-1, 6) + field.shape[1:])
    for field in (D_new, P_new, T_new, U_new, V_new)
]
data = np.concatenate(grouped, axis=1)

# Split index along axis 0: 5 * floor(n_samples / 6). Samples before this
# index are written as the training set, the rest as held-out data.
n_samples = data.shape[0]
train_mask = (n_samples // 6) * 5

import os
import h5py

# Report the working directory, since all output paths below are relative to it.
print(os.getcwd())

# Make sure every output directory exists before any file is written.
for out_dir in ('train', 'test', 'out_of_sample'):
    os.makedirs(out_dir, exist_ok=True)

# Each output file holds a single dataset named "fields".
# NOTE(review): 'test' and 'out_of_sample' receive the same slice
# (data[train_mask:]) - confirm this duplication is intended.
outputs = (
    ('train/2010.h5', data[:train_mask]),
    ('test/2010.h5', data[train_mask:]),
    ('out_of_sample/2010.h5', data[train_mask:]),
)
for out_path, subset in outputs:
    with h5py.File(out_path, 'w') as f:
        f.create_dataset("fields", data=subset)