Skip to content
Snippets Groups Projects
Commit 4cf64805 authored by xuetaowave's avatar xuetaowave
Browse files

update

parent 4c4493ff
No related merge requests found
log_to_wandb: 'False'
lr: '0.0005'
batch_size: '8'
patch_size: '4'
depth: '6'
img_size: '[192, 288]'
max_epochs: '1500'
scheduler: CosineAnnealingLR
in_channels: "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
23\n 24 25 26 27 28 29]"
out_channels: "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
22 23\n 24 25 26 27 28 29]"
prediction_length: '100'
orography: 'False'
orography_path: None
exp_dir: ./results/tec_256
train_data_path: ./train
valid_data_path: ./test
inf_data_path: ./out_of_sample
time_means_path: ./time_means.npy
global_means_path: ./global_means.npy
global_stds_path: ./global_stds.npy
loss: l2
retrain: 'False'
num_data_workers: '4'
dt: '1'
n_history: '0'
prediction_type: iterative
n_initial_conditions: '5'
ics_type: default
save_raw_forecasts: 'True'
save_channel: 'False'
masked_acc: 'False'
maskpath: None
perturb: 'False'
add_grid: 'False'
N_grid_channels: '0'
gridtype: sinusoidal
roll: 'False'
num_blocks: '8'
nettype: afno
width: '56'
modes: '32'
target: default
normalization: zscore
log_to_screen: 'True'
save_checkpoint: 'True'
enable_nhwc: 'False'
optimizer_type: FusedAdam
crop_size_x: None
crop_size_y: None
two_step_training: 'False'
plot_animations: 'False'
add_noise: 'False'
noise_std: '0'
epsilon_factor: '0'
world_size: '1'
experiment_dir:
/home/cxt/work/fourcastnet_TEC/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/d6p4
checkpoint_path: ./results/tec_256/afno_backbone_ljkj/d6p4/training_checkpoints/ckpt.tar
best_checkpoint_path: ./results/tec_256/afno_backbone_ljkj/d6p4/training_checkpoints/best_ckpt.tar
resuming: 'False'
local_rank: '0'
enable_amp: 'True'
name: afno_backbone_ljkj_d6p4
group: era5_precipafno_backbone_ljkj
project: ERA5_precip
entity: flowgan
N_in_channels: '30'
N_out_channels: '30'
2024-02-19 23:15:25,843 - root - INFO - --------------- Versions ---------------
2024-02-19 23:15:25,848 - root - INFO - git branch: b'* master'
2024-02-19 23:15:25,851 - root - INFO - git hash: b'380ab9a713b93fc8bb7a1afeb6e46dc7796e0fcf'
2024-02-19 23:15:25,851 - root - INFO - Torch: 2.1.2+cu118
2024-02-19 23:15:25,851 - root - INFO - ----------------------------------------
2024-02-19 23:15:25,851 - root - INFO - ------------------ Configuration ------------------
2024-02-19 23:15:25,851 - root - INFO - Configuration file: /home/cxt/work/fourcastnet_TEC/FourCastNetTEC/data_ljkj/AFNO.yaml
2024-02-19 23:15:25,851 - root - INFO - Configuration name: afno_backbone_ljkj
2024-02-19 23:15:25,851 - root - INFO - log_to_wandb False
2024-02-19 23:15:25,851 - root - INFO - lr 0.0005
2024-02-19 23:15:25,851 - root - INFO - batch_size 64
2024-02-19 23:15:25,851 - root - INFO - patch_size 4
2024-02-19 23:15:25,851 - root - INFO - depth 6
2024-02-19 23:15:25,851 - root - INFO - img_size [192, 288]
2024-02-19 23:15:25,851 - root - INFO - max_epochs 1500
2024-02-19 23:15:25,851 - root - INFO - scheduler CosineAnnealingLR
2024-02-19 23:15:25,851 - root - INFO - in_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
2024-02-19 23:15:25,851 - root - INFO - out_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
2024-02-19 23:15:25,852 - root - INFO - prediction_length 100
2024-02-19 23:15:25,852 - root - INFO - orography False
2024-02-19 23:15:25,852 - root - INFO - orography_path None
2024-02-19 23:15:25,852 - root - INFO - exp_dir ./results/tec_256
2024-02-19 23:15:25,852 - root - INFO - train_data_path ./train
2024-02-19 23:15:25,852 - root - INFO - valid_data_path ./test
2024-02-19 23:15:25,852 - root - INFO - inf_data_path ./out_of_sample
2024-02-19 23:15:25,852 - root - INFO - time_means_path ./time_means.npy
2024-02-19 23:15:25,852 - root - INFO - global_means_path ./global_means.npy
2024-02-19 23:15:25,852 - root - INFO - global_stds_path ./global_stds.npy
2024-02-19 23:15:25,852 - root - INFO - loss l2
2024-02-19 23:15:25,852 - root - INFO - retrain False
2024-02-19 23:15:25,852 - root - INFO - num_data_workers 4
2024-02-19 23:15:25,852 - root - INFO - dt 1
2024-02-19 23:15:25,852 - root - INFO - n_history 0
2024-02-19 23:15:25,852 - root - INFO - prediction_type iterative
2024-02-19 23:15:25,852 - root - INFO - n_initial_conditions 5
2024-02-19 23:15:25,852 - root - INFO - ics_type default
2024-02-19 23:15:25,852 - root - INFO - save_raw_forecasts True
2024-02-19 23:15:25,852 - root - INFO - save_channel False
2024-02-19 23:15:25,852 - root - INFO - masked_acc False
2024-02-19 23:15:25,852 - root - INFO - maskpath None
2024-02-19 23:15:25,852 - root - INFO - perturb False
2024-02-19 23:15:25,852 - root - INFO - add_grid False
2024-02-19 23:15:25,852 - root - INFO - N_grid_channels 0
2024-02-19 23:15:25,852 - root - INFO - gridtype sinusoidal
2024-02-19 23:15:25,852 - root - INFO - roll False
2024-02-19 23:15:25,852 - root - INFO - num_blocks 8
2024-02-19 23:15:25,852 - root - INFO - nettype afno
2024-02-19 23:15:25,852 - root - INFO - width 56
2024-02-19 23:15:25,852 - root - INFO - modes 32
2024-02-19 23:15:25,852 - root - INFO - target default
2024-02-19 23:15:25,852 - root - INFO - normalization zscore
2024-02-19 23:15:25,852 - root - INFO - log_to_screen True
2024-02-19 23:15:25,853 - root - INFO - save_checkpoint True
2024-02-19 23:15:25,853 - root - INFO - enable_nhwc False
2024-02-19 23:15:25,853 - root - INFO - optimizer_type FusedAdam
2024-02-19 23:15:25,853 - root - INFO - crop_size_x None
2024-02-19 23:15:25,853 - root - INFO - crop_size_y None
2024-02-19 23:15:25,853 - root - INFO - two_step_training False
2024-02-19 23:15:25,853 - root - INFO - plot_animations False
2024-02-19 23:15:25,853 - root - INFO - add_noise False
2024-02-19 23:15:25,853 - root - INFO - noise_std 0
2024-02-19 23:15:25,853 - root - INFO - epsilon_factor 0
2024-02-19 23:15:25,853 - root - INFO - world_size 1
2024-02-19 23:15:25,853 - root - INFO - experiment_dir /home/cxt/work/fourcastnet_TEC/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/d6p4
2024-02-19 23:15:25,853 - root - INFO - checkpoint_path ./results/tec_256/afno_backbone_ljkj/d6p4/training_checkpoints/ckpt.tar
2024-02-19 23:15:25,853 - root - INFO - best_checkpoint_path ./results/tec_256/afno_backbone_ljkj/d6p4/training_checkpoints/best_ckpt.tar
2024-02-19 23:15:25,853 - root - INFO - resuming False
2024-02-19 23:15:25,853 - root - INFO - local_rank 0
2024-02-19 23:15:25,853 - root - INFO - enable_amp True
2024-02-19 23:15:25,853 - root - INFO - name afno_backbone_ljkj_d6p4
2024-02-19 23:15:25,853 - root - INFO - group era5_precipafno_backbone_ljkj
2024-02-19 23:15:25,853 - root - INFO - project ERA5_precip
2024-02-19 23:15:25,853 - root - INFO - entity flowgan
2024-02-19 23:15:25,853 - root - INFO - ---------------------------------------------------
2024-02-19 23:15:25,857 - root - INFO - rank 0, begin data loader init
2024-02-19 23:15:25,858 - root - INFO - Getting file stats from ./train/2010.h5
2024-02-19 23:15:25,858 - root - INFO - Number of samples per year: 2360
2024-02-19 23:15:25,858 - root - INFO - Found data at path ./train. Number of examples: 2360. Image Shape: 192 x 288 x 30
2024-02-19 23:15:25,858 - root - INFO - Delta t: 6 hours
2024-02-19 23:15:25,858 - root - INFO - Including 0 hours of past history in training at a frequency of 6 hours
2024-02-19 23:15:25,859 - root - INFO - Getting file stats from ./test/2010.h5
2024-02-19 23:15:25,859 - root - INFO - Number of samples per year: 472
2024-02-19 23:15:25,859 - root - INFO - Found data at path ./test. Number of examples: 472. Image Shape: 192 x 288 x 30
2024-02-19 23:15:25,859 - root - INFO - Delta t: 6 hours
2024-02-19 23:15:25,859 - root - INFO - Including 0 hours of past history in training at a frequency of 6 hours
2024-02-19 23:15:25,859 - root - INFO - rank 0, data loader initialized
2024-02-19 23:15:26,321 - root - INFO - Number of trainable model parameters: 33534720
2024-02-19 23:15:26,321 - root - INFO - Starting Training Loop...
2024-02-19 23:16:27,256 - root - INFO - --------------- Versions ---------------
2024-02-19 23:16:27,262 - root - INFO - git branch: b'* master'
2024-02-19 23:16:27,266 - root - INFO - git hash: b'380ab9a713b93fc8bb7a1afeb6e46dc7796e0fcf'
2024-02-19 23:16:27,266 - root - INFO - Torch: 2.1.2+cu118
2024-02-19 23:16:27,266 - root - INFO - ----------------------------------------
2024-02-19 23:16:27,266 - root - INFO - ------------------ Configuration ------------------
2024-02-19 23:16:27,266 - root - INFO - Configuration file: /home/cxt/work/fourcastnet_TEC/FourCastNetTEC/data_ljkj/AFNO.yaml
2024-02-19 23:16:27,266 - root - INFO - Configuration name: afno_backbone_ljkj
2024-02-19 23:16:27,266 - root - INFO - log_to_wandb False
2024-02-19 23:16:27,266 - root - INFO - lr 0.0005
2024-02-19 23:16:27,266 - root - INFO - batch_size 8
2024-02-19 23:16:27,266 - root - INFO - patch_size 4
2024-02-19 23:16:27,266 - root - INFO - depth 6
2024-02-19 23:16:27,266 - root - INFO - img_size [192, 288]
2024-02-19 23:16:27,267 - root - INFO - max_epochs 1500
2024-02-19 23:16:27,267 - root - INFO - scheduler CosineAnnealingLR
2024-02-19 23:16:27,267 - root - INFO - in_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
2024-02-19 23:16:27,267 - root - INFO - out_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
2024-02-19 23:16:27,267 - root - INFO - prediction_length 100
2024-02-19 23:16:27,267 - root - INFO - orography False
2024-02-19 23:16:27,267 - root - INFO - orography_path None
2024-02-19 23:16:27,267 - root - INFO - exp_dir ./results/tec_256
2024-02-19 23:16:27,267 - root - INFO - train_data_path ./train
2024-02-19 23:16:27,267 - root - INFO - valid_data_path ./test
2024-02-19 23:16:27,267 - root - INFO - inf_data_path ./out_of_sample
2024-02-19 23:16:27,267 - root - INFO - time_means_path ./time_means.npy
2024-02-19 23:16:27,267 - root - INFO - global_means_path ./global_means.npy
2024-02-19 23:16:27,267 - root - INFO - global_stds_path ./global_stds.npy
2024-02-19 23:16:27,267 - root - INFO - loss l2
2024-02-19 23:16:27,267 - root - INFO - retrain False
2024-02-19 23:16:27,267 - root - INFO - num_data_workers 4
2024-02-19 23:16:27,267 - root - INFO - dt 1
2024-02-19 23:16:27,267 - root - INFO - n_history 0
2024-02-19 23:16:27,267 - root - INFO - prediction_type iterative
2024-02-19 23:16:27,267 - root - INFO - n_initial_conditions 5
2024-02-19 23:16:27,267 - root - INFO - ics_type default
2024-02-19 23:16:27,267 - root - INFO - save_raw_forecasts True
2024-02-19 23:16:27,267 - root - INFO - save_channel False
2024-02-19 23:16:27,267 - root - INFO - masked_acc False
2024-02-19 23:16:27,267 - root - INFO - maskpath None
2024-02-19 23:16:27,267 - root - INFO - perturb False
2024-02-19 23:16:27,267 - root - INFO - add_grid False
2024-02-19 23:16:27,267 - root - INFO - N_grid_channels 0
2024-02-19 23:16:27,267 - root - INFO - gridtype sinusoidal
2024-02-19 23:16:27,267 - root - INFO - roll False
2024-02-19 23:16:27,267 - root - INFO - num_blocks 8
2024-02-19 23:16:27,267 - root - INFO - nettype afno
2024-02-19 23:16:27,268 - root - INFO - width 56
2024-02-19 23:16:27,268 - root - INFO - modes 32
2024-02-19 23:16:27,268 - root - INFO - target default
2024-02-19 23:16:27,268 - root - INFO - normalization zscore
2024-02-19 23:16:27,268 - root - INFO - log_to_screen True
2024-02-19 23:16:27,268 - root - INFO - save_checkpoint True
2024-02-19 23:16:27,268 - root - INFO - enable_nhwc False
2024-02-19 23:16:27,268 - root - INFO - optimizer_type FusedAdam
2024-02-19 23:16:27,268 - root - INFO - crop_size_x None
2024-02-19 23:16:27,268 - root - INFO - crop_size_y None
2024-02-19 23:16:27,268 - root - INFO - two_step_training False
2024-02-19 23:16:27,268 - root - INFO - plot_animations False
2024-02-19 23:16:27,268 - root - INFO - add_noise False
2024-02-19 23:16:27,268 - root - INFO - noise_std 0
2024-02-19 23:16:27,268 - root - INFO - epsilon_factor 0
2024-02-19 23:16:27,268 - root - INFO - world_size 1
2024-02-19 23:16:27,268 - root - INFO - experiment_dir /home/cxt/work/fourcastnet_TEC/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/d6p4
2024-02-19 23:16:27,268 - root - INFO - checkpoint_path ./results/tec_256/afno_backbone_ljkj/d6p4/training_checkpoints/ckpt.tar
2024-02-19 23:16:27,268 - root - INFO - best_checkpoint_path ./results/tec_256/afno_backbone_ljkj/d6p4/training_checkpoints/best_ckpt.tar
2024-02-19 23:16:27,268 - root - INFO - resuming False
2024-02-19 23:16:27,268 - root - INFO - local_rank 0
2024-02-19 23:16:27,268 - root - INFO - enable_amp True
2024-02-19 23:16:27,268 - root - INFO - name afno_backbone_ljkj_d6p4
2024-02-19 23:16:27,268 - root - INFO - group era5_precipafno_backbone_ljkj
2024-02-19 23:16:27,268 - root - INFO - project ERA5_precip
2024-02-19 23:16:27,268 - root - INFO - entity flowgan
2024-02-19 23:16:27,268 - root - INFO - ---------------------------------------------------
2024-02-19 23:16:27,272 - root - INFO - rank 0, begin data loader init
2024-02-19 23:16:27,273 - root - INFO - Getting file stats from ./train/2010.h5
2024-02-19 23:16:27,274 - root - INFO - Number of samples per year: 2360
2024-02-19 23:16:27,274 - root - INFO - Found data at path ./train. Number of examples: 2360. Image Shape: 192 x 288 x 30
2024-02-19 23:16:27,274 - root - INFO - Delta t: 6 hours
2024-02-19 23:16:27,274 - root - INFO - Including 0 hours of past history in training at a frequency of 6 hours
2024-02-19 23:16:27,274 - root - INFO - Getting file stats from ./test/2010.h5
2024-02-19 23:16:27,274 - root - INFO - Number of samples per year: 472
2024-02-19 23:16:27,274 - root - INFO - Found data at path ./test. Number of examples: 472. Image Shape: 192 x 288 x 30
2024-02-19 23:16:27,274 - root - INFO - Delta t: 6 hours
2024-02-19 23:16:27,275 - root - INFO - Including 0 hours of past history in training at a frequency of 6 hours
2024-02-19 23:16:27,275 - root - INFO - rank 0, data loader initialized
2024-02-19 23:16:27,740 - root - INFO - Number of trainable model parameters: 33534720
2024-02-19 23:16:27,740 - root - INFO - Starting Training Loop...
2024-02-19 23:17:30,667 - root - INFO - Time taken for epoch 1 is 62.926409006118774 sec
2024-02-19 23:17:30,667 - root - INFO - Train loss: 0.45115166902542114. Valid loss: 0.4281286299228668
2024-02-19 23:17:30,730 - root - INFO - Memory Used: 4297.0 MB, GPU.UUID: GPU-dead2cd9-3e2a-4455-2129-2efe429a641d
2024-02-19 23:18:34,538 - root - INFO - Time taken for epoch 2 is 63.80713081359863 sec
2024-02-19 23:18:34,538 - root - INFO - Train loss: 0.4192982316017151. Valid loss: 0.38309821486473083
2024-02-19 23:18:34,570 - root - INFO - Memory Used: 8728.0 MB, GPU.UUID: GPU-dead2cd9-3e2a-4455-2129-2efe429a641d
2024-02-19 23:19:37,158 - root - INFO - Time taken for epoch 3 is 62.587475538253784 sec
2024-02-19 23:19:37,159 - root - INFO - Train loss: 0.3974052965641022. Valid loss: 0.36405760049819946
2024-02-19 23:19:37,198 - root - INFO - Memory Used: 8760.0 MB, GPU.UUID: GPU-dead2cd9-3e2a-4455-2129-2efe429a641d
......@@ -5,7 +5,7 @@
#SBATCH -N 1 -n 1 -c 8 --gres=gpu:a100:1 -p GPU-8A100 --qos=gpu_8a100
config_file=./AFNO.yaml
config='afno_backbone_ljkj'
config='afno_backbone_ustc'
run_num='d6p4'
export HDF5_USE_FILE_LOCKING=FALSE
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment