Skip to content
Snippets Groups Projects
Commit 1fa9d270 authored by xuetao chen's avatar xuetao chen
Browse files

update

parent 7b87da92
No related branches found
No related tags found
No related merge requests found
log_to_wandb: 'False'
lr: '0.0005'
batch_size: '8'
patch_size: '4'
depth: '6'
img_size: '[192, 288]'
max_epochs: '1500'
scheduler: CosineAnnealingLR
in_channels: '[ 0 1 2 3 4 5 6 7 8 9 10 11 12]'
out_channels: '[ 0 1 2 3 4 5 6 7 8 9 10 11 12]'
prediction_length: '100'
orography: 'False'
orography_path: None
exp_dir: ./results/tec_256
train_data_path: ./train
valid_data_path: ./test
inf_data_path: ./out_of_sample
time_means_path: ./time_means.npy
global_means_path: ./global_means.npy
global_stds_path: ./global_stds.npy
loss: l2
num_data_workers: '4'
dt: '1'
n_history: '0'
prediction_type: iterative
n_initial_conditions: '5'
ics_type: default
save_raw_forecasts: 'True'
save_channel: 'False'
masked_acc: 'False'
maskpath: None
perturb: 'False'
add_grid: 'False'
N_grid_channels: '0'
gridtype: sinusoidal
roll: 'False'
num_blocks: '8'
nettype: afno
width: '56'
modes: '32'
target: default
normalization: zscore
log_to_screen: 'True'
save_checkpoint: 'True'
enable_nhwc: 'False'
optimizer_type: FusedAdam
crop_size_x: None
crop_size_y: None
two_step_training: 'False'
plot_animations: 'False'
add_noise: 'False'
noise_std: '0'
epsilon_factor: '0'
world_size: '1'
experiment_dir: /home/ess/cxt/work/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/1
checkpoint_path: ./results/tec_256/afno_backbone_ljkj/1/training_checkpoints/ckpt.tar
best_checkpoint_path: ./results/tec_256/afno_backbone_ljkj/1/training_checkpoints/best_ckpt.tar
resuming: 'False'
local_rank: '0'
enable_amp: 'True'
name: afno_backbone_ljkj_1
group: era5_precipafno_backbone_ljkj
project: ERA5_precip
entity: flowgan
N_in_channels: '13'
N_out_channels: '13'
2024-02-19 21:24:29,723 - root - INFO - --------------- Versions ---------------
2024-02-19 21:24:29,749 - root - INFO - git branch: b'* master'
2024-02-19 21:24:29,767 - root - INFO - git hash: b'7b87da9222c1ec166fb9b9804e38288691d68bf4'
2024-02-19 21:24:29,767 - root - INFO - Torch: 1.13.1+cu117
2024-02-19 21:24:29,767 - root - INFO - ----------------------------------------
2024-02-19 21:24:29,767 - root - INFO - ------------------ Configuration ------------------
2024-02-19 21:24:29,767 - root - INFO - Configuration file: /home/ess/cxt/work/FourCastNetTEC/data_ljkj/AFNO.yaml
2024-02-19 21:24:29,767 - root - INFO - Configuration name: afno_backbone_ljkj
2024-02-19 21:24:29,767 - root - INFO - log_to_wandb False
2024-02-19 21:24:29,767 - root - INFO - lr 0.0005
2024-02-19 21:24:29,767 - root - INFO - batch_size 8
2024-02-19 21:24:29,767 - root - INFO - patch_size 4
2024-02-19 21:24:29,767 - root - INFO - depth 6
2024-02-19 21:24:29,767 - root - INFO - img_size [192, 288]
2024-02-19 21:24:29,767 - root - INFO - max_epochs 1500
2024-02-19 21:24:29,767 - root - INFO - scheduler CosineAnnealingLR
2024-02-19 21:24:29,767 - root - INFO - in_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
2024-02-19 21:24:29,768 - root - INFO - out_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
2024-02-19 21:24:29,768 - root - INFO - prediction_length 100
2024-02-19 21:24:29,768 - root - INFO - orography False
2024-02-19 21:24:29,768 - root - INFO - orography_path None
2024-02-19 21:24:29,768 - root - INFO - exp_dir ./results/tec_256
2024-02-19 21:24:29,768 - root - INFO - train_data_path ./train
2024-02-19 21:24:29,768 - root - INFO - valid_data_path ./test
2024-02-19 21:24:29,768 - root - INFO - inf_data_path ./out_of_sample
2024-02-19 21:24:29,768 - root - INFO - time_means_path ./time_means.npy
2024-02-19 21:24:29,768 - root - INFO - global_means_path ./global_means.npy
2024-02-19 21:24:29,768 - root - INFO - global_stds_path ./global_stds.npy
2024-02-19 21:24:29,768 - root - INFO - loss l2
2024-02-19 21:24:29,768 - root - INFO - num_data_workers 4
2024-02-19 21:24:29,768 - root - INFO - dt 1
2024-02-19 21:24:29,768 - root - INFO - n_history 0
2024-02-19 21:24:29,768 - root - INFO - prediction_type iterative
2024-02-19 21:24:29,768 - root - INFO - n_initial_conditions 5
2024-02-19 21:24:29,768 - root - INFO - ics_type default
2024-02-19 21:24:29,768 - root - INFO - save_raw_forecasts True
2024-02-19 21:24:29,768 - root - INFO - save_channel False
2024-02-19 21:24:29,768 - root - INFO - masked_acc False
2024-02-19 21:24:29,768 - root - INFO - maskpath None
2024-02-19 21:24:29,769 - root - INFO - perturb False
2024-02-19 21:24:29,769 - root - INFO - add_grid False
2024-02-19 21:24:29,769 - root - INFO - N_grid_channels 0
2024-02-19 21:24:29,769 - root - INFO - gridtype sinusoidal
2024-02-19 21:24:29,769 - root - INFO - roll False
2024-02-19 21:24:29,769 - root - INFO - num_blocks 8
2024-02-19 21:24:29,769 - root - INFO - nettype afno
2024-02-19 21:24:29,769 - root - INFO - width 56
2024-02-19 21:24:29,769 - root - INFO - modes 32
2024-02-19 21:24:29,769 - root - INFO - target default
2024-02-19 21:24:29,769 - root - INFO - normalization zscore
2024-02-19 21:24:29,769 - root - INFO - log_to_screen True
2024-02-19 21:24:29,769 - root - INFO - save_checkpoint True
2024-02-19 21:24:29,769 - root - INFO - enable_nhwc False
2024-02-19 21:24:29,769 - root - INFO - optimizer_type FusedAdam
2024-02-19 21:24:29,769 - root - INFO - crop_size_x None
2024-02-19 21:24:29,769 - root - INFO - crop_size_y None
2024-02-19 21:24:29,769 - root - INFO - two_step_training False
2024-02-19 21:24:29,769 - root - INFO - plot_animations False
2024-02-19 21:24:29,769 - root - INFO - add_noise False
2024-02-19 21:24:29,769 - root - INFO - noise_std 0
2024-02-19 21:24:29,770 - root - INFO - epsilon_factor 0
2024-02-19 21:24:29,770 - root - INFO - world_size 1
2024-02-19 21:24:29,770 - root - INFO - experiment_dir /home/ess/cxt/work/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/1
2024-02-19 21:24:29,771 - root - INFO - checkpoint_path ./results/tec_256/afno_backbone_ljkj/1/training_checkpoints/ckpt.tar
2024-02-19 21:24:29,771 - root - INFO - best_checkpoint_path ./results/tec_256/afno_backbone_ljkj/1/training_checkpoints/best_ckpt.tar
2024-02-19 21:24:29,771 - root - INFO - resuming False
2024-02-19 21:24:29,771 - root - INFO - local_rank 0
2024-02-19 21:24:29,771 - root - INFO - enable_amp True
2024-02-19 21:24:29,771 - root - INFO - name afno_backbone_ljkj_1
2024-02-19 21:24:29,771 - root - INFO - group era5_precipafno_backbone_ljkj
2024-02-19 21:24:29,771 - root - INFO - project ERA5_precip
2024-02-19 21:24:29,771 - root - INFO - entity flowgan
2024-02-19 21:24:29,772 - root - INFO - ---------------------------------------------------
2024-02-19 21:24:29,782 - root - INFO - rank 0, begin data loader init
2024-02-19 21:24:47,996 - root - INFO - --------------- Versions ---------------
2024-02-19 21:24:48,022 - root - INFO - git branch: b'* master'
2024-02-19 21:24:48,038 - root - INFO - git hash: b'7b87da9222c1ec166fb9b9804e38288691d68bf4'
2024-02-19 21:24:48,038 - root - INFO - Torch: 1.13.1+cu117
2024-02-19 21:24:48,038 - root - INFO - ----------------------------------------
2024-02-19 21:24:48,038 - root - INFO - ------------------ Configuration ------------------
2024-02-19 21:24:48,038 - root - INFO - Configuration file: /home/ess/cxt/work/FourCastNetTEC/data_ljkj/AFNO.yaml
2024-02-19 21:24:48,038 - root - INFO - Configuration name: afno_backbone_ljkj
2024-02-19 21:24:48,038 - root - INFO - log_to_wandb False
2024-02-19 21:24:48,038 - root - INFO - lr 0.0005
2024-02-19 21:24:48,038 - root - INFO - batch_size 8
2024-02-19 21:24:48,038 - root - INFO - patch_size 4
2024-02-19 21:24:48,038 - root - INFO - depth 6
2024-02-19 21:24:48,038 - root - INFO - img_size [192, 288]
2024-02-19 21:24:48,038 - root - INFO - max_epochs 1500
2024-02-19 21:24:48,038 - root - INFO - scheduler CosineAnnealingLR
2024-02-19 21:24:48,038 - root - INFO - in_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
2024-02-19 21:24:48,038 - root - INFO - out_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
2024-02-19 21:24:48,039 - root - INFO - prediction_length 100
2024-02-19 21:24:48,039 - root - INFO - orography False
2024-02-19 21:24:48,039 - root - INFO - orography_path None
2024-02-19 21:24:48,039 - root - INFO - exp_dir ./results/tec_256
2024-02-19 21:24:48,039 - root - INFO - train_data_path ./train
2024-02-19 21:24:48,039 - root - INFO - valid_data_path ./test
2024-02-19 21:24:48,039 - root - INFO - inf_data_path ./out_of_sample
2024-02-19 21:24:48,039 - root - INFO - time_means_path ./time_means.npy
2024-02-19 21:24:48,039 - root - INFO - global_means_path ./global_means.npy
2024-02-19 21:24:48,039 - root - INFO - global_stds_path ./global_stds.npy
2024-02-19 21:24:48,039 - root - INFO - loss l2
2024-02-19 21:24:48,039 - root - INFO - num_data_workers 4
2024-02-19 21:24:48,039 - root - INFO - dt 1
2024-02-19 21:24:48,039 - root - INFO - n_history 0
2024-02-19 21:24:48,039 - root - INFO - prediction_type iterative
2024-02-19 21:24:48,039 - root - INFO - n_initial_conditions 5
2024-02-19 21:24:48,039 - root - INFO - ics_type default
2024-02-19 21:24:48,039 - root - INFO - save_raw_forecasts True
2024-02-19 21:24:48,039 - root - INFO - save_channel False
2024-02-19 21:24:48,039 - root - INFO - masked_acc False
2024-02-19 21:24:48,039 - root - INFO - maskpath None
2024-02-19 21:24:48,039 - root - INFO - perturb False
2024-02-19 21:24:48,039 - root - INFO - add_grid False
2024-02-19 21:24:48,039 - root - INFO - N_grid_channels 0
2024-02-19 21:24:48,039 - root - INFO - gridtype sinusoidal
2024-02-19 21:24:48,039 - root - INFO - roll False
2024-02-19 21:24:48,039 - root - INFO - num_blocks 8
2024-02-19 21:24:48,039 - root - INFO - nettype afno
2024-02-19 21:24:48,039 - root - INFO - width 56
2024-02-19 21:24:48,039 - root - INFO - modes 32
2024-02-19 21:24:48,039 - root - INFO - target default
2024-02-19 21:24:48,039 - root - INFO - normalization zscore
2024-02-19 21:24:48,039 - root - INFO - log_to_screen True
2024-02-19 21:24:48,039 - root - INFO - save_checkpoint True
2024-02-19 21:24:48,039 - root - INFO - enable_nhwc False
2024-02-19 21:24:48,040 - root - INFO - optimizer_type FusedAdam
2024-02-19 21:24:48,040 - root - INFO - crop_size_x None
2024-02-19 21:24:48,040 - root - INFO - crop_size_y None
2024-02-19 21:24:48,040 - root - INFO - two_step_training False
2024-02-19 21:24:48,040 - root - INFO - plot_animations False
2024-02-19 21:24:48,040 - root - INFO - add_noise False
2024-02-19 21:24:48,040 - root - INFO - noise_std 0
2024-02-19 21:24:48,040 - root - INFO - epsilon_factor 0
2024-02-19 21:24:48,040 - root - INFO - world_size 1
2024-02-19 21:24:48,040 - root - INFO - experiment_dir /home/ess/cxt/work/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/1
2024-02-19 21:24:48,040 - root - INFO - checkpoint_path ./results/tec_256/afno_backbone_ljkj/1/training_checkpoints/ckpt.tar
2024-02-19 21:24:48,040 - root - INFO - best_checkpoint_path ./results/tec_256/afno_backbone_ljkj/1/training_checkpoints/best_ckpt.tar
2024-02-19 21:24:48,040 - root - INFO - resuming False
2024-02-19 21:24:48,040 - root - INFO - local_rank 0
2024-02-19 21:24:48,040 - root - INFO - enable_amp True
2024-02-19 21:24:48,040 - root - INFO - name afno_backbone_ljkj_1
2024-02-19 21:24:48,040 - root - INFO - group era5_precipafno_backbone_ljkj
2024-02-19 21:24:48,040 - root - INFO - project ERA5_precip
2024-02-19 21:24:48,040 - root - INFO - entity flowgan
2024-02-19 21:24:48,040 - root - INFO - ---------------------------------------------------
2024-02-19 21:24:48,047 - root - INFO - rank 0, begin data loader init
log_to_wandb: 'False'
lr: '0.0005'
batch_size: '8'
patch_size: '4'
depth: '6'
img_size: '[192, 288]'
max_epochs: '1500'
scheduler: CosineAnnealingLR
in_channels: '[ 0 1 2 3 4 5 6 7 8 9 10 11 12]'
out_channels: '[ 0 1 2 3 4 5 6 7 8 9 10 11 12]'
prediction_length: '100'
orography: 'False'
orography_path: None
exp_dir: ./results/tec_256
train_data_path: ./train
valid_data_path: ./test
inf_data_path: ./out_of_sample
time_means_path: ./time_means.npy
global_means_path: ./global_means.npy
global_stds_path: ./global_stds.npy
loss: l2
num_data_workers: '4'
dt: '1'
n_history: '0'
prediction_type: iterative
n_initial_conditions: '5'
ics_type: default
save_raw_forecasts: 'True'
save_channel: 'False'
masked_acc: 'False'
maskpath: None
perturb: 'False'
add_grid: 'False'
N_grid_channels: '0'
gridtype: sinusoidal
roll: 'False'
num_blocks: '8'
nettype: afno
width: '56'
modes: '32'
target: default
normalization: zscore
log_to_screen: 'True'
save_checkpoint: 'True'
enable_nhwc: 'False'
optimizer_type: FusedAdam
crop_size_x: None
crop_size_y: None
two_step_training: 'False'
plot_animations: 'False'
add_noise: 'False'
noise_std: '0'
epsilon_factor: '0'
world_size: '1'
experiment_dir: /home/ess/cxt/work/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/d6p2
checkpoint_path: ./results/tec_256/afno_backbone_ljkj/d6p2/training_checkpoints/ckpt.tar
best_checkpoint_path: ./results/tec_256/afno_backbone_ljkj/d6p2/training_checkpoints/best_ckpt.tar
resuming: 'False'
local_rank: '0'
enable_amp: 'True'
name: afno_backbone_ljkj_d6p2
group: era5_precipafno_backbone_ljkj
project: ERA5_precip
entity: flowgan
N_in_channels: '13'
N_out_channels: '13'
2024-02-19 21:39:12,490 - root - INFO - --------------- Versions ---------------
2024-02-19 21:39:12,518 - root - INFO - git branch: b'* master'
2024-02-19 21:39:12,538 - root - INFO - git hash: b'7b87da9222c1ec166fb9b9804e38288691d68bf4'
2024-02-19 21:39:12,538 - root - INFO - Torch: 1.13.1+cu117
2024-02-19 21:39:12,538 - root - INFO - ----------------------------------------
2024-02-19 21:39:12,538 - root - INFO - ------------------ Configuration ------------------
2024-02-19 21:39:12,538 - root - INFO - Configuration file: /home/ess/cxt/work/FourCastNetTEC/data_ljkj/AFNO.yaml
2024-02-19 21:39:12,538 - root - INFO - Configuration name: afno_backbone_ljkj
2024-02-19 21:39:12,538 - root - INFO - log_to_wandb False
2024-02-19 21:39:12,538 - root - INFO - lr 0.0005
2024-02-19 21:39:12,538 - root - INFO - batch_size 8
2024-02-19 21:39:12,538 - root - INFO - patch_size 4
2024-02-19 21:39:12,538 - root - INFO - depth 6
2024-02-19 21:39:12,538 - root - INFO - img_size [192, 288]
2024-02-19 21:39:12,538 - root - INFO - max_epochs 1500
2024-02-19 21:39:12,538 - root - INFO - scheduler CosineAnnealingLR
2024-02-19 21:39:12,538 - root - INFO - in_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
2024-02-19 21:39:12,538 - root - INFO - out_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
2024-02-19 21:39:12,538 - root - INFO - prediction_length 100
2024-02-19 21:39:12,539 - root - INFO - orography False
2024-02-19 21:39:12,539 - root - INFO - orography_path None
2024-02-19 21:39:12,539 - root - INFO - exp_dir ./results/tec_256
2024-02-19 21:39:12,539 - root - INFO - train_data_path ./train
2024-02-19 21:39:12,539 - root - INFO - valid_data_path ./test
2024-02-19 21:39:12,539 - root - INFO - inf_data_path ./out_of_sample
2024-02-19 21:39:12,539 - root - INFO - time_means_path ./time_means.npy
2024-02-19 21:39:12,539 - root - INFO - global_means_path ./global_means.npy
2024-02-19 21:39:12,539 - root - INFO - global_stds_path ./global_stds.npy
2024-02-19 21:39:12,539 - root - INFO - loss l2
2024-02-19 21:39:12,539 - root - INFO - num_data_workers 4
2024-02-19 21:39:12,539 - root - INFO - dt 1
2024-02-19 21:39:12,539 - root - INFO - n_history 0
2024-02-19 21:39:12,539 - root - INFO - prediction_type iterative
2024-02-19 21:39:12,539 - root - INFO - n_initial_conditions 5
2024-02-19 21:39:12,539 - root - INFO - ics_type default
2024-02-19 21:39:12,539 - root - INFO - save_raw_forecasts True
2024-02-19 21:39:12,539 - root - INFO - save_channel False
2024-02-19 21:39:12,539 - root - INFO - masked_acc False
2024-02-19 21:39:12,539 - root - INFO - maskpath None
2024-02-19 21:39:12,539 - root - INFO - perturb False
2024-02-19 21:39:12,539 - root - INFO - add_grid False
2024-02-19 21:39:12,539 - root - INFO - N_grid_channels 0
2024-02-19 21:39:12,539 - root - INFO - gridtype sinusoidal
2024-02-19 21:39:12,539 - root - INFO - roll False
2024-02-19 21:39:12,539 - root - INFO - num_blocks 8
2024-02-19 21:39:12,539 - root - INFO - nettype afno
2024-02-19 21:39:12,539 - root - INFO - width 56
2024-02-19 21:39:12,539 - root - INFO - modes 32
2024-02-19 21:39:12,539 - root - INFO - target default
2024-02-19 21:39:12,539 - root - INFO - normalization zscore
2024-02-19 21:39:12,539 - root - INFO - log_to_screen True
2024-02-19 21:39:12,539 - root - INFO - save_checkpoint True
2024-02-19 21:39:12,539 - root - INFO - enable_nhwc False
2024-02-19 21:39:12,540 - root - INFO - optimizer_type FusedAdam
2024-02-19 21:39:12,540 - root - INFO - crop_size_x None
2024-02-19 21:39:12,540 - root - INFO - crop_size_y None
2024-02-19 21:39:12,540 - root - INFO - two_step_training False
2024-02-19 21:39:12,540 - root - INFO - plot_animations False
2024-02-19 21:39:12,540 - root - INFO - add_noise False
2024-02-19 21:39:12,540 - root - INFO - noise_std 0
2024-02-19 21:39:12,540 - root - INFO - epsilon_factor 0
2024-02-19 21:39:12,540 - root - INFO - world_size 1
2024-02-19 21:39:12,540 - root - INFO - experiment_dir /home/ess/cxt/work/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/d6p2
2024-02-19 21:39:12,541 - root - INFO - checkpoint_path ./results/tec_256/afno_backbone_ljkj/d6p2/training_checkpoints/ckpt.tar
2024-02-19 21:39:12,541 - root - INFO - best_checkpoint_path ./results/tec_256/afno_backbone_ljkj/d6p2/training_checkpoints/best_ckpt.tar
2024-02-19 21:39:12,542 - root - INFO - resuming False
2024-02-19 21:39:12,542 - root - INFO - local_rank 0
2024-02-19 21:39:12,542 - root - INFO - enable_amp True
2024-02-19 21:39:12,542 - root - INFO - name afno_backbone_ljkj_d6p2
2024-02-19 21:39:12,542 - root - INFO - group era5_precipafno_backbone_ljkj
2024-02-19 21:39:12,542 - root - INFO - project ERA5_precip
2024-02-19 21:39:12,542 - root - INFO - entity flowgan
2024-02-19 21:39:12,542 - root - INFO - ---------------------------------------------------
2024-02-19 21:39:12,549 - root - INFO - rank 0, begin data loader init
2024-02-19 21:41:26,053 - root - INFO - --------------- Versions ---------------
2024-02-19 21:41:26,076 - root - INFO - git branch: b'* master'
2024-02-19 21:41:26,086 - root - INFO - git hash: b'7b87da9222c1ec166fb9b9804e38288691d68bf4'
2024-02-19 21:41:26,087 - root - INFO - Torch: 1.13.1+cu117
2024-02-19 21:41:26,087 - root - INFO - ----------------------------------------
2024-02-19 21:41:26,087 - root - INFO - ------------------ Configuration ------------------
2024-02-19 21:41:26,087 - root - INFO - Configuration file: /home/ess/cxt/work/FourCastNetTEC/data_ljkj/AFNO.yaml
2024-02-19 21:41:26,087 - root - INFO - Configuration name: afno_backbone_ljkj
2024-02-19 21:41:26,087 - root - INFO - log_to_wandb False
2024-02-19 21:41:26,087 - root - INFO - lr 0.0005
2024-02-19 21:41:26,087 - root - INFO - batch_size 8
2024-02-19 21:41:26,088 - root - INFO - patch_size 4
2024-02-19 21:41:26,088 - root - INFO - depth 6
2024-02-19 21:41:26,088 - root - INFO - img_size [192, 288]
2024-02-19 21:41:26,088 - root - INFO - max_epochs 1500
2024-02-19 21:41:26,088 - root - INFO - scheduler CosineAnnealingLR
2024-02-19 21:41:26,088 - root - INFO - in_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
2024-02-19 21:41:26,088 - root - INFO - out_channels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
2024-02-19 21:41:26,088 - root - INFO - prediction_length 100
2024-02-19 21:41:26,088 - root - INFO - orography False
2024-02-19 21:41:26,089 - root - INFO - orography_path None
2024-02-19 21:41:26,089 - root - INFO - exp_dir ./results/tec_256
2024-02-19 21:41:26,089 - root - INFO - train_data_path ./train
2024-02-19 21:41:26,089 - root - INFO - valid_data_path ./test
2024-02-19 21:41:26,089 - root - INFO - inf_data_path ./out_of_sample
2024-02-19 21:41:26,089 - root - INFO - time_means_path ./time_means.npy
2024-02-19 21:41:26,089 - root - INFO - global_means_path ./global_means.npy
2024-02-19 21:41:26,089 - root - INFO - global_stds_path ./global_stds.npy
2024-02-19 21:41:26,089 - root - INFO - loss l2
2024-02-19 21:41:26,089 - root - INFO - num_data_workers 4
2024-02-19 21:41:26,090 - root - INFO - dt 1
2024-02-19 21:41:26,090 - root - INFO - n_history 0
2024-02-19 21:41:26,090 - root - INFO - prediction_type iterative
2024-02-19 21:41:26,090 - root - INFO - n_initial_conditions 5
2024-02-19 21:41:26,090 - root - INFO - ics_type default
2024-02-19 21:41:26,090 - root - INFO - save_raw_forecasts True
2024-02-19 21:41:26,090 - root - INFO - save_channel False
2024-02-19 21:41:26,090 - root - INFO - masked_acc False
2024-02-19 21:41:26,090 - root - INFO - maskpath None
2024-02-19 21:41:26,090 - root - INFO - perturb False
2024-02-19 21:41:26,091 - root - INFO - add_grid False
2024-02-19 21:41:26,091 - root - INFO - N_grid_channels 0
2024-02-19 21:41:26,091 - root - INFO - gridtype sinusoidal
2024-02-19 21:41:26,091 - root - INFO - roll False
2024-02-19 21:41:26,091 - root - INFO - num_blocks 8
2024-02-19 21:41:26,091 - root - INFO - nettype afno
2024-02-19 21:41:26,091 - root - INFO - width 56
2024-02-19 21:41:26,091 - root - INFO - modes 32
2024-02-19 21:41:26,091 - root - INFO - target default
2024-02-19 21:41:26,091 - root - INFO - normalization zscore
2024-02-19 21:41:26,092 - root - INFO - log_to_screen True
2024-02-19 21:41:26,092 - root - INFO - save_checkpoint True
2024-02-19 21:41:26,092 - root - INFO - enable_nhwc False
2024-02-19 21:41:26,092 - root - INFO - optimizer_type FusedAdam
2024-02-19 21:41:26,092 - root - INFO - crop_size_x None
2024-02-19 21:41:26,092 - root - INFO - crop_size_y None
2024-02-19 21:41:26,092 - root - INFO - two_step_training False
2024-02-19 21:41:26,093 - root - INFO - plot_animations False
2024-02-19 21:41:26,093 - root - INFO - add_noise False
2024-02-19 21:41:26,093 - root - INFO - noise_std 0
2024-02-19 21:41:26,093 - root - INFO - epsilon_factor 0
2024-02-19 21:41:26,093 - root - INFO - world_size 1
2024-02-19 21:41:26,093 - root - INFO - experiment_dir /home/ess/cxt/work/FourCastNetTEC/data_ljkj/results/tec_256/afno_backbone_ljkj/d6p2
2024-02-19 21:41:26,093 - root - INFO - checkpoint_path ./results/tec_256/afno_backbone_ljkj/d6p2/training_checkpoints/ckpt.tar
2024-02-19 21:41:26,093 - root - INFO - best_checkpoint_path ./results/tec_256/afno_backbone_ljkj/d6p2/training_checkpoints/best_ckpt.tar
2024-02-19 21:41:26,093 - root - INFO - resuming False
2024-02-19 21:41:26,094 - root - INFO - local_rank 0
2024-02-19 21:41:26,094 - root - INFO - enable_amp True
2024-02-19 21:41:26,094 - root - INFO - name afno_backbone_ljkj_d6p2
2024-02-19 21:41:26,094 - root - INFO - group era5_precipafno_backbone_ljkj
2024-02-19 21:41:26,094 - root - INFO - project ERA5_precip
2024-02-19 21:41:26,094 - root - INFO - entity flowgan
2024-02-19 21:41:26,094 - root - INFO - ---------------------------------------------------
2024-02-19 21:41:26,115 - root - INFO - rank 0, begin data loader init
python ../train.py --enable_amp --yaml_config=./config/AFNO.yaml --config=afno_backbone_tec_ustc --run_num=d6p2
\ No newline at end of file
python -m pdb ../train.py --enable_amp --yaml_config=./AFNO.yaml --config=afno_backbone_ljkj --run_num=d6p2
srun --nodes=1 --ntasks-per-node=2 --cpus-per-task=8 -p GPU-8A100 --time=1:00:00 --gres=gpu:a100:2 --qos=gpu_8a100 --pty bash
\ No newline at end of file
srun --nodes=1 --ntasks-per-node=1 --cpus-per-task=8 -p GPU-8A100 --time=1:00:00 --gres=gpu:a100:1 --qos=gpu_8a100 --pty bash
......@@ -16,6 +16,7 @@ export MASTER_ADDR=$(hostname)
set -x
srun -u --mpi=pmi2 \
bash -c "
source export_DDP_vars.sh
source /home/ess/cxt/miniconda3/etc/profile.d/conda.sh
conda activate pytorch
python ../train.py --enable_amp --yaml_config=$config_file --config=$config --run_num=$run_num
"
......@@ -533,3 +533,99 @@
2024-02-19 16:19:01,725 - root - INFO - Time taken for epoch 5 is 84.67461013793945 sec
2024-02-19 16:19:01,727 - root - INFO - Train loss: 0.2511962652206421. Valid loss: 0.3271612823009491
2024-02-19 16:19:01,910 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:20:26,026 - root - INFO - Time taken for epoch 6 is 84.11569499969482 sec
2024-02-19 16:20:26,028 - root - INFO - Train loss: 0.24897676706314087. Valid loss: 0.3151703476905823
2024-02-19 16:20:26,210 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:21:50,322 - root - INFO - Time taken for epoch 7 is 84.11205720901489 sec
2024-02-19 16:21:50,323 - root - INFO - Train loss: 0.24616846442222595. Valid loss: 0.31500449776649475
2024-02-19 16:21:50,490 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:23:15,131 - root - INFO - Time taken for epoch 8 is 84.64115118980408 sec
2024-02-19 16:23:15,132 - root - INFO - Train loss: 0.24504172801971436. Valid loss: 0.311320424079895
2024-02-19 16:23:15,307 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:24:40,720 - root - INFO - Time taken for epoch 9 is 85.41201877593994 sec
2024-02-19 16:24:40,721 - root - INFO - Train loss: 0.24273811280727386. Valid loss: 0.3063752055168152
2024-02-19 16:24:40,910 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:26:04,953 - root - INFO - Time taken for epoch 10 is 84.04293489456177 sec
2024-02-19 16:26:04,954 - root - INFO - Train loss: 0.24008522927761078. Valid loss: 0.3057232201099396
2024-02-19 16:26:05,128 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:27:29,383 - root - INFO - Time taken for epoch 11 is 84.2551281452179 sec
2024-02-19 16:27:29,384 - root - INFO - Train loss: 0.23983930051326752. Valid loss: 0.2953037917613983
2024-02-19 16:27:29,565 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:28:53,449 - root - INFO - Time taken for epoch 12 is 83.88315796852112 sec
2024-02-19 16:28:53,449 - root - INFO - Train loss: 0.23826853930950165. Valid loss: 0.2955130636692047
2024-02-19 16:28:53,628 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:30:17,479 - root - INFO - Time taken for epoch 13 is 83.85005640983582 sec
2024-02-19 16:30:17,479 - root - INFO - Train loss: 0.23320788145065308. Valid loss: 0.2985433340072632
2024-02-19 16:30:17,647 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:31:41,448 - root - INFO - Time taken for epoch 14 is 83.80134725570679 sec
2024-02-19 16:31:41,449 - root - INFO - Train loss: 0.2289363145828247. Valid loss: 0.3279796540737152
2024-02-19 16:31:41,620 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:33:05,374 - root - INFO - Time taken for epoch 15 is 83.75351929664612 sec
2024-02-19 16:33:05,374 - root - INFO - Train loss: 0.22761228680610657. Valid loss: 0.38529449701309204
2024-02-19 16:33:05,558 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:34:29,532 - root - INFO - Time taken for epoch 16 is 83.97377586364746 sec
2024-02-19 16:34:29,532 - root - INFO - Train loss: 0.2269406020641327. Valid loss: 0.42874449491500854
2024-02-19 16:34:29,721 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:35:53,505 - root - INFO - Time taken for epoch 17 is 83.78326678276062 sec
2024-02-19 16:35:53,505 - root - INFO - Train loss: 0.2261020541191101. Valid loss: 0.44770216941833496
2024-02-19 16:35:53,675 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:37:17,498 - root - INFO - Time taken for epoch 18 is 83.82217478752136 sec
2024-02-19 16:37:17,498 - root - INFO - Train loss: 0.22515258193016052. Valid loss: 0.4511180520057678
2024-02-19 16:37:17,669 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:38:41,421 - root - INFO - Time taken for epoch 19 is 83.75144696235657 sec
2024-02-19 16:38:41,422 - root - INFO - Train loss: 0.2274629771709442. Valid loss: 0.4669801592826843
2024-02-19 16:38:41,592 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:40:05,420 - root - INFO - Time taken for epoch 20 is 83.82694387435913 sec
2024-02-19 16:40:05,421 - root - INFO - Train loss: 0.22646993398666382. Valid loss: 0.4684992730617523
2024-02-19 16:40:05,593 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:41:29,440 - root - INFO - Time taken for epoch 21 is 83.84612894058228 sec
2024-02-19 16:41:29,440 - root - INFO - Train loss: 0.2264901101589203. Valid loss: 0.48286527395248413
2024-02-19 16:41:29,608 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:42:53,365 - root - INFO - Time taken for epoch 22 is 83.75583982467651 sec
2024-02-19 16:42:53,365 - root - INFO - Train loss: 0.22445407509803772. Valid loss: 0.5032124519348145
2024-02-19 16:42:53,536 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:44:17,295 - root - INFO - Time taken for epoch 23 is 83.75787162780762 sec
2024-02-19 16:44:17,295 - root - INFO - Train loss: 0.22289973497390747. Valid loss: 0.50677090883255
2024-02-19 16:44:17,468 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:45:41,223 - root - INFO - Time taken for epoch 24 is 83.75482606887817 sec
2024-02-19 16:45:41,224 - root - INFO - Train loss: 0.22309765219688416. Valid loss: 0.5197709798812866
2024-02-19 16:45:41,398 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:47:05,316 - root - INFO - Time taken for epoch 25 is 83.91739010810852 sec
2024-02-19 16:47:05,317 - root - INFO - Train loss: 0.22330808639526367. Valid loss: 0.5301516652107239
2024-02-19 16:47:05,492 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:48:29,408 - root - INFO - Time taken for epoch 26 is 83.91606712341309 sec
2024-02-19 16:48:29,409 - root - INFO - Train loss: 0.22123104333877563. Valid loss: 0.5519839525222778
2024-02-19 16:48:29,577 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:49:53,432 - root - INFO - Time taken for epoch 27 is 83.85473537445068 sec
2024-02-19 16:49:53,433 - root - INFO - Train loss: 0.2199837565422058. Valid loss: 0.5518139600753784
2024-02-19 16:49:53,601 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:51:17,405 - root - INFO - Time taken for epoch 28 is 83.80416226387024 sec
2024-02-19 16:51:17,406 - root - INFO - Train loss: 0.21754133701324463. Valid loss: 0.5325676202774048
2024-02-19 16:51:17,586 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:52:41,477 - root - INFO - Time taken for epoch 29 is 83.89058804512024 sec
2024-02-19 16:52:41,478 - root - INFO - Train loss: 0.21527598798274994. Valid loss: 0.5180424451828003
2024-02-19 16:52:41,645 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:54:05,399 - root - INFO - Time taken for epoch 30 is 83.75387215614319 sec
2024-02-19 16:54:05,400 - root - INFO - Train loss: 0.2140357494354248. Valid loss: 0.4983566403388977
2024-02-19 16:54:05,564 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:55:29,365 - root - INFO - Time taken for epoch 31 is 83.80045366287231 sec
2024-02-19 16:55:29,365 - root - INFO - Train loss: 0.21105033159255981. Valid loss: 0.4925711750984192
2024-02-19 16:55:29,530 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:56:53,366 - root - INFO - Time taken for epoch 32 is 83.83543395996094 sec
2024-02-19 16:56:53,367 - root - INFO - Train loss: 0.21086212992668152. Valid loss: 0.4883137345314026
2024-02-19 16:56:53,535 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:58:17,352 - root - INFO - Time taken for epoch 33 is 83.8166515827179 sec
2024-02-19 16:58:17,353 - root - INFO - Train loss: 0.20764294266700745. Valid loss: 0.5009825229644775
2024-02-19 16:58:17,522 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 16:59:41,318 - root - INFO - Time taken for epoch 34 is 83.79647207260132 sec
2024-02-19 16:59:41,319 - root - INFO - Train loss: 0.2036314606666565. Valid loss: 0.4828004837036133
2024-02-19 16:59:41,488 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 17:01:05,307 - root - INFO - Time taken for epoch 35 is 83.81800055503845 sec
2024-02-19 17:01:05,308 - root - INFO - Train loss: 0.20334747433662415. Valid loss: 0.4227519631385803
2024-02-19 17:01:05,477 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 17:02:29,298 - root - INFO - Time taken for epoch 36 is 83.82022404670715 sec
2024-02-19 17:02:29,299 - root - INFO - Train loss: 0.2066626250743866. Valid loss: 0.38958901166915894
2024-02-19 17:02:29,474 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
2024-02-19 17:03:53,292 - root - INFO - Time taken for epoch 37 is 83.81771874427795 sec
2024-02-19 17:03:53,292 - root - INFO - Train loss: 0.20207679271697998. Valid loss: 0.4114855229854584
2024-02-19 17:03:53,464 - root - INFO - Memory Used: 16535.0 MB, GPU.UUID: GPU-1340a6ef-9d99-f4c3-82af-2cdd2452cbf6
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment