{
  "_class_name": "CausalVAEModel",
  "_diffusers_version": "0.27.2",
  "attn_resolutions": [],
  "decoder_attention": "AttnBlock3D",
  "decoder_conv_in": "CausalConv3d",
  "decoder_conv_out": "CausalConv3d",
  "decoder_mid_resnet": "ResnetBlock3D",
  "decoder_resnet_blocks": [
    "ResnetBlock3D",
    "ResnetBlock3D",
    "ResnetBlock3D",
    "ResnetBlock3D"
  ],
  "decoder_spatial_upsample": [
    "",
    "SpatialUpsample2x",
    "SpatialUpsample2x",
    "SpatialUpsample2x"
  ],
  "decoder_temporal_upsample": [
    "",
    "",
    "TimeUpsample2x",
    "TimeUpsample2x"
  ],
  "double_z": true,
  "dropout": 0.0,
  "embed_dim": 4,
  "encoder_attention": "AttnBlock3D",
  "encoder_conv_in": "CausalConv3d",
  "encoder_conv_out": "CausalConv3d",
  "encoder_mid_resnet": "ResnetBlock3D",
  "encoder_resnet_blocks": [
    "ResnetBlock3D",
    "ResnetBlock3D",
    "ResnetBlock3D",
    "ResnetBlock3D"
  ],
  "encoder_spatial_downsample": [
    "SpatialDownsample2x",
    "SpatialDownsample2x",
    "SpatialDownsample2x",
    ""
  ],
  "encoder_temporal_downsample": [
    "TimeDownsample2x",
    "TimeDownsample2x",
    "",
    ""
  ],
  "hidden_size": 128,
  "hidden_size_mult": [
    1,
    2,
    4,
    4
  ],
  "loss_params": {
    "disc_start": 2001,
    "disc_weight": 0.5,
    "kl_weight": 1e-06,
    "logvar_init": 0.0
  },
  "loss_type": "opensora.models.ae.videobase.losses.LPIPSWithDiscriminator",
  "lr": 1e-05,
  "num_res_blocks": 2,
  "q_conv": "CausalConv3d",
  "resolution": 256,
  "z_channels": 4
}