{
    "model": "tacotron2",
    "run_name": "coqui_tts",
    "run_description": "Training a Luxembourgish TTS model with the Marylux-648 dataset",
    "epochs": 1000,
    "batch_size": 10,
    "eval_batch_size": 8,
    "mixed_precision": false,
    "scheduler_after_epoch": false,
    "run_eval": true,
    "test_delay_epochs": -1,
    "print_eval": true,
    "dashboard_logger": "tensorboard",
    "print_step": 32,
    "plot_step": 100,
    "model_param_stats": false,
    "project_name": null,
    "log_model_step": null,
    "wandb_entity": null,
    "save_step": 4000,
    "checkpoint": true,
    "keep_all_best": false,
    "keep_after": 1000,
    "num_loader_workers": 1,
    "num_eval_loader_workers": 1,
    "use_noise_augment": false,
    "output_path": "/home/mbarnig/MB_TTS/TTS/recipes/ljspeech/tacotron2-DCA",
    "distributed_backend": "nccl",
    "distributed_url": "tcp://localhost:54321",
    "audio": {
        "fft_size": 1024,
        "win_length": 1024,
        "hop_length": 256,
        "frame_shift_ms": null,
        "frame_length_ms": null,
        "stft_pad_mode": "reflect",
        "sample_rate": 22050,
        "resample": false,
        "preemphasis": 0.98,
        "ref_level_db": 20,
        "do_sound_norm": false,
        "log_func": "np.log",
        "do_trim_silence": false,
        "trim_db": 60.0,
        "power": 1.5,
        "griffin_lim_iters": 60,
        "num_mels": 80,
        "mel_fmin": 0.0,
        "mel_fmax": 8000,
        "spec_gain": 1.0,
        "do_amp_to_db_linear": true,
        "do_amp_to_db_mel": true,
        "signal_norm": false,
        "min_level_db": -100,
        "symmetric_norm": true,
        "max_norm": 4.0,
        "clip_norm": true,
        "stats_path": "stats.npy"
    },
    "use_phonemes": false,
    "use_espeak_phonemes": true,
    "phoneme_language": "lb",
    "compute_input_seq_cache": false,
    "text_cleaner": "basic_cleaners",
    "enable_eos_bos_chars": false,
    "test_sentences_file": "",
    "phoneme_cache_path": "/home/mbarnig/MB_TTS/TTS/recipes/ljspeech/tacotron2-DCA/phoneme_cache",
    "characters": {
        "pad": "_",
        "eos": "~",
        "bos": "^",
        "characters": "Xabdefghijklmnopstuvwyz\u00e6\u0153\u0250\u0251\u0255\u0259\u025b\u025c\u026a\u0273\u0280\u0281\u0283\u028a\u0291\u0292\u02a6\u02d0 \u0303",
        "punctuations": "!'(),-.:;? ",
        "phonemes": "",
        "unique": true
    },
    "batch_group_size": 0,
    "loss_masking": true,
    "sort_by_audio_len": false,
    "min_seq_len": 1,
    "max_seq_len": 300,
    "compute_f0": false,
    "compute_linear_spec": false,
    "add_blank": false,
    "datasets": [
        {
            "name": "marylux",
            "path": "../marylux",
            "meta_file_train": "metadata_train.csv",
            "unused_speakers": null,
            "meta_file_val": "metadata_val.csv",
            "meta_file_attn_mask": ""
        }
    ],
    "optimizer": "RAdam",
    "optimizer_params": {
        "betas": [
            0.9,
            0.998
        ],
        "weight_decay": 1e-06
    },
    "lr_scheduler": "NoamLR",
    "lr_scheduler_params": {
        "warmup_steps": 4000
    },
    "test_sentences": [
        "\u0251n d\u0281 \u02a6\u00e6\u02d0\u026at hun z\u0259\u0255 d\u0259n no\u0280dv\u0251nt \u0251n dzon g\u0259\u0283t\u0280id\u0259n, vi\u0259 fun hin\u0259n \u02a6we\u02d0 vu\u0259l m\u025c\u026a \u0283ta\u02d0\u0280k vi\u02d0\u0281, v\u025c\u026a \u0259 v\u0251nd\u0259\u0280\u0281, de\u02d0n \u0251n e\u02d0 va\u02d0\u0280m\u0259 m\u0251nt\u0259l \u0251\u0281\u0259pa\u02d0k va\u02d0\u0280, iv\u0281t d\u0259 ve\u02d0 k\u0259\u028am.",
        "zi\u02d0 g\u0259\u028af\u0259n z\u0259\u0255 e\u02d0ns, d\u0251t de\u02d0j\u025c\u026ane\u02d0\u0283 fi\u02d0\u0281 de\u02d0 \u0283t\u025b\u02d0\u0250kst\u0259 g\u0259l\u0259 zolt, de\u02d0n d\u0259 v\u0251nd\u0259\u0280\u0281 fo\u0280k\u025c\u026a\u0250\u0259 g\u025c\u026af, z\u00e6\u02d0\u026a m\u0251nt\u0259l \u0251\u028az\u02a6\u0259do\u0259n.",
        "d\u0259n no\u0280dv\u0251nt hu\u0259t m\u0251t \u0251l\u0281 fo\u0280s g\u0259blo\u02d0z\u0259n, a\u02d0v\u0281 va\u02d0t \u0259 m\u025c\u026a g\u0259blo\u02d0z\u0259n hu\u0259t, va\u02d0t d\u0259 v\u0251nd\u0259\u0280\u0281 z\u0259\u0255 m\u025c\u026a \u0251 z\u00e6\u02d0\u026a m\u0251nt\u0259l \u0251g\u0259vek\u0259lt hu\u0259t.",
        "um \u00e6n hu\u0259t d\u0259n no\u0280dv\u0251nt z\u00e6\u02d0\u026a k\u0251mpf opgin.",
        "dun hu\u0259t dzon dloft m\u0251t hi\u02d0\u0280\u0259 f\u0280\u0259ntl\u00e6\u0255\u0259 \u0283\u0280a\u02d0l\u0259 g\u0259vi\u02d0\u0281mt, \u0251 \u0283on no\u02d0 ku\u0259\u0280\u02a6\u0259\u0280 \u02a6\u00e6\u02d0\u026at hu\u0259t d\u0259 v\u0251nd\u0259\u0280\u0281 z\u00e6\u02d0\u026a m\u0251nt\u0259l \u00e6\u02d0\u028asg\u0259do\u02d0\u0259n.",
        "do\u02d0 hu\u0259t d\u0259n no\u0280dv\u0251nt mis\u0259n \u02a6\u0259\u028agin, d\u0251t dzon fun hin\u0259n \u02a6we\u02d0 de\u02d0 \u0283t\u025b\u02d0\u0250kst\u0259 vi\u02d0\u0281."
    ],
    "use_gst": false,
    "gst": null,
    "gst_style_input": null,
    "num_speakers": 1,
    "num_chars": 46,
    "r": 2,
    "gradual_training": null,
    "memory_size": -1,
    "prenet_type": "original",
    "prenet_dropout": true,
    "prenet_dropout_at_inference": false,
    "stopnet": true,
    "separate_stopnet": true,
    "stopnet_pos_weight": 10.0,
    "max_decoder_steps": 1000,
    "encoder_in_features": 512,
    "decoder_in_features": 512,
    "decoder_output_dim": 80,
    "out_channels": 80,
    "attention_type": "dynamic_convolution",
    "attention_heads": null,
    "attention_norm": "sigmoid",
    "attention_win": false,
    "windowing": false,
    "use_forward_attn": false,
    "forward_attn_mask": false,
    "transition_agent": false,
    "location_attn": true,
    "bidirectional_decoder": false,
    "double_decoder_consistency": true,
    "ddc_r": 6,
    "use_speaker_embedding": false,
    "speaker_embedding_dim": 512,
    "use_d_vector_file": false,
    "d_vector_file": false,
    "d_vector_dim": null,
    "lr": 0.0001,
    "grad_clip": 5.0,
    "seq_len_norm": false,
    "decoder_loss_alpha": 0.25,
    "postnet_loss_alpha": 0.25,
    "postnet_diff_spec_alpha": 0.25,
    "decoder_diff_spec_alpha": 0.25,
    "decoder_ssim_alpha": 0.25,
    "postnet_ssim_alpha": 0.25,
    "ga_alpha": 5.0
}
