#!/bin/bash

# Launch joint fine-tuning with espnet2/bin/gan_vc_train.py for one
# ${s} -> ${t} pair, logging combined stdout/stderr to ${out_dir}/train.log.
#
# Fail fast: abort on any command error or unset variable, and make the
# training pipeline at the bottom report the trainer's exit status instead of
# tee's (without pipefail a crashed run would still exit 0).
set -euo pipefail

# Identifiers used to build the dump/, stats/, conf/ and exp/ paths below
# (presumably source and target speaker IDs -- confirm against the recipe).
s=bdl
t=slt

mode=melspc_r3

out_dir="exp/cfs2_vc_prime_ft/${mode}/${s}_to_${t}"
mkdir -p "${out_dir}"

n_gpu=1
export CUDA_VISIBLE_DEVICES=0

# Directory prefixes shared by many of the arguments below.
stats_s="stats/jets/${s}/tts_stats_raw_phn_tacotron_g2p_en_no_space"
stats_t="stats/jets/${t}/tts_stats_raw_phn_tacotron_g2p_en_no_space"
# NOTE(review): this path hard-codes "melspc_r3" rather than using ${mode};
# kept as-is, confirm whether it is meant to follow ${mode}.
dur_dir="exp/vtn/melspc_r3/${s}_to_${t}/inference_use_teacher_forcing_200epoch"

# Trainer arguments, kept in the original order because several options are
# repeated and are passed to the trainer in sequence.
train_args=(
    --use_preprocessor false
    --normalize global_mvn
    --s_normalize global_mvn
    --resume true
    --output_dir "${out_dir}"
    --config "conf/tuning/joint_finetune_cfs2_vc_prime_${mode}_${s}_to_${t}.yaml"

    # Feature extraction settings for the "speech_s" input.
    --feats_s_extract fbank
    --feats_s_extract_conf n_fft=1024
    --feats_s_extract_conf hop_length=256
    --feats_s_extract_conf win_length=null
    --feats_s_extract_conf fs=24000
    --feats_s_extract_conf fmin=80
    --feats_s_extract_conf fmax=7600
    --feats_s_extract_conf n_mels=80

    # Feature extraction settings for the "speech" input (same values).
    --feats_extract fbank
    --feats_extract_conf n_fft=1024
    --feats_extract_conf hop_length=256
    --feats_extract_conf win_length=null
    --feats_extract_conf fs=24000
    --feats_extract_conf fmin=80
    --feats_extract_conf fmax=7600
    --feats_extract_conf n_mels=80

    # Waveform lists and shape files for training and validation.
    --train_data_path_and_name_and_type "dump/raw/${s}_train_no_dev/wav.scp,speech_s,sound"
    --train_data_path_and_name_and_type "dump/raw/${t}_train_no_dev/wav.scp,speech,sound"
    --train_shape_file "${stats_s}/train/speech_s_shape"
    --train_shape_file "${stats_t}/train/speech_shape"
    --valid_data_path_and_name_and_type "dump/raw/${s}_dev/wav.scp,speech_s,sound"
    --valid_data_path_and_name_and_type "dump/raw/${t}_dev/wav.scp,speech,sound"
    --valid_shape_file "${stats_s}/valid/speech_s_shape"
    --valid_shape_file "${stats_t}/valid/speech_shape"

    # Durations read from the exp/vtn inference output directory.
    --train_data_path_and_name_and_type "${dur_dir}/train_no_dev/durations/durations,durations,text_int"
    --valid_data_path_and_name_and_type "${dur_dir}/dev/durations/durations,durations,text_int"

    # Pitch and energy features read from the ${t} stats directory.
    --train_data_path_and_name_and_type "${stats_t}/train/collect_feats/pitch.scp,pitch,npy"
    --valid_data_path_and_name_and_type "${stats_t}/valid/collect_feats/pitch.scp,pitch,npy"
    --train_data_path_and_name_and_type "${stats_t}/train/collect_feats/energy.scp,energy,npy"
    --valid_data_path_and_name_and_type "${stats_t}/valid/collect_feats/energy.scp,energy,npy"

    --pitch_extract_conf fs=24000
    --pitch_extract_conf n_fft=1024
    --pitch_extract_conf hop_length=256
    --pitch_extract_conf f0max=400
    --pitch_extract_conf f0min=80
    --pitch_extract_conf reduction_factor=1
    --pitch_extract_conf use_token_averaged_f0=false
    --pitch_normalize_conf "stats_file=${stats_t}/train/pitch_stats.npz"

    --energy_extract_conf fs=24000
    --energy_extract_conf n_fft=1024
    --energy_extract_conf hop_length=256
    --energy_extract_conf win_length=null
    --energy_extract_conf reduction_factor=1
    --energy_extract_conf use_token_averaged_energy=false
    --energy_normalize_conf "stats_file=${stats_t}/train/energy_stats.npz"

    # Normalization statistics: ${t} stats for --normalize, ${s} stats for
    # --s_normalize.
    --normalize_conf "stats_file=${stats_t}/train/feats_stats.npz"
    --s_normalize_conf "stats_file=${stats_s}/train/feats_stats.npz"

    --ngpu "${n_gpu}"
    --multiprocessing_distributed True
)

# Merge stderr into stdout so the log file captures both; with pipefail set
# above, a trainer failure becomes this script's exit status.
../../../tools/venv/bin/python3 \
    ../../../espnet2/bin/gan_vc_train.py \
    "${train_args[@]}" \
    2>&1 | tee "${out_dir}/train.log"
