% Henter et al. (2018): IPSJ SIG technical report on "cyborg speech".
% The url field holds the raw address (no LaTeX escapes); month uses the standard macro.
@inproceedings{henter2018generating,
  author    = {Henter, Gustav Eje and Lorenzo-Trueba, Jaime and Wang, Xin and Kondo, Mariko and Yamagishi, Junichi},
  title     = {Generating segment-level foreign-accented synthetic speech with natural speech prosody},
  booktitle = {IPSJ SIG Tech. Rep.},
  volume    = {2018-SLP-120},
  number    = {8},
  pages     = {1--3},
  publisher = {IPSJ},
  address   = {Tsukuba, Japan},
  month     = jan,
  year      = {2018},
  url       = {https://ipsj.ixsq.nii.ac.jp/ej/?action=repository_uri&item_id=185801},
  keywords  = {controllable speech synthesis, foreign accent, multilingual speech synthesis, speech perception},
  abstract  = {We present a new application of deep-learning-based TTS, namely multilingual speech synthesis for generating controllable foreign accent. We train an acoustic model on non-accented multilingual speech recordings from the same speaker and interpolate quinphone linguistic features between languages to generate microscopic foreign accent. By copying pitch and durations from a pre-recorded utterance of the desired prompt, natural prosody is achieved. We call this paradigm ``cyborg speech'' as it combines human and machine speech parameters. Experiments on synthetic American-English-accented Japanese confirm the success of the approach.},
}