% IPSJ SIG technical report (2018-SLP-120, no. 8) on generating
% foreign-accented synthetic speech. The month uses the predefined
% three-letter macro so the bibliography style controls its rendering.
@inproceedings{henter2018generating,
  author    = {Henter, Gustav Eje and Lorenzo-Trueba, Jaime and Wang, Xin and Kondo, Mariko and Yamagishi, Junichi},
  title     = {Generating segment-level foreign-accented synthetic speech with natural speech prosody},
  booktitle = {IPSJ SIG Tech. Rep.},
  volume    = {2018-SLP-120},
  number    = {8},
  pages     = {1--3},
  publisher = {IPSJ},
  address   = {Tsukuba, Japan},
  month     = jan,
  year      = {2018},
  url       = {https://ipsj.ixsq.nii.ac.jp/ej/?action=repository\_uri\&item\_id=185801},
  keywords  = {controllable speech synthesis, foreign accent, multilingual speech synthesis, speech perception},
  abstract  = {We present a new application of deep-learning-based TTS, namely multilingual speech synthesis for generating controllable foreign accent. We train an acoustic model on non-accented multilingual speech recordings from the same speaker and interpolate quinphone linguistic features between languages to generate microscopic foreign accent. By copying pitch and durations from a pre-recorded utterance of the desired prompt, natural prosody is achieved. We call this paradigm "cyborg speech" as it combines human and machine speech parameters. Experiments on synthetic American-English-accented Japanese confirm the success of the approach.},
}