@article{valle2021transflower,
  author    = {Valle-P{\'e}rez, Guillermo and Henter, Gustav Eje and Beskow, Jonas and Holzapfel, Andre and Oudeyer, Pierre-Yves and Alexanderson, Simon},
  title     = {{Transflower}: {Probabilistic} autoregressive dance generation with multimodal attention},
  journal   = {ACM Transactions on Graphics},
  year      = {2021},
  month     = dec,
  volume    = {40},
  number    = {6},
  pages     = {1:1--1:13},
  doi       = {10.1145/3478513.3480570},
  publisher = {ACM},
  keywords  = {generative models, machine learning, normalising flows, Glow, transformers, dance},
  abstract  = {Dance requires skillful composition of complex movements that follow rhythmic, tonal and timbral features of music. Formally, generating dance conditioned on a piece of music can be expressed as a problem of modelling a high-dimensional continuous motion signal, conditioned on an audio signal. In this work we make two contributions to tackle this problem. First, we present a novel probabilistic autoregressive architecture that models the distribution over future poses with a normalizing flow conditioned on previous poses as well as music context, using a multimodal transformer encoder. Second, we introduce the currently largest 3D dance-motion dataset, obtained with a variety of motion-capture technologies, and including both professional and casual dancers. Using this dataset, we compare our new model against two baselines, via objective metrics and a user study, and show that both the ability to model a probability distribution, as well as being able to attend over a large motion and music context are necessary to produce interesting, diverse, and realistic dance that matches the music.},
}