@article {158, title = {The ThreeDWorld Transport Challenge: A Visually Guided Task-and-Motion Planning Benchmark for Physically Realistic Embodied AI}, journal = {arXiv}, year = {2021}, month = {March 25, 2021}, type = {preprint}, abstract = {

We introduce a visually guided and physics-driven task-and-motion planning benchmark, which we call the ThreeDWorld Transport Challenge. In this challenge, an embodied agent equipped with two 9-DOF articulated arms is spawned randomly in a simulated physical home environment. The agent is required to find a small set of objects scattered around the house, pick them up, and transport them to a desired final location. We also position containers around the house that can be used as tools to assist with transporting objects efficiently. To complete the task, an embodied agent must plan a sequence of actions to change the state of a large number of objects in the face of realistic physical constraints. We build this benchmark challenge using the ThreeDWorld simulation: a virtual 3D environment where all objects respond to physics, and where agents can be controlled using a fully physics-driven navigation and interaction API. We evaluate several existing agents on this benchmark. Experimental results suggest that: 1) a pure RL model struggles on this challenge; 2) hierarchical planning-based agents can transport some objects but are still far from solving the task. We anticipate that this benchmark will empower researchers to develop more intelligent physics-driven robots for the physical world.

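As a rough illustration of the hierarchical strategy described above, the sketch below outlines one plausible agent loop in Python. The helper names (explore_for, pick_up, put_in, transport_to) and the agent and task objects are hypothetical placeholders for illustration, not the actual challenge API.

    # Hypothetical sketch of a hierarchical transport agent; all helper
    # methods below are illustrative placeholders, not the challenge API.
    def transport_task(agent, task):
        # Phase 1: search for a container, which lets the agent batch
        # several objects into one carry instead of one round trip each.
        container = agent.explore_for(task.container_types)
        if container is not None:
            agent.pick_up(container)

        # Phase 2: locate each target object, grasp it with one of the
        # two 9-DOF arms, and either stash it in the container or carry it.
        for target in task.target_objects:
            obj = agent.explore_for([target])
            if obj is None:
                continue
            agent.pick_up(obj)
            if container is not None:
                agent.put_in(obj, container)
            else:
                agent.transport_to(obj, task.goal_position)

        # Phase 3: deliver the container, and everything in it, to the goal.
        if container is not None:
            agent.transport_to(container, task.goal_position)
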
}, doi = {10.48550/arXiv.2103.14025}, url = {https://arxiv.org/abs/2103.14025}, author = {Gan, Chuang and Zhou, Siyuan and Schwartz, Jeremy and Alter, Seth and Bhandwaldar, Abhishek and Gutfreund, Dan and Yamins, Daniel L.K. and DiCarlo, James J. and McDermott, Josh and Torralba, Antonio} }

@article {148, title = {ThreeDWorld: A Platform for Interactive Multi-Modal Physical Simulation}, journal = {arXiv}, year = {2020}, month = {July 9, 2020}, type = {preprint}, abstract = {

We introduce ThreeDWorld (TDW), a platform for interactive multi-modal physical simulation. With TDW, users can simulate high-fidelity sensory data and physical interactions between mobile agents and objects in a wide variety of rich 3D environments. TDW has several unique properties: 1) real-time near photo-realistic image rendering quality; 2) a library of objects and environments with materials for high-quality rendering, and routines enabling user customization of the asset library; 3) generative procedures for efficiently building classes of new environments; 4) high-fidelity audio rendering; 5) believable and realistic physical interactions for a wide variety of material types, including cloths, liquids, and deformable objects; 6) a range of "avatar" types that serve as embodiments of AI agents, with the option for user avatar customization; and 7) support for human interactions with VR devices. TDW also provides a rich API enabling multiple agents to interact within a simulation and return a range of sensor and physics data representing the state of the world. We present initial experiments enabled by the platform around emerging research directions in computer vision, machine learning, and cognitive science, including multi-modal physical scene understanding, multi-agent interactions, models that "learn like a child", and attention studies in humans and neural networks. The simulation platform will be made publicly available.

}, url = {https://arxiv.org/abs/2007.04954}, author = {Gan, Chuang and Schwartz, Jeremy and Alter, Seth and Schrimpf, Martin and Traer, James and De Freitas, Julian and Kubilius, Jonas and Bhandwaldar, Abhishek and Haber, Nick and Sano, Megumi and Wang, Elias and Mrowca, Damian and Lingelbach, Michael and Curtis, Aidan and Feigelis, Kevin and Bear, Daniel M. and Gutfreund, Dan and Cox, David and DiCarlo, James J. and McDermott, Josh and Tenenbaum, Joshua B. and Yamins, Daniel L.K.} }
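
To make the command-driven API mentioned in the TDW abstract concrete: a minimal controller session might look like the sketch below, assuming TDW's Python Controller interface as documented around the initial release. Utility and command names such as create_empty_room, get_add_object, and apply_force_to_object follow that documentation, but exact signatures may differ across versions.

    from tdw.controller import Controller
    from tdw.tdw_utils import TDWUtils

    # Launch a controller; it talks to the TDW build by sending JSON
    # commands of the form {"$type": ...} and receiving output data per frame.
    c = Controller()

    # Create a simple procedural room and add one physics-enabled object
    # from the model library.
    object_id = Controller.get_unique_id()
    c.communicate([TDWUtils.create_empty_room(12, 12),
                   c.get_add_object(model_name="iron_box",
                                    object_id=object_id,
                                    position={"x": 0, "y": 0, "z": 0})])

    # Apply a force to the object and let the physics engine respond.
    c.communicate({"$type": "apply_force_to_object",
                   "id": object_id,
                   "force": {"x": 2, "y": 0, "z": 0}})

    # Shut down the build.
    c.communicate({"$type": "terminate"})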