% Commentary article in Behavioral and Brain Sciences (cite key 180).
% NOTE(review): the export carried volume 4634, which is not a plausible
% volume number; it is set to 46 here. Confirm against the DOI record.
% The exported month string was "Jan-01-2023"; it is normalised to the jan macro.
@article{180,
  author   = {DiCarlo, James J. and Yamins, Daniel L. K. and Ferguson, Michael E. and Fedorenko, Evelina and Bethge, Matthias and Bonnen, Tyler and Schrimpf, Martin},
  title    = {Let{\textquoteright}s move forward: Image-computable models and a common model evaluation scheme are prerequisites for a scientific understanding of human vision},
  journal  = {Behavioral and Brain Sciences},
  volume   = {46},
  year     = {2023},
  month    = jan,
  issn     = {0140-525X},
  doi      = {10.1017/S0140525X23001607},
  url      = {https://www.cambridge.org/core/product/identifier/S0140525X23001607/type/journal_article},
  abstract = {In the target article, Bowers et al. dispute deep artificial neural network (ANN) models as the currently leading models of human vision without producing alternatives. They eschew the use of public benchmarking platforms to compare vision models with the brain and behavior, and they advocate for a fragmented, phenomenon-specific modeling approach. These are unconstructive to scientific progress. We outline how the Brain-Score community is moving forward to add new model-to-human comparisons to its community-transparent suite of benchmarks.},
}

% PNAS article (cite key 167).
% NOTE(review): the exported month string was "Jul-01-2022", which contradicts
% year 2021. The month field is omitted until the issue date is confirmed
% against the DOI record.
@article{167,
  author   = {Zhuang, Chengxu and Yan, Siming and Nayebi, Aran and Schrimpf, Martin and Frank, Michael C. and DiCarlo, James J. and Yamins, Daniel L. K.},
  title    = {Unsupervised neural network models of the ventral visual stream},
  journal  = {Proceedings of the National Academy of Sciences},
  volume   = {118},
  year     = {2021},
  pages    = {e2014196118},
  issn     = {0027-8424},
  doi      = {10.1073/pnas.2014196118},
  url      = {http://www.pnas.org/lookup/doi/10.1073/pnas.2014196118},
  abstract = {Deep neural networks currently provide the best quantitative models of the response patterns of neurons throughout the primate ventral visual stream. However, such networks have remained implausible as a model of the development of the ventral stream, in part because they are trained with supervised methods requiring many more labels than are accessible to infants during development. Here, we report that recent rapid progress in unsupervised learning has largely closed this gap. We find that neural network models learned with deep unsupervised contrastive embedding methods achieve neural prediction accuracy in multiple ventral visual cortical areas that equals or exceeds that of models derived using today's best supervised methods and that the mapping of these neural network models' hidden layers is neuroanatomically consistent across the ventral stream. Strikingly, we find that these methods produce brain-like representations even when trained solely with real human child developmental data collected from head-mounted cameras, despite the fact that these datasets are noisy and limited. We also find that semisupervised deep contrastive embeddings can leverage small numbers of labeled examples to produce representations with substantially improved error-pattern consistency to human behavior. Taken together, these results illustrate a use of unsupervised learning to provide a quantitative model of a multiarea cortical brain system and present a strong candidate for a biologically plausible computational theory of primate sensory learning.},
}

% PLoS Computational Biology article (cite key 32).
% The exported month string was "12/2014"; it is normalised to the dec macro.
% The acronym IT is braced in the title so sentence-casing styles keep it upper-case.
@article{32,
  author   = {Cadieu, Charles F. and Hong, Ha and Yamins, Daniel L. K. and Pinto, Nicolas and Ardila, Diego and Solomon, Ethan A. and Majaj, Najib J. and DiCarlo, James J.},
  editor   = {Bethge, Matthias},
  title    = {Deep Neural Networks Rival the Representation of Primate {IT} Cortex for Core Visual Object Recognition},
  journal  = {PLoS Computational Biology},
  volume   = {10},
  year     = {2014},
  month    = dec,
  pages    = {e1003963},
  doi      = {10.1371/journal.pcbi.1003963},
  url      = {https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1003963\&type=printable},
  abstract = {The primate visual system achieves remarkable visual object recognition performance even in brief presentations, and under changes to object exemplar, geometric transformations, and background variation (a.k.a. core visual object recognition). This remarkable performance is mediated by the representation formed in inferior temporal (IT) cortex. In parallel, recent advances in machine learning have led to ever higher performing models of object recognition using artificial deep neural networks (DNNs). It remains unclear, however, whether the representational performance of DNNs rivals that of the brain. To accurately produce such a comparison, a major difficulty has been a unifying metric that accounts for experimental limitations, such as the amount of noise, the number of neural recording sites, and the number of trials, and computational limitations, such as the complexity of the decoding classifier and the number of classifier training examples. In this work, we perform a direct comparison that corrects for these experimental limitations and computational considerations. As part of our methodology, we propose an extension of ``kernel analysis'' that measures the generalization accuracy as a function of representational complexity. Our evaluations show that, unlike previous bio-inspired models, the latest DNNs rival the representational performance of IT cortex on this visual object recognition task. Furthermore, we show that models that perform well on measures of representational performance also perform well on measures of representational similarity to IT, and on measures of predicting individual IT multi-unit responses. Whether these DNNs rely on computational mechanisms similar to the primate visual system is yet to be determined, but, unlike all previous bio-inspired models, that possibility cannot be ruled out merely on representational performance grounds.},
}