% Conference abstract presented at COSYNE 2020 (authored contribution, hence
% inproceedings with booktitle rather than proceedings with journal).
% NOTE(review): the exported month field held "2/28/2020"; the full date is
% preserved in eventdate (ignored by classic BibTeX styles) -- confirm that it
% is the presentation date.
@inproceedings{156,
  author    = {Margalit, Eshed and Lee, Hyodong and Marques, Tiago and DiCarlo, James J. and Yamins, Daniel L. K.},
  title     = {Correlation-based spatial layout of deep neural network features generates ventral stream topography},
  booktitle = {Computation and Systems Neuroscience ({COSYNE})},
  year      = {2020},
  month     = feb,
  eventdate = {2020-02-28},
  publisher = {COSYNE},
  address   = {Denver, CO},
  url       = {http://cosyne.org/cosyne20/Cosyne2020_program_book.pdf},
  abstract  = {The primate visual system is organized into functional maps, including pinwheel-like arrangements of orientation-tuned neurons in primary visual cortex (V1) and patches of category-selective neurons in higher visual cortex. Recent work has demonstrated that deep convolutional neural networks (DCNNs) trained for object recognition are good descriptors of neural representations throughout the ventral pathway, with early, intermediate, and late cortical brain areas best predicted by corresponding layers of the DCNN. Despite this success, DCNNs have no inherent spatial layout for features at a given retinotopic location, and thus, make no predictions regarding many of the characteristic topographic phenomena observed in the brain beyond retinotopy itself, e.g., pinwheels and patches. Cortical map formation has been modeled using self-organizing maps that leverage principles of wiring-length minimization and local correlations of unit responses to produce topographic structure. However, these methods rely on simplified feature parameterizations that limit their ability to accommodate more realistic descriptions of neuron response properties, especially in higher visual areas. Here, we augment DCNNs by assigning model units spatial positions in a 2D ``cortical sheet'' and introduce a novel algorithm to arrange units so that local response correlations are maximized. Applying this algorithm to a categorization-optimized DCNN, we find that layouts generated from earlier layers recapitulate core features of V1 orientation, spatial frequency, and color preference maps, while those generated from later layers naturally exhibit category-selective clusters.
               Because this wide range of apparently disparate phenomenology is produced by the same underlying principle, our results suggest that the functional architecture of the visual system can be explained by two fundamental constraints: the need to perform visual tasks and the pressure to minimize biophysical costs such as wiring length. Our framework for spatially mapping DCNNs integrates biophysical and representational phenomenology, allowing a more unified understanding of the visual system's functional architecture.},
}

% bioRxiv preprint (July 2020). The doi field holds the bare DOI; styles add
% the resolver prefix themselves.
@article{5,
  author   = {Lee, Hyodong and Margalit, Eshed and Jozwik, Kamila M. and Cohen, Michael A. and Kanwisher, Nancy and Yamins, Daniel L. K. and DiCarlo, James J.},
  title    = {Topographic deep artificial neural networks reproduce the hallmarks of the primate inferior temporal cortex face processing network},
  journal  = {bioRxiv},
  year     = {2020},
  month    = jul,
  type     = {preprint},
  doi      = {10.1101/2020.07.09.185116},
  url      = {https://www.biorxiv.org/content/10.1101/2020.07.09.185116v1.full.pdf},
  abstract = {A salient characteristic of monkey inferior temporal (IT) cortex is the IT face processing network. Its hallmarks include: ``face neurons'' that respond more to faces than non-face objects, strong spatial clustering of those neurons in foci at each IT anatomical level (``face patches''), and the preferential interconnection of those foci. While some deep artificial neural networks (ANNs) are good predictors of IT neuronal responses, including face neurons, they do not explain those face network hallmarks. Here we ask if they might be explained with a simple, metabolically motivated addition to current ANN ventral stream models. Specifically, we designed and successfully trained topographic deep ANNs (TDANNs) to solve real-world visual recognition tasks (as in prior work), but, in addition, we also optimized each network to minimize a proxy for neuronal wiring length within its IT layers. We report that after this dual optimization, the model IT layers of TDANNs reproduce the hallmarks of the IT face network: the presence of face neurons, clusters of face neurons that quantitatively match those found in IT face patches, connectivity between those patches, and the emergence of face viewpoint invariance along the network hierarchy. We find that these phenomena emerge for a range of naturalistic experience, but not for highly unnatural training. Taken together, these results show that the IT face processing network could be a consequence of a basic hierarchical anatomy along the ventral stream, selection pressure on the visual system to accomplish general object categorization, and selection pressure to minimize axonal wiring length.},
}