<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<!-- Viewport and title first so mobile scaling and the tab title are set from the earliest bytes -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Machine Psychophysics: Cognitive Control in Vision–Language Models</title>
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
<!-- —— Meta —— -->
<meta name="description" content="Large-scale psychophysics study shows emergent cognitive-control signatures in 108 Vision–Language Models across 2 220 trials (+ 238 control).">
<meta property="og:title" content="Machine Psychophysics: Cognitive Control in Vision–Language Models">
<meta property="og:description" content="108 VLMs × 2 220 trials (plus 238 control) reveal human-aligned congruency effects and persistent hierarchical-conflict deficits.">
<meta property="og:url" content="https://github.com/grow-ai-like-a-child/Psychophysics.git">
<!-- NOTE(review): og:image / twitter:image should be absolute URLs for off-site link previews — confirm the deployed site URL -->
<meta property="og:image" content="standard_vs_squared.png">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="630">
<meta name="twitter:title" content="Machine Psychophysics: Cognitive Control in VLMs">
<meta name="twitter:description" content="Emergent executive-function signatures and scaling trends across 108 VLMs.">
<meta name="twitter:image" content="standard_vs_squared.png">
<meta name="twitter:card" content="summary_large_image">
<!-- Fonts & Bulma -->
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma-carousel@4.0.3/dist/css/bulma-carousel.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<!-- libs (deferred: they only touch the DOM after parse) -->
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
<script defer src="https://cdn.jsdelivr.net/npm/bulma-carousel@4.0.3/dist/js/bulma-carousel.min.js"></script>
<style>
/* Theme variables: light by default, overridden when <html data-theme="dark"> is set by the JS toggle */
:root {
--bg-main: #ffffff;
--text-main: #1a1a1a;
}
[data-theme="dark"] {
--bg-main: #181818;
--text-main: #e5e5e5; /* light text on the dark background (was #1a1a1a — unreadable on #181818) */
}
html {
scroll-behavior: smooth;
}
body {
background: var(--bg-main);
color: var(--text-main);
font-family: "Noto Sans", sans-serif;
}
/* Keep the navbar pinned to the top of the viewport */
.navbar {
position: sticky;
top: 0;
z-index: 20;
background: var(--bg-main);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.06);
}
.hero-body {
padding: 3rem 1.5rem;
}
/* Responsive main-title sizing */
.title.is-1 {
font-size: 2.5rem !important;
}
@media (min-width: 1024px) {
.title.is-1 {
font-size: 3rem !important;
}
}
@media (min-width: 1440px) {
.title.is-1 {
font-size: 3.5rem !important;
}
}
/* Dark-mode heading fix */
[data-theme="dark"] .title {
color: #ffffff !important;
}
/* Dark-mode override for Bulma's hero.is-light background */
[data-theme="dark"] .hero.is-light {
background: #1e1e1e !important;
color: #e5e5e5 !important;
}
/* Emphasised text inside the light-background cards stays dark in dark mode.
   Scoped to .gradient-box so <strong> on dark page backgrounds remains readable
   (the previous page-wide rule forced #1a1a1a everywhere). */
[data-theme="dark"] .gradient-box p strong,
[data-theme="dark"] .gradient-box li strong {
color: #1a1a1a !important;
}
[data-theme="dark"] figcaption {
color: #aaa !important; /* was #444 — darker than the light-mode #666, unreadable on #181818 */
}
/* Unified card styling for the key-findings / summary modules */
.gradient-box {
position: relative;
color: #1a1a1a !important;
background: #f5f5f5;
padding: 1.2rem 1.5rem;
border-radius: 12px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
transition: all 0.2s ease;
}
/* Subtle darkening overlay so gradient backgrounds keep enough text contrast */
.gradient-box::before {
content: "";
position: absolute;
inset: 0;
border-radius: inherit;
background: rgba(0, 0, 0, 0.05);
mix-blend-mode: multiply;
pointer-events: none;
}
/* Cards keep a light background even in dark mode */
[data-theme="dark"] .gradient-box {
background: #f5f5f5 !important;
color: #1a1a1a !important;
text-shadow: none !important;
}
/* Consistent spacing for bold lead-ins inside cards */
.gradient-box p strong {
display: inline-block;
font-size: 1.1rem;
margin-top: -0.2rem;
margin-bottom: 0.6rem;
}
/* Equal-height column layout support */
.equal-height-columns {
display: flex;
flex-wrap: wrap;
}
.equal-height-columns .column {
display: flex;
}
.equal-height-columns .box {
flex: 1;
display: flex;
flex-direction: column;
justify-content: center;
min-height: 240px;
}
/* Image styling */
.carousel img,
figure img {
border-radius: 8px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.08);
}
figure figcaption {
margin-top: 6px;
font-size: 0.8rem;
color: #666;
text-align: center;
}
/* Author section */
.affiliation {
font-weight: normal;
font-size: 0.9em;
color: #888;
margin-left: 4px;
}
/* Dashed underline + help cursor for links carrying a title tooltip */
a[title] {
text-decoration: none;
border-bottom: 1px dashed #ccc;
cursor: help;
}
[data-theme="dark"] a[title] {
border-bottom-color: #666;
}
[data-theme="dark"] .affiliation {
color: #aaa;
}
/* BibTeX copy button */
.copy-btn {
position: absolute;
top: 8px;
right: 8px;
border: none;
background: #3273dc;
color: #fff;
border-radius: 4px;
font-size: 0.8rem;
padding: 4px 8px;
cursor: pointer;
}
/* Dark-mode footer fix */
[data-theme="dark"] .footer {
background-color: #181818 !important;
color: #e5e5e5 !important;
}
[data-theme="dark"] .footer a {
color: #a0c8ff !important;
}
/* Suppress underlines on the <u> spans used in the #design section */
#design u {
text-decoration: none;
}
</style>
</head>
<body>
<!-- Sticky Nav (the original file never closed <head> or opened <body>) -->
<nav class="navbar" aria-label="Primary">
<div class="container is-max-desktop" style="justify-content:space-between">
<div class="navbar-brand">
<a class="navbar-item" href="#top">VLM-Psychophysics</a>
<!-- Mobile menu toggle; state is flipped by the burger handler in the page script -->
<a role="button" class="navbar-burger" data-target="navMenu" aria-label="Toggle navigation menu" aria-expanded="false">
<span></span><span></span><span></span>
</a>
</div>
<div id="navMenu" class="navbar-menu">
<a class="navbar-item" href="#abstract">Abstract</a>
<a class="navbar-item" href="#design">Design</a>
<a class="navbar-item" href="#results">Results</a>
<a class="navbar-item" href="#BibTeX">BibTeX</a>
</div>
</div><!-- /.container (was left unclosed in the original) -->
</nav>
<!-- Hero: title, authors, affiliations, and paper/code buttons -->
<section id="top" class="hero">
<div class="hero-body">
<div class="container is-max-desktop has-text-centered">
<h1 class="title is-1">🧠 Machine Psychophysics: Cognitive Control in Vision–Language Models</h1>
<p class="is-size-5">
<a href="https://ihzedoul.com/" target="_blank" rel="noopener noreferrer">Dezhi Luo<sup>1</sup></a>,
<a href="https://openreview.net/profile?id=%7EMaijunxian_Wang1" target="_blank" rel="noopener noreferrer">Maijunxian Wang<sup>2</sup></a>,
<a href="https://openreview.net/profile?id=%7EBingyang_Wang2" target="_blank" rel="noopener noreferrer">Bingyang Wang<sup>3</sup></a>,<br>
<a href="https://openreview.net/profile?id=~Tianwei_Zhao1" target="_blank" rel="noopener noreferrer">Tianwei Zhao<sup>4</sup></a>,
<a href="https://williamium3000.github.io/" target="_blank" rel="noopener noreferrer">Yijiang Li<sup>5</sup></a>,
<a href="https://hokindeng.github.io/" target="_blank" rel="noopener noreferrer">Hokin Deng<sup>6</sup></a>
</p>
<p class="is-size-6">
<sup>1</sup>University of Michigan
<sup>2</sup>University of California, Davis
<sup>3</sup>Emory University<br>
<sup>4</sup>Johns Hopkins University
<sup>5</sup>University of California, San Diego
<sup>6</sup>Carnegie Mellon University
</p>
<div class="has-text-centered" style="margin-top: 0.8rem;">
<span class="author-block" style="color: #4a4a4a; margin-top: 10px;">
<img src="growai.png" alt="GrowAI Team Logo" style="max-height: 100px; margin-top: 10px;"><br>
<strong>GrowAI Team</strong> |
<a href="https://growing-ai-like-a-child.github.io/" target="_blank" rel="noopener noreferrer" style="color: #3273dc;">
growing-ai-like-a-child.github.io
</a>
</span>
</div>
<p style="margin-top:15px">
<a class="button is-dark is-rounded" href="https://arxiv.org/abs/2505.18969" target="_blank" rel="noopener noreferrer">
<span class="icon"><i class="fas fa-file-pdf"></i></span><span>Paper</span>
</a>
<a class="button is-dark is-rounded" href="https://github.com/grow-ai-like-a-child/Psychophysics" target="_blank" rel="noopener noreferrer">
<span class="icon"><i class="fab fa-github"></i></span><span>Code &amp; Dataset</span>
</a>
</p>
</div>
</div>
</section>
<!-- Teaser: one-paragraph TL;DR banner shown directly under the hero -->
<section class="section">
<div class="container is-max-desktop">
<div class="notification is-primary is-light" style="font-size:1.1rem">
<strong>TL;DR</strong> We demonstrate that cognitive control emerges in vision–language models through structured inter-model variation across Stroop and Flanker tasks. These differences persist in more demanding Squared variants and align with human-like patterns of processing under resource constraints. Scaling trends mirror forced-response paradigms, revealing systematic control capacity across model size.
</div>
</div>
</section>
<!-- Abstract: nav target #abstract; hero.is-light gives the tinted band (dark-mode override in <style>) -->
<section id="abstract" class="section hero is-light">
<div class="container is-max-desktop">
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3">Abstract</h2>
<div class="content has-text-justified">
<p><strong>Cognitive control</strong> refers to the ability to flexibly coordinate thought and action in pursuit of internal goals. Conflict-task paradigms benchmark this faculty by contrasting congruent and incongruent trials.</p>
<p>We evaluate <strong>108 vision–language models</strong> on Stroop, Letter- and Number-Flanker tasks and their more demanding “<em>Squared</em>” variants — <strong>across 2,220 structured trials and 238 control trials</strong>. Models reproduce human-like congruency effects and, critically, show <strong>robust inter-model variation</strong> that reflects differential sensitivity to interference.</p>
<p>Letter- and Number-Flanker scores are <strong>highly correlated</strong> (r = 0.96), indicating stable, convergent traits of control. Furthermore, accuracy improves <em>log-linearly</em> with parameter scale, aligning with human forced-response processing curves.</p>
<p>These results support the emergence of control mechanisms from general-purpose associative learning and introduce a framework for measuring trait-like cognitive properties in large-scale AI systems.</p>
</div>
</div>
</div>
</div>
</section>
<!-- Key Findings: 2x2 grid of gradient cards (equal heights via .equal-height-columns) -->
<section class="hero is-small">
<div class="hero-body">
<div class="container is-max-desktop">
<h2 class="title is-3 has-text-centered" style="margin-top:2rem;">🔍 Key Findings</h2>
<div class="columns is-multiline equal-height-columns is-centered">
<div class="column is-half">
<div class="box gradient-box" style="background:linear-gradient(135deg,#b3cde0 0%,#ccebc5 100%);">
<h3 class="title is-4"><i class="fas fa-sliders-h"></i> Emergent Congruency</h3>
<p>All models reproduce the classic congruency effect, indicating interference resolution emerges from multimodal pre-training — with <strong>model-level variability</strong> in sensitivity.</p>
</div>
</div>
<div class="column is-half">
<div class="box gradient-box" style="background:linear-gradient(135deg,#decbe4 0%,#fed9a6 100%);">
<h3 class="title is-4"><i class="fas fa-layer-group"></i> Hierarchical Interference</h3>
<p>Squared tasks introduce <strong>nested conflicts</strong> that amplify <strong>inter-model differences</strong>, revealing structured variation in higher-order control.</p>
</div>
</div>
<div class="column is-half">
<div class="box gradient-box" style="background:linear-gradient(135deg,#ffffcc 0%,#ccebc5 100%);">
<h3 class="title is-4"><i class="fas fa-chart-line"></i> Scaling Trend</h3>
<!-- Fixed: the original opened two <p> tags here (`<p><p>`) -->
<p>Performance rises <strong>log-linearly</strong> from 1B → 110B parameters, mirroring human processing-time curves and reflecting <strong>resource-based control limitations</strong>.</p>
</div>
</div>
<div class="column is-half">
<div class="box gradient-box" style="background:linear-gradient(135deg,#fbb4ae 0%,#fddaec 100%);">
<h3 class="title is-4"><i class="fas fa-check-double"></i> Convergent Validity</h3>
<p>Letter- and Number-Flanker scores covary strongly (r = 0.96), indicating a unified control construct and <strong>trait-like stability across models</strong>.</p>
</div>
</div>
</div>
</div>
</div>
</section>
<!-- Experiment Overview: three headline statistics (bare `<`/`>` in text escaped as entities) -->
<section class="section">
<div class="container is-max-desktop">
<h2 class="title is-3 has-text-centered">🧪 Experiment Overview</h2>
<div class="columns is-multiline is-centered" style="margin-bottom: 2rem;">
<!-- Card 1: Model Count -->
<div class="column is-one-third has-text-centered">
<div class="has-text-weight-bold is-size-1" style="color: #e74c3c;">108</div>
<div class="has-text-weight-semibold">Vision–Language Models</div>
<div class="is-size-7 has-text-grey">1B–110B parameters</div>
</div>
<!-- Card 2: Trial Count -->
<div class="column is-one-third has-text-centered">
<div class="has-text-weight-bold is-size-1" style="color: #27ae60;">2,220</div>
<div class="has-text-weight-semibold">Conflict Trials</div>
<div class="is-size-7 has-text-grey">Stroop, Flanker, Squared</div>
</div>
<!-- Card 3: Effect Prevalence -->
<div class="column is-one-third has-text-centered">
<div class="has-text-weight-bold is-size-1" style="color: #2980b9;">&gt;95%</div>
<div class="has-text-weight-semibold">Show Conflict Effect</div>
<div class="is-size-7 has-text-grey">Performance drop: C &gt; I</div>
</div>
</div>
<p class="has-text-centered is-size-5">
Our large-scale battery tested <strong>108 vision–language models</strong> across <strong>2,220 structured trials</strong>, revealing that <strong>over 95% of models</strong> exhibit cognitive conflict effects.
</p>
</div>
</section>
<!-- Experimental Design: nav target #design. The <u> spans below are rendered WITHOUT
     underline via the `#design u` rule in <style>; they mark trial-count callouts. -->
<section id="design" class="hero is-small">
<div class="hero-body">
<div class="container is-max-desktop">
<h2 class="title is-3 has-text-centered" style="margin-top:2rem;">🔬 Experimental Design</h2>
<div class="content has-text-justified" style="max-width:800px;margin:0 auto 1.5rem">
<p><strong>We evaluate cognitive control in VLMs with a three-tier battery.</strong>
<br><b>(1) Standard tasks</b> reproduce classic Stroop and Flanker paradigms — models must ignore irrelevant colour or flanker cues in simple congruent vs. incongruent trials. <u>They include <strong>84 Stroop</strong> (42 C/42 I) and <strong>180 Letter- / 180 Number-Flanker</strong> (90 C/90 I) stimuli.</u>
<br><b>(2) Squared tasks</b> add a second layer of conflict, yielding four hierarchical conditions (FC, FI, SCRI, SIRC) that tax executive control far beyond the standard benchmark. <u>This yields <strong>336 Stroop-Squared</strong> and <strong>720 stimuli for each Flanker-Squared task</strong>.</u>
<br><b>(3) Control battery</b> disentangles low-level demands by isolating OCR, colour perception, and 2-D spatial encoding, ensuring that any deficits arise from representational conflict rather than perception per se. <u>It contains <strong>238 control trials</strong>.</u>
</p>
</div>
<!-- Carousel initialised by bulmaCarousel.attach() in the page script -->
<div id="methodology-carousel" class="carousel results-carousel">
<div class="item">
<figure>
<img loading="lazy" src="figure1.png" alt="Standard tasks examples">
<figcaption><strong>Standard Tasks.</strong> 84 Stroop + 180 × 2 Flanker stimuli.</figcaption>
</figure>
</div>
<div class="item">
<figure>
<img loading="lazy" src="figure2.png" alt="Squared tasks conditions">
<figcaption><strong>Squared Tasks.</strong> 336 Stroop-Squared + 720 × 2 Flanker-Squared stimuli.</figcaption>
</figure>
</div>
<div class="item">
<figure>
<img loading="lazy" src="control_tasks.png" alt="Control battery">
<figcaption><strong>Control Battery.</strong> 238 trials disentangling OCR, colour, spatial encoding.</figcaption>
</figure>
</div>
</div>
</div>
</div>
</section>
<!-- Results: nav target #results. Bare `<`/`>` in the statistics text escaped as entities. -->
<section id="results" class="section">
<div class="container is-max-desktop">
<h2 class="title is-3 has-text-centered">📊 Results</h2>
<div class="content has-text-justified">
<h3 class="title is-4">Highlights</h3>
<ul>
<li><strong>Structured Congruency:</strong> Standard tasks elicit clear congruency effects with <em>graded model-level sensitivity</em>.</li>
<li><strong>Hierarchical Differentiation:</strong> Squared tasks amplify individual differences, exposing <em>trait-level variation</em> in cognitive control.</li>
<li><strong>Conflict-Specific Impairments:</strong> Control battery confirms that observed deficits stem from <em>representational interference</em> rather than perceptual limitations.</li>
<li><strong>Human-like Scaling:</strong> Performance increases <em>log-linearly with model size</em>, aligning with human resource-constrained processing trajectories.</li>
</ul>
<figure style="text-align:center;margin:2rem 0">
<img loading="lazy" src="standard_vs_squared.png" alt="Overall accuracy" style="max-width:95%">
<figcaption><strong>Overall Results.</strong> Human-model accuracy across conflict conditions.</figcaption>
</figure>
<h4 class="title is-5">Scaling with parameters</h4>
<p style="max-width:720px;margin:0 auto 1rem">
Model size acts as a proxy for computational capacity, akin to processing-time constraints in human cognition. Accuracy increases <strong>log-linearly</strong> from 1B to 110B parameters — echoing <em>forced-response paradigms</em> in psychophysics. Notably, Squared tasks reveal that even the largest models exhibit residual conflict sensitivity, suggesting that <strong>cognitive control mechanisms scale in depth, not just in breadth</strong>.
</p>
<figure style="text-align:center">
<img loading="lazy" src="scaling.png" alt="Scaling curve" style="max-width:90%">
<figcaption><strong>Scaling Trend.</strong> Accuracy vs. parameter count (1 B → 110 B).</figcaption>
</figure>
<!-- Result Summary cards -->
<section style="margin-top: 3rem;">
<h3 class="title is-4 has-text-centered">📌 Result Summary</h3>
<div class="columns is-multiline is-centered equal-height-columns">
<div class="column is-half">
<div class="box gradient-box" style="background: linear-gradient(135deg, #b3cde0 0%, #ccebc5 100%);">
<h3 class="title is-4"><i class="fas fa-sliders-h"></i> Congruency Effect</h3>
<p>&gt;95% models show conflict interference.<br>
Stroop: t = 8.99, p &lt; 10⁻¹⁴<br>
Flanker-L: t = 17.88, p &lt; 10⁻³³<br>
Flanker-N: t = 16.85, p &lt; 10⁻³¹</p>
</div>
</div>
<div class="column is-half">
<div class="box gradient-box" style="background: linear-gradient(135deg, #e0c3fc 0%, #8ec5fc 100%);">
<h3 class="title is-4"><i class="fas fa-layer-group"></i> Squared Conflict</h3>
<p>Fully-incongruent (SIRC) trials show largest drop.<br>All p &lt; 0.001</p>
</div>
</div>
<div class="column is-half">
<div class="box gradient-box" style="background: linear-gradient(135deg, #f6d365 0%, #fda085 100%);">
<h3 class="title is-4"><i class="fas fa-check-circle"></i> Control Accuracy</h3>
<p>Accuracy ≈ 85% on 238 control trials<br>(OCR, color, spatial).</p>
</div>
</div>
<div class="column is-half">
<div class="box gradient-box" style="background: linear-gradient(135deg, #ff9a9e 0%, #fecfef 100%);">
<h3 class="title is-4"><i class="fas fa-link"></i> Task Concordance</h3>
<p>r = 0.96 between Letter-/Number-Flanker<br>→ shared bottleneck</p>
</div>
</div>
</div>
</section>
</div><!-- /.content -->
</div><!-- /.container -->
</section><!-- /#results (the original never closed these three, nesting #implications inside) -->
<!-- Implications: now a sibling of #results instead of accidentally nested inside it -->
<section id="implications" class="section">
<div class="container is-max-desktop">
<h2 class="title is-3 has-text-centered">🧭 Implications &amp; Future Work</h2>
<div class="content has-text-justified" style="max-width: 880px; margin: 0 auto;">
<h3 class="title is-5">🧠 Implications for Cognitive AI</h3>
<ul>
<li><strong>Emergent Congruency:</strong> Stroop and Flanker effects emerge reliably across model scales, with <em>graded variation</em> in sensitivity — indicating that interference resolution arises from general-purpose associative dynamics.</li>
<li><strong>Hierarchical Bottlenecks:</strong> Squared tasks expose <em>residual conflict sensitivity</em> even in frontier models, suggesting that scaling alone cannot resolve higher-order control limitations.</li>
<li><strong>Shared Control Mechanisms:</strong> Strong correlations between letter- and number-Flanker scores (r = 0.96) support the existence of a unified control construct within VLMs, suggesting <em>stable, trait-like signatures</em> of cognitive control.</li>
</ul>
<h3 class="title is-5">🔍 Open Questions</h3>
<ul>
<li>What aspects of pretraining distribution influence <em>trait emergence</em> in control tasks?</li>
<li>What architectural or procedural inductive biases support <em>deep hierarchical interference resolution</em>?</li>
<li>Can VLMs develop <em>temporally persistent</em> control structures akin to executive functions in biological agents?</li>
</ul>
<h3 class="title is-5">🧪 Methodological Contribution</h3>
<p>
This study proposes a <strong>psychophysics-inspired trait measurement framework</strong> for evaluating control behaviors in VLMs.
By using <em>minimally confounded trials</em> and contrastive conflict structures, we isolate underlying control properties and enable
<em>structural comparisons</em> across models.
</p>
</div>
</div>
</section>
<!-- BibTeX: nav target #BibTeX. #copyBtn copies the #bibtex <code> contents to the
     clipboard via the handler in the page script; .copy-btn is absolutely positioned
     against this container (style="position:relative"). -->
<section class="section" id="BibTeX">
<div class="container is-max-desktop content" style="position:relative">
<h2 class="title">BibTeX</h2>
<button class="copy-btn" id="copyBtn">Copy</button>
<pre><code id="bibtex">
@article{luo2025machine,
  title = {Machine Psychophysics: Cognitive Control in Vision–Language Models},
  author = {Luo, Dezhi and Wang, Maijunxian and Wang, Bingyang and Zhao, Tianwei and Li, Yijiang and Deng, Hokin},
  publisher={arXiv Preprints},
  year = {2025},
  url = {https://arxiv.org/abs/2505.18969}
}
</code></pre>
</div>
</section>
<!-- Footer: team branding and template attribution -->
<footer class="footer">
<div class="container">
<div class="columns is-centered">
<div class="column is-8">
<div class="content">
<p class="has-text-centered">
<img src="growai.png" alt="GrowAI Team Logo" style="max-height: 200px; margin-top: 10px;"><br>
<strong>GrowAI Team</strong> |
<a href="https://growing-ai-like-a-child.github.io/" target="_blank" rel="noopener noreferrer" style="color: #3273dc;">
growing-ai-like-a-child.github.io
</a>
</p>
<p class="has-text-centered" style="font-size: 0.9rem;">
This page was built using the
<a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank" rel="noopener noreferrer" style="color: #3273dc;">
Academic Project Page Template
</a> which was adopted from the
<a href="https://nerfies.github.io" target="_blank" rel="noopener noreferrer" style="color: #3273dc;">Nerfies</a> project page. You are free to borrow the source code of this website, we just ask that you link back to this page in the footer. This website is licensed under a
<a rel="license noopener noreferrer" href="https://creativecommons.org/licenses/by-sa/4.0/" target="_blank" style="color: #3273dc;">
Creative Commons Attribution-ShareAlike 4.0 International License
</a>.
</p>
</div>
</div>
</div>
</div>
</footer>
<!-- Page behaviour: burger menu, dark-theme toggle, BibTeX copy, carousel init -->
<script>
/* Burger: toggle the mobile nav menu open/closed. */
const burger = document.querySelector('.navbar-burger');
if (burger) {
  burger.addEventListener('click', () => {
    burger.classList.toggle('is-active');
    document.getElementById(burger.dataset.target).classList.toggle('is-active');
  });
}
/* Theme toggle. NOTE(review): no #theme-toggle element exists in the current markup,
   so the original unguarded `btn.onclick = ...` threw a TypeError that prevented the
   copy-button and carousel handlers below from ever being registered. Guard it so the
   rest of the page still works; restore the button in the navbar to re-enable dark mode. */
const root = document.documentElement;
const themeBtn = document.getElementById('theme-toggle');
if (localStorage.theme === 'dark') {
  root.dataset.theme = 'dark';
  if (themeBtn) themeBtn.innerHTML = '<span class="icon"><i class="fas fa-sun"></i></span>';
}
if (themeBtn) {
  themeBtn.addEventListener('click', () => {
    const wasDark = root.dataset.theme === 'dark';
    root.dataset.theme = wasDark ? 'light' : 'dark';
    themeBtn.innerHTML = wasDark
      ? '<span class="icon"><i class="fas fa-moon"></i></span>'
      : '<span class="icon"><i class="fas fa-sun"></i></span>';
    localStorage.theme = root.dataset.theme;
  });
}
/* Copy BibTeX: write the citation text to the clipboard and flash "Copied!". */
const copyBtn = document.getElementById('copyBtn');
if (copyBtn) {
  copyBtn.addEventListener('click', () => {
    navigator.clipboard.writeText(document.getElementById('bibtex').innerText).then(() => {
      copyBtn.textContent = 'Copied!';
      setTimeout(() => { copyBtn.textContent = 'Copy'; }, 1500);
    });
  });
}
/* Carousel: initialise bulma-carousel once the DOM is ready (script is CDN-loaded with defer,
   so guard against it having failed to load). */
document.addEventListener('DOMContentLoaded', () => {
  if (typeof bulmaCarousel !== 'undefined') bulmaCarousel.attach();
});
</script>
</body>
</html>