Solr 9.9 → 9.10 for integrations tests (#1165)

thomascorthals · web-flow · commit a0a14df4b8c1 · 2025-11-13T15:12:45.000+01:00
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
@@ -51,12 +51,12 @@ jobs:
                 ref: branch_8_11
                 path: lucene-solr
 
-            - name: Checkout solr 9.9
+            - name: Checkout solr 9.10
               if: matrix.solr == 9
               uses: actions/checkout@v4
               with:
                 repository: apache/solr
-                ref: branch_9_9
+                ref: branch_9_10
                 path: lucene-solr
 
             - name: Start Solr ${{ matrix.solr }} in ${{ matrix.mode }} mode
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [7.0.0]
+### Added
+- Solarium\QueryType\Extract\Query::setStreamType()
+
 ### Changed
  - Added `void` return type to `Solarium\Core\Plugin\PluginInterface::initPlugin()` method signature
  - Added `void` return type to `Solarium\Core\Plugin\PluginInterface::deinitPlugin()` method signature
diff --git a/docs/queries/extract-query.md b/docs/queries/extract-query.md
@@ -15,6 +15,7 @@ See the example code below.
 | omitheader    | boolean | true                          | Disable Solr headers (saves some overhead, as the values aren't actually used in most cases)                                                  |
 | extractonly   | boolean | false                         | If true, returns the extracted content from Tika without indexing the document                                                                |
 | extractformat | string  | null                          | Controls the serialization format of the extracted content. By default 'xml', the other option is 'text'. Only valid if 'extractonly' is true |
+| stream.type   | string  | null                          | Explicitly specify a MIME type for Tika                                                                                                       |
 ||
 
 Executing an extract query
diff --git a/src/QueryType/Extract/Query.php b/src/QueryType/Extract/Query.php
@@ -145,6 +145,30 @@ public function getFile()
         return $this->getOption('file');
     }
 
+    /**
+     * Set an explicit MIME type for Tika.
+     *
+     * @param string $type
+     *
+     * @return self Provides fluent interface
+     */
+    public function setStreamType(string $type): self
+    {
+        $this->setOption('stream.type', $type);
+
+        return $this;
+    }
+
+    /**
+     * Get the explicit MIME type for Tika.
+     *
+     * @return string|null
+     */
+    public function getStreamType(): ?string
+    {
+        return $this->getOption('stream.type');
+    }
+
     /**
      * Set the prefix for fields that are not defined in the schema.
      *
diff --git a/src/QueryType/Extract/RequestBuilder.php b/src/QueryType/Extract/RequestBuilder.php
@@ -41,6 +41,7 @@ public function build(QueryInterface|Query $query): Request
         $request->addParam('defaultField', $query->getDefaultField());
         $request->addParam('extractOnly', $query->getExtractOnly());
         $request->addParam('extractFormat', $query->getExtractFormat());
+        $request->addParam('stream.type', $query->getStreamType());
 
         foreach ($query->getFieldMappings() as $fromField => $toField) {
             $request->addParam('fmap.'.$fromField, $toField);
diff --git a/tests/Integration/AbstractTechproductsTestCase.php b/tests/Integration/AbstractTechproductsTestCase.php
@@ -4522,18 +4522,15 @@ public function testExtractIntoDocument(bool $usePostBigExtractRequestPlugin): v
 
         /** @var Document $document */
         $document = $iterator->current();
-        $this->assertSame('application/pdf', $document['content_type'][0], 'Written document does not contain extracted content type');
         $this->assertSame('PDF Test', trim($document['content'][0]), 'Written document does not contain extracted result');
         $this->assertSame(['bar 1'], $document['attr_foo_1']);
         $iterator->next();
         $document = $iterator->current();
-        $this->assertSame('text/html; charset=UTF-8', $document['content_type'][0], 'Written document does not contain extracted content type');
         $this->assertSame('HTML Test Title', $document['title'][0], 'Written document does not contain extracted title');
         $this->assertMatchesRegularExpression('/^HTML Test Title\s+HTML Test Body$/', trim($document['content'][0]), 'Written document does not contain extracted result');
         $this->assertSame(['bar 2'], $document['attr_foo_2']);
         $iterator->next();
         $document = $iterator->current();
-        $this->assertSame('text/html; charset=UTF-8', $document['content_type'][0], 'Written document does not contain extracted content type');
         $this->assertSame('HTML Stream Title', $document['title'][0], 'Written document does not contain extracted title');
         $this->assertMatchesRegularExpression('/^HTML Stream Title\s+HTML Stream Body$/', trim($document['content'][0]), 'Written document does not contain extracted result');
         $this->assertSame(['bar 3'], $document['attr_foo_3']);
diff --git a/tests/QueryType/Extract/QueryTest.php b/tests/QueryType/Extract/QueryTest.php
@@ -82,6 +82,12 @@ public function testSetAndGetFileResource(): void
         fclose($file);
     }
 
+    public function testSetAndGetStreamType(): void
+    {
+        $this->query->setStreamType('application/x-test');
+        $this->assertSame('application/x-test', $this->query->getStreamType());
+    }
+
     public function testSetAndGetUprefix(): void
     {
         $this->query->setUprefix('dyn_');
diff --git a/tests/QueryType/Extract/RequestBuilderTest.php b/tests/QueryType/Extract/RequestBuilderTest.php
@@ -127,6 +127,18 @@ public function testGetUri(): void
         );
     }
 
+    public function testGetUriWithStreamType(): void
+    {
+        $query = $this->query;
+        $query->setStreamType('application/x-test');
+        $request = $this->builder->build($query);
+        $this->assertSame(
+            'update/extract?omitHeader=true&param1=value1&wt=json&json.nl=flat&extractOnly=false&stream.type=application%2Fx-test&fmap.from-field=to-field'.
+            '&resource.name=RequestBuilderTest.php',
+            $request->getUri()
+        );
+    }
+
     public function testGetUriWithExtractFormat(): void
     {
         $query = $this->query;

Original file line number	Diff line number	Diff line change
`@@ -82,6 +82,12 @@ public function testSetAndGetFileResource(): void`
`82`	`82`	`fclose($file);`
`83`	`83`	`}`
`84`	`84`
	`85`	`+ public function testSetAndGetStreamType(): void`
	`86`	`+ {`
	`87`	`+ $this->query->setStreamType('application/x-test');`
	`88`	`+ $this->assertSame('application/x-test', $this->query->getStreamType());`
	`89`	`+ }`
	`90`	`+`
`85`	`91`	`public function testSetAndGetUprefix(): void`
`86`	`92`	`{`
`87`	`93`	`$this->query->setUprefix('dyn_');`