Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"php": "^7.4 || ^8.0",
"ext-json": "*",
"ext-xmlwriter": "*",
"ibexa/core": "~4.6.0@dev",
"ibexa/core": "dev-taxonomy-suggestions as 4.6.x-dev",
"netgen/query-translator": "^1.0.2",
"symfony/http-kernel": "^5.0",
"symfony/dependency-injection": "^5.0",
Expand Down
16 changes: 16 additions & 0 deletions src/contracts/Query/EmbeddingVisitor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
namespace Ibexa\Contracts\Solr\Query;

use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding;

/**
* Base class for visitors that turn an Embedding query value into a string for Solr.
*
* Concrete visitors declare which embeddings they support via canVisit() and
* produce the string representation via visit().
*/
abstract class EmbeddingVisitor
{
/**
* Tells whether this visitor is able to handle the given embedding.
*/
abstract public function canVisit(Embedding $embedding): bool;

/**
* Produces the string representation of the given embedding.
*
* NOTE(review): $limit is presumably the maximum number of matches the
* generated expression should request (top-K) - confirm against the
* concrete implementations, which are not part of this file.
*/
abstract public function visit(Embedding $embedding, int $limit): string;
}
59 changes: 59 additions & 0 deletions src/lib/Query/Common/EmbeddingVisitor/Aggregate.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
namespace Ibexa\Solr\Query\Common\EmbeddingVisitor;

use Ibexa\Contracts\Core\Repository\Exceptions\NotImplementedException;
use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding;
use Ibexa\Contracts\Solr\Query\EmbeddingVisitor;

/**
 * Embedding visitor that delegates to the first registered visitor
 * reporting that it can handle the given embedding.
 */
final class Aggregate extends EmbeddingVisitor
{
    /**
     * Delegate visitors, consulted in iteration order.
     *
     * The iterable is traversed on every canVisit()/visit() call, so it has to
     * be re-iterable (an array or a rewindable iterator).
     *
     * @var iterable<\Ibexa\Contracts\Solr\Query\EmbeddingVisitor>
     */
    private iterable $visitors;

    /**
     * @param iterable<\Ibexa\Contracts\Solr\Query\EmbeddingVisitor> $visitors
     */
    public function __construct(iterable $visitors = [])
    {
        $this->visitors = $visitors;
    }

    /**
     * Tells whether any of the registered visitors can handle the given embedding.
     */
    public function canVisit(Embedding $embedding): bool
    {
        return $this->findVisitor($embedding) !== null;
    }

    /**
     * Delegates to the first registered visitor that can handle the given embedding
     * and returns its result unchanged.
     *
     * @throws \Ibexa\Contracts\Core\Repository\Exceptions\NotImplementedException
     *         when no registered visitor can handle the embedding
     */
    public function visit(Embedding $embedding, int $limit): string
    {
        // Single lookup path shared with canVisit(), so both methods always agree.
        $visitor = $this->findVisitor($embedding);
        if ($visitor === null) {
            throw new NotImplementedException('No visitor available for: ' . \get_class($embedding));
        }

        return $visitor->visit($embedding, $limit);
    }

    /**
     * Returns the first registered visitor that can handle the given embedding,
     * or null when there is none.
     */
    private function findVisitor(Embedding $embedding): ?EmbeddingVisitor
    {
        foreach ($this->visitors as $visitor) {
            if ($visitor->canVisit($embedding)) {
                return $visitor;
            }
        }

        return null;
    }
}
14 changes: 12 additions & 2 deletions src/lib/Query/Common/QueryConverter/NativeQueryConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
*/
namespace Ibexa\Solr\Query\Common\QueryConverter;

use Ibexa\Contracts\Core\Repository\Values\Content\EmbeddingQuery;
use Ibexa\Contracts\Core\Repository\Values\Content\Query;
use Ibexa\Contracts\Solr\Query\AggregationVisitor;
use Ibexa\Contracts\Solr\Query\CriterionVisitor;
use Ibexa\Contracts\Solr\Query\EmbeddingVisitor;
use Ibexa\Contracts\Solr\Query\SortClauseVisitor;
use Ibexa\Solr\Query\FacetFieldVisitor;
use Ibexa\Solr\Query\QueryConverter;
Expand Down Expand Up @@ -44,6 +46,8 @@ class NativeQueryConverter extends QueryConverter
*/
private $aggregationVisitor;

private EmbeddingVisitor $embeddingVisitor;

/**
* Construct from visitors.
*
Expand All @@ -55,26 +59,32 @@ public function __construct(
CriterionVisitor $criterionVisitor,
SortClauseVisitor $sortClauseVisitor,
FacetFieldVisitor $facetBuilderVisitor,
AggregationVisitor $aggregationVisitor
AggregationVisitor $aggregationVisitor,
EmbeddingVisitor $embeddingVisitor
) {
$this->criterionVisitor = $criterionVisitor;
$this->sortClauseVisitor = $sortClauseVisitor;
$this->facetBuilderVisitor = $facetBuilderVisitor;
$this->aggregationVisitor = $aggregationVisitor;
$this->embeddingVisitor = $embeddingVisitor;
}

public function convert(Query $query, array $languageSettings = [])
{
$params = [
'q' => '{!lucene}' . $this->criterionVisitor->visit($query->query),
'fq' => '{!lucene}' . $this->criterionVisitor->visit($query->filter),
'fq' => ['{!lucene}' . $this->criterionVisitor->visit($query->filter)],
'sort' => $this->getSortClauses($query->sortClauses),
'start' => $query->offset,
'rows' => $query->limit,
'fl' => '*,score,[shard]',
'wt' => 'json',
];

if ($query instanceof EmbeddingQuery && $query->getEmbedding() !== null) {
$params['fq'][] = $this->embeddingVisitor->visit($query->getEmbedding(), $query->limit);
}

$facetParams = $this->getFacetParams($query->facetBuilders);
if (!empty($facetParams)) {
$params['facet'] = 'true';
Expand Down
2 changes: 2 additions & 0 deletions src/lib/Resources/config/container/solr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ services:
- '@ibexa.solr.query.content.sort_clause_visitor.aggregate'
- '@ibexa.solr.query.content.facet_builder_visitor.aggregate'
- '@ibexa.solr.query.content.aggregation_visitor.dispatcher'
- '@Ibexa\Solr\Query\Common\EmbeddingVisitor\Aggregate'

ibexa.solr.query_converter.location:
class: Ibexa\Solr\Query\Common\QueryConverter\NativeQueryConverter
Expand All @@ -108,6 +109,7 @@ services:
- '@ibexa.solr.query.location.sort_clause_visitor.aggregate'
- '@ibexa.solr.query.location.facet_builder_visitor.aggregate'
- '@ibexa.solr.query.location.aggregation_visitor.dispatcher'
- '@Ibexa\Solr\Query\Common\EmbeddingVisitor\Aggregate'

Ibexa\Solr\Gateway\UpdateSerializer:
arguments:
Expand Down
4 changes: 4 additions & 0 deletions src/lib/Resources/config/container/solr/services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ services:
arguments:
$client: '@ibexa.solr.http_client'

Ibexa\Solr\Query\Common\EmbeddingVisitor\Aggregate:
arguments:
$visitors: !tagged ibexa.search.solr.query.content.embedding.visitor

# Note: services tagged with 'ibexa.search.solr.query.content.criterion.visitor'
# are registered to this one using compilation pass
ibexa.solr.query.content.criterion_visitor.aggregate:
Expand Down
169 changes: 169 additions & 0 deletions src/lib/Resources/config/solr/managed-schema.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE schema [
<!ENTITY langfields SYSTEM "language-fieldtypes.xml">
<!ENTITY customfields SYSTEM "custom-fields-types.xml">
]>
<!--
This is the Solr schema file. This file should be named "schema.xml" and should
be in the conf directory under the solr home (i.e. ./solr/conf/schema.xml by
default) or located where the classloader for the Solr webapp can find it.

It provides the default types and definitions for a functional Solr based
search in eZ Publish 5. You may extend it with your own definitions, but you
should not remove or drastically change the existing definitions.
-->

<schema name="eZ Publish 5 base schema" version="1.5">
<!--
Language-specific field types are included here. At least one field type
named "text" must be defined.
Included in the eZ platform distribution are configurations for various
languages, including additional files like stopwords or other features
under the directory "solr.languages"
-->
&langfields;

<!--
custom field types and fields are included from a separate file to ease upgrades
-->
&customfields;

<!--
Default types by Solr. Will be reused for dynamic fields.
-->
<fieldType name="string" class="solr.TextField" sortMissingLast="true">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true" sortMissingLast="true">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
<!--
Numeric field types that index values using KD-trees.
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
-->
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>

<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>

<fieldType name="identifier" class="solr.StrField" sortMissingLast="true" />
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" multiValued="false"/>
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
<fieldtype name="binary" class="solr.BinaryField"/>
<fieldType name="int" class="solr.IntPointField" docValues="true"/>
<fieldType name="float" class="solr.FloatPointField" docValues="true"/>
<fieldType name="long" class="solr.LongPointField" docValues="true"/>
<fieldType name="double" class="solr.DoublePointField" docValues="true"/>
<fieldType name="date" class="solr.DatePointField" docValues="true"/>

<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
<fieldType name="location" class="solr.LatLonPointSpatialField" sortMissingLast="true"/>

<!-- for 1536-dim models (ada-002 & 3-small) -->
<fieldType name="vector_1536"
class="solr.DenseVectorField"
vectorDimension="1536"
similarityFunction="cosine"
indexed="true"
stored="true" />

<!-- for the 3072-dim model (3-large) -->
<fieldType name="vector_3072"
class="solr.DenseVectorField"
vectorDimension="3072"
similarityFunction="cosine"
indexed="true"
stored="true"/>

<!--
Required ID field.
-->
<field name="id" type="string" indexed="true" stored="true" required="true"/>

<!--
Always contains the date a document was added to the index. Might be
useful.
-->
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>

<!--
Points to the root document of a block of nested documents. Required for nested document support.
-->
<field name="_root_" type="string" indexed="true" stored="true" required="false"/>

<field name="document_type_id" type="string" indexed="true" stored="true" required="true"/>

<!--
Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns. RESTRICTION: the
glob-like pattern in the name attribute must have a "*" only at the start
or the end. EXAMPLE: name="*_i" will match any field ending in _i (like
myid_i, z_i). Longer patterns will be matched first. If equal-size
patterns both match, the first appearing in the schema will be used.
-->
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_mi" type="int" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_id" type="identifier" indexed="true" stored="true"/>
<dynamicField name="*_mid" type="identifier" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_ms" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true" omitNorms="false"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_mb" type="boolean" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_gl" type="location" indexed="true" stored="true"/>
<dynamicField name="*_gl_0_coordinate" type="double" indexed="true" stored="true"/>
<dynamicField name="*_gl_1_coordinate" type="double" indexed="true" stored="true"/>

<!--
This field is required to allow random sorting
-->
<dynamicField name="random*" type="random" indexed="true" stored="false"/>

<!--
These dynamic fields are required for embeddings (dense vector fields)
-->
<!-- 1536-dim suffix for ada-002 -->
<dynamicField name="*_ada002_dv" type="vector_1536"/>

<!-- 1536-dim suffix for 3-small -->
<dynamicField name="*_3small_dv" type="vector_1536"/>

<!-- 3072-dim suffix for 3-large -->
<dynamicField name="*_3large_dv" type="vector_3072"/>

<!--
This field is required since Solr 4
-->
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />

<uniqueKey>id</uniqueKey>
</schema>
Loading
Loading