Skip to content

feat: Add GIN index on Identity.identifier #5369

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/core/db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.contrib.postgres.indexes import GinIndex


class UnrestrictedGinIndex(GinIndex):
max_name_length = 63
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Generated by Django 4.2.18 on 2025-04-22 10:15

from django.contrib.postgres.operations import TrigramExtension
from django.db import migrations
import django.contrib.postgres.indexes
import django.db.models.functions.text

from core.migration_helpers import PostgresOnlyRunSQL
import core.db


_create_index_sql = "CREATE INDEX CONCURRENTLY IF NOT EXISTS environments_identity_identifier_gin_idx ON environments_identity USING GIN (UPPER(identifier) gin_trgm_ops);"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_create_index_reverse_sql = (
"DROP INDEX CONCURRENTLY IF EXISTS environments_identity_identifier_gin_idx;"
)


class Migration(migrations.Migration):
atomic = False

dependencies = [
("identities", "0002_alter_identity_index_together"),
]

operations = [
migrations.SeparateDatabaseAndState(
state_operations=[
migrations.AddIndex(
model_name="identity",
index=core.db.UnrestrictedGinIndex(
django.contrib.postgres.indexes.OpClass(
django.db.models.functions.text.Upper("identifier"),
name="gin_trgm_ops",
),
name="environments_identity_identifier_gin_idx",
),
),
],
database_operations=[
TrigramExtension(),
PostgresOnlyRunSQL(
sql=_create_index_sql,
reverse_sql=_create_index_reverse_sql,
),
],
),
]
9 changes: 9 additions & 0 deletions api/environments/identities/models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import typing
from itertools import chain

from django.contrib.postgres.indexes import OpClass
from django.db import models
from django.db.models import Prefetch, Q
from django.db.models.functions import Upper
from django.utils import timezone
from flag_engine.segments.evaluator import evaluate_identity_in_segment

from core.db import UnrestrictedGinIndex
from environments.identities.managers import IdentityManager
from environments.identities.traits.models import Trait
from environments.models import Environment
Expand Down Expand Up @@ -40,6 +43,12 @@ class Meta:
# avoid any downtime. If people using MySQL / Oracle have issues with poor performance on the identities table,
# we can provide them the SQL to add it manually in a small window of downtime.
index_together = (("environment", "created_date"),)
indexes = [
UnrestrictedGinIndex(
OpClass(Upper("identifier"), name="gin_trgm_ops"),
name="environments_identity_identifier_gin_idx",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Can we shorten the index name to just identifier_gin_idx to avoid having to subclass? It's not immediately clear what UnrestrictedGinIndex means just from the class name. If we do keep that class, could we add a docstring to explain its purpose?

),
]

def natural_key(self): # type: ignore[no-untyped-def]
return self.identifier, self.environment.api_key
Expand Down
Loading