Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
-- Tears down the objects created by the matching `up.sql`, dependents before the
-- objects they depend on.

-- Type-title views: `first_*`/`last_*` select from `type_title_for_entity`, so they
-- are dropped before it.
DROP VIEW IF EXISTS first_type_title_for_entity;
DROP VIEW IF EXISTS last_type_title_for_entity;
DROP VIEW IF EXISTS type_title_for_entity;

-- Label views: `first_*`/`last_*` select from `label_for_entity`, so they are dropped
-- before it.
DROP VIEW IF EXISTS first_label_for_entity;
DROP VIEW IF EXISTS last_label_for_entity;
DROP VIEW IF EXISTS label_for_entity;

DROP TABLE entity_embeddings;
-- `entity_is_of_type_ids` reads from `entity_is_of_type`; drop the view first.
DROP VIEW entity_is_of_type_ids;
DROP TABLE entity_is_of_type;
-- NOTE(review): presumably both edge views are defined over `entity_edge` — their
-- definitions are not visible here; confirm before reordering.
DROP VIEW entity_has_left_entity;
DROP VIEW entity_has_right_entity;
DROP TABLE entity_edge;
-- Unquoted identifiers fold to lower case, so these name the `edge_direction` and
-- `entity_edge_kind` enum types.
DROP TYPE EDGE_DIRECTION;
DROP TYPE ENTITY_EDGE_KIND;
DROP TABLE entity_temporal_metadata;
-- `entity_edition_cache` has a foreign key to `entity_editions`, so it must go first.
DROP TABLE entity_edition_cache;
DROP TABLE entity_editions;
DROP TABLE entity_drafts;
DROP TABLE entity_ids;
110 changes: 28 additions & 82 deletions libs/@local/graph/migrations/graph-migrations/v009__entities/up.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,34 @@ CREATE TABLE entity_editions (
confidence DOUBLE PRECISION
);

-- Denormalized per-edition cache of the sorting/filtering aggregates, rebuildable at
-- any time via `reindex_entity_cache`. The four type-derived arrays are positionally
-- aligned and cover ALL inheritance depths (type containment checks match supertypes),
-- ordered by (inheritance depth, title, base URL, version DESC) — the direct types form
-- the prefix of length `direct_types`, and `[1]` is the canonically first direct type.
-- `versions` carries the numeric versions for consumers needing base URL and version
-- separately (e.g. HashQL).
-- `labels` is resolved per direct type (label inheritance lives in each type's
-- `closed_schema.allOf`), ordered by (title, base URL, version DESC); `labels[1]` is
-- the display/sort label, NULL when the entity has none. Descending sorts reuse the
-- same element — no min/max flip.
CREATE TABLE entity_edition_cache (
    -- Edition the cached aggregates belong to; exactly one cache row per edition.
    entity_edition_id UUID,
    -- Length of the direct-type prefix of the four type-derived arrays.
    direct_types INTEGER NOT NULL,
    -- Resolved label values; NULL when the entity has no label.
    labels TEXT [],
    -- Positionally aligned type arrays (same ordering in all four).
    type_titles TEXT [] NOT NULL,
    base_urls TEXT [] NOT NULL,
    versions BIGINT [] NOT NULL,
    versioned_urls TEXT [] NOT NULL,
    PRIMARY KEY (entity_edition_id),
    -- Cache rows are removed together with the edition they describe.
    FOREIGN KEY (entity_edition_id) REFERENCES entity_editions ON DELETE CASCADE
);

-- Type filters arrive as containment checks (`@>`); labels/titles are only sorted or
-- projected, never filtered, so they carry no index.
-- GIN indexes backing array containment (`@>`) checks on the type URL arrays.
CREATE INDEX entity_edition_cache_base_urls
    ON entity_edition_cache
    USING gin (base_urls);
CREATE INDEX entity_edition_cache_versioned_urls
    ON entity_edition_cache
    USING gin (versioned_urls);

CREATE TABLE entity_temporal_metadata (
web_id UUID NOT NULL,
entity_uuid UUID NOT NULL,
Expand Down Expand Up @@ -58,16 +86,6 @@ CREATE TABLE entity_is_of_type (
PRIMARY KEY (entity_edition_id, entity_type_ontology_id)
);

-- Per entity edition, the base URLs and versions of its direct (inheritance depth 0)
-- entity types. Neither aggregate specifies an order, so element order is unspecified.
CREATE VIEW entity_is_of_type_ids AS
SELECT
    direct_type.entity_edition_id,
    array_agg(ids.base_url) AS base_urls,
    array_agg(ids.version) AS versions
FROM entity_is_of_type AS direct_type
INNER JOIN ontology_ids AS ids
    ON ids.ontology_id = direct_type.entity_type_ontology_id
WHERE direct_type.inheritance_depth = 0
GROUP BY direct_type.entity_edition_id;

-- Which side of a link an edge row describes.
CREATE TYPE entity_edge_kind AS ENUM (
    'has-left-entity',
    'has-right-entity'
);

-- Direction in which an edge is stored/traversed.
CREATE TYPE edge_direction AS ENUM (
    'outgoing',
    'incoming'
);

Expand Down Expand Up @@ -126,75 +144,3 @@ CREATE TABLE entity_embeddings (

-- At most one embedding per (web, entity, property). `NULLS NOT DISTINCT` makes NULLs
-- compare equal for uniqueness, so rows with a NULL key column are deduplicated too.
CREATE UNIQUE INDEX entity_embeddings_idx ON entity_embeddings (
    web_id,
    entity_uuid,
    property
) NULLS NOT DISTINCT;


-- One row per (temporal-metadata row, direct entity type): the schema `title` of each
-- inheritance-depth-0 type, restricted to ontology records whose transaction time
-- contains the current time.
CREATE VIEW type_title_for_entity AS
SELECT
    etm.entity_edition_id,
    et.schema ->> 'title' AS title
FROM entity_temporal_metadata AS etm
INNER JOIN entity_is_of_type AS eit
    ON eit.entity_edition_id = etm.entity_edition_id
INNER JOIN ontology_temporal_metadata AS otm
    ON otm.ontology_id = eit.entity_type_ontology_id
INNER JOIN entity_types AS et
    ON et.ontology_id = otm.ontology_id
WHERE
    eit.inheritance_depth = 0
    AND otm.transaction_time @> now();

-- Smallest direct-type title per entity edition (ascending sort key).
CREATE VIEW first_type_title_for_entity AS
SELECT
    titles.entity_edition_id,
    min(titles.title) AS title
FROM type_title_for_entity AS titles
GROUP BY titles.entity_edition_id;

-- Largest direct-type title per entity edition (descending sort key).
CREATE VIEW last_type_title_for_entity AS
SELECT
    titles.entity_edition_id,
    max(titles.title) AS title
FROM type_title_for_entity AS titles
GROUP BY titles.entity_edition_id;


-- One row per (edition, direct type, label property): every `labelProperty` listed in
-- the type's `closed_schema.allOf` is looked up in the edition's properties. The value
-- stays JSONB and is NULL when the entity does not carry that property.
CREATE VIEW label_for_entity AS
SELECT
    ee.entity_edition_id,
    jsonb_extract_path(ee.properties, label_path.property) AS label_property
FROM entity_editions AS ee
INNER JOIN entity_is_of_type AS eit
    ON eit.entity_edition_id = ee.entity_edition_id
INNER JOIN ontology_temporal_metadata AS otm
    ON otm.ontology_id = eit.entity_type_ontology_id
INNER JOIN entity_types AS et
    ON et.ontology_id = otm.ontology_id
-- Expands the label-property names into rows; a type without any yields no row.
CROSS JOIN LATERAL jsonb_array_elements_text(
    jsonb_path_query_array(et.closed_schema, '$.allOf[*].labelProperty')
) AS label_path (property)
WHERE
    eit.inheritance_depth = 0
    AND otm.transaction_time @> now();

-- Smallest label value per entity edition. Ascending order places NULLs last, so a
-- non-NULL label wins whenever one exists.
CREATE VIEW first_label_for_entity AS
SELECT
    labels.entity_edition_id,
    (
        array_agg(labels.label_property ORDER BY labels.label_property)
    )[1] AS label_property
FROM label_for_entity AS labels
GROUP BY labels.entity_edition_id;

-- Largest label value per entity edition (descending sort key).
--
-- `label_for_entity.label_property` is NULL for every label property the entity does
-- not carry. A plain `DESC` sort defaults to NULLS FIRST, which would make element 1
-- NULL as soon as one such property is missing, even though a real label exists.
-- `NULLS LAST` mirrors the ascending sibling view: NULL is returned only when the
-- entity has no label value at all.
CREATE VIEW last_label_for_entity AS
SELECT
    label_for_entity.entity_edition_id,
    (array_agg(
        label_for_entity.label_property
        ORDER BY label_for_entity.label_property DESC NULLS LAST
    ))[1] AS label_property
FROM label_for_entity
GROUP BY label_for_entity.entity_edition_id;
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
-- Denormalized per-edition cache of an entity's type/label aggregates. Computing these
-- inline forces the planner to re-evaluate the aggregation per row in entity-subgraph
-- queries; reading the cache instead is a single 1:1 join.
--
-- The cache is derived data: rows are inserted alongside `entity_is_of_type` writes and
-- can be fully rebuilt at any time (`reindex_entity_cache`), e.g. after in-place changes
-- to entity-type schemas. A row exists exactly for editions that have at least one
-- depth-0 entity type.
--
-- The four type-derived arrays (`type_titles`, `base_urls`, `versions`,
-- `versioned_urls`) are
-- positionally aligned and cover ALL inheritance depths so that type predicates
-- (containment via `@>`) match supertypes. Order is (inheritance depth, title,
-- base URL, version DESC); the entity's direct types therefore form the array prefix
-- of length `direct_types` (used to project `entityTypeIds`), and `[1]` is the
-- canonically first direct type, providing the type-title sort key. Titles are taken
-- from `entity_types` without the `transaction_time @> now()` filter, so the cache
-- does not depend on type archival state.
--
-- `labels` is resolved per DIRECT type (label inheritance already lives in each type's
-- `closed_schema.allOf`, nearest ancestor first), ordered by the canonical type order
-- (title, base URL, version DESC) with the `allOf` position as tie-breaker within one
-- type. `labels[1]` is the entity's display/sort label; NULL means the entity has no
-- label. Descending sorts use the same element — there is no min/max flip.
CREATE TABLE entity_edition_cache (
    -- Edition the cached aggregates belong to; exactly one cache row per edition.
    entity_edition_id UUID,
    -- Length of the direct-type prefix of the four type-derived arrays.
    direct_types INTEGER NOT NULL,
    -- Resolved label values; NULL when the entity has no label.
    labels TEXT [],
    -- Positionally aligned type arrays (same ordering in all four).
    type_titles TEXT [] NOT NULL,
    base_urls TEXT [] NOT NULL,
    versions BIGINT [] NOT NULL,
    versioned_urls TEXT [] NOT NULL,
    PRIMARY KEY (entity_edition_id),
    -- Cache rows are removed together with the edition they describe.
    FOREIGN KEY (entity_edition_id) REFERENCES entity_editions ON DELETE CASCADE
);

-- Backfill: one cache row per edition that has rows in `entity_is_of_type`.
WITH type_arrays AS (
    -- Type-derived arrays over ALL inheritance depths. Every aggregate uses the same
    -- sort key (inheritance depth, title, base URL, version DESC), which keeps the four
    -- arrays positionally aligned and puts the direct types first.
    SELECT
        eit.entity_edition_id,
        count(*) FILTER (WHERE eit.inheritance_depth = 0) AS direct_types,
        array_agg(
            et.schema ->> 'title'
            ORDER BY
                eit.inheritance_depth,
                et.schema ->> 'title',
                ids.base_url,
                ids.version DESC
        ) AS type_titles,
        array_agg(
            ids.base_url
            ORDER BY
                eit.inheritance_depth,
                et.schema ->> 'title',
                ids.base_url,
                ids.version DESC
        ) AS base_urls,
        array_agg(
            ids.version
            ORDER BY
                eit.inheritance_depth,
                et.schema ->> 'title',
                ids.base_url,
                ids.version DESC
        ) AS versions,
        array_agg(
            ids.base_url || 'v/' || ids.version
            ORDER BY
                eit.inheritance_depth,
                et.schema ->> 'title',
                ids.base_url,
                ids.version DESC
        ) AS versioned_urls
    FROM entity_is_of_type AS eit
    INNER JOIN ontology_ids AS ids
        ON ids.ontology_id = eit.entity_type_ontology_id
    INNER JOIN entity_types AS et
        ON et.ontology_id = ids.ontology_id
    GROUP BY eit.entity_edition_id
),

label_arrays AS (
    -- Label values resolved through the DIRECT types only, ordered by
    -- (title, base URL, version DESC) and then by position inside `allOf`.
    -- NULL values are left out of the array; if nothing remains the aggregate is NULL.
    SELECT
        eit.entity_edition_id,
        array_agg(
            label_value.label
            ORDER BY
                et.schema ->> 'title',
                ids.base_url,
                ids.version DESC,
                label_value.ordinality
        ) FILTER (WHERE label_value.label IS NOT NULL) AS labels
    FROM entity_is_of_type AS eit
    INNER JOIN ontology_ids AS ids
        ON ids.ontology_id = eit.entity_type_ontology_id
    INNER JOIN entity_types AS et
        ON et.ontology_id = ids.ontology_id
    INNER JOIN entity_editions AS ee
        ON ee.entity_edition_id = eit.entity_edition_id
    CROSS JOIN LATERAL (
        SELECT
            -- `#>> '{}'` unwraps the JSONB value into plain text.
            jsonb_extract_path(ee.properties, label_path.path) #>> '{}' AS label,
            label_path.ordinality
        FROM jsonb_array_elements_text(
            jsonb_path_query_array(et.closed_schema, '$.allOf[*].labelProperty')
        ) WITH ORDINALITY AS label_path (path, ordinality)
    ) AS label_value
    WHERE eit.inheritance_depth = 0
    GROUP BY eit.entity_edition_id
)

INSERT INTO entity_edition_cache (
    entity_edition_id,
    direct_types,
    labels,
    type_titles,
    base_urls,
    versions,
    versioned_urls
)
SELECT
    type_arrays.entity_edition_id,
    type_arrays.direct_types,
    label_arrays.labels,
    type_arrays.type_titles,
    type_arrays.base_urls,
    type_arrays.versions,
    type_arrays.versioned_urls
FROM type_arrays
-- Editions without any label keep their row; `labels` is NULL for them.
LEFT JOIN label_arrays
    ON label_arrays.entity_edition_id = type_arrays.entity_edition_id;

-- Type filters arrive as containment checks (`@>`); labels/titles are only sorted or
-- projected, never filtered, so they carry no index.
-- GIN indexes backing array containment (`@>`) checks on the type URL arrays.
CREATE INDEX entity_edition_cache_base_urls
    ON entity_edition_cache
    USING gin (base_urls);
CREATE INDEX entity_edition_cache_versioned_urls
    ON entity_edition_cache
    USING gin (versioned_urls);

-- The cache replaces the per-row aggregate views for every consumer (query compiler and
-- HashQL), so they are dropped.
-- `IF EXISTS` keeps this step from aborting the migration on databases where the
-- views were never created.
-- NOTE(review): the v009 `up.sql` in this same change no longer creates these views;
-- confirm whether both migrations can run against the same database.
-- Dependents go first: each `first_*`/`last_*` view selects from its base view.
DROP VIEW IF EXISTS first_label_for_entity;
DROP VIEW IF EXISTS last_label_for_entity;
DROP VIEW IF EXISTS label_for_entity;
DROP VIEW IF EXISTS first_type_title_for_entity;
DROP VIEW IF EXISTS last_type_title_for_entity;
DROP VIEW IF EXISTS type_title_for_entity;
DROP VIEW IF EXISTS entity_is_of_type_ids;
9 changes: 9 additions & 0 deletions libs/@local/graph/postgres-store/src/snapshot/entity/batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,15 @@ where
.await
.change_context(InsertionError)?;

// The cache derives `labels` from `entity_editions.properties`, so the normalized
// values written above would otherwise leave stale label entries behind.
if !edition_ids_updates.is_empty() {
postgres_client
.reindex_entity_cache()
.await
.change_context(InsertionError)?;
Comment thread
TimDiekmann marked this conversation as resolved.
}

Ok(())
}
}
Loading
Loading