Skip to content

Database ERD - PyCharter Schema

This document provides an Entity Relationship Diagram (ERD) for the PyCharter database schema.

ERD Diagram

erDiagram
    %% Core Component Tables
    schemas {
        uuid id PK
        string title
        uuid data_contract_id FK
        string version
        json schema_data
        datetime created_at
        datetime updated_at
    }

    coercion_rules {
        uuid id PK
        string title
        uuid data_contract_id FK
        string description
        string version
        json rules
        uuid schema_id FK
        datetime created_at
        datetime updated_at
    }

    validation_rules {
        uuid id PK
        string title
        uuid data_contract_id FK
        string description
        string version
        json rules
        uuid schema_id FK
        datetime created_at
        datetime updated_at
    }

    metadata_records {
        uuid id PK
        string title
        uuid data_contract_id FK
        string version
        string status
        string type
        text description
        json governance_rules
        datetime created_at
        datetime updated_at
        string created_by
        string updated_by
    }

    owners {
        string id PK
        string name
        string email UK
        string team
        json additional_info
        datetime created_at
        datetime updated_at
    }

    %% Entity Tables
    systems {
        uuid id PK
        string name UK
        string app_id
        text description
        datetime created_at
        datetime updated_at
    }

    domains {
        uuid id PK
        string name UK
        text description
        datetime created_at
        datetime updated_at
    }

    %% Central Join Table
    data_contracts {
        uuid id PK
        string name
        string version
        string status
        text description
        uuid schema_id FK
        uuid coercion_rules_id FK
        uuid validation_rules_id FK
        uuid metadata_record_id FK
        string schema_version
        string coercion_rules_version
        string validation_rules_version
        string metadata_version
        datetime created_at
        datetime updated_at
        string created_by
        string updated_by
    }

    %% Join Tables (Many-to-Many)
    metadata_record_system_pulls {
        uuid id PK
        uuid metadata_record_id FK
        uuid system_id FK
        datetime created_at
    }

    metadata_record_system_pushes {
        uuid id PK
        uuid metadata_record_id FK
        uuid system_id FK
        datetime created_at
    }

    metadata_record_system_sources {
        uuid id PK
        uuid metadata_record_id FK
        uuid system_id FK
        datetime created_at
    }

    metadata_record_domains {
        uuid id PK
        uuid metadata_record_id FK
        uuid domain_id FK
        datetime created_at
    }

    %% Ownership Join Tables (Many-to-Many)
    metadata_record_business_owners {
        uuid id PK
        uuid metadata_record_id FK
        string owner_id FK
        datetime created_at
    }

    metadata_record_bu_sme {
        uuid id PK
        uuid metadata_record_id FK
        string owner_id FK
        datetime created_at
    }

    metadata_record_it_application_owners {
        uuid id PK
        uuid metadata_record_id FK
        string owner_id FK
        datetime created_at
    }

    metadata_record_it_sme {
        uuid id PK
        uuid metadata_record_id FK
        string owner_id FK
        datetime created_at
    }

    metadata_record_support_lead {
        uuid id PK
        uuid metadata_record_id FK
        string owner_id FK
        datetime created_at
    }

    %% Quality Assurance Tables
    quality_metrics {
        uuid id PK
        string schema_id
        string schema_version
        uuid data_contract_id FK
        float overall_score
        float violation_rate
        float completeness
        float accuracy
        integer record_count
        integer valid_count
        integer invalid_count
        integer violation_count
        json field_scores
        json threshold_breaches
        string passed
        datetime check_timestamp
        json additional_metadata
        datetime created_at
        datetime updated_at
        string created_by
    }

    quality_violations {
        uuid id PK
        string schema_id
        string schema_version
        uuid data_contract_id FK
        string record_identifier
        json record_data
        string field_name
        string error_type
        text error_message
        string severity
        string status
        datetime resolved_at
        string resolved_by
        datetime check_timestamp
        json additional_metadata
        datetime created_at
        datetime updated_at
    }

    %% Relationships - Core Components (Many-to-One from components to data_contracts)
    schemas }o--|| data_contracts : "belongs_to"
    coercion_rules }o--|| data_contracts : "belongs_to"
    validation_rules }o--|| data_contracts : "belongs_to"
    metadata_records }o--|| data_contracts : "belongs_to"

    %% Relationships - Schema Components (Many-to-One from rules to schemas)
    coercion_rules }o--o| schemas : "optional_link"
    validation_rules }o--o| schemas : "optional_link"

    %% Relationships - Many-to-Many (via Join Tables)
    metadata_records ||--o{ metadata_record_system_pulls : "pulls_from"
    systems ||--o{ metadata_record_system_pulls : "pulled_by"

    metadata_records ||--o{ metadata_record_system_pushes : "pushes_to"
    systems ||--o{ metadata_record_system_pushes : "pushed_to"

    metadata_records ||--o{ metadata_record_system_sources : "sources_from"
    systems ||--o{ metadata_record_system_sources : "sourced_by"

    metadata_records ||--o{ metadata_record_domains : "belongs_to"
    domains ||--o{ metadata_record_domains : "contains"

    metadata_records ||--o{ metadata_record_business_owners : "has"
    owners ||--o{ metadata_record_business_owners : "owns"

    metadata_records ||--o{ metadata_record_bu_sme : "has"
    owners ||--o{ metadata_record_bu_sme : "is_sme"

    metadata_records ||--o{ metadata_record_it_application_owners : "has"
    owners ||--o{ metadata_record_it_application_owners : "is_it_owner"

    metadata_records ||--o{ metadata_record_it_sme : "has"
    owners ||--o{ metadata_record_it_sme : "is_it_sme"

    metadata_records ||--o{ metadata_record_support_lead : "has"
    owners ||--o{ metadata_record_support_lead : "is_support_lead"

    %% Relationships - Quality Assurance (Many-to-One to data_contracts)
    quality_metrics }o--o| data_contracts : "optional_link"
    quality_violations }o--o| data_contracts : "optional_link"

Table Descriptions

Core Component Tables

schemas

Stores JSON Schema definitions that define the structure and validation rules for data.

Model: SchemaModel (file: schema.py)

Key Fields: - id: Primary key (UUID) - title: Schema title - data_contract_id: Foreign key to data_contracts (required) - version: Schema version (e.g., "1.0.0") - schema_data: JSON schema definition

Unique Constraint: (data_contract_id, version)

Relationships: - Many-to-one with data_contracts (required) - One-to-many with coercion_rules (optional) - One-to-many with validation_rules (optional)

coercion_rules

Defines data type coercion rules for schema fields.

Model: CoercionRuleModel (file: coercion_rule.py)

Key Fields: - id: Primary key (UUID) - title: Rule title - data_contract_id: Foreign key to data_contracts (required) - version: Rules version - rules: JSON object containing coercion rules - schema_id: Optional foreign key to schemas

Unique Constraint: (data_contract_id, version)

Relationships: - Many-to-one with data_contracts (required, via data_contract_id) - Many-to-one with schemas (optional, via schema_id)

validation_rules

Defines validation rules for schema fields.

Model: ValidationRuleModel (file: validation_rule.py)

Key Fields: - id: Primary key (UUID) - title: Rule title - data_contract_id: Foreign key to data_contracts (required) - version: Rules version - rules: JSON object containing validation rules - schema_id: Optional foreign key to schemas

Unique Constraint: (data_contract_id, version)

Relationships: - Many-to-one with data_contracts (required, via data_contract_id) - Many-to-one with schemas (optional, via schema_id)

metadata_records

Stores comprehensive metadata including audit information, data lineage, governance rules, and ownership.

Model: MetadataRecordModel (file: metadata_record.py)

Key Fields: - id: Primary key (UUID) - title: Metadata title - data_contract_id: Foreign key to data_contracts (required) - version: Metadata version - status: e.g., "active", "deprecated", "draft" - type: e.g., "object" - description: Text description - governance_rules: JSON object containing governance rules - created_at, updated_at, created_by, updated_by: Audit fields

Unique Constraint: (data_contract_id, version)

Relationships: - Many-to-one with data_contracts (required) - Many-to-many with systems via: - metadata_record_system_pulls (pulls_from) - metadata_record_system_pushes (pushes_to) - metadata_record_system_sources (system_sources) - Many-to-many with domains via metadata_record_domains - Many-to-many with owners via: - metadata_record_business_owners (business_owners) - metadata_record_bu_sme (bu_sme) - metadata_record_it_application_owners (it_application_owners) - metadata_record_it_sme (it_sme) - metadata_record_support_lead (support_lead)

Note: Governance rules are stored as JSON in this table, not as a separate table. Ownership relationships are stored in join tables linking to the owners table.

owners

Lookup table of owner entities (people/teams) that can be referenced by metadata_records.

Model: OwnerModel (file: owner.py)

Key Fields: - id: Primary key (string, e.g., "lincoln_mak", "operations-team") - name: Display name (optional, defaults to id if not provided) - email: Owner email address (unique if provided) - team: Team name (e.g., "data-engineering") - additional_info: JSON object with additional owner metadata

Unique Constraint: (email) if email is provided

Relationships: - Many-to-many with metadata_records via: - metadata_record_business_owners (business_owners) - metadata_record_bu_sme (bu_sme) - metadata_record_it_application_owners (it_application_owners) - metadata_record_it_sme (it_sme) - metadata_record_support_lead (support_lead)

Note: This table stores owner entities. Ownership relationships are stored in join tables linking metadata_records to owners.

Entity Tables

systems

Represents external systems that metadata records interact with (pulls_from, pushes_to, system_sources).

Model: SystemModel (file: system.py)

Key Fields: - id: Primary key (UUID) - name: Unique system name (e.g., "1811", "ICC", "1846") - app_id: Application ID for the system (optional) - description: System description

Unique Constraint: (name)

Relationships: - Many-to-many with metadata_records via: - metadata_record_system_pulls (pulls_from) - metadata_record_system_pushes (pushes_to) - metadata_record_system_sources (system_sources)

domains

Represents business domains that metadata records belong to.

Model: DomainModel (file: domain.py)

Key Fields: - id: Primary key (UUID) - name: Unique domain name (e.g., "IOC") - description: Domain description

Unique Constraint: (name)

Relationships: - Many-to-many with metadata_records via metadata_record_domains

Central Join Table

data_contracts

The central table that links all components of a data contract. Each contract represents a versioned contract for a specific dataset.

Model: DataContractModel (file: data_contract.py)

Key Fields: - id: Primary key (UUID) - name: Contract name (e.g., "template_contract") - version: Contract version (e.g., "1.0.0") - status: Contract status (e.g., "active", "deprecated", "draft") - description: Contract description - schema_id: Foreign key to schemas (optional, nullable) - coercion_rules_id: Foreign key to coercion_rules (optional) - validation_rules_id: Foreign key to validation_rules (optional) - metadata_record_id: Foreign key to metadata_records (optional) - schema_version, coercion_rules_version, validation_rules_version, metadata_version: Version tracking - created_at, updated_at, created_by, updated_by: Audit fields

Unique Constraint: (name, version)

Relationships: - One-to-many with schemas (optional, nullable) - One-to-many with coercion_rules (optional) - One-to-many with validation_rules (optional) - One-to-many with metadata_records (optional)

Note: Ownership and governance rules are stored within metadata_records, not as separate foreign keys in data_contracts.

Join Tables (Many-to-Many Relationships)

metadata_record_system_pulls

Links metadata_records to systems they pull data from.

Model: MetadataRecordSystemPull

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - system_id: Foreign key to systems - created_at: Timestamp

Unique Constraint: (metadata_record_id, system_id)

metadata_record_system_pushes

Links metadata_records to systems they push data to.

Model: MetadataRecordSystemPush

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - system_id: Foreign key to systems - created_at: Timestamp

Unique Constraint: (metadata_record_id, system_id)

metadata_record_system_sources

Links metadata_records to their source systems.

Model: MetadataRecordSystemSource

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - system_id: Foreign key to systems - created_at: Timestamp

Unique Constraint: (metadata_record_id, system_id)

metadata_record_domains

Links metadata_records to domains.

Model: MetadataRecordDomain

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - domain_id: Foreign key to domains - created_at: Timestamp

Unique Constraint: (metadata_record_id, domain_id)

metadata_record_business_owners

Links metadata_records to owners as business owners.

Model: MetadataRecordBusinessOwner

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - owner_id: Foreign key to owners (string) - created_at: Timestamp

Unique Constraint: (metadata_record_id, owner_id)

metadata_record_bu_sme

Links metadata_records to owners as BU SMEs (Business Unit Subject Matter Experts).

Model: MetadataRecordBUSME

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - owner_id: Foreign key to owners (string) - created_at: Timestamp

Unique Constraint: (metadata_record_id, owner_id)

metadata_record_it_application_owners

Links metadata_records to owners as IT Application Owners.

Model: MetadataRecordITApplicationOwner

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - owner_id: Foreign key to owners (string) - created_at: Timestamp

Unique Constraint: (metadata_record_id, owner_id)

metadata_record_it_sme

Links metadata_records to owners as IT SMEs (IT Subject Matter Experts).

Model: MetadataRecordITSME

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - owner_id: Foreign key to owners (string) - created_at: Timestamp

Unique Constraint: (metadata_record_id, owner_id)

metadata_record_support_lead

Links metadata_records to owners as Support Leads.

Model: MetadataRecordSupportLead

Key Fields: - id: Primary key (UUID) - metadata_record_id: Foreign key to metadata_records - owner_id: Foreign key to owners (string) - created_at: Timestamp

Unique Constraint: (metadata_record_id, owner_id)

Quality Assurance Tables

quality_metrics

Stores quality check results and metrics for data quality assurance.

Model: QualityMetricModel (file: quality_metric.py)

Key Fields: - id: Primary key (UUID) - schema_id: Schema identifier (string, indexed) - schema_version: Schema version (optional) - data_contract_id: Foreign key to data_contracts (optional, indexed) - overall_score: Overall quality score (0-100, float) - violation_rate: Violation rate (0-1, float) - completeness: Completeness score (0-1, float) - accuracy: Accuracy score (0-1, float) - record_count: Total number of records checked (integer) - valid_count: Number of valid records (integer) - invalid_count: Number of invalid records (integer) - violation_count: Total number of violations (integer) - field_scores: JSON object with per-field quality scores - threshold_breaches: JSON array of threshold breach messages - passed: Whether quality check passed ("true" or "false") - check_timestamp: Timestamp when quality check was run (indexed) - additional_metadata: JSON object with additional context - created_at, updated_at, created_by: Audit fields

Indexes: - schema_id - data_contract_id - check_timestamp

Relationships: - Many-to-one with data_contracts (optional, via data_contract_id)

Note: This table stores historical quality metrics for trend analysis and monitoring. Each quality check creates a new record with timestamp.

quality_violations

Stores individual data quality violations detected during quality checks.

Model: QualityViolationModel (file: quality_violation.py)

Key Fields: - id: Primary key (UUID) - schema_id: Schema identifier (string, indexed) - schema_version: Schema version (optional) - data_contract_id: Foreign key to data_contracts (optional, indexed) - record_identifier: Unique identifier for the violating record (string, indexed) - record_data: JSON snapshot of the violating record - field_name: Field name where violation occurred (string, indexed) - error_type: Type of error (string, indexed, e.g., "validation_error", "missing_field", "type_error") - error_message: Detailed error message (text) - severity: Violation severity (string, indexed, e.g., "critical", "warning", "info") - status: Violation status (string, indexed, e.g., "open", "resolved", "ignored") - resolved_at: Timestamp when violation was resolved (optional) - resolved_by: User/process that resolved the violation (optional) - check_timestamp: Timestamp when violation was detected (indexed) - additional_metadata: JSON object with additional context - created_at, updated_at: Audit fields

Indexes: - schema_id - data_contract_id - record_identifier - field_name - error_type - severity - status - check_timestamp

Relationships: - Many-to-one with data_contracts (optional, via data_contract_id)

Note: This table stores individual violations for tracking and resolution. Multiple violations can be recorded for a single record if it has multiple field-level issues. Violations can be filtered by schema, status, severity, and date range for efficient querying.

Relationship Summary

  1. Many-to-One Relationships:
  2. schemasdata_contracts (required, via data_contract_id)
  3. coercion_rulesdata_contracts (required, via data_contract_id)
  4. validation_rulesdata_contracts (required, via data_contract_id)
  5. metadata_recordsdata_contracts (required, via data_contract_id)
  6. coercion_rulesschemas (optional, via schema_id)
  7. validation_rulesschemas (optional, via schema_id)

  8. One-to-Many Relationships (via foreign keys in data_contracts):

  9. data_contractsschemas (optional, via schema_id)
  10. data_contractscoercion_rules (optional, via coercion_rules_id)
  11. data_contractsvalidation_rules (optional, via validation_rules_id)
  12. data_contractsmetadata_records (optional, via metadata_record_id)

  13. Many-to-Many Relationships (via join tables):

  14. metadata_recordssystems (pulls_from, pushes_to, system_sources)
  15. metadata_recordsdomains (belongs_to)
  16. metadata_recordsowners (business_owners, bu_sme, it_application_owners, it_sme, support_lead)

  17. Quality Assurance Relationships:

  18. quality_metricsdata_contracts (optional, via data_contract_id)
  19. quality_violationsdata_contracts (optional, via data_contract_id)

Unique Constraints

  • schemas: (data_contract_id, version)
  • coercion_rules: (data_contract_id, version)
  • validation_rules: (data_contract_id, version)
  • metadata_records: (data_contract_id, version)
  • data_contracts: (name, version)
  • systems: (name)
  • domains: (name)
  • owners: (id) (string primary key), (email) if email is provided
  • All join tables have unique constraints on their foreign key combinations
  • quality_metrics: No unique constraint (multiple metrics per schema allowed for historical tracking)
  • quality_violations: No unique constraint (multiple violations per record allowed)

Naming Convention

  • Tables: Plural (e.g., schemas, coercion_rules, metadata_records)
  • Files: Singular (e.g., schema.py, coercion_rule.py, metadata_record.py)
  • Classes: Singular with Model suffix (e.g., SchemaModel, CoercionRuleModel, MetadataRecordModel)

Notes

  • All tables are in the pycharter schema (not the default public schema)
  • All primary keys use UUID type (not integers)
  • Foreign keys use appropriate cascade behaviors (CASCADE for required relationships, SET NULL for optional)
  • All tables include audit fields (created_at, updated_at) where applicable
  • The data_contracts table is the central hub that connects all components
  • Systems and domains are normalized into separate entity tables to avoid duplication
  • Ownership information is stored in join tables linking metadata_records to owners table
  • Governance rules are stored as JSON in metadata_records table, not as a separate table
  • The owners table is a lookup table for owner entities; ownership relationships are stored in join tables

Core Architecture

The schema is organized into:

  1. Component Tables: Store the actual contract components (schema, rules, metadata)
  2. Entity Tables: Store reference entities (systems, domains, owners)
  3. Data Contract Table: Central table that links all components together
  4. Join Tables: Many-to-many relationship tables for systems and domains
  5. Quality Assurance Tables: Store quality metrics and violations for data quality monitoring

The schema is based on the template data structure found in data/examples/template/.


Detailed Field Descriptions

Component Tables

schemas

Fields: - id (PK, UUID) - title - Schema title - data_contract_id (FK to data_contracts) - Required - version - Schema version (e.g., "1.0.0") - schema_data (JSON) - Complete JSON Schema object - created_at, updated_at

Relationships: - Many-to-one with data_contracts (required, via data_contract_id) - One-to-many with coercion_rules (optional, via schema_id in coercion_rules) - One-to-many with validation_rules (optional, via schema_id in validation_rules)

coercion_rules

Fields: - id (PK, UUID) - title - Rule title - data_contract_id (FK to data_contracts) - Required - description - version - Rule version - rules (JSON) - Dict mapping field names to coercion functions - schema_id (FK to schemas) - Optional - created_at, updated_at

Relationships: - Many-to-one with data_contracts (required, via data_contract_id) - Many-to-one with schemas (optional, via schema_id)

validation_rules

Fields: - id (PK, UUID) - title - Rule title - data_contract_id (FK to data_contracts) - Required - description - version - Rule version - rules (JSON) - Dict mapping field names to validation configs - schema_id (FK to schemas) - Optional - created_at, updated_at

Relationships: - Many-to-one with data_contracts (required, via data_contract_id) - Many-to-one with schemas (optional, via schema_id)

metadata_records

Fields: - id (PK, UUID) - title - Metadata title - data_contract_id (FK to data_contracts) - Required - version - Metadata version - status - e.g., "active", "deprecated", "draft" - type - e.g., "object" - description - created_at, updated_at, created_by, updated_by - governance_rules (JSON) - Full governance rules object

Relationships: - Many-to-one with data_contracts (required, via data_contract_id) - Many-to-many with systems via: - metadata_record_system_pulls (pulls_from) - metadata_record_system_pushes (pushes_to) - metadata_record_system_sources (system_sources) - Many-to-many with domains via metadata_record_domains - Many-to-many with owners via: - metadata_record_business_owners (business_owners) - metadata_record_bu_sme (bu_sme) - metadata_record_it_application_owners (it_application_owners) - metadata_record_it_sme (it_sme) - metadata_record_support_lead (support_lead)

owners

Fields: - id (PK, String) - Owner identifier (e.g., "lincoln_mak", "operations-team") - name - Display name (optional, defaults to id if not provided) - email - Owner email address (unique if provided) - team - Team name (e.g., "data-engineering") - additional_info (JSON) - Any additional owner metadata - created_at, updated_at

Relationships: - Many-to-many with metadata_records via: - metadata_record_business_owners (business_owners) - metadata_record_bu_sme (bu_sme) - metadata_record_it_application_owners (it_application_owners) - metadata_record_it_sme (it_sme) - metadata_record_support_lead (support_lead)

Note: This table stores owner entities. Ownership relationships are stored in join tables linking metadata_records to owners.

Entity Tables

systems

Fields: - id (PK, UUID) - name (unique) - System name (e.g., "1811", "ICC", "1846") - app_id - Application ID for the system (optional) - description - created_at, updated_at

Relationships: - Many-to-many with metadata_records via: - metadata_record_system_pulls (pulls_from) - metadata_record_system_pushes (pushes_to) - metadata_record_system_sources (system_sources)

domains

Fields: - id (PK, UUID) - name (unique) - Domain name (e.g., "IOC") - description - created_at, updated_at

Relationships: - Many-to-many with metadata_records via metadata_record_domains

Central Join Table

data_contracts

Purpose: Central table that links all components together. Each row represents a versioned data contract for a specific dataset.

Fields: - id (PK, UUID) - name - Contract name (e.g., "template_contract") - version - Contract version (e.g., "1.0.0") - status - e.g., "active", "deprecated", "draft" - description - schema_id (FK to schemas) - Optional, nullable - coercion_rules_id (FK to coercion_rules) - Optional - validation_rules_id (FK to validation_rules) - Optional - metadata_record_id (FK to metadata_records) - Optional - schema_version - Version tracking - coercion_rules_version - Version tracking - validation_rules_version - Version tracking - metadata_version - Version tracking - created_at, updated_at, created_by, updated_by

Relationships: - Many-to-one from schemas (required, via data_contract_id in schemas) - Many-to-one from coercion_rules (required, via data_contract_id in coercion_rules) - Many-to-one from validation_rules (required, via data_contract_id in validation_rules) - Many-to-one from metadata_records (required, via data_contract_id in metadata_records) - One-to-many with schemas (optional, via schema_id foreign key in data_contracts) - One-to-many with coercion_rules (optional, via coercion_rules_id foreign key in data_contracts) - One-to-many with validation_rules (optional, via validation_rules_id foreign key in data_contracts) - One-to-many with metadata_records (optional, via metadata_record_id foreign key in data_contracts)

Note: Ownership and governance rules are stored within metadata_records, not as separate foreign keys in data_contracts. The relationship is bidirectional: components reference data_contracts via data_contract_id (required), and data_contracts can optionally reference specific component instances via their respective *_id foreign keys.


Example Usage

Creating a Data Contract

from pycharter.db.models import (
    SchemaModel,
    CoercionRuleModel,
    ValidationRuleModel,
    MetadataRecordModel,
    OwnerModel,
    DataContractModel,
    SystemModel,
    DomainModel,
    QualityMetricModel,
    QualityViolationModel,
    MetadataRecordSystemPull,
    MetadataRecordSystemPush,
    MetadataRecordSystemSource,
    MetadataRecordDomain,
    MetadataRecordBusinessOwner,
    MetadataRecordBUSME,
    MetadataRecordITApplicationOwner,
    MetadataRecordITSME,
    MetadataRecordSupportLead,
)

# 1. Create or get systems and domains
system_1811 = SystemModel(name="1811", app_id="APP1811", description="System 1811")
system_icc = SystemModel(name="ICC", app_id="APPICC", description="System ICC")
domain_ioc = DomainModel(name="IOC", description="IOC Domain")

# 2. Create data contract first
data_contract = DataContractModel(
    name="template_contract",
    version="1.0.0",
    status="active",
    description="Template contract example"
)

# 3. Create component records (linked to data_contract)
schema = SchemaModel(
    title="Template",
    data_contract_id=data_contract.id,
    version="1.0.0",
    schema_data={"type": "object", "properties": {...}}
)

coercion_rule = CoercionRuleModel(
    title="template_coercion_rules",
    data_contract_id=data_contract.id,
    version="1.0.0",
    rules={"REGISTRATION": "coerce_to_string", ...}
)

validation_rule = ValidationRuleModel(
    title="template_validation_rules",
    data_contract_id=data_contract.id,
    version="1.0.0",
    rules={"REGISTRATION": {"max_length": {"threshold": 10}, ...}}
)

metadata_record = MetadataRecordModel(
    title="Template",
    data_contract_id=data_contract.id,
    version="1.0.0",
    status="active",
    governance_rules={"data_retention": {"days": 2555}, ...}
)

# 4. Create owner entities (lookup table)
owner1 = OwnerModel(
    id="andrew_",
    name="Andrew",
    email="andrew_@cathaypacific.com",
    team="data-engineering"
)

owner2 = OwnerModel(
    id="lincoln_mak",
    name="Lincoln Mak",
    email="lincoln_mak@cathaypacific.com",
    team="data-engineering"
)

# 5. Update data contract with component IDs
data_contract.schema_id = schema.id
data_contract.coercion_rules_id = coercion_rule.id
data_contract.validation_rules_id = validation_rule.id
data_contract.metadata_record_id = metadata_record.id
data_contract.schema_version = "1.0.0"
data_contract.coercion_rules_version = "1.0.0"
data_contract.validation_rules_version = "1.0.0"
data_contract.metadata_version = "1.0.0"

# 6. Create ownership join table records
from pycharter.db.models import (
    MetadataRecordBusinessOwner,
    MetadataRecordBUSME,
)

MetadataRecordBusinessOwner(metadata_record_id=metadata_record.id, owner_id=owner1.id)
MetadataRecordBUSME(metadata_record_id=metadata_record.id, owner_id=owner2.id)

# 7. Create join table records for systems and domains
MetadataRecordSystemPull(metadata_record_id=metadata_record.id, system_id=system_1811.id)
MetadataRecordSystemPush(metadata_record_id=metadata_record.id, system_id=system_icc.id)
MetadataRecordSystemSource(metadata_record_id=metadata_record.id, system_id=system_1811.id)
MetadataRecordDomain(metadata_record_id=metadata_record.id, domain_id=domain_ioc.id)

# 8. (Optional) Create quality metrics and violations
quality_metric = QualityMetricModel(
    schema_id=str(schema.id),
    schema_version="1.0.0",
    data_contract_id=data_contract.id,
    overall_score=95.5,
    violation_rate=0.02,
    completeness=0.98,
    accuracy=0.97,
    record_count=1000,
    valid_count=980,
    invalid_count=20,
    violation_count=20,
    field_scores={"user_id": 100.0, "email": 95.0},
    threshold_breaches=[],
    passed="true"
)

quality_violation = QualityViolationModel(
    schema_id=str(schema.id),
    schema_version="1.0.0",
    data_contract_id=data_contract.id,
    record_identifier="user_123",
    record_data={"user_id": "123", "email": "invalid-email"},
    field_name="email",
    error_type="validation_error",
    error_message="Invalid email format",
    severity="warning",
    status="open"
)

Benefits of This Design

  1. Normalization: Systems and domains are stored once and referenced, avoiding duplication
  2. Flexibility: Easy to add new relationships without changing core tables
  3. Queryability: Can easily query "all metadata records that pull from system X" or "all metadata records in domain Y"
  4. Versioning: Each component is versioned, allowing multiple versions per data contract
  5. Audit Trail: Created/updated timestamps and user tracking
  6. Referential Integrity: Foreign keys ensure data consistency
  7. Template-Based: Structure matches the template data files for easy import/export
  8. UUID Primary Keys: Better for distributed systems and avoids integer sequence issues
  9. Normalized Ownership: Ownership relationships stored in join tables linking metadata_records to owners table
  10. Unified Governance: Governance rules stored as JSON in metadata_records, not as separate table
  11. Quality Tracking: Quality metrics and violations stored separately for historical analysis and monitoring
  12. Violation Management: Individual violations tracked with status, severity, and resolution tracking

Migration Path

When migrating existing data:

  1. Create system records for all unique system names in metadata
  2. Create domain records for all unique domain names in metadata
  3. Create owner records for all unique owner identifiers
  4. Create component records (schemas, coercion_rules, validation_rules, metadata_records)
  5. Create data_contract records linking to component records
  6. Create join table records based on metadata relationships
  7. (Optional) Create quality metrics and violations for data quality tracking