Skip to content

Contract Parser

Parse data contract files into structured metadata.

Overview

from pycharter import parse_contract_file

metadata = parse_contract_file("user_contract.yaml")

API Reference

parse_contract

parse_contract(
    contract_data: dict[str, Any], validate: bool = True
) -> ContractMetadata

Parse a contract dictionary and decompose into metadata components.

Expected contract structure:

    {
        "schema": {...},              # JSON Schema definition (required, may contain "version")
        "coercion_rules": {...},      # Optional coercion rules (may contain "version")
        "validation_rules": {...},    # Optional validation rules (may contain "version")
        "metadata": {                 # Optional metadata (may contain "version")
            "ownership": {...},       # Ownership info nested in metadata
            "governance_rules": {...} # Governance rules nested in metadata
        },
        "ontology": {...},            # Optional ontology (version, fields with concept/definition/relationships)
        "versions": {...}             # Optional explicit version tracking
    }

The contract is validated against Pydantic models to ensure it adheres to the database table design. This ensures data integrity when storing contracts.

Note: ownership and governance_rules should be inside metadata, not at top level.

Parameters:

Name Type Description Default
contract_data dict[str, Any]

Contract data as dictionary

required
validate bool

If True (default), validate contract against schema before parsing

True

Returns:

Type Description
ContractMetadata

ContractMetadata object with decomposed components and version tracking

Raises:

Type Description
ValueError

If contract does not conform to the required schema (when validate=True)

Example

    >>> contract = {
    ...     "schema": {"type": "object", "version": "1.0.0", "properties": {"name": {"type": "string"}}},
    ...     "metadata": {
    ...         "version": "1.0.0",
    ...         "ownership": {"owner": "team-data", "team": "data-engineering"},
    ...         "governance_rules": {"data_retention": {"days": 365}}
    ...     }
    ... }
    >>> metadata = parse_contract(contract)
    >>> metadata.schema
    {'type': 'object', 'version': '1.0.0', 'properties': {'name': {'type': 'string'}}}
    >>> metadata.versions
    {'schema': '1.0.0', 'metadata': '1.0.0'}
    >>> metadata.ownership
    {'owner': 'team-data', 'team': 'data-engineering'}

Source code in src/pycharter/contract_parser/parser.py
def parse_contract(
    contract_data: dict[str, Any], validate: bool = True
) -> ContractMetadata:
    """
    Parse a contract dictionary and decompose into metadata components.

    Expected contract structure:
    {
        "schema": {...},              # JSON Schema definition (required, may contain "version")
        "coercion_rules": {...},      # Optional coercion rules (may contain "version")
        "validation_rules": {...},   # Optional validation rules (may contain "version")
        "metadata": {                 # Optional metadata (may contain "version")
            "ownership": {...},       # Ownership info nested in metadata
            "governance_rules": {...} # Governance rules nested in metadata
        },
        "ontology": {...},            # Optional ontology (version, fields with concept/definition/relationships)
        "versions": {...}             # Optional explicit version tracking
    }

    The contract is validated against Pydantic models to ensure it adheres to the
    database table design. This ensures data integrity when storing contracts.

    Note: ownership and governance_rules should be inside metadata, not at top level.
    For leniency, top-level "ownership" / "governance_rules" entries are copied into
    the parsed metadata when metadata does not already define them.

    If no "schema" key is present (or it is empty) but the contract itself has a
    "type" or "properties" key, the whole contract is treated as the schema and
    every key other than the reserved component names is collected as metadata.

    The input dictionary is never modified: the metadata component is handled as
    a shallow copy, so relocating top-level keys into it does not leak back into
    ``contract_data``.

    Args:
        contract_data: Contract data as dictionary
        validate: If True (default), validate contract against schema before parsing

    Returns:
        ContractMetadata object with decomposed components and version tracking.
        Versions found inside a component ("version" key) take precedence over
        entries with the same name in the explicit "versions" mapping.

    Raises:
        ValueError: If contract does not conform to the required schema (when validate=True)

    Example:
        >>> contract = {
        ...     "schema": {"type": "object", "version": "1.0.0", "properties": {"name": {"type": "string"}}},
        ...     "metadata": {
        ...         "version": "1.0.0",
        ...         "ownership": {"owner": "team-data", "team": "data-engineering"},
        ...         "governance_rules": {"data_retention": {"days": 365}}
        ...     }
        ... }
        >>> metadata = parse_contract(contract)
        >>> metadata.schema
        {'type': 'object', 'version': '1.0.0', 'properties': {'name': {'type': 'string'}}}
        >>> metadata.versions
        {'schema': '1.0.0', 'metadata': '1.0.0'}
        >>> metadata.ownership
        {'owner': 'team-data', 'team': 'data-engineering'}
    """
    # Validate contract structure against schema
    if validate:
        _validate_contract_structure(contract_data)

    schema = contract_data.get("schema", {})
    coercion_rules = contract_data.get("coercion_rules", {})
    validation_rules = contract_data.get("validation_rules", {})
    ontology = contract_data.get("ontology", {})

    if not schema and ("type" in contract_data or "properties" in contract_data):
        # No dedicated "schema" entry: the contract itself is the JSON Schema.
        # Everything that is not a reserved component key becomes metadata.
        schema = contract_data
        reserved_keys = {
            "schema",
            "coercion_rules",
            "validation_rules",
            "versions",
            "ontology",
        }
        metadata = {
            key: value
            for key, value in contract_data.items()
            if key not in reserved_keys
        }
    else:
        raw_metadata = contract_data.get("metadata", {})
        # Shallow-copy so the key relocation below cannot mutate the caller's
        # dict; anything that is not a dict is replaced by an empty mapping.
        metadata = dict(raw_metadata) if isinstance(raw_metadata, dict) else {}

    # Move ownership / governance_rules from top level into metadata if present
    # there and not already defined inside metadata.
    for relocated_key in ("ownership", "governance_rules"):
        if relocated_key in contract_data and relocated_key not in metadata:
            metadata[relocated_key] = contract_data[relocated_key]

    # Extract versions from all components. The explicit mapping is applied
    # first so that per-component "version" values override it.
    versions: dict[str, str] = {}
    explicit_versions = contract_data.get("versions")
    if isinstance(explicit_versions, dict):
        versions.update(explicit_versions)

    versioned_components = (
        ("schema", schema),
        ("metadata", metadata),
        ("coercion_rules", coercion_rules),
        ("validation_rules", validation_rules),
        ("ontology", ontology),
    )
    for component_name, component in versioned_components:
        if isinstance(component, dict) and "version" in component:
            versions[component_name] = component["version"]

    return ContractMetadata(
        schema=schema,
        coercion_rules=coercion_rules,
        validation_rules=validation_rules,
        metadata=metadata,
        ontology=ontology,
        versions=versions,
    )

parse_contract_file

parse_contract_file(
    file_path: str, validate: bool = True
) -> ContractMetadata

Load and parse a contract file (YAML or JSON).

The contract is validated against Pydantic models to ensure it adheres to the database table design before parsing.

Parameters:

Name Type Description Default
file_path str

Path to contract file

required
validate bool

If True (default), validate contract against schema before parsing

True

Returns:

Type Description
ContractMetadata

ContractMetadata object with decomposed components

Raises:

Type Description
FileNotFoundError

If file doesn't exist

ValueError

If file format is not supported, invalid, or doesn't conform to schema

Example

    >>> metadata = parse_contract_file("contract.yaml")
    >>> print(metadata.schema)

Source code in src/pycharter/contract_parser/parser.py
def parse_contract_file(file_path: str, validate: bool = True) -> ContractMetadata:
    """
    Read a YAML or JSON contract from disk and hand it to ``parse_contract``.

    The format is chosen from the file extension (case-insensitive): ``.json``
    is read with ``json.load``; ``.yaml`` / ``.yml`` with ``yaml.safe_load``.
    The loaded document must be a mapping. Before parsing, the data is passed
    through ``resolve_values`` (variable substitution) with ``source_file`` set
    to ``file_path``.

    Args:
        file_path: Path to contract file
        validate: If True (default), validate contract against schema before parsing

    Returns:
        ContractMetadata object with decomposed components

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If file format is not supported, invalid, or doesn't conform to schema

    Note:
        Parse errors raised by ``json.load`` / ``yaml.safe_load`` are not caught
        here and propagate to the caller unchanged.

    Example:
        >>> metadata = parse_contract_file("contract.yaml")
        >>> print(metadata.schema)
    """
    contract_path = Path(file_path)
    if not contract_path.exists():
        raise FileNotFoundError(f"Contract file not found: {file_path}")

    # Pick the loader from the lower-cased extension.
    extension = contract_path.suffix.lower()
    if extension == ".json":
        with open(contract_path, "r", encoding="utf-8") as handle:
            loaded = json.load(handle)
    elif extension in (".yaml", ".yml"):
        with open(contract_path, "r", encoding="utf-8") as handle:
            loaded = yaml.safe_load(handle)
    else:
        raise ValueError(
            f"Unsupported file format: {extension}. Supported formats: .json, .yaml, .yml"
        )

    # An empty file or a top-level list/scalar is not a usable contract.
    if not isinstance(loaded, dict):
        raise ValueError(
            f"Contract file must contain a dictionary/object, got {type(loaded)}"
        )

    # Imported at call time, as in the original implementation.
    from pycharter.utils.value_injector import resolve_values

    resolved = resolve_values(loaded, source_file=file_path)
    return parse_contract(resolved, validate=validate)

ContractMetadata

The result of parsing a contract:

Attribute Type Description
schema Dict JSON Schema definition
coercion_rules Dict Coercion rules
validation_rules Dict Validation rules
metadata Dict Metadata (title, description)
ownership Dict Ownership information
governance_rules Dict Governance policies
ontology Dict Ontology annotations (version, fields)
versions Dict Component versions

Examples

from pycharter import parse_contract_file

# Parse from file
metadata = parse_contract_file("contracts/user.yaml")

# Access components
print(f"Schema: {metadata.schema}")
print(f"Metadata: {metadata.metadata}")
print(f"Owner: {metadata.ownership}")

See Also