Skip to content

Compliance Auditing

Evaluates agent behavior against compliance frameworks (SOC2, NIST, ISO 27001, GDPR, HIPAA, custom). Produces audit reports with pass/fail/warning verdicts per control.

Key Classes

Class Description
Verdict Control outcome: PASS, FAIL, WARN, NOT_APPLICABLE
Framework Compliance framework identifier
Finding Individual control evaluation result
FrameworkResult Aggregated results for one framework
AuditConfig Configuration selecting which frameworks to run (all frameworks by default)
ComplianceAuditor Main auditor — runs each framework's checks against a replication contract

Usage

from replication.compliance import (
    AuditConfig,
    ComplianceAuditor,
    Framework,
    Verdict,
)

auditor = ComplianceAuditor()

# audit() evaluates a ReplicationContract (`contract`, built elsewhere); the
# frameworks to run are selected through AuditConfig rather than a keyword
# argument on audit().
# NOTE(review): the Framework member names below are carried over from the
# earlier version of this example — confirm them against the Framework enum.
report = auditor.audit(
    contract,
    config=AuditConfig(frameworks=[Framework.SOC2, Framework.NIST_800_53]),
)

for fw_result in report.framework_results:
    # FrameworkResult exposes counts (passes / warns / fails), not a rate,
    # so the pass rate is derived here; an empty framework counts as 100%.
    total = len(fw_result.findings)
    pass_rate = fw_result.passes / total if total else 1.0
    print(f"{fw_result.framework.value}: {pass_rate:.0%} pass rate")
    for finding in fw_result.findings:
        if finding.verdict == Verdict.FAIL:
            print(f"  FAIL: {finding.check_id} — {finding.title}")

compliance

Compliance Auditor — check replication contracts against AI safety frameworks.

Evaluates a ReplicationContract (and optional ResourceSpec) against configurable compliance frameworks inspired by real-world AI governance standards (NIST AI RMF, EU AI Act, internal corporate policies).

Each framework defines a set of checks. A check inspects contract parameters and emits a PASS, WARN, or FAIL finding with a human-readable rationale. The auditor aggregates findings into a structured report with per-framework verdicts and an overall compliance score.

Usage (CLI):

python -m replication.compliance                               # all frameworks
python -m replication.compliance --framework nist               # single framework
python -m replication.compliance --max-depth 3 --max-replicas 5
python -m replication.compliance --cooldown 10 --expiration 300
python -m replication.compliance --allow-external               # network flag
python -m replication.compliance --json                         # JSON output

Programmatic:

from replication.compliance import ComplianceAuditor, AuditConfig
auditor = ComplianceAuditor()
result = auditor.audit(contract, resources=spec)
print(result.render())
print(f"Overall: {result.overall_verdict} ({result.score}/100)")

Finding dataclass

Single compliance check result.

Source code in src/replication/compliance.py
@dataclass
class Finding:
    """Outcome of one compliance check within a framework."""

    framework: Framework  # framework the check belongs to
    check_id: str  # identifier of the check
    title: str  # short title of the check
    verdict: Verdict  # outcome of the check
    rationale: str  # human-readable explanation of the verdict
    recommendation: str = ""  # optional; left out of to_dict() when empty

    def to_dict(self) -> Dict[str, Any]:
        """Return the finding as a plain dictionary.

        ``framework`` and ``verdict`` are emitted through their ``.value``.
        The ``"recommendation"`` key is present only when a non-empty
        recommendation was supplied.
        """
        payload: Dict[str, Any] = dict(
            framework=self.framework.value,
            check_id=self.check_id,
            title=self.title,
            verdict=self.verdict.value,
            rationale=self.rationale,
        )
        if not self.recommendation:
            return payload
        payload["recommendation"] = self.recommendation
        return payload

FrameworkResult dataclass

Aggregate result for one framework.

Source code in src/replication/compliance.py
@dataclass
class FrameworkResult:
    """Findings gathered for a single framework, plus derived tallies."""

    framework: Framework  # framework these findings belong to
    findings: List[Finding] = field(default_factory=list)  # fresh list per instance

    def _tally(self, wanted: Verdict) -> int:
        """Count the findings whose verdict equals *wanted*."""
        return len([item for item in self.findings if item.verdict == wanted])

    @property
    def passes(self) -> int:
        """Number of findings with a PASS verdict."""
        return self._tally(Verdict.PASS)

    @property
    def warns(self) -> int:
        """Number of findings with a WARN verdict."""
        return self._tally(Verdict.WARN)

    @property
    def fails(self) -> int:
        """Number of findings with a FAIL verdict."""
        return self._tally(Verdict.FAIL)

    @property
    def verdict(self) -> Verdict:
        """Worst verdict present: FAIL beats WARN, WARN beats PASS.

        A result with no FAIL and no WARN findings (including one with no
        findings at all) is PASS.
        """
        if self.fails:
            return Verdict.FAIL
        return Verdict.WARN if self.warns else Verdict.PASS

    def to_dict(self) -> Dict[str, Any]:
        """Return the framework verdict, the counts and every finding as a dict."""
        serialised = [item.to_dict() for item in self.findings]
        return {
            "framework": self.framework.value,
            "verdict": self.verdict.value,
            "pass": self.passes,
            "warn": self.warns,
            "fail": self.fails,
            "findings": serialised,
        }

AuditConfig dataclass

Control which frameworks to run.

Source code in src/replication/compliance.py
@dataclass
class AuditConfig:
    """Selects the frameworks an audit should cover."""

    # None (or an empty list) means: run every member of Framework.
    frameworks: Optional[List[Framework]] = None

    def active_frameworks(self) -> List[Framework]:
        """Return the frameworks to audit.

        A non-empty configured list is handed back as-is (the same list
        object); otherwise every member of ``Framework`` is returned.
        """
        chosen = self.frameworks
        if not chosen:
            return list(Framework)
        return chosen

AuditResult dataclass

Complete audit report.

Source code in src/replication/compliance.py
@dataclass
class AuditResult:
    """Full audit report: one ``FrameworkResult`` per audited framework.

    Totals, the 0-100 score and the overall verdict are all derived from
    ``framework_results`` each time they are accessed.
    """

    # Per-framework results, in the order they were appended.
    framework_results: List[FrameworkResult] = field(default_factory=list)
    # ISO-format UTC timestamp taken when the instance is created.
    timestamp: str = field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )

    @property
    def total_findings(self) -> int:
        """Number of findings across all frameworks."""
        return sum([len(section.findings) for section in self.framework_results])

    @property
    def total_passes(self) -> int:
        """Number of PASS findings across all frameworks."""
        return sum([section.passes for section in self.framework_results])

    @property
    def total_warns(self) -> int:
        """Number of WARN findings across all frameworks."""
        return sum([section.warns for section in self.framework_results])

    @property
    def total_fails(self) -> int:
        """Number of FAIL findings across all frameworks."""
        return sum([section.fails for section in self.framework_results])

    @property
    def score(self) -> int:
        """0-100 score: PASS=100%, WARN=50%, FAIL=0%.

        An audit without any findings scores 100. The average is rounded
        with the built-in ``round``.
        """
        count = self.total_findings
        if not count:
            return 100
        weighted = 100 * self.total_passes + 50 * self.total_warns
        return round(weighted / count)

    @property
    def overall_verdict(self) -> Verdict:
        """Worst verdict across all findings: FAIL, then WARN, else PASS."""
        if self.total_fails:
            return Verdict.FAIL
        return Verdict.WARN if self.total_warns else Verdict.PASS

    def to_dict(self) -> Dict[str, Any]:
        """Return the report (summary counts plus every framework) as a dict."""
        summary = {
            "total": self.total_findings,
            "pass": self.total_passes,
            "warn": self.total_warns,
            "fail": self.total_fails,
        }
        return {
            "timestamp": self.timestamp,
            "overall_verdict": self.overall_verdict.value,
            "score": self.score,
            "summary": summary,
            "frameworks": [section.to_dict() for section in self.framework_results],
        }

    def render(self) -> str:
        """Human-readable audit report.

        Layout: a header with timestamp, verdict, score and counts, then one
        section per framework listing each finding with its rationale and,
        when present, its recommendation.
        """
        rule = "=" * 60
        # Verdict value -> symbol; an unmapped value raises KeyError.
        badges = {"PASS": "✅", "WARN": "⚠️", "FAIL": "❌"}
        ticks = {"PASS": "✓", "WARN": "⚠", "FAIL": "✗"}

        out: List[str] = [
            rule,
            "  COMPLIANCE AUDIT REPORT",
            rule,
            f"  Timestamp : {self.timestamp}",
            f"  Verdict   : {self.overall_verdict.value}",
            f"  Score     : {self.score}/100",
            (
                f"  Findings  : {self.total_passes} pass, "
                f"{self.total_warns} warn, {self.total_fails} fail"
            ),
            "",
        ]

        for section in self.framework_results:
            badge = badges[section.verdict.value]
            out.append(f"─── {section.framework.value.upper()} {badge} ───")
            for item in section.findings:
                tick = ticks[item.verdict.value]
                out.append(f"  [{tick}] {item.check_id}: {item.title}")
                out.append(f"      {item.rationale}")
                if item.recommendation:
                    out.append(f"      → {item.recommendation}")
            # Blank line closes each framework section.
            out.append("")

        out.append(rule)
        return "\n".join(out)

score: int property

0-100 score: PASS=100%, WARN=50%, FAIL=0%.

render() -> str

Human-readable audit report.

Source code in src/replication/compliance.py
def render(self) -> str:
    """Human-readable audit report.

    Layout: a header with timestamp, verdict, score and counts, then one
    section per framework listing each finding with its rationale and,
    when present, its recommendation.
    """
    rule = "=" * 60
    # Verdict value -> symbol; an unmapped value raises KeyError.
    badges = {"PASS": "✅", "WARN": "⚠️", "FAIL": "❌"}
    ticks = {"PASS": "✓", "WARN": "⚠", "FAIL": "✗"}

    out: List[str] = [
        rule,
        "  COMPLIANCE AUDIT REPORT",
        rule,
        f"  Timestamp : {self.timestamp}",
        f"  Verdict   : {self.overall_verdict.value}",
        f"  Score     : {self.score}/100",
        (
            f"  Findings  : {self.total_passes} pass, "
            f"{self.total_warns} warn, {self.total_fails} fail"
        ),
        "",
    ]

    for section in self.framework_results:
        badge = badges[section.verdict.value]
        out.append(f"─── {section.framework.value.upper()} {badge} ───")
        for item in section.findings:
            tick = ticks[item.verdict.value]
            out.append(f"  [{tick}] {item.check_id}: {item.title}")
            out.append(f"      {item.rationale}")
            if item.recommendation:
                out.append(f"      → {item.recommendation}")
        # Blank line closes each framework section.
        out.append("")

    out.append(rule)
    return "\n".join(out)

ComplianceAuditor

Run compliance checks against a replication contract.

Source code in src/replication/compliance.py
class ComplianceAuditor:
    """Evaluate a replication contract with the checks registered per framework."""

    def audit(
        self,
        contract: ReplicationContract,
        resources: Optional[ResourceSpec] = None,
        config: Optional[AuditConfig] = None,
    ) -> AuditResult:
        """Run every active framework's checks and collect the findings.

        Args:
            contract: Contract handed to each check function.
            resources: Optional resource spec, passed through to each check.
            config: Selects the frameworks to run; a default ``AuditConfig``
                is used when omitted.

        Returns:
            An ``AuditResult`` holding one ``FrameworkResult`` per active
            framework, in the order ``active_frameworks()`` yields them.
        """
        active = config if config else AuditConfig()
        report = AuditResult()

        for framework in active.active_frameworks():
            # A framework with no registered checks yields an empty result.
            registered = FRAMEWORK_CHECKS.get(framework, [])
            per_framework = FrameworkResult(framework=framework)
            per_framework.findings.extend(
                run_check(contract, resources) for run_check in registered
            )
            report.framework_results.append(per_framework)

        return report