Audit Logging Configuration¶
Truthound provides enterprise audit logging for compliance and operational tracking.
Quick Start¶
from truthound.infrastructure.audit import (
configure_audit,
get_audit_logger,
)
# Configure audit logging
# Note: Additional options like include_data_preview, redact_sensitive
# are passed via **kwargs to EnterpriseAuditConfig
audit = configure_audit(
service_name="my-service",
environment="production",
storage_backends=["memory"],
)
# Or get the global audit logger
audit = get_audit_logger()
# Log audit events
audit.log_validation_start(checkpoint_name="daily_check", user="system")
EnterpriseAuditConfig¶
from truthound.infrastructure.audit import EnterpriseAuditConfig, RetentionPolicy
config = EnterpriseAuditConfig(
enabled=True,
include_data_preview=False, # Include data samples in logs
redact_sensitive=True, # Redact sensitive fields
sensitive_fields=[
"password", "ssn", "credit_card",
"api_key", "secret", "token",
],
retention=RetentionPolicy(name="default", retention_days=365),
# Storage backends
storage_backends=["memory"],
# Options: memory, sqlite, elasticsearch, s3, kafka
# Elasticsearch configuration
elasticsearch_url="",
elasticsearch_index_prefix="truthound-audit",
elasticsearch_username="",
elasticsearch_password="",
# S3 configuration
s3_bucket="",
s3_prefix="audit/",
s3_region="",
# Kafka configuration
kafka_bootstrap_servers="",
kafka_topic="truthound-audit",
# Compliance
compliance_standards=[], # SOC2, GDPR, HIPAA
require_checksum=True,
require_signing=False,
# Retention
retention_policy="default",
archive_to_cold_storage=False,
cold_storage_after_days=90,
# Performance
async_write=True,
batch_size=100,
flush_interval=5.0,
)
Presets¶
# Production configuration
config = EnterpriseAuditConfig.production("truthound")
# Compliance configuration (7-year retention)
config = EnterpriseAuditConfig.compliance("truthound", ["SOC2"])
Retention Policies¶
from truthound.infrastructure.audit import RetentionPolicy
# Default: 1 year
policy = RetentionPolicy.default()
# SOC 2: 7 years
policy = RetentionPolicy.compliance_soc2()
# GDPR: 1 year (data minimization)
policy = RetentionPolicy.compliance_gdpr()
# HIPAA: 6 years
policy = RetentionPolicy.compliance_hipaa()
Storage Backends¶
Elasticsearch¶
# Configure Elasticsearch storage via configure_audit()
configure_audit(
service_name="my-service",
storage_backends=["elasticsearch"],
elasticsearch_url="https://es.example.com:9200",
)
S3 (Long-term Archival)¶
# Configure S3 storage via configure_audit()
configure_audit(
service_name="my-service",
storage_backends=["s3"],
s3_bucket="audit-logs-bucket",
)
Kafka (Real-time Streaming)¶
# Configure Kafka storage via configure_audit()
configure_audit(
service_name="my-service",
storage_backends=["kafka"],
kafka_bootstrap_servers="kafka:9092",
)
Audit Events¶
Validation Events¶
audit = get_audit_logger()
# Validation start
audit.log_validation_start(
checkpoint_name="daily_check",
user="system",
data_source="s3://bucket/data.parquet",
)
# Validation complete
audit.log_validation_complete(
result=result,
duration_ms=1500,
issues_found=15,
)
# Validation failed
audit.log_validation_failed(
checkpoint_name="daily_check",
error="Connection timeout",
)
Action Events¶
# Action executed
audit.log_action_executed(
action_type="slack",
status="success",
target="#data-quality",
)
# Action failed
audit.log_action_failed(
action_type="email",
error="SMTP connection refused",
)
Data Access Events¶
# Data read
audit.log_data_access(
operation="read",
data_source="users_table",
rows_accessed=10000,
user="analyst@company.com",
)
# Data export
audit.log_data_export(
format="csv",
destination="s3://exports/report.csv",
rows_exported=5000,
)
Compliance Reporting¶
from truthound.infrastructure.audit import ComplianceReporter
reporter = ComplianceReporter(audit_logger=audit)
# SOC 2 report
soc2_report = reporter.generate_soc2_report(
date_range=("2024-01-01", "2024-12-31"),
)
# GDPR report
gdpr_report = reporter.generate_gdpr_report(
data_subject="user@example.com",
)
# HIPAA report
hipaa_report = reporter.generate_hipaa_report(
date_range=("2024-01-01", "2024-12-31"),
)
Store Observability Audit¶
Additional audit configuration for storage operations:
from truthound.stores.observability import AuditConfig, AuditLogLevel
config = AuditConfig(
enabled=True,
level=AuditLogLevel.STANDARD, # minimal, standard, verbose, debug
backend="json", # memory, file, json, elasticsearch, kafka
file_path="./audit.log",
include_data_preview=False,
max_data_preview_size=1024,
redact_sensitive=True,
sensitive_fields=[
"password", "secret", "token",
"api_key", "ssn", "credit_card",
],
retention_days=90,
batch_size=100,
flush_interval_seconds=5.0,
)
Audit Log Levels¶
| Level | Description |
|---|---|
MINIMAL |
Only errors and critical events |
STANDARD |
Normal operations and errors |
VERBOSE |
Detailed operation logging |
DEBUG |
Full debug information |
Best Practices¶
1. Configure Appropriate Retention¶
# Production: SOC 2 compliance
config = EnterpriseAuditConfig(
retention_policy="soc2",
archive_to_cold_storage=True,
cold_storage_after_days=90,
)
2. Redact Sensitive Data¶
config = EnterpriseAuditConfig(
redact_sensitive=True,
sensitive_fields=[
"password", "ssn", "credit_card",
"api_key", "secret", "token",
"email", "phone", # Add PII fields
],
)
3. Use Multiple Storage Backends¶
# Real-time to Elasticsearch + archival to S3
configure_audit(
config,
storage=[
ElasticsearchAuditStorage(url="http://elk:9200"),
S3AuditStorage(bucket="audit-archive"),
],
)