API Reference
Complete API documentation for Truthound.
Core Module
The main entry point for Truthound functionality.
import truthound as th
# Core functions: one call per top-level entry point. Each of the first five
# is passed the path of a CSV file.
report = th.check("data.csv")  # Validate data
profile = th.profile("data.csv")  # Profile data
schema = th.learn("data.csv")  # Learn schema
pii = th.scan("data.csv")  # Scan for PII
masked = th.mask("data.csv")  # Mask data
# NOTE: `old` and `new` are placeholders for the two datasets being compared;
# this snippet does not define them.
drift = th.compare(old, new)  # Detect drift
Module Index
Core API
Validation
Profiling
Storage & Reporting
CI/CD
Advanced Features
Extensions
Quick Examples
Basic Validation
import truthound as th
# Check with default validators (no `validators` argument is passed)
report = th.check("data.csv")
print(f"Issues found: {report.issue_count}")
# Check with specific validators, selected by name, and a minimum severity
report = th.check(
    "data.csv",
    validators=["null", "duplicate", "range"],
    min_severity="high"
)
# Check with schema: learn it from a reference file, then pass it to the
# check of a different file
schema = th.learn("reference_data.csv")
report = th.check("new_data.csv", schema=schema)
Profiling
from truthound import DataProfiler, ProfilerConfig
# Configure profiler
config = ProfilerConfig(
    include_patterns=True,
    include_correlations=True,
    sample_size=10000
)
# Profile data
profiler = DataProfiler(config=config)
profile = profiler.profile("data.csv")
# Access profile information
print(f"Rows: {profile.row_count}")
print(f"Columns: {profile.column_count}")
# One summary line per column: name, inferred type, and share of nulls
for col in profile.columns:
    print(f"{col.name}: {col.inferred_type}, {col.null_ratio:.1%} nulls")
Custom Validators
from truthound.validators.sdk import validator, ValidationResult
@validator("my_validator", category="custom")
def my_custom_validator(df, column: str, threshold: float = 0.1):
    """Check that the share of negative values in a column stays within a limit.

    Args:
        df: Data to validate; must support ``df[column]`` lookup.
        column: Name of the column to inspect.
        threshold: Largest acceptable ratio of negative values (default 0.1).

    Returns:
        A ``ValidationResult`` whose ``passed`` flag is true when the ratio of
        negative values does not exceed ``threshold``.
    """
    values = df[column]
    total = len(values)
    # A plain int keeps ``details`` free of library-specific scalar types.
    invalid_count = int((values < 0).sum())
    # An empty column has no invalid values; avoid dividing by zero.
    invalid_ratio = invalid_count / total if total else 0.0
    return ValidationResult(
        passed=invalid_ratio <= threshold,
        message=f"Found {invalid_ratio:.1%} invalid values",
        details={"invalid_count": invalid_count}
    )