# Testing

This guide covers testing practices for the MLOps platform.
## Test Structure

```text
src/mlflow-sklearn/
└── tests/
    ├── __init__.py
    ├── conftest.py              # Shared fixtures
    ├── test_preprocessing.py
    ├── test_training.py
    └── test_evaluation.py

src/mlflow-tf/
└── tests/
    ├── __init__.py
    ├── conftest.py
    ├── unit/                    # Unit tests
    │   ├── test_pipeline.py
    │   └── test_utils.py
    └── integration/             # Integration tests
        └── test_full_pipeline.py
```
## Running Tests

### All Tests

```bash
# Run all tests
pytest

# Run with verbose output
pytest -v

# Run with coverage
pytest --cov=mlflow_sklearn --cov-report=html
```
### Specific Tests

```bash
# Run tests for a specific package
pytest src/mlflow-sklearn/tests/

# Run a specific test file
pytest tests/test_training.py

# Run a specific test function
pytest tests/test_training.py::test_model_accuracy

# Run tests matching a pattern
pytest -k "training"
```
### Test Markers

```bash
# Run only unit tests
pytest -m unit

# Run only integration tests
pytest -m integration

# Skip slow tests
pytest -m "not slow"
```
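Markers are applied to individual tests with the `@pytest.mark.<name>` decorator and should be registered in `pytest.ini` (see Coverage Configuration below) so pytest does not warn about unknown marks. A minimal sketch; the test name is illustrative:

```python
import pytest


@pytest.mark.slow
@pytest.mark.integration
def test_large_dataset_preprocessing():
    """Selected by `pytest -m integration`, deselected by `pytest -m "not slow"`."""
    ...
```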
## Writing Tests

### Basic Test

```python
import pytest
from mlflow_sklearn.s04_train import train_model


def test_train_returns_model():
    """Test that training returns a model object."""
    config = {"learning_rate": 0.01}
    model = train_model(config)

    assert model is not None
    assert hasattr(model, "predict")
```
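Error paths can be checked with `pytest.raises`. A minimal sketch, assuming `train_model` raises `ValueError` for a negative learning rate (that behaviour is an assumption, not something documented here):

```python
import pytest
from mlflow_sklearn.s04_train import train_model


def test_train_rejects_negative_learning_rate():
    """Training should fail fast on an invalid config (assumed behaviour)."""
    with pytest.raises(ValueError):
        train_model({"learning_rate": -0.01})
```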
### Using Fixtures

```python
# conftest.py
import pytest
import pandas as pd


@pytest.fixture
def sample_data():
    """Create sample training data."""
    return pd.DataFrame({
        "feature1": [1.0, 2.0, 3.0, 4.0, 5.0],
        "feature2": [0.1, 0.2, 0.3, 0.4, 0.5],
        "target": [0, 0, 1, 1, 1],
    })


@pytest.fixture
def trained_model(sample_data):
    """Create a trained model for testing."""
    from sklearn.linear_model import LogisticRegression

    X = sample_data[["feature1", "feature2"]]
    y = sample_data["target"]
    model = LogisticRegression()
    model.fit(X, y)
    return model
```

```python
# test_evaluation.py
def test_model_prediction(trained_model, sample_data):
    """Test model can make predictions."""
    X = sample_data[["feature1", "feature2"]]
    predictions = trained_model.predict(X)

    assert len(predictions) == len(X)
    assert all(p in [0, 1] for p in predictions)
```
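By default a fixture is rebuilt for every test. When building it is expensive (for example, fitting a model), a wider scope lets pytest reuse it; a minimal sketch, where the `module` scope and the fixture name are illustrative choices:

```python
# conftest.py (sketch)
import pandas as pd
import pytest
from sklearn.linear_model import LogisticRegression


@pytest.fixture(scope="module")
def shared_model():
    """Fitted once per test module and reused by every test in it."""
    data = pd.DataFrame({
        "feature1": [1.0, 2.0, 3.0, 4.0, 5.0],
        "feature2": [0.1, 0.2, 0.3, 0.4, 0.5],
        "target": [0, 0, 1, 1, 1],
    })
    model = LogisticRegression()
    model.fit(data[["feature1", "feature2"]], data["target"])
    return model
```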
### Parametrized Tests

```python
import pytest


@pytest.mark.parametrize("learning_rate,expected_min_accuracy", [
    (0.001, 0.5),
    (0.01, 0.6),
    (0.1, 0.7),
])
def test_learning_rate_impact(learning_rate, expected_min_accuracy, sample_data):
    """Test different learning rates achieve minimum accuracy."""
    model = train_with_learning_rate(sample_data, learning_rate)
    accuracy = evaluate_model(model, sample_data)

    assert accuracy >= expected_min_accuracy
```
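Individual parameter sets can carry readable ids or their own markers via `pytest.param`; a small sketch reusing the helpers above (the ids and the marked case are illustrative):

```python
import pytest


@pytest.mark.parametrize(
    "learning_rate,expected_min_accuracy",
    [
        pytest.param(0.01, 0.6, id="default-lr"),
        pytest.param(0.0001, 0.5, id="tiny-lr", marks=pytest.mark.slow),
    ],
)
def test_learning_rate_variants(learning_rate, expected_min_accuracy, sample_data):
    model = train_with_learning_rate(sample_data, learning_rate)
    assert evaluate_model(model, sample_data) >= expected_min_accuracy
```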
## Testing MLflow Integration

```python
import os

import mlflow
import pytest


@pytest.fixture
def mlflow_tracking():
    """Set up MLflow for testing."""
    mlflow.set_tracking_uri("sqlite:///test_mlflow.db")
    mlflow.set_experiment("test_experiment")
    yield
    # Cleanup
    os.remove("test_mlflow.db")


def test_mlflow_logging(mlflow_tracking):
    """Test that metrics are logged to MLflow."""
    with mlflow.start_run() as run:
        mlflow.log_metric("accuracy", 0.95)
        mlflow.log_param("epochs", 10)

    # Verify logging
    client = mlflow.tracking.MlflowClient()
    run_data = client.get_run(run.info.run_id)

    assert run_data.data.metrics["accuracy"] == 0.95
    assert run_data.data.params["epochs"] == "10"
```
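To avoid leftover database files and interference between tests, the tracking store can instead be pointed at pytest's built-in `tmp_path` temporary directory; a sketch of that variant (the fixture and experiment names are illustrative):

```python
import mlflow
import pytest


@pytest.fixture
def isolated_mlflow(tmp_path):
    """Each test gets its own throwaway SQLite tracking store."""
    mlflow.set_tracking_uri(f"sqlite:///{tmp_path / 'mlflow.db'}")
    mlflow.set_experiment("test_experiment")
    yield
    # tmp_path is removed by pytest, so no manual cleanup is needed
```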
## Mocking External Services

```python
import io
from unittest.mock import patch, MagicMock

import pandas as pd

from io_library.read_from_s3 import read_csv_from_s3


def test_s3_read_with_mock():
    """Test S3 reading without actual S3 access."""
    mock_data = pd.DataFrame({"col1": [1, 2, 3]})

    with patch("io_library.read_from_s3.boto3") as mock_boto:
        mock_s3 = MagicMock()
        mock_boto.client.return_value = mock_s3
        mock_s3.get_object.return_value = {
            "Body": io.BytesIO(mock_data.to_csv().encode())
        }

        result = read_csv_from_s3("bucket", "key")

        assert len(result) == 3
```
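For simpler cases, pytest's built-in `monkeypatch` fixture can replace the reader function wholesale instead of stubbing boto3; a minimal sketch under the assumption that callers resolve `read_csv_from_s3` through the `io_library.read_from_s3` module at call time:

```python
import pandas as pd

import io_library.read_from_s3 as s3_reader


def test_pipeline_with_stubbed_s3(monkeypatch):
    """Replace the S3 reader with an in-memory stand-in."""
    fake_frame = pd.DataFrame({"col1": [1, 2, 3]})

    # Any code that looks up io_library.read_from_s3.read_csv_from_s3 during
    # this test now receives the fake frame instead of touching S3.
    monkeypatch.setattr(s3_reader, "read_csv_from_s3", lambda bucket, key: fake_frame)

    assert len(s3_reader.read_csv_from_s3("bucket", "key")) == 3
```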
## Test Categories

### Unit Tests

Test individual functions in isolation:

```python
import numpy as np
import pytest


@pytest.mark.unit
def test_normalize_data():
    """Test data normalization function."""
    data = np.array([0, 50, 100])
    normalized = normalize(data)

    assert normalized.min() == 0
    assert normalized.max() == 1
```
### Integration Tests

Test component interactions:

```python
@pytest.mark.integration
def test_full_preprocessing_pipeline():
    """Test complete preprocessing pipeline."""
    raw_data = load_test_data()

    # Run through all preprocessing steps
    processed = preprocess_pipeline(raw_data)

    assert processed.shape[0] > 0
    assert not processed.isnull().any().any()
```
### End-to-End Tests

Test complete workflows:

```python
@pytest.mark.e2e
@pytest.mark.slow
def test_full_training_pipeline():
    """Test complete training pipeline end-to-end."""
    # This test requires S3 access
    result = run_full_pipeline()

    assert result["status"] == "success"
    assert result["accuracy"] > 0.8
```
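Because this test needs S3 access, it can be skipped automatically when credentials are absent; a sketch using `pytest.mark.skipif`, where checking `AWS_ACCESS_KEY_ID` is only one possible guard and should be adapted to however credentials are supplied in this project:

```python
import os

import pytest


@pytest.mark.e2e
@pytest.mark.slow
@pytest.mark.skipif(
    "AWS_ACCESS_KEY_ID" not in os.environ,
    reason="S3 credentials are not available in this environment",
)
def test_full_training_pipeline_with_s3():
    result = run_full_pipeline()
    assert result["status"] == "success"
```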
## Coverage

### Generate Coverage Report

```bash
# HTML report
pytest --cov=mlflow_sklearn --cov-report=html

# Terminal report
pytest --cov=mlflow_sklearn --cov-report=term-missing

# XML for CI
pytest --cov=mlflow_sklearn --cov-report=xml
```
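pytest-cov can also fail the run when total coverage drops below a threshold, which is useful in CI; the 80% figure below is an example, not an agreed project target:

```bash
# Fail the test run if total coverage is below 80%
pytest --cov=mlflow_sklearn --cov-fail-under=80
```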
### Coverage Configuration

```ini
# pytest.ini
[pytest]
addopts = --cov=mlflow_sklearn --cov-report=term-missing
testpaths = tests
markers =
    unit: Unit tests
    integration: Integration tests
    slow: Slow running tests
    e2e: End-to-end tests
```
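Coverage measurement itself can be tuned in a separate `.coveragerc` (or the equivalent `[coverage:*]` sections of `setup.cfg`); a minimal sketch that excludes the tests themselves from the report, with illustrative omit patterns:

```ini
# .coveragerc (sketch)
[run]
omit =
    */tests/*
    */__init__.py

[report]
show_missing = True
```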
## CI/CD Integration

### Bitbucket Pipelines

```yaml
# bitbucket-pipelines.yml
pipelines:
  default:
    - step:
        name: Run Tests
        image: python:3.9
        script:
          - pip install -r requirements.txt
          - pip install pytest pytest-cov
          - pytest --cov --cov-report=xml
        artifacts:
          - coverage.xml
```
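Dependency installation can be sped up with Bitbucket's predefined `pip` cache; a sketch of the same step with caching enabled (assuming dependencies are installed with pip's default cache location):

```yaml
# bitbucket-pipelines.yml (sketch with caching)
pipelines:
  default:
    - step:
        name: Run Tests
        image: python:3.9
        caches:
          - pip          # predefined Bitbucket cache for pip downloads
        script:
          - pip install -r requirements.txt
          - pip install pytest pytest-cov
          - pytest --cov --cov-report=xml
        artifacts:
          - coverage.xml
```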