Testing

This guide covers testing practices for the MLOps platform.

Test Structure

src/mlflow-sklearn/
└── tests/
    ├── __init__.py
    ├── conftest.py           # Shared fixtures
    ├── test_preprocessing.py
    ├── test_training.py
    └── test_evaluation.py

src/mlflow-tf/
└── tests/
    ├── __init__.py
    ├── conftest.py
    ├── unit/                 # Unit tests
    │   ├── test_pipeline.py
    │   └── test_utils.py
    └── integration/          # Integration tests
        └── test_full_pipeline.py

Running Tests

All Tests

# Run all tests
pytest

# Run with verbose output
pytest -v

# Run with coverage
pytest --cov=mlflow_sklearn --cov-report=html

Specific Tests

# Run tests for a specific package
pytest src/mlflow-sklearn/tests/

# Run a specific test file
pytest src/mlflow-sklearn/tests/test_training.py

# Run a specific test function
pytest src/mlflow-sklearn/tests/test_training.py::test_model_accuracy

# Run tests matching a pattern
pytest -k "training"

Test Markers

# Run only unit tests
pytest -m unit

# Run only integration tests
pytest -m integration

# Skip slow tests
pytest -m "not slow"

Writing Tests

Basic Test

import pytest
from mlflow_sklearn.s04_train import train_model

def test_train_returns_model():
    """Test that training returns a model object."""
    config = {"learning_rate": 0.01}
    model = train_model(config)
    
    assert model is not None
    assert hasattr(model, "predict")

Using Fixtures

# conftest.py
import pytest
import pandas as pd

@pytest.fixture
def sample_data():
    """Create sample training data."""
    return pd.DataFrame({
        "feature1": [1.0, 2.0, 3.0, 4.0, 5.0],
        "feature2": [0.1, 0.2, 0.3, 0.4, 0.5],
        "target": [0, 0, 1, 1, 1]
    })

@pytest.fixture
def trained_model(sample_data):
    """Create a trained model for testing."""
    from sklearn.linear_model import LogisticRegression
    
    X = sample_data[["feature1", "feature2"]]
    y = sample_data["target"]
    
    model = LogisticRegression()
    model.fit(X, y)
    return model

# test_evaluation.py
def test_model_prediction(trained_model, sample_data):
    """Test model can make predictions."""
    X = sample_data[["feature1", "feature2"]]
    predictions = trained_model.predict(X)
    
    assert len(predictions) == len(X)
    assert all(p in [0, 1] for p in predictions)

Parametrized Tests

import pytest

@pytest.mark.parametrize("learning_rate,expected_min_accuracy", [
    (0.001, 0.5),
    (0.01, 0.6),
    (0.1, 0.7),
])
def test_learning_rate_impact(learning_rate, expected_min_accuracy, sample_data):
    """Test different learning rates achieve minimum accuracy."""
    model = train_with_learning_rate(sample_data, learning_rate)
    accuracy = evaluate_model(model, sample_data)
    
    assert accuracy >= expected_min_accuracy

Testing MLflow Integration

import os

import mlflow
import pytest

@pytest.fixture
def mlflow_tracking():
    """Set up MLflow for testing."""
    mlflow.set_tracking_uri("sqlite:///test_mlflow.db")
    mlflow.set_experiment("test_experiment")
    yield
    # Cleanup: remove the temporary tracking database
    os.remove("test_mlflow.db")

def test_mlflow_logging(mlflow_tracking):
    """Test that metrics are logged to MLflow."""
    with mlflow.start_run() as run:
        mlflow.log_metric("accuracy", 0.95)
        mlflow.log_param("epochs", 10)
    
    # Verify logging
    client = mlflow.tracking.MlflowClient()
    run_data = client.get_run(run.info.run_id)
    
    assert run_data.data.metrics["accuracy"] == 0.95
    assert run_data.data.params["epochs"] == "10"
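
An alternative fixture (a sketch, not required by the platform) places the tracking database under pytest's built-in tmp_path fixture, so the file lives in a temporary directory and needs no manual cleanup:

@pytest.fixture
def mlflow_tracking_tmp(tmp_path):
    """Point MLflow at a throwaway SQLite store in a pytest temp directory."""
    mlflow.set_tracking_uri(f"sqlite:///{tmp_path / 'test_mlflow.db'}")
    mlflow.set_experiment("test_experiment")
    yield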

Mocking External Services

import io

import pandas as pd
from unittest.mock import patch, MagicMock

from io_library.read_from_s3 import read_csv_from_s3  # function under test

def test_s3_read_with_mock():
    """Test S3 reading without actual S3 access."""
    mock_data = pd.DataFrame({"col1": [1, 2, 3]})
    
    # Patch boto3 where the helper imports it and hand back a fake client
    with patch("io_library.read_from_s3.boto3") as mock_boto:
        mock_s3 = MagicMock()
        mock_boto.client.return_value = mock_s3
        mock_s3.get_object.return_value = {
            "Body": io.BytesIO(mock_data.to_csv(index=False).encode())
        }
        
        result = read_csv_from_s3("bucket", "key")
        
        assert len(result) == 3
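
Because the client is a MagicMock it also records how it was called, so the test can verify the mocked S3 client was actually used. The exact request keyword arguments depend on how read_csv_from_s3 builds the call, so only the call count is checked here (add this inside the with block, after the call):

mock_s3.get_object.assert_called_once()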

Test Categories

Unit Tests

Test individual functions in isolation:

import numpy as np

@pytest.mark.unit
def test_normalize_data():
    """Test data normalization function."""
    data = np.array([0, 50, 100])
    normalized = normalize(data)  # normalize: the project function under test
    
    assert normalized.min() == 0
    assert normalized.max() == 1

Integration Tests

Test component interactions:

@pytest.mark.integration
def test_full_preprocessing_pipeline():
    """Test complete preprocessing pipeline."""
    raw_data = load_test_data()
    
    # Run through all preprocessing steps
    processed = preprocess_pipeline(raw_data)
    
    assert processed.shape[0] > 0
    assert not processed.isnull().any().any()

End-to-End Tests

Test complete workflows:

@pytest.mark.e2e
@pytest.mark.slow
def test_full_training_pipeline():
    """Test complete training pipeline end-to-end."""
    # This test requires S3 access
    result = run_full_pipeline()
    
    assert result["status"] == "success"
    assert result["accuracy"] > 0.8

Coverage

Generate Coverage Report

# HTML report
pytest --cov=mlflow_sklearn --cov-report=html

# Terminal report
pytest --cov=mlflow_sklearn --cov-report=term-missing

# XML for CI
pytest --cov=mlflow_sklearn --cov-report=xml

Coverage Configuration

# pytest.ini
[pytest]
addopts = --cov=mlflow_sklearn --cov-report=term-missing
testpaths = tests
markers =
    unit: Unit tests
    integration: Integration tests
    slow: Slow running tests
    e2e: End-to-end tests
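
Coverage measurement itself can be tuned in a separate .coveragerc; an optional sketch that excludes test files from the report (the source name mirrors the package used above):

# .coveragerc
[run]
source = mlflow_sklearn
omit =
    */tests/*

[report]
show_missing = True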

CI/CD Integration

Bitbucket Pipelines

# bitbucket-pipelines.yml
pipelines:
  default:
    - step:
        name: Run Tests
        image: python:3.9
        script:
          - pip install -r requirements.txt
          - pip install pytest pytest-cov
          - pytest --cov --cov-report=xml
        artifacts:
          - coverage.xml
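
Bitbucket Pipelines also provides a predefined pip cache; adding it under the step avoids re-downloading dependencies on every run:

        caches:
          - pip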