Testing

This guide covers testing strategies for AIDK agents, from unit tests to integration tests with mock models.

Testing Utilities

AIDK provides testing utilities in aidk-shared/testing:

typescript

import {
  createUserMessage,
  createAssistantMessage,
  createToolUseBlock,
  createToolResultBlock,
  createTextBlock,
  createTextStreamSequence,
  captureAsyncGenerator,
  waitFor,
} from "aidk-shared/testing";

Unit Testing Components

Testing Render Output

Test that components render the expected context:

typescript

import { compile } from "aidk-ai-sdk";
import { describe, it, expect } from "vitest";

describe("GreetingAgent", () => {
  it("renders system prompt with user name", async () => {
    const result = await compile(
      <GreetingAgent userName="Alice" />
    );

    expect(result.system).toContain("Alice");
    expect(result.system).toContain("helpful assistant");
  });

  it("includes tools in compilation", async () => {
    const result = await compile(<TaskAgent />);

    expect(result.tools).toHaveProperty("add_task");
    expect(result.tools).toHaveProperty("complete_task");
  });
});

Testing with Initial State

Pass initial COM state to test different scenarios:

typescript

import { compile } from "aidk-ai-sdk";

describe("TaskAgent", () => {
  it("renders tasks when present", async () => {
    const initialState = {
      tasks: [
        { id: "1", text: "Buy milk", done: false },
        { id: "2", text: "Walk dog", done: true },
      ],
    };

    const result = await compile(
      <TaskAgent />,
      { initialState }
    );

    expect(result.system).toContain("Buy milk");
    expect(result.system).toContain("Walk dog");
  });

  it("shows empty state message when no tasks", async () => {
    const result = await compile(
      <TaskAgent />,
      { initialState: { tasks: [] } }
    );

    expect(result.system).toContain("No tasks yet");
  });
});

Testing Conditional Rendering

typescript

describe("AdaptiveAgent", () => {
  it("uses detailed prompt after 5 ticks", async () => {
    const result = await compile(
      <AdaptiveAgent />,
      { tickState: { tick: 6 } }
    );

    expect(result.system).toContain("detailed");
  });

  it("uses concise prompt on early ticks", async () => {
    const result = await compile(
      <AdaptiveAgent />,
      { tickState: { tick: 2 } }
    );

    expect(result.system).not.toContain("detailed");
  });
});

Testing Tools

Testing Tool Handlers

Test tool handlers in isolation:

typescript

import { describe, it, expect, vi } from "vitest";

describe("SearchTool", () => {
  it("returns search results", async () => {
    const mockService = {
      query: vi.fn().mockResolvedValue([
        { title: "Result 1", url: "https://example.com/1" },
        { title: "Result 2", url: "https://example.com/2" },
      ]),
    };

    const tool = createSearchTool({ service: mockService });
    const result = await tool.handler({ query: "test query" });

    expect(mockService.query).toHaveBeenCalledWith("test query");
    expect(result).toHaveLength(2);
  });

  it("handles empty results", async () => {
    const mockService = {
      query: vi.fn().mockResolvedValue([]),
    };

    const tool = createSearchTool({ service: mockService });
    const result = await tool.handler({ query: "no results" });

    expect(result).toEqual([]);
  });
});

Testing Tool Lifecycle

Test tool lifecycle hooks:

typescript

describe("DatabaseTool", () => {
  it("connects on mount", async () => {
    const mockDb = {
      connect: vi.fn().mockResolvedValue({ connected: true }),
    };

    const com = createMockCOM();
    const tool = createDatabaseTool({ db: mockDb });

    await tool.onMount?.(com);

    expect(mockDb.connect).toHaveBeenCalled();
    expect(com.setState).toHaveBeenCalledWith(
      "connection",
      expect.objectContaining({ connected: true })
    );
  });

  it("disconnects on unmount", async () => {
    const mockConnection = { close: vi.fn() };
    const com = createMockCOM({
      getState: () => mockConnection,
    });

    const tool = createDatabaseTool({});
    await tool.onUnmount?.(com);

    expect(mockConnection.close).toHaveBeenCalled();
  });
});

// Helper to create mock COM
function createMockCOM(overrides = {}) {
  return {
    setState: vi.fn(),
    getState: vi.fn(),
    ...overrides,
  };
}

Testing Tool Rendering

typescript

describe("InventoryTool", () => {
  it("renders current inventory", async () => {
    const com = createMockCOM({
      getState: (key) => {
        if (key === "inventory") {
          return [
            { id: "1", name: "Widget", quantity: 10 },
            { id: "2", name: "Gadget", quantity: 5 },
          ];
        }
      },
    });

    const tool = createInventoryTool();
    const rendered = tool.render?.(com, {});

    // Compile the rendered JSX
    const result = await compile(rendered);

    expect(result.system).toContain("Widget");
    expect(result.system).toContain("10");
  });
});

Integration Testing

Testing with Mock Models

Create a mock model adapter for deterministic testing:

typescript

import { createCompiler } from "aidk-ai-sdk";

function createMockModel(responses: string[]) {
  let callIndex = 0;

  return async function mockModel(input) {
    const response = responses[callIndex] || "Default response";
    callIndex++;

    return {
      text: response,
      toolCalls: [],
      usage: { promptTokens: 100, completionTokens: 50 },
    };
  };
}

describe("ChatAgent integration", () => {
  it("handles multi-turn conversation", async () => {
    const mockModel = createMockModel([
      "Hello! How can I help you today?",
      "I'd be happy to help with that.",
    ]);

    const compiler = createCompiler();
    const events: any[] = [];

    for await (const event of compiler.stream(
      <ChatAgent />,
      mockModel,
      { timeline: [createUserMessage("Hi there")] }
    )) {
      events.push(event);
    }

    const textEvents = events.filter(e => e.type === "text");
    expect(textEvents[0].content).toContain("Hello");
  });
});

Testing Tool Execution Flow

typescript

describe("TaskAgent integration", () => {
  it("executes tool and updates state", async () => {
    // Mock model that calls the add_task tool
    const mockModel = createMockModelWithToolCall({
      name: "add_task",
      arguments: { text: "New task" },
    });

    const compiler = createCompiler();
    const finalState = await runToCompletion(
      compiler,
      <TaskAgent />,
      mockModel,
      { timeline: [createUserMessage("Add a task")] }
    );

    expect(finalState.tasks).toContainEqual(
      expect.objectContaining({ text: "New task" })
    );
  });
});

async function runToCompletion(compiler, agent, model, input) {
  let state = {};

  for await (const event of compiler.stream(agent, model, input)) {
    if (event.type === "state_update") {
      state = { ...state, ...event.state };
    }
  }

  return state;
}

Testing Streaming

typescript

import { captureAsyncGenerator } from "aidk-shared/testing";

describe("streaming", () => {
  it("emits chunks in order", async () => {
    const mockModel = createStreamingMockModel([
      { type: "text_delta", content: "Hello" },
      { type: "text_delta", content: " world" },
      { type: "finish", reason: "stop" },
    ]);

    const compiler = createCompiler();
    const events = await captureAsyncGenerator(
      compiler.stream(<ChatAgent />, mockModel)
    );

    const textDeltas = events
      .filter(e => e.type === "text_delta")
      .map(e => e.content);

    expect(textDeltas).toEqual(["Hello", " world"]);
  });
});

Testing Patterns

Snapshot Testing

Capture and compare compiled output:

typescript

describe("ComplexAgent", () => {
  it("matches snapshot", async () => {
    const result = await compile(
      <ComplexAgent config={defaultConfig} />,
      { initialState: defaultState }
    );

    expect(result).toMatchSnapshot();
  });
});

Testing Hooks

Test that hooks fire correctly:

typescript

describe("lifecycle hooks", () => {
  it("calls onMount once", async () => {
    const onMountSpy = vi.fn();

    class TestAgent extends Component {
      async onMount(com) {
        onMountSpy(com);
      }
      render() {
        return <System>Test</System>;
      }
    }

    const engine = createEngine();
    await engine.run(<TestAgent />);

    expect(onMountSpy).toHaveBeenCalledTimes(1);
  });

  it("calls onTickStart on each tick", async () => {
    const onTickStartSpy = vi.fn();

    class TestAgent extends Component {
      async onTickStart(com, state) {
        onTickStartSpy(state.tick);
        if (state.tick >= 3) {
          return { stop: true };
        }
      }
      render() {
        return <System>Test</System>;
      }
    }

    const engine = createEngine();
    await engine.run(<TestAgent />, mockModel);

    expect(onTickStartSpy).toHaveBeenCalledTimes(3);
    expect(onTickStartSpy).toHaveBeenCalledWith(1);
    expect(onTickStartSpy).toHaveBeenCalledWith(2);
    expect(onTickStartSpy).toHaveBeenCalledWith(3);
  });
});

Testing Error Handling

typescript

describe("error handling", () => {
  it("recovers from tool errors", async () => {
    const failingTool = createTool({
      name: "failing_tool",
      handler: async () => {
        throw new Error("Tool failed");
      },
    });

    class RecoveringAgent extends Component {
      private error = signal<string | null>(null);

      async onError(error) {
        this.error.set(error.message);
        return { retry: false, continue: true };
      }

      render() {
        return (
          <>
            {this.error() && <System>Error occurred: {this.error()}</System>}
            {failingTool}
          </>
        );
      }
    }

    const engine = createEngine();
    // Should not throw
    await expect(engine.run(<RecoveringAgent />)).resolves.toBeDefined();
  });
});

Best Practices

Test at multiple levels: Unit test handlers, integration test flows
Use deterministic mocks: Avoid flaky tests with predictable mock models
Test edge cases: Empty states, error conditions, boundary values
Snapshot sparingly: Only for complex outputs that are hard to assert
Isolate side effects: Mock external services and databases
Test the contract: Focus on inputs/outputs, not implementation details

Test Setup

Example Vitest configuration:

typescript

// vitest.config.ts
import { defineConfig } from "vitest/config";

export default defineConfig({
  test: {
    globals: true,
    environment: "node",
    include: ["**/*.spec.ts", "**/*.test.ts"],
    coverage: {
      provider: "v8",
      include: ["src/**/*.ts"],
      exclude: ["src/**/*.d.ts", "src/**/*.spec.ts"],
    },
  },
});

Next Steps

Error Handling - Testing error recovery patterns
API Reference - Complete testing utility documentation

Testing ​

Testing Utilities ​

Unit Testing Components ​

Testing Render Output ​

Testing with Initial State ​

Testing Conditional Rendering ​

Testing Tools ​

Testing Tool Handlers ​

Testing Tool Lifecycle ​

Testing Tool Rendering ​

Integration Testing ​

Testing with Mock Models ​

Testing Tool Execution Flow ​

Testing Streaming ​

Testing Patterns ​

Snapshot Testing ​

Testing Hooks ​

Testing Error Handling ​

Best Practices ​

Test Setup ​

Next Steps ​

Testing

Testing Utilities

Unit Testing Components

Testing Render Output

Testing with Initial State

Testing Conditional Rendering

Testing Tools

Testing Tool Handlers

Testing Tool Lifecycle

Testing Tool Rendering

Integration Testing

Testing with Mock Models

Testing Tool Execution Flow

Testing Streaming

Testing Patterns

Snapshot Testing

Testing Hooks

Testing Error Handling

Best Practices

Test Setup

Next Steps