Aegis Orchestrator
Reference

gRPC API Reference

aegis.runtime.v1 AegisRuntime service — the Temporal-worker-facing gRPC transport.

gRPC API Reference

AEGIS exposes a gRPC service on port 50051 (default). The service is defined in Protocol Buffers under the aegis.runtime.v1 package.

Proto source files are in the aegis-proto repository under proto/aegis_runtime.proto.


Authentication

gRPC auth is configured via spec.grpc_auth in aegis-config.yaml. When enabled, all methods except the configured exempt_methods require a Bearer JWT (Keycloak-issued) in the authorization metadata key:

authorization: Bearer <keycloak-issued-jwt>

The /aegis.v1.InnerLoop/Generate method is exempt by default (used by agent bootstrap containers on the internal network). Unauthenticated calls receive UNAUTHENTICATED.


Service: AegisRuntime

The single service exported by the orchestrator gRPC server:

package aegis.runtime.v1;

service AegisRuntime {
  // Server-streaming: execute an agent with 100monkeys iterative refinement,
  // emitting typed ExecutionEvent messages as execution progresses.
  rpc ExecuteAgent(ExecuteAgentRequest) returns (stream ExecutionEvent);
  // Execute a shell command on the orchestrator host (used by workflow System states).
  rpc ExecuteSystemCommand(ExecuteSystemCommandRequest) returns (ExecuteSystemCommandResponse);
  // Gradient validation using one or more judge agents with configurable consensus.
  rpc ValidateWithJudges(ValidateRequest) returns (ValidateResponse);
  // Query Cortex memory for patterns matching a given error signature.
  rpc QueryCortexPatterns(QueryCortexRequest) returns (QueryCortexResponse);
  // Store a learned error-solution pattern, or increment the weight of a matching one.
  rpc StoreCortexPattern(StoreCortexPatternRequest) returns (StoreCortexPatternResponse);
  // Issue a SEAL SecurityToken to an agent container after verifying its identity.
  rpc AttestAgent(AttestAgentRequest) returns (AttestAgentResponse);
  // Invoke a tool on behalf of an agent via orchestrator mediation.
  rpc InvokeTool(InvokeToolRequest) returns (InvokeToolResponse);
  // Ingest an external stimulus and route it to a workflow.
  rpc IngestStimulus(IngestStimulusRequest) returns (IngestStimulusResponse);
}

ExecuteAgent (server-streaming)

Execute an agent with 100monkeys iterative refinement. Streams typed events as execution progresses.

rpc ExecuteAgent(ExecuteAgentRequest) returns (stream ExecutionEvent);

Request:

message ExecuteAgentRequest {
  string agent_id                         = 1;
  string input                            = 2;
  string context_json                     = 3;
  optional SecurityPolicy security_policy = 4;
  optional string workflow_execution_id   = 5; // Link to a workflow execution
}

Response stream — ExecutionEvent:

Events are strongly typed via a oneof discriminant:

message ExecutionEvent {
  oneof event {
    ExecutionStarted   execution_started   = 1;
    IterationStarted   iteration_started   = 2;
    IterationOutput    iteration_output    = 3;
    IterationCompleted iteration_completed = 4;
    IterationFailed    iteration_failed    = 5;
    RefinementApplied  refinement_applied  = 6;
    ExecutionCompleted execution_completed = 7;
    ExecutionFailed    execution_failed    = 8;
  }
}

Key event messages:

message ExecutionStarted {
  string execution_id = 1;
  string agent_id     = 2;
  string started_at   = 3; // ISO 8601
}

message IterationStarted {
  string execution_id     = 1;
  uint32 iteration_number = 2;
  string action           = 3;
  string started_at       = 4;
}

message IterationFailed {
  string execution_id     = 1;
  uint32 iteration_number = 2;
  IterationError error    = 3;
  string failed_at        = 4;
}

message IterationError {
  string error_type          = 1;
  string message             = 2;
  optional string stacktrace = 3;
}

message RefinementApplied {
  string execution_id     = 1;
  uint32 iteration_number = 2;
  string code_diff        = 3;
  string applied_at       = 4;
}

message ExecutionCompleted {
  string execution_id     = 1;
  string final_output     = 2;
  uint32 total_iterations = 3;
  string completed_at     = 4;
}

message ExecutionFailed {
  string execution_id     = 1;
  string reason           = 2;
  uint32 total_iterations = 3;
  string failed_at        = 4;
}

Service account tenant delegation:

Service accounts should include x-tenant-id in call metadata to execute agents in the target user tenant. Example: metadata.add("x-tenant-id", "zaru-consumer"). Without this key, service account calls scope to the aegis-system tenant by default.

Example (TypeScript):

import * as grpc from '@grpc/grpc-js';
import * as protoLoader from '@grpc/proto-loader';

// keepCase preserves the snake_case field names declared in the .proto file.
// Without it, proto-loader converts field names to camelCase, so the
// snake_case request keys and the `event.execution_started`-style lookups
// below would not match.
const pkgDef = protoLoader.loadSync('aegis_runtime.proto', { keepCase: true });

// loadPackageDefinition returns a loosely typed GrpcObject; cast it before
// walking the package path so the nested property access type-checks.
const proto = (grpc.loadPackageDefinition(pkgDef) as any).aegis.runtime.v1;

// Insecure channel credentials: suitable for local development only.
const client = new proto.AegisRuntime('localhost:50051', grpc.credentials.createInsecure());

// Call metadata. When spec.grpc_auth is enabled, uncomment the line below and
// supply a Keycloak-issued JWT; otherwise the call is rejected as UNAUTHENTICATED.
const metadata = new grpc.Metadata();
// metadata.add('authorization', 'Bearer <keycloak-issued-jwt>');

const stream = client.ExecuteAgent(
  {
    agent_id: 'agt-uuid-here',
    input: 'Write a primality check function',
    context_json: '{}',
  },
  metadata,
);

// Exactly one member of the ExecutionEvent oneof is populated per message.
stream.on('data', (event: any) => {
  if (event.execution_started) {
    console.log('Started:', event.execution_started.execution_id);
  } else if (event.iteration_started) {
    console.log(`Iteration ${event.iteration_started.iteration_number}: ${event.iteration_started.action}`);
  } else if (event.execution_completed) {
    console.log('Output:', event.execution_completed.final_output);
  } else if (event.execution_failed) {
    // Terminal failure is delivered as a stream event, not as a gRPC error.
    console.error('Failed:', event.execution_failed.reason);
  }
});

stream.on('end', () => console.log('Done'));
stream.on('error', (err: Error) => console.error(err));

ExecuteSystemCommand

Execute a shell command on the orchestrator host. Used by workflow System states.

rpc ExecuteSystemCommand(ExecuteSystemCommandRequest) returns (ExecuteSystemCommandResponse);

message ExecuteSystemCommandRequest {
  string command                  = 1;
  map<string, string> env         = 2;
  optional string workdir         = 3;
  optional uint32 timeout_seconds = 4;
}

message ExecuteSystemCommandResponse {
  int32  exit_code   = 1;
  string stdout      = 2;
  string stderr      = 3;
  uint64 duration_ms = 4;
}

ValidateWithJudges

Gradient validation using one or more judge agents with configurable consensus.

rpc ValidateWithJudges(ValidateRequest) returns (ValidateResponse);

message ValidateRequest {
  string output              = 1;
  string task                = 2;
  repeated JudgeConfig judges = 3;
  ConsensusConfig consensus  = 4;
  string context_json        = 5;
}

message JudgeConfig {
  string agent_id       = 1;
  string input_template = 2;
  float  weight         = 3;
}

message ConsensusConfig {
  enum Strategy {
    WEIGHTED_AVERAGE = 0;
    MAJORITY_VOTE    = 1;
    UNANIMOUS        = 2;
    ANY_APPROVED     = 3;
  }
  Strategy       strategy  = 1;
  optional float threshold = 2;
  optional float agreement = 3;
  optional uint32 n        = 4;
}

message ValidateResponse {
  float  score                          = 1; // 0.0–1.0
  float  confidence                     = 2; // 0.0–1.0
  string reasoning                      = 3;
  bool   binary_valid                   = 4;
  repeated JudgeResult individual_results = 5;
}

QueryCortexPatterns

Query Cortex memory for patterns matching a given error signature, to inform the 100monkeys refinement loop.

rpc QueryCortexPatterns(QueryCortexRequest) returns (QueryCortexResponse);

message QueryCortexRequest {
  string error_signature           = 1;
  optional string error_type       = 2;
  optional uint32 limit            = 3;
  optional float min_success_score = 4;
}

message QueryCortexResponse {
  repeated CortexPattern patterns = 1;
}

StoreCortexPattern

Store a new learned error-solution pattern, or increment the weight of an existing semantically matching pattern (deduplication).

rpc StoreCortexPattern(StoreCortexPatternRequest) returns (StoreCortexPatternResponse);

message StoreCortexPatternRequest {
  string error_signature        = 1;
  string error_type             = 2;
  string error_message          = 3;
  string solution_approach      = 4;
  optional string solution_code = 5;
  // NOTE(review): field number 6 does not appear in this excerpt — confirm
  // against aegis_runtime.proto whether it is reserved or was omitted here.
  repeated string tags          = 7;
}

message StoreCortexPatternResponse {
  string pattern_id    = 1;
  bool   deduplicated  = 2; // true if an existing pattern's weight was incremented
  uint32 new_frequency = 3;
}

AttestAgent

Issue a SEAL SecurityToken to an agent container after verifying its identity. This is the first call an agent makes before any tool invocations are possible. See the SEAL reference for the full attestation flow.

rpc AttestAgent(AttestAgentRequest) returns (AttestAgentResponse);

message AttestAgentRequest {
  string agent_id      = 1; // UUID of the agent making the request
  string execution_id  = 2; // UUID of the active execution
  string container_id  = 3; // Docker container ID (runtime identity proof)
  string public_key_pem = 4; // Ephemeral Ed25519 public key (PEM-encoded)
}

message AttestAgentResponse {
  string security_token = 1; // Signed JWT SecurityToken for subsequent InvokeTool calls
}

Notes:

  • The agent generates an ephemeral Ed25519 keypair in memory at startup. The private key never leaves the container.
  • The security_token is a short-lived JWT valid for the duration of the execution. Include it in every InvokeToolRequest.security_token.
  • Also available over REST at POST /v1/seal/attest.

InvokeTool

Invoke a tool on behalf of an agent via orchestrator mediation. Agents do not call MCP servers directly — all tool calls are routed through the orchestrator, which verifies the SEAL envelope before forwarding. See Tool Routing for the three routing paths.

rpc InvokeTool(InvokeToolRequest) returns (InvokeToolResponse);

message InvokeToolRequest {
  string protocol       = 1; // Always "seal/v1"
  string security_token = 2; // JWT from AttestAgentResponse
  string signature      = 3; // Ed25519 signature of payload bytes (base64)
  bytes  payload        = 4; // JSON-RPC payload for the MCP tool call
  string timestamp      = 5; // ISO 8601 UTC timestamp of envelope creation
}

message InvokeToolResponse {
  bytes result_json = 1; // Success or failure JSON result
}

Notes:

  • payload is a standard MCP JSON-RPC call_tool request serialized to bytes.
  • signature must be the Ed25519 signature over payload, produced with the ephemeral private key registered during AttestAgent.
  • The orchestrator verifies the signature, checks the security_token, evaluates the SecurityContext policy, then routes the call to the appropriate tool server.
  • Available over REST at POST /v1/seal/invoke (spec-compliant path) and POST /v1/invoke (convenience alias).
  • Returns PERMISSION_DENIED if the SecurityContext denies the tool call.

IngestStimulus

Ingest an external stimulus and route it to a workflow via the two-stage hybrid pipeline.

rpc IngestStimulus(IngestStimulusRequest) returns (IngestStimulusResponse);

message IngestStimulusRequest {
  string source_name                = 1; // Source identifier (e.g., "github", "stripe")
  string content                    = 2; // Raw event payload
  optional string idempotency_key   = 3; // Unique key for deduplication (24h TTL)
  map<string, string> headers       = 4; // Forwarded HTTP headers
}

message IngestStimulusResponse {
  string stimulus_id            = 1; // UUID of the ingested stimulus
  string workflow_execution_id  = 2; // UUID of the workflow execution started
}

Status Codes:

| gRPC Status | Meaning |
| --- | --- |
| OK | Stimulus routed and workflow started. |
| ALREADY_EXISTS | Idempotent duplicate (returns original stimulus_id). |
| INVALID_ARGUMENT | Low confidence classification or no router configured. |
| INTERNAL | RouterAgent or workflow execution failure. |
| PERMISSION_DENIED | OIDC identity not authorized. |

Error Codes

| gRPC Status | Meaning |
| --- | --- |
| OK | Success. |
| NOT_FOUND | Agent or pattern not found. |
| INVALID_ARGUMENT | Malformed request or input validation error. |
| PERMISSION_DENIED | SecurityContext or policy violation. |
| RESOURCE_EXHAUSTED | Rate limit or iteration limit reached. |
| UNAUTHENTICATED | Missing or invalid Bearer token. |
| INTERNAL | Unhandled error — check daemon logs. |
| UNAVAILABLE | Orchestrator temporarily down. |

Service: NodeClusterService

The cluster coordination service exposed by controller nodes. All inter-node cluster RPCs use this service. Defined in aegis_cluster.proto in the aegis-proto repository.

Port: 50056 (default; configurable via spec.cluster.cluster_grpc_port)
Package: aegis.cluster.v1

All RPCs except AttestNode and ChallengeNode require a valid NodeSecurityToken in the authorization metadata key and a SealNodeEnvelope wrapping the payload. AttestNode and ChallengeNode are unauthenticated: together they form the attestation handshake through which a new worker obtains its NodeSecurityToken.

package aegis.cluster.v1;

service NodeClusterService {
  // Node attestation and registration
  rpc AttestNode(AttestNodeRequest) returns (AttestNodeResponse);
  rpc ChallengeNode(ChallengeNodeRequest) returns (ChallengeNodeResponse);
  rpc RegisterNode(RegisterNodeRequest) returns (RegisterNodeResponse);
  rpc Heartbeat(HeartbeatRequest) returns (HeartbeatResponse);
  rpc DeregisterNode(DeregisterNodeRequest) returns (DeregisterNodeResponse);

  // Execution routing
  rpc RouteExecution(RouteExecutionRequest) returns (RouteExecutionResponse);
  rpc ForwardExecution(ForwardExecutionRequest) returns (stream ExecutionEvent);

  // Configuration management
  rpc SyncConfig(SyncConfigRequest) returns (SyncConfigResponse);
  rpc PushConfig(PushConfigRequest) returns (PushConfigResponse);

  // Cluster introspection
  rpc ListPeers(ListPeersRequest) returns (ListPeersResponse);
}

RPC Summary

| RPC | Auth Required | Direction | Description |
| --- | --- | --- | --- |
| AttestNode | No | Worker → Controller | Initiate node attestation; the response carries the challenge nonce |
| ChallengeNode | No | Worker → Controller | Submit the signed challenge nonce to prove keypair ownership; the response carries the NodeSecurityToken |
| RegisterNode | Yes | Worker → Controller | Register with capability advertisement after attestation |
| Heartbeat | Yes | Worker → Controller | Periodic status update; response may carry NodeCommands |
| DeregisterNode | Yes | Worker → Controller | Graceful deregistration |
| RouteExecution | Yes | Any → Controller | Request an ExecutionRoute (target worker) for an execution |
| ForwardExecution | Yes | Controller → Worker | Execute an agent on this worker; streams ExecutionEvent back |
| SyncConfig | Yes | Worker → Controller | Request current config from controller |
| PushConfig | Yes | Controller → Worker | Controller pushes updated config to worker |
| ListPeers | Yes | Any → Controller | List all registered NodePeers and their status |

Node Attestation Flow

Before a worker node can participate in the cluster, it must attest its identity to the controller:

Worker                              Controller
  │                                      │
  │  AttestNode(node_id, public_key_pem, role)
  │─────────────────────────────────────▶│
  │                                      │  (generate challenge nonce)
  │  AttestNodeResponse(challenge_nonce, challenge_id)
  │◀─────────────────────────────────────│
  │                                      │
  │  (sign challenge_nonce with Ed25519 private key)
  │                                      │
  │  ChallengeNode(challenge_id, signature)
  │─────────────────────────────────────▶│
  │                                      │  (verify signature)
  │  ChallengeNodeResponse(node_security_token, expires_at)
  │◀─────────────────────────────────────│
  │                                      │
  │  RegisterNode(capabilities_advertisement)
  │─────────────────────────────────────▶│
  │                                      │
  │  RegisterNodeResponse(peer_id)       │
  │◀─────────────────────────────────────│
  │                                      │
  │  [every 30s] Heartbeat(status, load) │
  │─────────────────────────────────────▶│
  │  HeartbeatResponse(commands?)        │
  │◀─────────────────────────────────────│

The NodeSecurityToken is an RS256 JWT (1-hour TTL) signed by the controller's OpenBao Transit key. It is analogous to the agent SecurityToken from SEAL but scoped to node identity. All subsequent RPCs wrap their payload in:

message SealNodeEnvelope {
  string node_security_token = 1; // NodeSecurityToken JWT
  bytes  signature           = 2; // Ed25519 signature over payload
  bytes  payload             = 3; // Serialized request message
}

AttestNode

Initiates the attestation handshake. No authentication required. Returns a challenge nonce that the worker must sign.

rpc AttestNode(AttestNodeRequest) returns (AttestNodeResponse);

message AttestNodeRequest {
  string node_id       = 1; // Must match spec.node.id / spec.cluster.node_id
  string public_key_pem = 2; // Ed25519 public key in PEM format
  NodeRole role        = 3; // WORKER or HYBRID
}

message AttestNodeResponse {
  string challenge_nonce = 1; // Random nonce to be signed with private key
  string challenge_id    = 2; // Correlation ID for the follow-up ChallengeNode call
}

enum NodeRole {
  CONTROLLER = 0;
  WORKER     = 1;
  HYBRID     = 2;
}

ChallengeNode

Called by the worker to complete the attestation challenge. The worker signs the challenge_nonce from AttestNodeResponse with its Ed25519 private key and returns the signature. On success, returns a NodeSecurityToken.

rpc ChallengeNode(ChallengeNodeRequest) returns (ChallengeNodeResponse);

message ChallengeNodeRequest {
  string challenge_id        = 1; // From AttestNodeResponse
  bytes  signature           = 2; // Ed25519 signature over challenge_nonce bytes
}

message ChallengeNodeResponse {
  string node_security_token = 1; // RS256 JWT; use as Bearer in subsequent RPCs
  string expires_at          = 2; // ISO 8601 expiry timestamp (1-hour TTL)
}

RegisterNode

Registers the worker with its capability advertisement after successful attestation. Must be called before Heartbeat.

rpc RegisterNode(RegisterNodeRequest) returns (RegisterNodeResponse);

message RegisterNodeRequest {
  NodeCapabilityAdvertisement capabilities = 1;
}

message NodeCapabilityAdvertisement {
  uint32   gpu_count            = 1;
  float    vram_gb              = 2;
  uint32   cpu_cores            = 3;
  float    available_memory_gb  = 4;
  repeated string supported_runtimes = 5; // e.g. ["docker", "firecracker"]
  repeated string tags               = 6; // Matched against ExecutionTarget tags
}

message RegisterNodeResponse {
  string peer_id = 1; // Assigned NodePeer ID within the NodeCluster aggregate
}

Heartbeat

Periodic keep-alive sent by worker nodes (default: every 30 seconds). The response may contain NodeCommands instructing the worker to drain, update config, or shut down.

rpc Heartbeat(HeartbeatRequest) returns (HeartbeatResponse);

message HeartbeatRequest {
  string peer_id                             = 1; // From RegisterNodeResponse
  uint32 active_execution_count              = 2;
  float  cpu_utilization_percent             = 3;
  float  memory_utilization_percent          = 4;
  NodePeerStatus status                      = 5;
}

message HeartbeatResponse {
  repeated NodeCommand commands = 1; // Optional commands for the worker to execute
}

enum NodePeerStatus {
  ACTIVE    = 0;
  DRAINING  = 1;
  UNHEALTHY = 2;
}

message NodeCommand {
  oneof command {
    DrainCommand   drain   = 1;
    ShutdownCommand shutdown = 2;
    SyncConfigCommand sync_config = 3;
  }
}

RouteExecution

Returns the target worker for a given execution. The NodeRouter uses round-robin among healthy workers matching the requested tags (Phase 1).

rpc RouteExecution(RouteExecutionRequest) returns (RouteExecutionResponse);

message RouteExecutionRequest {
  string execution_id                    = 1;
  NodeCapabilityAdvertisement required   = 2; // Minimum capabilities required
}

message RouteExecutionResponse {
  string target_node_id       = 1;
  string worker_grpc_address  = 2; // e.g. "worker-gpu-001.internal:50051"
}

ForwardExecution (server-streaming)

Forwards an execution to this worker. The worker executes the agent and streams ExecutionEvent messages back to the caller. Uses the same ExecutionEvent type as AegisRuntime.ExecuteAgent.

rpc ForwardExecution(ForwardExecutionRequest) returns (stream ExecutionEvent);

message ForwardExecutionRequest {
  string execution_id  = 1;
  string agent_id      = 2;
  string manifest_yaml = 3; // Serialized agent manifest YAML
  string input         = 4;
  string context_json  = 5;
}

See ExecuteAgent for the ExecutionEvent stream format.


ListPeers

Returns all registered NodePeers in the NodeCluster aggregate along with their current status and capabilities.

rpc ListPeers(ListPeersRequest) returns (ListPeersResponse);

message ListPeersRequest {
  optional NodePeerStatus filter_status = 1; // Filter by status; omit for all peers
}

message ListPeersResponse {
  repeated NodePeer peers = 1;
}

message NodePeer {
  string   node_id            = 1;
  NodeRole role               = 2;
  NodeCapabilityAdvertisement capabilities = 3;
  string   last_heartbeat_at  = 4; // ISO 8601
  NodePeerStatus status       = 5;
  string   grpc_address       = 6;
}

SyncConfig / PushConfig

Configuration synchronization between controller and workers. SyncConfig is worker-initiated (typically triggered by a SyncConfigCommand in a HeartbeatResponse); PushConfig is controller-initiated.

rpc SyncConfig(SyncConfigRequest) returns (SyncConfigResponse);
rpc PushConfig(PushConfigRequest) returns (PushConfigResponse);

message SyncConfigRequest {}

message SyncConfigResponse {
  string config_yaml = 1; // Controller-managed sections of NodeConfig YAML
  string config_hash = 2; // SHA-256 of config_yaml for idempotency checks
}

message PushConfigRequest {
  string config_yaml = 1;
  string config_hash = 2;
}

message PushConfigResponse {
  bool accepted = 1;
  optional string rejection_reason = 2;
}

On this page