gRPC API Reference
aegis.runtime.v1 AegisRuntime service — the Temporal-worker-facing gRPC transport.
gRPC API Reference
AEGIS exposes a gRPC service on port 50051 (default). The service is defined in Protocol Buffers under the aegis.runtime.v1 package.
Proto source files are in the aegis-proto repository under proto/aegis_runtime.proto.
Authentication
gRPC auth is configured via spec.grpc_auth in aegis-config.yaml. When enabled, all methods except the configured exempt_methods require a Bearer JWT (Keycloak-issued) in the authorization metadata key:
authorization: Bearer <keycloak-issued-jwt>
The /aegis.v1.InnerLoop/Generate method is exempt by default (used by agent bootstrap containers on the internal network). Unauthenticated calls to any non-exempt method receive UNAUTHENTICATED.
Service: AegisRuntime
The single service exported by the orchestrator gRPC server:
package aegis.runtime.v1;
service AegisRuntime {
rpc ExecuteAgent(ExecuteAgentRequest) returns (stream ExecutionEvent);
rpc ExecuteSystemCommand(ExecuteSystemCommandRequest) returns (ExecuteSystemCommandResponse);
rpc ValidateWithJudges(ValidateRequest) returns (ValidateResponse);
rpc QueryCortexPatterns(QueryCortexRequest) returns (QueryCortexResponse);
rpc StoreCortexPattern(StoreCortexPatternRequest) returns (StoreCortexPatternResponse);
rpc AttestAgent(AttestAgentRequest) returns (AttestAgentResponse);
rpc InvokeTool(InvokeToolRequest) returns (InvokeToolResponse);
rpc IngestStimulus(IngestStimulusRequest) returns (IngestStimulusResponse);
}
ExecuteAgent (server-streaming)
Execute an agent with 100monkeys iterative refinement. Streams typed events as execution progresses.
rpc ExecuteAgent(ExecuteAgentRequest) returns (stream ExecutionEvent);
Request:
message ExecuteAgentRequest {
string agent_id = 1;
string input = 2;
string context_json = 3;
optional SecurityPolicy security_policy = 4;
optional string workflow_execution_id = 5; // Link to a workflow execution
}
Response stream — ExecutionEvent:
Events are strongly typed via a oneof discriminant:
message ExecutionEvent {
oneof event {
ExecutionStarted execution_started = 1;
IterationStarted iteration_started = 2;
IterationOutput iteration_output = 3;
IterationCompleted iteration_completed = 4;
IterationFailed iteration_failed = 5;
RefinementApplied refinement_applied = 6;
ExecutionCompleted execution_completed = 7;
ExecutionFailed execution_failed = 8;
}
}
Key event messages:
message ExecutionStarted {
string execution_id = 1;
string agent_id = 2;
string started_at = 3; // ISO 8601
}
message IterationStarted {
string execution_id = 1;
uint32 iteration_number = 2;
string action = 3;
string started_at = 4;
}
message IterationFailed {
string execution_id = 1;
uint32 iteration_number = 2;
IterationError error = 3;
string failed_at = 4;
}
message IterationError {
string error_type = 1;
string message = 2;
optional string stacktrace = 3;
}
message RefinementApplied {
string execution_id = 1;
uint32 iteration_number = 2;
string code_diff = 3;
string applied_at = 4;
}
message ExecutionCompleted {
string execution_id = 1;
string final_output = 2;
uint32 total_iterations = 3;
string completed_at = 4;
}
message ExecutionFailed {
string execution_id = 1;
string reason = 2;
uint32 total_iterations = 3;
string failed_at = 4;
}
Service account tenant delegation:
Service accounts should include x-tenant-id in call metadata to execute agents in the target user tenant. Example: metadata.add("x-tenant-id", "zaru-consumer"). Without this key, service account calls scope to the aegis-system tenant by default.
Example (TypeScript):
import * as grpc from '@grpc/grpc-js';
import * as protoLoader from '@grpc/proto-loader';
// keepCase: true preserves the snake_case field names declared in the .proto file.
// By default @grpc/proto-loader converts field names to camelCase, in which case
// none of the snake_case keys used below (agent_id, execution_started, ...) would match.
const pkgDef = protoLoader.loadSync('aegis_runtime.proto', { keepCase: true });
// Cast the loaded package object itself (not only the final member) so the
// nested aegis.runtime.v1 lookup type-checks.
const proto = (grpc.loadPackageDefinition(pkgDef) as any).aegis.runtime.v1;
const client = new proto.AegisRuntime('localhost:50051', grpc.credentials.createInsecure());

// ExecuteAgent is server-streaming: the call returns a readable stream of
// ExecutionEvent messages, each carrying exactly one member of the `event` oneof.
const stream = client.ExecuteAgent({
  agent_id: 'agt-uuid-here',
  input: 'Write a primality check function',
  context_json: '{}',
});

stream.on('data', (event: any) => {
  if (event.execution_started) {
    console.log('Started:', event.execution_started.execution_id);
  } else if (event.iteration_started) {
    console.log(`Iteration ${event.iteration_started.iteration_number}: ${event.iteration_started.action}`);
  } else if (event.execution_completed) {
    console.log('Output:', event.execution_completed.final_output);
  }
});
stream.on('end', () => console.log('Done'));
stream.on('error', (err: Error) => console.error(err));
ExecuteSystemCommand
Execute a shell command on the orchestrator host. Used by workflow System states.
rpc ExecuteSystemCommand(ExecuteSystemCommandRequest) returns (ExecuteSystemCommandResponse);
// Request for ExecuteSystemCommand: a shell command to run on the orchestrator host.
message ExecuteSystemCommandRequest {
string command = 1; // Shell command to execute
map<string, string> env = 2; // presumably environment variables for the command — confirm against the proto source
optional string workdir = 3; // presumably the working directory for the command — confirm
optional uint32 timeout_seconds = 4; // Timeout in seconds; behavior when omitted is not shown in this excerpt
}
message ExecuteSystemCommandResponse {
int32 exit_code = 1;
string stdout = 2;
string stderr = 3;
uint64 duration_ms = 4;
}
ValidateWithJudges
Gradient validation using one or more judge agents with configurable consensus.
rpc ValidateWithJudges(ValidateRequest) returns (ValidateResponse);
message ValidateRequest {
string output = 1;
string task = 2;
repeated JudgeConfig judges = 3;
ConsensusConfig consensus = 4;
string context_json = 5;
}
message JudgeConfig {
string agent_id = 1;
string input_template = 2;
float weight = 3;
}
message ConsensusConfig {
enum Strategy {
WEIGHTED_AVERAGE = 0;
MAJORITY_VOTE = 1;
UNANIMOUS = 2;
ANY_APPROVED = 3;
}
Strategy strategy = 1;
optional float threshold = 2;
optional float agreement = 3;
optional uint32 n = 4;
}
message ValidateResponse {
float score = 1; // 0.0–1.0
float confidence = 2; // 0.0–1.0
string reasoning = 3;
bool binary_valid = 4;
repeated JudgeResult individual_results = 5;
}
QueryCortexPatterns
Query Cortex memory for patterns matching a given error signature, to inform the 100monkeys refinement loop.
rpc QueryCortexPatterns(QueryCortexRequest) returns (QueryCortexResponse);
// Request for QueryCortexPatterns: looks up Cortex memory patterns by error signature.
message QueryCortexRequest {
string error_signature = 1; // Error signature to match patterns against
optional string error_type = 2; // presumably narrows matches to one error type — confirm
optional uint32 limit = 3; // presumably the maximum number of patterns returned — confirm
optional float min_success_score = 4; // presumably a lower bound on a pattern's success score — confirm range
}
message QueryCortexResponse {
repeated CortexPattern patterns = 1;
}
StoreCortexPattern
Store a new learned error-solution pattern, or increment the weight of an existing semantically matching pattern (deduplication).
rpc StoreCortexPattern(StoreCortexPatternRequest) returns (StoreCortexPatternResponse);
// Request for StoreCortexPattern: a learned error-solution pattern to store,
// or to merge into an existing semantically matching pattern.
message StoreCortexPatternRequest {
string error_signature = 1;
string error_type = 2;
string error_message = 3;
string solution_approach = 4;
optional string solution_code = 5;
// NOTE(review): field number 6 is not used in this excerpt — confirm whether it is
// reserved in the proto source or was dropped from this listing.
repeated string tags = 7;
}
message StoreCortexPatternResponse {
string pattern_id = 1;
bool deduplicated = 2; // true if an existing pattern's weight was incremented
uint32 new_frequency = 3;
}
AttestAgent
Issue a SEAL SecurityToken to an agent container after verifying its identity. This is the first call an agent makes before any tool invocations are possible. See the SEAL reference for the full attestation flow.
rpc AttestAgent(AttestAgentRequest) returns (AttestAgentResponse);
message AttestAgentRequest {
string agent_id = 1; // UUID of the agent making the request
string execution_id = 2; // UUID of the active execution
string container_id = 3; // Docker container ID (runtime identity proof)
string public_key_pem = 4; // Ephemeral Ed25519 public key (PEM-encoded)
}
message AttestAgentResponse {
string security_token = 1; // Signed JWT SecurityToken for subsequent InvokeTool calls
}
Notes:
- The agent generates an ephemeral Ed25519 keypair in memory at startup. The private key never leaves the container.
- The security_token is a short-lived JWT valid for the duration of the execution. Include it in every InvokeToolRequest.security_token.
- Also available over REST at POST /v1/seal/attest.
InvokeTool
Invoke a tool on behalf of an agent via orchestrator mediation. Agents do not call MCP servers directly — all tool calls are routed through the orchestrator, which verifies the SEAL envelope before forwarding. See Tool Routing for the three routing paths.
rpc InvokeTool(InvokeToolRequest) returns (InvokeToolResponse);
message InvokeToolRequest {
string protocol = 1; // Always "seal/v1"
string security_token = 2; // JWT from AttestAgentResponse
string signature = 3; // Ed25519 signature of payload bytes (base64)
bytes payload = 4; // JSON-RPC payload for the MCP tool call
string timestamp = 5; // ISO 8601 UTC timestamp of envelope creation
}
message InvokeToolResponse {
bytes result_json = 1; // Success or failure JSON result
}
Notes:
- payload is a standard MCP JSON-RPC call_tool request serialized to bytes.
- signature must be the Ed25519 signature over payload, produced with the ephemeral private key registered during AttestAgent.
- The orchestrator verifies the signature, checks the security_token, evaluates the SecurityContext policy, then routes the call to the appropriate tool server.
- Available over REST at POST /v1/seal/invoke (spec-compliant path) and POST /v1/invoke (convenience alias).
- Returns PERMISSION_DENIED if the SecurityContext denies the tool call.
IngestStimulus
Ingest an external stimulus and route it to a workflow via the two-stage hybrid pipeline.
rpc IngestStimulus(IngestStimulusRequest) returns (IngestStimulusResponse);
message IngestStimulusRequest {
string source_name = 1; // Source identifier (e.g., "github", "stripe")
string content = 2; // Raw event payload
optional string idempotency_key = 3; // Unique key for deduplication (24h TTL)
map<string, string> headers = 4; // Forwarded HTTP headers
}
message IngestStimulusResponse {
string stimulus_id = 1; // UUID of the ingested stimulus
string workflow_execution_id = 2; // UUID of the workflow execution started
}
Status Codes:
| gRPC Status | Meaning |
|---|---|
| OK | Stimulus routed and workflow started. |
| ALREADY_EXISTS | Idempotent duplicate (returns original stimulus_id). |
| INVALID_ARGUMENT | Low-confidence classification or no router configured. |
| INTERNAL | RouterAgent or workflow execution failure. |
| PERMISSION_DENIED | OIDC identity not authorized. |
Error Codes
| gRPC Status | Meaning |
|---|---|
| OK | Success. |
| NOT_FOUND | Agent or pattern not found. |
| INVALID_ARGUMENT | Malformed request or input validation error. |
| PERMISSION_DENIED | SecurityContext or policy violation. |
| RESOURCE_EXHAUSTED | Rate limit or iteration limit reached. |
| UNAUTHENTICATED | Missing or invalid Bearer token. |
| INTERNAL | Unhandled error — check daemon logs. |
| UNAVAILABLE | Orchestrator temporarily down. |
Service: NodeClusterService
The cluster coordination service exposed by controller nodes. All inter-node cluster RPCs use this service. Defined in aegis_cluster.proto in the aegis-proto repository.
Port: 50056 (default; configurable via spec.cluster.cluster_grpc_port)
Package: aegis.cluster.v1
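All RPCs except AttestNode and ChallengeNode require a valid NodeSecurityToken in the authorization metadata key and a SealNodeEnvelope wrapping the payload. AttestNode and ChallengeNode are unauthenticated: together they form the attestation handshake that issues the NodeSecurityToken (AttestNode is the first call a new worker makes).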
package aegis.cluster.v1;
service NodeClusterService {
// Node attestation and registration
rpc AttestNode(AttestNodeRequest) returns (AttestNodeResponse);
rpc ChallengeNode(ChallengeNodeRequest) returns (ChallengeNodeResponse);
rpc RegisterNode(RegisterNodeRequest) returns (RegisterNodeResponse);
rpc Heartbeat(HeartbeatRequest) returns (HeartbeatResponse);
rpc DeregisterNode(DeregisterNodeRequest) returns (DeregisterNodeResponse);
// Execution routing
rpc RouteExecution(RouteExecutionRequest) returns (RouteExecutionResponse);
rpc ForwardExecution(ForwardExecutionRequest) returns (stream ExecutionEvent);
// Configuration management
rpc SyncConfig(SyncConfigRequest) returns (SyncConfigResponse);
rpc PushConfig(PushConfigRequest) returns (PushConfigResponse);
// Cluster introspection
rpc ListPeers(ListPeersRequest) returns (ListPeersResponse);
}
RPC Summary
| RPC | Auth Required | Direction | Description |
|---|---|---|---|
| AttestNode | No | Worker → Controller | Initiate node attestation; the response carries the challenge nonce |
| ChallengeNode | No | Worker → Controller | Worker returns the signed challenge nonce to prove keypair ownership; the response carries the NodeSecurityToken |
| RegisterNode | Yes | Worker → Controller | Register with capability advertisement after attestation |
| Heartbeat | Yes | Worker → Controller | Periodic status update; response may carry NodeCommands |
| DeregisterNode | Yes | Worker → Controller | Graceful deregistration |
| RouteExecution | Yes | Any → Controller | Request an ExecutionRoute (target worker) for an execution |
| ForwardExecution | Yes | Controller → Worker | Execute an agent on this worker; streams ExecutionEvent back |
| SyncConfig | Yes | Worker → Controller | Request current config from controller |
| PushConfig | Yes | Controller → Worker | Controller pushes updated config to worker |
| ListPeers | Yes | Any → Controller | List all registered NodePeers and their status |
Node Attestation Flow
Before a worker node can participate in the cluster, it must attest its identity to the controller:
Worker                                          Controller
│                                                    │
│ AttestNode(node_id, public_key_pem)                │
│───────────────────────────────────────────────────▶│
│                                                    │ (generate challenge nonce)
│ AttestNodeResponse(challenge_nonce, challenge_id)  │
│◀───────────────────────────────────────────────────│
│                                                    │
│ (sign challenge with Ed25519 private key)          │
│                                                    │
│ ChallengeNode(challenge_id, signature)             │
│───────────────────────────────────────────────────▶│
│                                                    │ (verify signature)
│ ChallengeNodeResponse(node_security_token)         │
│◀───────────────────────────────────────────────────│
│                                                    │
│ RegisterNode(capabilities_advertisement)           │
│───────────────────────────────────────────────────▶│
│                                                    │
│ RegisterNodeResponse(peer_id)                      │
│◀───────────────────────────────────────────────────│
│                                                    │
│ [every 30s] Heartbeat(status, load)                │
│───────────────────────────────────────────────────▶│
│ HeartbeatResponse(commands?)                       │
│◀───────────────────────────────────────────────────│
The NodeSecurityToken is an RS256 JWT (1-hour TTL) signed by the controller's OpenBao Transit key. It is analogous to the agent SecurityToken from SEAL but scoped to node identity. All subsequent RPCs wrap their payload in:
message SealNodeEnvelope {
string node_security_token = 1; // NodeSecurityToken JWT
bytes signature = 2; // Ed25519 signature over payload
bytes payload = 3; // Serialized request message
}
AttestNode
Initiates the attestation handshake. No authentication required. Returns a challenge nonce that the worker must sign.
rpc AttestNode(AttestNodeRequest) returns (AttestNodeResponse);
message AttestNodeRequest {
string node_id = 1; // Must match spec.node.id / spec.cluster.node_id
string public_key_pem = 2; // Ed25519 public key in PEM format
NodeRole role = 3; // WORKER or HYBRID
}
message AttestNodeResponse {
string challenge_nonce = 1; // Random nonce to be signed with private key
string challenge_id = 2; // Correlation ID for the follow-up ChallengeNode call
}
enum NodeRole {
CONTROLLER = 0;
WORKER = 1;
HYBRID = 2;
}
ChallengeNode
Called by the worker to complete the attestation challenge. The worker signs the challenge_nonce from AttestNodeResponse with its Ed25519 private key and returns the signature. On success, returns a NodeSecurityToken.
rpc ChallengeNode(ChallengeNodeRequest) returns (ChallengeNodeResponse);
message ChallengeNodeRequest {
string challenge_id = 1; // From AttestNodeResponse
bytes signature = 2; // Ed25519 signature over challenge_nonce bytes
}
message ChallengeNodeResponse {
string node_security_token = 1; // RS256 JWT; use as Bearer in subsequent RPCs
string expires_at = 2; // ISO 8601 expiry timestamp (1-hour TTL)
}
RegisterNode
Registers the worker with its capability advertisement after successful attestation. Must be called before Heartbeat.
rpc RegisterNode(RegisterNodeRequest) returns (RegisterNodeResponse);
message RegisterNodeRequest {
NodeCapabilityAdvertisement capabilities = 1;
}
message NodeCapabilityAdvertisement {
uint32 gpu_count = 1;
float vram_gb = 2;
uint32 cpu_cores = 3;
float available_memory_gb = 4;
repeated string supported_runtimes = 5; // e.g. ["docker", "firecracker"]
repeated string tags = 6; // Matched against ExecutionTarget tags
}
message RegisterNodeResponse {
string peer_id = 1; // Assigned NodePeer ID within the NodeCluster aggregate
}
Heartbeat
Periodic keep-alive sent by worker nodes (default: every 30 seconds). The response may contain NodeCommands instructing the worker to drain, update config, or shut down.
rpc Heartbeat(HeartbeatRequest) returns (HeartbeatResponse);
message HeartbeatRequest {
string peer_id = 1; // From RegisterNodeResponse
uint32 active_execution_count = 2;
float cpu_utilization_percent = 3;
float memory_utilization_percent = 4;
NodePeerStatus status = 5;
}
message HeartbeatResponse {
repeated NodeCommand commands = 1; // Optional commands for the worker to execute
}
enum NodePeerStatus {
ACTIVE = 0;
DRAINING = 1;
UNHEALTHY = 2;
}
message NodeCommand {
oneof command {
DrainCommand drain = 1;
ShutdownCommand shutdown = 2;
SyncConfigCommand sync_config = 3;
}
}
RouteExecution
Returns the target worker for a given execution. The NodeRouter uses round-robin among healthy workers matching the requested tags (Phase 1).
rpc RouteExecution(RouteExecutionRequest) returns (RouteExecutionResponse);
message RouteExecutionRequest {
string execution_id = 1;
NodeCapabilityAdvertisement required = 2; // Minimum capabilities required
}
message RouteExecutionResponse {
string target_node_id = 1;
string worker_grpc_address = 2; // e.g. "worker-gpu-001.internal:50051"
}
ForwardExecution (server-streaming)
Forwards an execution to this worker. The worker executes the agent and streams ExecutionEvent messages back to the caller. Uses the same ExecutionEvent type as AegisRuntime.ExecuteAgent.
rpc ForwardExecution(ForwardExecutionRequest) returns (stream ExecutionEvent);
message ForwardExecutionRequest {
string execution_id = 1;
string agent_id = 2;
string manifest_yaml = 3; // Serialized agent manifest YAML
string input = 4;
string context_json = 5;
}
See ExecuteAgent for the ExecutionEvent stream format.
ListPeers
Returns all registered NodePeers in the NodeCluster aggregate along with their current status and capabilities.
rpc ListPeers(ListPeersRequest) returns (ListPeersResponse);
message ListPeersRequest {
optional NodePeerStatus filter_status = 1; // Filter by status; omit for all peers
}
message ListPeersResponse {
repeated NodePeer peers = 1;
}
message NodePeer {
string node_id = 1;
NodeRole role = 2;
NodeCapabilityAdvertisement capabilities = 3;
string last_heartbeat_at = 4; // ISO 8601
NodePeerStatus status = 5;
string grpc_address = 6;
}
SyncConfig / PushConfig
Configuration synchronization between controller and workers. SyncConfig is worker-initiated (typically in response to a SyncConfigCommand received in a HeartbeatResponse); PushConfig is controller-initiated, with the controller sending the updated config directly to the worker.
rpc SyncConfig(SyncConfigRequest) returns (SyncConfigResponse);
rpc PushConfig(PushConfigRequest) returns (PushConfigResponse);
message SyncConfigRequest {}
message SyncConfigResponse {
string config_yaml = 1; // Controller-managed sections of NodeConfig YAML
string config_hash = 2; // SHA-256 of config_yaml for idempotency checks
}
message PushConfigRequest {
string config_yaml = 1;
string config_hash = 2;
}
message PushConfigResponse {
bool accepted = 1;
optional string rejection_reason = 2;
}