Skip to main content

Go Integration

The Ferro Labs AI Gateway is written in Go. This guide covers everything a Go developer needs — from pointing a standard OpenAI client at the gateway, to embedding the engine as a library, writing custom plugins, propagating trace context, and profiling with pprof.

1. Using the go-openai Client

The sashabaranov/go-openai client works out of the box. Point it at your gateway URL and set your API key.

package main

import (
"context"
"fmt"
"log"

openai "github.com/sashabaranov/go-openai"
)

// main sends a single chat completion through the Ferro Labs AI Gateway
// using the stock go-openai client and prints the model's reply.
func main() {
	// Point the standard client at the gateway instead of api.openai.com.
	cfg := openai.DefaultConfig("your-ferro-api-key")
	cfg.BaseURL = "http://localhost:8080/v1" // Ferro Labs AI Gateway

	client := openai.NewClientWithConfig(cfg)

	request := openai.ChatCompletionRequest{
		Model: openai.GPT4,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Explain the CAP theorem in two sentences."},
		},
	}

	resp, err := client.CreateChatCompletion(context.Background(), request)
	if err != nil {
		log.Fatalf("completion error: %v", err)
	}

	fmt.Println(resp.Choices[0].Message.Content)
}

2. Embedding Ferro as a Library

You can embed the gateway directly into your Go application instead of running it as a standalone process. This is useful for custom orchestration or testing.

package main

import (
"log"
"os"
"os/signal"
"syscall"

gateway "github.com/ferro-labs/ai-gateway"
)

// main boots an embedded Ferro gateway from ferro.yaml, serves on :8080
// in the background, and shuts down when SIGINT or SIGTERM arrives.
func main() {
	cfg, err := gateway.LoadConfig("ferro.yaml")
	if err != nil {
		log.Fatalf("failed to load config: %v", err)
	}

	gw, err := gateway.New(cfg)
	if err != nil {
		log.Fatalf("failed to create gateway: %v", err)
	}

	// Serve in the background so main can block waiting for a signal.
	// NOTE(review): presumably Serve returns a sentinel on graceful
	// Shutdown; confirm it doesn't trip this Fatalf on clean exit.
	go func() {
		if err := gw.Serve(":8080"); err != nil {
			log.Fatalf("gateway exited: %v", err)
		}
	}()

	log.Println("AI Gateway listening on :8080")

	// Block until an interrupt or termination signal is delivered.
	stop := make(chan os.Signal, 1)
	signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM)
	<-stop

	log.Println("shutting down...")
	gw.Shutdown()
}

3. Writing a Custom Plugin

Plugins implement the Plugin interface. Each plugin can hook into the request lifecycle at two points: before the request is forwarded to the provider and after the response is received.

package myplugin

import (
"context"
"log"
"time"

gateway "github.com/ferro-labs/ai-gateway"
)

// TimingPlugin logs the round-trip duration for every request.
// It is stateless; per-request state lives in the request metadata.
type TimingPlugin struct{}

// Name returns the plugin identifier used in configuration.
func (tp *TimingPlugin) Name() string { return "timing" }

// BeforeRequest records the wall-clock start time in the request's
// metadata so AfterResponse can compute the elapsed duration.
// Return a non-nil error to abort the request.
func (tp *TimingPlugin) BeforeRequest(ctx context.Context, req *gateway.Request) error {
	req.SetMeta("timing_start", time.Now())
	log.Printf("[timing] request to model=%s started", req.Model)
	return nil
}

// AfterResponse logs how long the provider took, along with the total
// token usage reported in the response.
func (tp *TimingPlugin) AfterResponse(ctx context.Context, req *gateway.Request, resp *gateway.Response) error {
	started, ok := req.GetMeta("timing_start").(time.Time)
	if !ok {
		// BeforeRequest never ran (or stored something unexpected);
		// there is nothing meaningful to report.
		return nil
	}
	log.Printf("[timing] model=%s completed in %s (tokens: %d)",
		req.Model, time.Since(started), resp.Usage.TotalTokens)
	return nil
}

Register the plugin when creating the gateway:

// Register the plugin via the WithPlugins option when constructing
// the gateway.
gw, err := gateway.New(cfg, gateway.WithPlugins(
	&myplugin.TimingPlugin{},
))

4. Context Propagation and Trace ID Extraction

The AI Gateway injects an X-Ferro-Trace-Id header into every response. Use this to correlate gateway logs with your application traces.

package main

import (
"context"
"fmt"
"log"
"net/http"

openai "github.com/sashabaranov/go-openai"
)

// main issues one chat completion through the gateway and prints both
// the reply and the X-Ferro-Trace-Id header the gateway attached to
// the response.
func main() {
	cfg := openai.DefaultConfig("your-ferro-api-key")
	cfg.BaseURL = "http://localhost:8080/v1"

	// Wrap the default transport so every response's trace header is
	// captured before the client consumes the body.
	var lastTrace string
	capture := &traceTransport{
		base: http.DefaultTransport,
		onResponse: func(r *http.Response) {
			lastTrace = r.Header.Get("X-Ferro-Trace-Id")
		},
	}
	cfg.HTTPClient = &http.Client{Transport: capture}

	client := openai.NewClientWithConfig(cfg)

	resp, err := client.CreateChatCompletion(
		context.Background(),
		openai.ChatCompletionRequest{
			Model:    openai.GPT4,
			Messages: []openai.ChatCompletionMessage{{Role: openai.ChatMessageRoleUser, Content: "Hello"}},
		},
	)
	if err != nil {
		log.Fatalf("completion error: %v", err)
	}

	fmt.Printf("Response: %s\n", resp.Choices[0].Message.Content)
	fmt.Printf("Trace ID: %s\n", lastTrace)
}

// traceTransport wraps an http.RoundTripper and invokes a callback
// with every successful response, exposing its headers to the caller.
type traceTransport struct {
	base       http.RoundTripper          // underlying transport that performs the request
	onResponse func(*http.Response)       // optional hook; called only on success
}

// RoundTrip delegates to the wrapped transport and, when the request
// succeeded, hands the response to the onResponse hook before
// returning it unchanged.
func (tt *traceTransport) RoundTrip(r *http.Request) (*http.Response, error) {
	res, err := tt.base.RoundTrip(r)
	if err == nil && tt.onResponse != nil {
		tt.onResponse(res)
	}
	return res, err
}

5. Benchmarking with pprof

Enable the pprof endpoint to profile the gateway under load.

Enable the pprof endpoint

Add the following to your ferro.yaml:

debug:
pprof: true
pprof_addr: "localhost:6060"

Or enable it programmatically when embedding:

import "net/http"
import _ "net/http/pprof"

go func() {
log.Println("pprof listening on :6060")
log.Println(http.ListenAndServe("localhost:6060", nil))
}()

Run a load test and capture a profile

Use hey (or any HTTP load testing tool) alongside go tool pprof:

# Generate load — 1000 requests, 50 concurrent
hey -n 1000 -c 50 \
-H "Authorization: Bearer your-ferro-api-key" \
-H "Content-Type: application/json" \
-d '{"model":"gpt-4","messages":[{"role":"user","content":"ping"}]}' \
http://localhost:8080/v1/chat/completions

# Capture a 30-second CPU profile
go tool pprof "http://localhost:6060/debug/pprof/profile?seconds=30"

# Capture a heap snapshot
go tool pprof http://localhost:6060/debug/pprof/heap

Analyze the profile

Inside the pprof interactive shell:

(pprof) top 20
(pprof) web # opens a call-graph SVG in your browser
(pprof) list HandleRequest # source-level annotation

Write a Go benchmark test

package gateway_test

import (
"context"
"testing"

openai "github.com/sashabaranov/go-openai"
)

// BenchmarkChatCompletion measures the end-to-end latency of a chat
// completion through a gateway running on localhost:8080. Results
// include network and provider time, not just gateway overhead.
func BenchmarkChatCompletion(b *testing.B) {
	conf := openai.DefaultConfig("your-ferro-api-key")
	conf.BaseURL = "http://localhost:8080/v1"
	c := openai.NewClientWithConfig(conf)

	payload := openai.ChatCompletionRequest{
		Model:    openai.GPT4,
		Messages: []openai.ChatCompletionMessage{{Role: openai.ChatMessageRoleUser, Content: "ping"}},
	}

	// Exclude client construction from the measured region.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := c.CreateChatCompletion(context.Background(), payload); err != nil {
			b.Fatalf("request failed: %v", err)
		}
	}
}

Run with:

go test -bench=BenchmarkChatCompletion -benchtime=30s -count=3 ./...