Timing SDK operations
When debugging timeout / latency issues in the SDK, it is critical to identify the components of the operation lifecycle which are taking more time to execute than expected. As a starting point, you will generally need to inspect the timing breakdown between the overall operation call and the HTTP call itself.
The following sample program implements a basic instrumentation probe in terms
of smithy-go
middleware for SQS clients and demonstrates how it is used. The
probe emits the following information for each operation call:
- AWS request ID
- service ID
- operation name
- operation invocation time
- http call time
Each emitted message is prefixed with a unique (to a single operation) “invocation ID” which is set at the beginning of the handler stack.
The entry point for instrumentation is exposed as WithOperationTiming
, which
is parameterized to accept a message handling function which will receive
instrumentation “events” as formatted strings. PrintfMSGHandler
is provided
as a convenience which will simply dump messages to stdout.
The service used here is interchangeable - ALL service client options accept
APIOptions
and an HTTPClient
as configuration. For example,
WithOperationTiming
could instead be declared as:
func WithOperationTiming(msgHandler func(string)) func(*s3.Options)
func WithOperationTiming(msgHandler func(string)) func(*dynamodb.Options)
// etc.
If you change it, be sure to change the signature of the function it returns as well.
import (
"context"
"fmt"
"log"
"net/http"
"sync"
"time"
awsmiddleware "github.com/aws/aws-sdk-go-v2/aws/middleware"
awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/sqs"
"github.com/aws/smithy-go/middleware"
smithyrand "github.com/aws/smithy-go/rand"
)
// WithOperationTiming instruments an SQS client to dump timing information for
// the following spans:
// - overall operation time
// - HTTPClient call time
//
// This instrumentation will also emit the request ID, service name, and
// operation name for each invocation.
//
// Accepts a message "handler" which is invoked with formatted messages to be
// handled externally, you can use the declared PrintfMSGHandler to simply dump
// these values to stdout.
func WithOperationTiming(msgHandler func(string)) func(*sqs.Options) {
return func(o *sqs.Options) {
o.APIOptions = append(o.APIOptions, addTimingMiddlewares(msgHandler))
o.HTTPClient = &timedHTTPClient{
client: awshttp.NewBuildableClient(),
msgHandler: msgHandler,
}
}
}
// PrintfMSGHandler writes messages to stdout.
func PrintfMSGHandler(msg string) {
fmt.Printf("%s\n", msg)
}
type invokeIDKey struct{}
func setInvokeID(ctx context.Context, id string) context.Context {
return middleware.WithStackValue(ctx, invokeIDKey{}, id)
}
func getInvokeID(ctx context.Context) string {
id, _ := middleware.GetStackValue(ctx, invokeIDKey{}).(string)
return id
}
// Records the current time, and returns a function to be called when the
// target span of events is completed. The return function will emit the given
// span name and time elapsed to the given message consumer.
func timeSpan(ctx context.Context, name string, consumer func(string)) func() {
start := time.Now()
return func() {
elapsed := time.Now().Sub(start)
consumer(fmt.Sprintf("[%s] %s: %s", getInvokeID(ctx), name, elapsed))
}
}
type timedHTTPClient struct {
client *awshttp.BuildableClient
msgHandler func(string)
}
func (c *timedHTTPClient) Do(r *http.Request) (*http.Response, error) {
defer timeSpan(r.Context(), "http", c.msgHandler)()
resp, err := c.client.Do(r)
if err != nil {
return nil, fmt.Errorf("inner client do: %v", err)
}
return resp, nil
}
type addInvokeIDMiddleware struct {
msgHandler func(string)
}
func (*addInvokeIDMiddleware) ID() string { return "addInvokeID" }
func (*addInvokeIDMiddleware) HandleInitialize(ctx context.Context, in middleware.InitializeInput, next middleware.InitializeHandler) (
out middleware.InitializeOutput, md middleware.Metadata, err error,
) {
id, err := smithyrand.NewUUID(smithyrand.Reader).GetUUID()
if err != nil {
return out, md, fmt.Errorf("new uuid: %v", err)
}
return next.HandleInitialize(setInvokeID(ctx, id), in)
}
type timeOperationMiddleware struct {
msgHandler func(string)
}
func (*timeOperationMiddleware) ID() string { return "timeOperation" }
func (m *timeOperationMiddleware) HandleInitialize(ctx context.Context, in middleware.InitializeInput, next middleware.InitializeHandler) (
middleware.InitializeOutput, middleware.Metadata, error,
) {
defer timeSpan(ctx, "operation", m.msgHandler)()
return next.HandleInitialize(ctx, in)
}
type emitMetadataMiddleware struct {
msgHandler func(string)
}
func (*emitMetadataMiddleware) ID() string { return "emitMetadata" }
func (m *emitMetadataMiddleware) HandleInitialize(ctx context.Context, in middleware.InitializeInput, next middleware.InitializeHandler) (
middleware.InitializeOutput, middleware.Metadata, error,
) {
out, md, err := next.HandleInitialize(ctx, in)
invokeID := getInvokeID(ctx)
requestID, _ := awsmiddleware.GetRequestIDMetadata(md)
service := awsmiddleware.GetServiceID(ctx)
operation := awsmiddleware.GetOperationName(ctx)
m.msgHandler(fmt.Sprintf(`[%s] requestID = "%s"`, invokeID, requestID))
m.msgHandler(fmt.Sprintf(`[%s] service = "%s"`, invokeID, service))
m.msgHandler(fmt.Sprintf(`[%s] operation = "%s"`, invokeID, operation))
return out, md, err
}
func addTimingMiddlewares(mh func(string)) func(*middleware.Stack) error {
return func(s *middleware.Stack) error {
if err := s.Initialize.Add(&timeOperationMiddleware{msgHandler: mh}, middleware.Before); err != nil {
return fmt.Errorf("add time operation middleware: %v", err)
}
if err := s.Initialize.Add(&addInvokeIDMiddleware{msgHandler: mh}, middleware.Before); err != nil {
return fmt.Errorf("add invoke id middleware: %v", err)
}
if err := s.Initialize.Insert(&emitMetadataMiddleware{msgHandler: mh}, "RegisterServiceMetadata", middleware.After); err != nil {
return fmt.Errorf("add emit metadata middleware: %v", err)
}
return nil
}
}
func main() {
cfg, err := config.LoadDefaultConfig(context.Background())
if err != nil {
log.Fatal(fmt.Errorf("load default config: %v", err))
}
svc := sqs.NewFromConfig(cfg, WithOperationTiming(PrintfMSGHandler))
var wg sync.WaitGroup
for i := 0; i < 6; i++ {
wg.Add(1)
go func() {
defer wg.Done()
_, err = svc.ListQueues(context.Background(), nil)
if err != nil {
fmt.Println(fmt.Errorf("list queues: %v", err))
}
}()
}
wg.Wait()
}
A sample output of this program:
[e9a801bb-c51d-45c8-8e9f-a202e263fde8] http: 192.24067ms
[e9a801bb-c51d-45c8-8e9f-a202e263fde8] requestID = "dbee3082-96a3-5b23-adca-6d005696fa94"
[e9a801bb-c51d-45c8-8e9f-a202e263fde8] service = "SQS"
[e9a801bb-c51d-45c8-8e9f-a202e263fde8] operation = "ListQueues"
[e9a801bb-c51d-45c8-8e9f-a202e263fde8] operation: 193.098393ms
[0740f0e0-953e-4328-94fc-830a5052e763] http: 195.185732ms
[0740f0e0-953e-4328-94fc-830a5052e763] requestID = "48b301fa-fc9f-5f1f-9007-5c783caa9322"
[0740f0e0-953e-4328-94fc-830a5052e763] service = "SQS"
[0740f0e0-953e-4328-94fc-830a5052e763] operation = "ListQueues"
[0740f0e0-953e-4328-94fc-830a5052e763] operation: 195.725491ms
[c0589832-f351-4cc7-84f1-c656eb79dbd7] http: 200.52383ms
[444030d0-6743-4de5-bd91-bc40b2b94c55] http: 200.525919ms
[c0589832-f351-4cc7-84f1-c656eb79dbd7] requestID = "4a73cc82-b47b-56e1-b327-9100744e1b1f"
[c0589832-f351-4cc7-84f1-c656eb79dbd7] service = "SQS"
[c0589832-f351-4cc7-84f1-c656eb79dbd7] operation = "ListQueues"
[c0589832-f351-4cc7-84f1-c656eb79dbd7] operation: 201.214365ms
[444030d0-6743-4de5-bd91-bc40b2b94c55] requestID = "ca1523ed-1879-5610-bf5d-7e6fd84cabee"
[444030d0-6743-4de5-bd91-bc40b2b94c55] service = "SQS"
[444030d0-6743-4de5-bd91-bc40b2b94c55] operation = "ListQueues"
[444030d0-6743-4de5-bd91-bc40b2b94c55] operation: 201.197071ms
[079e8dbd-bb93-43ab-89e5-a7bb392b86a5] http: 206.449568ms
[12b2b39d-df86-4648-a436-ff0482d13340] http: 206.526603ms
[079e8dbd-bb93-43ab-89e5-a7bb392b86a5] requestID = "64229710-b552-56ed-8f96-ca927567ec7b"
[079e8dbd-bb93-43ab-89e5-a7bb392b86a5] service = "SQS"
[079e8dbd-bb93-43ab-89e5-a7bb392b86a5] operation = "ListQueues"
[079e8dbd-bb93-43ab-89e5-a7bb392b86a5] operation: 207.252357ms
[12b2b39d-df86-4648-a436-ff0482d13340] requestID = "76d9cbc0-07aa-58aa-98b7-9642c79f9851"
[12b2b39d-df86-4648-a436-ff0482d13340] service = "SQS"
[12b2b39d-df86-4648-a436-ff0482d13340] operation = "ListQueues"
[12b2b39d-df86-4648-a436-ff0482d13340] operation: 207.360621ms