Skip to content

Commit

Permalink
feat: webhook analytics timeouts (#1717)
Browse files Browse the repository at this point in the history
## Description

Creates analytics around webhook timeouts in the form of Prom metrics.

Ex:
Pepr Module
```ts
async function sleep(ms: number) {
  return new Promise(resolve => setTimeout(resolve, ms));
}
When(a.Pod)
.IsCreatedOrUpdated()
.Mutate(async po => {
  await sleep(5000);
  po.SetLabel("hello", "world");
})
```

_run `npx pepr@v0.42.3 dev`_

Patch the webhook timeoutSeconds and failurePolicy

```bash
kubectl patch  mutatingwebhookconfiguration pepr-static-test --type='json' -p='[
  {
    "op": "replace",
    "path": "/webhooks/0/failurePolicy",
    "value": "Fail"
  },
  {
    "op": "replace",
    "path": "/webhooks/0/timeoutSeconds",
    "value": 3
  }
]'
```

Create a pod 

```bash
> k run a --image=nginx
Error from server (InternalError): Internal error occurred: failed calling webhook "pepr-static-test.pepr.dev": failed to call webhook: Post "https://host.k3d.internal:3000/mutate/72e209af76fec1700f45afd06d9f121d14e2727beef9d67f537bd3bffe5eda41?timeout=3s": context deadline exceeded
```

Curl the metrics

```bash
> curl -k https://localhost:3000/metrics
... yada yada yada...
# HELP pepr_mutate_timeouts Number of mutate webhook timeouts
# TYPE pepr_mutate_timeouts counter
pepr_mutate_timeouts 1

# HELP pepr_validate_timeouts Number of validate webhook timeouts
# TYPE pepr_validate_timeouts counter
pepr_validate_timeouts 0
```


## Related Issue

Fixes #358 
<!-- or -->
Relates to #

## Type of change

- [ ] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Other (security config, docs update, etc)

## Checklist before merging
- [x] Unit,
[Journey](https://github.com/defenseunicorns/pepr/tree/main/journey),
[E2E Tests](https://github.com/defenseunicorns/pepr-excellent-examples),
[docs](https://github.com/defenseunicorns/pepr/tree/main/docs),
[adr](https://github.com/defenseunicorns/pepr/tree/main/adr) added or
updated as needed
- [x] [Contributor Guide
Steps](https://docs.pepr.dev/main/contribute/#submitting-a-pull-request)
followed

---------

Signed-off-by: Case Wylie <cmwylie19@defenseunicorns.com>
  • Loading branch information
cmwylie19 authored Jan 24, 2025
1 parent 77fab89 commit a31f1d6
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 10 deletions.
2 changes: 2 additions & 0 deletions src/cli/dev.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { buildModule, loadModule } from "./build";
import { deployWebhook } from "../lib/assets/deploy";
import { promises as fs } from "fs";
import { validateCapabilityNames } from "../lib/helpers";

export default function (program: RootCmd): void {
program
.command("dev")
Expand Down Expand Up @@ -42,6 +43,7 @@ export default function (program: RootCmd): void {
description: cfg.description,
},
path,
[],
opts.host,
);

Expand Down
21 changes: 20 additions & 1 deletion src/lib/processors/mutate-processor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ import { PeprMutateRequest } from "../mutate-request";
import * as sut from "./mutate-processor";
import { AdmissionRequest, Binding, MutateAction } from "../types";
import { Event, Operation } from "../enums";
import { convertFromBase64Map, convertToBase64Map } from "../utils";
import { convertFromBase64Map, convertToBase64Map, base64Encode } from "../utils";
import { GenericClass, KubernetesObject } from "kubernetes-fluent-client";
import { MutateResponse } from "../k8s";
import { OnError } from "../../cli/init/enums";
import { updateResponsePatchAndWarnings } from "./mutate-processor";
import { Operation as JSONPatchOperation } from "fast-json-patch";

jest.mock("../utils");
const mockConvertFromBase64Map = jest.mocked(convertFromBase64Map);
Expand Down Expand Up @@ -292,3 +294,20 @@ describe("processRequest", () => {
expect(val).toBe("Action failed: An error occurred with the mutate action.");
});
});

describe("updateResponsePatchAndWarnings", () => {
const mutateResponse: MutateResponse = { uid: "uid", allowed: true, patch: "" };
const patches: JSONPatchOperation[] = [{ op: "add", path: "/data/hello-pepr-v2-a", value: "value" }];
it("should add a patch to the response if patch length is greater than 0", () => {
updateResponsePatchAndWarnings(patches, mutateResponse);
expect(mutateResponse.patch).toBe(base64Encode(JSON.stringify(patches)));
expect(mutateResponse.patchType).toBe("JSONPatch");
});

it("should remove warnings from the response if warnings are empty", () => {
const localMutateResponse = { ...mutateResponse, warnings: [] };
expect(localMutateResponse.warnings.length).toBe(0);
updateResponsePatchAndWarnings(patches, localMutateResponse);
expect(localMutateResponse.warnings).not.toBeDefined();
});
});
19 changes: 13 additions & 6 deletions src/lib/processors/mutate-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import jsonPatch from "fast-json-patch";
import { kind, KubernetesObject } from "kubernetes-fluent-client";
import { clone } from "ramda";

import { MeasureWebhookTimeout } from "../telemetry/webhookTimeouts";
import { Capability } from "../core/capability";
import { shouldSkipRequest } from "../filter/filter";
import { MutateResponse } from "../k8s";
Expand All @@ -15,7 +15,8 @@ import { PeprMutateRequest } from "../mutate-request";
import { base64Encode, convertFromBase64Map, convertToBase64Map } from "../utils";
import { OnError } from "../../cli/init/enums";
import { resolveIgnoreNamespaces } from "../assets/webhooks";

import { Operation } from "fast-json-patch";
import { WebhookType } from "../enums";
export interface Bindable {
req: AdmissionRequest;
config: ModuleConfig;
Expand Down Expand Up @@ -139,6 +140,8 @@ export async function mutateProcessor(
req: AdmissionRequest,
reqMetadata: Record<string, string>,
): Promise<MutateResponse> {
const webhookTimer = new MeasureWebhookTimeout(WebhookType.MUTATE);
webhookTimer.start(config.webhookTimeout);
let response: MutateResponse = {
uid: req.uid,
warnings: [],
Expand Down Expand Up @@ -207,6 +210,14 @@ export async function mutateProcessor(
// Compare the original request to the modified request to get the patches
const patches = jsonPatch.compare(req.object, transformed);

updateResponsePatchAndWarnings(patches, response);

Log.debug({ ...reqMetadata, patches }, `Patches generated`);
webhookTimer.stop();
return response;
}

export function updateResponsePatchAndWarnings(patches: Operation[], response: MutateResponse): void {
// Only add the patch if there are patches to apply
if (patches.length > 0) {
response.patchType = "JSONPatch";
Expand All @@ -219,8 +230,4 @@ export async function mutateProcessor(
if (response.warnings && response.warnings.length < 1) {
delete response.warnings;
}

Log.debug({ ...reqMetadata, patches }, `Patches generated`);

return response;
}
9 changes: 6 additions & 3 deletions src/lib/processors/validate-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import { convertFromBase64Map } from "../utils";
import { PeprValidateRequest } from "../validate-request";
import { ModuleConfig } from "../core/module";
import { resolveIgnoreNamespaces } from "../assets/webhooks";
import { MeasureWebhookTimeout } from "../telemetry/webhookTimeouts";
import { WebhookType } from "../enums";

export async function processRequest(
binding: Binding,
Expand Down Expand Up @@ -58,12 +60,13 @@ export async function validateProcessor(
req: AdmissionRequest,
reqMetadata: Record<string, string>,
): Promise<ValidateResponse[]> {
const webhookTimer = new MeasureWebhookTimeout(WebhookType.VALIDATE);
webhookTimer.start(config.webhookTimeout);
const wrapped = new PeprValidateRequest(req);
const response: ValidateResponse[] = [];

// If the resource is a secret, decode the data
const isSecret = req.kind.version === "v1" && req.kind.kind === "Secret";
if (isSecret) {
if (req.kind.version === "v1" && req.kind.kind === "Secret") {
convertFromBase64Map(wrapped.Raw as unknown as kind.Secret);
}

Expand Down Expand Up @@ -94,6 +97,6 @@ export async function validateProcessor(
response.push(resp);
}
}

webhookTimer.stop();
return response;
}
13 changes: 13 additions & 0 deletions src/lib/telemetry/timeUtils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: 2023-Present The Pepr Authors

import { describe, expect, it } from "@jest/globals";
import { getNow } from "./timeUtils";
describe("getNow", () => {
it("should return the current time in milliseconds", () => {
const perfNow = getNow();
const performanceNow = performance.now();

expect(performanceNow).toBeGreaterThanOrEqual(perfNow);
});
});
1 change: 1 addition & 0 deletions src/lib/telemetry/timeUtils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export const getNow = (): number => performance.now();
63 changes: 63 additions & 0 deletions src/lib/telemetry/webhookTimeout.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: 2023-Present The Pepr Authors

import { describe, expect, it, jest, beforeEach } from "@jest/globals";
import { MeasureWebhookTimeout } from "./webhookTimeouts";
import { metricsCollector } from "./metrics";
import { getNow } from "./timeUtils";
import { WebhookType } from "../enums";

jest.mock("./metrics", () => ({
metricsCollector: {
addCounter: jest.fn(),
incCounter: jest.fn(),
},
}));

jest.mock("./timeUtils", () => ({
getNow: jest.fn(),
}));

describe("MeasureWebhookTimeout", () => {
beforeEach(() => {
jest.clearAllMocks();
});

it("should initialize a timeout counter for the webhook type", () => {
const webhookType = WebhookType.MUTATE;
new MeasureWebhookTimeout(webhookType);

expect(metricsCollector.addCounter).toHaveBeenCalledWith(
`${WebhookType.MUTATE}_timeouts`,
"Number of mutate webhook timeouts",
);
});

it("should throw an error if stop is called before start", () => {
const webhook = new MeasureWebhookTimeout(WebhookType.MUTATE);

expect(() => webhook.stop()).toThrow("Timer was not started before calling stop.");
});

it("should not increment the timeout counter if elapsed time is less than the timeout", () => {
(getNow as jest.Mock).mockReturnValueOnce(1000).mockReturnValueOnce(1500);

const webhook = new MeasureWebhookTimeout(WebhookType.MUTATE);
webhook.start(1000);

webhook.stop();

expect(metricsCollector.incCounter).not.toHaveBeenCalled();
});

it("should increment the timeout counter if elapsed time exceeds the timeout", () => {
(getNow as jest.Mock).mockReturnValueOnce(1000).mockReturnValueOnce(2000);

const webhook = new MeasureWebhookTimeout(WebhookType.MUTATE);
webhook.start(500);

webhook.stop();

expect(metricsCollector.incCounter).toHaveBeenCalledWith(`${WebhookType.MUTATE}_timeouts`);
});
});
34 changes: 34 additions & 0 deletions src/lib/telemetry/webhookTimeouts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { metricsCollector } from "./metrics";
import { getNow } from "./timeUtils";
import Log from "./logger";
import { WebhookType } from "../enums";
export class MeasureWebhookTimeout {
#startTime: number | null = null;
#webhookType: string;
timeout: number = 0;

constructor(webhookType: WebhookType) {
this.#webhookType = webhookType;
metricsCollector.addCounter(`${webhookType}_timeouts`, `Number of ${webhookType} webhook timeouts`);
}

start(timeout: number = 10): void {
this.#startTime = getNow();
this.timeout = timeout;
Log.info(`Starting timer at ${this.#startTime}`);
}

stop(): void {
if (this.#startTime === null) {
throw new Error("Timer was not started before calling stop.");
}

const elapsedTime = getNow() - this.#startTime;
Log.info(`Webhook ${this.#startTime} took ${elapsedTime}ms`);
this.#startTime = null;

if (elapsedTime > this.timeout) {
metricsCollector.incCounter(`${this.#webhookType}_timeouts`);
}
}
}

0 comments on commit a31f1d6

Please sign in to comment.