Commit

Allow providing a batchSize to hint memory cache limit
zcbenz committed Sep 16, 2024
1 parent 779625a commit 6ad0dff
Showing 1 changed file with 12 additions and 3 deletions.
src/index.ts: 12 additions & 3 deletions
@@ -1,4 +1,4 @@
-import {readFileSync} from 'node:fs'
+import {statSync, readFileSync} from 'node:fs'
 import {TokenizerLoader} from '@lenml/tokenizers';
 import {core as mx, nn} from '@frost-beta/mlx';
 
@@ -35,7 +35,7 @@ export class Clip {
   #imageProcessor?: ClipImageProcessor;
   #model?: ClipModel;
 
-  constructor(public modelDir: string) {}
+  constructor(public modelDir: string, public batchSize?: number) {}
 
   get tokenizer() {
     if (!this.#tokenizer)
@@ -50,8 +50,17 @@ export class Clip {
   }
 
   get model() {
-    if (!this.#model)
+    if (!this.#model) {
+      if (this.batchSize) {
+        // When batchSize is hinted, set a cache limit. This is needed because
+        // the model can briefly use a lot of RAM, and MLX's cache would
+        // otherwise keep the app's RAM usage at that peak. We should
+        // eventually fix the model, but for now a cache limit is enough.
+        const {size} = statSync(`${this.modelDir}/model.safetensors`);
+        mx.metal.setCacheLimit(size * (1 + this.batchSize));
+      }
       this.#model = loadModel(this.modelDir);
+    }
     return this.#model;
   }
 
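For callers, a minimal usage sketch of the new parameter (the import path and model directory are illustrative; only the constructor signature and the lazy `model` getter come from this commit):

import {Clip} from './src/index';

// Hint that inputs will be processed in batches of 8, so the Metal cache
// limit is set before the model is first loaded.
const clip = new Clip('models/clip-vit-large-patch14', 8);

// Accessing `model` lazily loads it; with batchSize hinted, the cache is
// first capped at (size of model.safetensors) * (1 + 8) bytes.
const model = clip.model;

The `size * (1 + this.batchSize)` expression presumably budgets the weights once plus roughly one model-sized working set per batch element, so MLX's cache is not left holding the peak allocation indefinitely.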
