diff --git a/docs/guide/embedding.md b/docs/guide/embedding.md
index cf697e09..fa4f4167 100644
--- a/docs/guide/embedding.md
+++ b/docs/guide/embedding.md
@@ -172,7 +172,7 @@ const documents = [
     "Cleaning the house is a good way to keep it tidy"
 ];
 
-const query = "Tell me a nature geographical fact";
+const query = "Tell me a geographical fact";
 const rankedDocuments = await context.rankAndSort(query, documents);
 
 const topDocument = rankedDocuments[0]!;
@@ -185,7 +185,7 @@ console.log("Ranked documents:", rankedDocuments);
 ```
 > This example will produce this output:
 > ```
-> query: Tell me a nature geographical fact
+> query: Tell me a geographical fact
 > Top document: Mount Everest is the tallest mountain in the world
 > Second document: The capital of France is Paris
 > ```
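The documentation snippet above already shows how `rankAndSort` orders documents by relevance. As a complementary, minimal sketch (not part of the diff), this is how the sorted results might be filtered before building a prompt; the model file name and the score cutoff are placeholder assumptions.

```typescript
// Sketch only — builds on the documented rankAndSort() API shown above.
// The model file name and the 0.001 score cutoff are arbitrary assumptions.
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "bge-reranker-v2-m3-Q8_0.gguf"});
const context = await model.createRankingContext();

const documents = [
    "The capital of France is Paris",
    "Mount Everest is the tallest mountain in the world",
    "I love eating pizza with extra cheese"
];

const rankedDocuments = await context.rankAndSort("Tell me a geographical fact", documents);

// keep only the documents that scored above the (arbitrary) cutoff
const relevantDocuments = rankedDocuments
    .filter(({score}) => score > 0.001)
    .map(({document}) => document);

console.log("Relevant documents:", relevantDocuments);
```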
diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts
index 243ad4ff..c4ceb32b 100644
--- a/src/bindings/Llama.ts
+++ b/src/bindings/Llama.ts
@@ -42,6 +42,7 @@ export class Llama {
     /** @internal */ public readonly _debug: boolean;
     /** @internal */ public readonly _threadsSplitter: ThreadsSplitter;
     /** @internal */ private readonly _gpu: LlamaGpuType;
+    /** @internal */ private readonly _numa: LlamaNuma;
     /** @internal */ private readonly _buildType: "localBuild" | "prebuilt";
     /** @internal */ private readonly _cmakeOptions: Readonly>;
     /** @internal */ private readonly _supportsGpuOffloading: boolean;
@@ -95,6 +96,7 @@ export class Llama {
 
         this._bindings = bindings;
         this._debug = debug;
+        this._numa = numa ?? false;
         this._logLevel = this._debug
             ? LlamaLogLevel.debug
             : (logLevel ?? LlamaLogLevel.debug);
@@ -111,7 +113,7 @@ export class Llama {
             bindings.ensureGpuDeviceIsSupported();
 
-        if (numa != null && numa !== false)
+        if (this._numa !== false)
             bindings.setNuma(numa);
 
         this._gpu = bindings.getGpuType() ?? false;
@@ -211,6 +213,13 @@ export class Llama {
         this._threadsSplitter.maxThreads = Math.floor(Math.max(0, value));
     }
 
+    /**
+     * See the `numa` option of `getLlama` for more information
+     */
+    public get numa() {
+        return this._numa;
+    }
+
     public get logLevel() {
         return this._logLevel;
     }
diff --git a/src/bindings/types.ts b/src/bindings/types.ts
index 7748772c..5c6af332 100644
--- a/src/bindings/types.ts
+++ b/src/bindings/types.ts
@@ -22,6 +22,7 @@ export type BuildOptions = {
         release: string
     }
 };
+export const llamaNumaOptions = ["distribute", "isolate", "numactl", "mirror", false] as const satisfies LlamaNuma[];
 export type LlamaNuma = false | "distribute" | "isolate" | "numactl" | "mirror";
 
 export type BuildOptionsJSON = Omit & {
@@ -44,6 +45,20 @@ export function parseNodeLlamaCppGpuOption(option: (typeof nodeLlamaCppGpuOption
     return "auto";
 }
 
+export function parseNumaOption(option: (typeof llamaNumaOptions)[number] | (typeof nodeLlamaCppGpuOffStringOptions)[number]): LlamaNuma {
+    function optionIsGpuOff(opt: typeof option): opt is (typeof nodeLlamaCppGpuOffStringOptions)[number] {
+        return nodeLlamaCppGpuOffStringOptions.includes(opt as (typeof nodeLlamaCppGpuOffStringOptions)[number]);
+    }
+
+    if (optionIsGpuOff(option))
+        return false;
+
+    if (llamaNumaOptions.includes(option))
+        return option;
+
+    return false;
+}
+
 export function convertBuildOptionsJSONToBuildOptions(buildOptionsJSON: BuildOptionsJSON): BuildOptions {
     return {
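A minimal usage sketch for the additions above (not part of the diff): the `numa` option is forwarded from `getLlama` into the `Llama` instance, and the new `numa` getter exposes the policy in effect. `"distribute"` is just one of the values listed in `llamaNumaOptions`.

```typescript
// Sketch only — exercises the `numa` option and the new `llama.numa` getter added above.
import {getLlama} from "node-llama-cpp";

const llama = await getLlama({
    numa: "distribute" // NUMA allocation policy; omitting it (or passing `false`) keeps NUMA disabled
});

console.log(llama.numa); // "distribute"
```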
diff --git a/src/cli/commands/ChatCommand.ts b/src/cli/commands/ChatCommand.ts
index 39f0bd59..486f3f82 100644
--- a/src/cli/commands/ChatCommand.ts
+++ b/src/cli/commands/ChatCommand.ts
@@ -13,7 +13,8 @@ import {getLlama} from "../../bindings/getLlama.js";
 import {LlamaGrammar} from "../../evaluator/LlamaGrammar.js";
 import {LlamaChatSession} from "../../evaluator/LlamaChatSession/LlamaChatSession.js";
 import {
-    BuildGpu, LlamaLogLevel, LlamaLogLevelGreaterThan, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption
+    BuildGpu, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaNuma, llamaNumaOptions, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption,
+    parseNumaOption
 } from "../../bindings/types.js";
 import withOra from "../../utils/withOra.js";
 import {TokenMeter} from "../../evaluator/TokenMeter.js";
@@ -67,6 +68,7 @@ type ChatCommand = {
     tokenPredictionDraftModel?: string,
     tokenPredictionModelContextSize?: number,
     debug: boolean,
+    numa?: LlamaNuma,
     meter: boolean,
     timing: boolean,
     noMmap: boolean,
@@ -298,6 +300,20 @@ export const ChatCommand: CommandModule = {
                 default: false,
                 description: "Print llama.cpp info and debug logs"
             })
+            .option("numa", {
+                type: "string",
+
+                // yargs types don't support passing `false` as a choice, although it is supported by yargs
+                choices: llamaNumaOptions as any as Exclude[],
+                coerce: (value) => {
+                    if (value == null || value == "")
+                        return false;
+
+                    return parseNumaOption(value);
+                },
+                defaultDescription: "false",
+                description: "NUMA allocation policy. See the `numa` option on the `getLlama` method for more information"
+            })
             .option("meter", {
                 type: "boolean",
                 default: false,
@@ -326,7 +342,7 @@ export const ChatCommand: CommandModule = {
         noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, seed, gpuLayers,
         repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
         maxTokens, reasoningBudget, noHistory,
-        environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, meter, timing, noMmap, printTimings
+        environmentFunctions, tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, printTimings
     }) {
         try {
             await RunChat({
@@ -335,7 +351,7 @@
                 temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine,
                 repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, reasoningBudget, noHistory, environmentFunctions,
                 tokenPredictionDraftModel, tokenPredictionModelContextSize,
-                debug, meter, timing, noMmap, printTimings
+                debug, numa, meter, timing, noMmap, printTimings
             });
         } catch (err) {
             await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
@@ -352,7 +368,7 @@ async function RunChat({
     jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, seed, gpuLayers,
     lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
     maxTokens, reasoningBudget, noHistory, environmentFunctions, tokenPredictionDraftModel,
-    tokenPredictionModelContextSize, debug, meter, timing, noMmap, printTimings
+    tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, printTimings
 }: ChatCommand) {
     if (contextSize === -1) contextSize = undefined;
     if (gpuLayers === -1) gpuLayers = undefined;
@@ -369,11 +385,13 @@ async function RunChat({
         : LlamaLogLevel.warn;
     const llama = gpu == null
         ? await getLlama("lastBuild", {
-            logLevel: llamaLogLevel
+            logLevel: llamaLogLevel,
+            numa
         })
         : await getLlama({
             gpu,
-            logLevel: llamaLogLevel
+            logLevel: llamaLogLevel,
+            numa
         });
     const logBatchSize = batchSize != null;
     const useMmap = !noMmap && llama.supportsMmap;
diff --git a/src/cli/commands/CompleteCommand.ts b/src/cli/commands/CompleteCommand.ts
index 1aae93fd..55cc01b1 100644
--- a/src/cli/commands/CompleteCommand.ts
+++ b/src/cli/commands/CompleteCommand.ts
@@ -7,7 +7,8 @@ import fs from "fs-extra";
 import prettyMilliseconds from "pretty-ms";
 import {getLlama} from "../../bindings/getLlama.js";
 import {
-    BuildGpu, LlamaLogLevel, LlamaLogLevelGreaterThan, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption
+    BuildGpu, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaNuma, llamaNumaOptions, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption,
+    parseNumaOption
 } from "../../bindings/types.js";
 import {LlamaCompletion} from "../../evaluator/LlamaCompletion.js";
 import withOra from "../../utils/withOra.js";
@@ -49,6 +50,7 @@ type CompleteCommand = {
     tokenPredictionDraftModel?: string,
     tokenPredictionModelContextSize?: number,
     debug: boolean,
+    numa?: LlamaNuma,
     meter: boolean,
     timing: boolean,
     noMmap: boolean,
@@ -218,6 +220,20 @@ export const CompleteCommand: CommandModule = {
                 default: false,
                 description: "Print llama.cpp info and debug logs"
             })
+            .option("numa", {
+                type: "string",
+
+                // yargs types don't support passing `false` as a choice, although it is supported by yargs
+                choices: llamaNumaOptions as any as Exclude[],
+                coerce: (value) => {
+                    if (value == null || value == "")
+                        return false;
+
+                    return parseNumaOption(value);
+                },
+                defaultDescription: "false",
+                description: "NUMA allocation policy. See the `numa` option on the `getLlama` method for more information"
+            })
             .option("meter", {
                 type: "boolean",
                 default: false,
@@ -245,14 +261,14 @@ export const CompleteCommand: CommandModule = {
         flashAttention, swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, repeatPenalty,
         lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
         tokenPredictionDraftModel, tokenPredictionModelContextSize,
-        debug, meter, timing, noMmap, printTimings
+        debug, numa, meter, timing, noMmap, printTimings
     }) {
         try {
             await RunCompletion({
                 modelPath, header, gpu, systemInfo, text, textFile, contextSize, batchSize, flashAttention, swaFullCache,
                 threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
                 penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
-                tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, meter, timing, noMmap, printTimings
+                tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, printTimings
             });
         } catch (err) {
             await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
@@ -267,7 +283,7 @@ async function RunCompletion({
     modelPath: modelArg, header: headerArg, gpu, systemInfo, text, textFile, contextSize, batchSize, flashAttention,
     swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
     penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
-    tokenPredictionDraftModel, tokenPredictionModelContextSize, maxTokens, debug, meter, timing, noMmap, printTimings
+    tokenPredictionDraftModel, tokenPredictionModelContextSize, maxTokens, debug, numa, meter, timing, noMmap, printTimings
 }: CompleteCommand) {
     if (contextSize === -1) contextSize = undefined;
     if (gpuLayers === -1) gpuLayers = undefined;
@@ -282,11 +298,13 @@ async function RunCompletion({
         : LlamaLogLevel.warn;
     const llama = gpu == null
         ? await getLlama("lastBuild", {
-            logLevel: llamaLogLevel
+            logLevel: llamaLogLevel,
+            numa
         })
         : await getLlama({
             gpu,
-            logLevel: llamaLogLevel
+            logLevel: llamaLogLevel,
+            numa
         });
     const logBatchSize = batchSize != null;
     const useMmap = !noMmap && llama.supportsMmap;
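The `--numa` flag added to the two commands above funnels its raw string value through `parseNumaOption`. A small sketch of that mapping (not part of the diff; the import path is the in-repo one used by the command files, and the sample source of the value is hypothetical):

```typescript
// Sketch only — mirrors what the `coerce` callback of the `--numa` option does with raw CLI input.
import {llamaNumaOptions, parseNumaOption} from "../../bindings/types.js";

console.log(llamaNumaOptions); // ["distribute", "isolate", "numactl", "mirror", false]

// the coerce callback treats a missing or empty value as "NUMA disabled" before parsing
const rawValue: string | undefined = process.env.NUMA_POLICY; // hypothetical source of the flag value
const numa = (rawValue == null || rawValue === "")
    ? false
    : parseNumaOption(rawValue as (typeof llamaNumaOptions)[number]);

console.log(numa); // e.g. "distribute", or `false` when unset or unrecognized
```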
diff --git a/src/cli/commands/InfillCommand.ts b/src/cli/commands/InfillCommand.ts
index a47df068..b07f1e59 100644
--- a/src/cli/commands/InfillCommand.ts
+++ b/src/cli/commands/InfillCommand.ts
@@ -7,7 +7,8 @@ import fs from "fs-extra";
 import prettyMilliseconds from "pretty-ms";
 import {getLlama} from "../../bindings/getLlama.js";
 import {
-    BuildGpu, LlamaLogLevel, LlamaLogLevelGreaterThan, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption
+    BuildGpu, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaNuma, llamaNumaOptions, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption,
+    parseNumaOption
 } from "../../bindings/types.js";
 import {LlamaCompletion} from "../../evaluator/LlamaCompletion.js";
 import withOra from "../../utils/withOra.js";
@@ -51,6 +52,7 @@ type InfillCommand = {
     tokenPredictionDraftModel?: string,
     tokenPredictionModelContextSize?: number,
     debug: boolean,
+    numa?: LlamaNuma,
     meter: boolean,
     timing: boolean,
     noMmap: boolean,
@@ -228,6 +230,20 @@ export const InfillCommand: CommandModule = {
                 default: false,
                 description: "Print llama.cpp info and debug logs"
             })
+            .option("numa", {
+                type: "string",
+
+                // yargs types don't support passing `false` as a choice, although it is supported by yargs
+                choices: llamaNumaOptions as any as Exclude[],
+                coerce: (value) => {
+                    if (value == null || value == "")
+                        return false;
+
+                    return parseNumaOption(value);
+                },
+                defaultDescription: "false",
+                description: "NUMA allocation policy. See the `numa` option on the `getLlama` method for more information"
+            })
             .option("meter", {
                 type: "boolean",
                 default: false,
@@ -255,14 +271,14 @@ export const InfillCommand: CommandModule = {
         flashAttention, swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, repeatPenalty,
         lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
         tokenPredictionDraftModel, tokenPredictionModelContextSize,
-        debug, meter, timing, noMmap, printTimings
+        debug, numa, meter, timing, noMmap, printTimings
     }) {
         try {
             await RunInfill({
                 modelPath, header, gpu, systemInfo, prefix, prefixFile, suffix, suffixFile, contextSize, batchSize,
                 flashAttention, swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty,
                 repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
-                tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, meter, timing, noMmap, printTimings
+                tokenPredictionDraftModel, tokenPredictionModelContextSize, debug, numa, meter, timing, noMmap, printTimings
             });
         } catch (err) {
             await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
@@ -277,7 +293,7 @@ async function RunInfill({
     modelPath: modelArg, header: headerArg, gpu, systemInfo, prefix, prefixFile, suffix, suffixFile, contextSize, batchSize,
     flashAttention, swaFullCache, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty,
     repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
-    tokenPredictionDraftModel, tokenPredictionModelContextSize, maxTokens, debug, meter, timing, noMmap, printTimings
+    tokenPredictionDraftModel, tokenPredictionModelContextSize, maxTokens, debug, numa, meter, timing, noMmap, printTimings
 }: InfillCommand) {
     if (contextSize === -1) contextSize = undefined;
     if (gpuLayers === -1) gpuLayers = undefined;
@@ -292,11 +308,13 @@ async function RunInfill({
         : LlamaLogLevel.warn;
     const llama = gpu == null
         ? await getLlama("lastBuild", {
-            logLevel: llamaLogLevel
+            logLevel: llamaLogLevel,
+            numa
         })
         : await getLlama({
             gpu,
-            logLevel: llamaLogLevel
+            logLevel: llamaLogLevel,
+            numa
         });
     const logBatchSize = batchSize != null;
     const useMmap = !noMmap && llama.supportsMmap;
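All three commands resolve their `Llama` instance the same way, now threading `numa` through both branches. Extracted for clarity as a sketch (not part of the diff; it assumes `BuildGpu` and `LlamaNuma` are re-exported from the package root):

```typescript
// Sketch only — the shared pattern used by RunChat, RunCompletion and RunInfill above.
import {getLlama, LlamaLogLevel, type BuildGpu, type LlamaNuma} from "node-llama-cpp";

async function resolveLlama(gpu: BuildGpu | undefined, logLevel: LlamaLogLevel, numa?: LlamaNuma) {
    return gpu == null
        // no explicit GPU requested: reuse the last local build
        ? await getLlama("lastBuild", {logLevel, numa})
        // otherwise resolve a binary for the requested GPU type
        : await getLlama({gpu, logLevel, numa});
}
```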
diff --git a/test/modelDependent/bgeReranker/rank.test.ts b/test/modelDependent/bgeReranker/rank.test.ts
index b82db45e..37a4bd4e 100644
--- a/test/modelDependent/bgeReranker/rank.test.ts
+++ b/test/modelDependent/bgeReranker/rank.test.ts
@@ -4,7 +4,10 @@ import {getTestLlama} from "../../utils/getTestLlama.js";
 
 describe("bgeReranker", () => {
     describe("rank", () => {
-        test("simple ranking", {timeout: 1000 * 60 * 60 * 2}, async () => {
+        test("simple ranking", {timeout: 1000 * 60 * 60 * 2}, async (test) => {
+            if (process.platform !== "darwin")
+                test.skip(); // the scores are a bit different on different platforms, so skipping on other platforms due to flakiness
+
             const modelPath = await getModelFile("bge-reranker-v2-m3-Q8_0.gguf");
             const llama = await getTestLlama();
 
@@ -28,7 +31,7 @@ describe("bgeReranker", () => {
                 "Cleaning the house is a good way to keep it tidy"
             ];
 
-            const query = "Tell me a nature geographical fact";
+            const query = "Tell me a geographical fact";
 
             const ranks = await Promise.all(
                 documents.map((doc) => rankingContext.rank(query, doc))
             );
@@ -40,24 +43,27 @@ describe("bgeReranker", () => {
             const highestRankDocument = documents[highestRankIndex];
 
             expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world");
 
-            expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.0024726231566347743");
+            expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.026596993576865856");
             expect(simplifyRanks(ranks)).toMatchInlineSnapshot(`
               [
                 0.00002039908727992137,
                 0.00006772414961977023,
                 0.00003716893710288947,
+                0.004496273160941178,
                 0.00003716893710288947,
+                0.026596993576865856,
                 0.00003716893710288947,
-                0.0024726231566347743,
-                0.00003716893710288947,
-                0.00003716893710288947,
+                0.00002039908727992137,
                 0.00002039908727992137,
                 0.00003716893710288947,
               ]
             `);
         });
 
-        test("rank all", {timeout: 1000 * 60 * 60 * 2}, async () => {
+        test("rank all", {timeout: 1000 * 60 * 60 * 2}, async (test) => {
+            if (process.platform !== "darwin")
+                test.skip(); // the scores are a bit different on different platforms, so skipping on other platforms due to flakiness
+
             const modelPath = await getModelFile("bge-reranker-v2-m3-Q8_0.gguf");
             const llama = await getTestLlama();
 
@@ -81,7 +87,7 @@ describe("bgeReranker", () => {
                 "Cleaning the house is a good way to keep it tidy"
             ];
 
-            const query = "Tell me a nature geographical fact";
+            const query = "Tell me a geographical fact";
 
             const ranks = await rankingContext.rankAll(query, documents);
 
@@ -91,24 +97,27 @@ describe("bgeReranker", () => {
             const highestRankDocument = documents[highestRankIndex];
 
             expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world");
 
-            expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.0024726231566347743");
+            expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.026596993576865856");
             expect(simplifyRanks(ranks)).toMatchInlineSnapshot(`
               [
                 0.00002039908727992137,
                 0.00006772414961977023,
                 0.00003716893710288947,
+                0.004496273160941178,
                 0.00003716893710288947,
+                0.026596993576865856,
                 0.00003716893710288947,
-                0.0024726231566347743,
-                0.00003716893710288947,
-                0.00003716893710288947,
+                0.00002039908727992137,
                 0.00002039908727992137,
                 0.00003716893710288947,
               ]
             `);
         });
 
-        test("rank and sort", {timeout: 1000 * 60 * 60 * 2}, async () => {
+        test("rank and sort", {timeout: 1000 * 60 * 60 * 2}, async (test) => {
+            if (process.platform !== "darwin")
+                test.skip(); // the scores are a bit different on different platforms, so skipping on other platforms due to flakiness
+
             const modelPath = await getModelFile("bge-reranker-v2-m3-Q8_0.gguf");
             const llama = await getTestLlama();
 
@@ -130,7 +139,7 @@
                 "Cleaning the house is a good way to keep it tidy"
             ];
 
-            const query = "Tell me a nature geographical fact";
+            const query = "Tell me a geographical fact";
 
             const rankedDocuments = await rankingContext.rankAndSort(query, documents);
 
@@ -141,21 +150,25 @@
             expect(simplifySortedRanks([topDocument])[0]).toMatchInlineSnapshot(`
               {
                 "document": "Mount Everest is the tallest mountain in the world",
-                "score": 0.0024726231566347743,
+                "score": 0.026596993576865856,
               }
             `);
             expect(simplifySortedRanks(rankedDocuments)).toMatchInlineSnapshot(`
               [
                 {
                   "document": "Mount Everest is the tallest mountain in the world",
-                  "score": 0.0024726231566347743,
+                  "score": 0.026596993576865856,
+                },
+                {
+                  "document": "The capital of France is Paris",
+                  "score": 0.004496273160941178,
                 },
                 {
                   "document": "I love eating pizza with extra cheese",
                   "score": 0.00006772414961977023,
                 },
                 {
-                  "document": "The capital of France is Paris",
+                  "document": "A warm cup of tea is perfect for a cold winter day",
                   "score": 0.00003716893710288947,
                 },
                 {
@@ -166,10 +179,6 @@
                   "document": "Cleaning the house is a good way to keep it tidy",
                   "score": 0.00003716893710288947,
                 },
-                {
-                  "document": "A warm cup of tea is perfect for a cold winter day",
-                  "score": 0.00003716893710288947,
-                },
                 {
                   "document": "Not all the things that shine are made of gold",
                   "score": 0.00002039908727992137,
@@ -181,6 +190,51 @@
               ]
             `);
         });
+
+        test("rank and sort without scores", {timeout: 1000 * 60 * 60 * 2}, async () => {
+            const modelPath = await getModelFile("bge-reranker-v2-m3-Q8_0.gguf");
+            const llama = await getTestLlama();
+
+            const model = await llama.loadModel({
+                modelPath
+            });
+            const rankingContext = await model.createRankingContext({
+                contextSize: 512
+            });
+
+            const documents = [
+                "The sky is clear and blue today",
+                "I love eating pizza with extra cheese",
+                "Dogs love to play fetch with their owners",
+                "The capital of France is Paris",
+                "Mount Everest is the tallest mountain in the world",
+                "A warm cup of tea is perfect for a cold winter day",
+                "Not all the things that shine are made of gold",
+                "Cleaning the house is a good way to keep it tidy"
+            ];
+
+            const query = "Tell me a geographical fact";
+
+            const rankedDocuments = await rankingContext.rankAndSort(query, documents);
+
+            const topDocument = rankedDocuments[0]!;
+
+            expect(topDocument.document).to.eql("Mount Everest is the tallest mountain in the world");
+
+            expect(onlyDocuments([topDocument])[0]).toMatchInlineSnapshot('"Mount Everest is the tallest mountain in the world"');
+            expect(onlyDocuments(rankedDocuments)).toMatchInlineSnapshot(`
+              [
+                "Mount Everest is the tallest mountain in the world",
+                "The capital of France is Paris",
+                "I love eating pizza with extra cheese",
+                "A warm cup of tea is perfect for a cold winter day",
+                "Dogs love to play fetch with their owners",
+                "Cleaning the house is a good way to keep it tidy",
+                "Not all the things that shine are made of gold",
+                "The sky is clear and blue today",
+              ]
+            `);
+        });
     });
 });
@@ -195,6 +249,10 @@ function simplifySortedRanks
 
+function onlyDocuments(rankedDocuments: readonly {document: string}[]) {
+    return rankedDocuments.map((item) => item.document);
+}
+
 function simplifyScore(score: number) {
     return toSigmoid(parseFloat(roundToPrecision(toLogit(score), 0.6).toFixed(1)));
 }
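The snapshots above are compared through `simplifyScore`, which coarsens scores so small numeric differences between builds don't break the inline snapshots. The helper implementations it relies on are not part of this diff; here is a minimal sketch under the assumption that `toLogit`/`toSigmoid` are the standard logit/sigmoid pair and `roundToPrecision` rounds to the nearest multiple of a step:

```typescript
// Sketch only — assumed implementations of the helpers `simplifyScore` relies on.
function toLogit(probability: number) {
    return Math.log(probability / (1 - probability));
}

function toSigmoid(logit: number) {
    return 1 / (1 + Math.exp(-logit));
}

function roundToPrecision(value: number, precision: number) {
    return Math.round(value / precision) * precision;
}

// copied from the diff above: round in logit space, then map back to a probability-like score
function simplifyScore(score: number) {
    return toSigmoid(parseFloat(roundToPrecision(toLogit(score), 0.6).toFixed(1)));
}

console.log(simplifyScore(0.0265)); // nearby scores collapse to the same simplified value
```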