iframe-proxy | Sunbelt Computer Software

613 lines (569 loc) · 26.6 KB
/**
 * @fileoverview NumKong - Portable mixed-precision BLAS-like vector math library
 *
 * NumKong provides SIMD-accelerated distance metrics and vector operations for
 * x86, ARM, RISC-V, and WASM platforms. The library automatically detects and uses
 * the best available SIMD instruction set at runtime.
 *
 * @module numkong
 * @author Ash Vardanian
 *
 * @example
 * ```typescript
 * import { dot, euclidean, Float16Array, DType } from 'numkong';
 *
 * // Auto-detected types
 * const a = new Float32Array([1, 2, 3]);
 * const b = new Float32Array([4, 5, 6]);
 * dot(a, b);        // 32
 * euclidean(a, b);  // 5.196...
 *
 * // Custom types with explicit dtype
 * const c = new Float16Array([1, 2, 3]);
 * const d = new Float16Array([4, 5, 6]);
 * dot(c, d, DType.F16); // 32
 * ```
 */
import build from "node-gyp-build";
import { createRequire } from "node:module";
import * as path from "node:path";
import { existsSync } from "node:fs";
import { getFileName, getRoot } from "bindings";
import { setConversionFunctions, Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, DType, dtypeToString, outputDtype, KernelFamily } from "./types.js";
/**
 * Locates and loads the native N-API addon.
 *
 * Two strategies are attempted in order; a failure of either one is swallowed
 * on purpose so the next strategy (or the caller) can take over:
 *   1. the platform-specific optional dependency `@numkong/<platform>-<arch>`;
 *   2. `node-gyp-build`, pointed at the directory found by `getBuildDir`.
 *
 * @returns The loaded addon object, or `null` when neither strategy succeeds.
 */
function loadNativeAddon(): any {
  // Duplicate-libomp guard. We ship our own `libomp.dylib` next to
  // `numkong.node` in each `@numkong/darwin-*` package, but another OpenMP
  // runtime (e.g. one loaded by another native addon) may already be
  // resident. `KMP_DUPLICATE_LIB_OK=TRUE` tells LLVM libomp / Intel
  // libiomp5 to coexist. It must be in `process.env` before the `require()`
  // below triggers the addon's `dlopen`: libomp's constructor reads the
  // environment during dependency resolution, so setting it afterwards is
  // too late. Left unguarded because the variable is harmless on
  // platforms / runtimes (GCC libgomp) that don't recognize it, and a user
  // who set it to something else is respected by `??=`. See
  // `python/numkong/__init__.py` for the Python analog.
  process.env.KMP_DUPLICATE_LIB_OK ??= "TRUE";

  // Tier 1: platform-specific optional dependency (@numkong/<os>-<arch>)
  try {
    const req = createRequire(path.join(getDirName(), "noop.js"));
    return req(`@numkong/${process.platform}-${process.arch}`);
  } catch { }

  // Tier 2: node-gyp-build fallback (local dev, unsupported platform, build-from-source)
  try {
    return build(getBuildDir(getDirName()));
  } catch { }

  return null;
}
/**
 * Handle to the native addon; every exported operation in this module
 * forwards to it. Loading happens once, at module evaluation time.
 */
let compiled: any = loadNativeAddon();

if (compiled) {
  // Hand the native scalar/bulk converters to the typed-array wrappers in
  // `./types.js`.
  setConversionFunctions({
    castF16ToF32: compiled.castF16ToF32,
    castF32ToF16: compiled.castF32ToF16,
    castBF16ToF32: compiled.castBF16ToF32,
    castF32ToBF16: compiled.castF32ToBF16,
    castE4M3ToF32: compiled.castE4M3ToF32,
    castF32ToE4M3: compiled.castF32ToE4M3,
    castE5M2ToF32: compiled.castE5M2ToF32,
    castF32ToE5M2: compiled.castF32ToE5M2,
    cast: compiled.cast,
  });
} else {
  // Fail at import time instead of on the first call into a missing addon.
  throw new Error(
    "NumKong native addon not found. Install with `npm install numkong` (which fetches " +
    "the prebuilt binary), or build from source with `npm run install`. " +
    "For WASM, import from 'numkong/wasm' instead."
  );
}
 * CPU capability bit masks in chronological order (by first commercial silicon).
 * Use these with getCapabilities() to check for specific SIMD support.
export const Capability = {
  SERIAL: 1n << 0n,          // Always: Fallback
  NEON: 1n << 1n,            // 2013: ARM NEON
  HASWELL: 1n << 2n,         // 2013: Intel AVX2
  SKYLAKE: 1n << 3n,         // 2017: Intel AVX-512
  NEONHALF: 1n << 4n,        // 2017: ARM NEON FP16
  NEONSDOT: 1n << 5n,        // 2017: ARM NEON i8 dot
  NEONFHM: 1n << 6n,         // 2018: ARM NEON FP16 FML
  ICELAKE: 1n << 7n,         // 2019: Intel AVX-512 VNNI
  GENOA: 1n << 8n,           // 2020: Intel/AMD AVX-512 BF16
  NEONBFDOT: 1n << 9n,       // 2020: ARM NEON BF16
  SVE: 1n << 10n,            // 2020: ARM SVE
  SVEHALF: 1n << 11n,        // 2020: ARM SVE FP16
  SVESDOT: 1n << 12n,        // 2020: ARM SVE i8 dot
  ALDER: 1n << 13n,          // 2021: Intel AVX2+VNNI
  SVEBFDOT: 1n << 14n,       // 2021: ARM SVE BF16
  SVE2: 1n << 15n,           // 2022: ARM SVE2
  V128RELAXED: 1n << 16n,    // 2022: WASM Relaxed SIMD
  SAPPHIRE: 1n << 17n,       // 2023: Intel AVX-512 FP16
  SAPPHIREAMX: 1n << 18n,    // 2023: Intel Sapphire AMX
  RVV: 1n << 19n,            // 2023: RISC-V Vector
  RVVHALF: 1n << 20n,        // 2023: RISC-V Zvfh
  RVVBF16: 1n << 21n,        // 2023: RISC-V Zvfbfwma
  GRANITEAMX: 1n << 22n,     // 2024: Intel Granite AMX FP16
  TURIN: 1n << 23n,          // 2024: AMD Turin AVX-512 CD
  SME: 1n << 24n,            // 2024: ARM SME
  SME2: 1n << 25n,           // 2024: ARM SME2
  SMEF64: 1n << 26n,         // 2024: ARM SME F64
  SMEFA64: 1n << 27n,        // 2024: ARM SME FA64
  SVE2P1: 1n << 28n,         // 2025+: ARM SVE2.1
  SME2P1: 1n << 29n,         // 2025+: ARM SME2.1
  SMEHALF: 1n << 30n,        // 2025+: ARM SME F16F16
  SMEBF16: 1n << 31n,        // 2025+: ARM SME B16B16
  SMELUT2: 1n << 32n,        // 2025+: ARM SME LUTv2
  RVVBB: 1n << 33n,          // 2025+: RISC-V Zvbb
  SIERRA: 1n << 34n,         // 2024: Intel AVXVNNIINT8
  SMEBI32: 1n << 35n,        // 2025+: ARM SME BI32I32
  LOONGSONASX: 1n << 36n,    // LoongArch LASX 256-bit SIMD
  POWERVSX: 1n << 37n,       // Power VSX 128-bit SIMD
  DIAMOND: 1n << 38n,        // 2025+: Intel AVX10.2
  NEONFP8: 1n << 39n,        // ARM NEON FP8
} as const;
export { Float16Array, BFloat16Array, E4M3Array, E5M2Array, BinaryArray, TensorBase, VectorBase, VectorView, Vector, MatrixBase, Matrix, PackedMatrix, outputDtype };
// Scalar and bulk conversion entry points, re-exported straight from the
// native addon.
export const {
  /** FP16 (given as uint16 bits) -> FP32, one value at a time */
  castF16ToF32,
  /** FP32 -> FP16 (result is uint16 bits), one value at a time */
  castF32ToF16,
  /** BF16 (given as uint16 bits) -> FP32, one value at a time */
  castBF16ToF32,
  /** FP32 -> BF16 (result is uint16 bits), one value at a time */
  castF32ToBF16,
  /** E4M3 (given as uint8 bits) -> FP32, one value at a time */
  castE4M3ToF32,
  /** FP32 -> E4M3 (result is uint8 bits), one value at a time */
  castF32ToE4M3,
  /** E5M2 (given as uint8 bits) -> FP32, one value at a time */
  castE5M2ToF32,
  /** FP32 -> E5M2 (result is uint8 bits), one value at a time */
  castF32ToE5M2,
  /** Bulk conversion between numeric types; writes into the destination array in-place */
  cast,
} = compiled;
export { DType };
 * Numeric arrays supported by distance metrics with auto-detected dtype.
 * These standard TypedArrays are auto-detected by the N-API binding.
export type NumericArray = Float64Array | Float32Array | Int8Array | Uint8Array;
 * Extended array types supported by distance metrics with explicit dtype parameter.
 * Includes Uint16Array (backing type for Float16Array and BFloat16Array) in addition
 * to the auto-detected types. Pass a dtype string as the third argument to distance
 * functions when using custom types.
export type DistanceArray = Float64Array | Float32Array | Int8Array | Uint8Array | Uint16Array;
 * Union type for all array types (including custom types for conversions)
export type NumKongArray =
  | Float64Array
  | Float32Array
  | Float16Array
  | BFloat16Array
  | E4M3Array
  | E5M2Array
  | Int8Array
  | Uint8Array
  | BinaryArray;
 * Extract a TypedArray from a TensorBase for the N-API backend.
 * The native backend doesn't benefit from zero-copy TensorBase (Node.js TypedArrays
 * already share process memory), but accepting TensorBase keeps the API uniform.
function unwrapTensor(input: TensorBase): { arr: DistanceArray; dtype: DType } {
  switch (input.dtype) {
    case DType.F64: return { arr: new Float64Array(input.buffer, input.byteOffset, input.length), dtype: input.dtype };
    case DType.F32: return { arr: new Float32Array(input.buffer, input.byteOffset, input.length), dtype: input.dtype };
    case DType.F16: case DType.BF16: return { arr: new Uint16Array(input.buffer, input.byteOffset, input.length), dtype: input.dtype };
    case DType.I8: return { arr: new Int8Array(input.buffer, input.byteOffset, input.length), dtype: input.dtype };
    case DType.U8: case DType.U1: return { arr: new Uint8Array(input.buffer, input.byteOffset, input.length), dtype: input.dtype };
    default: return { arr: new Uint8Array(input.buffer, input.byteOffset, input.length), dtype: input.dtype };
 * Returns the runtime-detected SIMD capabilities as a bitmask.
 * The bitmask includes flags for various SIMD instruction sets like AVX2, AVX-512,
 * ARM NEON, ARM SVE, ARM SME, RISC-V Vector, and WASM SIMD extensions.
 * Use with Capability constants to check for specific instruction sets.
 * @returns {bigint} Bitmask of capability flags (use with Capability constants)
 * @example
 * import { getCapabilities, Capability } from 'numkong';
 * const caps = getCapabilities();
 * console.log(`Capabilities: 0x${caps.toString(16)}`);
 * // Check for specific SIMD support
 * if (caps & Capability.HASWELL) {
 *   console.log('AVX2 available');
export const getCapabilities = (): bigint => {
  return compiled.getCapabilities();
 * Checks if a specific SIMD capability is available at runtime.
 * This is a convenience wrapper around getCapabilities() that tests for a single capability.
 * @param {bigint} cap - Capability flag to check (from Capability constants)
 * @returns {boolean} True if the capability is available, false otherwise
 * @example
 * import { hasCapability, Capability } from 'numkong';
 * if (hasCapability(Capability.HASWELL)) {
 *   console.log('Intel AVX2 (Haswell) available');
 * if (hasCapability(Capability.NEON)) {
 *   console.log('ARM NEON available');
 * if (hasCapability(Capability.V128RELAXED)) {
 *   console.log('WASM Relaxed SIMD available');
export const hasCapability = (cap: bigint): boolean => {
  return (getCapabilities() & cap) !== 0n;
 * Computes the squared Euclidean distance between two vectors.
 * @param a - The first vector.
 * @param b - The second vector (must match the type of a).
 * @param dtype - Optional dtype string for custom types (e.g. 'f16', 'bf16', 'e4m3').
 * @returns {number} The squared Euclidean distance between vectors a and b.
export function sqeuclidean(a: NumericArray, b: NumericArray): number;
export function sqeuclidean(a: DistanceArray, b: DistanceArray, dtype: DType): number;
export function sqeuclidean(a: TensorBase, b: TensorBase): number;
export function sqeuclidean(a: DistanceArray | TensorBase, b: DistanceArray | TensorBase, dtype?: DType): number {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.sqeuclidean(u.arr, v.arr, dtypeToString(u.dtype)); }
  return dtype !== undefined ? compiled.sqeuclidean(a, b, dtypeToString(dtype)) : compiled.sqeuclidean(a, b);
 * Computes the Euclidean distance between two vectors.
 * @param a - The first vector.
 * @param b - The second vector (must match the type of a).
 * @param dtype - Optional dtype string for custom types (e.g. 'f16', 'bf16', 'e4m3').
 * @returns {number} The Euclidean distance between vectors a and b.
export function euclidean(a: NumericArray, b: NumericArray): number;
export function euclidean(a: DistanceArray, b: DistanceArray, dtype: DType): number;
export function euclidean(a: TensorBase, b: TensorBase): number;
export function euclidean(a: DistanceArray | TensorBase, b: DistanceArray | TensorBase, dtype?: DType): number {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.euclidean(u.arr, v.arr, dtypeToString(u.dtype)); }
  return dtype !== undefined ? compiled.euclidean(a, b, dtypeToString(dtype)) : compiled.euclidean(a, b);
 * Computes the angular distance between two vectors.
 * @param a - The first vector.
 * @param b - The second vector (must match the type of a).
 * @param dtype - Optional dtype string for custom types (e.g. 'f16', 'bf16', 'e4m3').
 * @returns {number} The angular distance between vectors a and b.
export function angular(a: NumericArray, b: NumericArray): number;
export function angular(a: DistanceArray, b: DistanceArray, dtype: DType): number;
export function angular(a: TensorBase, b: TensorBase): number;
export function angular(a: DistanceArray | TensorBase, b: DistanceArray | TensorBase, dtype?: DType): number {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.angular(u.arr, v.arr, dtypeToString(u.dtype)); }
  return dtype !== undefined ? compiled.angular(a, b, dtypeToString(dtype)) : compiled.angular(a, b);
 * Computes the inner product of two vectors (same as dot product).
 * @param a - The first vector.
 * @param b - The second vector (must match the type of a).
 * @param dtype - Optional dtype string for custom types (e.g. 'f16', 'bf16', 'e4m3').
 * @returns {number} The inner product of vectors a and b.
export function inner(a: NumericArray, b: NumericArray): number;
export function inner(a: DistanceArray, b: DistanceArray, dtype: DType): number;
export function inner(a: TensorBase, b: TensorBase): number;
export function inner(a: DistanceArray | TensorBase, b: DistanceArray | TensorBase, dtype?: DType): number {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.inner(u.arr, v.arr, dtypeToString(u.dtype)); }
  return dtype !== undefined ? compiled.inner(a, b, dtypeToString(dtype)) : compiled.inner(a, b);
 * Computes the dot product of two vectors (same as inner product).
 * @param a - The first vector.
 * @param b - The second vector (must match the type of a).
 * @param dtype - Optional dtype string for custom types (e.g. 'f16', 'bf16', 'e4m3').
 * @returns {number} The dot product of vectors a and b.
export function dot(a: NumericArray, b: NumericArray): number;
export function dot(a: DistanceArray, b: DistanceArray, dtype: DType): number;
export function dot(a: TensorBase, b: TensorBase): number;
export function dot(a: DistanceArray | TensorBase, b: DistanceArray | TensorBase, dtype?: DType): number {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.dot(u.arr, v.arr, dtypeToString(u.dtype)); }
  return dtype !== undefined ? compiled.dot(a, b, dtypeToString(dtype)) : compiled.dot(a, b);
 * Computes the bitwise Hamming distance between two vectors.
 * Both vectors are treated as bit-packed (u1 dtype), where each byte contains 8 bits.
 * Use toBinary() to convert numeric arrays to bit-packed format.
 * @param {Uint8Array | BinaryArray} a - The first bit-packed vector.
 * @param {Uint8Array | BinaryArray} b - The second bit-packed vector.
 * @returns {number} The Hamming distance (number of differing bits) between vectors a and b.
export const hamming = (a: Uint8Array | BinaryArray | TensorBase, b: Uint8Array | BinaryArray | TensorBase): number => {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.hamming(u.arr, v.arr); }
  return compiled.hamming(a, b);
 * Computes the bitwise Jaccard distance between two vectors.
 * Both vectors are treated as bit-packed (u1 dtype), where each byte contains 8 bits.
 * Use toBinary() to convert numeric arrays to bit-packed format.
 * @param {Uint8Array | BinaryArray} a - The first bit-packed vector.
 * @param {Uint8Array | BinaryArray} b - The second bit-packed vector.
 * @returns {number} The Jaccard distance (1 - Jaccard similarity) between vectors a and b.
export const jaccard = (a: Uint8Array | BinaryArray | TensorBase, b: Uint8Array | BinaryArray | TensorBase): number => {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.jaccard(u.arr, v.arr); }
  return compiled.jaccard(a, b);
 * Computes the Kullback-Leibler divergence between two probability distributions.
 * Both vectors must represent valid probability distributions (non-negative, sum to 1).
 * Supports f64, f32 (auto-detected) and f16, bf16 (with explicit dtype).
 * @param a - The first probability distribution.
 * @param b - The second probability distribution (must match the type of a).
 * @param dtype - Optional dtype string for custom types (e.g. 'f16', 'bf16').
 * @returns {number} The Kullback-Leibler divergence KL(a || b) = Σ a[i] * log(a[i] / b[i]).
export function kullbackleibler(a: Float64Array | Float32Array, b: Float64Array | Float32Array): number;
export function kullbackleibler(a: Float64Array | Float32Array | Uint16Array, b: Float64Array | Float32Array | Uint16Array, dtype: DType): number;
export function kullbackleibler(a: TensorBase, b: TensorBase): number;
export function kullbackleibler(a: Float64Array | Float32Array | Uint16Array | TensorBase, b: Float64Array | Float32Array | Uint16Array | TensorBase, dtype?: DType): number {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.kullbackleibler(u.arr, v.arr, dtypeToString(u.dtype)); }
  return dtype !== undefined ? compiled.kullbackleibler(a, b, dtypeToString(dtype)) : compiled.kullbackleibler(a, b);
 * Computes the Jensen-Shannon distance between two probability distributions.
 * Both vectors must represent valid probability distributions (non-negative, sum to 1).
 * Supports f64, f32 (auto-detected) and f16, bf16 (with explicit dtype).
 * JSD is the square root of the symmetrized KL divergence, forming a true metric.
 * @param a - The first probability distribution.
 * @param b - The second probability distribution (must match the type of a).
 * @param dtype - Optional dtype string for custom types (e.g. 'f16', 'bf16').
 * @returns {number} The Jensen-Shannon distance d_JS(a, b) = √(0.5 × (KL(a‖m) + KL(b‖m))), where m = (a + b) / 2.
export function jensenshannon(a: Float64Array | Float32Array, b: Float64Array | Float32Array): number;
export function jensenshannon(a: Float64Array | Float32Array | Uint16Array, b: Float64Array | Float32Array | Uint16Array, dtype: DType): number;
export function jensenshannon(a: TensorBase, b: TensorBase): number;
export function jensenshannon(a: Float64Array | Float32Array | Uint16Array | TensorBase, b: Float64Array | Float32Array | Uint16Array | TensorBase, dtype?: DType): number {
  if (a instanceof TensorBase) { const u = unwrapTensor(a), v = unwrapTensor(b as TensorBase); return compiled.jensenshannon(u.arr, v.arr, dtypeToString(u.dtype)); }
  return dtype !== undefined ? compiled.jensenshannon(a, b, dtypeToString(dtype)) : compiled.jensenshannon(a, b);
 * Quantizes a numeric vector into a bit-packed binary representation.
 * Converts each element to a single bit: 1 for positive values, 0 for non-positive values.
 * The bits are packed into bytes (8 bits per byte) in big-endian bit order within each byte.
 * This is the required format for hamming() and jaccard() distance functions.
 * @param {Float32Array | Float64Array | Int8Array} vector - The vector to quantize and pack.
 * @returns {Uint8Array} A bit-packed array where each byte contains 8 binary values.
 * @example
 * const vec = new Float32Array([1.5, -2.3, 0.0, 3.1, -1.0, 2.0, 0.5, -0.5]);
 * const binary = toBinary(vec);
 * // Result: Uint8Array([0b10010110]) = [0x96]
 * //   bits: [1, 0, 0, 1, 0, 1, 1, 0] for elements [+, -, 0, +, -, +, +, -]
 * // Use with Hamming distance
 * const a = toBinary(new Float32Array([1, 2, 3]));
 * const b = toBinary(new Float32Array([1, -2, 3]));
 * const dist = hamming(a, b); // Counts differing bits
export const toBinary = (vector: Float32Array | Float64Array | Int8Array): Uint8Array => {
  const byteLength = Math.ceil(vector.length / 8);
  const packedVector = new Uint8Array(byteLength);
  for (let i = 0; i < vector.length; i++) {
    if (vector[i] > 0) {
      const byteIndex = Math.floor(i / 8);
      const bitPosition = 7 - (i % 8);
      packedVector[byteIndex] |= (1 << bitPosition);
  return packedVector;
 * Extract a TypedArray from a Matrix for passing to the N-API backend.
function unwrapMatrix(matrix: Matrix): { array: DistanceArray; dtype: DType } {
  switch (matrix.dtype) {
    case DType.F64: return { array: new Float64Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols), dtype: matrix.dtype };
    case DType.F32: return { array: new Float32Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols), dtype: matrix.dtype };
    case DType.F16: case DType.BF16: return { array: new Uint16Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols), dtype: matrix.dtype };
    case DType.I8: return { array: new Int8Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols), dtype: matrix.dtype };
    case DType.U8: return { array: new Uint8Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols), dtype: matrix.dtype };
    default: return { array: new Uint8Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols), dtype: matrix.dtype };
 * Extract a result TypedArray from a Matrix matching its output dtype.
function unwrapResultMatrix(matrix: Matrix): Float64Array | Float32Array | Int32Array | Uint32Array {
  switch (matrix.dtype) {
    case DType.F64: return new Float64Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols);
    case DType.F32: return new Float32Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols);
    case DType.I32: return new Int32Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols);
    case DType.U32: return new Uint32Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols);
    default: return new Float64Array(matrix.buffer, matrix.byteOffset, matrix.rows * matrix.cols);
 * Query the packed buffer byte count for a given matrix shape and dtype.
export function dotsPackedSize(width: number, depth: number, dtype: DType): number {
  return compiled.dotsPackedSize(width, depth, dtypeToString(dtype));
 * Pack a Matrix for use with packed GEMM-like operations.
export function dotsPack(matrix: Matrix): PackedMatrix {
  const { array, dtype } = unwrapMatrix(matrix);
  const result = compiled.dotsPack(array, matrix.rows, matrix.cols, matrix.rowStride, dtypeToString(dtype));
  return new PackedMatrix(result.buffer, result.width, result.depth, matrix.dtype, result.byteLength);
/**
 * Shared driver for the `*Packed` kernels: validates shapes, allocates the
 * output when none is supplied, and dispatches to the named addon function.
 *
 * @param compiledName - Name of the addon function to invoke.
 * @param family - Kernel family used to derive the output dtype.
 * @param a - Left-hand matrix; its column count must equal `packed.depth`.
 * @param packed - Pre-packed right-hand matrix.
 * @param out - Optional destination; when omitted an `a.rows × packed.width`
 *   matrix of the derived output dtype is created.
 * @returns The destination matrix (the caller's `out` when one was supplied).
 * @throws Error when `a.cols` differs from `packed.depth`.
 */
function packedOperation(compiledName: string, family: KernelFamily, a: Matrix, packed: PackedMatrix, out?: Matrix): Matrix {
  if (a.cols !== packed.depth) {
    throw new Error(`Matrix cols (${a.cols}) must match packed depth (${packed.depth})`);
  }

  // Derived even when `out` is supplied, so an unsupported family/dtype
  // combination is handled identically in both cases.
  const outDtype = outputDtype(family, a.dtype);
  const target = out ? out : new Matrix(a.rows, packed.width, outDtype);

  const aUnwrapped = unwrapMatrix(a);
  const resultArray = unwrapResultMatrix(target);
  compiled[compiledName](
    aUnwrapped.array, packed.buffer, resultArray,
    a.rows, packed.width, a.cols,
    a.rowStride, target.rowStride,
    dtypeToString(a.dtype),
  );
  return target;
}
/**
 * Shared driver for the `*Symmetric` kernels: allocates the output when none
 * is supplied and dispatches to the named addon function.
 *
 * @param compiledName - Name of the addon function to invoke.
 * @param family - Kernel family used to derive the output dtype.
 * @param vectors - Matrix of input vectors.
 * @param out - Optional destination; when omitted a `vectors.rows × vectors.rows`
 *   matrix of the derived output dtype is created.
 * @param rowStart - First row forwarded to the addon (defaults to 0).
 * @param rowCount - Row count forwarded to the addon; defaults to
 *   `vectors.rows - rowStart`. NOTE(review): presumably this selects the slice
 *   of output rows to compute — confirm against the native binding.
 * @returns The destination matrix (the caller's `out` when one was supplied).
 */
function symmetricOperation(compiledName: string, family: KernelFamily, vectors: Matrix, out?: Matrix, rowStart = 0, rowCount?: number): Matrix {
  const count = rowCount ?? vectors.rows - rowStart;
  const outDtype = outputDtype(family, vectors.dtype);
  const target = out ? out : new Matrix(vectors.rows, vectors.rows, outDtype);

  const vectorsUnwrapped = unwrapMatrix(vectors);
  const resultArray = unwrapResultMatrix(target);
  compiled[compiledName](
    vectorsUnwrapped.array, resultArray,
    vectors.rows, vectors.cols,
    vectors.rowStride, target.rowStride,
    rowStart, count,
    dtypeToString(vectors.dtype),
  );
  return target;
}
/**
 * Runs the native `dotsPacked` kernel ('dots' family) on `a` against a pre-packed matrix.
 *
 * @param a - Left-hand matrix; `a.cols` must equal `packed.depth`.
 * @param packed - Matrix previously packed with dotsPack().
 * @param out - Optional destination matrix; allocated when omitted.
 * @returns The destination matrix.
 */
export function dotsPacked(a: Matrix, packed: PackedMatrix, out?: Matrix): Matrix {
  return packedOperation('dotsPacked', 'dots', a, packed, out);
}
/**
 * Runs the native `angularsPacked` kernel ('angulars' family) on `a` against a pre-packed matrix.
 *
 * @param a - Left-hand matrix; `a.cols` must equal `packed.depth`.
 * @param packed - Pre-packed right-hand matrix.
 * @param out - Optional destination matrix; allocated when omitted.
 * @returns The destination matrix.
 */
export function angularsPacked(a: Matrix, packed: PackedMatrix, out?: Matrix): Matrix {
  return packedOperation('angularsPacked', 'angulars', a, packed, out);
}
/**
 * Runs the native `euclideansPacked` kernel ('euclideans' family) on `a` against a pre-packed matrix.
 *
 * @param a - Left-hand matrix; `a.cols` must equal `packed.depth`.
 * @param packed - Pre-packed right-hand matrix.
 * @param out - Optional destination matrix; allocated when omitted.
 * @returns The destination matrix.
 */
export function euclideansPacked(a: Matrix, packed: PackedMatrix, out?: Matrix): Matrix {
  return packedOperation('euclideansPacked', 'euclideans', a, packed, out);
}
/**
 * Runs the native `dotsSymmetric` kernel ('dots' family) over the rows of `vectors`.
 *
 * @param vectors - Matrix of input vectors.
 * @param out - Optional destination matrix; a `rows × rows` matrix is allocated when omitted.
 * @param options - Optional `rowStart` (default 0) and `rowCount` forwarded to the kernel.
 * @returns The destination matrix.
 */
export function dotsSymmetric(vectors: Matrix, out?: Matrix, options?: { rowStart?: number; rowCount?: number }): Matrix {
  return symmetricOperation('dotsSymmetric', 'dots', vectors, out, options?.rowStart ?? 0, options?.rowCount);
}
/**
 * Runs the native `angularsSymmetric` kernel ('angulars' family) over the rows of `vectors`.
 *
 * @param vectors - Matrix of input vectors.
 * @param out - Optional destination matrix; a `rows × rows` matrix is allocated when omitted.
 * @param options - Optional `rowStart` (default 0) and `rowCount` forwarded to the kernel.
 * @returns The destination matrix.
 */
export function angularsSymmetric(vectors: Matrix, out?: Matrix, options?: { rowStart?: number; rowCount?: number }): Matrix {
  return symmetricOperation('angularsSymmetric', 'angulars', vectors, out, options?.rowStart ?? 0, options?.rowCount);
}
/**
 * Runs the native `euclideansSymmetric` kernel ('euclideans' family) over the rows of `vectors`.
 *
 * @param vectors - Matrix of input vectors.
 * @param out - Optional destination matrix; a `rows × rows` matrix is allocated when omitted.
 * @param options - Optional `rowStart` (default 0) and `rowCount` forwarded to the kernel.
 * @returns The destination matrix.
 */
export function euclideansSymmetric(vectors: Matrix, out?: Matrix, options?: { rowStart?: number; rowCount?: number }): Matrix {
  return symmetricOperation('euclideansSymmetric', 'euclideans', vectors, out, options?.rowStart ?? 0, options?.rowCount);
}
/**
 * Default export: the module's runtime API gathered in a single object for
 * consumers that prefer `import numkong from 'numkong'` over named imports.
 * Every member is also available as a named export.
 */
export default {
  // Vector distance metrics
  dot,
  inner,
  sqeuclidean,
  euclidean,
  angular,
  hamming,
  jaccard,
  kullbackleibler,
  jensenshannon,
  toBinary,
  // Array and tensor types
  Float16Array,
  BFloat16Array,
  E4M3Array,
  E5M2Array,
  BinaryArray,
  TensorBase,
  VectorBase,
  VectorView,
  Vector,
  MatrixBase,
  Matrix,
  PackedMatrix,
  // Numeric conversions
  castF16ToF32,
  castF32ToF16,
  castBF16ToF32,
  castF32ToBF16,
  castE4M3ToF32,
  castF32ToE4M3,
  castE5M2ToF32,
  castF32ToE5M2,
  cast,
  // Packed and symmetric matrix kernels
  dotsPack,
  dotsPacked,
  angularsPacked,
  euclideansPacked,
  dotsSymmetric,
  angularsSymmetric,
  euclideansSymmetric,
  dotsPackedSize,
  outputDtype,
};
 * Finds the directory where the native build of the numkong module is located.
 * @param {string} dir - The directory to start the search from.
function getBuildDir(dir: string) {
  if (existsSync(path.join(dir, "build"))) return dir;
  if (existsSync(path.join(dir, "prebuilds"))) return dir;
  if (path.basename(dir) === ".next") {
    // special case for next.js on custom node (not vercel)
    const sideways = path.join(dir, "..", "node_modules", "numkong");
    if (existsSync(sideways)) return getBuildDir(sideways);
  if (dir === "/") throw new Error("Could not find native build for numkong");
  return getBuildDir(path.join(dir, ".."));
/**
 * Determines the directory used as the starting point for locating the
 * native addon. Candidates, in order:
 *   1. `__dirname`, when it is defined and non-empty;
 *   2. the current working directory, when it contains `build` or `prebuilds`;
 *   3. the root reported by the `bindings` helpers (`getRoot(getFileName())`).
 *
 * @returns The chosen directory path.
 */
function getDirName() {
  // `__dirname` is not defined in ES modules; touching it throws there,
  // which is why the probe is wrapped.
  try {
    if (__dirname) return __dirname;
  } catch { }

  // Fall back to cwd, which is typically the project root in dev and CI.
  // This helps runtimes like Deno and Bun where the `bindings` module's
  // V8 stack-trace hack may not resolve correctly.
  try {
    const cwd = process.cwd();
    if (existsSync(path.join(cwd, "build")) || existsSync(path.join(cwd, "prebuilds")))
      return cwd;
  } catch { }

  return getRoot(getFileName());
}
Sunbelt Computer Software

PL/B Language Development and Support

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Sunbelt Computer Software

PL/B Language Development and Support

FilesExpand file tree

numkong.ts

Latest commit

History

numkong.ts

File metadata and controls