Skip to content

Filesystem path arbitrary #6440

@dmurvihill

Description

@dmurvihill

🚀 Feature Request

Generators for filesystem paths

Example

This implementation gives paths that are valid both on Windows and on Unix. It could be adapted to provide only Windows paths, or only Unix paths.

This is a rough draft and probably isn't ready to merge, but I could finish it up in the next few months. Are you interested?

import fc, { type Arbitrary } from 'fast-check';
import path from 'node:path';

/**
 * Device names that Windows reserves and that therefore cannot be used as a
 * file or directory name (compared in upper case by the callers).
 *
 * Includes the digit forms 0-9 and the superscript forms ¹ ² ³ for both the
 * COM and LPT families.
 */
const ntfsProhibitedFileNames = [
  'CON', 'PRN', 'AUX', 'NUL',
  'COM0', 'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8',
  'COM9', 'COM¹', 'COM²', 'COM³',
  'LPT0', 'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8',
  'LPT9', 'LPT¹', 'LPT²', 'LPT³',
];

/** Options accepted by `fsPath`. */
interface FsPathConstraints {
  /**
   * Forwarded as `maxLength` to the `fc.string` that generates each path
   * segment; `fsPath` falls back to `maxUnixLength_octets` when omitted.
   */
  maxLength?: number;
}

/** Windows budget for a joined path, measured in string length (UTF-16 units). */
const maxWindowsLength_chars = 260;
/** Unix budget for a joined path, measured in UTF-8 octets. */
const maxUnixLength_octets = 255;

/** Strings that are legal paths on most systems
 *
 * Supported: NTFS, most Unix
 * Not supported: FAT, strict POSIX
 *
 * Unix paths are 8-bit octet arrays; all values except 0x00 (NUL) and
 * 0x2f (/) are allowed. Allowed length is 1-255 octets.
 *
 * NTFS paths are UTF-16 strings; all characters are allowed except
 * codes 0-31 and any characters *"/:<>?\|. Allowed length is 1-260
 * characters (although on Windows 10 and later, it can be made higher
 * with a registry edit).
 *
 * Paths in which ANY component is a reserved Windows device name (for
 * example `CON`, `CON/foo`, `NUL.txt` or `NUL.tar.gz`) are filtered out.
 *
 * @param constraints - Optional limits. `maxLength` caps the length of each
 *   generated segment; the overall length is bounded separately by
 *   `joinPathSegmentsWithMaxLength`.
 * @returns An arbitrary producing the joined path strings.
 */
export function fsPath(constraints?: FsPathConstraints): Arbitrary<string> {
  const segment = fc.string({
    unit: pathChar(),
    maxLength: constraints?.maxLength ?? maxUnixLength_octets,
  });
  return fc.array(segment)
    .map(segments => joinPathSegmentsWithMaxLength(segments))
    .filter(p =>
      // path.join yields '.' rather than '' for an empty result, so the length
      // guard is purely defensive.
      p.length > 0
      && !p.split(path.sep).some(component => isNtfsReservedName(component)));
}

/** True when one path component is a reserved device name, ignoring case and any extension(s). */
function isNtfsReservedName(component: string): boolean {
  const upper = component.toUpperCase();
  const firstDot = upper.indexOf('.');
  // Windows treats everything after the FIRST dot as extension for this check,
  // so `NUL.tar.gz` is as reserved as `NUL`.
  const stem = firstDot === -1 ? upper : upper.slice(0, firstDot);
  return ntfsProhibitedFileNames.includes(stem);
}

/**
 * Arbitrary for a one-unit string that passes both the Unix and the NTFS
 * character checks.
 */
function pathChar(): Arbitrary<string> {
  const isPortable = (candidate: string) =>
    allValidUnixPathChars(candidate) && allValidNtfsPathChars(candidate);
  const singleUnit = fc.string({ unit: 'binary', minLength: 1, maxLength: 1 });
  return singleUnit.filter(isPortable);
}

/**
 * Joins the longest leading run of `segments` that fits inside both the
 * Windows budget (string length) and the Unix budget (UTF-8 octets).
 *
 * Segments are consumed in order until one budget is reached or exceeded; if
 * the last consumed segment pushed either total over its limit, it is dropped.
 *
 * @param segments - Candidate path segments, in order.
 * @returns The result of `path.join` over the retained segments.
 */
function joinPathSegmentsWithMaxLength(segments: string[]): string {
  const separatorLength = 1;
  let chars = 0;
  let octets = 0;
  let taken = 0;

  while (
    taken < segments.length
    && chars < maxWindowsLength_chars
    && octets < maxUnixLength_octets
  ) {
    const segment = segments[taken];
    if (chars > 0) {
      // All separator characters are assumed to be 1 byte
      chars += separatorLength;
      octets += separatorLength;
    }
    chars += segment.length;
    octets += Buffer.byteLength(segment, 'utf-8');
    taken += 1;
  }

  const overBudget =
    chars > maxWindowsLength_chars || octets > maxUnixLength_octets;
  const kept = overBudget ? taken - 1 : taken;
  return path.join(...segments.slice(0, kept));
}

/**
 * Reports whether every UTF-8 octet of `c` is usable in a Unix file name,
 * i.e. none of them is NUL (0x00) or slash (0x2F).
 */
function allValidUnixPathChars(c: string) {
  for (const octet of Buffer.from(c, 'utf-8')) {
    if (octet === 0x00 || octet === 0x2f) {
      return false;
    }
  }
  return true;
}


/** Printable characters that NTFS does not allow inside a file name. */
const ntfsProhibitedChars = ['*', '"', '/', ':', '<', '>', '?', '\\', '|'];

/**
 * Reports whether every UTF-16 unit of `c` is usable in an NTFS file name.
 *
 * A unit is rejected when it is a control code (0-31) or one of
 * `ntfsProhibitedChars`.
 *
 * @param c - String to validate; the empty string is vacuously valid.
 * @returns `true` when no unit of `c` is prohibited.
 */
function allValidNtfsPathChars(c: string): boolean {
  for (let i = 0; i < c.length; i++) {
    // Control codes are the ones BELOW 32; each unit is checked individually
    // rather than comparing the whole string against the prohibited list.
    if (c.charCodeAt(i) < 32 || ntfsProhibitedChars.includes(c.charAt(i))) {
      return false;
    }
  }
  return true;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions