PCRE2Project · NWilson · Oct 14, 2025 · Oct 6, 2025 · Oct 7, 2025 · Oct 8, 2025
diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic
@@ -440,6 +440,7 @@ released, the numbers must not be changed. */
 #define PCRE2_ERROR_DIFFSUBSSUBJECT   (-72)
 #define PCRE2_ERROR_DIFFSUBSOFFSET    (-73)
 #define PCRE2_ERROR_DIFFSUBSOPTIONS   (-74)
+#define PCRE2_ERROR_BAD_BACKSLASH_K   (-75)
 
 
 /* Request types for pcre2_pattern_info() */

diff --git a/src/pcre2.h.in b/src/pcre2.h.in
@@ -440,6 +440,7 @@ released, the numbers must not be changed. */
 #define PCRE2_ERROR_DIFFSUBSSUBJECT   (-72)
 #define PCRE2_ERROR_DIFFSUBSOFFSET    (-73)
 #define PCRE2_ERROR_DIFFSUBSOPTIONS   (-74)
+#define PCRE2_ERROR_BAD_BACKSLASH_K   (-75)
 
 
 /* Request types for pcre2_pattern_info() */

diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
@@ -8370,6 +8370,10 @@ for (;; pptr++)
       case ESC_A:
       if (cb->max_lookbehind == 0) cb->max_lookbehind = 1;
       break;
+
+      case ESC_K:
+      cb->external_flags |= PCRE2_HASBSK;  /* Record that \K is present */
+      break;
       }
 
     *code++ = meta_arg;

diff --git a/src/pcre2_error.c b/src/pcre2_error.c
@@ -305,6 +305,7 @@ static const unsigned char match_error_texts[] =
   "substitute subject differs from prior match call\0"
   "substitute start offset differs from prior match call\0"
   "substitute options differ from prior match call\0"
+  "disallowed use of \\K in lookaround\0"
   ;
 
 

diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
@@ -538,6 +538,7 @@ bytes in a code unit in that mode. */
 #define PCRE2_DUPCAPUSED    0x00200000u /* contains (?| */
 #define PCRE2_HASBKC        0x00400000u /* contains \C */
 #define PCRE2_HASACCEPT     0x00800000u /* contains (*ACCEPT) */
+#define PCRE2_HASBSK        0x01000000u /* contains \K */
 
 #define PCRE2_MODE_MASK     (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
 

diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
@@ -969,7 +969,9 @@ typedef struct match_block {
   uint32_t match_call_count;      /* Number of times a new frame is created */
   BOOL hitend;                    /* Hit the end of the subject at some point */
   BOOL hasthen;                   /* Pattern contains (*THEN) */
+  BOOL hasbsk;                    /* Pattern contains \K */
   BOOL allowemptypartial;         /* Allow empty hard partial */
+  BOOL allowlookaroundbsk;        /* Allow \K within lookarounds */
   const uint8_t *lcc;             /* Points to lower casing table */
   const uint8_t *fcc;             /* Points to case-flipping table */
   const uint8_t *ctypes;          /* Points to table of type maps */

diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
@@ -13772,6 +13772,34 @@ common->accept_label = LABEL();
 if (common->accept != NULL)
   set_jumps(common->accept, common->accept_label);
 
+/* Fail if we detect that the start position was moved to be either after
+the end position (\K in lookahead) or before the start offset (\K in
+lookbehind). */
+
+if (common->has_set_som &&
+    (common->re->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0)
+  {
+  if (HAS_VIRTUAL_REGISTERS)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+    }
+  else
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
+    }
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
+
+  /* (ovector[0] < jit_arguments->str)? */
+  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, TMP1, 0);
+  /* Unconditionally set R0 (aka TMP1), in between the comparison that needs to
+  use TMP1, but before the jump. */
+  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_BAD_BACKSLASH_K);
+  add_jump(compiler, &common->abort, JUMP(SLJIT_LESS));
+  /* (ovector[0] > STR_PTR)?  NB. ovector[1] hasn't yet been set to STR_PTR. */
+  add_jump(compiler, &common->abort, CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0));
+  }
+
 /* This means we have a match. Update the ovector. */
 copy_ovector(common, re->top_bracket + 1);
 common->quit_label = common->abort_label = LABEL();

diff --git a/src/pcre2_match.c b/src/pcre2_match.c
@@ -1010,11 +1010,28 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
         }
 
 #ifdef DEBUG_SHOW_OPS
-      fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n");
+      fprintf(stderr, "++ Failed ACCEPT not at end (endanchored set)\n");
 #endif
       return MATCH_NOMATCH;   /* (*ACCEPT) */
       }
 
+    /* Fail if we detect that the start position was moved to be either after
+    the end position (\K in lookahead) or before the start offset (\K in
+    lookbehind). If this occurs, the pattern must have used \K in a somewhat
+    sneaky way (e.g. by pattern recursion), because if the \K is actually
+    syntactically inside the lookaround, it's blocked at compile-time. */
+
+    if (Fstart_match < mb->start_subject + mb->start_offset ||
+        Fstart_match > Feptr)
+      {
+      /* The \K expression is fairly rare. We assert it was used so that we
+      catch any unexpected invalid data in start_match. */
+      PCRE2_ASSERT(mb->hasbsk);
+
+      if (!mb->allowlookaroundbsk)
+        return PCRE2_ERROR_BAD_BACKSLASH_K;
+      }
+
     /* We have a successful match of the whole pattern. Record the result and
     then do a direct return from the function. If there is space in the offset
     vector, set any pairs that follow the highest-numbered captured string but
@@ -7393,8 +7410,11 @@ mb->start_offset = start_offset;
 mb->end_subject = end_subject;
 mb->true_end_subject = true_end_subject;
 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
+mb->hasbsk = (re->flags & PCRE2_HASBSK) != 0;
 mb->allowemptypartial = (re->max_lookbehind > 0) ||
     (re->flags & PCRE2_MATCH_EMPTY) != 0;
+mb->allowlookaroundbsk =
+  (re->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) != 0;
 mb->poptions = re->overall_options;          /* Pattern options */
 mb->ignore_skip_arg = 0;
 mb->mark = mb->nomatch_mark = NULL;          /* In case never set */

diff --git a/testdata/testinput2 b/testdata/testinput2
@@ -6378,7 +6378,42 @@ a)"xI
 
 /^abc(?<!b\Kq)d/,allow_lookaround_bsk
     abcd
-
+
+# PCRE2 now also rejects sneaky cases where the \K is inside a lookaround... but
+# it's not always easy to detect this syntactically at compile-time (indeed,
+# a conditional expression could dynamically invoke \K via a subroutine, based
+# on the subject contents).
+
+/(?(DEFINE)(?<sneaky>b\K))a(?=(?&sneaky))/g,allow_lookaround_bsk
+    ab
+
+/(?(DEFINE)(?<sneaky>b\K))a(?=(?&sneaky))/g
+    ab
+    zz
+
+/a|(?(DEFINE)(?<sneaky>\Ka))(?<=(?&sneaky))b/g,allow_lookaround_bsk
+    ab
+
+/a|(?(DEFINE)(?<sneaky>\Ka))(?<=(?&sneaky))b/g
+    ab
+    zz
+
+/a|(?(DEFINE)(?<sneaky>\K\Ga))(?<=(?&sneaky))b/g
+    ab
+    zz
+
+/(?=.{10}(?1))x(\K){0}/
+    x1234567890
+
+/(?=.{10}(.))(*scs:(1)(?2))x(\K){0}/
+    x1234567890
+
+/(?=.{5}(?1))\d*(\K){0}/
+\= Totally fine - pattern does nothing bad even though \K is reachable
+    1234567890
+\= Not fine - the subject now causes the \K to misbehave
+    abcdefgh
+
 # --------- 
 
 # Tests for zero-length NULL to be treated as an empty string.

diff --git a/testdata/testoutput2 b/testdata/testoutput2
@@ -19062,7 +19062,57 @@ Failed: error 199 at offset 14: \K is not allowed in lookarounds (but see PCRE2_
 /^abc(?<!b\Kq)d/,allow_lookaround_bsk
     abcd
  0: abcd
-
+
+# PCRE2 now also rejects sneaky cases where the \K is inside a lookaround... but
+# it's not always easy to detect this syntactically at compile-time (indeed,
+# a conditional expression could dynamically invoke \K via a subroutine, based
+# on the subject contents).
+
+/(?(DEFINE)(?<sneaky>b\K))a(?=(?&sneaky))/g,allow_lookaround_bsk
+    ab
+Start of matched string is beyond its end - displaying from end to start.
+ 0: b
+
+/(?(DEFINE)(?<sneaky>b\K))a(?=(?&sneaky))/g
+    ab
+Failed: error -75: disallowed use of \K in lookaround
+    zz
+No match
+
+/a|(?(DEFINE)(?<sneaky>\Ka))(?<=(?&sneaky))b/g,allow_lookaround_bsk
+    ab
+ 0: a
+ 0: ab
+
+/a|(?(DEFINE)(?<sneaky>\Ka))(?<=(?&sneaky))b/g
+    ab
+ 0: a
+Failed: error -75: disallowed use of \K in lookaround
+    zz
+No match
+
+/a|(?(DEFINE)(?<sneaky>\K\Ga))(?<=(?&sneaky))b/g
+    ab
+ 0: a
+    zz
+No match
+
+/(?=.{10}(?1))x(\K){0}/
+    x1234567890
+Failed: error -75: disallowed use of \K in lookaround
+
+/(?=.{10}(.))(*scs:(1)(?2))x(\K){0}/
+    x1234567890
+Failed: error -75: disallowed use of \K in lookaround
+
+/(?=.{5}(?1))\d*(\K){0}/
+\= Totally fine - pattern does nothing bad even though \K is reachable
+    1234567890
+ 0: 67890
+\= Not fine - the subject now causes the \K to misbehave
+    abcdefgh
+Failed: error -75: disallowed use of \K in lookaround
+
 # --------- 
 
 # Tests for zero-length NULL to be treated as an empty string.