microsoft
diff --git a/‎.github/instructions/skill-files.instructions.md‎
Lines changed: 8 additions & 1 deletion b/‎.github/instructions/skill-files.instructions.md‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎.github/skills/file-test-bug/SKILL.md‎
Lines changed: 4 additions & 0 deletions b/‎.github/skills/file-test-bug/SKILL.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/skills/markdown-token-optimizer/SKILL.md‎
Lines changed: 4 additions & 0 deletions b/‎.github/skills/markdown-token-optimizer/SKILL.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/skills/sensei/SKILL.md‎
Lines changed: 4 additions & 0 deletions b/‎.github/skills/sensei/SKILL.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/skills/sensei/references/SCORING.md‎
Lines changed: 3 additions & 3 deletions b/‎.github/skills/sensei/references/SCORING.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/skills/skill-authoring/SKILL.md‎
Lines changed: 4 additions & 0 deletions b/‎.github/skills/skill-authoring/SKILL.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/eval.yml‎
Lines changed: 34 additions & 0 deletions b/‎.github/workflows/eval.yml‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎.github/workflows/pr.yml‎
Lines changed: 108 additions & 28 deletions b/‎.github/workflows/pr.yml‎
Lines changed: 108 additions & 28 deletions
diff --git a/‎.github/workflows/test-all-integration.yml‎
Lines changed: 18 additions & 11 deletions b/‎.github/workflows/test-all-integration.yml‎
Lines changed: 18 additions & 11 deletions
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
@@ -13,12 +13,19 @@ Every SKILL.md must include YAML frontmatter with:
 ```yaml
 ---
 name: skill-name
-description: Detailed description including trigger phrases and use cases.
+description: "Detailed description including trigger phrases and use cases."
+license: MIT
+metadata:
+  author: Microsoft
+  version: "1.0.0"
 ---
 ```
 
 - **name**: 1-64 characters, lowercase letters and hyphens only, must match directory name
 - **description**: 1-1024 characters, explain WHAT the skill does and WHEN to use it. Include trigger phrases.
+- **license**: Required for all skills. Use `MIT` unless there is a documented exception.
+- **metadata.author**: Recommended value is `Microsoft`.
+- **metadata.version**: Semver format (`X.Y.Z`). Set to `"1.0.0"` for new skills. Must be bumped in the same PR that modifies the skill.
 
 ## Size Limits
 
 
@@ -1,6 +1,10 @@
 ---
 name: file-test-bug
 description: "File a GitHub issue for local integration test failures. TRIGGERS: file test bug, report test failure, create bug for test, integration test failed, test failure issue, junit failure"
+license: MIT
+metadata:
+  author: Microsoft
+  version: "1.0.0"
 ---
 
 # File Test Bug
 
@@ -1,6 +1,10 @@
 ---
 name: markdown-token-optimizer
 description: "Analyzes markdown files for token efficiency. TRIGGERS: optimize markdown, reduce tokens, token count, token bloat, too many tokens, make concise, shrink file, file too large, optimize for AI, token efficiency, verbose markdown, reduce file size"
+license: MIT
+metadata:
+  author: Microsoft
+  version: "1.0.0"
 ---
 
 # Markdown Token Optimizer
 
@@ -1,6 +1,10 @@
 ---
 name: sensei
 description: "**WORKFLOW SKILL** — Iteratively improve skill frontmatter compliance using the Ralph loop pattern. WHEN: \"run sensei\", \"sensei help\", \"improve skill\", \"fix frontmatter\", \"skill compliance\", \"frontmatter audit\", \"score skill\", \"check skill tokens\". INVOKES: token counting tools, test runners, git commands. FOR SINGLE OPERATIONS: use token CLI directly for counts/checks."
+license: MIT
+metadata:
+  author: Microsoft
+  version: "1.0.0"
 ---
 
 # Sensei
 
@@ -102,7 +102,7 @@ license: MIT
 compatibility: Supports ASP.NET Core (.NET 6+), Node.js. Requires App Insights resource.
 metadata:
   author: example-org
-  version: "1.0"
+  version: "1.0.0"
 ```
 
 ---
@@ -218,7 +218,7 @@ The [agentskills.io spec](https://agentskills.io/specification) defines addition
 ```
 SUGGESTIONS:
 • Add license field (e.g., license: MIT)
-• Add metadata.version field (e.g., metadata: { version: "1.0" })
+• Add metadata.version field (e.g., metadata: { version: "1.0.0" })
 ```
 
 ### 7. SKILL.md Size Limits
@@ -345,7 +345,7 @@ function collectSuggestions(skill):
     if skill.license == null:
         suggestions.add("Add license field (e.g., license: MIT)")
     if skill.metadata == null OR skill.metadata.version == null:
-        suggestions.add("Add metadata.version field (e.g., metadata: { version: \"1.0\" })")
+        suggestions.add("Add metadata.version field (e.g., metadata: { version: \"1.0.0\" })")
     if usesBlockScalar(skill.rawDescription):
         suggestions.add("Use inline double-quoted string for description (>- incompatible with skills.sh)")
     if containsAntiTriggers(skill.description):
 
@@ -1,6 +1,10 @@
 ---
 name: skill-authoring
 description: "Guidelines for writing Agent Skills that comply with the agentskills.io specification. WHEN: \"create a skill\", \"new skill\", \"write a skill\", \"skill template\", \"skill structure\", \"review skill\", \"skill PR\", \"skill compliance\", \"SKILL.md format\", \"skill frontmatter\", \"skill best practices\"."
+license: MIT
+metadata:
+  author: Microsoft
+  version: "1.0.0"
 ---
 
 # Skill Authoring Guide
 
@@ -0,0 +1,34 @@
+name: Run Skill Evaluations
+
+on:
+  pull_request:
+    branches: [main]  # only pull requests that target main
+    paths:  # run only when eval definitions or plugin skills change
+      - 'evals/**'
+      - 'plugin/skills/**'
+
+permissions:  # the workflow token is limited to reading repository contents
+  contents: read
+
+jobs:
+  eval:
+    name: Run Evaluations
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Azure Developer CLI
+        uses: Azure/setup-azd@v2
+      - name: Install waza extension  # NOTE(review): registry.json is read from the waza main branch, so the extension source is not pinned to a version
+        run: |
+          azd config set alpha.extensions on
+          azd ext source add -n waza -t url -l https://raw.githubusercontent.com/microsoft/waza/main/registry.json
+          azd ext install microsoft.azd.waza
+      - name: Run evaluations  # writes its output to ./results, which the next step uploads
+        run: azd waza run evals/azure-hosted-copilot-sdk/eval.yaml --output-dir ./results
+      - name: Upload results
+        if: always()  # upload ./results even when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-results
+          path: ./results
+          retention-days: 30  # artifact is kept for 30 days
@@ -32,10 +32,14 @@ jobs:
         working-directory: ./scripts
         run: npm ci
 
-      - name: Typecheck
+      - name: Typecheck tests
         working-directory: ./tests
         run: npm run typecheck
 
+      - name: Typecheck scripts
+        working-directory: ./scripts
+        run: npm run typecheck
+
       - name: Lint test
         working-directory: ./tests
         run: npm run lint
@@ -186,6 +190,8 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history for all branches; NOTE(review): presumably so later steps can read files from origin/<base> with git show — confirm
 
       - name: Check changed skill files
         id: changed-skills
@@ -195,41 +201,54 @@ jobs:
             plugin/skills/**/SKILL.md
             .github/skills/**/SKILL.md
 
+      - name: Check all changed plugin skill files
+        id: changed-plugin-skills  # matches any file under a skill directory, not only SKILL.md
+        uses: tj-actions/changed-files@v46  # NOTE(review): third-party action referenced by a mutable tag — consider pinning to a full commit SHA
+        with:
+          files: |
+            plugin/skills/**
+            .github/skills/**
+
+      - name: Setup Node.js
+        if: steps.changed-skills.outputs.any_changed == 'true'  # same condition as the frontmatter validation step that runs npm
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: scripts/package.json  # NOTE(review): npm ci installs from the lockfile — consider keying the cache on scripts/package-lock.json
+
+      - name: Install scripts dependencies
+        if: steps.changed-skills.outputs.any_changed == 'true'
+        working-directory: ./scripts
+        run: npm ci
+
       - name: Validate skill frontmatter
         if: steps.changed-skills.outputs.any_changed == 'true'
+        working-directory: ./scripts
+        env:
+          CHANGED_SKILL_FILES: ${{ steps.changed-skills.outputs.all_changed_files }}
         run: |
           echo "## Skill Validation" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
           
-          # Use IFS to safely handle filenames (though markdown files typically don't have spaces)
-          IFS=$'\n'
+          # Changed SKILL.md paths arrive in the CHANGED_SKILL_FILES env var instead of
+          # being expanded into the script text, so a crafted filename cannot inject commands.
+          # Prefix with ../ since working-directory is ./scripts
+          SKILL_FILES=""
-          for file in ${{ steps.changed-skills.outputs.all_changed_files }}; do
+          for file in $CHANGED_SKILL_FILES; do
-            echo "Checking $file..."
-            
-            # Check for required frontmatter
-            if ! head -1 "$file" | grep -q "^---"; then
-              echo "::error file=$file::Missing YAML frontmatter"
-              echo "❌ $file - Missing frontmatter" >> $GITHUB_STEP_SUMMARY
-              exit 1
-            fi
-            
-            # Check for name field
-            if ! grep -q "^name:" "$file"; then
-              echo "::error file=$file::Missing required 'name' field in frontmatter"
-              echo "❌ $file - Missing 'name' field" >> $GITHUB_STEP_SUMMARY
-              exit 1
-            fi
-            
-            # Check for description field
-            if ! grep -q "^description:" "$file"; then
-              echo "::error file=$file::Missing required 'description' field in frontmatter"
-              echo "❌ $file - Missing 'description' field" >> $GITHUB_STEP_SUMMARY
-              exit 1
-            fi
-            
-            echo "✅ $file - Valid" >> $GITHUB_STEP_SUMMARY
+            SKILL_FILES="$SKILL_FILES ../$file"
           done
           
-          echo "All skill files have valid frontmatter"
+          # Run frontmatter spec validation; on failure copy the validator output into the summary
+          if OUTPUT=$(npm run frontmatter -- $SKILL_FILES 2>&1); then
+            echo "$OUTPUT"
+            echo "✅ All skill frontmatter is valid" >> $GITHUB_STEP_SUMMARY
+          else
+            echo "$OUTPUT"
+            echo "❌ Frontmatter validation failed" >> $GITHUB_STEP_SUMMARY
+            printf '\n```\n%s\n```\n' "$OUTPUT" >> $GITHUB_STEP_SUMMARY
+            exit 1
+          fi
 
       - name: Validate skills.json
         uses: actions/github-script@v7
@@ -281,6 +300,84 @@ jobs:
                 core.setFailed('tests/skills.json validation failed');
             }
 
+      - name: Check skill version bumps
+        if: steps.changed-plugin-skills.outputs.any_changed == 'true'
+        env:
+          # Untrusted values are handed over as environment variables rather than
+          # expanded into the script text, so they cannot inject shell commands.
+          CHANGED_FILES: ${{ steps.changed-plugin-skills.outputs.all_changed_files }}
+          BASE_REF: ${{ github.base_ref }}
+        run: |
+          echo "## Skill Version Check" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+
+          FAILED=false
+
+          # Print the first indented "version:" value found in the frontmatter on stdin
+          # (surrounding double quotes stripped); prints nothing when there is none.
+          frontmatter_version() {
+            sed -n '/^---$/,/^---$/p' | grep -E '^[[:space:]]+version:' | head -1 | sed 's/.*version:[[:space:]]*"\{0,1\}\([^"]*\)"\{0,1\}/\1/' || true
+          }
+
+          # For each changed file, walk up to find the nearest SKILL.md
+          SKILL_FILES=""
+          for file in $CHANGED_FILES; do
+            dir=$(dirname "$file")
+            while [ "$dir" != "plugin/skills" ] && [ "$dir" != ".github/skills" ] && [ "$dir" != "plugin" ] && [ "$dir" != ".github" ] && [ "$dir" != "." ]; do
+              if [ -f "$dir/SKILL.md" ]; then
+                SKILL_FILES="$SKILL_FILES $dir/SKILL.md"
+                break
+              fi
+              dir=$(dirname "$dir")
+            done
+          done
+
+          # Deduplicate. grep exits 1 when no path is left, which would abort the step
+          # under bash -e before the empty check below; "|| true" keeps it going.
+          SKILL_FILES=$(echo "$SKILL_FILES" | tr ' ' '\n' | sort -u | grep -v '^$' || true)
+
+          if [ -z "$SKILL_FILES" ]; then
+            echo "No SKILL.md files affected by changes."
+            echo "✅ No version bump needed" >> "$GITHUB_STEP_SUMMARY"
+            exit 0
+          fi
+
+          for skill_file in $SKILL_FILES; do
+            SKILL_NAME=$(basename "$(dirname "$skill_file")")
+
+            # Version on the base branch; empty when git show cannot read the file there
+            BASE_VERSION=$(git show "origin/$BASE_REF:$skill_file" 2>/dev/null | frontmatter_version || true)
+
+            # Version in the PR branch
+            HEAD_VERSION=$(frontmatter_version < "$skill_file")
+
+            # New skill (no base version) — skip, it starts at 1.0.0
+            if [ -z "$BASE_VERSION" ]; then
+              echo "✅ $skill_file — new skill (version: $HEAD_VERSION)"
+              continue
+            fi
+
+            # A removed version field differs from the base value but is not a bump
+            if [ -z "$HEAD_VERSION" ]; then
+              echo "::error file=$skill_file::Skill '$SKILL_NAME' no longer declares metadata.version (was $BASE_VERSION). Restore it and bump the version in your PR."
+              echo "❌ \`$SKILL_NAME\` (\`$skill_file\`) — metadata.version missing (was $BASE_VERSION)" >> "$GITHUB_STEP_SUMMARY"
+              FAILED=true
+            elif [ "$BASE_VERSION" = "$HEAD_VERSION" ]; then
+              echo "::error file=$skill_file::Skill '$SKILL_NAME' was modified but metadata.version was not bumped ($BASE_VERSION). Bump the version in your PR."
+              echo "❌ \`$SKILL_NAME\` (\`$skill_file\`) — version not bumped ($BASE_VERSION)" >> "$GITHUB_STEP_SUMMARY"
+              FAILED=true
+            else
+              echo "✅ $skill_file — $SKILL_NAME version bumped: $BASE_VERSION → $HEAD_VERSION"
+              echo "✅ \`$SKILL_NAME\` (\`$skill_file\`) — $BASE_VERSION → $HEAD_VERSION" >> "$GITHUB_STEP_SUMMARY"
+            fi
+          done
+
+          if [ "$FAILED" = true ]; then
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo "> Every PR that modifies a skill must bump its \`metadata.version\` in the same PR." >> "$GITHUB_STEP_SUMMARY"
+            exit 1
+          fi
+
       - name: Skip message
         if: steps.changed-skills.outputs.any_changed != 'true'
         run: echo "No skill files changed - skipping validation"
 
@@ -11,12 +11,13 @@ permissions:
 
 on:
   schedule:
-    # 9:00 PM PST every day (5:00 AM UTC)
-    - cron: '0 5 * * *'
-    # 12:00 AM PST every day (8:00 AM UTC)
-    - cron: '0 8 * * *'
-    # 4:00 AM PST every day (12:00 PM UTC)
-    - cron: '0 12 * * *'
+    # Nightly job to run all the integration tests, except weekends in PST.
+    # 9:00 PM PST Mon-Fri (5:00 AM UTC Tue-Sat)
+    - cron: '0 5 * * 2-6'
+    # 12:00 AM PST Tue-Sat (8:00 AM UTC Tue-Sat)
+    - cron: '0 8 * * 2-6'
+    # 4:00 AM PST Tue-Sat (12:00 PM UTC Tue-Sat)
+    - cron: '0 12 * * 2-6'
   workflow_dispatch:
     inputs:
       skills:
@@ -30,6 +31,11 @@ on:
         options:
           - claude-sonnet-4.5
           - claude-opus-4.5
+      skill-test-pattern:
+        description: 'Optional: pattern by name or describe block for filtering skill tests. This parameter does not apply to azure-deploy tests'
+        required: false
+        type: string
+        default: ''
       deploy-test-pattern:
         description: 'Optional: Comma separated patterns by name or describe block for filtering azure-deploy tests.'
         required: false
@@ -92,13 +98,13 @@ jobs:
             
             # Set deploy-test-pattern and debug based on the schedule slot
             case "$CRON" in
-              "0 5 * * *")
+              "0 5 * * 2-6")
                 echo "debug=${{ env.SCHEDULED_DEBUG_21 }}" >> "$GITHUB_OUTPUT"
                 ;;
-              "0 8 * * *")
+              "0 8 * * 2-6")
                 echo "debug=${{ env.SCHEDULED_DEBUG_00 }}" >> "$GITHUB_OUTPUT"
                 ;;
-              "0 12 * * *")
+              "0 12 * * 2-6")
                 echo "debug=${{ env.SCHEDULED_DEBUG_04 }}" >> "$GITHUB_OUTPUT"
                 ;;
               *)
@@ -258,13 +264,15 @@ jobs:
           DEBUG: ${{ needs.resolve-inputs.outputs.debug == 'true' && '1' || '' }}
           TEST_RUN_ID: all-integration
           MODEL_OVERRIDE: ${{ inputs.model-override }}
+          SKILL_TEST_PATTERN: ${{ inputs.skill-test-pattern }}
         run: |
           echo test with $MODEL_OVERRIDE
           # Handle azure-ai vs azure-aigateway prefix collision
+          # The :+ expansion adds the pattern argument only when it is non-empty (it defaults to ''), so no blank argument reaches the test script
           if [ "${{ matrix.skill }}" = "azure-ai" ]; then
-            npm run test:integration azure-ai/
+            npm run test:integration azure-ai/ ${SKILL_TEST_PATTERN:+"$SKILL_TEST_PATTERN"}
           else
-            npm run test:integration ${{ matrix.skill }}
+            npm run test:integration ${{ matrix.skill }} ${SKILL_TEST_PATTERN:+"$SKILL_TEST_PATTERN"}
           fi
         continue-on-error: true
 
 
@@ -1,4 +1,6 @@
 
+venv/
+.venv/
 __integration_*__/
 __pycache__/
 __test_fixtures__/