# Source: development-skills/.github/workflows/detect-duplicates.yml at de2ca228448d0ce63a4ae9c5482667d4cadacfcd (mcj-coder/development-skills, GitHub)
#
# Duplicate Detection Workflow
#
# Detects potentially duplicate issues when new issues are created.
# Based on template: skills/issue-driven-delivery/templates/detect-duplicates.yml
#
# Permissions: Requires issues:write for commenting.

name: Detect Duplicates

# Run once for every newly opened issue.
on:
  issues:
    types:
      - opened

# Tuning knobs shared by every step below.
env:
  # Fewest keywords a title must yield before a search is attempted.
  MIN_KEYWORDS: "2"
  # Most matches listed in the posted comment.
  MAX_MATCHES: "5"
  # Space-separated words dropped from titles before searching; customized
  # for this repository. The folded scalar joins these lines with single
  # spaces into one string.
  STOP_WORDS: >-
    a an the is are was were be been being have has had do does did
    will would could should may might must shall can
    for to of in on at by with from as or and but not
    this that these those it its
    add new create implement fix update bug feature issue
    skill skills automation script workflow

jobs:
  # Single job: extract keywords from the new issue's title, search open
  # issues for them, and comment on the new issue when matches are found.
  detect:
    runs-on: ubuntu-latest
    permissions:
      # Write access to issues is required to post the comment.
      issues: write
    steps:
      # Derive search keywords from the new issue's title: lowercase it, replace
      # every character outside [a-z0-9 ] with a space, then keep the words that
      # are at least three characters long and not listed in STOP_WORDS.
      # Outputs: `keywords` (space-separated words) and `count` (how many).
      - name: Extract Keywords from Title
        id: keywords
        env:
          # The title is user-controlled. It is handed to the shell as an
          # environment variable instead of being expanded into the script
          # text, where quotes, backticks or $(...) in a title would execute.
          ISSUE_TITLE: ${{ github.event.issue.title }}
        run: |
          printf 'Original title: %s\n' "$ISSUE_TITLE"

          # After this pipeline the text holds only a-z, 0-9 and single spaces,
          # so the unquoted word-splitting in the loop below cannot glob.
          TITLE_CLEAN=$(printf '%s\n' "$ISSUE_TITLE" \
            | tr '[:upper:]' '[:lower:]' \
            | sed 's/[^a-z0-9 ]/ /g' \
            | tr -s ' ')

          KEYWORD_LIST=()
          for word in $TITLE_CLEAN; do
            # Words shorter than three characters are skipped.
            if [ "${#word}" -lt 3 ]; then
              continue
            fi

            # STOP_WORDS is a space-separated list; padding both sides with a
            # space turns the substring test into a whole-word match.
            case " $STOP_WORDS " in
              *" $word "*) continue ;;
            esac

            KEYWORD_LIST+=("$word")
          done

          # "${array[*]}" joins the elements with single spaces.
          KEYWORDS="${KEYWORD_LIST[*]}"
          KEYWORD_COUNT="${#KEYWORD_LIST[@]}"

          echo "Extracted keywords: $KEYWORDS"
          echo "Keyword count: $KEYWORD_COUNT"

          {
            echo "keywords=$KEYWORDS"
            echo "count=$KEYWORD_COUNT"
          } >> "$GITHUB_OUTPUT"

      # Gate for the later steps: sets the `skip` output to true when the title
      # produced fewer keywords than MIN_KEYWORDS, otherwise to false.
      - name: Check Minimum Keywords
        id: check
        run: |
          found="${{ steps.keywords.outputs.count }}"
          required="${{ env.MIN_KEYWORDS }}"

          # Assume the search should run; flip only when keywords are too few.
          skip=false
          if [ "$found" -lt "$required" ]; then
            echo "Title too short for reliable detection"
            skip=true
          fi

          echo "skip=$skip" >> $GITHUB_OUTPUT

      # Search open issues whose titles contain all extracted keywords and
      # publish the results, with the triggering issue itself removed.
      # Outputs:
      #   matches - JSON array (number, title, url) of at most MAX_MATCHES issues
      #   count   - number of entries in `matches`
      #   total   - number of matching issues before the MAX_MATCHES cut
      #   exact   - number of the first issue with an identical title, or empty
      - name: Search for Duplicates
        id: search
        if: steps.check.outputs.skip != 'true'
        env:
          GH_TOKEN: ${{ github.token }}
          # This job has no checkout step, so there is no git remote for gh to
          # infer the repository from; name it explicitly.
          GH_REPO: ${{ github.repository }}
          KEYWORDS: ${{ steps.keywords.outputs.keywords }}
          CURRENT_ISSUE: ${{ github.event.issue.number }}
          # User-controlled text: passed through the environment, never
          # expanded into the script body.
          CURRENT_TITLE: ${{ github.event.issue.title }}
        run: |
          # gh's stderr is left on the job log rather than captured, so
          # diagnostics can never end up inside the JSON handed to jq.
          if ! RESULTS=$(gh issue list --search "$KEYWORDS in:title is:open" --json number,title,url --limit 20); then
            # Best effort: a failed search must not fail the workflow.
            echo "::warning title=Search Failed::Could not search for duplicates"
            {
              echo "matches="
              echo "count=0"
              echo "total=0"
              echo "exact="
            } >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Every hit except the issue that triggered this run.
          OTHERS=$(jq -c --argjson num "$CURRENT_ISSUE" 'map(select(.number != $num))' <<< "$RESULTS")

          # The limit is passed as a jq variable instead of being spliced into
          # the jq program text.
          MATCHES=$(jq -c --argjson max "$MAX_MATCHES" '.[:$max]' <<< "$OTHERS")
          MATCH_COUNT=$(jq 'length' <<< "$MATCHES")
          TOTAL_COUNT=$(jq 'length' <<< "$OTHERS")

          echo "Found $MATCH_COUNT potential matches"

          EXACT_MATCH=$(jq -r --arg title "$CURRENT_TITLE" \
            'map(select(.title == $title)) | .[0].number // ""' <<< "$OTHERS")

          # jq -c emits the array on a single line, so the plain name=value
          # form is sufficient for `matches`.
          {
            echo "matches=$MATCHES"
            echo "count=$MATCH_COUNT"
            echo "total=$TOTAL_COUNT"
            echo "exact=$EXACT_MATCH"
          } >> "$GITHUB_OUTPUT"

      # Post a Markdown comment on the new issue listing the potential
      # duplicates found by the search step. Skipped when the keyword check
      # asked to skip or the search reported zero matches.
      - name: Post Comment
        if: steps.check.outputs.skip != 'true' && steps.search.outputs.count != '0'
        env:
          GH_TOKEN: ${{ github.token }}
          # This job has no checkout step, so tell gh which repository to use.
          GH_REPO: ${{ github.repository }}
          # `matches` embeds other issues' titles (user-controlled text). It is
          # passed through the environment because expanding it inside a
          # quoted shell string breaks as soon as a title contains a quote.
          MATCHES: ${{ steps.search.outputs.matches }}
          COUNT: ${{ steps.search.outputs.count }}
          TOTAL: ${{ steps.search.outputs.total }}
          EXACT: ${{ steps.search.outputs.exact }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
        run: |
          # The body is written to a file with printf '%s\n', which prints its
          # arguments verbatim: backslash sequences inside issue titles are not
          # interpreted, and lines starting with '-' are not taken as options.
          BODY_FILE=$(mktemp)

          {
            printf '%s\n' '## Potential Duplicates Detected' ''

            if [ -n "$EXACT" ]; then
              printf '%s\n' "**Exact title match found with #$EXACT**" ''
            fi

            printf '%s\n' \
              'The following open issues may be related to this one:' \
              '' \
              '| Issue | Title |' \
              '|-------|-------|'

            # One table row per match; a literal '|' in a title is escaped so
            # it cannot split the Markdown table cell.
            jq -r '.[] | "| #\(.number) | \(.title | gsub("\\|"; "\\|")) |"' <<< "$MATCHES"

            if [ "$TOTAL" -gt "$COUNT" ]; then
              printf '%s\n' '' "*Showing $COUNT of $TOTAL potential matches.*"
            fi

            printf '%s\n' \
              '' \
              'Please review these issues. If this is a duplicate, consider:' \
              '- Closing this issue as duplicate of the original' \
              '- Linking issues if they are related but distinct' \
              '' \
              '---' \
              '*This comment was automatically generated by the duplicate detection workflow.*'
          } > "$BODY_FILE"

          gh issue comment "$ISSUE_NUMBER" --body-file "$BODY_FILE"

          echo "Posted duplicate detection comment"