From c53f598f5a09a072124b2085990a3cee8dd28c5a Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Thu, 21 Aug 2025 16:11:49 +0800 Subject: [PATCH 01/13] fix(jira): update epic collector to use new API endpoint and include all fields --- backend/plugins/jira/tasks/epic_collector.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/plugins/jira/tasks/epic_collector.go b/backend/plugins/jira/tasks/epic_collector.go index dcb1633096d..f52605923f9 100644 --- a/backend/plugins/jira/tasks/epic_collector.go +++ b/backend/plugins/jira/tasks/epic_collector.go @@ -86,7 +86,7 @@ func CollectEpics(taskCtx plugin.SubTaskContext) errors.Error { ApiClient: data.ApiClient, PageSize: 100, Incremental: false, - UrlTemplate: "api/2/search", + UrlTemplate: "api/3/search/jql", Query: func(reqData *api.RequestData) (url.Values, errors.Error) { query := url.Values{} epicKeys := []string{} @@ -98,6 +98,8 @@ func CollectEpics(taskCtx plugin.SubTaskContext) errors.Error { query.Set("startAt", fmt.Sprintf("%v", reqData.Pager.Skip)) query.Set("maxResults", fmt.Sprintf("%v", reqData.Pager.Size)) query.Set("expand", "changelog") + // Add fields parameter to ensure all required fields are returned in the new API + query.Set("fields", "*all") return query, nil }, Input: epicIterator, From 55fe451b245fdcf1228a9df2d1a50080cd11a9e6 Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Mon, 25 Aug 2025 11:04:04 +0800 Subject: [PATCH 02/13] fix(jira): enhance epic collector to dynamically select API endpoint based on JIRA version --- backend/plugins/jira/tasks/epic_collector.go | 27 +++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/backend/plugins/jira/tasks/epic_collector.go b/backend/plugins/jira/tasks/epic_collector.go index f52605923f9..e5d783fe672 100644 --- a/backend/plugins/jira/tasks/epic_collector.go +++ b/backend/plugins/jira/tasks/epic_collector.go @@ -82,11 +82,30 @@ func CollectEpics(taskCtx 
plugin.SubTaskContext) errors.Error { jql = buildJQL(*apiCollector.GetSince(), loc) } + // Choose API endpoint based on JIRA version + var urlTemplate string + var shouldAddFieldsParam bool + + // Use api/2 for JIRA Server <= v8, api/3 for newer versions + if data.JiraServerInfo.DeploymentType == models.DeploymentServer && + len(data.JiraServerInfo.VersionNumbers) == 3 && + data.JiraServerInfo.VersionNumbers[0] <= 8 { + // JIRA Server <= v8 + urlTemplate = "api/2/search" + shouldAddFieldsParam = false + logger.Info("Using api/2/search for JIRA Server version <= 8") + } else { + // JIRA Cloud and Server > v8 (api/2 deprecated in Cloud) + urlTemplate = "api/3/search" + shouldAddFieldsParam = true + logger.Info("Using api/3/search for JIRA Cloud or JIRA Server version > 8") + } + err = apiCollector.InitCollector(api.ApiCollectorArgs{ ApiClient: data.ApiClient, PageSize: 100, Incremental: false, - UrlTemplate: "api/3/search/jql", + UrlTemplate: urlTemplate, Query: func(reqData *api.RequestData) (url.Values, errors.Error) { query := url.Values{} epicKeys := []string{} @@ -98,8 +117,10 @@ func CollectEpics(taskCtx plugin.SubTaskContext) errors.Error { query.Set("startAt", fmt.Sprintf("%v", reqData.Pager.Skip)) query.Set("maxResults", fmt.Sprintf("%v", reqData.Pager.Size)) query.Set("expand", "changelog") - // Add fields parameter to ensure all required fields are returned in the new API - query.Set("fields", "*all") + // api/3 requires fields parameter, api/2 does not + if shouldAddFieldsParam { + query.Set("fields", "*all") + } return query, nil }, Input: epicIterator, From 59dc06e5408cb000d609c7b56204c3e2e33d53b0 Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Mon, 25 Aug 2025 11:13:26 +0800 Subject: [PATCH 03/13] fix(jira): update epic collector to use correct API endpoint for JIRA Cloud and Server versions --- backend/plugins/jira/tasks/epic_collector.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/backend/plugins/jira/tasks/epic_collector.go b/backend/plugins/jira/tasks/epic_collector.go index e5d783fe672..0a6cfad0f2e 100644 --- a/backend/plugins/jira/tasks/epic_collector.go +++ b/backend/plugins/jira/tasks/epic_collector.go @@ -85,10 +85,10 @@ func CollectEpics(taskCtx plugin.SubTaskContext) errors.Error { // Choose API endpoint based on JIRA version var urlTemplate string var shouldAddFieldsParam bool - + // Use api/2 for JIRA Server <= v8, api/3 for newer versions - if data.JiraServerInfo.DeploymentType == models.DeploymentServer && - len(data.JiraServerInfo.VersionNumbers) == 3 && + if data.JiraServerInfo.DeploymentType == models.DeploymentServer && + len(data.JiraServerInfo.VersionNumbers) == 3 && data.JiraServerInfo.VersionNumbers[0] <= 8 { // JIRA Server <= v8 urlTemplate = "api/2/search" @@ -96,11 +96,11 @@ func CollectEpics(taskCtx plugin.SubTaskContext) errors.Error { logger.Info("Using api/2/search for JIRA Server version <= 8") } else { // JIRA Cloud and Server > v8 (api/2 deprecated in Cloud) - urlTemplate = "api/3/search" + urlTemplate = "api/3/search/jql" shouldAddFieldsParam = true - logger.Info("Using api/3/search for JIRA Cloud or JIRA Server version > 8") + logger.Info("Using api/3/search/jql for JIRA Cloud or JIRA Server version > 8") } - + err = apiCollector.InitCollector(api.ApiCollectorArgs{ ApiClient: data.ApiClient, PageSize: 100, From c3a54ae84cf371b6ef29b313065bde701579100d Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Mon, 25 Aug 2025 16:49:50 +0800 Subject: [PATCH 04/13] fix(jira): refactor epic collector to streamline API endpoint selection and enhance error handling --- backend/plugins/jira/tasks/epic_collector.go | 121 ++++++++++++++----- 1 file changed, 94 insertions(+), 27 deletions(-) diff --git a/backend/plugins/jira/tasks/epic_collector.go b/backend/plugins/jira/tasks/epic_collector.go index 0a6cfad0f2e..2e777f92c98 100644 --- a/backend/plugins/jira/tasks/epic_collector.go +++ 
b/backend/plugins/jira/tasks/epic_collector.go @@ -82,45 +82,47 @@ func CollectEpics(taskCtx plugin.SubTaskContext) errors.Error { jql = buildJQL(*apiCollector.GetSince(), loc) } - // Choose API endpoint based on JIRA version - var urlTemplate string - var shouldAddFieldsParam bool - - // Use api/2 for JIRA Server <= v8, api/3 for newer versions - if data.JiraServerInfo.DeploymentType == models.DeploymentServer && - len(data.JiraServerInfo.VersionNumbers) == 3 && - data.JiraServerInfo.VersionNumbers[0] <= 8 { - // JIRA Server <= v8 - urlTemplate = "api/2/search" - shouldAddFieldsParam = false - logger.Info("Using api/2/search for JIRA Server version <= 8") + // Choose API endpoint based on JIRA deployment type + if data.JiraServerInfo.DeploymentType == models.DeploymentServer { + logger.Info("Using api/2/search for JIRA Server") + err = setupApiV2Collector(apiCollector, data, epicIterator, jql) } else { - // JIRA Cloud and Server > v8 (api/2 deprecated in Cloud) - urlTemplate = "api/3/search/jql" - shouldAddFieldsParam = true - logger.Info("Using api/3/search/jql for JIRA Cloud or JIRA Server version > 8") + logger.Info("Using api/3/search/jql for JIRA Cloud") + err = setupApiV3Collector(apiCollector, data, epicIterator, jql) } + if err != nil { + return err + } + return apiCollector.Execute() +} - err = apiCollector.InitCollector(api.ApiCollectorArgs{ +// JIRA Server API v2 collector +func setupApiV2Collector(apiCollector *api.StatefulApiCollector, data *JiraTaskData, epicIterator api.Iterator, jql string) errors.Error { + return apiCollector.InitCollector(api.ApiCollectorArgs{ ApiClient: data.ApiClient, PageSize: 100, Incremental: false, - UrlTemplate: urlTemplate, + UrlTemplate: "api/2/search", Query: func(reqData *api.RequestData) (url.Values, errors.Error) { query := url.Values{} epicKeys := []string{} - for _, e := range reqData.Input.([]interface{}) { - epicKeys = append(epicKeys, *e.(*string)) + + input, ok := reqData.Input.([]interface{}) + if !ok { + 
return nil, errors.Default.New("invalid input type, expected []interface{}") } + + for _, e := range input { + if epicKey, ok := e.(*string); ok && epicKey != nil { + epicKeys = append(epicKeys, *epicKey) + } + } + localJQL := fmt.Sprintf("issue in (%s) and %s", strings.Join(epicKeys, ","), jql) query.Set("jql", localJQL) query.Set("startAt", fmt.Sprintf("%v", reqData.Pager.Skip)) query.Set("maxResults", fmt.Sprintf("%v", reqData.Pager.Size)) query.Set("expand", "changelog") - // api/3 requires fields parameter, api/2 does not - if shouldAddFieldsParam { - query.Set("fields", "*all") - } return query, nil }, Input: epicIterator, @@ -140,13 +142,78 @@ func CollectEpics(taskCtx plugin.SubTaskContext) errors.Error { } return data.Issues, nil }, - // Jira Server returns 400 if the epic is not found AfterResponse: ignoreHTTPStatus400, }) +} + +// JIRA Cloud API v3 collector +func setupApiV3Collector(apiCollector *api.StatefulApiCollector, data *JiraTaskData, epicIterator api.Iterator, jql string) errors.Error { + return apiCollector.InitCollector(api.ApiCollectorArgs{ + ApiClient: data.ApiClient, + PageSize: 100, + Incremental: false, + UrlTemplate: "api/3/search/jql", + GetNextPageCustomData: getNextPageCustomDataForV3, + Query: func(reqData *api.RequestData) (url.Values, errors.Error) { + query := url.Values{} + epicKeys := []string{} + for _, e := range reqData.Input.([]interface{}) { + epicKeys = append(epicKeys, *e.(*string)) + } + localJQL := fmt.Sprintf("issue in (%s) and %s", strings.Join(epicKeys, ","), jql) + query.Set("jql", localJQL) + query.Set("maxResults", fmt.Sprintf("%v", reqData.Pager.Size)) + query.Set("expand", "changelog") + query.Set("fields", "*all") + + if reqData.CustomData != nil { + query.Set("nextPageToken", reqData.CustomData.(string)) + } + + return query, nil + }, + Input: epicIterator, + ResponseParser: func(res *http.Response) ([]json.RawMessage, errors.Error) { + var data struct { + Issues []json.RawMessage `json:"issues"` + } + blob, 
err := io.ReadAll(res.Body) + if err != nil { + return nil, errors.Convert(err) + } + err = json.Unmarshal(blob, &data) + if err != nil { + return nil, errors.Convert(err) + } + return data.Issues, nil + }, + AfterResponse: ignoreHTTPStatus400, + }) +} + +// Get next page token for API v3 +func getNextPageCustomDataForV3(_ *api.RequestData, prevPageResponse *http.Response) (interface{}, errors.Error) { + var response struct { + NextPageToken string `json:"nextPageToken"` + } + + blob, err := io.ReadAll(prevPageResponse.Body) if err != nil { - return err + return nil, errors.Convert(err) } - return apiCollector.Execute() + + prevPageResponse.Body = io.NopCloser(strings.NewReader(string(blob))) + + err = json.Unmarshal(blob, &response) + if err != nil { + return nil, errors.Convert(err) + } + + if response.NextPageToken == "" { + return nil, api.ErrFinishCollect + } + + return response.NextPageToken, nil } func GetEpicKeysIterator(db dal.Dal, data *JiraTaskData, batchSize int) (api.Iterator, errors.Error) { From 9a6ff4aa212b11473236ea12390ae0554fd846f4 Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Fri, 29 Aug 2025 09:04:53 +0800 Subject: [PATCH 05/13] fix(jira): fix type for Jira issue descriptions --- .../plugins/jira/tasks/apiv2models/issue.go | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/backend/plugins/jira/tasks/apiv2models/issue.go b/backend/plugins/jira/tasks/apiv2models/issue.go index 4fcc8578050..444ccc2104d 100644 --- a/backend/plugins/jira/tasks/apiv2models/issue.go +++ b/backend/plugins/jira/tasks/apiv2models/issue.go @@ -26,6 +26,43 @@ import ( "github.com/apache/incubator-devlake/plugins/jira/models" ) +// FlexibleDescription handles both string and object (ADF) formats for Jira description field +type FlexibleDescription struct { + Value string +} + +// UnmarshalJSON implements custom JSON unmarshaling for FlexibleDescription +func (fd *FlexibleDescription) UnmarshalJSON(data []byte) error { 
+ // handle null values + if string(data) == "null" { + fd.Value = "" + return nil + } + + // try to unmarshal as string first + var str string + if err := json.Unmarshal(data, &str); err == nil { + fd.Value = str + return nil + } + + // if string unmarshaling fails, try to unmarshal as object + var obj map[string]interface{} + if err := json.Unmarshal(data, &obj); err != nil { + fd.Value = string(data) + return nil + } + + // keep the JSON representation + fd.Value = string(data) + return nil +} + +// String returns the string representation of the description +func (fd FlexibleDescription) String() string { + return fd.Value +} + type Issue struct { Expand string `json:"expand"` ID uint64 `json:"id,string"` @@ -121,8 +158,8 @@ type Issue struct { ID string `json:"id"` Name string `json:"name"` } `json:"components"` - Timeoriginalestimate *int64 `json:"timeoriginalestimate"` - Description string `json:"description"` + Timeoriginalestimate *int64 `json:"timeoriginalestimate"` + Description FlexibleDescription `json:"description"` Timetracking *struct { RemainingEstimate string `json:"remainingEstimate"` TimeSpent string `json:"timeSpent"` @@ -233,7 +270,7 @@ func (i Issue) toToolLayer(connectionId uint64) *models.JiraIssue { IssueKey: i.Key, StoryPoint: &workload, Summary: i.Fields.Summary, - Description: i.Fields.Description, + Description: i.Fields.Description.Value, Type: i.Fields.Issuetype.ID, StatusName: i.Fields.Status.Name, StatusKey: i.Fields.Status.StatusCategory.Key, From 01baebbc82488b3166e1ec101a2a524d49d0a190 Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Fri, 29 Aug 2025 11:52:37 +0800 Subject: [PATCH 06/13] refactor(jira): update comment and worklog models to use FlexibleDescription type for comments --- .../plugins/jira/tasks/apiv2models/comment.go | 18 +-- .../plugins/jira/tasks/apiv2models/issue.go | 109 ++++++++++++++++-- .../plugins/jira/tasks/apiv2models/worklog.go | 22 ++-- 3 files changed, 120 insertions(+), 29 deletions(-) 
diff --git a/backend/plugins/jira/tasks/apiv2models/comment.go b/backend/plugins/jira/tasks/apiv2models/comment.go index b41feb8478d..c2cd8e1edce 100644 --- a/backend/plugins/jira/tasks/apiv2models/comment.go +++ b/backend/plugins/jira/tasks/apiv2models/comment.go @@ -25,14 +25,14 @@ import ( ) type Comment struct { - Self string `json:"self"` - Id string `json:"id"` - Author *Account `json:"author"` - Body string `json:"body"` - UpdateAuthor *Account `json:"updateAuthor"` - Created common.Iso8601Time `json:"created"` - Updated common.Iso8601Time `json:"updated"` - JsdPublic bool `json:"jsdPublic"` + Self string `json:"self"` + Id string `json:"id"` + Author *Account `json:"author"` + Body FlexibleDescription `json:"body"` + UpdateAuthor *Account `json:"updateAuthor"` + Created common.Iso8601Time `json:"created"` + Updated common.Iso8601Time `json:"updated"` + JsdPublic bool `json:"jsdPublic"` } func (c Comment) ToToolLayer(connectionId uint64, issueId uint64, issueUpdated *time.Time) *models.JiraIssueComment { @@ -41,7 +41,7 @@ func (c Comment) ToToolLayer(connectionId uint64, issueId uint64, issueUpdated * IssueId: issueId, ComentId: c.Id, Self: c.Self, - Body: c.Body, + Body: c.Body.Value, Created: c.Updated.ToTime(), Updated: c.Updated.ToTime(), IssueUpdated: issueUpdated, diff --git a/backend/plugins/jira/tasks/apiv2models/issue.go b/backend/plugins/jira/tasks/apiv2models/issue.go index 444ccc2104d..fd0e993b375 100644 --- a/backend/plugins/jira/tasks/apiv2models/issue.go +++ b/backend/plugins/jira/tasks/apiv2models/issue.go @@ -19,6 +19,7 @@ package apiv2models import ( "encoding/json" + "strings" "time" "github.com/apache/incubator-devlake/core/errors" @@ -26,39 +27,129 @@ import ( "github.com/apache/incubator-devlake/plugins/jira/models" ) -// FlexibleDescription handles both string and object (ADF) formats for Jira description field +// FlexibleDescription supports both plain text and ADF (Atlassian Document Format) for Jira description field type 
FlexibleDescription struct { Value string } +// ADFNode represents a node in Atlassian Document Format +type ADFNode struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` + Content []ADFNode `json:"content,omitempty"` + Attrs map[string]interface{} `json:"attrs,omitempty"` +} + // UnmarshalJSON implements custom JSON unmarshaling for FlexibleDescription func (fd *FlexibleDescription) UnmarshalJSON(data []byte) error { - // handle null values + // Handle null values if string(data) == "null" { fd.Value = "" return nil } - // try to unmarshal as string first + // Try to unmarshal as string first var str string if err := json.Unmarshal(data, &str); err == nil { fd.Value = str return nil } - // if string unmarshaling fails, try to unmarshal as object - var obj map[string]interface{} - if err := json.Unmarshal(data, &obj); err != nil { - fd.Value = string(data) + // Try to unmarshal as ADF document + var adfDoc ADFNode + if err := json.Unmarshal(data, &adfDoc); err == nil { + fd.Value = extractTextFromADF(adfDoc) return nil } - // keep the JSON representation + // Fallback: keep raw JSON as string for debugging fd.Value = string(data) return nil } -// String returns the string representation of the description +// extractTextFromADF recursively extracts plain text from ADF document +func extractTextFromADF(node ADFNode) string { + var result strings.Builder + + switch node.Type { + case "text": + result.WriteString(node.Text) + case "hardBreak": + result.WriteString("\n") + case "paragraph": + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + result.WriteString("\n") + case "heading": + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + result.WriteString("\n") + case "listItem": + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + case "bulletList", "orderedList": + for _, child := range node.Content { + result.WriteString("• ") 
+ result.WriteString(extractTextFromADF(child)) + result.WriteString("\n") + } + case "table": + for _, row := range node.Content { + if row.Type == "tableRow" { + for j, cell := range row.Content { + if j > 0 { + result.WriteString(" | ") + } + result.WriteString(extractTextFromADF(cell)) + } + result.WriteString("\n") + } + } + case "tableCell", "tableHeader": + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + case "codeBlock": + result.WriteString("```\n") + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + result.WriteString("\n```\n") + case "blockquote": + result.WriteString("> ") + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + result.WriteString("\n") + case "doc": + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + case "inlineCard", "mention": + // Extract text from attrs or content for links and mentions + if attrs, ok := node.Attrs["text"]; ok { + if text, ok := attrs.(string); ok { + result.WriteString(text) + } + } else { + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + } + default: + // For unknown types, extract content recursively + for _, child := range node.Content { + result.WriteString(extractTextFromADF(child)) + } + } + + return result.String() +} + +// String returns the string value func (fd FlexibleDescription) String() string { return fd.Value } diff --git a/backend/plugins/jira/tasks/apiv2models/worklog.go b/backend/plugins/jira/tasks/apiv2models/worklog.go index 83bbe0c7bfb..5fdb237782e 100644 --- a/backend/plugins/jira/tasks/apiv2models/worklog.go +++ b/backend/plugins/jira/tasks/apiv2models/worklog.go @@ -25,17 +25,17 @@ import ( ) type Worklog struct { - Self string `json:"self"` - Author *Account `json:"author"` - UpdateAuthor *Account `json:"updateAuthor"` - Comment string `json:"comment"` - Created string `json:"created"` 
- Updated common.Iso8601Time `json:"updated"` - Started common.Iso8601Time `json:"started"` - TimeSpent string `json:"timeSpent"` - TimeSpentSeconds int `json:"timeSpentSeconds"` - ID string `json:"id"` - IssueID uint64 `json:"issueId,string"` + Self string `json:"self"` + Author *Account `json:"author"` + UpdateAuthor *Account `json:"updateAuthor"` + Comment FlexibleDescription `json:"comment"` + Created string `json:"created"` + Updated common.Iso8601Time `json:"updated"` + Started common.Iso8601Time `json:"started"` + TimeSpent string `json:"timeSpent"` + TimeSpentSeconds int `json:"timeSpentSeconds"` + ID string `json:"id"` + IssueID uint64 `json:"issueId,string"` } func (w Worklog) ToToolLayer(connectionId uint64, issueUpdated *time.Time) *models.JiraWorklog { From 5c4060f0def0629552334d5a65bfa278ed522adb Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Mon, 1 Sep 2025 12:10:12 +0800 Subject: [PATCH 07/13] docs(jira): add ADF reference for FlexibleDescription type in issue model --- backend/plugins/jira/tasks/apiv2models/issue.go | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/plugins/jira/tasks/apiv2models/issue.go b/backend/plugins/jira/tasks/apiv2models/issue.go index fd0e993b375..ab54efb9594 100644 --- a/backend/plugins/jira/tasks/apiv2models/issue.go +++ b/backend/plugins/jira/tasks/apiv2models/issue.go @@ -28,6 +28,7 @@ import ( ) // FlexibleDescription supports both plain text and ADF (Atlassian Document Format) for Jira description field +// ADF reference: https://developer.atlassian.com/cloud/jira/platform/apis/document/structure/ type FlexibleDescription struct { Value string } From d6d376415beec31f042753aa9dab1fd6d6d18541 Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Fri, 5 Sep 2025 08:44:30 +0800 Subject: [PATCH 08/13] refactor(migrations): enhance file meta migration to check column existence and nullability before modification --- .../20250320_modify_file_meta.go | 77 +++++++++++++++++-- 1 file 
changed, 71 insertions(+), 6 deletions(-) diff --git a/backend/plugins/q_dev/models/migrationscripts/20250320_modify_file_meta.go b/backend/plugins/q_dev/models/migrationscripts/20250320_modify_file_meta.go index d9d70427366..9744f6d0971 100644 --- a/backend/plugins/q_dev/models/migrationscripts/20250320_modify_file_meta.go +++ b/backend/plugins/q_dev/models/migrationscripts/20250320_modify_file_meta.go @@ -19,6 +19,7 @@ package migrationscripts import ( "github.com/apache/incubator-devlake/core/context" + "github.com/apache/incubator-devlake/core/dal" "github.com/apache/incubator-devlake/core/errors" ) @@ -31,14 +32,78 @@ func (*modifyFileMetaTable) Name() string { func (*modifyFileMetaTable) Up(basicRes context.BasicRes) errors.Error { db := basicRes.GetDal() - // 修改 processed_time 列允许为 NULL - sql := "ALTER TABLE _tool_q_dev_s3_file_meta MODIFY processed_time DATETIME NULL" - err := db.Exec(sql) - if err != nil { - return errors.Default.Wrap(err, "failed to modify processed_time column") + // Target table and column + tableName := "_tool_q_dev_s3_file_meta" + columnName := "processed_time" + + // If column doesn't exist, no migration needed, idempotent + if !db.HasColumn(tableName, columnName) { + return nil } - return nil + // Read column metadata to check if already nullable, return idempotently if already nullable + var processedTimeNullable bool + { + cols, err := db.GetColumns(dal.DefaultTabler{Name: tableName}, func(cm dal.ColumnMeta) bool { + return cm.Name() == columnName + }) + if err != nil { + return errors.Default.Wrap(err, "failed to load column metadata for _tool_q_dev_s3_file_meta.processed_time") + } + if len(cols) == 0 { + // If column is not visible in metadata, treat as no processing needed + return nil + } + if nullable, ok := cols[0].Nullable(); ok { + processedTimeNullable = nullable + } + } + if processedTimeNullable { + return nil + } + + // Execute compatible SQL by dialect + switch db.Dialect() { + case "postgres": + // PostgreSQL makes 
column nullable via DROP NOT NULL, without changing data type + if err := db.Exec( + "ALTER TABLE ? ALTER COLUMN ? DROP NOT NULL", + dal.ClauseTable{Name: tableName}, + dal.ClauseColumn{Name: columnName}, + ); err != nil { + return errors.Default.Wrap(err, "failed to drop NOT NULL on processed_time for postgres") + } + return nil + case "mysql": + // MySQL requires MODIFY COLUMN with original type specification, preserve original type as much as possible + cols, err := db.GetColumns(dal.DefaultTabler{Name: tableName}, func(cm dal.ColumnMeta) bool { + return cm.Name() == columnName + }) + if err != nil { + return errors.Default.Wrap(err, "failed to load column metadata for mysql type preservation") + } + columnTypeSql := "DATETIME" + if len(cols) > 0 { + if ct, ok := cols[0].ColumnType(); ok && ct != "" { + columnTypeSql = ct + } else if dbt := cols[0].DatabaseTypeName(); dbt != "" { + // DatabaseTypeName may return DATETIME, TIMESTAMP etc + columnTypeSql = dbt + } + } + alterSql := "ALTER TABLE ? MODIFY COLUMN ? 
" + columnTypeSql + " NULL" + if err := db.Exec( + alterSql, + dal.ClauseTable{Name: tableName}, + dal.ClauseColumn{Name: columnName}, + ); err != nil { + return errors.Default.Wrap(err, "failed to modify processed_time to NULL for mysql") + } + return nil + default: + // Other dialects are not forced to migrate for now, return idempotently + return nil + } } func (*modifyFileMetaTable) Version() uint64 { From 71b27ba9bfef830370ea68cef2aaf6cb1bea35e3 Mon Sep 17 00:00:00 2001 From: Bamboo <13664854532@163.com> Date: Tue, 23 Sep 2025 13:15:36 +0800 Subject: [PATCH 09/13] feat(gitextractor): add support for excluding file extensions in commit stats --- .../plugins/gitextractor/parser/repo_gogit.go | 49 +++++++++- .../gitextractor/parser/repo_libgit2.go | 95 ++++++++++++++++--- .../plugins/gitextractor/parser/taskdata.go | 2 + backend/plugins/gitlab/api/blueprint_v200.go | 25 +++-- ...0250921_add_pr_size_excluded_extensions.go | 50 ++++++++++ .../models/migrationscripts/register.go | 1 + backend/plugins/gitlab/models/scope_config.go | 2 + .../src/plugins/register/gitlab/config.tsx | 1 + .../register/gitlab/transformation.tsx | 30 ++++++ 9 files changed, 226 insertions(+), 29 deletions(-) create mode 100644 backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go b/backend/plugins/gitextractor/parser/repo_gogit.go index baaa4e42b17..21cf09cd9e9 100644 --- a/backend/plugins/gitextractor/parser/repo_gogit.go +++ b/backend/plugins/gitextractor/parser/repo_gogit.go @@ -23,6 +23,7 @@ import ( "encoding/hex" "fmt" "regexp" + "strings" "github.com/apache/incubator-devlake/core/dal" "github.com/apache/incubator-devlake/core/errors" @@ -220,9 +221,6 @@ func (r *GogitRepoCollector) CollectBranches(subtaskCtx plugin.SubTaskContext) e func(r *plumbing.Reference) bool { return r.Name().IsBranch() || r.Name().IsRemote() }, refIter) - if err != nil { - return err - } headRef, err := 
r.repo.Head() if err != nil { return err @@ -336,7 +334,26 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e if err != nil { return err } else { + excluded := map[string]struct{}{} + for _, ext := range taskOpts.ExcludeFileExtensions { + e := strings.ToLower(strings.TrimSpace(ext)) + if e == "" { + continue + } + excluded[e] = struct{}{} + } for _, stat := range stats { + nameLower := strings.ToLower(stat.Name) + skip := false + for ext := range excluded { + if strings.HasSuffix(nameLower, ext) { + skip = true + break + } + } + if skip { + continue + } codeCommit.Additions += stat.Addition // In some repos, deletion may be zero, which is different from git log --stat. // It seems go-git doesn't get the correct changes. @@ -363,7 +380,7 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e return err } if !*taskOpts.SkipCommitFiles { - if err := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); err != nil { + if err := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit, taskOpts.ExcludeFileExtensions); err != nil { return err } } @@ -423,7 +440,7 @@ func (r *GogitRepoCollector) getCurrentAndParentTree(ctx context.Context, commit return commitTree, firstParentTree, nil } -func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit) (err error) { +func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit, excludeExts []string) (err error) { commitTree, firstParentTree, err := r.getCurrentAndParentTree(subtaskCtx.GetContext(), commit) if err != nil { return err @@ -433,12 +450,34 @@ func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plu if err != nil { return err } + // normalize exclusions + excluded := map[string]struct{}{} + for _, 
ext := range excludeExts { + e := strings.ToLower(strings.TrimSpace(ext)) + if e == "" { + continue + } + excluded[e] = struct{}{} + } for _, p := range patch.Stats() { commitFile := &code.CommitFile{ CommitSha: commit.Hash.String(), } fileName := p.Name commitFile.FilePath = fileName + if len(excluded) > 0 { + lower := strings.ToLower(fileName) + skip := false + for ext := range excluded { + if strings.HasSuffix(lower, ext) { + skip = true + break + } + } + if skip { + continue + } + } commitFile.Id = genCommitFileId(commitFile.CommitSha, fileName) commitFile.Deletions = p.Deletion commitFile.Additions = p.Addition diff --git a/backend/plugins/gitextractor/parser/repo_libgit2.go b/backend/plugins/gitextractor/parser/repo_libgit2.go index 78451feb017..69d82e253d2 100644 --- a/backend/plugins/gitextractor/parser/repo_libgit2.go +++ b/backend/plugins/gitextractor/parser/repo_libgit2.go @@ -25,6 +25,7 @@ import ( "regexp" "sort" "strconv" + "strings" "github.com/apache/incubator-devlake/core/dal" "github.com/apache/incubator-devlake/core/errors" @@ -317,12 +318,13 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) if !*taskOpts.SkipCommitStat { var stats *git.DiffStats - if stats, err = r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); err != nil { + var addIncluded, delIncluded int + if stats, addIncluded, delIncluded, err = r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); err != nil { return err } r.logger.Debug("state: %#+v\n", stats.Deletions()) - c.Additions += stats.Insertions() - c.Deletions += stats.Deletions() + c.Additions += addIncluded + c.Deletions += delIncluded } err = r.store.Commits(c) @@ -358,39 +360,83 @@ func (r *Libgit2RepoCollector) storeParentCommits(commitSha string, commit *git. 
return r.store.CommitParents(commitParents) } -func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts *GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, errors.Error) { +func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts *GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, int, int, errors.Error) { var err error var parentTree, tree *git.Tree if parent != nil { parentTree, err = parent.Tree() } if err != nil { - return nil, errors.Convert(err) + return nil, 0, 0, errors.Convert(err) } tree, err = commit.Tree() if err != nil { - return nil, errors.Convert(err) + return nil, 0, 0, errors.Convert(err) } var diff *git.Diff diff, err = r.repo.DiffTreeToTree(parentTree, tree, opts) if err != nil { - return nil, errors.Convert(err) + return nil, 0, 0, errors.Convert(err) + } + // build excluded extension set + excluded := map[string]struct{}{} + for _, ext := range taskOpts.ExcludeFileExtensions { + e := strings.ToLower(strings.TrimSpace(ext)) + if e == "" { + continue + } + excluded[e] = struct{}{} } if !*taskOpts.SkipCommitFiles { - err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap) + err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap, excluded) if err != nil { - return nil, errors.Convert(err) + return nil, 0, 0, errors.Convert(err) } } var stats *git.DiffStats stats, err = diff.Stats() if err != nil { - return nil, errors.Convert(err) - } - return stats, nil + return nil, 0, 0, errors.Convert(err) + } + // calculate included totals with exclusions + addIncluded := 0 + delIncluded := 0 + if len(excluded) == 0 { + addIncluded = stats.Insertions() + delIncluded = stats.Deletions() + return stats, addIncluded, delIncluded, nil + } + _ = diff.ForEach(func(file git.DiffDelta, progress float64) 
(git.DiffForEachHunkCallback, error) { + // choose path to check based on delta status; for deletions use old path + pathForCheck := file.NewFile.Path + if file.Status == git.DeltaDeleted || pathForCheck == "" { + pathForCheck = file.OldFile.Path + } + lower := strings.ToLower(pathForCheck) + for ext := range excluded { + if strings.HasSuffix(lower, ext) { + // skip all lines for excluded files + return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) { + return func(line git.DiffLine) error { return nil }, nil + }, nil + } + } + return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) { + return func(line git.DiffLine) error { + if line.Origin == git.DiffLineAddition { + addIncluded += line.NumLines + } + if line.Origin == git.DiffLineDeletion { + delIncluded += line.NumLines + } + return nil + }, nil + }, nil + }, git.DiffDetailLines) + return stats, addIncluded, delIncluded, nil } -func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, componentMap map[string]*regexp.Regexp) errors.Error { +func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, componentMap map[string]*regexp.Regexp, excluded map[string]struct{}) errors.Error { var commitFile *code.CommitFile var commitFileComponent *code.CommitFileComponent var err error @@ -404,15 +450,36 @@ func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff * } } + // skip files by extension if configured + if len(excluded) > 0 { + pathForCheck := file.NewFile.Path + if file.Status == git.DeltaDeleted || pathForCheck == "" { + pathForCheck = file.OldFile.Path + } + lower := strings.ToLower(pathForCheck) + for ext := range excluded { + if strings.HasSuffix(lower, ext) { + // skip this file entirely + return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) { + return func(line git.DiffLine) error { return nil }, nil + }, nil + } + } + } + commitFile = new(code.CommitFile) commitFile.CommitSha = 
commitSha + // prefer new path; for deletions fall back to old path commitFile.FilePath = file.NewFile.Path + if commitFile.FilePath == "" { + commitFile.FilePath = file.OldFile.Path + } // With some long path,the varchar(255) was not enough both ID and file_path // So we use the hash to compress the path in ID and add length of file_path. // Use commitSha and the sha256 of FilePath to create id shaFilePath := sha256.New() - shaFilePath.Write([]byte(file.NewFile.Path)) + shaFilePath.Write([]byte(commitFile.FilePath)) commitFile.Id = commitSha + ":" + hex.EncodeToString(shaFilePath.Sum(nil)) commitFileComponent = new(code.CommitFileComponent) diff --git a/backend/plugins/gitextractor/parser/taskdata.go b/backend/plugins/gitextractor/parser/taskdata.go index 8dccf5ffe9f..bdfdbd2ae1b 100644 --- a/backend/plugins/gitextractor/parser/taskdata.go +++ b/backend/plugins/gitextractor/parser/taskdata.go @@ -47,4 +47,6 @@ type GitExtractorOptions struct { NoShallowClone bool `json:"noShallowClone" mapstructure:"noShallowClone"` ConnectionId uint64 `json:"connectionId" mapstructure:"connectionId,omitempty"` PluginName string `json:"pluginName" mapstructure:"pluginName,omitempty"` + // Configured by upstream plugin (e.g., GitLab) to exclude file extensions from commit stats + ExcludeFileExtensions []string `json:"excludeFileExtensions" mapstructure:"excludeFileExtensions"` } diff --git a/backend/plugins/gitlab/api/blueprint_v200.go b/backend/plugins/gitlab/api/blueprint_v200.go index b891f72b3ef..dbe14905df1 100644 --- a/backend/plugins/gitlab/api/blueprint_v200.go +++ b/backend/plugins/gitlab/api/blueprint_v200.go @@ -132,17 +132,22 @@ func makePipelinePlanV200( return nil, err } cloneUrl.User = url.UserPassword("git", connection.Token) + gitextOpts := map[string]interface{}{ + "url": cloneUrl.String(), + "name": gitlabProject.Name, + "fullName": gitlabProject.PathWithNamespace, + "repoId": didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID, 
gitlabProject.GitlabId), + "proxy": connection.Proxy, + "connectionId": gitlabProject.ConnectionId, + "pluginName": "gitlab", + } + if len(scopeConfig.PrSizeExcludedFileExtensions) > 0 { + // pass excluded file extensions to gitextractor to support PR Size exclusion + gitextOpts["excludeFileExtensions"] = scopeConfig.PrSizeExcludedFileExtensions + } stage = append(stage, &coreModels.PipelineTask{ - Plugin: "gitextractor", - Options: map[string]interface{}{ - "url": cloneUrl.String(), - "name": gitlabProject.Name, - "fullName": gitlabProject.PathWithNamespace, - "repoId": didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID, gitlabProject.GitlabId), - "proxy": connection.Proxy, - "connectionId": gitlabProject.ConnectionId, - "pluginName": "gitlab", - }, + Plugin: "gitextractor", + Options: gitextOpts, }) } diff --git a/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go b/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go new file mode 100644 index 00000000000..a0971e38a4d --- /dev/null +++ b/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go @@ -0,0 +1,50 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package migrationscripts + +import ( + "github.com/apache/incubator-devlake/core/context" + "github.com/apache/incubator-devlake/core/errors" + "github.com/apache/incubator-devlake/core/plugin" + "github.com/apache/incubator-devlake/helpers/migrationhelper" +) + +var _ plugin.MigrationScript = (*addPrSizeExcludedFileExtensions)(nil) + +type gitlabScopeConfig20250921 struct { + PrSizeExcludedFileExtensions []string `gorm:"type:json" json:"prSizeExcludedFileExtensions" mapstructure:"prSizeExcludedFileExtensions"` +} + +func (gitlabScopeConfig20250921) TableName() string { + return "_tool_gitlab_scope_configs" +} + +type addPrSizeExcludedFileExtensions struct{} + +func (script *addPrSizeExcludedFileExtensions) Up(basicRes context.BasicRes) errors.Error { + return migrationhelper.AutoMigrateTables( + basicRes, + &gitlabScopeConfig20250921{}, + ) +} + +func (*addPrSizeExcludedFileExtensions) Version() uint64 { return 20250921100000 } + +func (*addPrSizeExcludedFileExtensions) Name() string { + return "add pr_size_excluded_file_extensions to _tool_gitlab_scope_configs" +} diff --git a/backend/plugins/gitlab/models/migrationscripts/register.go b/backend/plugins/gitlab/models/migrationscripts/register.go index 1d89b250512..30a76f63ed9 100644 --- a/backend/plugins/gitlab/models/migrationscripts/register.go +++ b/backend/plugins/gitlab/models/migrationscripts/register.go @@ -52,5 +52,6 @@ func All() []plugin.MigrationScript { new(addGitlabAssigneeAndReviewerPrimaryKey), new(changeIssueComponentType), new(addIsChildToPipelines240906), + new(addPrSizeExcludedFileExtensions), } } diff --git a/backend/plugins/gitlab/models/scope_config.go b/backend/plugins/gitlab/models/scope_config.go index 78cfd7f2d1d..525720c3282 100644 --- a/backend/plugins/gitlab/models/scope_config.go +++ b/backend/plugins/gitlab/models/scope_config.go @@ -37,6 +37,8 @@ type GitlabScopeConfig struct { ProductionPattern string `mapstructure:"productionPattern,omitempty" json:"productionPattern" 
gorm:"type:varchar(255)"` EnvNamePattern string `mapstructure:"envNamePattern,omitempty" json:"envNamePattern" gorm:"type:varchar(255)"` Refdiff datatypes.JSONMap `mapstructure:"refdiff,omitempty" json:"refdiff" swaggertype:"object" format:"json"` + // A list of file extensions to exclude when calculating PR Size (affects commit additions/deletions used by dashboards) + PrSizeExcludedFileExtensions []string `mapstructure:"prSizeExcludedFileExtensions" json:"prSizeExcludedFileExtensions" gorm:"type:json;serializer:json"` } func (t GitlabScopeConfig) TableName() string { diff --git a/config-ui/src/plugins/register/gitlab/config.tsx b/config-ui/src/plugins/register/gitlab/config.tsx index 9caf5260af6..b8924d7f750 100644 --- a/config-ui/src/plugins/register/gitlab/config.tsx +++ b/config-ui/src/plugins/register/gitlab/config.tsx @@ -78,6 +78,7 @@ export const GitLabConfig: IPluginConfig = { envNamePattern: '(?i)prod(.*)', deploymentPattern: '', productionPattern: '', + prSizeExcludedFileExtensions: [], }, }, }; diff --git a/config-ui/src/plugins/register/gitlab/transformation.tsx b/config-ui/src/plugins/register/gitlab/transformation.tsx index 3ad9b97c6f5..2405e661454 100644 --- a/config-ui/src/plugins/register/gitlab/transformation.tsx +++ b/config-ui/src/plugins/register/gitlab/transformation.tsx @@ -178,4 +178,34 @@ const renderCollapseItems = ({ > ), }, + { + key: 'CODEREVIEW', + label: 'Code Review', + style: panelStyle, + children: ( + <> +
+ Specify the file extensions to exclude when calculating PR Size. Changes to files with these extensions will not be counted toward commit additions/deletions.{' '}
+
.json
(data files), .md
(documentation files)
+