diff --git a/cla-backend-go/cmd/monthly_signature_report/monthly_signature_report.go b/cla-backend-go/cmd/monthly_signature_report/monthly_signature_report.go new file mode 100644 index 000000000..98799e41e --- /dev/null +++ b/cla-backend-go/cmd/monthly_signature_report/monthly_signature_report.go @@ -0,0 +1,348 @@ +// Copyright The Linux Foundation. +// SPDX-License-Identifier: MIT + +package main + +import ( + "encoding/csv" + "fmt" + "log" + "os" + "sort" + "strconv" + "strings" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/dynamodb" + "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute" +) + +const ( + regionDefault = "us-east-1" + profileName = "lfproduct-prod" + tableName = "cla-prod-signatures" +) + +// SignatureRecord represents the DynamoDB signature record structure +type SignatureRecord struct { + SignatureID string `dynamodbav:"signature_id"` + DateCreated string `dynamodbav:"date_created"` + ApproxDateCreated string `dynamodbav:"approx_date_created"` + SignatureType string `dynamodbav:"signature_type"` + SigtypeSignedApprovedID string `dynamodbav:"sigtype_signed_approved_id"` + SignatureApproved bool `dynamodbav:"signature_approved"` + SignatureSigned bool `dynamodbav:"signature_signed"` +} + +// MonthlyStats holds the count of signatures per month +type MonthlyStats struct { + Month string + ICLA int + ECLA int + CCLA int +} + +// go run cmd/monthly_signature_report/monthly_signature_report.go +func main() { + // Set up AWS session + sess, err := session.NewSessionWithOptions(session.Options{ + Profile: profileName, + Config: aws.Config{ + Region: aws.String(regionDefault), + }, + }) + if err != nil { + log.Fatalf("Error creating AWS session: %v", err) + } + + svc := dynamodb.New(sess) + + fmt.Println("Scanning signatures table for ICLA, ECLA, and CCLA statistics...") + + // Monthly counters map[YYYY-MM]Stats + monthlyStats := make(map[string]*MonthlyStats) + + // 
Scan parameters + // Full attributes scan + // params := &dynamodb.ScanInput{TableName: aws.String(tableName)} + // Scan only needed parameters + params := &dynamodb.ScanInput{ + TableName: aws.String(tableName), + // Only fetch the fields we actually need + ProjectionExpression: aws.String( + "#sid, #dc, #adc, #st, #ssa, #sa, #ss", + ), + ExpressionAttributeNames: map[string]*string{ + "#sid": aws.String("signature_id"), + "#dc": aws.String("date_created"), + "#adc": aws.String("approx_date_created"), + "#st": aws.String("signature_type"), + "#ssa": aws.String("sigtype_signed_approved_id"), + "#sa": aws.String("signature_approved"), + "#ss": aws.String("signature_signed"), + }, + } + + // Get current time for validation + now := time.Now() + currentMonth := now.Format("2006-01") + + totalProcessed := 0 + totalICLA := 0 + totalECLA := 0 + totalCCLA := 0 + skippedInvalidDates := 0 + skippedFutureDates := 0 + + // Scan the table + err = svc.ScanPages(params, func(page *dynamodb.ScanOutput, lastPage bool) bool { + for _, item := range page.Items { + var sig SignatureRecord + e := dynamodbattribute.UnmarshalMap(item, &sig) + if e != nil { + log.Printf("Error unmarshalling record: %v", e) + continue + } + + totalProcessed++ + if totalProcessed%1000 == 0 { + fmt.Printf("Processed %d records...\n", totalProcessed) + } + + // Only process signatures that are signed and approved + if !sig.SignatureSigned || !sig.SignatureApproved { + continue + } + + // Get the creation date (prefer date_created, fallback to approx_date_created) + creationDate := sig.DateCreated + if creationDate == "" { + creationDate = sig.ApproxDateCreated + } + if creationDate == "" { + continue + } + + // Parse creation date to extract month + month := extractMonth(creationDate) + if month == "" { + skippedInvalidDates++ + continue + } + + // Check if month is in the future + // Month and currentMonth are formatted as YYYY-MM, so string comparison is safe + if month > currentMonth { + 
skippedFutureDates++ + continue + } + + // Determine signature type based on multiple factors + var isICLA, isECLA, isCCLA bool + + // Primary method: check sigtype_signed_approved_id + if sig.SigtypeSignedApprovedID != "" { + if strings.HasPrefix(sig.SigtypeSignedApprovedID, "icla#") { + isICLA = true + totalICLA++ + } else if strings.HasPrefix(sig.SigtypeSignedApprovedID, "ecla#") { + isECLA = true + totalECLA++ + } else if strings.HasPrefix(sig.SigtypeSignedApprovedID, "ccla#") { + isCCLA = true + totalCCLA++ + } else { + // Skip unknown types + continue + } + } else if sig.SignatureType != "" { + // Fallback method: check signature_type field + switch sig.SignatureType { + case "cla", "icla": + // For legacy CLA records without sigtype_signed_approved_id, treat as ICLA + isICLA = true + totalICLA++ + case "ccla": + isCCLA = true + totalCCLA++ + case "ecla": + isECLA = true + totalECLA++ + default: + continue + } + } else { + // Skip records without type information + continue + } + + // Initialize month stats if not exists + if monthlyStats[month] == nil { + monthlyStats[month] = &MonthlyStats{Month: month} + } + + // Increment appropriate counter + if isICLA { + monthlyStats[month].ICLA++ + } else if isECLA { + monthlyStats[month].ECLA++ + } else if isCCLA { + monthlyStats[month].CCLA++ + } + } + return true // Continue scanning + }) + + if err != nil { + log.Fatalf("Error scanning table: %v", err) + } + + fmt.Printf("\nProcessing complete!\n") + fmt.Printf("Total records processed: %d\n", totalProcessed) + fmt.Printf("Total ICLA signatures: %d\n", totalICLA) + fmt.Printf("Total ECLA signatures: %d\n", totalECLA) + fmt.Printf("Total CCLA signatures: %d\n", totalCCLA) + fmt.Printf("Skipped invalid dates: %d\n", skippedInvalidDates) + fmt.Printf("Skipped future dates: %d\n", skippedFutureDates) + + // Convert map to slice and sort by month + var monthlyData []MonthlyStats + for _, stats := range monthlyStats { + monthlyData = append(monthlyData, *stats) + } + + 
// monthTimestampLayouts are the full-value layouts extractMonth accepts, tried
// in order. time.Parse accepts a fractional-seconds field after the seconds
// even when the layout does not mention one, so no ".999999" variants are
// needed, and "Z0700" matches both a literal "Z" and a numeric offset such as
// "+0000".
var monthTimestampLayouts = []string{
	"2006-01-02T15:04:05Z0700", // 2021-08-09T15:21:56.492368+0000, 2024-07-30T12:11:34Z
	time.RFC3339,               // 2024-07-30T12:11:34+00:00
	"2006-01-02 15:04:05",
	"2006-01-02",
}

// monthPrefixLayouts are applied to the leading characters of the date part
// when no full layout matches, longest first (YYYY-MM-DD, then YYYY-MM).
var monthPrefixLayouts = []string{"2006-01-02", "2006-01"}

// minReportYear is the earliest year extractMonth accepts.
const minReportYear = 2000

// monthIfPlausible formats t as YYYY-MM, or returns "" when its year lies
// outside [minReportYear, maxYear].
func monthIfPlausible(t time.Time, maxYear int) string {
	if y := t.Year(); y < minReportYear || y > maxYear {
		return ""
	}
	return t.Format("2006-01")
}

// extractMonth extracts YYYY-MM from a date_created style value.
//
// Surrounding whitespace is ignored. The value is first parsed against the
// known timestamp layouts; if one matches, the month is taken from it as
// written (no time-zone conversion is applied). If none matches, the text
// before the first 'T' is probed for a leading YYYY-MM-DD or YYYY-MM.
//
// It returns "" for empty or unparseable input and for dates whose year is
// before 2000 or after the current year.
func extractMonth(dateStr string) string {
	dateStr = strings.TrimSpace(dateStr)
	if dateStr == "" {
		return ""
	}

	maxYear := time.Now().Year()

	for _, layout := range monthTimestampLayouts {
		t, err := time.Parse(layout, dateStr)
		if err != nil {
			continue
		}
		// A successful full parse is final: an out-of-range year is rejected
		// here rather than retried through the prefix fallback below.
		return monthIfPlausible(t, maxYear)
	}

	// Fallback: salvage the date from values with an unparseable time part.
	datePart := dateStr
	if i := strings.IndexByte(dateStr, 'T'); i >= 0 {
		datePart = dateStr[:i]
	}
	for _, layout := range monthPrefixLayouts {
		// Each prefix layout is exactly as long as the text it matches.
		if len(datePart) < len(layout) {
			continue
		}
		t, err := time.Parse(layout, datePart[:len(layout)])
		if err != nil {
			continue
		}
		if month := monthIfPlausible(t, maxYear); month != "" {
			return month
		}
	}
	return ""
}
+2021-07;519;173;29 +2021-08;524;979;66 +2021-09;236;90;17 +2021-10;238;81;21 +2021-11;414;313;53 +2021-12;1324;113;25 +2022-01;597;148;30 +2022-02;478;147;32 +2022-03;402;137;34 +2022-04;364;119;25 +2022-05;397;115;22 +2022-06;407;142;42 +2022-07;415;140;20 +2022-08;610;146;23 +2022-09;476;202;35 +2022-10;590;273;38 +2022-11;478;183;38 +2022-12;421;95;26 +2023-01;456;128;27 +2023-02;473;251;31 +2023-03;564;1276;59 +2023-04;511;621;51 +2023-05;480;676;28 +2023-06;428;2659;45 +2023-07;427;1182;39 +2023-08;489;686;41 +2023-09;407;446;34 +2023-10;454;2427;75 +2023-11;462;619;39 +2023-12;379;81;19 +2024-01;468;107;26 +2024-02;440;258;36 +2024-03;545;599;30 +2024-04;460;157;25 +2024-05;500;516;23 +2024-06;392;1906;37 +2024-07;465;907;22 +2024-08;486;187;36 +2024-09;460;217;43 +2024-10;548;156;27 +2024-11;469;207;29 +2024-12;407;108;23 +2025-01;412;157;26 +2025-02;417;193;27 +2025-03;509;253;27 +2025-04;475;184;31 +2025-05;517;258;29 +2025-06;477;204;17 +2025-07;499;202;36 +2025-08;477;232;25 +2025-09;473;190;22 +2025-10;492;174;18 +2025-11;470;144;12