|
| 1 | +package main |
| 2 | + |
| 3 | +import ( |
| 4 | + "dnacollector" |
| 5 | + "encoding/json" |
| 6 | + "errors" |
| 7 | + "fmt" |
| 8 | + "github.com/caarlos0/env" |
| 9 | + log "github.com/sirupsen/logrus" |
| 10 | + git2 "gopkg.in/src-d/go-git.v4" |
| 11 | + "gopkg.in/src-d/go-git.v4/plumbing/format/diff" |
| 12 | + git "gopkg.in/src-d/go-git.v4/plumbing/object" |
| 13 | + "io" |
| 14 | +) |
| 15 | + |
// config groups the API credentials the collector reads from the process
// environment.
type config struct {
	// GithubToken is bound to the GITHUB_TOKEN environment variable.
	GithubToken string `env:"GITHUB_TOKEN"`
	// GitlabToken is bound to the GITLAB_TOKEN environment variable.
	GitlabToken string `env:"GITLAB_TOKEN"`
}
| 20 | + |
var (
	// ErrFileSimplifiedCreation is returned by NewFromFilePatch when a file
	// patch exposes neither a source nor a destination file, so there is
	// nothing to build a GitFileSimplified from.
	ErrFileSimplifiedCreation = errors.New("we could not instantiate GitFileSimplified from GitFile")
)
| 25 | + |
// GitFileSimplified is a JSON-serializable summary of a single file taken
// either from a commit tree or from a file patch.
type GitFileSimplified struct {
	// Name is the file name or path as reported by go-git.
	Name string `json:"name"`
	// Sha is the string form of the file's hash.
	Sha string `json:"sha"`
	// IsBinary reports whether go-git classified the content as binary.
	IsBinary bool `json:"is_binary"`
	// Size is the file size; entries built from a file patch leave it at 0.
	Size int64 `json:"size"`
}
| 32 | + |
| 33 | +type CommitSimplified struct { |
| 34 | + Message string `json:"message"` |
| 35 | + Sha string `json:"sha"` |
| 36 | + Author git.Signature `json:"author"` |
| 37 | + Committer git.Signature `json:"committer"` |
| 38 | + Files []*GitFileSimplified `json:"files"` |
| 39 | +} |
| 40 | + |
| 41 | +func NewFromGitFile(file *git.File) *GitFileSimplified { |
| 42 | + isBinary, _ := file.IsBinary() |
| 43 | + return &GitFileSimplified{Name: file.Name, Sha: file.Hash.String(), IsBinary: isBinary, Size: file.Size} |
| 44 | +} |
| 45 | + |
| 46 | +func NewFromCommit(commit *git.Commit, files []*GitFileSimplified) *CommitSimplified { |
| 47 | + return &CommitSimplified{Message: commit.Message, Sha: commit.Hash.String(), Author: commit.Author, Committer: commit.Committer, Files: files} |
| 48 | +} |
| 49 | + |
| 50 | +func NewFromFilePatch(filePatch diff.FilePatch) (*GitFileSimplified, error) { |
| 51 | + isBinary := filePatch.IsBinary() |
| 52 | + from, to := filePatch.Files() |
| 53 | + // If the patch creates a new file, "from" will be nil. |
| 54 | + // If the patch deletes a file, "to" will be nil. |
| 55 | + |
| 56 | + // Rare usecase |
| 57 | + if to == nil && from == nil { |
| 58 | + return nil, ErrFileSimplifiedCreation |
| 59 | + } else if to != nil { |
| 60 | + // File creation |
| 61 | + return &GitFileSimplified{Name: to.Path(), Sha: to.Hash().String(), IsBinary: isBinary, Size: 0}, nil |
| 62 | + } else { |
| 63 | + // File deletion |
| 64 | + return &GitFileSimplified{Name: from.Path(), Sha: from.Hash().String(), IsBinary: isBinary, Size: 0}, nil |
| 65 | + } |
| 66 | +} |
| 67 | + |
| 68 | +func NewAnalyzer() *Analyzer { |
| 69 | + return &Analyzer{make([]*CommitSimplified, 0)} |
| 70 | +} |
| 71 | + |
| 72 | +type Analyzer struct { |
| 73 | + CommitsList []*CommitSimplified |
| 74 | +} |
| 75 | + |
| 76 | +func (a *Analyzer) GetFilesFromCommit(commit *git.Commit) ([]*GitFileSimplified, error) { |
| 77 | + var files []*GitFileSimplified |
| 78 | + |
| 79 | + parent, err := commit.Parent(0) |
| 80 | + |
| 81 | + // There is no parent, so we take all the files |
| 82 | + if err != nil { |
| 83 | + filesIter, err := commit.Files() |
| 84 | + if err != nil { |
| 85 | + return nil, err |
| 86 | + } |
| 87 | + |
| 88 | + filesIter.ForEach(func(file *git.File) error { |
| 89 | + fileSimplified := NewFromGitFile(file) |
| 90 | + log.Debugf("Appending file %s", fileSimplified.Name) |
| 91 | + if fileSimplified.Size > 0 { |
| 92 | + /* fileSimplifiedJson, _ := json.Marshal(fileSimplified)*/ |
| 93 | + log.Info(fileSimplified) |
| 94 | + } |
| 95 | + |
| 96 | + files = append(files, fileSimplified) |
| 97 | + return nil |
| 98 | + }) |
| 99 | + // There is a parent, so we consider only the diff |
| 100 | + } else { |
| 101 | + patch, _ := commit.Patch(parent) |
| 102 | + filePatches := patch.FilePatches() |
| 103 | + //log.Info(patch.Stats()) |
| 104 | + for _, fp := range filePatches { |
| 105 | + fileSimplified, err := NewFromFilePatch(fp) |
| 106 | + //for _, chunk := range fp.Chunks() { |
| 107 | + // log.Debug(chunk) |
| 108 | + //} |
| 109 | + log.Debugf("Appending file %s", fileSimplified.Name) |
| 110 | + if err != nil { |
| 111 | + log.Warn(fileSimplified) |
| 112 | + files = append(files, fileSimplified) |
| 113 | + } else { |
| 114 | + continue |
| 115 | + //log.Error(ErrFileSimplifiedCreation) |
| 116 | + //log.Warn(commit) |
| 117 | + //log.Warn(fp) |
| 118 | + |
| 119 | + } |
| 120 | + } |
| 121 | + } |
| 122 | + |
| 123 | + return files, nil |
| 124 | +} |
| 125 | + |
| 126 | +// AnalyzeCommit extracts author and committer from a commit |
| 127 | +func (a *Analyzer) AnalyzeCommit(commit *git.Commit) string { |
| 128 | + // Store commmit sha |
| 129 | + files, _ := a.GetFilesFromCommit(commit) |
| 130 | + a.CommitsList = append(a.CommitsList, NewFromCommit(commit, files)) |
| 131 | + return commit.Hash.String() |
| 132 | +} |
| 133 | + |
| 134 | +func (a *Analyzer) GetStats() map[string]int { |
| 135 | + res := make(map[string]int) |
| 136 | + res["nb_commits"] = len(a.CommitsList) |
| 137 | + nb_files_shas := 0 |
| 138 | + for _, commit := range a.CommitsList { |
| 139 | + nb_files_shas += len(commit.Files) |
| 140 | + } |
| 141 | + res["nb_files_shas"] = nb_files_shas |
| 142 | + return res |
| 143 | +} |
| 144 | + |
| 145 | +//func (a *Analyzer) GetCommitShasArr() []string { |
| 146 | +// var res []string |
| 147 | +// for k := range a.SetCommitsSha { |
| 148 | +// res = append(res, k) |
| 149 | +// } |
| 150 | +// return res |
| 151 | +//} |
| 152 | + |
| 153 | +func main() { |
| 154 | + conf := config{} |
| 155 | + |
| 156 | + // Config log |
| 157 | + log.SetFormatter(&log.TextFormatter{ |
| 158 | + DisableColors: true, |
| 159 | + FullTimestamp: true, |
| 160 | + }) |
| 161 | + log.SetReportCaller(true) |
| 162 | + log.SetLevel(log.InfoLevel) |
| 163 | + |
| 164 | + if err := env.Parse(&conf); err != nil { |
| 165 | + log.Fatalf("Could not parse env: %v\n", err) |
| 166 | + } |
| 167 | + log.Debug(conf) |
| 168 | + //var cloner dnacollector.Cloner = &dnacollector.MemoryCloner{} |
| 169 | + //auth := &http.BasicAuth{ |
| 170 | + // Username: "ericfourrier", |
| 171 | + // Password: conf.GithubToken, |
| 172 | + //} |
| 173 | + |
| 174 | + repository, err := git2.PlainOpen("/Users/ericfourrier/Documents/GGCode/dna-collector/testdata/react-vis") |
| 175 | + if err != nil { |
| 176 | + fmt.Print(err) |
| 177 | + } |
| 178 | + repository.Config() |
| 179 | + //log.Infof("Cloned repo %v (size: %v)\n", repository.n, repository.GetStorageSize()) |
| 180 | + extractor, err := dnacollector.NewExtractor(repository) |
| 181 | + analyzer := NewAnalyzer() |
| 182 | + for { |
| 183 | + commit, err := extractor.ExtractNextCommit() |
| 184 | + if err != nil && err != io.EOF { |
| 185 | + log.Panic(err) |
| 186 | + } |
| 187 | + if commit == nil { |
| 188 | + break |
| 189 | + } |
| 190 | + |
| 191 | + analyzer.AnalyzeCommit(commit) |
| 192 | + } |
| 193 | + res2, _ := json.Marshal(analyzer.CommitsList) |
| 194 | + log.Debug(string(res2)) |
| 195 | + //fmt.Print(analyzer.SetCommitsSha) |
| 196 | + //for _, files := range analyzer.CommitTable { |
| 197 | + // for _, file := range files { |
| 198 | + // log.Info(file.Sha) |
| 199 | + // } |
| 200 | + //} |
| 201 | + log.Info(analyzer.GetStats()) |
| 202 | + log.Infof("Done extracting %v\n", repository) |
| 203 | + |
| 204 | +} |
0 commit comments