Skip to content

Commit 385a65f

Browse files
committed
Implement multipass analysis
Closes #380. Resyntax now produces a single "analysis" object that describes every round (pass) of changes to apply to the analyzed code before any files are actually modified. The CLI layer becomes a pure frontend to the analysis API: the multipass logic that previously lived in the CLI is no longer implemented there.
1 parent 0772c19 commit 385a65f

File tree

6 files changed

+345
-234
lines changed

6 files changed

+345
-234
lines changed

cli.rkt

Lines changed: 82 additions & 179 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
(require fancy-app
55
json
66
racket/cmdline
7+
racket/file
78
racket/format
89
racket/hash
10+
(except-in racket/list range)
911
racket/logging
1012
racket/match
1113
racket/path
@@ -14,6 +16,7 @@
1416
rebellion/collection/entry
1517
rebellion/collection/hash
1618
rebellion/collection/list
19+
rebellion/collection/multiset
1720
rebellion/collection/range-set
1821
rebellion/collection/vector/builder
1922
rebellion/streaming/reducer
@@ -218,15 +221,15 @@ For help on these, use 'analyze --help' or 'fix --help'."
218221

219222
(define (resyntax-analyze-run)
220223
(define options (resyntax-analyze-parse-command-line))
221-
(define files (file-groups-resolve (resyntax-analyze-options-targets options)))
222-
(printf "resyntax: --- analyzing code ---\n")
224+
(define sources (file-groups-resolve (resyntax-analyze-options-targets options)))
225+
(define analysis
226+
(resyntax-analyze-all sources
227+
#:suite (resyntax-analyze-options-suite options)
228+
#:max-passes 1))
223229
(define results
224-
(transduce files
225-
(append-mapping
226-
(λ (portion)
227-
(resyntax-analyze (file-source (file-portion-path portion))
228-
#:suite (resyntax-analyze-options-suite options)
229-
#:lines (file-portion-lines portion))))
230+
(transduce (resyntax-analysis-all-results analysis)
231+
(append-mapping in-hash-values)
232+
(append-mapping refactoring-result-set-results)
230233
#:into into-list))
231234

232235
(define (display-results)
@@ -244,7 +247,7 @@ For help on these, use 'analyze --help' or 'fix --help'."
244247
(string-indent (~a old-code) #:amount 2)
245248
(string-indent (~a new-code) #:amount 2)))]
246249
[(== github-pull-request-review)
247-
(define req (refactoring-results->github-review results #:file-count (length files)))
250+
(define req (refactoring-results->github-review results #:file-count (length sources)))
248251
(write-json (github-review-request-jsexpr req))]))
249252

250253
(match (resyntax-analyze-options-output-destination options)
@@ -259,191 +262,91 @@ For help on these, use 'analyze --help' or 'fix --help'."
259262
(define (resyntax-fix-run)
260263
(define options (resyntax-fix-parse-command-line))
261264
(define output-format (resyntax-fix-options-output-format options))
262-
(match output-format
263-
[(== git-commit-message)
264-
(display "This is an automated change generated by Resyntax.\n\n")]
265-
[_ (void)])
266-
(define files
267-
(transduce (file-groups-resolve (resyntax-fix-options-targets options))
268-
(indexing file-portion-path)
269-
(grouping into-list)
270-
#:into into-hash))
265+
(define sources (file-groups-resolve (resyntax-fix-options-targets options)))
271266
(define max-modified-files (resyntax-fix-options-max-modified-files options))
272267
(define max-modified-lines (resyntax-fix-options-max-modified-lines options))
273-
(define results-by-path
274-
(for/fold ([all-results (hash)]
275-
[files files]
276-
[max-fixes (resyntax-fix-options-max-fixes options)]
277-
[lines-to-analyze-by-file (hash)]
278-
#:result all-results)
279-
([pass-number (in-inclusive-range 1 (resyntax-fix-options-max-pass-count options))]
280-
#:do [(define pass-results
281-
(resyntax-fix-run-one-pass options files
282-
#:lines lines-to-analyze-by-file
283-
#:max-fixes max-fixes
284-
#:max-modified-files max-modified-files
285-
#:max-modified-lines max-modified-lines
286-
#:pass-number pass-number))
287-
(define pass-fix-count
288-
(for/sum ([(_ results) (in-hash pass-results)])
289-
(length results)))
290-
(define pass-modified-file-count (hash-count pass-results))
291-
(define new-max-fixes (- max-fixes pass-fix-count))]
292-
#:break (hash-empty? pass-results)
293-
#:final (zero? new-max-fixes))
294-
(define new-files (hash-filter-keys files (hash-has-key? pass-results _)))
295-
(define new-lines-to-analyze
296-
(for/hash ([(path results) (in-hash pass-results)])
297-
(values path
298-
(transduce results
299-
(mapping refactoring-result-modified-line-range)
300-
(filtering nonempty-range?)
301-
#:into (into-range-set natural<=>)))))
302-
(values (hash-union all-results pass-results #:combine append)
303-
new-files
304-
new-max-fixes
305-
new-lines-to-analyze)))
268+
(define analysis
269+
(resyntax-analyze-all sources
270+
#:suite (resyntax-fix-options-suite options)
271+
#:max-fixes (resyntax-fix-options-max-fixes options)
272+
#:max-passes (resyntax-fix-options-max-pass-count options)
273+
#:max-modified-sources max-modified-files
274+
#:max-modified-lines max-modified-lines))
275+
(resyntax-analysis-write-file-changes! analysis)
306276
(match output-format
307-
[(== plain-text) (printf "resyntax: --- summary ---\n")]
308-
[(== git-commit-message) (printf "## Summary\n\n")])
309-
(define total-fixes
310-
(for/sum ([(_ results) (in-hash results-by-path)])
311-
(length results)))
312-
(define total-files (hash-count results-by-path))
277+
[(== git-commit-message)
278+
(resyntax-fix-print-git-commit-message analysis)]
279+
[(== plain-text)
280+
(resyntax-fix-print-plain-text-summary analysis)]))
281+
282+
283+
(define (resyntax-fix-print-git-commit-message analysis)
284+
(display "This is an automated change generated by Resyntax.\n\n")
285+
(for ([pass-results (resyntax-analysis-all-results analysis)]
286+
[pass-number (in-naturals 1)])
287+
(unless (hash-empty? pass-results)
288+
(printf "#### Pass ~a\n\n" pass-number))
289+
(for ([(source result-set) (in-hash pass-results)])
290+
(define result-count (length (refactoring-result-set-results result-set)))
291+
(define fix-string (if (> result-count 1) "fixes" "fix"))
292+
;; For a commit message, we always use a relative path since we're likely running inside
293+
;; some CI runner. Additionally, we make the path a link to the corresponding file at HEAD,
294+
;; since making file paths clickable is pleasant.
295+
(define relative-path (find-relative-path (current-directory) (source-path source)))
296+
(define repo-head-path (format "../blob/HEAD/~a" relative-path))
297+
(printf "Applied ~a ~a to [`~a`](~a)\n\n"
298+
result-count fix-string relative-path repo-head-path)
299+
(for ([result (in-list (refactoring-result-set-results result-set))])
300+
(define line (refactoring-result-original-line result))
301+
(define rule (refactoring-result-rule-name result))
302+
(define message (refactoring-result-message result))
303+
(printf " * Line ~a, `~a`: ~a\n" line rule message))
304+
(newline)))
305+
(printf "## Summary\n\n")
306+
(define total-fixes (resyntax-analysis-total-fixes analysis))
307+
(define total-files (resyntax-analysis-total-sources-modified analysis))
313308
(define fix-counts-by-rule
314-
(transduce (hash-values results-by-path)
315-
(append-mapping values)
316-
(indexing refactoring-result-rule-name)
317-
(grouping into-count)
309+
(transduce (in-hash-entries (multiset-frequencies (resyntax-analysis-rules-applied analysis)))
318310
(sorting #:key entry-value #:descending? #true)
319311
#:into into-list))
320312
(define issue-string (if (> total-fixes 1) "issues" "issue"))
321313
(define file-string (if (> total-files 1) "files" "file"))
322-
(define summary-message
323-
(if (zero? total-fixes)
324-
"No issues found.\n"
325-
(format "Fixed ~a ~a in ~a ~a.\n\n" total-fixes issue-string total-files file-string)))
326-
(match output-format
327-
[(== plain-text) (printf "\n ~a" summary-message)]
328-
[(== git-commit-message) (printf summary-message)])
314+
(if (zero? total-fixes)
315+
(printf "No issues found.\n")
316+
(printf "Fixed ~a ~a in ~a ~a.\n\n" total-fixes issue-string total-files file-string))
329317
(for ([rule+count (in-list fix-counts-by-rule)])
330318
(match-define (entry rule count) rule+count)
331319
(define occurrence-string (if (> count 1) "occurrences" "occurrence"))
332-
(define rule-string
333-
(match output-format
334-
[(== plain-text) rule]
335-
[(== git-commit-message) (format "`~a`" rule)]))
336-
(printf " * Fixed ~a ~a of ~a\n" count occurrence-string rule-string))
320+
(printf " * Fixed ~a ~a of `~a`\n" count occurrence-string rule))
337321
(unless (zero? total-fixes)
338322
(newline)))
339323

340324

341-
(define (resyntax-fix-run-one-pass options files
342-
#:lines lines-to-analyze-by-file
343-
#:max-fixes max-fixes
344-
#:max-modified-files max-modified-files
345-
#:max-modified-lines max-modified-lines
346-
#:pass-number pass-number)
347-
(define output-format (resyntax-fix-options-output-format options))
348-
(match output-format
349-
[(== plain-text)
350-
(unless (equal? pass-number 1)
351-
(printf "resyntax: --- pass ~a ---\n" pass-number))
352-
(printf "resyntax: --- analyzing code ---\n")]
353-
[_ (void)])
354-
(define all-results
355-
(transduce (in-hash-entries files) ; entries with file path keys and lists of file-portion? values
356-
357-
;; The following steps perform a kind of layered shuffle: the files to refactor are
358-
;; shuffled such that files in the same directory remain together. When combined with
359-
;; the #:max-modified-files argument, this makes Resyntax prefer to refactor closely
360-
;; related files instead of selecting arbitrary unrelated files from across an entire
361-
;; codebase. This limits potential for merge conflicts and makes changes easier to
362-
;; review, since it's more likely the refactored files will have shared context.
363-
364-
; key by directory
365-
(indexing (λ (e) (simple-form-path (build-path (entry-key e) 'up))))
366-
367-
; group by key and shuffle within each group
368-
(grouping (into-transduced (shuffling) #:into into-list))
369-
370-
; shuffle groups
371-
(shuffling)
372-
373-
; ungroup and throw away directory
374-
(append-mapping entry-value)
375-
376-
;; Now the stream contains exactly what it did before the above steps, but shuffled in
377-
;; a convenient manner.
378-
379-
(append-mapping entry-value) ; throw away the file path, we don't need it anymore
380-
(mapping (filter-file-portion _ lines-to-analyze-by-file))
381-
(append-mapping
382-
(λ (portion)
383-
(resyntax-analyze (file-source (file-portion-path portion))
384-
#:suite (resyntax-fix-options-suite options)
385-
#:lines (file-portion-lines portion))))
386-
(limiting max-modified-lines
387-
#:by (λ (result)
388-
(define replacement (refactoring-result-line-replacement result))
389-
(add1 (- (line-replacement-original-end-line replacement)
390-
(line-replacement-start-line replacement)))))
391-
(if (equal? max-fixes +inf.0) (transducer-pipe) (taking max-fixes))
392-
(if (equal? max-modified-files +inf.0)
393-
(transducer-pipe)
394-
(transducer-pipe
395-
(indexing
396-
(λ (result)
397-
(syntax-replacement-source (refactoring-result-syntax-replacement result))))
398-
(grouping into-list)
399-
(taking max-modified-files)
400-
(append-mapping entry-value)))
401-
#:into into-list))
402-
(define results-by-path
403-
(transduce
404-
all-results
405-
(indexing
406-
(λ (result)
407-
(file-source-path
408-
(syntax-replacement-source (refactoring-result-syntax-replacement result)))))
409-
(grouping (into-transduced (sorting #:key refactoring-result-original-line) #:into into-list))
410-
#:into into-hash))
411-
(match output-format
412-
[(== plain-text) (printf "resyntax: --- fixing code ---\n")]
413-
[(== git-commit-message)
414-
(unless (hash-empty? results-by-path)
415-
(printf "#### Pass ~a\n\n" pass-number))])
416-
(for ([(path results) (in-hash results-by-path)])
417-
(define result-count (length results))
418-
(define fix-string (if (> result-count 1) "fixes" "fix"))
419-
(match output-format
420-
[(== plain-text)
421-
(printf "resyntax: applying ~a ~a to ~a\n\n" result-count fix-string path)]
422-
[(== git-commit-message)
423-
;; For a commit message, we always use a relative path since we're likely running inside
424-
;; some CI runner. Additionally, we make the path a link to the corresponding file at HEAD,
425-
;; since making file paths clickable is pleasant.
426-
(define relative-path (find-relative-path (current-directory) path))
427-
(define repo-head-path (format "../blob/HEAD/~a" relative-path))
428-
(printf "Applied ~a ~a to [`~a`](~a)\n\n"
429-
result-count fix-string relative-path repo-head-path)])
430-
(for ([result (in-list results)])
431-
(define line (refactoring-result-original-line result))
432-
(define rule (refactoring-result-rule-name result))
433-
(define message (refactoring-result-message result))
434-
(match output-format
435-
[(== plain-text) (printf " * [line ~a] ~a: ~a\n" line rule message)]
436-
[(== git-commit-message) (printf " * Line ~a, `~a`: ~a\n" line rule message)]))
437-
(refactor! results)
438-
(newline))
439-
results-by-path)
440-
441-
442-
(define (filter-file-portion portion lines-by-path)
443-
(define path (file-portion-path portion))
444-
(define lines (file-portion-lines portion))
445-
(define ranges-to-remove (range-set-complement (hash-ref lines-by-path path all-lines)))
446-
(file-portion path (range-set-remove-all lines ranges-to-remove)))
325+
(define (resyntax-fix-print-plain-text-summary analysis)
326+
(printf "resyntax: --- summary ---\n\n")
327+
(define total-fixes (resyntax-analysis-total-fixes analysis))
328+
(define total-files (resyntax-analysis-total-sources-modified analysis))
329+
(define message
330+
(cond
331+
[(zero? total-fixes) "No issues found."]
332+
[(equal? total-fixes 1) "Fixed 1 issue in 1 file."]
333+
[(equal? total-files 1) (format "Fixed ~a issues in 1 file." total-fixes)]
334+
[else (format "Fixed ~a issues in ~a files." total-fixes total-files)]))
335+
(printf " ~a\n\n" message)
336+
(define rules-applied (resyntax-analysis-rules-applied analysis))
337+
(transduce (in-hash-entries (multiset-frequencies rules-applied))
338+
(sorting #:key entry-value #:descending? #true)
339+
(mapping
340+
(λ (e)
341+
(match-define (entry rule-name rule-fixes) e)
342+
(define message
343+
(if (equal? rule-fixes 1)
344+
(format "Fixed 1 occurrence of ~a" rule-name)
345+
(format "Fixed ~a occurrences of ~a" rule-fixes rule-name)))
346+
(format " * ~a\n" message)))
347+
#:into (into-for-each display))
348+
(when (positive? total-fixes)
349+
(newline)))
447350

448351

449352
(module+ main

0 commit comments

Comments
 (0)