Skip to content

Commit b3b3fa1

Browse files
committed
dev: Standardize ps1 files to tabs
1 parent d1f76ae commit b3b3fa1

File tree

6 files changed

+493
-493
lines changed

6 files changed

+493
-493
lines changed
Lines changed: 171 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -1,171 +1,171 @@
1-
<#
2-
.SYNOPSIS
3-
Classify text files by encoding under the current subtree, respecting .gitignore.
4-
5-
.DESCRIPTION
6-
Enumerates tracked files and untracked-but-not-ignored files (via Git) beneath
7-
PWD. Skips likely-binary files (NUL probe). Classifies remaining files as:
8-
- 'utf8' : valid UTF-8 (no BOM) or empty file
9-
- 'utf8-with-bom' : starts with UTF-8 BOM (EF BB BF)
10-
- 'other' : text but not valid UTF-8 (e.g., UTF-16/ANSI)
11-
12-
Outputs:
13-
1) Relative paths of files classified as 'other'
14-
2) A table by extension: UTF8 / UTF8-with-BOM / Other / Total
15-
16-
Notes:
17-
- Read-only: this script makes no changes.
18-
- Requires Git and must be run inside a Git work tree.
19-
#>
20-
21-
[CmdletBinding()]
22-
param()
23-
24-
Set-StrictMode -Version Latest
25-
$ErrorActionPreference = 'Stop'
26-
27-
# --- Git enumeration ---------------------------------------------------------
28-
function Assert-InGitWorkTree {
29-
# Throws if not inside a Git work tree.
30-
$inside = (& git rev-parse --is-inside-work-tree 2>$null).Trim()
31-
if ($LASTEXITCODE -ne 0 -or $inside -ne 'true') {
32-
throw 'Not in a Git work tree.'
33-
}
34-
}
35-
36-
function Get-GitFilesUnderPwd {
37-
<#
38-
Returns full paths to tracked + untracked-not-ignored files under PWD.
39-
#>
40-
Assert-InGitWorkTree
41-
42-
$repoRoot = (& git rev-parse --show-toplevel).Trim()
43-
$pwdPath = (Get-Location).Path
44-
45-
# cached (tracked) + others (untracked not ignored)
46-
$nulSeparated = & git -C $repoRoot ls-files -z --cached --others --exclude-standard
47-
48-
$relativePaths = $nulSeparated.Split(
49-
[char]0, [System.StringSplitOptions]::RemoveEmptyEntries)
50-
51-
foreach ($relPath in $relativePaths) {
52-
$fullPath = Join-Path $repoRoot $relPath
53-
54-
# Only include files under the current subtree.
55-
if ($fullPath.StartsWith($pwdPath,
56-
[System.StringComparison]::OrdinalIgnoreCase)) {
57-
if (Test-Path -LiteralPath $fullPath -PathType Leaf) { $fullPath }
58-
}
59-
}
60-
}
61-
62-
# --- Probes ------------------------------------------------------------------
63-
function Test-ProbablyBinary {
64-
# Heuristic: treat as binary if the first 8 KiB contains any NUL byte.
65-
param([Parameter(Mandatory)][string]$Path)
66-
67-
try {
68-
$stream = [System.IO.File]::Open($Path,'Open','Read','ReadWrite')
69-
try {
70-
$len = [int][Math]::Min(8192,$stream.Length)
71-
if ($len -le 0) { return $false }
72-
73-
$buffer = [byte[]]::new($len)
74-
[void]$stream.Read($buffer,0,$len)
75-
return ($buffer -contains 0)
76-
}
77-
finally { $stream.Dispose() }
78-
}
79-
catch { return $false }
80-
}
81-
82-
function Get-TextEncodingCategory {
83-
# Returns 'utf8', 'utf8-with-bom', 'other', or $null for likely-binary.
84-
param([Parameter(Mandatory)][string]$Path)
85-
86-
$stream = [System.IO.File]::Open($Path,'Open','Read','ReadWrite')
87-
try {
88-
$fileLength = $stream.Length
89-
if ($fileLength -eq 0) { return 'utf8' }
90-
91-
# BOM check (EF BB BF)
92-
$header = [byte[]]::new([Math]::Min(3,$fileLength))
93-
[void]$stream.Read($header,0,$header.Length)
94-
if ($header.Length -ge 3 -and
95-
$header[0] -eq 0xEF -and $header[1] -eq 0xBB -and $header[2] -eq 0xBF) {
96-
return 'utf8-with-bom'
97-
}
98-
99-
# Quick binary probe before expensive decoding
100-
$stream.Position = 0
101-
$sampleLen = [int][Math]::Min(8192,$fileLength)
102-
$sample = [byte[]]::new($sampleLen)
103-
[void]$stream.Read($sample,0,$sampleLen)
104-
if ($sample -contains 0) { return $null }
105-
}
106-
finally { $stream.Dispose() }
107-
108-
# Validate UTF-8 by decoding with throw-on-invalid option (no BOM).
109-
try {
110-
$bytes = [System.IO.File]::ReadAllBytes($Path)
111-
$utf8 = [System.Text.UTF8Encoding]::new($false,$true)
112-
[void]$utf8.GetString($bytes)
113-
return 'utf8'
114-
}
115-
catch { return 'other' }
116-
}
117-
118-
# --- Main --------------------------------------------------------------------
119-
$otherFiles = @()
120-
$byExtension = @{}
121-
122-
$allFiles = Get-GitFilesUnderPwd
123-
124-
foreach ($fullPath in $allFiles) {
125-
# Avoid decoding likely-binary files.
126-
if (Test-ProbablyBinary $fullPath) { continue }
127-
128-
$category = Get-TextEncodingCategory $fullPath
129-
if (-not $category) { continue }
130-
131-
$ext = [IO.Path]::GetExtension($fullPath).ToLower()
132-
if (-not $byExtension.ContainsKey($ext)) {
133-
$byExtension[$ext] = @{ 'utf8' = 0; 'utf8-with-bom' = 0; 'other' = 0 }
134-
}
135-
136-
$byExtension[$ext][$category]++
137-
138-
if ($category -eq 'other') {
139-
$otherFiles += (Resolve-Path -LiteralPath $fullPath -Relative)
140-
}
141-
}
142-
143-
# 1) Files in 'other'
144-
if ($otherFiles.Count -gt 0) {
145-
'Files classified as ''other'':'
146-
$otherFiles | Sort-Object | ForEach-Object { " $_" }
147-
''
148-
}
149-
150-
# 2) Table by extension
151-
$rows = foreach ($kv in $byExtension.GetEnumerator()) {
152-
$ext = if ($kv.Key) { $kv.Key } else { '[noext]' }
153-
$u = [int]$kv.Value['utf8']
154-
$b = [int]$kv.Value['utf8-with-bom']
155-
$o = [int]$kv.Value['other']
156-
157-
[PSCustomObject]@{
158-
Extension = $ext
159-
UTF8 = $u
160-
'UTF8-with-BOM' = $b
161-
Other = $o
162-
Total = $u + $b + $o
163-
}
164-
}
165-
166-
$rows |
167-
Sort-Object -Property (
168-
@{Expression='Total';Descending=$true},
169-
@{Expression='Extension';Descending=$false}
170-
) |
171-
Format-Table -AutoSize
1+
<#
2+
.SYNOPSIS
3+
Classify text files by encoding under the current subtree, respecting .gitignore.
4+
5+
.DESCRIPTION
6+
Enumerates tracked files and untracked-but-not-ignored files (via Git) beneath
7+
PWD. Skips likely-binary files (NUL probe). Classifies remaining files as:
8+
- 'utf8' : valid UTF-8 (no BOM) or empty file
9+
- 'utf8-with-bom' : starts with UTF-8 BOM (EF BB BF)
10+
- 'other' : text but not valid UTF-8 (e.g., ANSI/legacy code pages); files containing NUL bytes, such as most UTF-16 text, are skipped as binary
11+
12+
Outputs:
13+
1) Relative paths of files classified as 'other'
14+
2) A table by extension: UTF8 / UTF8-with-BOM / Other / Total
15+
16+
Notes:
17+
- Read-only: this script makes no changes.
18+
- Requires Git and must be run inside a Git work tree.
19+
#>
20+
21+
[CmdletBinding()]
22+
param()
# The script declares no parameters of its own; the empty param() block is
# paired with the [CmdletBinding()] attribute above.
23+
24+
# Strict mode is enabled for everything that follows in this script.
Set-StrictMode -Version Latest
25+
# 'Stop' makes errors terminate the script instead of letting it continue.
$ErrorActionPreference = 'Stop'
26+
27+
# --- Git enumeration ---------------------------------------------------------
28+
function Assert-InGitWorkTree {
	<#
	.SYNOPSIS
		Throws 'Not in a Git work tree.' unless the current directory is inside
		a Git work tree.

	.DESCRIPTION
		Runs `git rev-parse --is-inside-work-tree` with stderr discarded and
		accepts only exit code 0 combined with the literal answer 'true'.
		Produces no output on success.
	#>
	$output = & git rev-parse --is-inside-work-tree 2>$null
	# Capture immediately so no later command can overwrite the exit code.
	$exitCode = $LASTEXITCODE

	# On failure git writes nothing to stdout, leaving $output as $null.
	# Casting to [string] turns that into '' so Trim() cannot fail and the
	# intended error message below is the one the user sees.
	$inside = ([string]($output | Select-Object -First 1)).Trim()

	if ($exitCode -ne 0 -or $inside -ne 'true') {
		throw 'Not in a Git work tree.'
	}
}
35+
36+
function Get-GitFilesUnderPwd {
	<#
	.SYNOPSIS
		Returns full paths to tracked + untracked-not-ignored files under PWD.

	.DESCRIPTION
		Lists the repository's files with
		`git ls-files -z --cached --others --exclude-standard`, joins each
		repo-relative path onto the repository root, and emits those that lie
		inside the current location and exist on disk as files.

	.OUTPUTS
		System.String. One full path per matching file; nothing when there are
		no matches.
	#>
	Assert-InGitWorkTree

	$repoRoot = ([string](& git rev-parse --show-toplevel | Select-Object -First 1)).Trim()
	if (-not $repoRoot) {
		throw 'Unable to determine the Git repository root.'
	}

	$dirSep = [System.IO.Path]::DirectorySeparatorChar
	$altSep = [System.IO.Path]::AltDirectorySeparatorChar

	# Compare against "PWD + separator" so that a sibling directory sharing
	# the same name prefix (PWD 'sub' vs. 'subway') is not treated as inside.
	$pwdPath = (Get-Location).Path
	$pwdPrefix = $pwdPath.TrimEnd([char[]]@($dirSep, $altSep)) + $dirSep

	# cached (tracked) + others (untracked not ignored).
	# @(...) -join copes with both "no output" ($null) and output that
	# PowerShell split into several strings at newline characters.
	$listing = @(& git -C $repoRoot ls-files -z --cached --others --exclude-standard) -join "`n"

	$relativePaths = $listing.Split(
		[char[]]@([char]0), [System.StringSplitOptions]::RemoveEmptyEntries)

	foreach ($relPath in $relativePaths) {
		$fullPath = Join-Path $repoRoot $relPath

		# git reports '/'-separated paths; normalise a copy for the prefix
		# test so mixed separators cannot defeat the boundary check.
		$comparable = $fullPath.Replace($altSep, $dirSep)

		# Only include files under the current subtree.
		if (-not $comparable.StartsWith($pwdPrefix,
				[System.StringComparison]::OrdinalIgnoreCase)) {
			continue
		}

		# Index entries whose file is missing on disk are dropped here.
		if (Test-Path -LiteralPath $fullPath -PathType Leaf) { $fullPath }
	}
}
61+
62+
# --- Probes ------------------------------------------------------------------
63+
function Test-ProbablyBinary {
	<#
	.SYNOPSIS
		Heuristic binary check: $true when the first 8 KiB of the file contains
		a NUL byte.

	.PARAMETER Path
		Path of the file to probe.

	.OUTPUTS
		System.Boolean. $false for empty files and for files that cannot be
		opened or read.
	#>
	param([Parameter(Mandatory)][string]$Path)

	try {
		$file = [System.IO.File]::Open(
			$Path,
			[System.IO.FileMode]::Open,
			[System.IO.FileAccess]::Read,
			[System.IO.FileShare]::ReadWrite)
		try {
			$probeSize = [int][Math]::Min(8192, $file.Length)
			if ($probeSize -gt 0) {
				$head = [byte[]]::new($probeSize)
				[void]$file.Read($head, 0, $probeSize)
				return ([Array]::IndexOf($head, [byte]0) -ge 0)
			}
			return $false
		}
		finally {
			$file.Dispose()
		}
	}
	catch {
		# Best effort: an unreadable file is reported as "not binary".
		return $false
	}
}
81+
82+
function Get-TextEncodingCategory {
	<#
	.SYNOPSIS
		Classifies a file as 'utf8', 'utf8-with-bom' or 'other'; returns $null
		for likely-binary content.

	.DESCRIPTION
		Checks are applied in this order:
		  1. Empty file                      -> 'utf8'
		  2. Starts with EF BB BF            -> 'utf8-with-bom'
		  3. NUL byte in the first 8 KiB     -> $null (likely binary)
		  4. Whole file decodes as UTF-8     -> 'utf8'
		  5. Otherwise                       -> 'other'

		The file is opened once, sharing read and write access, and every
		check works on bytes read through that single handle.

	.PARAMETER Path
		Path of the file to classify.

	.OUTPUTS
		System.String, or $null for likely-binary files. Failures to open or
		read the file propagate to the caller.
	#>
	param([Parameter(Mandatory)][string]$Path)

	$stream = [System.IO.File]::Open($Path, 'Open', 'Read', 'ReadWrite')
	try {
		$fileLength = $stream.Length
		if ($fileLength -eq 0) { return 'utf8' }

		$bytes = [byte[]]::new($fileLength)
		$probeLen = [int][Math]::Min(8192, $fileLength)

		# Read the probe window first so a large binary file is rejected
		# without loading all of it. $filled counts bytes actually read, so
		# unread buffer space is never mistaken for NUL content.
		$filled = 0
		while ($filled -lt $probeLen) {
			$read = $stream.Read($bytes, $filled, $probeLen - $filled)
			if ($read -le 0) { break }
			$filled += $read
		}

		# BOM check (EF BB BF)
		if ($filled -ge 3 -and
			$bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF) {
			return 'utf8-with-bom'
		}

		# Quick binary probe before expensive decoding
		if ([Array]::IndexOf($bytes, [byte]0, 0, $filled) -ge 0) { return $null }

		# Pull in the remainder through the same handle rather than
		# re-opening the file with a different sharing mode.
		while ($filled -lt $bytes.Length) {
			$read = $stream.Read($bytes, $filled, $bytes.Length - $filled)
			if ($read -le 0) { break }
			$filled += $read
		}
	}
	finally { $stream.Dispose() }

	# Validate UTF-8 by decoding with throw-on-invalid option (no BOM).
	$strictUtf8 = [System.Text.UTF8Encoding]::new($false, $true)
	try {
		[void]$strictUtf8.GetString($bytes, 0, $filled)
		return 'utf8'
	}
	catch [System.Text.DecoderFallbackException] {
		# Only a decoding failure means "not UTF-8"; any other error is a real
		# problem and is allowed to surface.
		return 'other'
	}
}
117+
118+
# --- Main --------------------------------------------------------------------
119+
# Relative paths of files classified as 'other'. A List avoids copying the
# whole array on every append.
$otherFiles = [System.Collections.Generic.List[string]]::new()

# Extension (lower-cased; '' when the file has none) -> counters per category.
$byExtension = @{}

$allFiles = Get-GitFilesUnderPwd

foreach ($fullPath in $allFiles) {
	# Avoid decoding likely-binary files.
	if (Test-ProbablyBinary $fullPath) { continue }

	$category = Get-TextEncodingCategory $fullPath
	if (-not $category) { continue }

	# Invariant lower-casing keeps the grouping key independent of the
	# current culture (e.g. '.INI' under tr-TR).
	$ext = [IO.Path]::GetExtension($fullPath).ToLowerInvariant()
	if (-not $byExtension.ContainsKey($ext)) {
		$byExtension[$ext] = @{ 'utf8' = 0; 'utf8-with-bom' = 0; 'other' = 0 }
	}

	$byExtension[$ext][$category]++

	if ($category -eq 'other') {
		$otherFiles.Add([string](Resolve-Path -LiteralPath $fullPath -Relative))
	}
}

# 1) Files in 'other'
if ($otherFiles.Count -gt 0) {
	'Files classified as ''other'':'
	$otherFiles | Sort-Object | ForEach-Object { " $_" }
	''
}

# 2) Table by extension
$rows = foreach ($kv in $byExtension.GetEnumerator()) {
	# Files without an extension are stored under the empty-string key.
	$ext = if ($kv.Key) { $kv.Key } else { '[noext]' }
	$u = [int]$kv.Value['utf8']
	$b = [int]$kv.Value['utf8-with-bom']
	$o = [int]$kv.Value['other']

	[PSCustomObject]@{
		Extension = $ext
		UTF8 = $u
		'UTF8-with-BOM' = $b
		Other = $o
		Total = $u + $b + $o
	}
}

# Largest groups first; ties are ordered by extension name.
$rows |
	Sort-Object -Property (
		@{Expression='Total';Descending=$true},
		@{Expression='Extension';Descending=$false}
	) |
	Format-Table -AutoSize

0 commit comments

Comments
 (0)