Skip to content

Commit 30f45bc

Browse files
authored
Added a ground truth path normalization. (microsoft#6213)
This change adds a ground truth path normalization helper: - All slashes are changed to system native (`\` for windows, `/` for all others) - All repeated slashes are removed (except for leading slashes, so windows UNC paths are not broken) - All relative paths (including the main file, and ones that begin with `..`) are prepended with `./` or `.\` if not already present. The path normalization is applied in the following places: - Main file path in dxcompilerobj - Paths passed into IDxcIncludeHandler - Paths written into all DI* debug info - Paths written into dx.content metadata and Pdb SourceInfo - All paths loaded up by DxcPdbUtils The reason for this change is to make it easier for tool authors to implement recompilation (example: PIX edit-and-continue). When the paths in all the above places match, the files can be matched with a plain string equality comparison instead of having to perform path normalization.
1 parent dbf60d7 commit 30f45bc

File tree

10 files changed

+437
-95
lines changed

10 files changed

+437
-95
lines changed

include/dxc/Support/Path.h

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
///////////////////////////////////////////////////////////////////////////////
2+
// //
3+
// Path.h //
4+
// Copyright (C) Microsoft Corporation. All rights reserved. //
5+
// This file is distributed under the University of Illinois Open Source //
6+
// License. See LICENSE.TXT for details. //
7+
// //
8+
// Helper for HLSL related file paths. //
9+
// //
10+
///////////////////////////////////////////////////////////////////////////////
11+
#pragma once
12+
13+
#include "llvm/ADT/StringRef.h"
14+
#include <string>
15+
16+
namespace hlsl {
17+
18+
// Reports whether the first Len characters of Path already spell either an
// absolute location or a location explicitly anchored at the current
// directory ("." alone, or "." followed by a slash of either kind).
// Path is only dereferenced at indices below Len.
template <typename CharTy>
bool IsAbsoluteOrCurDirRelativeImpl(const CharTy *Path, size_t Len) {
  // Both slash flavours are accepted regardless of the host platform.
  const auto IsSeparator = [](CharTy C) { return C == '/' || C == '\\'; };

  // Current dir-relative path: "." on its own, or "./..." / ".\...".
  if (Len >= 1 && Path[0] == '.') {
    if (Len == 1 || IsSeparator(Path[1]))
      return true;
  }

  // Disk designator, then absolute path (for example "C:\" or "C:/").
  const bool HasDriveRoot = Len >= 3 && Path[1] == ':' && IsSeparator(Path[2]);
  if (HasDriveRoot)
    return true;

  // UNC name: only a doubled leading backslash qualifies; a single leading
  // backslash is reported as neither absolute nor current-dir relative.
  if (Len >= 2 && Path[0] == '\\')
    return Path[1] == '\\';

#ifndef _WIN32
  // Absolute paths on unix systems start with '/'
  if (Len >= 1 && Path[0] == '/')
    return true;
#endif

  //
  // NOTE: there are a number of cases we don't handle, as they don't play well
  // with the simple file system abstraction we use:
  // - current directory on disk designator (eg, D:file.ext), requires per-disk
  //   current dir
  // - parent paths relative to current directory (eg, ..\\file.ext)
  //
  // The current-directory support is available to help in-memory handlers.
  // On-disk handlers will typically have absolute paths to begin with.
  //
  return false;
}
55+
56+
inline bool IsAbsoluteOrCurDirRelativeW(const wchar_t *Path) {
57+
if (!Path)
58+
return false;
59+
return IsAbsoluteOrCurDirRelativeImpl<wchar_t>(Path, wcslen(Path));
60+
}
61+
inline bool IsAbsoluteOrCurDirRelative(const char *Path) {
62+
if (!Path)
63+
return false;
64+
return IsAbsoluteOrCurDirRelativeImpl<char>(Path, strlen(Path));
65+
}
66+
67+
// This is the new ground truth of how paths are normalized. There had been
68+
// many inconsistent path normalization littered all over the code base.
69+
// 1. All slashes are changed to system native: `\` for windows and `/` for all
70+
// others.
71+
// 2. All repeated slashes are removed (except for leading slashes, so windows
72+
// UNC paths are not broken)
73+
// 3. All relative paths (including ones that begin with ..) are prepended with
74+
// ./ or .\ if not already
75+
//
76+
// Examples:
77+
// F:\\\my_path////\\/my_shader.hlsl -> F:\my_path\my_shader.hlsl
78+
// my_path/my_shader.hlsl -> .\my_path\my_shader.hlsl
79+
// ..\\.//.\\\my_path/my_shader.hlsl -> .\..\.\.\my_path\my_shader.hlsl
80+
// \\my_network_path/my_shader.hlsl -> \\my_network_path\my_shader.hlsl
81+
//
82+
template <typename CharT, typename StringTy>
83+
StringTy NormalizePathImpl(const CharT *Path, size_t Length) {
84+
StringTy PathCopy(Path, Length);
85+
86+
#ifdef _WIN32
87+
constexpr CharT SlashFrom = '/';
88+
constexpr CharT SlashTo = '\\';
89+
#else
90+
constexpr CharT SlashFrom = '\\';
91+
constexpr CharT SlashTo = '/';
92+
#endif
93+
94+
for (unsigned i = 0; i < PathCopy.size(); i++) {
95+
if (PathCopy[i] == SlashFrom)
96+
PathCopy[i] = SlashTo;
97+
}
98+
99+
// Remove double slashes.
100+
bool SeenNonSlash = false;
101+
for (unsigned i = 0; i < PathCopy.size();) {
102+
// Remove this slash if:
103+
// 1. It is preceded by another slash.
104+
// 2. It is NOT part of a series of leading slashes. (E.G. \\, which on
105+
// windows is a network path).
106+
if (PathCopy[i] == SlashTo && i > 0 && PathCopy[i - 1] == SlashTo &&
107+
SeenNonSlash) {
108+
PathCopy.erase(PathCopy.begin() + i);
109+
continue;
110+
}
111+
SeenNonSlash |= PathCopy[i] != SlashTo;
112+
i++;
113+
}
114+
115+
// If relative path, prefix with dot.
116+
if (IsAbsoluteOrCurDirRelativeImpl<CharT>(PathCopy.c_str(),
117+
PathCopy.size())) {
118+
return PathCopy;
119+
} else {
120+
return StringTy(1, CharT('.')) + StringTy(1, SlashTo) + PathCopy;
121+
}
122+
}
123+
124+
inline std::string NormalizePath(const char *Path) {
125+
return NormalizePathImpl<char, std::string>(Path, ::strlen(Path));
126+
}
127+
inline std::wstring NormalizePathW(const wchar_t *Path) {
128+
return NormalizePathImpl<wchar_t, std::wstring>(Path, ::wcslen(Path));
129+
}
130+
// Normalizes the characters referenced by a StringRef via NormalizePathImpl.
// The explicit size is forwarded, so no NUL terminator is required.
inline std::string NormalizePath(llvm::StringRef Path) {
  const char *Begin = Path.data();
  const size_t Length = Path.size();
  return NormalizePathImpl<char, std::string>(Begin, Length);
}
133+
134+
} // namespace hlsl

tools/clang/lib/CodeGen/CGDebugInfo.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "CGObjCRuntime.h"
1818
#include "CodeGenFunction.h"
1919
#include "CodeGenModule.h"
20+
#include "dxc/Support/Path.h" // HLSL Change
2021
#include "clang/AST/ASTContext.h"
2122
#include "clang/AST/DeclFriend.h"
2223
#include "clang/AST/DeclObjC.h"
@@ -237,6 +238,17 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) {
237238
return internString(Name);
238239
}
239240

241+
// HLSL Change - begin
242+
std::string CGDebugInfo::HLSLNormalizeDbgFileName(StringRef Str) {
  // Outside of HLSL the name is handed back untouched.
  if (!CGM.getLangOpts().HLSL)
    return Str;

  // For HLSL, every file name passed in here goes through the shared path
  // normalization so that it is formatted in a standard way.
  // NOTE(review): an earlier comment said the main file name is kept exactly
  // as is; this function applies no such exception - confirm that callers
  // pass an already-normalized main file name.
  return hlsl::NormalizePath(Str);
}
250+
// HLSL Change - end
251+
240252
llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
241253
if (!Loc.isValid())
242254
// If Location is not valid then use main input file.
@@ -260,7 +272,8 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
260272
}
261273

262274
llvm::DIFile *F =
263-
DBuilder.createFile(PLoc.getFilename(), getCurrentDirname());
275+
DBuilder.createFile(HLSLNormalizeDbgFileName(PLoc.getFilename()),
276+
getCurrentDirname()); // HLSL Change
264277

265278
DIFileCache[fname].reset(F);
266279
return F;

tools/clang/lib/CodeGen/CGDebugInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@ class CGDebugInfo {
260260
bool TryCollectHLSLRecordElements(const RecordType *Ty,
261261
llvm::DICompositeType *DITy,
262262
SmallVectorImpl<llvm::Metadata *> &Elements);
263+
264+
std::string HLSLNormalizeDbgFileName(StringRef Str);
263265
// HLSL Change Ends
264266

265267
public:

tools/clang/lib/CodeGen/ModuleBuilder.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
#include "clang/CodeGen/ModuleBuilder.h"
1515
#include "CGDebugInfo.h"
1616
#include "CodeGenModule.h"
17+
#include "dxc/DXIL/DxilMetadataHelper.h" // HLSL Change - dx source info
18+
#include "dxc/DxcBindingTable/DxcBindingTable.h" // HLSL Change
19+
#include "dxc/Support/Path.h" // HLSL Change
1720
#include "clang/AST/ASTContext.h"
1821
#include "clang/AST/DeclObjC.h"
1922
#include "clang/AST/Expr.h"
@@ -24,10 +27,8 @@
2427
#include "llvm/IR/DataLayout.h"
2528
#include "llvm/IR/LLVMContext.h"
2629
#include "llvm/IR/Module.h"
27-
#include <memory>
28-
#include "dxc/DXIL/DxilMetadataHelper.h" // HLSL Change - dx source info
29-
#include "dxc/DxcBindingTable/DxcBindingTable.h" // HLSL Change
3030
#include "llvm/Support/Path.h"
31+
#include <memory>
3132
using namespace clang;
3233

3334
namespace {
@@ -275,17 +276,19 @@ namespace {
275276
end = Ctx.getSourceManager().fileinfo_end();
276277
it != end; ++it) {
277278
if (it->first->isValid() && !it->second->IsSystemFile) {
279+
std::string path = hlsl::NormalizePath(it->first->getName());
280+
StringRef contentBuffer = it->second->getRawBuffer()->getBuffer();
278281
// If main file, write that to metadata first.
279282
// Add the rest to filesMap to sort by name.
280-
llvm::SmallString<128> NormalizedPath;
281-
llvm::sys::path::native(it->first->getName(), NormalizedPath);
282283
if (CodeGenOpts.MainFileName.compare(it->first->getName()) == 0) {
283284
assert(!bFoundMainFile && "otherwise, more than one file matches main filename");
284-
AddFile(NormalizedPath, it->second->getRawBuffer()->getBuffer());
285+
AddFile(path, contentBuffer);
285286
bFoundMainFile = true;
286287
} else {
287-
filesMap[NormalizedPath.str()] =
288-
it->second->getRawBuffer()->getBuffer();
288+
// We want the include file paths to match the values passed into
289+
// the include handlers exactly. The SourceManager entries should
290+
// match it except the call to MakeAbsoluteOrCurDirRelative.
291+
filesMap[path] = contentBuffer;
289292
}
290293
}
291294
}

tools/clang/tools/dxcompiler/dxcfilesystem.cpp

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "dxcutil.h"
1818
#include "llvm/Support/raw_ostream.h"
1919

20+
#include "dxc/Support/Path.h"
2021
#include "dxc/Support/Unicode.h"
2122
#include "dxc/Support/dxcfilesystem.h"
2223
#include "clang/Frontend/CompilerInstance.h"
@@ -150,48 +151,12 @@ const DxcArgsHandle OutputHandle(SpecialValue::Output);
150151
/// ERROR_OUT_OF_STRUCTURES will be returned by an attempt to open a file.
151152
static const size_t MaxIncludedFiles = 1000;
152153

153-
bool IsAbsoluteOrCurDirRelativeW(LPCWSTR Path) {
154-
if (!Path || !Path[0])
155-
return FALSE;
156-
// Current dir-relative path.
157-
if (Path[0] == L'.') {
158-
return Path[1] == L'\0' || Path[1] == L'/' || Path[1] == L'\\';
159-
}
160-
// Disk designator, then absolute path.
161-
if (Path[1] == L':' && (Path[2] == L'\\' || Path[2] == L'/')) {
162-
return TRUE;
163-
}
164-
// UNC name
165-
if (Path[0] == L'\\') {
166-
return Path[1] == L'\\';
167-
}
168-
169-
#ifndef _WIN32
170-
// Absolute paths on unix systems start with '/'
171-
if (Path[0] == L'/') {
172-
return TRUE;
173-
}
174-
#endif
175-
176-
//
177-
// NOTE: there are a number of cases we don't handle, as they don't play well
178-
// with the simple file system abstraction we use:
179-
// - current directory on disk designator (eg, D:file.ext), requires per-disk
180-
// current dir
181-
// - parent paths relative to current directory (eg, ..\\file.ext)
182-
//
183-
// The current-directory support is available to help in-memory handlers.
184-
// On-disk handlers will typically have absolute paths to begin with.
185-
//
186-
return FALSE;
187-
}
188-
189154
} // namespace
190155

191156
namespace dxcutil {
192157

193158
void MakeAbsoluteOrCurDirRelativeW(LPCWSTR &Path, std::wstring &PathStorage) {
194-
if (IsAbsoluteOrCurDirRelativeW(Path)) {
159+
if (hlsl::IsAbsoluteOrCurDirRelativeW(Path)) {
195160
return;
196161
} else {
197162
PathStorage = L"./";
@@ -302,7 +267,10 @@ class DxcArgsFileSystemImpl : public DxcArgsFileSystem {
302267
}
303268

304269
CComPtr<::IDxcBlob> fileBlob;
305-
HRESULT hr = m_includeLoader->LoadSource(lpFileName, &fileBlob);
270+
271+
std::wstring NormalizedFileName = hlsl::NormalizePathW(lpFileName);
272+
HRESULT hr =
273+
m_includeLoader->LoadSource(NormalizedFileName.c_str(), &fileBlob);
306274
if (FAILED(hr)) {
307275
return ERROR_UNHANDLED_EXCEPTION;
308276
}

tools/clang/tools/dxcompiler/dxcompilerobj.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "dxc/DxilContainer/DxilContainerAssembler.h"
3636
#include "dxc/DxilRootSignature/DxilRootSignature.h"
3737
#include "dxc/HLSL/HLSLExtensionsCodegenHelper.h"
38+
#include "dxc/Support/Path.h"
3839
#include "dxc/Support/WinIncludes.h"
3940
#include "dxc/Support/dxcfilesystem.h"
4041
#include "dxc/dxcapi.internal.h"
@@ -592,6 +593,9 @@ class DxcCompiler : public IDxcCompiler3,
592593
// Formerly API values.
593594
const char *pUtf8SourceName =
594595
opts.InputFile.empty() ? "hlsl.hlsl" : opts.InputFile.data();
596+
std::string NormalizedSourceName = hlsl::NormalizePath(pUtf8SourceName);
597+
pUtf8SourceName = NormalizedSourceName.c_str();
598+
595599
CA2W pWideSourceName(pUtf8SourceName, CP_UTF8);
596600
const char *pUtf8EntryPoint =
597601
opts.EntryPoint.empty() ? "main" : opts.EntryPoint.data();

tools/clang/tools/dxcompiler/dxcpdbutils.cpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "dxc/DXIL/DxilUtil.h"
3030
#include "dxc/DxilContainer/DxilContainer.h"
3131
#include "dxc/Support/HLSLOptions.h"
32+
#include "dxc/Support/Path.h"
3233
#include "dxc/Support/Unicode.h"
3334
#include "dxc/Support/microcom.h"
3435
#include "dxc/dxcapi.h"
@@ -414,12 +415,13 @@ struct DxcPdbUtils : public IDxcPdbUtils2
414415

415416
HRESULT AddSource(StringRef name, StringRef content) {
416417
Source_File source;
417-
IFR(Utf8ToBlobWide(name, &source.Name));
418418
IFR(hlsl::DxcCreateBlob(content.data(), content.size(),
419419
/*bPinned*/ false, /*bCopy*/ true,
420420
/*encodingKnown*/ true, CP_UTF8, m_pMalloc,
421421
&source.Content));
422422

423+
std::string normalizedPath = hlsl::NormalizePath(name);
424+
IFR(Utf8ToBlobWide(name, &source.Name));
423425
// First file is the main file
424426
if (m_SourceFiles.empty()) {
425427
m_MainFileName = source.Name;
@@ -604,16 +606,7 @@ struct DxcPdbUtils : public IDxcPdbUtils2
604606
llvm::MDTuple *tup = cast<llvm::MDTuple>(node.getOperand(i));
605607
MDString *md_name = cast<MDString>(tup->getOperand(0));
606608
MDString *md_content = cast<MDString>(tup->getOperand(1));
607-
608-
// File name
609-
Source_File file;
610-
IFR(Utf8ToBlobWide(md_name->getString(), &file.Name));
611-
IFR(hlsl::DxcCreateBlob(
612-
md_content->getString().data(), md_content->getString().size(),
613-
/*bPinned*/ false, /*bCopy*/ true,
614-
/*encodingKnown*/ true, CP_UTF8, m_pMalloc, &file.Content));
615-
616-
m_SourceFiles.push_back(std::move(file));
609+
AddSource(md_name->getString(), md_content->getString());
617610
}
618611
}
619612
// dx.source.mainFileName
@@ -622,7 +615,13 @@ struct DxcPdbUtils : public IDxcPdbUtils2
622615
hlsl::DxilMDHelper::kDxilSourceMainFileNameOldMDName) {
623616
MDTuple *tup = cast<MDTuple>(node.getOperand(0));
624617
MDString *str = cast<MDString>(tup->getOperand(0));
625-
IFR(Utf8ToBlobWide(str->getString(), &m_MainFileName));
618+
std::string normalized = hlsl::NormalizePath(str->getString());
619+
m_MainFileName =
620+
nullptr; // This may already be set from reading dx.source content.
621+
// If we have a dx.source.mainFileName, we want to use that
622+
// here as the source of truth. Set it to nullptr to avoid
623+
// leak (and assert).
624+
IFR(Utf8ToBlobWide(normalized, &m_MainFileName));
626625
}
627626
// dx.source.args
628627
else if (node_name == hlsl::DxilMDHelper::kDxilSourceArgsMDName ||

0 commit comments

Comments
 (0)