Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions clang/include/clang/Basic/FileEntry.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ class FileEntryRef {
/// Read-only access to the FileEntry stored in this reference's base map
/// entry.
const FileEntry &getFileEntry() const {
  const FileEntry *Entry = getBaseMapEntry().second->V.get<FileEntry *>();
  return *Entry;
}

// Mutable counterpart of getFileEntry(). It exists so that the recorded size
// of the entry can be raised when z/OS EBCDIC -> UTF-8 conversion makes the
// buffer larger than the on-disk file.
FileEntry &getFileEntryToUpdate() {
  FileEntry *Entry = getBaseMapEntry().second->V.get<FileEntry *>();
  return *Entry;
}

/// Returns the Dir member of the value stored in this reference's map entry.
DirectoryEntryRef getDir() const { return ME->second->Dir; }

/// Declared inline here; the definition is outside this excerpt.
inline off_t getSize() const;
Expand Down Expand Up @@ -323,6 +330,8 @@ class FileEntry {

/// Returns the stored RealPathName.
StringRef tryGetRealPathName() const { return RealPathName; }
/// Returns the recorded size of the file.
off_t getSize() const { return Size; }
// Size may increase due to potential z/OS EBCDIC -> UTF-8 conversion.
// Overwrites the recorded size unconditionally; no validation is performed.
void setSize(off_t NewSize) { Size = NewSize; }
/// Returns the stored UID.
unsigned getUID() const { return UID; }
/// Returns the stored llvm::sys::fs::UniqueID.
const llvm::sys::fs::UniqueID &getUniqueID() const { return UniqueID; }
/// Returns the recorded modification time.
time_t getModificationTime() const { return ModTime; }
Expand Down
32 changes: 31 additions & 1 deletion clang/lib/Basic/SourceManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
Expand Down Expand Up @@ -156,8 +157,11 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM,
// Unless this is a named pipe (in which case we can handle a mismatch),
// check that the file's size is the same as in the file entry (which may
// have come from a stat cache).
// The buffer will always be larger than the file size on z/OS in the presence
// of characters outside the base character set.
assert(Buffer->getBufferSize() >= (size_t)ContentsEntry->getSize());
if (!ContentsEntry->isNamedPipe() &&
Buffer->getBufferSize() != (size_t)ContentsEntry->getSize()) {
Buffer->getBufferSize() < (size_t)ContentsEntry->getSize()) {
Diag.Report(Loc, diag::err_file_modified) << ContentsEntry->getName();

return std::nullopt;
Expand Down Expand Up @@ -583,6 +587,18 @@ SourceManager::getOrCreateFileID(FileEntryRef SourceFile,
FileCharacter);
}

/// Helper function to determine if an input file requires conversion.
///
/// \param Filename name used to look up the file's z/OS tag.
/// \returns true only when building for z/OS, the tag lookup succeeds, and
/// the tag indicates that conversion is required; false in every other case,
/// including when the lookup fails.
bool needConversion(StringRef Filename) {
#ifdef __MVS__
  llvm::ErrorOr<bool> NeedConversion =
      llvm::needzOSConversion(Filename.str().c_str());
  // The lookup can fail (for example when Filename does not name something
  // that can be queried on disk). Treat a failed lookup as "no conversion"
  // instead of asserting, and never dereference an ErrorOr holding an error.
  return NeedConversion && *NeedConversion;
#else
  return false;
#endif
}

/// createFileID - Create a new FileID for the specified ContentCache and
/// include position. This works regardless of whether the ContentCache
/// corresponds to a file or some other input source.
Expand All @@ -602,6 +618,20 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
return FileID::get(LoadedID);
}
unsigned FileSize = File.getSize();
bool NeedConversion = needConversion(Filename);
if (NeedConversion) {
// Buffer size may increase due to potential z/OS EBCDIC to UTF-8
// conversion.
if (std::optional<llvm::MemoryBufferRef> Buffer =
File.getBufferOrNone(Diag, getFileManager())) {
unsigned BufSize = Buffer->getBufferSize();
if (BufSize > FileSize) {
if (File.ContentsEntry.has_value())
File.ContentsEntry->getFileEntryToUpdate().setSize(BufSize);
FileSize = BufSize;
}
}
}
if (!(NextLocalOffset + FileSize + 1 > NextLocalOffset &&
NextLocalOffset + FileSize + 1 <= CurrentLoadedOffset)) {
Diag.Report(IncludePos, diag::err_sloc_space_too_large);
Expand Down
11 changes: 9 additions & 2 deletions llvm/include/llvm/Support/AutoConvert.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#ifdef __MVS__
#include <_Ccsid.h>
#ifdef __cplusplus
#include "llvm/Support/ErrorOr.h"
#include <system_error>
#endif /* __cplusplus */

Expand Down Expand Up @@ -54,8 +55,14 @@ std::error_code restorezOSStdHandleAutoConversion(int FD);
/** \brief Set the tag information for a file descriptor. */
std::error_code setzOSFileTag(int FD, int CCSID, bool Text);

} /* namespace llvm */
#endif /* __cplusplus */
// Get the tag CCSID for a file name or a file descriptor.
ErrorOr<__ccsid_t> getzOSFileTag(const char *FileName, const int FD = -1);

// Query the file tag to determine if it needs conversion to UTF-8 codepage.
ErrorOr<bool> needzOSConversion(const char *FileName, const int FD = -1);

} // namespace llvm
#endif // __cplusplus

#endif /* __MVS__ */

Expand Down
40 changes: 39 additions & 1 deletion llvm/lib/Support/AutoConvert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <sys/stat.h>
#include <unistd.h>

using namespace llvm;

// Three file-local slots, each initialised to -1.
// NOTE(review): presumably one saved auto-conversion mode per standard handle
// (stdin/stdout/stderr), with -1 meaning "nothing saved" -- confirm against
// the functions that read and write this array.
static int savedStdHandleAutoConversionMode[3] = {-1, -1, -1};

int disablezOSAutoConversion(int FD) {
Expand Down Expand Up @@ -116,4 +118,40 @@ std::error_code llvm::setzOSFileTag(int FD, int CCSID, bool Text) {
return std::error_code();
}

#endif // __MVS__
// Returns the CCSID a file is tagged with, or the errno-derived error code if
// the underlying query fails. When FD is -1 the tag is read with stat() on
// FileName; otherwise the descriptor is queried through fcntl().
ErrorOr<__ccsid_t> llvm::getzOSFileTag(const char *FileName, const int FD) {
  const auto LastErrno = [] {
    return std::error_code(errno, std::generic_category());
  };

  // No descriptor available: fall back to the path-based lookup.
  if (FD == -1) {
    struct stat PathInfo;
    if (stat(FileName, &PathInfo) == -1)
      return LastErrno();
    return PathInfo.st_tag.ft_ccsid;
  }

  // A descriptor was supplied: ask it for its conversion settings.
  struct f_cnvrt Request = {
      QUERYCVT, // cvtcmd
      0,        // pccsid
      0,        // fccsid
  };
  if (fcntl(FD, F_CONTROL_CVT, &Request) == -1)
    return LastErrno();
  return Request.fccsid;
}

// Reports whether a file's contents must be converted to UTF-8, based on its
// file tag. Any error from the tag lookup is passed straight back to the
// caller.
ErrorOr<bool> llvm::needzOSConversion(const char *FileName, const int FD) {
  const ErrorOr<__ccsid_t> Tag = getzOSFileTag(FileName, FD);
  if (!Tag)
    return Tag.getError();

  // We don't need conversion for UTF-8 tagged files or binary files.
  // TODO: Remove the assumption of ISO8859-1 = UTF-8 here when we fully resolve
  // problems related to UTF-8 tagged source files.
  const bool UsableAsIs = *Tag == CCSID_UTF_8 || *Tag == CCSID_ISO8859_1 ||
                          *Tag == FT_BINARY;
  return !UsableAsIs;
}

#endif //__MVS__
16 changes: 14 additions & 2 deletions llvm/lib/Support/MemoryBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,11 @@ static bool shouldUseMmap(sys::fs::file_t FD,
bool RequiresNullTerminator,
int PageSize,
bool IsVolatile) {
#if defined(__MVS__)
// zOS Enhanced ASCII auto convert does not support mmap.
return false;
#endif

// mmap may leave the buffer without null terminator if the file size changed
// by the time the last page is mapped in, so avoid it if the file size is
// likely to change.
Expand Down Expand Up @@ -503,9 +508,16 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
}

#ifdef __MVS__
// Set codepage auto-conversion for z/OS.
if (auto EC = llvm::enablezOSAutoConversion(FD))
ErrorOr<bool> NeedConversion = needzOSConversion(Filename.str().c_str(), FD);
if (std::error_code EC = NeedConversion.getError())
return EC;
// File size may increase due to EBCDIC -> UTF-8 conversion, therefore we
// cannot trust the file size and we create the memory buffer by copying
// off the stream.
// Note: This only works with the assumption of reading a full file (i.e,
// Offset == 0 and MapSize == FileSize). Reading a file slice does not work.
if (Offset == 0 && MapSize == FileSize && *NeedConversion)
return getMemoryBufferForStream(FD, Filename);
#endif

auto Buf =
Expand Down
Loading