Skip to content

Commit b0f8596

Browse files
author
Kobayashi
authored
Improve NormalizedFilePath (#453)
* improve NormalizedFilePath * add adoption plan * try Text * finalize
1 parent 78dc807 commit b0f8596

File tree

5 files changed

+73
-63
lines changed

5 files changed

+73
-63
lines changed

.github/workflows/haskell.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ jobs:
5454
shell: bash
5555
run: |
5656
if [ ${{ matrix.ospath }} = "true" ]; then
57-
cabal configure --constraint="filepath ^>= 1.4.100.0"
57+
cabal configure --flags="force-ospath"
5858
fi
5959
- name: Build using cabal
6060
run: cabal build all

lsp-types/lsp-types.cabal

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ category: Development
1616
build-type: Simple
1717
extra-source-files: ChangeLog.md, README.md
1818

19+
flag force-ospath
20+
default: False
21+
manual: False
22+
description: Force a version bound on filepath library, to enable 'OsPath'.
23+
1924
library
2025
exposed-modules: Language.LSP.Types
2126
, Language.LSP.Types.Capabilities
@@ -79,7 +84,6 @@ library
7984
, deepseq
8085
, Diff >= 0.2
8186
, dlist
82-
, filepath
8387
, hashable
8488
, lens >= 4.15.2
8589
, mtl < 2.4
@@ -92,7 +96,10 @@ library
9296
, unordered-containers
9397
, exceptions
9498
, safe
95-
, bytestring
99+
if flag(force-ospath)
100+
build-depends: filepath ^>= 1.4.100.0
101+
else
102+
build-depends: filepath
96103
hs-source-dirs: src
97104
default-language: Haskell2010
98105
default-extensions: StrictData

lsp-types/src/Language/LSP/Types/Uri.hs

Lines changed: 41 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -26,27 +26,21 @@ module Language.LSP.Types.Uri
2626
where
2727

2828
import Control.DeepSeq
29-
import qualified Data.Aeson as A
30-
import Data.Binary (Binary, Get, get, put)
31-
import Data.ByteString.Short (ShortByteString)
32-
import qualified Data.ByteString.Short as BS
29+
import qualified Data.Aeson as A
30+
import Data.Binary (Binary, Get, get, put)
3331
import Data.Hashable
34-
import Data.List (stripPrefix)
35-
import Data.String (IsString (fromString))
36-
import Data.Text (Text)
37-
import qualified Data.Text as T
38-
import qualified Data.Text.Encoding as T
39-
import Data.Text.Encoding.Error (UnicodeException)
32+
import Data.List (stripPrefix)
33+
import Data.String (IsString (fromString))
34+
import Data.Text (Text)
35+
import qualified Data.Text as T
4036
import GHC.Generics
41-
import GHC.Stack (HasCallStack)
42-
import Network.URI hiding (authority)
43-
import Safe (tailMay)
44-
import qualified System.FilePath as FP
45-
import qualified System.FilePath.Posix as FPP
46-
import qualified System.FilePath.Windows as FPW
37+
import Network.URI hiding (authority)
38+
import Safe (tailMay)
39+
import qualified System.FilePath as FP
40+
import qualified System.FilePath.Posix as FPP
41+
import qualified System.FilePath.Windows as FPW
4742
import qualified System.Info
4843

49-
5044
newtype Uri = Uri { getUri :: Text }
5145
deriving (Eq,Ord,Read,Show,Generic,A.FromJSON,A.ToJSON,Hashable,A.ToJSONKey,A.FromJSONKey)
5246

@@ -164,32 +158,39 @@ platformAdjustToUriPath systemOS srcPath
164158
FPP.addTrailingPathSeparator (init drv)
165159
| otherwise = drv
166160

167-
-- | A file path that is already normalized. It is stored as an UTF-8 encoded 'ShortByteString'
168-
--
169-
-- The 'NormalizedUri' is cached to avoided
170-
-- repeated normalisation when we need to compute them (which is a lot).
171-
--
172-
-- This is one of the most performance critical parts of ghcide, do not
173-
-- modify it without profiling.
174-
data NormalizedFilePath = NormalizedFilePath !NormalizedUri {-# UNPACK #-} !ShortByteString
161+
{-| A file path that is already normalized.
162+
163+
The 'NormalizedUri' is cached to avoided
164+
repeated normalisation when we need to compute them (which is a lot).
165+
166+
This is one of the most performance critical parts of HLS, do not
167+
modify it without profiling.
168+
169+
== Adoption Plan of OsPath
170+
171+
Currently we store 'Text'. We may change it to OsPath in the future if
172+
the following steps are executed.
173+
174+
1. In the client codebase, use 'osPathToNormalizedFilePath' and 'normalizedFilePathToOsPath' instead of 'fromNormalizedFilePath'
175+
and 'toNormalizedFilePath'. For HLS, we could wait until GHC 9.6 becomes the oldest
176+
GHC we support, then change 'FilePath' to OsPath everywhere in the codebase.
177+
2. Deprecate and remove 'fromNormalizedFilePath' and 'toNormalizedFilePath'.
178+
3. Change 'Text' to OsPath and benchmark it to make sure performance doesn't go down. Don't forget to check Windows,
179+
as OsPath on Windows uses UTF-16, which may consume more memory.
180+
181+
See [#453](https://github.com/haskell/lsp/pull/453) and [#446](https://github.com/haskell/lsp/pull/446)
182+
for more discussions on this topic.
183+
-}
184+
data NormalizedFilePath = NormalizedFilePath !NormalizedUri {-# UNPACK #-} !Text
175185
deriving (Generic, Eq, Ord)
176186

177187
instance NFData NormalizedFilePath
178188

179189
instance Binary NormalizedFilePath where
180190
put (NormalizedFilePath _ fp) = put fp
181191
get = do
182-
v <- Data.Binary.get :: Get ShortByteString
183-
case decodeFilePath v of
184-
Left e -> fail (show e)
185-
Right v' ->
186-
return (NormalizedFilePath (internalNormalizedFilePathToUri v') v)
187-
188-
encodeFilePath :: String -> ShortByteString
189-
encodeFilePath = BS.toShort . T.encodeUtf8 . T.pack
190-
191-
decodeFilePath :: ShortByteString -> Either UnicodeException String
192-
decodeFilePath = fmap T.unpack . T.decodeUtf8' . BS.fromShort
192+
v <- Data.Binary.get :: Get Text
193+
return (NormalizedFilePath (internalNormalizedFilePathToUri (T.unpack v)) v)
193194

194195
-- | Internal helper that takes a file path that is assumed to
195196
-- already be normalized to a URI. It is up to the caller
@@ -213,24 +214,20 @@ instance IsString NormalizedFilePath where
213214
fromString = toNormalizedFilePath
214215

215216
toNormalizedFilePath :: FilePath -> NormalizedFilePath
216-
toNormalizedFilePath fp = NormalizedFilePath nuri . encodeFilePath $ nfp
217+
toNormalizedFilePath fp = NormalizedFilePath nuri . T.pack $ nfp
217218
where
218219
nfp = FP.normalise fp
219220
nuri = internalNormalizedFilePathToUri nfp
220221

221222
-- | Extracts 'FilePath' from 'NormalizedFilePath'.
222-
-- The function is total. The 'HasCallStack' constraint is added for debugging purpose only.
223-
fromNormalizedFilePath :: HasCallStack => NormalizedFilePath -> FilePath
224-
fromNormalizedFilePath (NormalizedFilePath _ fp) =
225-
case decodeFilePath fp of
226-
Left e -> error $ show e
227-
Right x -> x
223+
fromNormalizedFilePath :: NormalizedFilePath -> FilePath
224+
fromNormalizedFilePath (NormalizedFilePath _ fp) = T.unpack fp
228225

229226
normalizedFilePathToUri :: NormalizedFilePath -> NormalizedUri
230227
normalizedFilePathToUri (NormalizedFilePath uri _) = uri
231228

232229
uriToNormalizedFilePath :: NormalizedUri -> Maybe NormalizedFilePath
233-
uriToNormalizedFilePath nuri = fmap (NormalizedFilePath nuri . encodeFilePath) mbFilePath
230+
uriToNormalizedFilePath nuri = fmap (NormalizedFilePath nuri . T.pack) mbFilePath
234231
where mbFilePath = platformAwareUriToFilePath System.Info.os (fromNormalizedUri nuri)
235232

236233
emptyNormalizedUri :: NormalizedUri

lsp-types/src/Language/LSP/Types/Uri/OsPath.hs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,38 +10,44 @@ module Language.LSP.Types.Uri.OsPath
1010
#ifdef OS_PATH
1111
osPathToNormalizedFilePath
1212
, normalizedFilePathToOsPath
13+
, EncodingException
1314
#endif
1415
) where
1516

1617
#ifdef OS_PATH
1718

18-
import Control.DeepSeq (NFData, force)
1919
import Control.Exception hiding (try)
2020
import Control.Monad.Catch
21+
import GHC.IO.Encoding (getFileSystemEncoding)
2122
import Language.LSP.Types.Uri
23+
import System.IO
2224
import System.IO.Unsafe (unsafePerformIO)
2325
import System.OsPath
26+
import System.OsPath.Encoding (EncodingException)
2427

2528
{-|
26-
Constructs 'NormalizedFilePath' from 'OsPath'. Throws 'IOException' if the conversion fails.
29+
Constructs 'NormalizedFilePath' from 'OsPath'. Throws 'EncodingException' if the conversion fails.
30+
31+
We store a 'Text' in 'NormalizedFilePath', which is UTF-16 or UTF-8 depending on the verion of text library.
32+
'OsPath' may have a different encoding than 'Text', so this function may fail.
33+
But DO NOTE THAT encoding mismatch doesn't always mean an exception will be thrown.
34+
[Possibly your encoding simply won't throw exception on failure](https://hackage.haskell.org/package/base-4.17.0.0/docs/src/GHC.IO.Encoding.html#initFileSystemEncoding).
35+
Possibly the conversion function can't find any invalid byte sequence, giving a sucessful but wrong result.
2736
-}
2837
osPathToNormalizedFilePath :: MonadThrow m => OsPath -> m NormalizedFilePath
29-
osPathToNormalizedFilePath = fmap toNormalizedFilePath . unsafePerformIO' . decodeFS
38+
osPathToNormalizedFilePath = fmap toNormalizedFilePath . liftException . decodeWith systemEnc utf16le
3039

3140
{-|
32-
Extracts 'OsPath' from 'NormalizedFilePath'. Throws 'IOException' if the conversion fails.
41+
Extracts 'OsPath' from 'NormalizedFilePath'. Throws 'EncodingException' if the conversion fails.
3342
-}
3443
normalizedFilePathToOsPath :: MonadThrow m => NormalizedFilePath -> m OsPath
35-
normalizedFilePathToOsPath = unsafePerformIO' . encodeFS . fromNormalizedFilePath
36-
37-
unsafePerformIO' :: (MonadThrow m, NFData a) => IO a -> m a
38-
unsafePerformIO' action =
39-
case fp of
40-
Left (e :: SomeException) -> throwM e
41-
Right fp' -> pure fp'
42-
where
43-
fp = unsafePerformIO . try $ do
44-
x <- action
45-
evaluate . force $ x
44+
normalizedFilePathToOsPath = liftException . encodeWith systemEnc utf16le . fromNormalizedFilePath
45+
46+
liftException :: (MonadThrow m, Exception e) => Either e a -> m a
47+
liftException (Right x) = pure x
48+
liftException (Left err) = throwM err
49+
50+
systemEnc :: TextEncoding
51+
systemEnc = unsafePerformIO getFileSystemEncoding
4652

4753
#endif

lsp-types/test/URIFilePathSpec.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,5 +307,5 @@ normalizedFilePathSpec = beforeAll (setFileSystemEncoding utf8) $ do
307307
case OsPath.encodeWith utf16be utf16be "\184921" of
308308
Left err -> throwIO err
309309
Right osPath -> do
310-
osPathToNormalizedFilePath osPath `shouldThrow` \(_ :: IOException) -> True
310+
osPathToNormalizedFilePath osPath `shouldThrow` \(_ :: EncodingException) -> True
311311
#endif

0 commit comments

Comments
 (0)