Skip to content

Commit ea6ceee

Browse files
authored
Support get file(notebook) md5 (#1363)
1 parent 8ed8b33 commit ea6ceee

File tree

8 files changed

+150
-38
lines changed

8 files changed

+150
-38
lines changed

docs/source/developers/contents.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ Models may contain the following entries:
6363
| |``None`` |if any. (:ref:`See |
6464
| | |Below<modelcontent>`) |
6565
+--------------------+-----------+------------------------------+
66+
|**md5** |unicode or |The md5 of the contents. |
67+
| |``None`` | |
68+
| | | |
69+
+--------------------+-----------+------------------------------+
6670

6771
.. _modelcontent:
6872

@@ -76,6 +80,8 @@ model. There are three model types: **notebook**, **file**, and **directory**.
7680
:class:`nbformat.notebooknode.NotebookNode` representing the .ipynb file
7781
represented by the model. See the `NBFormat`_ documentation for a full
7882
description.
83+
- The ``md5`` field is a hexdigest string of the md5 value of the notebook
84+
file.
7985

8086
- ``file`` models
8187
- The ``format`` field is either ``"text"`` or ``"base64"``.
@@ -85,12 +91,14 @@ model. There are three model types: **notebook**, **file**, and **directory**.
8591
file models, ``content`` simply contains the file's bytes after decoding
8692
as UTF-8. Non-text (``base64``) files are read as bytes, base64 encoded,
8793
and then decoded as UTF-8.
94+
- The ``md5`` field is a hexdigest string of the md5 value of the file.
8895

8996
- ``directory`` models
9097
- The ``format`` field is always ``"json"``.
9198
- The ``mimetype`` field is always ``None``.
9299
- The ``content`` field contains a list of :ref:`content-free<contentfree>`
93100
models representing the entities in the directory.
101+
- The ``md5`` field is always ``None``.
94102

95103
.. note::
96104

@@ -129,6 +137,7 @@ model. There are three model types: **notebook**, **file**, and **directory**.
129137
"path": "foo/a.ipynb",
130138
"type": "notebook",
131139
"writable": True,
140+
"md5": "7e47382b370c05a1b14706a2a8aff91a",
132141
}
133142
134143
# Notebook Model without Content

jupyter_server/services/contents/fileio.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# Copyright (c) Jupyter Development Team.
55
# Distributed under the terms of the Modified BSD License.
66
import errno
7+
import hashlib
78
import os
89
import shutil
910
from base64 import decodebytes, encodebytes
@@ -268,7 +269,9 @@ def _read_notebook(self, os_path, as_version=4, capture_validation_error=None):
268269
with self.open(os_path, "r", encoding="utf-8") as f:
269270
try:
270271
return nbformat.read(
271-
f, as_version=as_version, capture_validation_error=capture_validation_error
272+
f,
273+
as_version=as_version,
274+
capture_validation_error=capture_validation_error,
272275
)
273276
except Exception as e:
274277
e_orig = e
@@ -309,13 +312,17 @@ def _read_file(self, os_path, format):
309312
format:
310313
If 'text', the contents will be decoded as UTF-8.
311314
If 'base64', the raw bytes contents will be encoded as base64.
315+
If 'byte', the raw bytes contents will be returned.
312316
If not specified, try to decode as UTF-8, and fall back to base64
313317
"""
314318
if not os.path.isfile(os_path):
315319
raise HTTPError(400, "Cannot read non-file %s" % os_path)
316320

317321
with self.open(os_path, "rb") as f:
318322
bcontent = f.read()
323+
if format == "byte":
324+
# Not for http response but internal use
325+
return bcontent, "byte"
319326

320327
if format is None or format == "text":
321328
# Try to interpret as unicode if format is unknown or if unicode
@@ -350,6 +357,12 @@ def _save_file(self, os_path, content, format):
350357
with self.atomic_writing(os_path, text=False) as f:
351358
f.write(bcontent)
352359

360+
def _get_md5(self, os_path):
361+
c, _ = self._read_file(os_path, "byte")
362+
md5 = hashlib.md5() # noqa: S324
363+
md5.update(c)
364+
return md5.hexdigest()
365+
353366

354367
class AsyncFileManagerMixin(FileManagerMixin):
355368
"""
@@ -417,13 +430,17 @@ async def _read_file(self, os_path, format):
417430
format:
418431
If 'text', the contents will be decoded as UTF-8.
419432
If 'base64', the raw bytes contents will be encoded as base64.
433+
If 'byte', the raw bytes contents will be returned.
420434
If not specified, try to decode as UTF-8, and fall back to base64
421435
"""
422436
if not os.path.isfile(os_path):
423437
raise HTTPError(400, "Cannot read non-file %s" % os_path)
424438

425439
with self.open(os_path, "rb") as f:
426440
bcontent = await run_sync(f.read)
441+
if format == "byte":
442+
# Not for http response but internal use
443+
return bcontent, "byte"
427444

428445
if format is None or format == "text":
429446
# Try to interpret as unicode if format is unknown or if unicode
@@ -457,3 +474,9 @@ async def _save_file(self, os_path, content, format):
457474

458475
with self.atomic_writing(os_path, text=False) as f:
459476
await run_sync(f.write, bcontent)
477+
478+
async def _get_md5(self, os_path):
479+
c, _ = await self._read_file(os_path, "byte")
480+
md5 = hashlib.md5() # noqa: S324
481+
await run_sync(md5.update, c)
482+
return md5.hexdigest()

jupyter_server/services/contents/filemanager.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ def _base_model(self, path):
268268
model["mimetype"] = None
269269
model["size"] = size
270270
model["writable"] = self.is_writable(path)
271+
model["md5"] = None
271272

272273
return model
273274

@@ -335,7 +336,7 @@ def _dir_model(self, path, content=True):
335336

336337
return model
337338

338-
def _file_model(self, path, content=True, format=None):
339+
def _file_model(self, path, content=True, format=None, md5=False):
339340
"""Build a model for a file
340341
341342
if content is requested, include the file contents.
@@ -364,10 +365,13 @@ def _file_model(self, path, content=True, format=None):
364365
content=content,
365366
format=format,
366367
)
368+
if md5:
369+
md5 = self._get_md5(os_path)
370+
model.update(md5=md5)
367371

368372
return model
369373

370-
def _notebook_model(self, path, content=True):
374+
def _notebook_model(self, path, content=True, md5=False):
371375
"""Build a notebook model
372376
373377
if content is requested, the notebook content will be populated
@@ -386,10 +390,12 @@ def _notebook_model(self, path, content=True):
386390
model["content"] = nb
387391
model["format"] = "json"
388392
self.validate_notebook_model(model, validation_error)
393+
if md5:
394+
model["md5"] = self._get_md5(os_path)
389395

390396
return model
391397

392-
def get(self, path, content=True, type=None, format=None):
398+
def get(self, path, content=True, type=None, format=None, md5=None):
393399
"""Takes a path for an entity and returns its model
394400
395401
Parameters
@@ -404,6 +410,8 @@ def get(self, path, content=True, type=None, format=None):
404410
format : str, optional
405411
The requested format for file contents. 'text' or 'base64'.
406412
Ignored if this returns a notebook or directory model.
413+
md5: bool, optional
414+
Whether to include the md5 of the file contents.
407415
408416
Returns
409417
-------
@@ -431,11 +439,11 @@ def get(self, path, content=True, type=None, format=None):
431439
)
432440
model = self._dir_model(path, content=content)
433441
elif type == "notebook" or (type is None and path.endswith(".ipynb")):
434-
model = self._notebook_model(path, content=content)
442+
model = self._notebook_model(path, content=content, md5=md5)
435443
else:
436444
if type == "directory":
437445
raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
438-
model = self._file_model(path, content=content, format=format)
446+
model = self._file_model(path, content=content, format=format, md5=md5)
439447
self.emit(data={"action": "get", "path": path})
440448
return model
441449

@@ -686,7 +694,9 @@ def _get_dir_size(self, path="."):
686694
).stdout.split()
687695
else:
688696
result = subprocess.run(
689-
["du", "-s", "--block-size=1", path], capture_output=True, check=True
697+
["du", "-s", "--block-size=1", path],
698+
capture_output=True,
699+
check=True,
690700
).stdout.split()
691701

692702
self.log.info(f"current status of du command {result}")
@@ -784,7 +794,7 @@ async def _dir_model(self, path, content=True):
784794

785795
return model
786796

787-
async def _file_model(self, path, content=True, format=None):
797+
async def _file_model(self, path, content=True, format=None, md5=False):
788798
"""Build a model for a file
789799
790800
if content is requested, include the file contents.
@@ -813,10 +823,13 @@ async def _file_model(self, path, content=True, format=None):
813823
content=content,
814824
format=format,
815825
)
826+
if md5:
827+
md5 = await self._get_md5(os_path)
828+
model.update(md5=md5)
816829

817830
return model
818831

819-
async def _notebook_model(self, path, content=True):
832+
async def _notebook_model(self, path, content=True, md5=False):
820833
"""Build a notebook model
821834
822835
if content is requested, the notebook content will be populated
@@ -835,10 +848,12 @@ async def _notebook_model(self, path, content=True):
835848
model["content"] = nb
836849
model["format"] = "json"
837850
self.validate_notebook_model(model, validation_error)
851+
if md5:
852+
model["md5"] = await self._get_md5(os_path)
838853

839854
return model
840855

841-
async def get(self, path, content=True, type=None, format=None):
856+
async def get(self, path, content=True, type=None, format=None, md5=False):
842857
"""Takes a path for an entity and returns its model
843858
844859
Parameters
@@ -853,6 +868,8 @@ async def get(self, path, content=True, type=None, format=None):
853868
format : str, optional
854869
The requested format for file contents. 'text' or 'base64'.
855870
Ignored if this returns a notebook or directory model.
871+
md5: bool, optional
872+
Whether to include the md5 of the file contents.
856873
857874
Returns
858875
-------
@@ -875,11 +892,11 @@ async def get(self, path, content=True, type=None, format=None):
875892
)
876893
model = await self._dir_model(path, content=content)
877894
elif type == "notebook" or (type is None and path.endswith(".ipynb")):
878-
model = await self._notebook_model(path, content=content)
895+
model = await self._notebook_model(path, content=content, md5=md5)
879896
else:
880897
if type == "directory":
881898
raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
882-
model = await self._file_model(path, content=content, format=format)
899+
model = await self._file_model(path, content=content, format=format, md5=md5)
883900
self.emit(data={"action": "get", "path": path})
884901
return model
885902

@@ -1147,7 +1164,9 @@ async def _get_dir_size(self, path: str = ".") -> str:
11471164
).stdout.split()
11481165
else:
11491166
result = subprocess.run(
1150-
["du", "-s", "--block-size=1", path], capture_output=True, check=True
1167+
["du", "-s", "--block-size=1", path],
1168+
capture_output=True,
1169+
check=True,
11511170
).stdout.split()
11521171

11531172
self.log.info(f"current status of du command {result}")

0 commit comments

Comments
 (0)