You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1226 lines
46 KiB
Python
1226 lines
46 KiB
Python
"""A contents manager that uses the local file system for storage."""
|
|
|
|
# Copyright (c) Jupyter Development Team.
|
|
# Distributed under the terms of the Modified BSD License.
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import errno
|
|
import math
|
|
import mimetypes
|
|
import os
|
|
import platform
|
|
import shutil
|
|
import stat
|
|
import subprocess
|
|
import sys
|
|
import typing as t
|
|
import warnings
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
import nbformat
|
|
from anyio.to_thread import run_sync
|
|
from jupyter_core.paths import exists, is_file_hidden, is_hidden
|
|
from send2trash import send2trash
|
|
from tornado import web
|
|
from traitlets import Bool, Int, TraitError, Unicode, default, validate
|
|
|
|
from jupyter_server import _tz as tz
|
|
from jupyter_server.base.handlers import AuthenticatedFileHandler
|
|
from jupyter_server.transutils import _i18n
|
|
from jupyter_server.utils import to_api_path
|
|
|
|
from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
|
|
from .fileio import AsyncFileManagerMixin, FileManagerMixin
|
|
from .manager import AsyncContentsManager, ContentsManager, copy_pat
|
|
|
|
try:
|
|
from os.path import samefile
|
|
except ImportError:
|
|
# windows
|
|
from jupyter_server.utils import samefile_simple as samefile # type:ignore[assignment]
|
|
|
|
_script_exporter = None
|
|
|
|
|
|
class FileContentsManager(FileManagerMixin, ContentsManager):
|
|
"""A file contents manager."""
|
|
|
|
# Directory on disk that API paths are interpreted relative to (configurable).
root_dir = Unicode(config=True)

# Size ceiling, in megabytes, enforced by ``check_folder_size`` before a folder copy.
max_copy_folder_size_mb = Int(
    500,
    config=True,
    help="The max folder size that can be copied",
)
|
|
|
|
@default("root_dir")
def _default_root_dir(self):
    """Default ``root_dir``: the parent's ``root_dir``, else the current working directory."""
    if self.parent:
        return self.parent.root_dir
    # No parent application to inherit from.
    return os.getcwd()
|
|
|
|
@validate("root_dir")
def _validate_root_dir(self, proposal):
    """Normalize a proposed ``root_dir`` to an absolute path and require it to be a directory.

    Raises
    ------
    TraitError
        If the (absolutized) path is not an existing directory.
    """
    candidate = proposal["value"]
    # Relative paths are made absolute before the directory check.
    resolved = candidate if os.path.isabs(candidate) else os.path.abspath(candidate)
    if os.path.isdir(resolved):
        return resolved
    raise TraitError("%r is not a directory" % resolved)
|
|
|
|
@default("preferred_dir")
def _default_preferred_dir(self):
    """Derive the default ``preferred_dir`` from the parent's deprecated setting.

    Returns ``""`` when there is no parent, when the parent has no usable
    ``preferred_dir``, or when it equals the parent's ``root_dir``.
    Otherwise a ``FutureWarning`` is issued and the value is returned as a
    POSIX path relative to ``self.root_dir``.

    Raises
    ------
    TraitError
        If the parent's ``preferred_dir`` is not located under ``root_dir``.
    """
    owner = self.parent
    if not owner:
        return ""

    try:
        legacy = owner.preferred_dir
        same_as_root = legacy == owner.root_dir
    except AttributeError:
        # The parent does not expose the attributes we need.
        return ""

    if same_as_root or legacy is None:
        return ""

    warnings.warn(
        "ServerApp.preferred_dir config is deprecated in jupyter-server 2.0. Use FileContentsManager.preferred_dir instead",
        FutureWarning,
        stacklevel=3,
    )
    try:
        return Path(legacy).relative_to(self.root_dir).as_posix()
    except ValueError:
        raise TraitError("%s is outside root contents directory" % legacy) from None
|
|
|
|
@validate("preferred_dir")
def _validate_preferred_dir(self, proposal):
    """Convert the proposed value with ``to_api_path`` and defer to the parent validator."""
    # Passing a value that is already an API path through here should be safe.
    raw_value = proposal["value"]
    proposal["value"] = to_api_path(raw_value, self.root_dir)
    return super()._validate_preferred_dir(proposal)
|
|
|
|
@default("checkpoints_class")
def _checkpoints_class_default(self):
    """Use the synchronous file-based checkpoints implementation by default."""
    return FileCheckpoints
|
|
|
|
# Chooses between send2trash and permanent removal in ``delete_file``.
delete_to_trash = Bool(
    True,
    config=True,
    help="""If True (default), deleting files will send them to the
        platform's trash/recycle bin, where they can be recovered. If False,
        deleting files really deletes them.""",
)
|
|
|
|
# Governs whether ``delete_file`` may remove directories that still have content.
# The help text states the refusal cases as ``delete_file`` implements them:
# win32 with trash enabled, or any platform with trash disabled.
always_delete_dir = Bool(
    False,
    config=True,
    help="""If True, deleting a non-empty directory will always be allowed.
        WARNING this may result in files being permanently removed; e.g. on Windows,
        if the data size is too big for the trash/recycle bin the directory will be permanently
        deleted. If False (default), a non-empty directory is only sent to the trash when
        that is safe: on Windows with ``delete_to_trash`` enabled, or on any platform with
        ``delete_to_trash`` disabled, the directory won't be deleted.""",
)
|
|
|
|
@default("files_handler_class")
def _files_handler_class_default(self):
    """Default ``files_handler_class``: ``AuthenticatedFileHandler``."""
    return AuthenticatedFileHandler
|
|
|
|
@default("files_handler_params")
def _files_handler_params_default(self):
    """Default ``files_handler_params``: a dict pointing ``path`` at ``root_dir``."""
    params = {"path": self.root_dir}
    return params
|
|
|
|
def is_hidden(self, path):
    """Report whether an API-style path refers to a hidden file or directory.

    Parameters
    ----------
    path : str
        The path to check. This is an API path (`/` separated,
        relative to root_dir).

    Returns
    -------
    hidden : bool
        Whether the path exists and is hidden.
    """
    api_path = path.strip("/")
    on_disk = self._get_os_path(path=api_path)
    return is_hidden(on_disk, self.root_dir)
|
|
|
|
def is_writable(self, path):
    """Report whether an API-style path refers to a writable file or directory.

    Parameters
    ----------
    path : str
        The path to check. This is an API path (`/` separated,
        relative to root_dir).

    Returns
    -------
    writable : bool
        Result of ``os.access(..., os.W_OK)`` on the resolved path;
        False if that check raises ``OSError``.
    """
    target = self._get_os_path(path=path.strip("/"))
    try:
        writable = os.access(target, os.W_OK)
    except OSError:
        self.log.error("Failed to check write permissions on %s", target)
        writable = False
    return writable
|
|
|
|
def file_exists(self, path):
    """Report whether an API-style path refers to an existing regular file.

    API-style wrapper for os.path.isfile

    Parameters
    ----------
    path : str
        The relative path to the file (with '/' as separator)

    Returns
    -------
    exists : bool
        Whether the file exists.
    """
    on_disk = self._get_os_path(path.strip("/"))
    return os.path.isfile(on_disk)
|
|
|
|
def dir_exists(self, path):
    """Report whether an API-style path refers to an existing directory.

    API-style wrapper for os.path.isdir

    Parameters
    ----------
    path : str
        The path to check. This is an API path (`/` separated,
        relative to root_dir).

    Returns
    -------
    exists : bool
        Whether the path is indeed a directory.
    """
    on_disk = self._get_os_path(path=path.strip("/"))
    return os.path.isdir(on_disk)
|
|
|
|
def exists(self, path):
    """Report whether an API-style path refers to anything that exists.

    API-style wrapper for os.path.exists

    Parameters
    ----------
    path : str
        The API path to the file (with '/' as separator)

    Returns
    -------
    exists : bool
        Whether the target exists.
    """
    api_path = path.strip("/")
    on_disk = self._get_os_path(path=api_path)
    return exists(on_disk)
|
|
|
|
def _base_model(self, path):
    """Assemble the fields shared by every contents model (file, notebook, directory).

    The entry is ``lstat``-ed, hidden entries are rejected with a 404 unless
    ``allow_hidden`` is set, and size/timestamps are filled in. ``content``,
    ``format``, ``mimetype``, ``hash`` and ``hash_algorithm`` start as None.
    """
    os_path = self._get_os_path(path)
    stat_result = os.lstat(os_path)

    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
        self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
        raise web.HTTPError(404, "file or directory does not exist: %r" % path)

    try:
        # size of file
        byte_size = stat_result.st_size
    except (ValueError, OSError):
        self.log.warning("Unable to get size.")
        byte_size = None

    # Files can rarely have an invalid timestamp
    # https://github.com/jupyter/notebook/issues/2539
    # https://github.com/jupyter/notebook/issues/2757
    # Use the Unix epoch as a fallback so we don't crash.
    try:
        modified_at = tz.utcfromtimestamp(stat_result.st_mtime)
    except (ValueError, OSError):
        self.log.warning("Invalid mtime %s for %s", stat_result.st_mtime, os_path)
        modified_at = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)

    try:
        created_at = tz.utcfromtimestamp(stat_result.st_ctime)
    except (ValueError, OSError):
        self.log.warning("Invalid ctime %s for %s", stat_result.st_ctime, os_path)
        created_at = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)

    # Create the base model.
    return {
        "name": path.rpartition("/")[2],
        "path": path,
        "last_modified": modified_at,
        "created": created_at,
        "content": None,
        "format": None,
        "mimetype": None,
        "size": byte_size,
        "writable": self.is_writable(path),
        "hash": None,
        "hash_algorithm": None,
    }
|
|
|
|
def _dir_model(self, path, content=True):
    """Build the model for a directory.

    When ``content`` is truthy, the model's ``content`` is a list of
    content-less models for the listable entries of the directory and
    ``format`` is ``"json"``.

    Raises
    ------
    web.HTTPError
        404 if the path is not a directory, or is hidden while
        ``allow_hidden`` is off.
    """
    dir_os_path = self._get_os_path(path)
    not_found = "directory does not exist: %r" % path

    if not os.path.isdir(dir_os_path):
        raise web.HTTPError(404, not_found)
    if not self.allow_hidden and is_hidden(dir_os_path, self.root_dir):
        self.log.info("Refusing to serve hidden directory %r, via 404 Error", dir_os_path)
        raise web.HTTPError(404, not_found)

    model = self._base_model(path)
    model["type"] = "directory"
    model["size"] = None
    if not content:
        return model

    entries = []
    model["content"] = entries
    for name in os.listdir(dir_os_path):
        try:
            entry_os_path = os.path.join(dir_os_path, name)
        except UnicodeDecodeError as e:
            self.log.warning("failed to decode filename '%s': %r", name, e)
            continue

        try:
            entry_stat = os.lstat(entry_os_path)
        except OSError as e:
            # skip over broken symlinks in listing
            if e.errno == errno.ENOENT:
                self.log.warning("%s doesn't exist", entry_os_path)
            elif e.errno != errno.EACCES:  # Don't provide clues about protected files
                self.log.warning("Error stat-ing %s: %r", entry_os_path, e)
            continue

        mode = entry_stat.st_mode
        # Only symlinks, regular files and directories are listed.
        if not (stat.S_ISLNK(mode) or stat.S_ISREG(mode) or stat.S_ISDIR(mode)):
            self.log.debug("%s not a regular file", entry_os_path)
            continue

        try:
            if not self.should_list(name):
                continue
            if not self.allow_hidden and is_file_hidden(entry_os_path, stat_res=entry_stat):
                continue
            entries.append(self.get(path=f"{path}/{name}", content=False))
        except OSError as e:
            # ELOOP: recursive symlink, also don't show failure due to permissions
            if e.errno not in [errno.ELOOP, errno.EACCES]:
                self.log.warning(
                    "Unknown error checking if file %r is hidden",
                    entry_os_path,
                    exc_info=True,
                )

    model["format"] = "json"
    return model
|
|
|
|
def _file_model(self, path, content=True, format=None, require_hash=False):
    """Build the model for a plain file.

    When ``content`` is truthy the file is read and included.

    format:
      If 'text', the contents will be decoded as UTF-8.
      If 'base64', the raw bytes contents will be encoded as base64.
      If not specified, try to decode as UTF-8, and fall back to base64

    When ``require_hash`` is truthy the output of ``_get_hash`` is merged
    into the model.
    """
    model = self._base_model(path)
    model["type"] = "file"

    os_path = self._get_os_path(path)
    guessed_mime, _encoding = mimetypes.guess_type(os_path)
    model["mimetype"] = guessed_mime

    raw_bytes = None
    if content:
        content, format, raw_bytes = self._read_file(os_path, format, raw=True)  # type: ignore[misc]
        if model["mimetype"] is None:
            # No guess from the file name: fall back on the format that was read.
            fallback_mimes = {
                "text": "text/plain",
                "base64": "application/octet-stream",
            }
            model["mimetype"] = fallback_mimes[format]
        model["content"] = content
        model["format"] = format

    if require_hash:
        if raw_bytes is None:
            # Content was not requested, so the bytes still have to be read.
            raw_bytes, _ = self._read_file(os_path, "byte")  # type: ignore[assignment,misc]
        model.update(**self._get_hash(raw_bytes))  # type: ignore[arg-type]

    return model
|
|
|
|
def _notebook_model(self, path, content=True, require_hash=False):
    """Build the model for a notebook.

    When ``content`` is truthy the notebook is read as version 4, its
    trusted cells are marked, and it is stored in the model as a JSON
    structure (not double-serialized) before validation.

    When ``require_hash`` is truthy the output of ``_get_hash`` is merged
    into the model.
    """
    model = self._base_model(path)
    model["type"] = "notebook"
    os_path = self._get_os_path(path)

    raw_bytes = None
    if content:
        validation_error: dict[str, t.Any] = {}
        notebook, raw_bytes = self._read_notebook(
            os_path, as_version=4, capture_validation_error=validation_error, raw=True
        )
        self.mark_trusted_cells(notebook, path)
        model.update(content=notebook, format="json")
        self.validate_notebook_model(model, validation_error)

    if require_hash:
        if raw_bytes is None:
            # Content was not requested, so the bytes still have to be read.
            raw_bytes, _ = self._read_file(os_path, "byte")  # type: ignore[misc]
        model.update(**self._get_hash(raw_bytes))  # type: ignore[arg-type]

    return model
|
|
|
|
def get(self, path, content=True, type=None, format=None, require_hash=False):
    """Look up the entity at an API path and return its model.

    Parameters
    ----------
    path : str
        the API path that describes the relative path for the target
    content : bool
        Whether to include the contents in the reply
    type : str, optional
        The requested type - 'file', 'notebook', or 'directory'.
        Will raise HTTPError 400 if the content doesn't match.
    format : str, optional
        The requested format for file contents. 'text' or 'base64'.
        Ignored if this returns a notebook or directory model.
    require_hash: bool, optional
        Whether to include the hash of the file contents.

    Returns
    -------
    model : dict
        the contents model. If content=True, returns the contents
        of the file or directory as well.
    """
    path = path.strip("/")
    os_path = self._get_os_path(path)
    not_found = "file or directory does not exist: %r" % path

    if not self.exists(path):
        raise web.HTTPError(404, not_found)

    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
        self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
        raise web.HTTPError(404, not_found)

    if os.path.isdir(os_path):
        if type not in (None, "directory"):
            raise web.HTTPError(
                400,
                f"{path} is a directory, not a {type}",
                reason="bad type",
            )
        model = self._dir_model(path, content=content)
    elif type == "directory":
        # A directory was requested but the target is not one.
        raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
    elif type == "notebook" or (type is None and path.endswith(".ipynb")):
        model = self._notebook_model(path, content=content, require_hash=require_hash)
    else:
        model = self._file_model(
            path, content=content, format=format, require_hash=require_hash
        )

    self.emit(data={"action": "get", "path": path})
    return model
|
|
|
|
def _save_directory(self, os_path, model, path=""):
|
|
"""create a directory"""
|
|
if not self.allow_hidden and is_hidden(os_path, self.root_dir):
|
|
raise web.HTTPError(400, "Cannot create directory %r" % os_path)
|
|
if not os.path.exists(os_path):
|
|
with self.perm_to_403():
|
|
os.mkdir(os_path)
|
|
elif not os.path.isdir(os_path):
|
|
raise web.HTTPError(400, "Not a directory: %s" % (os_path))
|
|
else:
|
|
self.log.debug("Directory %r already exists", os_path)
|
|
|
|
def save(self, model, path=""):
    """Persist ``model`` at ``path`` and return the stored entity's model without content.

    Pre-save hooks run first; the model must carry a ``type`` and, unless
    it is a directory, ``content``. Hidden targets are refused with a 400
    unless ``allow_hidden`` is set. Unexpected failures while writing are
    re-raised as HTTP 500.
    """
    path = path.strip("/")

    self.run_pre_save_hooks(model=model, path=path)

    if "type" not in model:
        raise web.HTTPError(400, "No file type provided")
    if "content" not in model and model["type"] != "directory":
        raise web.HTTPError(400, "No file content provided")
    kind = model["type"]
    os_path = self._get_os_path(path)

    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
        raise web.HTTPError(400, f"Cannot create file or directory {os_path!r}")

    self.log.debug("Saving %s", os_path)

    validation_error: dict[str, t.Any] = {}
    try:
        if kind == "notebook":
            notebook = nbformat.from_dict(model["content"])
            self.check_and_sign(notebook, path)
            self._save_notebook(os_path, notebook, capture_validation_error=validation_error)
            # One checkpoint should always exist for notebooks.
            if not self.checkpoints.list_checkpoints(path):
                self.create_checkpoint(path)
        elif kind == "file":
            # Missing format will be handled internally by _save_file.
            self._save_file(os_path, model["content"], model.get("format"))
        elif kind == "directory":
            self._save_directory(os_path, model, path)
        else:
            raise web.HTTPError(400, "Unhandled contents type: %s" % kind)
    except web.HTTPError:
        # HTTP errors are already in their final form.
        raise
    except Exception as e:
        self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
        raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

    validation_message = None
    if kind == "notebook":
        self.validate_notebook_model(model, validation_error=validation_error)
        validation_message = model.get("message", None)

    # Re-read from disk so the returned model reflects what was stored.
    model = self.get(path, content=False)
    if validation_message:
        model["message"] = validation_message

    self.run_post_save_hooks(model=model, os_path=os_path)
    self.emit(data={"action": "save", "path": path})
    return model
|
|
|
|
def delete_file(self, path):
    """Delete the file or directory at ``path``.

    With ``delete_to_trash`` the target is handed to ``send2trash``;
    otherwise directories are removed with ``shutil.rmtree`` and files with
    ``os.unlink``. Non-empty directories are refused (HTTP 400) unless
    ``always_delete_dir`` is set: on win32 in trash mode, and always in
    permanent-delete mode.
    """
    path = path.strip("/")
    os_path = self._get_os_path(path)

    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
        raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}")

    if not self.exists(path):
        raise web.HTTPError(404, "file or directory does not exist: %r" % path)

    def is_non_empty_dir(os_path):
        if not os.path.isdir(os_path):
            return False
        # A directory containing only leftover checkpoints is
        # considered empty.
        cp_dir = getattr(self.checkpoints, "checkpoint_dir", None)
        leftovers = set(os.listdir(os_path)) - {cp_dir}
        return bool(leftovers)

    if self.delete_to_trash:
        if not self.always_delete_dir and sys.platform == "win32" and is_non_empty_dir(os_path):
            # send2trash can really delete files on Windows, so disallow
            # deleting non-empty directories. See Github issue 3631.
            raise web.HTTPError(400, "Directory %s not empty" % os_path)
        # send2trash now supports deleting directories. see #1290
        if not self.is_writable(path):
            raise web.HTTPError(403, "Permission denied: %s" % path) from None
        self.log.debug("Sending %s to trash", os_path)
        try:
            send2trash(os_path)
        except OSError as e:
            raise web.HTTPError(400, "send2trash failed: %s" % e) from e
        return

    if not os.path.isdir(os_path):
        self.log.debug("Unlinking file %s", os_path)
        with self.perm_to_403():
            os.unlink(os_path)
        return

    # Don't permanently delete non-empty directories.
    if not self.always_delete_dir and is_non_empty_dir(os_path):
        raise web.HTTPError(400, "Directory %s not empty" % os_path)
    self.log.debug("Removing directory %s", os_path)
    with self.perm_to_403():
        shutil.rmtree(os_path)
|
|
|
|
def rename_file(self, old_path, new_path):
    """Move the entity at ``old_path`` to ``new_path`` (both API paths).

    Renaming to the same path is a no-op. Raises HTTP 400 for hidden
    source/target when ``allow_hidden`` is off, 409 when a different file
    already sits at the target, 404 when the source is missing and 500 for
    any other failure during the move.
    """
    old_path = old_path.strip("/")
    new_path = new_path.strip("/")
    if old_path == new_path:
        return

    new_os_path = self._get_os_path(new_path)
    old_os_path = self._get_os_path(old_path)

    if not self.allow_hidden:
        touches_hidden = is_hidden(old_os_path, self.root_dir) or is_hidden(
            new_os_path, self.root_dir
        )
        if touches_hidden:
            raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}")

    # Should we proceed with the move?
    target_taken = os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path)
    if target_taken:
        raise web.HTTPError(409, "File already exists: %s" % new_path)

    # Move the file
    try:
        with self.perm_to_403():
            shutil.move(old_os_path, new_os_path)
    except web.HTTPError:
        raise
    except FileNotFoundError:
        raise web.HTTPError(404, f"File or directory does not exist: {old_path}") from None
    except Exception as e:
        raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e
|
|
|
|
def info_string(self):
    """Return the localized message naming the directory being served."""
    template = _i18n("Serving notebooks from local directory: %s")
    return template % self.root_dir
|
|
|
|
def get_kernel_path(self, path, model=None):
    """Return the initial API path of a kernel associated with a given notebook.

    If ``path`` is itself a directory it is returned unchanged; otherwise
    the part before the last ``/`` is returned (``""`` when there is none).
    """
    if self.dir_exists(path):
        return path
    # rpartition yields an empty head when the path has no "/".
    head, _sep, _tail = path.rpartition("/")
    return head
|
|
|
|
def copy(self, from_path, to_path=None):
    """
    Copy an existing file or directory and return its new model.
    If to_path not specified, it will be the parent directory of from_path.
    If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
    Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
    For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.
    from_path must be a full path to a file or directory.

    Files are delegated to the parent class; directories are size-checked
    with ``check_folder_size`` and then copied by ``_copy_dir``.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    # Split into parent directory and final component ("" parent at the root).
    from_dir, _, from_name = path.rpartition("/")

    model = self.get(path)
    if model["type"] != "directory":
        # let the super class handle copying files
        return super().copy(from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    self.check_folder_size(path)

    # Default destination is the source's parent directory. The same value
    # is used for the on-disk target and for the API path of the result;
    # previously an unspecified destination was stringified to "None" and
    # the folder was copied into a directory literally named "None".
    dest_dir = from_dir if to_path is None else to_path

    to_name = copy_pat.sub(".", from_name)
    if self.dir_exists(dest_dir):
        to_name = super().increment_filename(to_name, dest_dir, insert="-Copy")

    return self._copy_dir(
        from_path=from_path,
        to_path_original=dest_dir,
        to_name=to_name,
        to_path=f"{dest_dir}/{to_name}",
    )
|
|
|
|
def _copy_dir(self, from_path, to_path_original, to_name, to_path):
    """
    Copy a directory tree with ``shutil.copytree`` and return the model
    (without content) of the copy at ``to_path``.

    An ``OSError`` during the copy is logged and re-raised as HTTP 400.
    """
    try:
        source_os_path = self._get_os_path(from_path.strip("/"))
        dest_parent_os_path = self._get_os_path(to_path_original.strip("/"))
        dest_os_path = f"{dest_parent_os_path}/{to_name}"
        shutil.copytree(source_os_path, dest_os_path)
        copied_model = self.get(to_path, content=False)
    except OSError as err:
        self.log.error(f"OSError in _copy_dir: {err}")
        raise web.HTTPError(
            400,
            f"Can't copy '{from_path}' into Folder '{to_path}'",
        ) from err

    return copied_model
|
|
|
|
def check_folder_size(self, path):
    """
    limit the size of folders being copied to be no more than the
    trait max_copy_folder_size_mb to prevent a timeout error

    Raises
    ------
    web.HTTPError
        400 when the folder at ``path`` is larger than the limit.
    """
    limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024
    size = int(self._get_dir_size(self._get_os_path(path)))
    # convert from KB to Bytes for macOS
    if platform.system() == "Darwin":
        size *= 1024

    if size > limit_bytes:
        # Single-line message: the previous triple-quoted literal leaked
        # source newlines and indentation into the HTTP error text.
        raise web.HTTPError(
            400,
            f"Can't copy folders larger than {self.max_copy_folder_size_mb}MB, "
            f'"{path}" is {self._human_readable_size(size)}',
        )
|
|
|
|
def _get_dir_size(self, path="."):
|
|
"""
|
|
calls the command line program du to get the directory size
|
|
"""
|
|
try:
|
|
if platform.system() == "Darwin":
|
|
# returns the size of the folder in KB
|
|
result = subprocess.run(
|
|
["du", "-sk", path], # noqa: S607
|
|
capture_output=True,
|
|
check=True,
|
|
).stdout.split()
|
|
else:
|
|
result = subprocess.run(
|
|
["du", "-s", "--block-size=1", path], # noqa: S607
|
|
capture_output=True,
|
|
check=True,
|
|
).stdout.split()
|
|
|
|
self.log.info(f"current status of du command {result}")
|
|
size = result[0].decode("utf-8")
|
|
except Exception:
|
|
self.log.warning(
|
|
"Not able to get the size of the %s directory. Copying might be slow if the directory is large!",
|
|
path,
|
|
)
|
|
return "0"
|
|
return size
|
|
|
|
def _human_readable_size(self, size):
|
|
"""
|
|
returns folder size in a human readable format
|
|
"""
|
|
if size == 0:
|
|
return "0 Bytes"
|
|
|
|
units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
|
|
order = int(math.log2(size) / 10) if size else 0
|
|
|
|
return f"{size / (1 << (order * 10)):.4g} {units[order]}"
|
|
|
|
|
|
class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
|
|
"""An async file contents manager."""
|
|
|
|
@default("checkpoints_class")
def _checkpoints_class_default(self):
    """Use the asynchronous file-based checkpoints implementation by default."""
    return AsyncFileCheckpoints
|
|
|
|
async def _dir_model(self, path, content=True):
    """Build the model for a directory.

    When ``content`` is truthy, the model's ``content`` is a list of
    content-less models for the listable entries of the directory and
    ``format`` is ``"json"``. Directory listing and per-entry ``lstat``
    are executed through ``run_sync``.

    Raises
    ------
    web.HTTPError
        404 if the path is not a directory, or is hidden while
        ``allow_hidden`` is off.
    """
    dir_os_path = self._get_os_path(path)
    not_found = "directory does not exist: %r" % path

    if not os.path.isdir(dir_os_path):
        raise web.HTTPError(404, not_found)
    if not self.allow_hidden and is_hidden(dir_os_path, self.root_dir):
        self.log.info("Refusing to serve hidden directory %r, via 404 Error", dir_os_path)
        raise web.HTTPError(404, not_found)

    model = self._base_model(path)
    model["type"] = "directory"
    model["size"] = None
    if not content:
        return model

    entries = []
    model["content"] = entries
    names = await run_sync(os.listdir, dir_os_path)
    for name in names:
        try:
            entry_os_path = os.path.join(dir_os_path, name)
        except UnicodeDecodeError as e:
            self.log.warning("failed to decode filename '%s': %r", name, e)
            continue

        try:
            entry_stat = await run_sync(os.lstat, entry_os_path)
        except OSError as e:
            # skip over broken symlinks in listing
            if e.errno == errno.ENOENT:
                self.log.warning("%s doesn't exist", entry_os_path)
            elif e.errno != errno.EACCES:  # Don't provide clues about protected files
                self.log.warning("Error stat-ing %s: %r", entry_os_path, e)
            continue

        mode = entry_stat.st_mode
        # Only symlinks, regular files and directories are listed.
        if not (stat.S_ISLNK(mode) or stat.S_ISREG(mode) or stat.S_ISDIR(mode)):
            self.log.debug("%s not a regular file", entry_os_path)
            continue

        try:
            if not self.should_list(name):
                continue
            if not self.allow_hidden and is_file_hidden(entry_os_path, stat_res=entry_stat):
                continue
            entries.append(await self.get(path=f"{path}/{name}", content=False))
        except OSError as e:
            # ELOOP: recursive symlink, also don't show failure due to permissions
            if e.errno not in [errno.ELOOP, errno.EACCES]:
                self.log.warning(
                    "Unknown error checking if file %r is hidden",
                    entry_os_path,
                    exc_info=True,
                )

    model["format"] = "json"
    return model
|
|
|
|
async def _file_model(self, path, content=True, format=None, require_hash=False):
    """Build the model for a plain file.

    When ``content`` is truthy the file is read and included.

    format:
      If 'text', the contents will be decoded as UTF-8.
      If 'base64', the raw bytes contents will be encoded as base64.
      If not specified, try to decode as UTF-8, and fall back to base64

    When ``require_hash`` is truthy the output of ``_get_hash`` is merged
    into the model.
    """
    model = self._base_model(path)
    model["type"] = "file"

    os_path = self._get_os_path(path)
    guessed_mime, _encoding = mimetypes.guess_type(os_path)
    model["mimetype"] = guessed_mime

    raw_bytes = None
    if content:
        content, format, raw_bytes = await self._read_file(os_path, format, raw=True)  # type: ignore[misc]
        if model["mimetype"] is None:
            # No guess from the file name: fall back on the format that was read.
            fallback_mimes = {
                "text": "text/plain",
                "base64": "application/octet-stream",
            }
            model["mimetype"] = fallback_mimes[format]
        model["content"] = content
        model["format"] = format

    if require_hash:
        if raw_bytes is None:
            # Content was not requested, so the bytes still have to be read.
            raw_bytes, _ = await self._read_file(os_path, "byte")  # type: ignore[assignment,misc]
        model.update(**self._get_hash(raw_bytes))  # type: ignore[arg-type]

    return model
|
|
|
|
async def _notebook_model(self, path, content=True, require_hash=False):
    """Build the model for a notebook.

    When ``content`` is truthy the notebook is read as version 4, its
    trusted cells are marked, and it is stored in the model as a JSON
    structure (not double-serialized) before validation.

    When ``require_hash`` is truthy the output of ``_get_hash`` is merged
    into the model.
    """
    model = self._base_model(path)
    model["type"] = "notebook"
    os_path = self._get_os_path(path)

    raw_bytes = None
    if content:
        validation_error: dict[str, t.Any] = {}
        notebook, raw_bytes = await self._read_notebook(
            os_path, as_version=4, capture_validation_error=validation_error, raw=True
        )
        self.mark_trusted_cells(notebook, path)
        model.update(content=notebook, format="json")
        self.validate_notebook_model(model, validation_error)

    if require_hash:
        if raw_bytes is None:
            # Content was not requested, so the bytes still have to be read.
            raw_bytes, _ = await self._read_file(os_path, "byte")  # type: ignore[misc]
        model.update(**(self._get_hash(raw_bytes)))  # type: ignore[arg-type]

    return model
|
|
|
|
async def get(self, path, content=True, type=None, format=None, require_hash=False):
    """Takes a path for an entity and returns its model

    Parameters
    ----------
    path : str
        the API path that describes the relative path for the target
    content : bool
        Whether to include the contents in the reply
    type : str, optional
        The requested type - 'file', 'notebook', or 'directory'.
        Will raise HTTPError 400 if the content doesn't match.
    format : str, optional
        The requested format for file contents. 'text' or 'base64'.
        Ignored if this returns a notebook or directory model.
    require_hash: bool, optional
        Whether to include the hash of the file contents.

    Returns
    -------
    model : dict
        the contents model. If content=True, returns the contents
        of the file or directory as well.

    Raises
    ------
    web.HTTPError
        404 if the path does not exist or is hidden while ``allow_hidden``
        is off; 400 if ``type`` does not match what is on disk.
    """
    path = path.strip("/")
    four_o_four = "No such file or directory: %s" % path

    if not self.exists(path):
        raise web.HTTPError(404, four_o_four)

    os_path = self._get_os_path(path)

    # Reject hidden entries before any type check, as the synchronous
    # manager does. Otherwise a mismatched ``type`` on a hidden directory
    # produced a 400 "is a directory" and disclosed that it exists.
    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
        self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
        raise web.HTTPError(404, four_o_four)

    if os.path.isdir(os_path):
        if type not in (None, "directory"):
            raise web.HTTPError(
                400,
                f"{path} is a directory, not a {type}",
                reason="bad type",
            )
        model = await self._dir_model(path, content=content)
    elif type == "notebook" or (type is None and path.endswith(".ipynb")):
        model = await self._notebook_model(path, content=content, require_hash=require_hash)
    else:
        if type == "directory":
            raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
        model = await self._file_model(
            path, content=content, format=format, require_hash=require_hash
        )
    self.emit(data={"action": "get", "path": path})
    return model
|
|
|
|
async def _save_directory(self, os_path, model, path=""):
    """Create the directory ``os_path`` unless it already exists.

    Raises
    ------
    web.HTTPError
        400 if the path is hidden while ``allow_hidden`` is off, or if the
        path exists but is not a directory.
    """
    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
        raise web.HTTPError(400, "Cannot create hidden directory %r" % os_path)

    if os.path.isdir(os_path):
        self.log.debug("Directory %r already exists", os_path)
        return
    if os.path.exists(os_path):
        # Something other than a directory already occupies the path.
        raise web.HTTPError(400, "Not a directory: %s" % (os_path))

    with self.perm_to_403():
        await run_sync(os.mkdir, os_path)
|
|
|
|
async def save(self, model, path=""):
    """Write ``model`` to ``path`` and return the saved model without content.

    Pre-save hooks run first, then the model is checked for a ``type`` key
    and (unless it is a directory) a ``content`` key.  Notebooks are signed
    and saved, and get a checkpoint if none exists yet; files and
    directories are handed to their respective helpers.

    Raises
    ------
    tornado.web.HTTPError
        400 for a missing type, missing content or unknown type; 500 for any
        unexpected exception raised while writing.
    """
    path = path.strip("/")

    self.run_pre_save_hooks(model=model, path=path)

    if "type" not in model:
        raise web.HTTPError(400, "No file type provided")
    kind = model["type"]
    if kind != "directory" and "content" not in model:
        raise web.HTTPError(400, "No file content provided")

    os_path = self._get_os_path(path)
    self.log.debug("Saving %s", os_path)

    # Filled in by _save_notebook when notebook validation fails.
    validation_error: dict[str, t.Any] = {}
    try:
        if kind == "notebook":
            nb = nbformat.from_dict(model["content"])
            self.check_and_sign(nb, path)
            await self._save_notebook(os_path, nb, capture_validation_error=validation_error)
            # One checkpoint should always exist for notebooks.
            existing_checkpoints = await self.checkpoints.list_checkpoints(path)
            if not existing_checkpoints:
                await self.create_checkpoint(path)
        elif kind == "file":
            # Missing format will be handled internally by _save_file.
            await self._save_file(os_path, model["content"], model.get("format"))
        elif kind == "directory":
            await self._save_directory(os_path, model, path)
        else:
            raise web.HTTPError(400, "Unhandled contents type: %s" % kind)
    except web.HTTPError:
        # HTTP errors already carry the right status; pass them through.
        raise
    except Exception as e:
        self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
        raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

    validation_message = None
    if kind == "notebook":
        self.validate_notebook_model(model, validation_error=validation_error)
        validation_message = model.get("message", None)

    # Re-read the stored entry so the returned model reflects what is on disk.
    model = await self.get(path, content=False)
    if validation_message:
        model["message"] = validation_message

    self.run_post_save_hooks(model=model, os_path=os_path)
    self.emit(data={"action": "save", "path": path})
    return model
|
|
|
|
async def delete_file(self, path):
    """Delete the file or directory at ``path``.

    When ``delete_to_trash`` is set the target is handed to ``send2trash``;
    otherwise files are unlinked and directories removed with
    ``shutil.rmtree``.

    Raises
    ------
    tornado.web.HTTPError
        400 for a hidden target (when hidden paths are disallowed), for a
        non-empty directory that may not be deleted, or when ``send2trash``
        fails; 403 when the path is not writable (trash mode); 404 when the
        target does not exist.
    """
    path = path.strip("/")
    os_path = self._get_os_path(path)

    if not self.allow_hidden and is_hidden(os_path, self.root_dir):
        raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}")

    if not os.path.exists(os_path):
        raise web.HTTPError(404, "File or directory does not exist: %s" % os_path)

    async def is_non_empty_dir(os_path):
        """Return True if ``os_path`` is a directory holding anything but checkpoints."""
        if not os.path.isdir(os_path):
            return False
        # A directory containing only leftover checkpoints is considered empty.
        cp_dir = getattr(self.checkpoints, "checkpoint_dir", None)
        entries = set(await run_sync(os.listdir, os_path))
        return bool(entries - {cp_dir})

    if self.delete_to_trash:
        if (
            not self.always_delete_dir
            and sys.platform == "win32"
            and await is_non_empty_dir(os_path)
        ):
            # send2trash can really delete files on Windows, so disallow
            # deleting non-empty directories. See Github issue 3631.
            raise web.HTTPError(400, "Directory %s not empty" % os_path)
        # send2trash now supports deleting directories. see #1290
        if not self.is_writable(path):
            raise web.HTTPError(403, "Permission denied: %s" % path) from None
        self.log.debug("Sending %s to trash", os_path)
        try:
            send2trash(os_path)
        except OSError as e:
            raise web.HTTPError(400, "send2trash failed: %s" % e) from e
        return

    if not os.path.isdir(os_path):
        # Plain file: unlink it.
        self.log.debug("Unlinking file %s", os_path)
        with self.perm_to_403():
            await run_sync(os.unlink, os_path)
        return

    # Don't permanently delete non-empty directories.
    if not self.always_delete_dir and await is_non_empty_dir(os_path):
        raise web.HTTPError(400, "Directory %s not empty" % os_path)
    self.log.debug("Removing directory %s", os_path)
    with self.perm_to_403():
        await run_sync(shutil.rmtree, os_path)
|
|
|
|
async def rename_file(self, old_path, new_path):
    """Rename (move) a file or directory from ``old_path`` to ``new_path``.

    Both arguments are API paths; surrounding slashes are ignored.  Renaming
    a path onto itself is a no-op.

    Raises
    ------
    tornado.web.HTTPError
        400 if hidden paths are disallowed and either path is hidden;
        404 if the source does not exist;
        409 if the destination exists and is a different file;
        500 for any other failure during the move.
    """
    old_path = old_path.strip("/")
    new_path = new_path.strip("/")
    if new_path == old_path:
        return

    new_os_path = self._get_os_path(new_path)
    old_os_path = self._get_os_path(old_path)

    if not self.allow_hidden and (
        is_hidden(old_os_path, self.root_dir) or is_hidden(new_os_path, self.root_dir)
    ):
        raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}")

    try:
        # Should we proceed with the move?  The check lives inside the try
        # block because samefile() stats both paths: a missing source would
        # otherwise escape as a raw FileNotFoundError instead of a 404.
        if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path):
            raise web.HTTPError(409, "File already exists: %s" % new_path)

        # Move the file
        with self.perm_to_403():
            await run_sync(shutil.move, old_os_path, new_os_path)
    except web.HTTPError:
        raise
    except FileNotFoundError:
        raise web.HTTPError(404, f"File or directory does not exist: {old_path}") from None
    except Exception as e:
        raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e
|
|
|
|
async def dir_exists(self, path):
    """Return True if the API path ``path`` resolves to an existing directory."""
    api_path = path.strip("/")
    return os.path.isdir(self._get_os_path(path=api_path))
|
|
|
|
async def file_exists(self, path):
    """Return True if the API path ``path`` resolves to an existing regular file."""
    api_path = path.strip("/")
    return os.path.isfile(self._get_os_path(api_path))
|
|
|
|
async def is_hidden(self, path):
    """Return whether the API path ``path`` is hidden relative to ``root_dir``.

    The decision is delegated to the module-level ``is_hidden`` helper,
    called with the resolved OS path and ``self.root_dir``.
    """
    api_path = path.strip("/")
    resolved = self._get_os_path(path=api_path)
    return is_hidden(resolved, self.root_dir)
|
|
|
|
async def get_kernel_path(self, path, model=None):
    """Return the initial API path of a kernel associated with a given notebook.

    If ``path`` is itself a directory it is returned unchanged; otherwise
    the part before the last ``/`` is returned (an empty string when
    ``path`` contains no ``/``).  ``model`` is not used.
    """
    if await self.dir_exists(path):
        return path
    parent_dir, _sep, _leaf = path.rpartition("/")
    return parent_dir
|
|
|
|
async def copy(self, from_path, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    If to_path not specified, it will be the parent directory of from_path.
    If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
    Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
    For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.

    from_path must be a full path to a file or directory.

    Files are delegated to ``AsyncContentsManager.copy``; directories are
    size-checked with ``check_folder_size`` and then copied by ``_copy_dir``.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    # Split into parent directory and leaf name ("" parent when there is no "/").
    from_dir, _sep, from_name = path.rpartition("/")

    model = await self.get(path)
    if model["type"] != "directory":
        # let the super class handle copying files
        return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    await self.check_folder_size(path)

    if to_path is None:
        to_path = from_dir

    # Destination *directory* handed to _copy_dir.  It is captured after the
    # default is applied: stringifying an unset destination would yield the
    # literal "None" and the tree would be copied into a folder of that name.
    destination_dir = to_path

    to_name = copy_pat.sub(".", from_name)
    if await self.dir_exists(to_path):
        to_name = await super().increment_filename(to_name, to_path, insert="-Copy")
        to_path = f"{to_path}/{to_name}"

    return await self._copy_dir(
        from_path=from_path,
        to_path_original=destination_dir,
        to_name=to_name,
        to_path=to_path,
    )
|
|
|
|
async def _copy_dir(
    self, from_path: str, to_path_original: str, to_name: str, to_path: str
) -> dict[str, t.Any]:
    """
    handles copying directories
    returns the model for the copied directory

    Parameters
    ----------
    from_path : str
        API path of the directory to copy.
    to_path_original : str
        API path of the directory that will contain the copy.
    to_name : str
        Name of the copy inside ``to_path_original``.
    to_path : str
        API path used to fetch the model that is returned.

    Raises
    ------
    tornado.web.HTTPError
        400 if the copy fails with an ``OSError``.
    """
    try:
        os_from_path = self._get_os_path(from_path.strip("/"))
        os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
        # Copying a tree can take a while; run it in a worker thread so the
        # event loop is not blocked for the duration of the copy.
        await run_sync(shutil.copytree, os_from_path, os_to_path)
        model = await self.get(to_path, content=False)
    except OSError as err:
        self.log.error("OSError in _copy_dir: %s", err)
        raise web.HTTPError(
            400,
            f"Can't copy '{from_path}' into read-only Folder '{to_path}'",
        ) from err

    return model  # type:ignore[no-any-return]
|
|
|
|
async def check_folder_size(self, path: str) -> None:
    """
    limit the size of folders being copied to be no more than the
    trait max_copy_folder_size_mb to prevent a timeout error

    Parameters
    ----------
    path : str
        API path of the folder about to be copied.

    Raises
    ------
    tornado.web.HTTPError
        400 if the folder is larger than ``max_copy_folder_size_mb``.
    """
    limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024

    size = int(await self._get_dir_size(self._get_os_path(path)))
    # convert from KB to Bytes for macOS
    if platform.system() == "Darwin":
        size *= 1024

    if size > limit_bytes:
        readable = await self._human_readable_size(size)
        # Single-line message: a triple-quoted literal would leak newlines
        # and source indentation into the HTTP error text.
        raise web.HTTPError(
            400,
            f"Can't copy folders larger than {self.max_copy_folder_size_mb}MB, "
            f'"{path}" is {readable}',
        )
|
|
|
|
async def _get_dir_size(self, path: str = ".") -> str:
    """
    calls the command line program du to get the directory size

    Returns the first field of ``du``'s output as a string: kilobytes on
    macOS (``du -sk``), bytes elsewhere (``du -s --block-size=1``).
    Returns ``"0"`` and logs a warning if ``du`` cannot be run, exits with
    a non-zero status, or prints nothing parseable.
    """
    try:
        if platform.system() == "Darwin":
            # returns the size of the folder in KB
            args = ["-sk", path]
        else:
            args = ["-s", "--block-size=1", path]
        proc = await asyncio.create_subprocess_exec(
            "du", *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )

        # communicate() waits for the process to exit, so returncode is set.
        stdout, _ = await proc.communicate()
        result = proc.returncode
        self.log.info("current status of du command %s", result)
        # Explicit check instead of `assert`, which is stripped under -O.
        if result != 0:
            raise RuntimeError(f"du exited with status {result}")
        size = stdout.decode("utf-8").split()[0]
    except Exception:
        # Best effort: an unknown size must not prevent the copy.
        self.log.warning(
            "Not able to get the size of the %s directory. Copying might be slow if the directory is large!",
            path,
        )
        return "0"
    return size
|
|
|
|
async def _human_readable_size(self, size: int) -> str:
    """
    returns folder size in a human readable format

    ``size`` is a byte count.  The value is scaled by powers of 1024 and
    formatted with up to four significant digits, e.g. ``1536`` ->
    ``"1.5 KB"``.  Sizes beyond the largest known unit are expressed in
    that unit (PB) rather than failing.
    """
    if size == 0:
        return "0 Bytes"

    units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
    # Each unit step is 2**10; clamp so sizes >= 2**60 do not index past "PB".
    order = min(int(math.log2(size) / 10), len(units) - 1)

    return f"{size / (1 << (order * 10)):.4g} {units[order]}"
|