Skip to content

Container

Container dataclass

Bases: PathLike[str]

A small filesystem container with an optional key->relative-path registry.

The container owns a root directory and persists a registry file (default: tree.json) storing logical keys mapped to relative file paths.

Design choices / conventions:

  • container / "file.ext" is a file-creation helper:

    • It ensures parent directories exist.
    • Optionally auto-registers the file under key = stem ("file").
  • Directories should be created via mkdir() (not /):

    • Enforced by requiring a suffix when using / with a relative path.

Parameters:

Name Type Description Default
root PathLike

Root directory path of the container.

required
clean bool

If True, aggressively removes the existing root directory contents (files and subdirectories) before recreating it.

False
infos_name str

Registry filename stored at container root.

'tree.json'
auto_register bool

If True, / on a relative file path auto-registers the file under key = stem.

True
Source code in gdutils/datacontainer/container.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
@dataclass
class Container(os.PathLike[str]):
    """
    A small filesystem container with an optional key->relative-path registry.

    The container owns a root directory and persists a registry file (default:
    `tree.json`) storing logical keys mapped to relative file paths.

    Design choices / conventions:

    - `container / "file.ext"` is a file-creation helper:
        - It ensures parent directories exist.
        - Optionally auto-registers the file under key = stem ("file").

    - Directories should be created via `mkdir()` (not `/`):
        - Enforced by requiring a suffix when using `/` with a relative path.

    Args:
        root: Root directory path of the container.
        clean: If True, aggressively removes the existing root directory contents
            (files and subdirectories) before recreating it.
        infos_name: Registry filename stored at container root.
        auto_register: If True, `/` on a relative file path auto-registers the
            file under key = stem.
    """

    root: PathLike
    clean: bool = False
    infos_name: str = "tree.json"
    auto_register: bool = True

    # Derived state, filled in by __post_init__ (never passed to __init__).
    _root: Path = field(init=False, repr=False)
    _infos_path: Path = field(init=False, repr=False)
    _files: Dict[str, str] = field(init=False, default_factory=dict, repr=False)
    _extras: Dict[str, Any] = field(init=False, default_factory=dict, repr=False)

    def __post_init__(self) -> None:
        """
        Initialize the container root and load the registry.

        If `clean=True` and the root already exists, the whole root tree is
        deleted first; the root directory is then (re)created and the registry
        file, if present, is loaded.

        Raises:
            ContainerInfosError: Propagated from `_load_infos()` when an existing
                registry file is unreadable or has an invalid schema.
        """
        self._root = Path(self.root).expanduser().resolve()
        if self.clean and self._root.exists():
            # Function-scope import keeps this method self-contained.
            import shutil

            # rmtree removes every entry type (including FIFOs/sockets, which an
            # is_file()/is_dir() walk would skip and then fail on rmdir()).
            shutil.rmtree(self._root)
        self._root.mkdir(parents=True, exist_ok=True)
        self._infos_path = self._root / self.infos_name
        self._files, self._extras = self._load_infos()

    # --- Path-like core -------------------------------------------------
    @property
    def path(self) -> Path:
        """Return the container root as a Path."""
        return self._root

    def __fspath__(self) -> str:
        """Allow using `os.fspath(container)` and passing to stdlib path APIs."""
        return os.fspath(self._root)

    def __str__(self) -> str:
        """String representation is the root path."""
        return str(self._root)

    def __repr__(self) -> str:
        """Debug representation."""
        return f"Container({self._root!s})"

    def __truediv__(self, other: PathLike) -> Path:
        """
        Build a path under the container root.

        Behavior:

            - If `other` is an absolute path: return it as-is (no registration).
              Its parent directories are still created.
            - If `other` is relative:

                * Requires a suffix (file-only policy). Use `mkdir()` for dirs.
                * Ensures parent dirs exist under container root.
                * If `auto_register=True`, registers under key = `other.stem`.

        Raises:

            RuntimeError: If a relative path without suffix is provided.
            KeyError: If auto-registration conflicts with an existing key pointing
                to a different path.
            ValueError: If the derived key is rejected by `register()` (it
                contains '/' or a backslash).
        """
        other_p = Path(other)
        is_relative = not other_p.is_absolute()
        if is_relative and other_p.suffix == "":
            raise RuntimeError(
                "Use mkdir() for directories; '/' is for files only."
            )

        # Absolute paths are passed through untouched; relative ones are rooted.
        target = self._root / other_p if is_relative else other_p
        target.parent.mkdir(parents=True, exist_ok=True)

        if is_relative and self.auto_register:
            self.register(other_p.stem, target.relative_to(self._root).as_posix())

        return target

    def joinpath(self, *parts: PathLike) -> Path:
        """
        Join path components under the container root.

        This behaves like `Path.joinpath` and does *not* enforce the "/" suffix
        policy nor auto-register.
        """
        # Plain Path.joinpath on the root: no suffix check, no registration,
        # no directory creation.
        return self._root.joinpath(*map(Path, parts))

    # Delegate any unknown attribute to the underlying Path.
    # NOTE: __getattr__ only runs when normal lookup fails, so registered keys
    # take precedence over Path attributes, but never over Container's own
    # attributes and methods.
    def __getattr__(self, name: str) -> Any:
        """
        Provide ergonomic access to registered files via attribute lookup.

        If `name` is a registered key, this returns `self.get(name)`.
        Otherwise, it forwards the attribute access to the underlying root Path.

        Raises:
            AttributeError: If the container's own state is not initialized yet
                (e.g. an instance created via `__new__`), or if the root Path
                has no such attribute.
        """
        # Read state straight from the instance dict: going through normal
        # attribute access here would re-enter __getattr__ and recurse forever
        # whenever `_files` / `_root` have not been assigned yet.
        state = self.__dict__
        try:
            files = state["_files"]
            root = state["_root"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None
        if name in files:
            return self.get(name)
        return getattr(root, name)

    # --- Registry -------------------------------------------------------
    def _load_infos(self) -> tuple[Dict[str, str], Dict[str, Any]]:
        """
        Load the registry file if it exists.

        Expected schema:
            {"files": {"key": "relative/path.ext", ...}, ...other top-level keys}

        Returns:
            A `(files, extras)` tuple: `files` maps key -> relative path string,
            `extras` holds every other top-level entry of the JSON document.
            Both are empty when the registry file does not exist.

        Raises:
            ContainerInfosError: If the file cannot be read/parsed or schema invalid.
        """
        if not self._infos_path.is_file():
            return {}, {}
        try:
            data = json.loads(self._infos_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            raise ContainerInfosError(f"Cannot read {self._infos_path}") from e

        # A top-level JSON list/string/number has no .get(); treat it as a
        # schema error instead of leaking an AttributeError.
        files = data.get("files") if isinstance(data, dict) else None
        if not isinstance(files, dict):
            raise ContainerInfosError("Invalid schema: expected {'files': {key: path}}")

        out: Dict[str, str] = {}
        for k, v in files.items():
            if not isinstance(k, str) or not isinstance(v, str):
                raise ContainerInfosError(
                    "Invalid entry types: keys/values must be strings"
                )
            out[k] = v
        extras = {k: v for k, v in data.items() if k != "files"}
        return out, extras

    def save(self) -> None:
        """
        Persist the current registry to disk.

        Writes JSON to `self._infos_path` with the registry under "files"
        (sorted by key) followed by the extras. An extra named "files" is never
        written, so it cannot overwrite the registry.
        """
        extras = {k: v for k, v in self._extras.items() if k != "files"}
        payload = {"files": dict(sorted(self._files.items())), **extras}
        self._infos_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    def __enter__(self) -> "Container":
        """Context manager entry; returns self."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """
        Context manager exit; always saves the registry.

        Returns None, so an exception raised inside the `with` body propagates
        after the save.
        """
        self.save()

    def register(self, key: str, relpath: PathLike) -> Path:
        """
        Register a logical key to a relative path.

        Parameters:
            key: Logical name used to retrieve the file later.
            relpath: Path relative to the container root (stored as POSIX).

        Returns:
            The path obtained by joining the root with the registered entry.

        Raises:
            ValueError: If `key` contains '/' or a backslash.
            KeyError: If `key` is already registered to a different relative path.
        """
        if "/" in key or "\\" in key:
            raise ValueError(f"Invalid registry key {key!r}: '/' not allowed")
        rel = Path(relpath).as_posix()
        if key in self._files and self._files[key] != rel:
            raise KeyError(f"{key!r} already registered as {self._files[key]!r}")
        self._files[key] = rel
        return self._root / rel

    def free(self, key: str) -> None:
        """Remove a key from the registry (no-op if missing)."""
        self._files.pop(key, None)

    def get(self, key: str) -> Path:
        """
        Resolve a registered key to a path under the container root.

        Raises:
            AttributeError: If the key is not registered (to play nicely with __getattr__).
        """
        try:
            return self._root / self._files[key]
        except KeyError as e:
            raise AttributeError(key) from e

    def mkdir(
        self, relpath: PathLike, *, parents: bool = True, exist_ok: bool = True
    ) -> Path:
        """
        Create a directory under the container root.

        This is the preferred way to create directories (instead of using `/`).

        Parameters:
            relpath: Directory path, joined onto the container root.
            parents: Forwarded to `Path.mkdir`.
            exist_ok: Forwarded to `Path.mkdir`.

        Returns:
            The path of the directory.
        """
        p = self._root / Path(relpath)
        p.mkdir(parents=parents, exist_ok=exist_ok)
        return p

    # def tree(self, show_keys: bool = False) -> str:
    #     """
    #     Generate a visual tree representation of registered files.

    #     Parameters:
    #         show_keys: If True, leaf entries are displayed as 'logical_key -> filename'.
    #             If False, leaf entries show only the filename.

    #     Returns:
    #         A formatted tree string (built using `treelib`).
    #     """
    #     from treelib.tree import Tree

    #     tree = Tree()
    #     tree.create_node(tag=f"Container: {self._root.name}", identifier="root")

    #     # deterministic layout
    #     sorted_items = sorted(self._files.items(), key=lambda kv: kv[1])

    #     for key, relpath in sorted_items:
    #         parts = Path(relpath).parts
    #         current_id = "root"

    #         for i, part in enumerate(parts):
    #             node_id = "/".join(parts[: i + 1])  # unique id by full prefix

    #             if not tree.contains(node_id):
    #                 is_last = i == len(parts) - 1
    #                 label = f"{key} -> {part}" if (is_last and show_keys) else part
    #                 tree.create_node(tag=label, identifier=node_id, parent=current_id)

    #             current_id = node_id

    #     return str(tree.show(stdout=False))

    def get_extra(self, key: str, default: Any = None) -> Any:
        """Return the extra (non-"files") registry entry `key`, or `default`."""
        return self._extras.get(key, default)

    def set_extra(self, key: str, value: Any) -> None:
        """
        Store an extra top-level entry to be written by `save()`.

        Raises:
            ValueError: If `key` is "files", which is reserved for the registry.
        """
        if key == "files":
            raise ValueError("'files' is reserved for the file registry")
        self._extras[key] = value

path property

Return the container root as a Path.

__enter__()

Context manager entry; returns self.

Source code in gdutils/datacontainer/container.py
199
200
201
def __enter__(self) -> "Container":
    """Context manager entry; returns this same container object."""
    return self

__exit__(exc_type, exc, tb)

Context manager exit; always saves the registry.

Source code in gdutils/datacontainer/container.py
203
204
205
def __exit__(self, exc_type, exc, tb) -> None:
    """
    Context manager exit; always saves the registry.

    `save()` is called regardless of the exception arguments, and the method
    returns None, so an exception from the `with` body is not suppressed.
    """
    self.save()

__fspath__()

Allow using os.fspath(container) and passing to stdlib path APIs.

Source code in gdutils/datacontainer/container.py
83
84
85
def __fspath__(self) -> str:
    """
    Allow using `os.fspath(container)` and passing to stdlib path APIs.

    Returns the filesystem path of the container root (`self._root`).
    """
    return os.fspath(self._root)

__getattr__(name)

Provide ergonomic access to registered files via attribute lookup.

If name is a registered key, this returns self.get(name). Otherwise, it forwards the attribute access to the underlying root Path.

Source code in gdutils/datacontainer/container.py
144
145
146
147
148
149
150
151
152
153
def __getattr__(self, name: str) -> Any:
    """
    Provide ergonomic access to registered files via attribute lookup.

    If `name` is a registered key, this returns `self.get(name)`.
    Otherwise, it forwards the attribute access to the underlying root Path.

    Python only calls `__getattr__` after normal lookup has failed, so
    registered keys take precedence over Path attributes but not over
    attributes found by normal lookup on the object itself.

    Raises:
        AttributeError: If `_files` / `_root` are not set on the instance yet,
            or if the root Path has no attribute `name`.
    """
    # Read state from the instance dict directly: `self._files` would re-enter
    # __getattr__ when the field is missing and recurse until RecursionError.
    state = self.__dict__
    try:
        files = state["_files"]
        root = state["_root"]
    except KeyError:
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {name!r}"
        ) from None
    if name in files:
        return self.get(name)
    return getattr(root, name)

__post_init__()

Initialize the container root and load the registry.

If clean=True, recursively deletes the root directory contents, then recreates the root directory.

Source code in gdutils/datacontainer/container.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def __post_init__(self) -> None:
    """
    Initialize the container root and load the registry.

    If `clean=True` and the root already exists, the whole root tree is deleted
    first; the root directory is then (re)created and the registry is loaded
    through `self._load_infos()`.
    """
    self._root = Path(self.root).expanduser().resolve()
    if self.clean and self._root.exists():
        # Function-scope import keeps this block self-contained.
        import shutil

        # rmtree removes every entry type, including special files that an
        # is_file()/is_dir() walk would skip and then fail on rmdir().
        shutil.rmtree(self._root)
    self._root.mkdir(parents=True, exist_ok=True)
    self._infos_path = self._root / self.infos_name
    self._files, self._extras = self._load_infos()

__repr__()

Debug representation.

Source code in gdutils/datacontainer/container.py
91
92
93
def __repr__(self) -> str:
    """Debug representation: `Container(<root>)`, with the root formatted via str()."""
    return f"Container({self._root!s})"

__str__()

String representation is the root path.

Source code in gdutils/datacontainer/container.py
87
88
89
def __str__(self) -> str:
    """String representation is the root path (`str(self._root)`)."""
    return str(self._root)

__truediv__(other)

Build a path under the container root.

Behavior:

- If `other` is an absolute path: return it as-is (no registration).
- If `other` is relative:

    * Requires a suffix (file-only policy). Use `mkdir()` for dirs.
    * Ensures parent dirs exist under container root.
    * If `auto_register=True`, registers under key = `other.stem`.

Raises:

RuntimeError: If a relative path without suffix is provided.
KeyError: If auto-registration conflicts with an existing key pointing
    to a different path.
Source code in gdutils/datacontainer/container.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def __truediv__(self, other: PathLike) -> Path:
    """
    Resolve `other` against the container root and prepare its parent folder.

    Behavior:

        - Absolute `other`: handed back unchanged and never registered (its
          parent directories are still created).
        - Relative `other`:

            * Must carry a suffix (file-only policy); use `mkdir()` for dirs.
            * Parent directories are created under the container root.
            * With `auto_register=True`, it is registered under `other.stem`.

    Raises:

        RuntimeError: If a relative path without suffix is provided.
        KeyError: If auto-registration conflicts with an existing key pointing
            to a different path.
    """
    candidate = Path(other)
    relative = not candidate.is_absolute()

    # Guard clause: the file-only policy applies to relative paths only.
    if relative and not candidate.suffix:
        raise RuntimeError(
            "Use mkdir() for directories; '/' is for files only."
        )

    destination = self._root / candidate if relative else candidate
    destination.parent.mkdir(parents=True, exist_ok=True)

    if relative and self.auto_register:
        stored_as = destination.relative_to(self._root).as_posix()
        self.register(candidate.stem, stored_as)

    return destination

free(key)

Remove a key from the registry (no-op if missing).

Source code in gdutils/datacontainer/container.py
229
230
231
def free(self, key: str) -> None:
    """Drop `key` from the registry; silently does nothing when it is absent."""
    registry = self._files
    if key in registry:
        del registry[key]

get(key)

Resolve a registered key to an absolute path.

Raises:

Type Description
AttributeError

If the key is not registered (to play nicely with `__getattr__`).

Source code in gdutils/datacontainer/container.py
233
234
235
236
237
238
239
240
241
242
243
def get(self, key: str) -> Path:
    """
    Look up a registered key and return its path joined onto the root.

    Raises:
        AttributeError: If the key is not registered (so that attribute-style
            access through __getattr__ reports a missing attribute).
    """
    base = self._root
    try:
        relative = self._files[key]
    except KeyError as missing:
        raise AttributeError(key) from missing
    return base / relative

joinpath(*parts)

Join path components under the container root.

This behaves like Path.joinpath and does not enforce the "/" suffix policy nor auto-register.

Source code in gdutils/datacontainer/container.py
132
133
134
135
136
137
138
139
140
def joinpath(self, *parts: PathLike) -> Path:
    """
    Append path components to the container root.

    Mirrors `Path.joinpath`: the "/" suffix policy is *not* enforced and
    nothing is auto-registered.
    """
    # Each component is wrapped in Path before being joined onto the root.
    components = [Path(part) for part in parts]
    return self._root.joinpath(*components)

mkdir(relpath, *, parents=True, exist_ok=True)

Create a directory under the container root.

This is the preferred way to create directories (instead of using /).

Source code in gdutils/datacontainer/container.py
245
246
247
248
249
250
251
252
253
254
255
def mkdir(
    self, relpath: PathLike, *, parents: bool = True, exist_ok: bool = True
) -> Path:
    """
    Create a directory below the container root and return its path.

    Directories are meant to be created through this method rather than with
    `/`. `parents` and `exist_ok` are forwarded to `Path.mkdir`.
    """
    directory = self._root.joinpath(Path(relpath))
    directory.mkdir(parents=parents, exist_ok=exist_ok)
    return directory

register(key, relpath)

Register a logical key to a relative path.

Parameters:

Name Type Description Default
key str

Logical name used to retrieve the file later.

required
relpath PathLike

Path relative to the container root (stored as POSIX).

required

Returns:

Type Description
Path

The absolute path (under root) corresponding to the registered entry.

Raises:

Type Description
KeyError

If key is already registered to a different relative path.

Source code in gdutils/datacontainer/container.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def register(self, key: str, relpath: PathLike) -> Path:
    """
    Bind a logical key to a path relative to the container root.

    Parameters:
        key: Logical name used to retrieve the file later.
        relpath: Path relative to the container root (stored as POSIX).

    Returns:
        The root joined with the stored relative path.

    Raises:
        ValueError: If `key` contains '/' or a backslash.
        KeyError: If `key` is already registered to a different relative path.
    """
    if any(separator in key for separator in ("/", "\\")):
        raise ValueError(f"Invalid registry key {key!r}: '/' not allowed")

    normalized = Path(relpath).as_posix()
    # setdefault stores the entry when the key is new and otherwise leaves the
    # existing one untouched, so a conflict never modifies the registry.
    if self._files.setdefault(key, normalized) != normalized:
        raise KeyError(f"{key!r} already registered as {self._files[key]!r}")
    return self._root / normalized

save()

Persist the current registry to disk.

Writes JSON to self._infos_path with stable ordering of keys.

Source code in gdutils/datacontainer/container.py
190
191
192
193
194
195
196
197
def save(self) -> None:
    """
    Persist the current registry to disk.

    Writes JSON to `self._infos_path`: the registry goes under "files" (sorted
    by key), followed by the entries of `self._extras`. An extra named "files"
    is skipped so it can never overwrite the registry in the written document.
    """
    # "files" is the reserved registry slot; drop any extra using that name.
    extras = {k: v for k, v in self._extras.items() if k != "files"}
    payload = {"files": dict(sorted(self._files.items())), **extras}
    self._infos_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")

ContainerInfosError

Bases: RuntimeError

Raised when the container registry file cannot be read or does not match the expected schema.

Source code in gdutils/datacontainer/container.py
13
14
15
16
17
class ContainerInfosError(RuntimeError):
    """
    Raised when the container registry file cannot be read or does not match the
    expected schema.

    Subclasses RuntimeError, so handlers that catch RuntimeError also catch it.
    """