Source code for dag.codecs.dag_json
"""DAG-JSON codec - deterministic JSON with IPLD CID links.
Multicodec code: ``0x0129``
DAG-JSON is JSON with special representations for IPLD types that
JSON cannot natively express:
1. **CID links** are encoded as ``{"/": "<cid-multibase-string>"}``
2. **Bytes** are encoded as ``{"/": {"bytes": "<unpadded-base64-string>"}}``
3. Map keys are sorted lexicographically (by UTF-8 bytes).
4. No whitespace between tokens.
The ``{"/": ...}`` namespace is reserved:
- ``{"/": "<string>"}`` → CID link
- ``{"/": {"bytes": "..."}}`` → bytes value
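- Any other ``{"/": ...}`` form is reserved by DAG-JSON (an error for strict decoders); the decoder in this module does not reject it and returns it as an ordinary map.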
Reference implementations:
- https://github.com/ipld/js-dag-json
- https://ipld.io/specs/codecs/dag-json/spec/
"""
from __future__ import annotations
import base64
import json
from typing import Any
from cid import make_cid
from ..codec import BlockCodec, register_codec
from ..ipld_model import CID, IPLDNode, is_cid
from ..multicodec_codes import DAG_JSON_CODE, DAG_JSON_NAME
_LINK_KEY = "/"
[docs]
class DagJsonCodec(BlockCodec):
    """DAG-JSON codec (``0x0129``).

    Encodes IPLD data-model values into deterministic JSON with
    CID links as ``{"/": "bafy..."}`` and bytes as
    ``{"/": {"bytes": "..."}}``.
    """

    @property
    def name(self) -> str:
        """Return the codec name (``DAG_JSON_NAME``)."""
        return DAG_JSON_NAME

    @property
    def code(self) -> int:
        """Return the multicodec code (``DAG_JSON_CODE``)."""
        return DAG_JSON_CODE

    def encode(self, node: IPLDNode) -> bytes:
        """Encode an IPLD value to DAG-JSON bytes.

        - CIDs → ``{"/": "<cid-string>"}``
        - bytes → ``{"/": {"bytes": "<base64-no-pad>"}}``
        - Map keys are sorted.
        - No whitespace.

        Args:
            node: The IPLD value to serialize.

        Returns:
            The UTF-8 encoded JSON text.

        Raises:
            ValueError: If *node* contains a NaN or infinite float. These
                have no JSON representation, so they are rejected instead
                of being written as the non-JSON tokens ``NaN`` /
                ``Infinity``.
            TypeError: If *node* contains a value ``json.dumps`` cannot
                serialize.
        """
        prepared = _prepare_for_json(node)
        # allow_nan=False: the json module would otherwise emit bare
        # NaN/Infinity tokens, producing bytes that are not valid JSON.
        text = json.dumps(
            prepared,
            sort_keys=True,
            separators=(",", ":"),
            allow_nan=False,
        )
        return text.encode("utf-8")

    def decode(self, data: bytes) -> IPLDNode:
        """Decode DAG-JSON bytes into an IPLD value.

        Recognizes ``{"/": ...}`` sentinel objects and converts them
        back to CID or bytes values.

        Args:
            data: Serialized JSON as bytes.

        Returns:
            The parsed value with link and bytes sentinels restored.
        """
        raw = json.loads(data)
        return _restore_from_json(raw)
def _base64_encode_no_pad(data: bytes) -> str:
"""Base64-encode *data* without padding (``=``) characters.
DAG-JSON uses unpadded base64 for bytes representation.
"""
return base64.b64encode(data).rstrip(b"=").decode("ascii")
def _prepare_for_json(node: Any) -> Any:
    """Return a copy of *node* that ``json.dumps`` can serialize.

    CIDs become ``{"/": "<cid-string>"}``, ``bytes``/``bytearray`` become
    ``{"/": {"bytes": "<unpadded-base64>"}}``, and dicts and lists are
    rebuilt with their contents converted recursively. Any other value is
    returned as-is.
    """
    # The CID check runs first, before any of the container/bytes checks.
    if is_cid(node):
        return {_LINK_KEY: str(node)}

    if isinstance(node, (bytes, bytearray)):
        encoded = _base64_encode_no_pad(bytes(node))
        return {_LINK_KEY: {"bytes": encoded}}

    if isinstance(node, dict):
        return {key: _prepare_for_json(value) for key, value in node.items()}

    if isinstance(node, list):
        return list(map(_prepare_for_json, node))

    return node
def _base64_decode_no_pad(s: str) -> bytes:
"""Decode an unpadded base64 string."""
padding = 4 - (len(s) % 4)
if padding != 4:
s += "=" * padding
return base64.b64decode(s)
def _restore_from_json(node: Any) -> Any:
    """Convert parsed JSON back into IPLD values, recursing through containers.

    A single-key map ``{"/": <str>}`` is turned into a CID, and
    ``{"/": {"bytes": <value>}}`` (inner map also single-key) is
    base64-decoded into bytes. Every other map is rebuilt with its values
    restored, lists are rebuilt element by element, and scalars are
    returned unchanged.
    """
    if isinstance(node, list):
        return [_restore_from_json(child) for child in node]

    if not isinstance(node, dict):
        return node

    if len(node) == 1 and _LINK_KEY in node:
        payload = node[_LINK_KEY]
        if isinstance(payload, str):
            return _parse_cid_string(payload)

        is_bytes_form = (
            isinstance(payload, dict) and len(payload) == 1 and "bytes" in payload
        )
        if is_bytes_form:
            return _base64_decode_no_pad(payload["bytes"])
        # Any other "/" payload falls through and is treated as a plain map.

    return {key: _restore_from_json(value) for key, value in node.items()}
def _parse_cid_string(s: str) -> CID:
    """Build a CID object from its string form by delegating to ``make_cid``."""
    parsed = make_cid(s)
    return parsed
codec = DagJsonCodec()
"""Module-level singleton codec instance."""

# Module-level aliases that forward to the singleton's properties and methods.
name, code = codec.name, codec.code
encode, decode = codec.encode, codec.decode

# Import-time side effect: hand the singleton to the codec registry.
register_codec(codec)