{
  "id": "004-utf8-multibyte",
  "category": "canonicalization",
  "description": "Layers containing multibyte UTF-8 content (French accents, Japanese). Verifies that canonicalization is byte-based (not code-point-based) and handles all Unicode consistently.",
  "spec_section": "LRFS v1.0 §3.3",
  "inputs": {
    "layers": {
      "llmind:description": "Bonjour — un résumé en français avec des caractères accentués: éàüñ.",
      "llmind:text": "日本語のテキスト内容も問題なく扱える。"
    }
  },
  "expected": {
    "canonical_bytes_hex": "6c6c6d696e643a6465736372697074696f6e00426f6e6a6f757220e2809420756e2072c3a973756dc3a920656e206672616ec3a761697320617665632064657320636172616374c3a872657320616363656e7475c3a9733a20c3a9c3a0c3bcc3b12e006c6c6d696e643a7465787400e697a5e69cace8aa9ee381aee38386e382ade382b9e38388e58685e5aeb9e38282e5958fe9a18ce381aae3818fe689b1e38188e3828be3808200",
    "canonical_bytes_length": 169
  }
}