pretty_print_json.py - chunhualiao/public-docs GitHub Wiki

#!/usr/bin/env python3
"""
Pretty-print a .json or .jsonl file to a .pretty output alongside the input.

Features:
- For .json: loads the entire file and writes a pretty, human-readable rendering.
- For .jsonl: parses each JSON object per line and writes each object as a pretty block separated by a delimiter.
- Converts escaped newline (\\n) and tab (\\t) sequences inside string values into real newlines and tabs to improve readability of code blocks or multi-line text.

Note: The .pretty output is intended for human reading. It may not be valid JSON because strings will contain real newlines instead of escaped sequences.
"""

import argparse
import json
import os
import re
import sys
from typing import Any, Iterable


# Delimiter written between consecutive pretty-printed .jsonl records:
# an 80-dash rule on its own line.
SEPARATOR = f"\n{'-' * 80}\n"


# Matches one JSON escape sequence: a backslash plus the character it escapes.
# Scanning left to right consumes an escaped backslash ("\\\\") as a single
# unit, so the character after it is never mistaken for the start of an escape.
_ESCAPE_PAIR_RE = re.compile(r"\\(.)")

# Escape letters that are expanded into the real character for readability.
_READABLE_ESCAPES = {"n": "\n", "t": "\t"}


def _expand_escape(match: "re.Match[str]") -> str:
    """Return the real character for a \\n or \\t escape; keep any other escape as is."""
    return _READABLE_ESCAPES.get(match.group(1), match.group(0))


def pretty_json(obj: Any) -> str:
    """
    Render an object as indented JSON-like text meant for human reading.

    The object is serialized with json.dumps (indent=2, ensure_ascii=False);
    afterwards escaped newlines (\\n) and tabs (\\t) inside strings are turned
    into real newline and tab characters, so multi-line values span several
    lines in the output.

    Every other escape sequence is left untouched. In particular an escaped
    backslash followed by "n" or "t" (for example the value C:\\new\\table) is
    NOT treated as a newline/tab escape, so literal backslashes in the data
    are preserved instead of being corrupted.

    Args:
        obj: Any JSON-serializable value.

    Returns:
        The rendered text. It may not be valid JSON because strings can
        contain raw newlines and tabs.

    Raises:
        TypeError: If obj is not JSON-serializable (raised by json.dumps).
    """
    # Produce indented JSON text first; all escapes are still intact here.
    s = json.dumps(obj, indent=2, ensure_ascii=False)
    # Expand only genuine \n and \t escapes, one escape pair at a time.
    return _ESCAPE_PAIR_RE.sub(_expand_escape, s)


def is_jsonl(path: str) -> bool:
    """Return True when the path ends with ".jsonl", ignoring letter case."""
    return path.lower().endswith(".jsonl")


def output_path(input_path: str) -> str:
    """Return the output file path: the input path with ".pretty" appended."""
    suffix = ".pretty"
    return input_path + suffix


def iter_jsonl(path: str) -> Iterable[Any]:
    """
    Lazily parse a .jsonl file and yield one decoded value per non-blank line.

    Lines that are empty after stripping whitespace are ignored. A line that
    is not valid JSON is reported on stderr (with its 1-based line number)
    and skipped; iteration then continues with the next line.
    """
    with open(path, "r", encoding="utf-8") as handle:
        for number, raw in enumerate(handle, 1):
            text = raw.strip()
            if text:
                try:
                    yield json.loads(text)
                except json.JSONDecodeError as e:
                    sys.stderr.write(f"[WARN] Skipping invalid JSON at line {number}: {e}\n")

def process_json_file(input_path: str, out_path: str) -> None:
    """
    Read one JSON document from input_path and write its pretty rendering,
    followed by a trailing newline, to out_path (UTF-8 on both sides).
    """
    with open(input_path, "r", encoding="utf-8") as src:
        document = json.load(src)
    # Render before opening the output so a failure here leaves it untouched.
    rendered = pretty_json(document)
    with open(out_path, "w", encoding="utf-8") as dst:
        dst.write(rendered + "\n")


def process_jsonl_file(input_path: str, out_path: str) -> None:
    """
    Convert a .jsonl file into a .pretty file: every parsed record becomes a
    pretty-printed block, consecutive blocks are joined by SEPARATOR, and the
    output ends with a single newline.
    """
    # Read and parse the whole input first; the output is only opened afterwards.
    records = list(iter_jsonl(input_path))
    with open(out_path, "w", encoding="utf-8") as dst:
        dst.write(SEPARATOR.join(pretty_json(record) for record in records))
        dst.write("\n")


def main() -> int:
    """
    Command-line entry point.

    Parses the single positional "input" argument, derives the output path,
    and pretty-prints the file as .jsonl or (by default) as .json.

    Returns:
        0 on success, 1 if the input path does not exist, 2 if processing
        raised an exception.
    """
    cli = argparse.ArgumentParser(
        description="Pretty print .json or .jsonl to a .pretty output file for human-readable viewing."
    )
    cli.add_argument("input", help="Path to the input .json or .jsonl file")
    options = cli.parse_args()

    source = os.path.abspath(options.input)
    if not os.path.exists(source):
        sys.stderr.write(f"[ERROR] Input path not found: {source}\n")
        return 1

    target = output_path(source)

    try:
        # Anything that is not .jsonl is handled as a single JSON document.
        convert = process_jsonl_file if is_jsonl(source) else process_json_file
        convert(source, target)
    except Exception as e:
        sys.stderr.write(f"[ERROR] Failed to process '{source}': {e}\n")
        return 2

    print(f"[INFO] Wrote pretty output: {target}")
    return 0


# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())