# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0

"""Implement some features in Python and monkey-patch them onto C++ classes.

In several cases the implementation of some higher levels features might as
well be in Python. Fortunately we can attach Python methods to C++ class
bindings after the fact.

We can also move the implementation to C++ if desired.
"""

from __future__ import annotations

import datetime
import mimetypes
import shutil
from collections.abc import KeysView, MutableMapping
from decimal import Decimal
from io import BytesIO
from pathlib import Path
from subprocess import run
from tempfile import NamedTemporaryFile
from typing import BinaryIO, Callable, ItemsView, Iterator, TypeVar, ValuesView
from warnings import warn

from . import Array, Dictionary, Name, Object, Page, Pdf, Stream
from ._augments import augment_override_cpp, augments
from ._qpdf import (
    AccessMode,
    AttachedFile,
    AttachedFileSpec,
    Attachments,
    NameTree,
    NumberTree,
    ObjectStreamMode,
    Rectangle,
    StreamDecodeLevel,
    StreamParser,
    Token,
    _ObjectMapping,
)
from .models import Encryption, EncryptionInfo, Outline, PdfMetadata, Permissions
from .models.metadata import decode_pdf_date, encode_pdf_date

# pylint: disable=no-member,unsupported-membership-test,unsubscriptable-object
# mypy: ignore-errors

__all__ = []

Numeric = TypeVar('Numeric', int, float, Decimal)


def _single_page_pdf(page) -> bytes:
    """Construct a single page PDF from the provided page in memory."""
    pdf = Pdf.new()
    pdf.pages.append(page)
    bio = BytesIO()
    pdf.save(bio)
    bio.seek(0)
    return bio.read()


def _mudraw(buffer, fmt) -> bytes:
    """Use mupdf draw to rasterize the PDF in the memory buffer."""
    # mudraw cannot read from stdin so NamedTemporaryFile is required
    with NamedTemporaryFile(suffix='.pdf') as tmp_in:
        tmp_in.write(buffer)
        tmp_in.seek(0)
        tmp_in.flush()

        proc = run(
            ['mudraw', '-F', fmt, '-o', '-', tmp_in.name],
            capture_output=True,
            check=True,
        )
        return proc.stdout


@augments(Object)
class Extend_Object:
    def _ipython_key_completions_(self):
        if isinstance(self, (Dictionary, Stream)):
            return self.keys()
        return None

    def emplace(self, other: Object, retain=(Name.Parent,)):
        """Copy all items from other without making a new object.

        Particularly when working with pages, it may be desirable to remove all
        of the existing page's contents and emplace (insert) a new page on top
        of it, in a way that preserves all links and references to the original
        page. (Or similarly, for other Dictionary objects in a PDF.)

        Any Dictionary keys in the iterable *retain* are preserved. By default,
        /Parent is retained.

        When a page is assigned (``pdf.pages[0] = new_page``), only the
        application knows if references to the original the original page are
        still valid. For example, a PDF optimizer might restructure a page
        object into another visually similar one, and references would be valid;
        but for a program that reorganizes page contents such as a N-up
        compositor, references may not be valid anymore.

        This method takes precautions to ensure that child objects in common
        with ``self`` and ``other`` are not inadvertently deleted.

        Example:
            >>> pdf.pages[0].objgen
            (16, 0)
            >>> pdf.pages[0].emplace(pdf.pages[1])
            >>> pdf.pages[0].objgen
            (16, 0)  # Same object

        .. versionchanged:: 2.11.1
            Added the *retain* argument.
        """
        if not self.same_owner_as(other):
            raise TypeError("Objects must have the same owner for emplace()")

        # .keys() returns strings, so make all strings
        retain = {str(k) for k in retain}
        self_keys = set(self.keys())
        other_keys = set(other.keys())

        assert all(isinstance(k, str) for k in (retain | self_keys | other_keys))

        del_keys = self_keys - other_keys - retain
        for k in (k for k in other_keys if k not in retain):
            self[k] = other[k]  # pylint: disable=unsupported-assignment-operation
        for k in del_keys:
            del self[k]  # pylint: disable=unsupported-delete-operation

    def _type_check_write(self, filter_, decode_parms):
        if isinstance(filter_, list):
            filter_ = Array(filter_)
        filter_ = filter_.wrap_in_array()

        if isinstance(decode_parms, list):
            decode_parms = Array(decode_parms)
        elif decode_parms is None:
            decode_parms = Array([])
        else:
            decode_parms = decode_parms.wrap_in_array()

        if not all(isinstance(item, Name) for item in filter_):
            raise TypeError(
                "filter must be: pikepdf.Name or pikepdf.Array([pikepdf.Name])"
            )
        if not all(
            (isinstance(item, Dictionary) or item is None) for item in decode_parms
        ):
            raise TypeError(
                "decode_parms must be: pikepdf.Dictionary or "
                "pikepdf.Array([pikepdf.Dictionary])"
            )
        if len(decode_parms) != 0 and len(filter_) != len(decode_parms):
            raise ValueError(
                f"filter ({repr(filter_)}) and decode_parms "
                f"({repr(decode_parms)}) must be arrays of same length"
            )
        if len(filter_) == 1:
            filter_ = filter_[0]
        if len(decode_parms) == 0:
            decode_parms = None
        elif len(decode_parms) == 1:
            decode_parms = decode_parms[0]
        return filter_, decode_parms

    def write(
        self,
        data: bytes,
        *,
        filter: Name | Array | None = None,
        decode_parms: Dictionary | Array | None = None,
        type_check: bool = True,
    ):  # pylint: disable=redefined-builtin
        """
        Replace stream object's data with new (possibly compressed) `data`.

        `filter` and `decode_parms` describe any compression that is already
        present on the input `data`. For example, if your data is already
        compressed with the Deflate algorithm, you would set
        ``filter=Name.FlateDecode``.

        When writing the PDF in :meth:`pikepdf.Pdf.save`,
        pikepdf may change the compression or apply compression to data that was
        not compressed, depending on the parameters given to that function. It
        will never change lossless to lossy encoding.

        PNG and TIFF images, even if compressed, cannot be directly inserted
        into a PDF and displayed as images.

        Args:
            data: the new data to use for replacement
            filter: The filter(s) with which the
                data is (already) encoded
            decode_parms: Parameters for the
                filters with which the object is encode
            type_check: Check arguments; use False only if you want to
                intentionally create malformed PDFs.

        If only one `filter` is specified, it may be a name such as
        `Name('/FlateDecode')`. If there are multiple filters, then array
        of names should be given.

        If there is only one filter, `decode_parms` is a Dictionary of
        parameters for that filter. If there are multiple filters, then
        `decode_parms` is an Array of Dictionary, where each array index
        is corresponds to the filter.
        """
        if type_check and filter is not None:
            filter, decode_parms = self._type_check_write(filter, decode_parms)

        self._write(data, filter=filter, decode_parms=decode_parms)


@augments(Pdf)
class Extend_Pdf:
    def _repr_mimebundle_(
        self, include=None, exclude=None
    ):  # pylint: disable=unused-argument
        """
        Present options to IPython or Jupyter for rich display of this object.

        See https://ipython.readthedocs.io/en/stable/config/integrating.html#rich-display
        """
        bio = BytesIO()
        self.save(bio)
        bio.seek(0)

        data = {'application/pdf': bio.read()}
        return data

    @property
    def docinfo(self) -> Dictionary:
        """
        Access the (deprecated) document information dictionary.

        The document information dictionary is a brief metadata record that can
        store some information about the origin of a PDF. It is deprecated and
        removed in the PDF 2.0 specification (not deprecated from the
        perspective of pikepdf). Use the ``.open_metadata()`` API instead, which
        will edit the modern (and unfortunately, more complicated) XMP metadata
        object and synchronize changes to the document information dictionary.

        This property simplifies access to the actual document information
        dictionary and ensures that it is created correctly if it needs to be
        created.

        A new, empty dictionary will be created if this property is accessed
        and dictionary does not exist. (This is to ensure that convenient code
        like ``pdf.docinfo[Name.Title] = "Title"`` will work when the dictionary
        does not exist at all.)

        You can delete the document information dictionary by deleting this property,
        ``del pdf.docinfo``. Note that accessing the property after deleting it
        will re-create with a new, empty dictionary.

        .. versionchanged: 2.4
            Added support for ``del pdf.docinfo``.
        """
        if Name.Info not in self.trailer:
            self.trailer.Info = self.make_indirect(Dictionary())
        return self.trailer.Info

    @docinfo.setter
    def docinfo(self, new_docinfo: Dictionary):
        if not new_docinfo.is_indirect:
            raise ValueError(
                "docinfo must be an indirect object - use Pdf.make_indirect"
            )
        self.trailer.Info = new_docinfo

    @docinfo.deleter
    def docinfo(self):
        if Name.Info in self.trailer:
            del self.trailer.Info

    def open_metadata(
        self,
        set_pikepdf_as_editor: bool = True,
        update_docinfo: bool = True,
        strict: bool = False,
    ) -> PdfMetadata:
        """
        Open the PDF's XMP metadata for editing.

        There is no ``.close()`` function on the metadata object, since this is
        intended to be used inside a ``with`` block only.

        For historical reasons, certain parts of PDF metadata are stored in
        two different locations and formats. This feature coordinates edits so
        that both types of metadata are updated consistently and "atomically"
        (assuming single threaded access). It operates on the ``Pdf`` in memory,
        not any file on disk. To persist metadata changes, you must still use
        ``Pdf.save()``.

        Example:
            >>> with pdf.open_metadata() as meta:
                    meta['dc:title'] = 'Set the Dublic Core Title'
                    meta['dc:description'] = 'Put the Abstract here'

        Args:
            set_pikepdf_as_editor: Automatically update the metadata ``pdf:Producer``
                to show that this version of pikepdf is the most recent software to
                modify the metadata, and ``xmp:MetadataDate`` to timestamp the update.
                Recommended, except for testing.

            update_docinfo: Update the standard fields of DocumentInfo
                (the old PDF metadata dictionary) to match the corresponding
                XMP fields. The mapping is described in
                :attr:`PdfMetadata.DOCINFO_MAPPING`. Nonstandard DocumentInfo
                fields and XMP metadata fields with no DocumentInfo equivalent
                are ignored.

            strict: If ``False`` (the default), we aggressively attempt
                to recover from any parse errors in XMP, and if that fails we
                overwrite the XMP with an empty XMP record.  If ``True``, raise
                errors when either metadata bytes are not valid and well-formed
                XMP (and thus, XML). Some trivial cases that are equivalent to
                empty or incomplete "XMP skeletons" are never treated as errors,
                and always replaced with a proper empty XMP block. Certain
                errors may be logged.
        """
        return PdfMetadata(
            self,
            pikepdf_mark=set_pikepdf_as_editor,
            sync_docinfo=update_docinfo,
            overwrite_invalid_xml=not strict,
        )

    def open_outline(self, max_depth: int = 15, strict: bool = False) -> Outline:
        """
        Open the PDF outline ("bookmarks") for editing.

        Recommend for use in a ``with`` block. Changes are committed to the
        PDF when the block exits. (The ``Pdf`` must still be opened.)

        Example:
            >>> with pdf.open_outline() as outline:
                    outline.root.insert(0, OutlineItem('Intro', 0))

        Args:
            max_depth: Maximum recursion depth of the outline to be
                imported and re-written to the document. ``0`` means only
                considering the root level, ``1`` the first-level
                sub-outline of each root element, and so on. Items beyond
                this depth will be silently ignored. Default is ``15``.
            strict: With the default behavior (set to ``False``),
                structural errors (e.g. reference loops) in the PDF document
                will only cancel processing further nodes on that particular
                level, recovering the valid parts of the document outline
                without raising an exception. When set to ``True``, any such
                error will raise an ``OutlineStructureError``, leaving the
                invalid parts in place.
                Similarly, outline objects that have been accidentally
                duplicated in the ``Outline`` container will be silently
                fixed (i.e. reproduced as new objects) or raise an
                ``OutlineStructureError``.
        """
        return Outline(self, max_depth=max_depth, strict=strict)

    def make_stream(self, data: bytes, d=None, **kwargs) -> Stream:
        """
        Create a new pikepdf.Stream object that is attached to this PDF.

        See:
            :meth:`pikepdf.Stream.__new__`

        """
        return Stream(self, data, d, **kwargs)

    def add_blank_page(
        self, *, page_size: tuple[Numeric, Numeric] = (612.0, 792.0)
    ) -> Page:
        """
        Add a blank page to this PDF.

        If pages already exist, the page will be added to the end. Pages may be
        reordered using ``Pdf.pages``.

        The caller may add content to the page by modifying its objects after creating
        it.

        Args:
            page_size (tuple): The size of the page in PDF units (1/72 inch or 0.35mm).
                Default size is set to a US Letter 8.5" x 11" page.
        """
        for dim in page_size:
            if not (3 <= dim <= 14400):
                raise ValueError('Page size must be between 3 and 14400 PDF units')

        page_dict = Dictionary(
            Type=Name.Page,
            MediaBox=Array([0, 0, page_size[0], page_size[1]]),
            Contents=self.make_stream(b''),
            Resources=Dictionary(),
        )
        page_obj = self.make_indirect(page_dict)
        self._add_page(page_obj, first=False)
        return Page(page_obj)

    def close(self) -> None:
        """
        Close a ``Pdf`` object and release resources acquired by pikepdf.

        If pikepdf opened the file handle it will close it (e.g. when opened with a file
        path). If the caller opened the file for pikepdf, the caller close the file.
        ``with`` blocks will call close when exit.

        pikepdf lazily loads data from PDFs, so some :class:`pikepdf.Object` may
        implicitly depend on the :class:`pikepdf.Pdf` being open. This is always the
        case for :class:`pikepdf.Stream` but can be true for any object. Do not close
        the `Pdf` object if you might still be accessing content from it.

        When an ``Object`` is copied from one ``Pdf`` to another, the ``Object`` is copied into
        the destination ``Pdf`` immediately, so after accessing all desired information
        from the source ``Pdf`` it may be closed.

        .. versionchanged:: 3.0
            In pikepdf 2.x, this function actually worked by resetting to a very short
            empty PDF. Code that relied on this quirk may not function correctly.
        """
        self._close()
        if getattr(self, '_tmp_stream', None):
            self._tmp_stream.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    @property
    def allow(self) -> Permissions:
        """
        Report permissions associated with this PDF.

        By default these permissions will be replicated when the PDF is
        saved. Permissions may also only be changed when a PDF is being saved,
        and are only available for encrypted PDFs. If a PDF is not encrypted,
        all operations are reported as allowed.

        pikepdf has no way of enforcing permissions.
        """
        results = {}
        for field in Permissions._fields:
            results[field] = getattr(self, '_allow_' + field)
        return Permissions(**results)

    @property
    def encryption(self) -> EncryptionInfo:
        """
        Report encryption information for this PDF.

        Encryption settings may only be changed when a PDF is saved.
        """
        return EncryptionInfo(self._encryption_data)

    def check(self) -> list[str]:
        """
        Check if PDF is well-formed.

        Similar to ``qpdf --check``.
        """

        class DiscardingParser(StreamParser):
            def __init__(self):  # pylint: disable=useless-super-delegation
                super().__init__()  # required for C++

            def handle_object(self, *_args):
                pass

            def handle_eof(self):
                pass

        problems: list[str] = []

        self._decode_all_streams_and_discard()

        discarding_parser = DiscardingParser()
        for page in self.pages:
            page.parse_contents(discarding_parser)

        for warning in self.get_warnings():
            problems.append("WARNING: " + warning)

        return problems

    def save(
        self,
        filename_or_stream: Path | str | BinaryIO | None = None,
        *,
        static_id: bool = False,
        preserve_pdfa: bool = True,
        min_version: str | tuple[str, int] = "",
        force_version: str | tuple[str, int] = "",
        fix_metadata_version: bool = True,
        compress_streams: bool = True,
        stream_decode_level: StreamDecodeLevel | None = None,
        object_stream_mode: ObjectStreamMode = ObjectStreamMode.preserve,
        normalize_content: bool = False,
        linearize: bool = False,
        qdf: bool = False,
        progress: Callable[[int], None] = None,
        encryption: Encryption | bool | None = None,
        recompress_flate: bool = False,
        deterministic_id: bool = False,
    ) -> None:
        """
        Save all modifications to this :class:`pikepdf.Pdf`.

        Args:
            filename_or_stream: Where to write the output. If a file
                exists in this location it will be overwritten.
                If the file was opened with ``allow_overwriting_input=True``,
                then it is permitted to overwrite the original file, and
                this parameter may be omitted to implicitly use the original
                filename. Otherwise, the filename may not be the same as the
                input file, as overwriting the input file would corrupt data
                since pikepdf using lazy loading.

            static_id: Indicates that the ``/ID`` metadata, normally
                calculated as a hash of certain PDF contents and metadata
                including the current time, should instead be set to a static
                value. Only use this for debugging and testing. Use
                ``deterministic_id`` if you want to get the same ``/ID`` for
                the same document contents.
            preserve_pdfa: Ensures that the file is generated in a
                manner compliant with PDF/A and other stricter variants.
                This should be True, the default, in most cases.

            min_version: Sets the minimum version of PDF
                specification that should be required. If left alone QPDF
                will decide. If a tuple, the second element is an integer, the
                extension level. If the version number is not a valid format,
                QPDF will decide what to do.
            force_version: Override the version recommend by QPDF,
                potentially creating an invalid file that does not display
                in old versions. See QPDF manual for details. If a tuple, the
                second element is an integer, the extension level.
            fix_metadata_version: If ``True`` (default) and the XMP metadata
                contains the optional PDF version field, ensure the version in
                metadata is correct. If the XMP metadata does not contain a PDF
                version field, none will be added. To ensure that the field is
                added, edit the metadata and insert a placeholder value in
                ``pdf:PDFVersion``. If XMP metadata does not exist, it will
                not be created regardless of the value of this argument.

            object_stream_mode:
                ``disable`` prevents the use of object streams.
                ``preserve`` keeps object streams from the input file.
                ``generate`` uses object streams wherever possible,
                creating the smallest files but requiring PDF 1.5+.

            compress_streams: Enables or disables the compression of
                stream objects in the PDF that are created without specifying
                any compression setting. Metadata is never compressed.
                By default this is set to ``True``, and should be except
                for debugging. Existing streams in the PDF or streams will not
                be modified. To decompress existing streams, you must set
                both ``compress_streams=False`` and ``stream_decode_level``
                to the desired decode level (e.g. ``.generalized`` will
                decompress most non-image content).

            stream_decode_level: Specifies how
                to encode stream objects. See documentation for
                :class:`pikepdf.StreamDecodeLevel`.

            recompress_flate: When disabled (the default), qpdf does not
                uncompress and recompress streams compressed with the Flate
                compression algorithm. If True, pikepdf will instruct qpdf to
                do this, which may be useful if recompressing streams to a
                higher compression level.

            normalize_content: Enables parsing and reformatting the
                content stream within PDFs. This may debugging PDFs easier.

            linearize: Enables creating linear or "fast web view",
                where the file's contents are organized sequentially so that
                a viewer can begin rendering before it has the whole file.
                As a drawback, it tends to make files larger.

            qdf: Save output QDF mode.  QDF mode is a special output
                mode in QPDF to allow editing of PDFs in a text editor. Use
                the program ``fix-qdf`` to fix convert back to a standard
                PDF.

            progress: Specify a callback function that is called
                as the PDF is written. The function will be called with an
                integer between 0-100 as the sole parameter, the progress
                percentage. This function may not access or modify the PDF
                while it is being written, or data corruption will almost
                certainly occur.

            encryption: If ``False``
                or omitted, existing encryption will be removed. If ``True``
                encryption settings are copied from the originating PDF.
                Alternately, an ``Encryption`` object may be provided that
                sets the parameters for new encryption.

            deterministic_id: Indicates that the ``/ID`` metadata, normally
                calculated as a hash of certain PDF contents and metadata
                including the current time, should instead be computed using
                only deterministic data like the file contents. At a small
                runtime cost, this enables generation of the same ``/ID`` if
                the same inputs are converted in the same way multiple times.
                Does not work for encrypted files.

        Raises:
            PdfError
            ForeignObjectError
            ValueError

        You may call ``.save()`` multiple times with different parameters
        to generate different versions of a file, and you *may* continue
        to modify the file after saving it. ``.save()`` does not modify
        the ``Pdf`` object in memory, except possibly by updating the XMP
        metadata version with ``fix_metadata_version``.

        .. note::

            :meth:`pikepdf.Pdf.remove_unreferenced_resources` before saving
            may eliminate unnecessary resources from the output file if there
            are any objects (such as images) that are referenced in a page's
            Resources dictionary but never called in the page's content stream.

        .. note::

            pikepdf can read PDFs with incremental updates, but always
            coalesces any incremental updates into a single non-incremental
            PDF file when saving.

        .. versionchanged:: 2.7
            Added *recompress_flate*.

        .. versionchanged:: 3.0
            Keyword arguments now mandatory for everything except the first
            argument.
        """
        if not filename_or_stream and getattr(self, '_original_filename', None):
            filename_or_stream = self._original_filename
        if not filename_or_stream:
            raise ValueError(
                "Cannot save to original filename because the original file was "
                "not opening using Pdf.open(..., allow_overwriting_input=True). "
                "Either specify a new destination filename/file stream or open "
                "with allow_overwriting_input=True. If this Pdf was created using "
                "Pdf.new(), you must specify a destination object since there is "
                "no original filename to save to."
            )
        self._save(
            filename_or_stream,
            static_id=static_id,
            preserve_pdfa=preserve_pdfa,
            min_version=min_version,
            force_version=force_version,
            fix_metadata_version=fix_metadata_version,
            compress_streams=compress_streams,
            stream_decode_level=stream_decode_level,
            object_stream_mode=object_stream_mode,
            normalize_content=normalize_content,
            linearize=linearize,
            qdf=qdf,
            progress=progress,
            encryption=encryption,
            samefile_check=getattr(self, '_tmp_stream', None) is None,
            recompress_flate=recompress_flate,
            deterministic_id=deterministic_id,
        )

    @staticmethod
    def open(
        filename_or_stream: Path | str | BinaryIO,
        *,
        password: str | bytes = "",
        hex_password: bool = False,
        ignore_xref_streams: bool = False,
        suppress_warnings: bool = True,
        attempt_recovery: bool = True,
        inherit_page_attributes: bool = True,
        access_mode: AccessMode = AccessMode.default,
        allow_overwriting_input: bool = False,
    ) -> Pdf:
        """
        Open an existing file at *filename_or_stream*.

        If *filename_or_stream* is path-like, the file will be opened for reading.
        The file should not be modified by another process while it is open in
        pikepdf, or undefined behavior may occur. This is because the file may be
        lazily loaded. Despite this restriction, pikepdf does not try to use any OS
        services to obtain an exclusive lock on the file. Some applications may
        want to attempt this or copy the file to a temporary location before
        editing. This behaviour changes if *allow_overwriting_input* is set: the whole
        file is then read and copied to memory, so that pikepdf can overwrite it
        when calling ``.save()``.

        When this function is called with a stream-like object, you must ensure
        that the data it returns cannot be modified, or undefined behavior will
        occur.

        Any changes to the file must be persisted by using ``.save()``.

        If *filename_or_stream* has ``.read()`` and ``.seek()`` methods, the file
        will be accessed as a readable binary stream. pikepdf will read the
        entire stream into a private buffer.

        ``.open()`` may be used in a ``with``-block; ``.close()`` will be called when
        the block exits, if applicable.

        Whenever pikepdf opens a file, it will close it. If you open the file
        for pikepdf or give it a stream-like object to read from, you must
        release that object when appropriate.

        Examples:
            >>> with Pdf.open("test.pdf") as pdf:
                    ...

            >>> pdf = Pdf.open("test.pdf", password="rosebud")

        Args:
            filename_or_stream: Filename or Python readable and seekable file
                stream of PDF to open.
            password: User or owner password to open an
                encrypted PDF. If the type of this parameter is ``str``
                it will be encoded as UTF-8. If the type is ``bytes`` it will
                be saved verbatim. Passwords are always padded or
                truncated to 32 bytes internally. Use ASCII passwords for
                maximum compatibility.
            hex_password: If True, interpret the password as a
                hex-encoded version of the exact encryption key to use, without
                performing the normal key computation. Useful in forensics.
            ignore_xref_streams: If True, ignore cross-reference
                streams. See qpdf documentation.
            suppress_warnings: If True (default), warnings are not
                printed to stderr. Use :meth:`pikepdf.Pdf.get_warnings()` to
                retrieve warnings.
            attempt_recovery: If True (default), attempt to recover
                from PDF parsing errors.
            inherit_page_attributes: If True (default), push attributes
                set on a group of pages to individual pages
            access_mode: If ``.default``, pikepdf will
                decide how to access the file. Currently, it will always
                selected stream access. To attempt memory mapping and fallback
                to stream if memory mapping failed, use ``.mmap``.  Use
                ``.mmap_only`` to require memory mapping or fail
                (this is expected to only be useful for testing). Applications
                should be prepared to handle the SIGBUS signal on POSIX in
                the event that the file is successfully mapped but later goes
                away.
            allow_overwriting_input: If True, allows calling ``.save()``
                to overwrite the input file. This is performed by loading the
                entire input file into memory at open time; this will use more
                memory and may recent performance especially when the opened
                file will not be modified.

        Raises:
            pikepdf.PasswordError: If the password failed to open the
                file.
            pikepdf.PdfError: If for other reasons we could not open
                the file.
            TypeError: If the type of ``filename_or_stream`` is not
                usable.
            FileNotFoundError: If the file was not found.

        Note:
            When *filename_or_stream* is a stream and the stream is located on a
            network, pikepdf assumes that the stream using buffering and read caches
            to achieve reasonable performance. Streams that fetch data over a network
            in response to every read or seek request, no matter how small, will
            perform poorly. It may be easier to download a PDF from network to
            temporary local storage (such as ``io.BytesIO``), manipulate it, and
            then re-upload it.

        .. versionchanged:: 3.0
            Keyword arguments now mandatory for everything except the first
            argument.
        """
        if isinstance(filename_or_stream, bytes) and filename_or_stream.startswith(
            b'%PDF-'
        ):
            warn(
                "It looks like you called with Pdf.open(data) with a bytes-like object "
                "containing a PDF. This will probably fail because this function "
                "expects a filename or opened file-like object. Instead, please use "
                "Pdf.open(BytesIO(data))."
            )

        tmp_stream, original_filename = None, False
        if allow_overwriting_input:
            try:
                Path(filename_or_stream)
            except TypeError as error:
                raise ValueError(
                    '"allow_overwriting_input=True" requires "open" first argument '
                    'to be a file path'
                ) from error
            original_filename = Path(filename_or_stream)
            with open(original_filename, 'rb') as pdf_file:
                tmp_stream = BytesIO()
                shutil.copyfileobj(pdf_file, tmp_stream)
        pdf = Pdf._open(
            tmp_stream or filename_or_stream,
            password=password,
            hex_password=hex_password,
            ignore_xref_streams=ignore_xref_streams,
            suppress_warnings=suppress_warnings,
            attempt_recovery=attempt_recovery,
            inherit_page_attributes=inherit_page_attributes,
            access_mode=access_mode,
        )
        pdf._tmp_stream = tmp_stream
        pdf._original_filename = original_filename
        return pdf


@augments(_ObjectMapping)
class Extend_ObjectMapping:
    def get(self, key, default=None) -> Object:
        try:
            return self[key]
        except KeyError:
            return default


def check_is_box(obj) -> None:
    try:
        if obj.is_rectangle:
            return
    except AttributeError:
        pass

    try:
        pdfobj = Array(obj)
        if pdfobj.is_rectangle:
            return
    except Exception as e:
        raise ValueError("object is not a rectangle") from e

    raise ValueError("object is not a rectangle")


@augments(Page)
class Extend_Page:
    @property
    def mediabox(self):
        """Return page's /MediaBox, in PDF units."""
        return self._get_mediabox(True)

    @mediabox.setter
    def mediabox(self, value):
        check_is_box(value)
        self.obj['/MediaBox'] = value

    @property
    def cropbox(self):
        """Return page's effective /CropBox, in PDF units.

        If the /CropBox is not defined, the /MediaBox is returned.
        """
        return self._get_cropbox(True, False)

    @cropbox.setter
    def cropbox(self, value):
        check_is_box(value)
        self.obj['/CropBox'] = value

    @property
    def trimbox(self):
        """Return page's effective /TrimBox, in PDF units.

        If the /TrimBox is not defined, the /CropBox is returned (and if
        /CropBox is not defined, /MediaBox is returned).
        """
        return self._get_trimbox(True, False)

    @trimbox.setter
    def trimbox(self, value):
        check_is_box(value)
        self.obj['/TrimBox'] = value

    @property
    def images(self) -> _ObjectMapping:
        """Return all regular images associated with this page.

        This method does not recurse into Form XObjects and does not
        attempt to find inline images.
        """
        return self._images

    @property
    def resources(self) -> Dictionary:
        """Return this page's resources dictionary."""
        return self.obj['/Resources']

    def add_resource(
        self,
        res: Object,
        res_type: Name,
        name: Name | None = None,
        *,
        prefix: str = '',
        replace_existing: bool = True,
    ) -> Name:
        """Add a new resource to the page's Resources dictionary.

        If the Resources dictionaries do not exist, they will be created.

        Args:
            self: The object to add to the resources dictionary.
            res: The dictionary object to insert into the resources
                dictionary.
            res_type: Should be one of the following Resource dictionary types:
                ExtGState, ColorSpace, Pattern, Shading, XObject, Font, Properties.
            name: The name of the object. If omitted, a random name will be
                generated with enough randomness to be globally unique.
            prefix: A prefix for the name of the object. Allows conveniently
                namespacing when using random names, e.g. prefix="Im" for images.
                Mutually exclusive with name parameter.
            replace_existing: If the name already exists in one of the resource
                dictionaries, remove it.

        Example:
            >>> resource_name = pdf.pages[0].add_resource(formxobj, Name.XObject)

        .. versionadded:: 2.3

        .. versionchanged:: 2.14
            If *res* does not belong to the same `Pdf` that owns this page,
            a copy of *res* is automatically created and added instead. In previous
            versions, it was necessary to change for this case manually.

        .. versionchanged:: 4.3.0
            Returns the name of the overlay in the resources dictionary instead
            of returning None.
        """
        if Name.Resources not in self.obj:
            self.obj.Resources = Dictionary()
        elif not isinstance(self.obj.Resources, Dictionary):
            raise TypeError("Page /Resources exists but is not a dictionary")
        resources = self.obj.Resources

        if res_type not in resources:
            resources[res_type] = Dictionary()

        if name is not None and prefix:
            raise ValueError("Must specify one of name= or prefix=")
        if name is None:
            name = Name.random(prefix=prefix)

        for res_dict in resources.as_dict().values():
            if not isinstance(res_dict, Dictionary):
                continue
            if name in res_dict:
                if replace_existing:
                    del res_dict[name]
                else:
                    raise ValueError(f"Name {name} already exists in page /Resources")

        resources[res_type][name] = res.with_same_owner_as(self.obj)
        return name

    def _over_underlay(
        self,
        other,
        rect: Rectangle | None,
        under: bool,
        push_stack: bool,
        shrink: bool,
        expand: bool,
    ) -> Name:
        formx = None
        if isinstance(other, Page):
            formx = other.as_form_xobject()
        elif isinstance(other, Dictionary) and other.get(Name.Type) == Name.Page:
            formx = Page(other).as_form_xobject()
        elif (
            isinstance(other, Stream)
            and other.get(Name.Type) == Name.XObject
            and other.get(Name.Subtype) == Name.Form
        ):
            formx = other

        if formx is None:
            raise TypeError(
                "other object is not something we can convert to Form XObject"
            )

        if rect is None:
            rect = Rectangle(self.trimbox)

        formx_placed_name = self.add_resource(formx, Name.XObject)
        cs = self.calc_form_xobject_placement(
            formx, formx_placed_name, rect, allow_shrink=shrink, allow_expand=expand
        )

        if push_stack:
            self.contents_add(b'q\n', prepend=True)  # prepend q
            self.contents_add(b'Q\n', prepend=False)  # i.e. append Q

        self.contents_add(cs, prepend=under)
        self.contents_coalesce()
        return formx_placed_name

    def add_overlay(
        self,
        other: Object | Page,
        rect: Rectangle | None = None,
        *,
        push_stack: bool = True,
        shrink: bool = True,
        expand: bool = True,
    ) -> Name:
        """Overlay another object on this page.

        Overlays will be drawn after all previous content, potentially drawing on top
        of existing content.

        Args:
            other: A Page or Form XObject to render as an overlay on top of this
                page.
            rect: The PDF rectangle (in PDF units) in which to draw the overlay.
                If omitted, this page's trimbox, cropbox or mediabox (in that order)
                will be used.
            push_stack: If True (default), push the graphics stack of the existing
                content stream to ensure that the overlay is rendered correctly.
                Officially PDF limits the graphics stack depth to 32. Most
                viewers will tolerate more, but excessive pushes may cause problems.
                Multiple content streams may also be coalesced into a single content
                stream where this parameter is True, since the PDF specification
                permits PDF writers to coalesce streams as they see fit.
            shrink: If True (default), allow the object to shrink to fit inside the
                rectangle. The aspect ratio will be preserved.
            expand: If True (default), allow the object to expand to fit inside the
                rectangle. The aspect ratio will be preserved.

        Returns:
            The name of the Form XObject that contains the overlay.

        .. versionadded:: 2.14

        .. versionchanged:: 4.0.0
            Added the *push_stack* parameter. Previously, this method behaved
            as if *push_stack* were False.

        .. versionchanged:: 4.2.0
            Added the *shrink* and *expand* parameters. Previously, this method
            behaved as if ``shrink=True, expand=False``.

        .. versionchanged:: 4.3.0
            Returns the name of the overlay in the resources dictionary instead
            of returning None.
        """
        return self._over_underlay(
            other,
            rect,
            under=False,
            push_stack=push_stack,
            expand=expand,
            shrink=shrink,
        )

    def add_underlay(
        self,
        other: Object | Page,
        rect: Rectangle | None = None,
        *,
        shrink: bool = True,
        expand: bool = True,
    ) -> Name:
        """Underlay another object beneath this page.

        Underlays will be drawn before all other content, so they may be overdrawn
        partially or completely.

        There is no *push_stack* parameter for this function, since adding an
        underlay can be done without manipulating the graphics stack.

        Args:
            other: A Page or Form XObject to render as an underlay underneath this
                page.
            rect: The PDF rectangle (in PDF units) in which to draw the underlay.
                If omitted, this page's trimbox, cropbox or mediabox (in that order)
                will be used.
            shrink: If True (default), allow the object to shrink to fit inside the
                rectangle. The aspect ratio will be preserved.
            expand: If True (default), allow the object to expand to fit inside the
                rectangle. The aspect ratio will be preserved.

        Returns:
            The name of the Form XObject that contains the underlay.

        .. versionadded:: 2.14

        .. versionchanged:: 4.2.0
            Added the *shrink* and *expand* parameters. Previously, this method
            behaved as if ``shrink=True, expand=False``. Fixed issue with wrong
            page rect being selected.
        """
        return self._over_underlay(
            other, rect, under=True, push_stack=False, expand=expand, shrink=shrink
        )

    def contents_add(self, contents: Stream | bytes, *, prepend: bool = False):
        """Append or prepend to an existing page's content stream.

        Args:
            contents: An existing content stream to append or prepend.
            prepend: Prepend if true, append if false (default).

        .. versionadded:: 2.14
        """
        return self._contents_add(contents, prepend=prepend)

    def __getattr__(self, name):
        return getattr(self.obj, name)

    @augment_override_cpp
    def __setattr__(self, name, value):
        if hasattr(self.__class__, name):
            object.__setattr__(self, name, value)
        else:
            setattr(self.obj, name, value)

    @augment_override_cpp
    def __delattr__(self, name):
        if hasattr(self.__class__, name):
            object.__delattr__(self, name)
        else:
            delattr(self.obj, name)

    def __getitem__(self, key):
        return self.obj[key]

    def __setitem__(self, key, value):
        self.obj[key] = value

    def __delitem__(self, key):
        del self.obj[key]

    def __contains__(self, key):
        return key in self.obj

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    def emplace(self, other: Page, retain=(Name.Parent,)):
        return self.obj.emplace(other.obj, retain=retain)

    def __repr__(self):
        return (
            repr(self.obj)
            .replace('Dictionary', 'Page', 1)
            .replace('(Type="/Page")', '', 1)
        )

    def _repr_mimebundle_(self, include=None, exclude=None):
        data = {}
        bundle = {'application/pdf', 'image/png'}
        if include:
            bundle = {k for k in bundle if k in include}
        if exclude:
            bundle = {k for k in bundle if k not in exclude}
        pagedata = _single_page_pdf(self.obj)
        if 'application/pdf' in bundle:
            data['application/pdf'] = pagedata
        if 'image/png' in bundle:
            try:
                data['image/png'] = _mudraw(pagedata, 'png')
            except (FileNotFoundError, RuntimeError):
                pass
        return data


@augments(Token)
class Extend_Token:
    def __repr__(self):
        return f'pikepdf.Token({self.type_}, {self.raw_value})'


@augments(Rectangle)
class Extend_Rectangle:
    def __repr__(self):
        return f'pikepdf.Rectangle({self.llx}, {self.lly}, {self.urx}, {self.ury})'

    def __hash__(self):
        return hash((self.llx, self.lly, self.urx, self.ury))


@augments(Attachments)
class Extend_Attachments(MutableMapping):
    def __getitem__(self, k: str) -> AttachedFileSpec:
        filespec = self._get_filespec(k)
        if filespec is None:
            raise KeyError(k)
        return filespec

    def __setitem__(self, k: str, v: AttachedFileSpec) -> None:
        if not v.filename:
            v.filename = k
        return self._add_replace_filespec(k, v)

    def __delitem__(self, k: str) -> None:
        return self._remove_filespec(k)

    def __len__(self):
        return len(self._get_all_filespecs())

    def __iter__(self) -> Iterator[str]:
        yield from self._get_all_filespecs()

    def __repr__(self):
        return f"<pikepdf._qpdf.Attachments with {len(self)} attached files>"


@augments(AttachedFileSpec)
class Extend_AttachedFileSpec:
    @staticmethod
    def from_filepath(pdf: Pdf, path: Path | str, *, description: str = ''):
        """Construct a file specification from a file path.

        This function will automatically add a creation and modified date
        using the file system, and a MIME type inferred from the file's extension.

        If the data required for the attach is in memory, use
        :meth:`pikepdf.AttachedFileSpec` instead.

        Args:
            pdf: The Pdf to attach this file specification to.
            path: A file path for the file to attach to this Pdf.
            description: An optional description. May be shown to the user in
                PDF viewers.
        """
        mime, _ = mimetypes.guess_type(str(path))
        if mime is None:
            mime = ''
        if not isinstance(path, Path):
            path = Path(path)

        stat = path.stat()
        return AttachedFileSpec(
            pdf,
            path.read_bytes(),
            description=description,
            filename=str(path.name),
            mime_type=mime,
            creation_date=encode_pdf_date(
                datetime.datetime.fromtimestamp(stat.st_ctime)
            ),
            mod_date=encode_pdf_date(datetime.datetime.fromtimestamp(stat.st_mtime)),
        )

    def __repr__(self):
        if self.filename:
            return (
                f"<pikepdf._qpdf.AttachedFileSpec for {self.filename!r}, "
                f"description {self.description!r}>"
            )
        return f"<pikepdf._qpdf.AttachedFileSpec description {self.description!r}>"


@augments(AttachedFile)
class Extend_AttachedFile:
    @property
    def creation_date(self) -> datetime.datetime | None:
        if not self._creation_date:
            return None
        return decode_pdf_date(self._creation_date)

    @creation_date.setter
    def creation_date(self, value: datetime.datetime):
        self._creation_date = encode_pdf_date(value)

    @property
    def mod_date(self) -> datetime.datetime | None:
        if not self._mod_date:
            return None
        return decode_pdf_date(self._mod_date)

    @mod_date.setter
    def mod_date(self, value: datetime.datetime):
        self._mod_date = encode_pdf_date(value)

    def read_bytes(self) -> bytes:
        return self.obj.read_bytes()

    def __repr__(self):
        return (
            f'<pikepdf._qpdf.AttachedFile objid={self.obj.objgen} size={self.size} '
            f'mime_type={self.mime_type} creation_date={self.creation_date} '
            f'mod_date={self.mod_date}>'
        )


@augments(NameTree)
class Extend_NameTree:
    def keys(self):
        return KeysView(self._as_map())

    def values(self):
        return ValuesView(self._as_map())

    def items(self):
        return ItemsView(self._as_map())

    get = MutableMapping.get
    pop = MutableMapping.pop
    popitem = MutableMapping.popitem
    clear = MutableMapping.clear
    update = MutableMapping.update
    setdefault = MutableMapping.setdefault


MutableMapping.register(NameTree)


@augments(NumberTree)
class Extend_NumberTree:
    def keys(self):
        return KeysView(self._as_map())

    def values(self):
        return ValuesView(self._as_map())

    def items(self):
        return ItemsView(self._as_map())

    get = MutableMapping.get
    pop = MutableMapping.pop
    popitem = MutableMapping.popitem
    clear = MutableMapping.clear
    update = MutableMapping.update
    setdefault = MutableMapping.setdefault


MutableMapping.register(NumberTree)