Headline
CVE-2023-46250: SEC: Infinite recursion when using PdfWriter(clone_from=reader) (#2264) · py-pdf/pypdf@9b23ac3
pypdf is a free and open-source pure-Python PDF library. In versions 3.7.0 through 3.16.4, an attacker can craft a PDF that causes an infinite loop. The loop blocks the current process and can drive a single CPU core to 100% utilization; it does not affect memory usage. This happens, for example, when a pypdf user manipulates an incoming malicious PDF, e.g. by merging it with another PDF or by adding annotations. The issue was fixed in version 3.17.0. As a workaround, apply the patch manually by modifying pypdf/generic/_data_structures.py.
Expand Up @@ -40,6 +40,7 @@ List, Optional, Sequence, Set, Tuple, Union, cast, Expand Down Expand Up @@ -187,14 +188,15 @@ def clone( except Exception: pass
visited: Set[Tuple[int, int]] = set() d__ = cast( "DictionaryObject", self._reference_clone(self.__class__(), pdf_dest, force_duplicate), ) if ignore_fields is None: ignore_fields = [] if len(d__.keys()) == 0: d__._clone(self, pdf_dest, force_duplicate, ignore_fields) d__._clone(self, pdf_dest, force_duplicate, ignore_fields, visited) return d__
def _clone( Expand All @@ -203,6 +205,7 @@ def _clone( pdf_dest: PdfWriterProtocol, force_duplicate: bool, ignore_fields: Optional[Sequence[Union[str, int]]], visited: Set[Tuple[int, int]], ) -> None: “"” Update the object from src. Expand Down Expand Up @@ -270,6 +273,14 @@ def _clone( cur_obj.__class__(), pdf_dest, force_duplicate ), ) # check to see if we’ve previously processed our item if clon.indirect_reference is not None: idnum = clon.indirect_reference.idnum generation = clon.indirect_reference.generation if (idnum, generation) in visited: cur_obj = None break visited.add((idnum, generation)) objs.append((cur_obj, clon)) assert prev_obj is not None prev_obj[NameObject(k)] = clon.indirect_reference Expand All @@ -282,7 +293,7 @@ def _clone( except Exception: cur_obj = None for s, c in objs: c._clone(s, pdf_dest, force_duplicate, ignore_fields) c._clone(s, pdf_dest, force_duplicate, ignore_fields, visited)
for k, v in src.items(): if k not in ignore_fields: Expand Down Expand Up @@ -798,6 +809,7 @@ def _clone( pdf_dest: PdfWriterProtocol, force_duplicate: bool, ignore_fields: Optional[Sequence[Union[str, int]]], visited: Set[Tuple[int, int]], ) -> None: “"” Update the object from src. Expand All @@ -820,7 +832,7 @@ def _clone( ) except Exception: pass super()._clone(src, pdf_dest, force_duplicate, ignore_fields) super()._clone(src, pdf_dest, force_duplicate, ignore_fields, visited)
def get_data(self) -> Union[bytes, str]: return self._data Expand Down Expand Up @@ -1048,6 +1060,7 @@ def clone( except Exception: pass
visited: Set[Tuple[int, int]] = set() d__ = cast( "ContentStream", self._reference_clone( Expand All @@ -1056,7 +1069,7 @@ def clone( ) if ignore_fields is None: ignore_fields = [] d__._clone(self, pdf_dest, force_duplicate, ignore_fields) d__._clone(self, pdf_dest, force_duplicate, ignore_fields, visited) return d__
def _clone( Expand All @@ -1065,6 +1078,7 @@ def _clone( pdf_dest: PdfWriterProtocol, force_duplicate: bool, ignore_fields: Optional[Sequence[Union[str, int]]], visited: Set[Tuple[int, int]], ) -> None: “"” Update the object from src. Expand All @@ -1081,7 +1095,7 @@ def _clone( self._operations = list(src_cs._operations) self.forced_encoding = src_cs.forced_encoding # no need to call DictionaryObjection or anything # like super(DictionaryObject,self)._clone(src, pdf_dest, force_duplicate, ignore_fields) # like super(DictionaryObject,self)._clone(src, pdf_dest, force_duplicate, ignore_fields, visited)
def _parse_content_stream(self, stream: StreamType) -> None: # 7.8.2 Content Streams Expand Down
Related news
### Impact
An attacker who exploits this vulnerability can craft a PDF that causes an infinite loop. The loop blocks the current process and can drive a single CPU core to 100% utilization; it does not affect memory usage. This happens, for example, when a pypdf user manipulates an incoming malicious PDF, e.g. by merging it with another PDF or by adding annotations.

### Patches
The issue was fixed in #2264.

### Workarounds
If you cannot update your version of pypdf, modify `pypdf/generic/_data_structures.py` in the same way #2264 did.