Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,12 @@ def convert(
stream_info: StreamInfo,
**kwargs: Any, # Options to pass to the converter
) -> DocumentConverterResult:
file_path = stream_info.url or stream_info.local_path or stream_info.filename
file_path = (
stream_info.url
or stream_info.local_path
or stream_info.filename
or "(unknown)"
)
md_content = f"Content from the zip file `{file_path}`:\n\n"

with zipfile.ZipFile(file_stream, "r") as zipObj:
Expand Down
22 changes: 22 additions & 0 deletions packages/markitdown/tests/test_module_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import re
import shutil
import zipfile
import pytest
from unittest.mock import MagicMock

Expand Down Expand Up @@ -532,6 +533,27 @@ def test_markitdown_llm() -> None:
validate_strings(result, PPTX_TEST_STRINGS)


def test_zip_stream_no_filename_header() -> None:
    """Regression test for the zip header when the stream has no name.

    Builds a one-entry zip archive in memory and converts it through
    ``convert_stream()`` with a ``StreamInfo`` that carries only a mimetype
    (no URL, local path, or filename). The resulting markdown must not
    contain the literal string 'None', must still include the zip header
    text, and must include the content of the archived text file.
    """
    # Create the archive entirely in memory so no path is ever associated
    # with the stream.
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, "w") as writer:
        writer.writestr("hello.txt", "Hello world")
    archive.seek(0)  # rewind so the converter reads from the start

    # Only the mimetype is supplied; url/local_path/filename are left unset.
    info = StreamInfo(mimetype="application/zip")
    result = MarkItDown().convert_stream(archive, stream_info=info)
    markdown = result.markdown

    assert (
        "None" not in markdown
    ), f"Header must not contain literal 'None'; got: {markdown[:120]!r}"
    assert "Content from the zip file" in markdown
    assert "Hello world" in markdown


if __name__ == "__main__":
"""Runs this file's tests from the command line."""
for test in [
Expand Down