diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitdown/src/markitdown/__main__.py
index ccb44b64b..3b4ffabff 100644
--- a/packages/markitdown/src/markitdown/__main__.py
+++ b/packages/markitdown/src/markitdown/__main__.py
@@ -3,7 +3,10 @@
 # SPDX-License-Identifier: MIT
 import argparse
 import sys
+import os
 import codecs
+import zipfile
+from datetime import datetime
 from typing import Any, Dict
 from textwrap import dedent
 from importlib.metadata import entry_points
@@ -11,6 +14,27 @@
 from ._markitdown import MarkItDown, StreamInfo, DocumentConverterResult
 
 
+def count_docx_images(filename: str) -> int:
+    """Quick pre-check: count the files under ``word/media/`` in the DOCX at *filename* (0 if it cannot be opened as a ZIP)."""
+    try:
+        with zipfile.ZipFile(filename) as z:
+            return sum(f.startswith("word/media/") and not f.endswith("/") for f in z.namelist())
+    except (zipfile.BadZipFile, OSError):  # OSError covers missing, unreadable and directory paths
+        return 0
+
+
+def ask_extract_images(image_count: int) -> bool:
+    """Ask on the terminal whether to extract images; always False when stdin is not a TTY."""
+    if sys.stdin.isatty():
+        print(f"\n📄 检测到文档中包含 {image_count} 张图片")
+        try:
+            reply = input("   是否提取图片到本地文件？(y/n): ")
+        except (EOFError, KeyboardInterrupt):
+            return False  # an aborted prompt counts as "no"
+        return reply.strip().lower() in ("y", "yes")
+    return False  # non-interactive terminal: never prompt
+
+
 def main():
     parser = argparse.ArgumentParser(
         description="Convert various file formats to markdown.",
@@ -138,6 +162,24 @@ def main():
         help="Keep data URIs (like base64-encoded images) in the output. By default, data URIs are truncated.",
     )
 
+    parser.add_argument(
+        "--extract-images",
+        action="store_true",
+        help="Extract embedded images from DOCX/PDF to a local directory.",
+    )
+
+    parser.add_argument(
+        "--no-extract-images",
+        action="store_true",
+        help="Do not extract images (skip interactive prompt).",
+    )
+
+    parser.add_argument(
+        "--images-dir",
+        default="images",
+        help="Base directory name for extracted images (default: images). A timestamp suffix is added.",
+    )
+
     parser.add_argument("filename", nargs="?")
     args = parser.parse_args()
 
@@ -244,25 +286,62 @@ def main():
     else:
         markitdown = MarkItDown(enable_plugins=args.use_plugins)
 
+    # --- Image extraction logic ---
+    extract_images = False
+    if args.extract_images:
+        extract_images = True
+    elif args.no_extract_images:
+        extract_images = False
+    elif args.filename and args.output and args.filename.lower().endswith(".docx"):
+        count = count_docx_images(args.filename)
+        if count > 0:
+            extract_images = ask_extract_images(count)
+
+    # Build the conversion kwargs
+    convert_kwargs: Dict[str, Any] = {
+        "keep_data_uris": args.keep_data_uris,
+    }
+
+    if extract_images and args.output:
+        images_dir_name = _timestamped_images_dir_name(args.images_dir or "images")
+        args._actual_images_dir = images_dir_name
+        abs_images_dir = os.path.join(
+            os.path.dirname(os.path.abspath(args.output)),
+            images_dir_name,
+        )
+        os.makedirs(abs_images_dir, exist_ok=True)
+        convert_kwargs["extract_images"] = True
+        convert_kwargs["images_dir"] = abs_images_dir
+        convert_kwargs["images_rel_dir"] = images_dir_name
+        # extract_images takes precedence over keep_data_uris
+        convert_kwargs["keep_data_uris"] = False
+
+    # --- Conversion ---
     if args.filename is None:
         result = markitdown.convert_stream(
             sys.stdin.buffer,
             stream_info=stream_info,
-            keep_data_uris=args.keep_data_uris,
+            **convert_kwargs,
         )
     else:
         result = markitdown.convert(
-            args.filename, stream_info=stream_info, keep_data_uris=args.keep_data_uris
+            args.filename,
+            stream_info=stream_info,
+            **convert_kwargs,
         )
 
-    _handle_output(args, result)
+    _handle_output(args, result, extract_images=extract_images)
 
 
-def _handle_output(args, result: DocumentConverterResult):
+def _handle_output(args, result: DocumentConverterResult, extract_images: bool = False):
     """Handle output to stdout or file"""
     if args.output:
         with open(args.output, "w", encoding="utf-8") as f:
             f.write(result.markdown)
+        if extract_images:
+            images_dir = getattr(args, "_actual_images_dir", args.images_dir or "images")
+            print(f"[OK] Generated {args.output}")
+            print(f"[OK] Images extracted to ./{images_dir}/")
     else:
         # Handle stdout encoding errors more gracefully
         print(
@@ -277,5 +356,10 @@ def _exit_with_error(message: str):
     sys.exit(1)
 
 
+def _timestamped_images_dir_name(base_name: str) -> str:
+    """Return *base_name* without trailing path separators, suffixed ``_YYYYMMDD_HHMMSS`` (current local time)."""
+    return f"{base_name.rstrip(os.sep + (os.altsep or ''))}_{datetime.now():%Y%m%d_%H%M%S}"
+
+
 if __name__ == "__main__":
     main()
diff --git a/packages/markitdown/src/markitdown/converters/_docx_converter.py b/packages/markitdown/src/markitdown/converters/_docx_converter.py
index 3975107b1..39cb1c346 100644
--- a/packages/markitdown/src/markitdown/converters/_docx_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py
@@ -1,5 +1,8 @@
 import sys
 import io
+import os
+import re
+import zipfile
 from warnings import warn
 
 from typing import BinaryIO, Any
@@ -75,9 +78,106 @@ def convert(
                 _dependency_exc_info[2]
             )
 
+        extract_images = kwargs.get("extract_images", False)
+        media_files: list[str] = []
+
+        # ① Extraction phase: pull the original images out of the ZIP (in document order)
+        if extract_images:
+            file_stream.seek(0)
+            zip_bytes = io.BytesIO(file_stream.read())
+
+            with zipfile.ZipFile(zip_bytes) as z:
+                media_files = self._get_media_in_doc_order(z)
+
+                if media_files:
+                    images_dir = kwargs["images_dir"]
+                    os.makedirs(images_dir, exist_ok=True)
+
+                    for i, media_file in enumerate(media_files, 1):
+                        ext = os.path.splitext(media_file)[1]
+                        if ext.lower() == ".jpeg":
+                            ext = ".jpg"
+                        if not ext:
+                            data = z.read(media_file)
+                            ext = self._detect_ext(data)
+                        filename = f"image_{i}{ext}"
+                        with open(os.path.join(images_dir, filename), "wb") as f:
+                            f.write(z.read(media_file))
+
+            file_stream.seek(0)
+
+        # ② Convert to HTML with mammoth
         style_map = kwargs.get("style_map", None)
         pre_process_stream = pre_process_docx(file_stream)
-        return self._html_converter.convert_string(
-            mammoth.convert_to_html(pre_process_stream, style_map=style_map).value,
-            **kwargs,
-        )
+        html_value = mammoth.convert_to_html(
+            pre_process_stream, style_map=style_map
+        ).value
+
+        # ③ Replace base64 data URIs with relative paths
+        if extract_images and media_files:
+            images_rel = kwargs.get("images_rel_dir", "images")
+            for i, media_file in enumerate(media_files, 1):
+                ext = os.path.splitext(media_file)[1]
+                if ext.lower() == ".jpeg":
+                    ext = ".jpg"
+                filename = f"image_{i}{ext}"
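+                # Replace the next mammoth data: URI with the file path. NOTE(review): unlike ①, ext is not run through _detect_ext here, so an extension-less media part is linked under a name that differs from the file written above.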
+                html_value = re.sub(
+                    r'<img([^>]*)src="data:image/[^"]+"',
+                    f'<img\\1src="{images_rel}/{filename}"',
+                    html_value,
+                    count=1,
+                )
+
+        # ④ HTML → Markdown
+        return self._html_converter.convert_string(html_value, **kwargs)
+
+    @staticmethod
+    def _get_media_in_doc_order(z: zipfile.ZipFile) -> list[str]:
+        """Return ZIP paths of embedded images in document order (one entry per ``a:blip`` reference that resolves to a file)."""
+        from xml.etree.ElementTree import fromstring
+
+        names = set(z.namelist())
+        try:
+            # 1. rels: map relationship id -> ZIP member path of the media part.
+            rid_to_media: dict[str, str] = {}
+            for rel in fromstring(z.read("word/_rels/document.xml.rels")):
+                target = rel.get("Target", "")
+                # Targets are either relative to word/ or absolute part names ("/word/media/...").
+                member = target[1:] if target.startswith("/") else f"word/{target}"
+                # Skip non-media and dangling targets so callers can z.read() every returned path.
+                if member.startswith("word/media/") and member in names:
+                    rid_to_media[rel.get("Id", "")] = member
+
+            # 2. document.xml: collect the referenced ids in order of appearance.
+            doc_root = fromstring(z.read("word/document.xml"))
+            ns_a = "{http://schemas.openxmlformats.org/drawingml/2006/main}"
+            ns_r = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"
+            ordered_media: list[str] = []
+            for blip in doc_root.iter(f"{ns_a}blip"):
+                rid = blip.get(f"{ns_r}embed")
+                if rid and rid in rid_to_media:
+                    ordered_media.append(rid_to_media[rid])
+
+            return ordered_media
+        except Exception:
+            # Best-effort fallback: every media file, naturally sorted by the digits in its name.
+            raw = [f for f in z.namelist() if f.startswith("word/media/") and not f.endswith("/")]
+            return sorted(raw, key=lambda p: int("".join(c for c in os.path.basename(p) if c.isdigit()) or "0"))
+
+    @staticmethod
+    def _detect_ext(data: bytes) -> str:
+        """Guess an image file extension from the leading magic bytes of *data* (``.png`` when unrecognised)."""
+        if data[:4] == b"RIFF" and len(data) > 12 and data[8:12] == b"WEBP":
+            return ".webp"  # RIFF container: the format tag sits at offset 8
+        signatures = (
+            (b"\x89PNG\r\n\x1a\n", ".png"),
+            (b"\xff\xd8", ".jpg"),
+            (b"GIF8", ".gif"),
+            (b"BM", ".bmp"),
+            (b"\x00\x00\x01\x00", ".ico"),
+        )
+        for magic, ext in signatures:
+            if data.startswith(magic):
+                return ext
+        return ".png"  # default
diff --git a/packages/markitdown/src/markitdown/converters/_pdf_converter.py b/packages/markitdown/src/markitdown/converters/_pdf_converter.py
index ffbcbd990..6ffa610f1 100644
--- a/packages/markitdown/src/markitdown/converters/_pdf_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_pdf_converter.py
@@ -1,5 +1,6 @@
 import sys
 import io
+import os
 import re
 from typing import BinaryIO, Any
 
@@ -492,6 +493,169 @@ def _extract_tables_from_words(page: Any) -> list[list[list[str]]]:
     return [table_rows]
 
 
+def _detect_image_ext(data: bytes) -> str | None:
+    """Return the extension matching the magic bytes at the start of *data*, or ``None`` if unrecognised."""
+    if data[:4] == b"RIFF" and len(data) > 12 and data[8:12] == b"WEBP":
+        return ".webp"  # RIFF container: the format tag sits at offset 8
+    for magic, ext in (
+        (b"\x89PNG\r\n\x1a\n", ".png"),
+        (b"\xff\xd8", ".jpg"),
+        (b"GIF8", ".gif"),
+        (b"BM", ".bmp"),
+    ):
+        if data.startswith(magic):
+            return ext
+    return None
+
+
+def _write_pdf_image(  # Write one PDF image to images_dir and describe where it belongs on the page.
+    page: Any,  # page the image came from; only used for the rendered-crop fallback
+    image: dict,  # image record; read keys: stream, srcsize, width, height, colorspace, x0, x1, top, bottom
+    images_dir: str,  # directory the file is written to (created if missing)
+    images_rel_dir: str,  # directory prefix used in the returned Markdown link
+    image_index: int,  # number used in the file name and in the alt text
+) -> dict[str, Any] | None:  # {"top", "markdown"}, or None when no image bytes could be produced
+    image_bytes = b""
+    ext: str | None = None
+    stream = image.get("stream")
+    # Attempt 1: use the bytes of the image's own stream.
+    if stream is not None and hasattr(stream, "get_data"):
+        try:
+            raw_bytes = stream.get_data()
+            raw_ext = _detect_image_ext(raw_bytes)
+            if raw_ext is not None:
+                image_bytes = raw_bytes  # recognised signature: store the bytes unchanged
+                ext = raw_ext
+            else:
+                try:
+                    from PIL import Image  # type: ignore[import-not-found]
+                    # Unknown signature: let Pillow parse it, else interpret the bytes as raw pixels.
+                    try:
+                        pil_image = Image.open(io.BytesIO(raw_bytes))
+                    except Exception:
+                        width, height = image.get("srcsize") or (
+                            image.get("width"),
+                            image.get("height"),
+                        )
+                        colorspace = str(image.get("colorspace", "")).lower()
+                        mode = "L" if "gray" in colorspace else "RGB"
+                        pil_image = Image.frombytes(
+                            mode,
+                            (int(width), int(height)),
+                            raw_bytes,
+                        )
+                    # Re-encode the decoded image as PNG.
+                    with pil_image:
+                        if pil_image.mode not in ("RGB", "L", "RGBA"):
+                            pil_image = pil_image.convert("RGB")
+                        out = io.BytesIO()
+                        pil_image.save(out, format="PNG")
+                        image_bytes = out.getvalue()
+                        ext = ".png"
+                except Exception:
+                    pass  # best effort: fall through to the rendered-crop fallback below
+        except Exception:
+            pass  # best effort: fall through to the rendered-crop fallback below
+    # Attempt 2: render the image's bounding box from the page and save that as PNG.
+    if not image_bytes:
+        try:
+            x0 = image.get("x0", 0)
+            x1 = image.get("x1", 0)
+            top = image.get("top", 0)
+            bottom = image.get("bottom", 0)
+            if x1 <= x0 or bottom <= top:
+                return None  # empty or inverted box: nothing to render
+
+            cropped_page = page.within_bbox((x0, top, x1, bottom))
+            page_image = cropped_page.to_image(resolution=150)
+            out = io.BytesIO()
+            page_image.original.save(out, format="PNG")
+            image_bytes = out.getvalue()
+            ext = ".png"
+        except Exception:
+            return None  # neither attempt produced bytes: the caller skips this image
+
+    if ext is None:  # defensive default
+        ext = ".png"
+
+    filename = f"image_{image_index}{ext}"
+    os.makedirs(images_dir, exist_ok=True)
+    with open(os.path.join(images_dir, filename), "wb") as image_file:
+        image_file.write(image_bytes)
+
+    return {
+        "top": image.get("top", 0),  # vertical position, used by the caller to order page items
+        "markdown": f"![image_{image_index}]({images_rel_dir}/{filename})",
+    }
+
+
+def _extract_text_lines_with_positions(page: Any) -> list[dict[str, Any]]:
+    """Return the page's text lines as ``{"top", "text"}`` dicts, top to bottom; words are grouped into 5-unit bands of ``top``."""
+    words = page.extract_words(keep_blank_chars=True, x_tolerance=3, y_tolerance=3)
+    if words:
+        band = 5  # words whose top rounds to the same multiple of this share a line
+        banded: dict[float, list[dict]] = {}
+        for w in words:
+            banded.setdefault(round(w["top"] / band) * band, []).append(w)
+        result: list[dict[str, Any]] = []
+        for top in sorted(banded):
+            ordered = sorted(banded[top], key=lambda w: w["x0"])  # left to right
+            joined = " ".join(w["text"] for w in ordered).strip()
+            if joined:
+                result.append({"top": top, "text": joined})
+        return result
+
+    # No positioned words: fall back to plain text, using the line index as a stand-in position.
+    text = page.extract_text()
+    if not (text and text.strip()):
+        return []
+    fallback: list[dict[str, Any]] = []
+    for idx, raw_line in enumerate(text.splitlines()):
+        if raw_line.strip():
+            fallback.append({"top": float(idx), "text": raw_line.strip()})
+    return fallback
+
+
+def _extract_pdf_with_images(
+    pdf_bytes: io.BytesIO,
+    images_dir: str,
+    images_rel_dir: str,
+) -> str:
+    """Render the PDF as Markdown page by page, interleaving text lines and saved-image links by vertical position."""
+    pages_markdown: list[str] = []
+    next_index = 1  # image numbering runs across the whole document, not per page
+
+    with pdfplumber.open(pdf_bytes) as pdf:
+        for page in pdf.pages:
+            # Text lines, each tagged with its vertical position on the page.
+            entries: list[dict[str, Any]] = []
+            for line in _extract_text_lines_with_positions(page):
+                entries.append({"top": line["top"], "markdown": line["text"]})
+
+            # Images: only those that could be written get an entry and consume an index.
+            for image in getattr(page, "images", []) or []:
+                written = _write_pdf_image(
+                    page, image, images_dir, images_rel_dir, next_index
+                )
+                if written is None:
+                    continue
+                entries.append(written)
+                next_index += 1
+
+            # Stable sort by position: on a tie, text stays ahead of images.
+            entries.sort(key=lambda entry: entry["top"])
+            rendered = "\n\n".join(
+                entry["markdown"] for entry in entries if entry["markdown"].strip()
+            )
+            if rendered.strip():
+                pages_markdown.append(rendered.strip())
+
+            page.close()
+
+    # Pages are separated by a blank line.
+    return "\n\n".join(pages_markdown).strip()
+
+
 class PdfConverter(DocumentConverter):
     """
     Converts PDFs to Markdown.
@@ -539,6 +703,22 @@ def convert(
         # Read file stream into BytesIO for compatibility with pdfplumber
         pdf_bytes = io.BytesIO(file_stream.read())
 
+        if kwargs.get("extract_images", False):
+            images_dir = kwargs["images_dir"]
+            images_rel_dir = kwargs.get("images_rel_dir", "images")
+            try:
+                markdown = _extract_pdf_with_images(
+                    pdf_bytes,
+                    images_dir=images_dir,
+                    images_rel_dir=images_rel_dir,
+                )
+            except Exception:
+                pdf_bytes.seek(0)
+                markdown = pdfminer.high_level.extract_text(pdf_bytes)
+
+            markdown = _merge_partial_numbering_lines(markdown)
+            return DocumentConverterResult(markdown=markdown)
+
         try:
             # Single pass: check every page for form-style content.
             # Pages with tables/forms get rich extraction; plain-text
diff --git a/packages/markitdown/tests/test_files/pdf_image_middle.pdf b/packages/markitdown/tests/test_files/pdf_image_middle.pdf
new file mode 100644
index 000000000..d90bc9d3e
Binary files /dev/null and b/packages/markitdown/tests/test_files/pdf_image_middle.pdf differ
diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py
index 4d62e4919..fff9ef614 100644
--- a/packages/markitdown/tests/test_module_misc.py
+++ b/packages/markitdown/tests/test_module_misc.py
@@ -3,7 +3,9 @@
 import os
 import re
 import shutil
+import subprocess
 import pytest
+import sys
 from unittest.mock import MagicMock
 
 from markitdown._uri_utils import parse_data_uri, file_uri_to_path
@@ -107,6 +109,16 @@ def validate_strings(result, expected_strings, exclude_strings=None):
             assert string not in text_content
 
 
+def _has_pdf_dependencies() -> bool:
+    """Return True when both optional PDF packages (pdfminer, pdfplumber) can be imported."""
+    try:
+        import pdfminer  # noqa: F401
+        import pdfplumber  # noqa: F401
+    except ImportError:  # also covers ModuleNotFoundError; a broken install should skip, not fail collection
+        return False
+    return True
+
+
 def test_stream_info_operations() -> None:
     """Test operations performed on StreamInfo objects."""
 
@@ -220,6 +232,71 @@ def test_data_uris() -> None:
     assert data == b"Hello, World!"
 
 
+@pytest.mark.skipif(
+    not _has_pdf_dependencies(),
+    reason="PDF optional dependencies not installed",
+)
+def test_pdf_extract_images_to_markdown(tmp_path) -> None:
+    """Extracting images from a PDF writes the image file and links it between the surrounding text."""
+    images_dir = tmp_path / "images"
+
+    markdown = MarkItDown().convert(
+        os.path.join(TEST_FILES_DIR, "pdf_image_middle.pdf"),
+        extract_images=True,
+        images_dir=str(images_dir),
+        images_rel_dir="images",
+    ).markdown
+
+    # All three snippets must be present, with the image link placed
+    # after the intro text and before the next section heading.
+    intro = "Here is some introductory text."
+    link = "![image_1](images/image_1."
+    section = "Section 2: Details"
+    for snippet in (intro, link, section):
+        assert snippet in markdown
+    assert markdown.index(intro) < markdown.index(link) < markdown.index(section)
+
+    # Exactly one non-empty image_1.* file is written.
+    written = list(images_dir.glob("image_1.*"))
+    assert len(written) == 1
+    assert written[0].stat().st_size > 0
+
+
+@pytest.mark.skipif(
+    not _has_pdf_dependencies(),
+    reason="PDF optional dependencies not installed",
+)
+def test_cli_pdf_extract_images_uses_timestamped_dir(tmp_path) -> None:
+    """The CLI puts extracted images in an ``<--images-dir>_*`` directory beside the output and links to it."""
+    output_path = tmp_path / "out.md"
+    command = [
+        sys.executable,
+        "-m",
+        "markitdown",
+        os.path.join(TEST_FILES_DIR, "pdf_image_middle.pdf"),
+        "-o",
+        str(output_path),
+        "--extract-images",
+        "--images-dir",
+        "assets",
+    ]
+    completed = subprocess.run(command, capture_output=True, text=True)
+    assert completed.returncode == 0, completed.stderr
+    markdown = output_path.read_text(encoding="utf-8")
+
+    # Exactly one "assets_*" directory exists next to the output file, and the Markdown links into it.
+    candidates = list(tmp_path.glob("assets_*"))
+    assert len(candidates) == 1
+    assets_dir = candidates[0]
+    assert assets_dir.is_dir()
+    assert f"![image_1]({assets_dir.name}/image_1." in markdown
+
+    # Exactly one non-empty image_1.* file is written inside it.
+    extracted = list(assets_dir.glob("image_1.*"))
+    assert len(extracted) == 1
+    assert extracted[0].stat().st_size > 0
+
+
 def test_file_uris() -> None:
     # Test file URI with an empty host
     file_uri = "file:///path/to/file.txt"