Initial commit: DOCX to Markdown converter with improved heading level handling

2025-09-04 16:56:18 +08:00 · 2025-09-04 16:56:18 +08:00 · 043d053b12
commit 043d053b12
29 changed files with 3722 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,135 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# IDE-specific files
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# OS-specific files
+.DS_Store
+Thumbs.db
+
+# Project-specific ignores
+*.docx
+*.zip
+test_*.md
+test_*.docx
+test_multilevel.md
+test_chinese.md
+images/
--- a/README.md
+++ b/README.md
@ -0,0 +1,71 @@
+# DOCX to Markdown Converter
+
+This Python script converts DOCX files to Markdown format, preserving formatting such as headings, bold, italic, underline, strikethrough, and highlight. It also extracts images from the DOCX file and saves them in an `images` directory.
+
+## Features
+
+- Converts DOCX to Markdown format
+- Preserves text formatting (headings, bold, italic, underline, strikethrough, highlight)
+- Extracts images and saves them in an `images` directory
+- Processes tables and converts them to Markdown format
+- Command-line interface for specifying input and output files
+
+## Requirements
+
+- Python 3.x
+- python-docx library
+
+Install the required dependencies with:
+```bash
+pip install python-docx
+```
+
+## Usage
+
+```bash
+python docx_to_md.py <input.docx> [output_directory]
+```
+
+### Examples
+
+```bash
+# Convert a DOCX file to Markdown (output to current directory)
+python docx_to_md.py document.docx
+
+# Convert a DOCX file to Markdown with a specific output directory
+python docx_to_md.py document.docx /path/to/output/directory
+
+# If not specified, the output directory defaults to the current directory
+python docx_to_md.py document.docx
+```
+
+The output Markdown file will have the same name as the input DOCX file, but with a `.md` extension.
+
+## How It Works
+
+1. The script reads the DOCX file using the `python-docx` library
+2. It extracts all images from the document and saves them in an `images` subdirectory
+3. It processes paragraphs, preserving formatting:
+   - Headings are converted to Markdown headings (#, ##, ###, etc.)
+   - Bold text is wrapped in `**`
+   - Italic text is wrapped in `*`
+   - Underlined text is wrapped in `*` (Markdown has no underline syntax, so it is rendered as emphasis)
+   - Strikethrough text is wrapped in `~~`
+   - Highlighted text is wrapped in `**` (rendered as bold, as an approximation)
+4. Tables are converted to Markdown table format
+5. The output is written to the specified Markdown file
+
+## Output Structure
+
+The script creates the following structure:
+```
+<input_name>.md    # The main Markdown file (named after the input DOCX)
+images/            # Directory containing extracted images
+  image_1.png
+  image_2.png
+  ...
+```
+
+## License
+
+This project is licensed under the MIT License.
--- a/analyze_outline.py
+++ b/analyze_outline.py
@ -0,0 +1,74 @@
+import docx
+import argparse
+import os
+import re
+
+def analyze_document_structure(docx_path):
+    """Analyze document structure to determine heading levels"""
+    doc = docx.Document(docx_path)
+    
+    # Collect all paragraphs with heading styles
+    heading_paragraphs = []
+    
+    for i, paragraph in enumerate(doc.paragraphs):
+        style_name = paragraph.style.name
+        
+        # Check for heading styles by name
+        if style_name.startswith('Heading') or '标题' in style_name:
+            # Extract level number from style name if possible
+            level_match = re.search(r'[标题Hh]eading\s*(\d+)|[标题標題]\s*(\d+)', style_name)
+            level = None
+            if level_match:
+                level = int(level_match.group(1) or level_match.group(2))
+            
+            heading_paragraphs.append({
+                'index': i,
+                'text': paragraph.text,
+                'style': style_name,
+                'style_level': level,
+                'indent': len(paragraph.text) - len(paragraph.text.lstrip())  # Simple indent detection
+            })
+    
+    print("Document structure analysis:")
+    print(f"Total heading paragraphs found: {len(heading_paragraphs)}")
+    
+    # Print all heading paragraphs
+    for i, heading in enumerate(heading_paragraphs):
+        print(f"{i+1:2d}. Style: '{heading['style']}', Level: {heading['style_level']}, Indent: {heading['indent']}")
+        print(f"    Text: {heading['text'][:100]}")
+    
+    # Determine actual levels based on document structure
+    print("\nAnalyzing document structure to determine actual heading levels:")
+    
+    # Simple approach: assume all headings with same style are at same level
+    # For this document, all are "标题 11" but they are clearly different levels in document structure
+    # We'll need to analyze content to determine real levels
+    
+    # Let's look at the text patterns to determine levels
+    for i, heading in enumerate(heading_paragraphs):
+        text = heading['text'].strip()
+        # Common patterns for chapter/section headings in Chinese documents
+        chapter_match = re.match(r'第[一二三四五六七八九十\d]+[章篇节]', text)
+        section_match = re.match(r'[一二三四五六七八九十\d]+[、.]', text)
+        subsection_match = re.match(r'[(（][一二三四五六七八九十\d]+[)）]', text)
+        
+        actual_level = 1  # Default to top level
+        if chapter_match:
+            actual_level = 1  # Chapter level
+        elif section_match:
+            actual_level = 2  # Section level
+        elif subsection_match:
+            actual_level = 3  # Subsection level
+            
+        print(f"{i+1:2d}. Text: {text[:30]:30s} | Style level: {heading['style_level'] or 'None':8} | Actual level: {actual_level}")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Analyze document structure in DOCX file")
+    parser.add_argument("docx_file", help="Path to the DOCX file")
+    
+    args = parser.parse_args()
+    
+    if os.path.exists(args.docx_file):
+        analyze_document_structure(args.docx_file)
+    else:
+        print(f"File {args.docx_file} not found")
--- a/create_test_doc.py
+++ b/create_test_doc.py
@ -0,0 +1,36 @@
+import docx
+from docx.shared import Pt
+from docx.enum.style import WD_STYLE_TYPE
+import os
+
+def create_chinese_test_document():
+    """Create a test document with Chinese heading styles"""
+    # Create a new document
+    doc = docx.Document()
+    
+    # Add title
+    title = doc.add_paragraph('文档标题', style='Title')
+    
+    # Add some regular text
+    doc.add_paragraph('这是文档中的一些常规文本。')
+    
+    # Try to use Chinese heading styles
+    # For this test, we'll use the default heading styles but with Chinese text
+    heading1 = doc.add_paragraph('第一章 简介', style='Heading 1')
+    
+    doc.add_paragraph('这是第一章的内容。')
+    
+    heading2 = doc.add_paragraph('1.1 背景', style='Heading 2')
+    
+    doc.add_paragraph('这是1.1节的内容。')
+    
+    heading2_2 = doc.add_paragraph('1.2 目标', style='Heading 2')
+    
+    doc.add_paragraph('这是1.2节的内容。')
+    
+    # Save the document
+    doc.save('test_chinese.docx')
+    print("Test document 'test_chinese.docx' created successfully.")
+
+# Script entry point: generate the sample document when this file is run directly.
+if __name__ == "__main__":
+    create_chinese_test_document()
--- a/docx_to_md.py
+++ b/docx_to_md.py
@ -0,0 +1,306 @@
+import docx
+import os
+import argparse
+from docx.shared import Inches
+from docx.enum.text import WD_COLOR_INDEX
+from docx.oxml.shared import qn
+from docx.oxml import OxmlElement
+import re
+
+def get_used_outline_levels(doc):
+    """Get all outline levels that are actually used in the document paragraphs"""
+    outline_levels = set()
+    
+    for paragraph in doc.paragraphs:
+        try:
+            # Check if paragraph has outline level defined
+            if paragraph.style._element.pPr is not None and paragraph.style._element.pPr.outlineLvl is not None:
+                level = paragraph.style._element.pPr.outlineLvl.val
+                outline_levels.add(level)
+        except AttributeError:
+            pass
+    
+    return sorted(outline_levels)
+
+def get_heading_level_from_style(style_name):
+    """Extract heading level from style name, supporting both English and Chinese styles"""
+    # Check for patterns like "Heading 1", "标题 1", "标题1", etc.
+    level_match = re.search(r'[Hh]eading\s*(\d+)|[标题標題]\s*(\d+)|[标题标题]\s*(\d+)', style_name)
+    if level_match:
+        return int(level_match.group(1) or level_match.group(2) or level_match.group(3))
+    
+    # Check for patterns like "Heading1", "标题1", etc. (no space)
+    level_match = re.search(r'[Hh]eading(\d+)|[标题標題](\d+)|[标题标题](\d+)', style_name)
+    if level_match:
+        return int(level_match.group(1) or level_match.group(2) or level_match.group(3))
+    
+    return None
+
+def map_outline_levels_to_markdown_levels(outline_levels):
+    """Map document outline levels to markdown heading levels (highest = #, next = ##, etc.)"""
+    if not outline_levels:
+        return {}
+    
+    # Map outline levels to markdown levels (lowest outline level value = highest heading level)
+    # In Word, outline level 0 is the highest, level 1 is next, etc.
+    level_mapping = {}
+    for i, level in enumerate(sorted(outline_levels)):
+        level_mapping[level] = i + 1
+    
+    return level_mapping
+
+def convert_docx_to_md(docx_path, md_path):
+    """
+    Convert a DOCX file to Markdown format.
+    
+    Args:
+        docx_path (str): Path to the input DOCX file
+        md_path (str): Path to the output MD file
+    """
+    # Load the document
+    doc = docx.Document(docx_path)
+    
+    # Create directory for images if it doesn't exist
+    md_dir = os.path.dirname(md_path)
+    images_dir = os.path.join(md_dir, "images")
+    if not os.path.exists(images_dir):
+        os.makedirs(images_dir)
+    
+    md_content = []
+    image_count = 1
+    
+    # Extract all images first and create a mapping
+    image_mapping = {}
+    for rel in doc.part.rels.values():
+        if "image" in rel.target_ref:
+            image = rel.target_part.blob
+            image_filename = f"image_{image_count}.png"
+            image_path = os.path.join(images_dir, image_filename)
+            
+            with open(image_path, "wb") as f:
+                f.write(image)
+            
+            # Store the relationship ID and image filename
+            image_mapping[rel.rId] = image_filename
+            image_count += 1
+    
+    # Get outline levels that are actually used in the document and create mapping to markdown levels
+    used_outline_levels = get_used_outline_levels(doc)
+    level_mapping = map_outline_levels_to_markdown_levels(used_outline_levels)
+    
+    # Print debug information
+    print(f"Used outline levels in document: {used_outline_levels}")
+    print(f"Mapping to Markdown levels: {level_mapping}")
+    
+    # Create a more sophisticated approach to handle document structure
+    # We'll iterate through the document's XML elements to preserve order
+    
+    # Get all body elements in order
+    body_elements = doc.element.body.xpath('./*')
+    
+    # Keep track of which tables we've processed
+    processed_tables = set()
+    
+    # Process each element in order
+    table_counter = 0
+    for element in body_elements:
+        # Check if it's a paragraph
+        if element.tag.endswith('p'):
+            # Convert to paragraph object
+            para = docx.text.paragraph.Paragraph(element, doc)
+            
+            # Handle headings based on outline level or style name
+            md_heading_level = None
+            
+            # First, try to get outline level from the paragraph's style
+            try:
+                if para.style._element.pPr is not None and para.style._element.pPr.outlineLvl is not None:
+                    outline_level = para.style._element.pPr.outlineLvl.val
+                    # Map to markdown level
+                    if outline_level in level_mapping:
+                        md_heading_level = level_mapping[outline_level]
+            except AttributeError:
+                pass
+            
+            # If we can't get outline level, try to extract from style name
+            if md_heading_level is None:
+                style_level = get_heading_level_from_style(para.style.name)
+                if style_level is not None:
+                    # For style-based levels, we'll map them directly but cap at reasonable levels
+                    md_heading_level = min(style_level, 6)  # Markdown supports up to 6 levels
+            
+            if md_heading_level is not None:
+                # Convert to Markdown heading
+                md_content.append('#' * md_heading_level + ' ' + para.text + '\n')
+            else:
+                # Process runs for formatting
+                para_content = ""
+                for run in para.runs:
+                    text = run.text
+                    
+                    # Skip empty text
+                    if not text:
+                        continue
+                        
+                    # Handle bold
+                    if run.bold:
+                        text = f"**{text}**"
+                    # Handle italic
+                    if run.italic:
+                        text = f"*{text}*"
+                    # Handle underline (not standard in MD, using emphasis)
+                    if run.underline:
+                        text = f"*{text}*"
+                    # Handle strikethrough
+                    if run.font.strike:
+                        text = f"~~{text}~~"
+                    # Handle highlight (convert to bold as approximation)
+                    if run.font.highlight_color and run.font.highlight_color != WD_COLOR_INDEX.NONE:
+                        text = f"**{text}**"
+                        
+                    para_content += text
+                
+                # Check for inline images in this paragraph
+                inline_images = []
+                # Look for drawing elements in the paragraph
+                drawing_elements = para._element.findall('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}drawing')
+                
+                for drawing in drawing_elements:
+                    # Find the blip (image) element
+                    blip_elements = drawing.findall('.//{http://schemas.openxmlformats.org/drawingml/2006/main}blip')
+                    
+                    for blip in blip_elements:
+                        # Get the embed attribute which references the image relationship
+                        rId = blip.get('{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed')
+                        if rId and rId in image_mapping:
+                            inline_images.append(image_mapping[rId])
+                
+                # Add paragraph content
+                if para_content.strip() or inline_images:
+                    # Add the paragraph text
+                    if para_content.strip():
+                        md_content.append(para_content + '\n')
+                    
+                    # Add inline images that belong to this paragraph
+                    for image_filename in inline_images:
+                        md_content.append(f"\n![Image](images/{image_filename})\n")
+        
+        # Check if it's a table
+        elif element.tag.endswith('tbl'):
+            # Find the corresponding table object
+            for i, table in enumerate(doc.tables):
+                if i not in processed_tables and table._element.xml == element.xml:
+                    table_counter += 1
+                    md_table = convert_table_to_md(table)
+                    md_content.append(f"\n<!-- Table {table_counter} -->\n")
+                    md_content.append(md_table)
+                    processed_tables.add(i)
+                    break
+    
+    # Write to file
+    with open(md_path, "w", encoding="utf-8") as f:
+        f.write('\n'.join(md_content))
+
+def convert_table_to_md(table):
+    """
+    Convert a DOCX table to Markdown format.
+    
+    Args:
+        table: A python-docx table object
+        
+    Returns:
+        str: Markdown formatted table
+    """
+    md_table = []
+    
+    # Process all rows to find max cells per row
+    rows_data = []
+    max_cells = 0
+    
+    for row in table.rows:
+        row_data = []
+        for cell in row.cells:
+            # Clean up cell text
+            cell_text = cell.text.strip().replace('\n', '<br>')
+            row_data.append(cell_text)
+        rows_data.append(row_data)
+        max_cells = max(max_cells, len(row_data))
+    
+    # Ensure all rows have the same number of cells
+    for row_data in rows_data:
+        while len(row_data) < max_cells:
+            row_data.append("")
+    
+    # Process header row
+    if rows_data:
+        header = "| " + " | ".join(rows_data[0]) + " |"
+        md_table.append(header)
+        
+        # Add separator row
+        separator = "| " + " | ".join(["---" for _ in range(max_cells)]) + " |"
+        md_table.append(separator)
+        
+        # Process data rows
+        for row_data in rows_data[1:]:
+            row_str = "| " + " | ".join(row_data) + " |"
+            md_table.append(row_str)
+    
+    md_table.append("")  # Add blank line after table
+    return "\n".join(md_table)
+
+def extract_images_from_docx(docx_path, images_dir):
+    """
+    Extract images from a DOCX file to a specified directory.
+    
+    Args:
+        docx_path (str): Path to the DOCX file
+        images_dir (str): Directory to save images
+        
+    Returns:
+        list: List of image filenames
+    """
+    doc = docx.Document(docx_path)
+    image_filenames = []
+    
+    if not os.path.exists(images_dir):
+        os.makedirs(images_dir)
+    
+    image_count = 1
+    for rel in doc.part.rels.values():
+        if "image" in rel.target_ref:
+            image = rel.target_part.blob
+            image_filename = f"image_{image_count}.png"
+            image_path = os.path.join(images_dir, image_filename)
+            
+            with open(image_path, "wb") as f:
+                f.write(image)
+            
+            image_filenames.append(image_filename)
+            image_count += 1
+    
+    return image_filenames
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert DOCX file to Markdown format")
+    parser.add_argument("docx_file", help="Path to the input DOCX file")
+    parser.add_argument("output_dir", nargs='?', default=".", help="Output directory (default: current directory)")
+    
+    args = parser.parse_args()
+    
+    docx_file = args.docx_file
+    output_dir = args.output_dir
+    
+    # Create output directory if it doesn't exist
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    
+    # Generate MD filename based on DOCX filename
+    docx_basename = os.path.splitext(os.path.basename(docx_file))[0]
+    md_file = os.path.join(output_dir, docx_basename + ".md")
+    
+    if os.path.exists(docx_file):
+        convert_docx_to_md(docx_file, md_file)
+        print(f"Converted {docx_file} to {md_file}")
+    else:
+        print(f"File {docx_file} not found")
+        exit(1)
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1 @@
+python-docx>=0.8.11
--- a/test123/testfile.md
+++ b/test123/testfile.md
--- a/testfile.md
+++ b/testfile.md
--- a/testfile2/[Content_Types].xml
+++ b/testfile2/[Content_Types].xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="xml" ContentType="application/xml"/><Default Extension="png" ContentType="image/png"/><Default Extension="jpeg" ContentType="image/jpeg"/><Default Extension="JPG" ContentType="image/.jpg"/><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/><Override PartName="/customXml/itemProps1.xml" ContentType="application/vnd.openxmlformats-officedocument.customXmlProperties+xml"/><Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/><Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/><Override PartName="/docProps/custom.xml" ContentType="application/vnd.openxmlformats-officedocument.custom-properties+xml"/><Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/><Override PartName="/word/fontTable.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"/><Override PartName="/word/footer1.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/><Override PartName="/word/header1.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/><Override PartName="/word/header2.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/><Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/><Override PartName="/word/settings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"/><Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/><Override PartName="/word/theme/theme1.xml" 
ContentType="application/vnd.openxmlformats-officedocument.theme+xml"/></Types>
--- a/testfile2/_rels/.rels
+++ b/testfile2/_rels/.rels
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/><Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties" Target="docProps/custom.xml"/></Relationships>
--- a/testfile2/customXml/_rels/item1.xml.rels
+++ b/testfile2/customXml/_rels/item1.xml.rels
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXmlProps" Target="itemProps1.xml"/></Relationships>
--- a/testfile2/customXml/item1.xml
+++ b/testfile2/customXml/item1.xml
@ -0,0 +1 @@
+<?xml version="1.0" standalone="no"?><b:Sources SelectedStyle="\APASixthEditionOfficeOnline.xsl" StyleName="APA" Version="6" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"></b:Sources>
--- a/testfile2/customXml/itemProps1.xml
+++ b/testfile2/customXml/itemProps1.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<ds:datastoreItem ds:itemID="{05B1D8CB-18B6-4998-ADB3-F0506F1C54F4}" xmlns:ds="http://schemas.openxmlformats.org/officeDocument/2006/customXml"><ds:schemaRefs/></ds:datastoreItem>
--- a/testfile2/docProps/app.xml
+++ b/testfile2/docProps/app.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><Template>Normal</Template><Company>微软中国</Company><Pages>62</Pages><Words>8814</Words><Characters>9605</Characters><TotalTime>3</TotalTime><ScaleCrop>false</ScaleCrop><LinksUpToDate>false</LinksUpToDate><CharactersWithSpaces>9638</CharactersWithSpaces><Application>WPS Office_12.1.0.20305_F1E327BC-269C-435d-A152-05C5408002CA</Application><DocSecurity>0</DocSecurity></Properties>
--- a/testfile2/docProps/core.xml
+++ b/testfile2/docProps/core.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcmitype="http://purl.org/dc/dcmitype/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><dcterms:created xsi:type="dcterms:W3CDTF">2024-07-19T03:11:00Z</dcterms:created><dc:creator>user</dc:creator><cp:lastModifiedBy>MaxwellRobert</cp:lastModifiedBy><dcterms:modified xsi:type="dcterms:W3CDTF">2025-04-15T02:04:50Z</dcterms:modified><dc:title>苏 州 市 政 府 采 购</dc:title><cp:revision>47</cp:revision></cp:coreProperties>
--- a/testfile2/docProps/custom.xml
+++ b/testfile2/docProps/custom.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="2" name="KSOProductBuildVer"><vt:lpwstr>2052-12.1.0.20305</vt:lpwstr></property><property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="3" name="ICV"><vt:lpwstr>81B56B0885BE464988AB3E9E30F558A6_13</vt:lpwstr></property><property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="4" name="KSOTemplateDocerSaveRecord"><vt:lpwstr>eyJoZGlkIjoiM2Q0OWNiYTJkNzViZjE2ZWQyMGEyYjc4MGIwNGM3YTkiLCJ1c2VySWQiOiIzOTQ4NDE0MjYifQ==</vt:lpwstr></property></Properties>
--- a/testfile2/word/_rels/document.xml.rels
+++ b/testfile2/word/_rels/document.xml.rels
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId9" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml" Target="../customXml/item1.xml"/><Relationship Id="rId8" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering" Target="numbering.xml"/><Relationship Id="rId7" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image2.jpeg"/><Relationship Id="rId6" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme" Target="theme/theme1.xml"/><Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" Target="footer1.xml"/><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" Target="header2.xml"/><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" Target="header1.xml"/><Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings" Target="settings.xml"/><Relationship Id="rId10" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable" Target="fontTable.xml"/><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/></Relationships>
--- a/testfile2/word/_rels/footer1.xml.rels
+++ b/testfile2/word/_rels/footer1.xml.rels
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image1.png"/></Relationships>
--- a/testfile2/word/document.xml
+++ b/testfile2/word/document.xml
--- a/testfile2/word/fontTable.xml
+++ b/testfile2/word/fontTable.xml
--- a/testfile2/word/footer1.xml
+++ b/testfile2/word/footer1.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w:ftr xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:wpsCustomData="http://www.wps.cn/officeDocument/2013/wpsCustomData" mc:Ignorable="w14 w15 wp14"><w:p w14:paraId="550D77A6"><w:pPr><w:pStyle w:val="258"/><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr></w:pPr><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:drawing><wp:inline distT="0" distB="0" distL="0" distR="0"><wp:extent cx="212725" cy="137160"/><wp:effectExtent l="0" t="0" r="0" b="0"/><wp:docPr id="1" name="图片 17"/><wp:cNvGraphicFramePr><a:graphicFrameLocks xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" noChangeAspect="1"/></wp:cNvGraphicFramePr><a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:nvPicPr><pic:cNvPr id="1" 
name="图片 17"/><pic:cNvPicPr><a:picLocks noChangeAspect="1"/></pic:cNvPicPr></pic:nvPicPr><pic:blipFill><a:blip r:embed="rId1"/><a:stretch><a:fillRect/></a:stretch></pic:blipFill><pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="218851" cy="140835"/></a:xfrm><a:prstGeom prst="rect"><a:avLst/></a:prstGeom><a:noFill/></pic:spPr></pic:pic></a:graphicData></a:graphic></wp:inline></w:drawing></w:r><w:r><w:rPr><w:rFonts w:hint="eastAsia" w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t xml:space="preserve">苏州诚和招投标咨询有限公司                                                                   </w:t></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t>第</w:t></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:instrText xml:space="preserve">PAGE   \* MERGEFORMAT</w:instrText></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/><w:lang w:val="zh-CN"/></w:rPr><w:t>17</w:t></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t>页</w:t></w:r></w:p></w:ftr>
--- a/testfile2/word/header1.xml
+++ b/testfile2/word/header1.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w:hdr xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:wpsCustomData="http://www.wps.cn/officeDocument/2013/wpsCustomData" mc:Ignorable="w14 w15 wp14"><w:p w14:paraId="0EEADFAB"><w:pPr><w:pStyle w:val="304"/><w:jc w:val="left"/><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr></w:pPr><w:r><w:rPr><w:rFonts w:hint="eastAsia" w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t>政府采购招标文件                                                     采购编号：</w:t></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t>JSZC-320500-SZCH-G2025-0018</w:t></w:r></w:p></w:hdr>
--- a/testfile2/word/header2.xml
+++ b/testfile2/word/header2.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w:hdr xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:wpsCustomData="http://www.wps.cn/officeDocument/2013/wpsCustomData" mc:Ignorable="w14 w15 wp14"><w:p w14:paraId="41E4E977"><w:pPr><w:pStyle w:val="304"/><w:pBdr><w:bottom w:val="none" w:color="000000" w:sz="0" w:space="0"/></w:pBdr><w:jc w:val="both"/></w:pPr></w:p></w:hdr>
--- a/testfile2/word/media/image1.png
+++ b/testfile2/word/media/image1.png
--- a/testfile2/word/media/image2.jpeg
+++ b/testfile2/word/media/image2.jpeg
--- a/testfile2/word/numbering.xml
+++ b/testfile2/word/numbering.xml
--- a/testfile2/word/settings.xml
+++ b/testfile2/word/settings.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w:settings xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:sl="http://schemas.openxmlformats.org/schemaLibrary/2006/main" xmlns:wpsCustomData="http://www.wps.cn/officeDocument/2013/wpsCustomData" mc:Ignorable="w14"><w:zoom w:percent="160"/><w:doNotDisplayPageBoundaries w:val="1"/><w:documentProtection w:enforcement="0"/><w:defaultTabStop w:val="425"/><w:characterSpacingControl w:val="doNotCompress"/><w:compat><w:balanceSingleByteDoubleByteWidth/><w:useFELayout/><w:compatSetting w:name="compatibilityMode" w:uri="http://schemas.microsoft.com/office/word" w:val="15"/><w:compatSetting w:name="overrideTableStyleFontSizeAndJustification" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="enableOpenTypeFeatures" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="doNotFlipMirrorIndents" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/></w:compat><w:rsids><w:rsidRoot w:val="00000000"/><w:rsid w:val="75446633"/></w:rsids><m:mathPr><m:mathFont m:val="Cambria Math"/><m:brkBin m:val="before"/><m:brkBinSub m:val="--"/><m:smallFrac m:val="0"/><m:dispDef/><m:lMargin m:val="0"/><m:rMargin m:val="0"/><m:defJc m:val="centerGroup"/><m:wrapIndent m:val="1440"/><m:intLim m:val="subSup"/><m:naryLim m:val="undOvr"/></m:mathPr><w:themeFontLang w:val="en-US" w:eastAsia="zh-CN"/><w:clrSchemeMapping w:bg1="light1" w:t1="dark1" w:bg2="light2" w:t2="dark2" w:accent1="accent1" w:accent2="accent2" w:accent3="accent3" w:accent4="accent4" w:accent5="accent5" 
w:accent6="accent6" w:hyperlink="hyperlink" w:followedHyperlink="followedHyperlink"/><w:shapeDefaults><o:shapedefaults fillcolor="#FFFFFF" fill="t" stroke="t"><v:fill on="t" focussize="0,0"/><v:stroke color="#000000"/></o:shapedefaults><o:shapelayout v:ext="edit"><o:idmap v:ext="edit" data="1"/></o:shapelayout></w:shapeDefaults></w:settings>
--- a/testfile2/word/styles.xml
+++ b/testfile2/word/styles.xml
--- a/testfile2/word/theme/theme1.xml
+++ b/testfile2/word/theme/theme1.xml
@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<a:theme xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" name="Office"><a:themeElements><a:clrScheme name=""><a:dk1><a:srgbClr val="000000"/></a:dk1><a:lt1><a:srgbClr val="FFFFFF"/></a:lt1><a:dk2><a:srgbClr val="1F497D"/></a:dk2><a:lt2><a:srgbClr val="EEECE1"/></a:lt2><a:accent1><a:srgbClr val="4F81BD"/></a:accent1><a:accent2><a:srgbClr val="C0504D"/></a:accent2><a:accent3><a:srgbClr val="9BBB59"/></a:accent3><a:accent4><a:srgbClr val="8064A2"/></a:accent4><a:accent5><a:srgbClr val="4BACC6"/></a:accent5><a:accent6><a:srgbClr val="F79646"/></a:accent6><a:hlink><a:srgbClr val="0000FF"/></a:hlink><a:folHlink><a:srgbClr val="800080"/></a:folHlink></a:clrScheme><a:fontScheme name=""><a:majorFont><a:latin typeface="Arial"/><a:ea typeface="黑体"/><a:cs typeface="Arial"/></a:majorFont><a:minorFont><a:latin typeface="Arial"/><a:ea typeface="宋体"/><a:cs typeface="Arial"/></a:minorFont></a:fontScheme><a:fmtScheme name=""><a:fillStyleLst><a:solidFill><a:schemeClr val="phClr"/></a:solidFill><a:solidFill><a:srgbClr val="FFFFFF"/></a:solidFill><a:solidFill><a:srgbClr val="FFFFFF"/></a:solidFill></a:fillStyleLst><a:lnStyleLst><a:ln w="9525"><a:solidFill><a:schemeClr val="phClr"><a:shade val="95000"/><a:satMod val="105000"/></a:schemeClr></a:solidFill></a:ln><a:ln w="25400"><a:solidFill><a:schemeClr val="phClr"/></a:solidFill></a:ln><a:ln w="38100"><a:solidFill><a:schemeClr val="phClr"/></a:solidFill></a:ln></a:lnStyleLst><a:effectStyleLst><a:effectStyle><a:effectLst><a:outerShdw blurRad="40000" dist="20000" dir="5400000" rotWithShape="0"><a:srgbClr val="000000"><a:alpha val="38000"/></a:srgbClr></a:outerShdw></a:effectLst></a:effectStyle><a:effectStyle><a:effectLst><a:outerShdw blurRad="40000" dist="23000" dir="5400000" rotWithShape="0"><a:srgbClr val="000000"><a:alpha val="35000"/></a:srgbClr></a:outerShdw></a:effectLst></a:effectStyle><a:effectStyle><a:effectLst><a:outerShdw blurRad="40000" dist="23000" dir="5400000" rotWithShape="0"><a:srgbClr 
val="000000"><a:alpha val="35000"/></a:srgbClr></a:outerShdw></a:effectLst></a:effectStyle></a:effectStyleLst><a:bgFillStyleLst><a:solidFill><a:schemeClr val="phClr"/></a:solidFill><a:solidFill><a:srgbClr val="000000"/></a:solidFill><a:solidFill><a:srgbClr val="000000"/></a:solidFill></a:bgFillStyleLst></a:fmtScheme></a:themeElements><a:objectDefaults/></a:theme>
				`@ -0,0 +1 @@`
				`<?xml version="1.0" standalone="no"?><b:Sources SelectedStyle="\APASixthEditionOfficeOnline.xsl" StyleName="APA" Version="6" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"></b:Sources>`