Initial commit: DOCX to Markdown converter with improved heading level handling

This commit is contained in:
maxwell 2025-09-04 16:56:18 +08:00
commit 043d053b12
29 changed files with 3722 additions and 0 deletions

135
.gitignore vendored Normal file
View File

@ -0,0 +1,135 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# IDE-specific files
.idea/
.vscode/
*.swp
*.swo
*~
# OS-specific files
.DS_Store
Thumbs.db
# Project-specific ignores
*.docx
*.zip
test_*.md
test_*.docx
test_multilevel.md
test_chinese.md
images/

71
README.md Normal file
View File

@ -0,0 +1,71 @@
# DOCX to Markdown Converter
This Python script converts DOCX files to Markdown format, preserving formatting such as headings, bold, italic, underline, strikethrough, and highlight. It also extracts images from the DOCX file and saves them in an `images` directory.
## Features
- Converts DOCX to Markdown format
- Preserves text formatting (headings, bold, italic, underline, strikethrough, highlight)
- Extracts images and saves them in an `images` directory
- Processes tables and converts them to Markdown format
- Command-line interface for specifying input and output files
## Requirements
- Python 3.x
- python-docx library
Install the required dependencies with:
```bash
pip install python-docx
```
## Usage
```bash
python docx_to_md.py <input.docx> [output_directory]
```
### Examples
```bash
# Convert a DOCX file to Markdown (output to current directory)
python docx_to_md.py document.docx
# Convert a DOCX file to Markdown with a specific output directory
python docx_to_md.py document.docx /path/to/output/directory
# If not specified, the output directory defaults to the current directory
python docx_to_md.py document.docx
```
The output Markdown file will have the same name as the input DOCX file, but with a `.md` extension.
## How It Works
1. The script reads the DOCX file using the `python-docx` library
2. It extracts all images from the document and saves them in an `images` subdirectory
3. It processes paragraphs, preserving formatting:
- Headings are converted to Markdown headings (#, ##, ###, etc.)
- Bold text is wrapped in `**`
- Italic text is wrapped in `*`
- Underlined text is wrapped in `*`
- Strikethrough text is wrapped in `~~`
- Highlighted text is wrapped in `**`
4. Tables are converted to Markdown table format
5. The output is written to the specified Markdown file
## Output Structure
The script creates the following structure:
```
<input-name>.md # The main Markdown file (named after the input DOCX)
images/ # Directory containing extracted images
image_1.png
image_2.png
...
```
## License
This project is licensed under the MIT License.

74
analyze_outline.py Normal file
View File

@ -0,0 +1,74 @@
import docx
import argparse
import os
import re
def analyze_document_structure(docx_path):
    """Print a report of the heading paragraphs found in a DOCX file.

    A paragraph counts as a heading when its style name starts with
    ``Heading`` or contains ``标题`` / ``標題``.  Two listings are printed:
    first the raw headings (style name, level parsed from the style name,
    amount of leading whitespace in the text), then a level guessed from the
    numbering pattern at the start of each heading's text.

    Args:
        docx_path (str): Path to the DOCX file to inspect.

    Returns:
        None. The report is written to standard output.
    """
    # Literal alternation: a character class such as [标题] matches ONE of the
    # listed characters (e.g. just "题"), not the word "标题".
    style_level_re = re.compile(r'(?:[Hh]eading|标题|標題)\s*(\d+)')
    # Numbering patterns common in Chinese documents; both ASCII and
    # full-width punctuation are accepted.
    chapter_re = re.compile(r'第[一二三四五六七八九十\d]+[章篇节]')
    section_re = re.compile(r'[一二三四五六七八九十\d]+[、.．]')
    subsection_re = re.compile(r'[(（][一二三四五六七八九十\d]+[)）]')

    doc = docx.Document(docx_path)

    # Collect all paragraphs with heading styles
    heading_paragraphs = []
    for i, paragraph in enumerate(doc.paragraphs):
        style_name = paragraph.style.name
        is_heading_style = (
            style_name.startswith('Heading')
            or '标题' in style_name
            or '標題' in style_name
        )
        if not is_heading_style:
            continue
        # Extract level number from style name if possible
        level_match = style_level_re.search(style_name)
        level = int(level_match.group(1)) if level_match else None
        text = paragraph.text
        heading_paragraphs.append({
            'index': i,
            'text': text,
            'style': style_name,
            'style_level': level,
            # Simple indent detection: number of leading whitespace characters
            'indent': len(text) - len(text.lstrip()),
        })

    print("Document structure analysis:")
    print(f"Total heading paragraphs found: {len(heading_paragraphs)}")

    # Print all heading paragraphs
    for i, heading in enumerate(heading_paragraphs):
        print(f"{i+1:2d}. Style: '{heading['style']}', Level: {heading['style_level']}, Indent: {heading['indent']}")
        print(f" Text: {heading['text'][:100]}")

    # Headings may all share one style while sitting at different logical
    # depths, so a second pass guesses the level from the text itself.
    print("\nAnalyzing document structure to determine actual heading levels:")
    for i, heading in enumerate(heading_paragraphs):
        text = heading['text'].strip()
        actual_level = 1  # Default to top level (also used for chapter headings)
        if chapter_re.match(text):
            actual_level = 1  # Chapter level
        elif section_re.match(text):
            actual_level = 2  # Section level
        elif subsection_re.match(text):
            actual_level = 3  # Subsection level
        print(f"{i+1:2d}. Text: {text[:30]:30s} | Style level: {heading['style_level'] or 'None':8} | Actual level: {actual_level}")
if __name__ == "__main__":
    # Command-line entry point: one positional argument naming the DOCX file.
    parser = argparse.ArgumentParser(description="Analyze document structure in DOCX file")
    parser.add_argument("docx_file", help="Path to the DOCX file")
    args = parser.parse_args()
    if not os.path.exists(args.docx_file):
        print(f"File {args.docx_file} not found")
        # Exit non-zero so shells and calling scripts can detect the failure.
        raise SystemExit(1)
    analyze_document_structure(args.docx_file)

36
create_test_doc.py Normal file
View File

@ -0,0 +1,36 @@
import docx
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE
import os
def create_chinese_test_document(output_path='test_chinese.docx'):
    """Create a small DOCX fixture with Chinese text under built-in heading styles.

    The document contains a title, one "Heading 1" chapter and two
    "Heading 2" sections, each followed by a body paragraph.  The built-in
    (English-named) styles are used; only the text is Chinese.

    Args:
        output_path (str): Where to save the document. Defaults to
            ``test_chinese.docx`` in the current working directory.
    """
    doc = docx.Document()

    # Title followed by a plain paragraph
    doc.add_paragraph('文档标题', style='Title')
    doc.add_paragraph('这是文档中的一些常规文本。')

    # Chapter heading and its content
    doc.add_paragraph('第一章 简介', style='Heading 1')
    doc.add_paragraph('这是第一章的内容。')

    # Two second-level sections
    doc.add_paragraph('1.1 背景', style='Heading 2')
    doc.add_paragraph('这是1.1节的内容。')
    doc.add_paragraph('1.2 目标', style='Heading 2')
    doc.add_paragraph('这是1.2节的内容。')

    # Save the document
    doc.save(output_path)
    print(f"Test document '{output_path}' created successfully.")


if __name__ == "__main__":
    create_chinese_test_document()

306
docx_to_md.py Normal file
View File

@ -0,0 +1,306 @@
import docx
import os
import argparse
from docx.shared import Inches
from docx.enum.text import WD_COLOR_INDEX
from docx.oxml.shared import qn
from docx.oxml import OxmlElement
import re
def get_used_outline_levels(doc):
    """Return the sorted, de-duplicated outline levels found on paragraph styles.

    For every paragraph in ``doc.paragraphs`` the paragraph's style element is
    inspected; when it has paragraph properties with an outline level, that
    level's ``val`` is recorded.

    Args:
        doc: A document object exposing ``paragraphs``.

    Returns:
        list: The distinct outline level values in ascending order.
    """
    found = set()
    for para in doc.paragraphs:
        try:
            style_ppr = para.style._element.pPr
            if style_ppr is None:
                continue
            outline = style_ppr.outlineLvl
            if outline is None:
                continue
            found.add(outline.val)
        except AttributeError:
            # Best effort: a paragraph whose style lacks any of these
            # attributes simply contributes no level.
            # NOTE(review): if the pPr element class does not expose
            # `outlineLvl`, every paragraph ends up here and the result is
            # always empty — confirm against the installed python-docx.
            continue
    return sorted(found)
# "Heading"/"heading", Simplified "标题" or Traditional "標題", optionally
# followed by whitespace, then the level digits.  The words are spelled out as
# alternatives: a character class like [标题] would match a single character
# (e.g. only "题") and misclassify unrelated style names.
_HEADING_STYLE_RE = re.compile(r'(?:[Hh]eading|标题|標題)\s*(\d+)')


def get_heading_level_from_style(style_name):
    """Extract the heading level from a style name.

    Supports English and Chinese style names, with or without a space before
    the number: "Heading 1", "heading2", "标题 1", "标题1", "標題 3".

    Args:
        style_name (str): The paragraph style name. May be empty or None.

    Returns:
        int or None: The level number found in the name, or None when the
        name does not look like a numbered heading style.
    """
    if not style_name:
        return None
    level_match = _HEADING_STYLE_RE.search(style_name)
    if level_match is None:
        return None
    return int(level_match.group(1))
def map_outline_levels_to_markdown_levels(outline_levels):
    """Map document outline levels to Markdown heading levels.

    The smallest outline level value becomes ``#`` (level 1), the next
    distinct value ``##`` and so on.  Markdown only defines six heading
    levels, so deeper ranks are clamped to 6.

    Args:
        outline_levels: Iterable of outline level values; duplicates are
            ignored. May be empty.

    Returns:
        dict: ``{outline_level: markdown_level}`` with markdown levels in 1..6.
    """
    if not outline_levels:
        return {}
    max_markdown_level = 6
    # De-duplicate first so repeated values do not leave gaps in the ranking.
    ranked = enumerate(sorted(set(outline_levels)), start=1)
    return {level: min(rank, max_markdown_level) for rank, level in ranked}
# Markdown defines heading levels 1 through 6 only.
_MAX_MD_HEADING_LEVEL = 6


def _save_embedded_images(doc, images_dir):
    """Write every embedded image of the main document part into images_dir.

    Returns a dict mapping relationship id (rId) to the saved file name.
    """
    image_mapping = {}
    for rel in doc.part.rels.values():
        # Select by relationship type rather than by file name, and skip
        # external targets (hyperlinks, linked pictures): they have no
        # package part to read bytes from.
        if rel.is_external or not rel.reltype.endswith("/image"):
            continue
        # NOTE(review): every image is saved with a .png extension regardless
        # of its actual format.
        image_filename = f"image_{len(image_mapping) + 1}.png"
        with open(os.path.join(images_dir, image_filename), "wb") as f:
            f.write(rel.target_part.blob)
        image_mapping[rel.rId] = image_filename
    return image_mapping


def _markdown_heading_level(para, level_mapping):
    """Return the Markdown heading level (1-6) for para, or None for body text."""
    level = None
    # First, try the outline level declared on the paragraph's style.
    try:
        style_ppr = para.style._element.pPr
        if style_ppr is not None and style_ppr.outlineLvl is not None:
            level = level_mapping.get(style_ppr.outlineLvl.val)
    except AttributeError:
        # Best effort: fall through to the style-name heuristic.
        pass
    # Otherwise, derive the level from the style name ("Heading 2", "标题 2").
    if level is None:
        level = get_heading_level_from_style(para.style.name)
    if level is None:
        return None
    return max(1, min(level, _MAX_MD_HEADING_LEVEL))


def _format_run(run):
    """Return the Markdown text for a single run ("" when the run has no text)."""
    text = run.text
    if not text:
        return ""
    core = text.strip()
    if not core:
        # Whitespace-only runs carry no visible formatting; keep them verbatim.
        return text

    highlight = run.font.highlight_color
    # NOTE(review): WD_COLOR_INDEX.NONE may not exist in every python-docx
    # release; it is looked up defensively so a highlighted run cannot raise
    # AttributeError — confirm against the installed version.
    no_highlight = getattr(WD_COLOR_INDEX, "NONE", None)
    # Highlight is approximated as bold; underline (no Markdown equivalent)
    # as emphasis.  Each marker is applied once, so italic+underline no longer
    # collapses into "**" (bold) and bold+highlight no longer yields "****".
    strong = bool(run.bold) or (bool(highlight) and highlight != no_highlight)
    emphasised = bool(run.italic) or bool(run.underline)
    marker = ("**" if strong else "") + ("*" if emphasised else "")
    strike = bool(run.font.strike)
    if not marker and not strike:
        return text

    # Markers must hug the text: "**bold **" is not rendered as bold, so
    # surrounding whitespace is kept outside the markers.
    lead = text[:len(text) - len(text.lstrip())]
    trail = text[len(text.rstrip()):]
    formatted = f"{marker}{core}{marker}"
    if strike:
        formatted = f"~~{formatted}~~"
    return lead + formatted + trail


def _inline_image_filenames(para, image_mapping):
    """Return the saved file names of the images referenced inside para."""
    filenames = []
    for drawing in para._element.iter(qn('w:drawing')):
        for blip in drawing.iter(qn('a:blip')):
            # The embed attribute references the image relationship.
            rId = blip.get(qn('r:embed'))
            if rId and rId in image_mapping:
                filenames.append(image_mapping[rId])
    return filenames


def convert_docx_to_md(docx_path, md_path):
    """
    Convert a DOCX file to Markdown format.

    Paragraphs and tables are emitted in document order.  Heading paragraphs
    become ``#`` headings (empty ones are skipped); other paragraphs keep
    bold/italic/underline/strikethrough/highlight as Markdown emphasis.
    Embedded images are saved to an ``images`` directory next to the Markdown
    file and referenced after the paragraph that contains them.

    Args:
        docx_path (str): Path to the input DOCX file
        md_path (str): Path to the output MD file
    """
    # Imported here because `import docx` alone is not guaranteed to expose
    # these submodules as attributes.
    from docx.table import Table
    from docx.text.paragraph import Paragraph

    # Load the document
    doc = docx.Document(docx_path)

    # Create directory for images if it doesn't exist
    images_dir = os.path.join(os.path.dirname(md_path), "images")
    os.makedirs(images_dir, exist_ok=True)

    # Extract all images first and create a mapping rId -> file name
    image_mapping = _save_embedded_images(doc, images_dir)

    # Outline levels actually used in the document, mapped to Markdown levels
    used_outline_levels = get_used_outline_levels(doc)
    level_mapping = map_outline_levels_to_markdown_levels(used_outline_levels)
    # Print debug information
    print(f"Used outline levels in document: {used_outline_levels}")
    print(f"Mapping to Markdown levels: {level_mapping}")

    paragraph_tag = qn('w:p')
    table_tag = qn('w:tbl')
    md_content = []
    table_counter = 0

    # Walk the body's direct children so paragraphs and tables keep their order.
    for element in doc.element.body.xpath('./*'):
        if element.tag == paragraph_tag:
            para = Paragraph(element, doc)
            heading_level = _markdown_heading_level(para, level_mapping)
            if heading_level is not None:
                heading_text = para.text.strip()
                if heading_text:
                    md_content.append('#' * heading_level + ' ' + heading_text + '\n')
            else:
                para_content = "".join(_format_run(run) for run in para.runs)
                if para_content.strip():
                    md_content.append(para_content + '\n')
            # Images follow the paragraph (heading or body) they are anchored in.
            for image_filename in _inline_image_filenames(para, image_mapping):
                md_content.append(f"\n![Image](images/{image_filename})\n")
        elif element.tag == table_tag:
            # Wrap the element directly instead of searching doc.tables for a
            # table with identical serialized XML.
            table_counter += 1
            md_content.append(f"\n<!-- Table {table_counter} -->\n")
            md_content.append(convert_table_to_md(Table(element, doc)))

    # Write to file
    with open(md_path, "w", encoding="utf-8") as f:
        f.write('\n'.join(md_content))
def convert_table_to_md(table):
    """
    Convert a DOCX table to Markdown format.

    The first row is used as the header row.  Rows with fewer cells than the
    widest row are padded with empty cells.  Inside a cell, line breaks become
    ``<br>`` and ``|`` is escaped as ``\\|`` so cell text cannot split a column.

    Args:
        table: A python-docx table object (anything exposing ``rows`` whose
            items expose ``cells`` with a ``text`` attribute)

    Returns:
        str: Markdown formatted table followed by a newline, or "" when the
        table has no rows
    """
    rows_data = []
    for row in table.rows:
        rows_data.append([
            # Escape pipes before inserting <br> so cell text cannot be
            # mistaken for a column separator.
            cell.text.strip().replace('|', '\\|').replace('\n', '<br>')
            for cell in row.cells
        ])
    if not rows_data:
        return ""

    max_cells = max(len(row_data) for row_data in rows_data)
    md_table = []
    for index, row_data in enumerate(rows_data):
        # Ensure all rows have the same number of cells
        padded = row_data + [""] * (max_cells - len(row_data))
        md_table.append("| " + " | ".join(padded) + " |")
        if index == 0:
            # Separator row directly after the header row
            md_table.append("| " + " | ".join(["---"] * max_cells) + " |")
    md_table.append("")  # Add blank line after table
    return "\n".join(md_table)
def extract_images_from_docx(docx_path, images_dir):
    """
    Extract images from a DOCX file to a specified directory.

    Only images embedded in the package are written.  Relationships are
    selected by type rather than by file name, and external targets
    (hyperlinks, linked pictures) are skipped because they have no package
    part to read bytes from.

    Args:
        docx_path (str): Path to the DOCX file
        images_dir (str): Directory to save images (created if missing)

    Returns:
        list: List of image filenames, in the order they were written
    """
    doc = docx.Document(docx_path)
    os.makedirs(images_dir, exist_ok=True)

    image_filenames = []
    for rel in doc.part.rels.values():
        if rel.is_external or not rel.reltype.endswith("/image"):
            continue
        # NOTE(review): every image is saved with a .png extension regardless
        # of its actual format.
        image_filename = f"image_{len(image_filenames) + 1}.png"
        with open(os.path.join(images_dir, image_filename), "wb") as f:
            f.write(rel.target_part.blob)
        image_filenames.append(image_filename)
    return image_filenames
if __name__ == "__main__":
    # Command-line entry point: docx_to_md.py <input.docx> [output_directory]
    parser = argparse.ArgumentParser(description="Convert DOCX file to Markdown format")
    parser.add_argument("docx_file", help="Path to the input DOCX file")
    parser.add_argument("output_dir", nargs='?', default=".", help="Output directory (default: current directory)")
    args = parser.parse_args()
    docx_file = args.docx_file
    output_dir = args.output_dir

    # Validate the input first so a bad path does not leave an empty output
    # directory behind.
    if not os.path.exists(docx_file):
        print(f"File {docx_file} not found")
        # SystemExit works even where the site-provided exit() is unavailable.
        raise SystemExit(1)

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Generate MD filename based on DOCX filename
    docx_basename = os.path.splitext(os.path.basename(docx_file))[0]
    md_file = os.path.join(output_dir, docx_basename + ".md")

    convert_docx_to_md(docx_file, md_file)
    print(f"Converted {docx_file} to {md_file}")

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
python-docx>=0.8.11

1531
test123/testfile.md Normal file

File diff suppressed because it is too large Load Diff

1531
testfile.md Normal file

File diff suppressed because it is too large Load Diff

2
testfile2/[Content_Types].xml Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="xml" ContentType="application/xml"/><Default Extension="png" ContentType="image/png"/><Default Extension="jpeg" ContentType="image/jpeg"/><Default Extension="JPG" ContentType="image/.jpg"/><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/><Override PartName="/customXml/itemProps1.xml" ContentType="application/vnd.openxmlformats-officedocument.customXmlProperties+xml"/><Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/><Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/><Override PartName="/docProps/custom.xml" ContentType="application/vnd.openxmlformats-officedocument.custom-properties+xml"/><Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/><Override PartName="/word/fontTable.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"/><Override PartName="/word/footer1.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/><Override PartName="/word/header1.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/><Override PartName="/word/header2.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/><Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/><Override PartName="/word/settings.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"/><Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/><Override PartName="/word/theme/theme1.xml" 
ContentType="application/vnd.openxmlformats-officedocument.theme+xml"/></Types>

2
testfile2/_rels/.rels Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/><Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties" Target="docProps/custom.xml"/></Relationships>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXmlProps" Target="itemProps1.xml"/></Relationships>

1
testfile2/customXml/item1.xml Executable file
View File

@ -0,0 +1 @@
<?xml version="1.0" standalone="no"?><b:Sources SelectedStyle="\APASixthEditionOfficeOnline.xsl" StyleName="APA" Version="6" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"></b:Sources>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ds:datastoreItem ds:itemID="{05B1D8CB-18B6-4998-ADB3-F0506F1C54F4}" xmlns:ds="http://schemas.openxmlformats.org/officeDocument/2006/customXml"><ds:schemaRefs/></ds:datastoreItem>

2
testfile2/docProps/app.xml Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><Template>Normal</Template><Company>微软中国</Company><Pages>62</Pages><Words>8814</Words><Characters>9605</Characters><TotalTime>3</TotalTime><ScaleCrop>false</ScaleCrop><LinksUpToDate>false</LinksUpToDate><CharactersWithSpaces>9638</CharactersWithSpaces><Application>WPS Office_12.1.0.20305_F1E327BC-269C-435d-A152-05C5408002CA</Application><DocSecurity>0</DocSecurity></Properties>

2
testfile2/docProps/core.xml Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dcmitype="http://purl.org/dc/dcmitype/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><dcterms:created xsi:type="dcterms:W3CDTF">2024-07-19T03:11:00Z</dcterms:created><dc:creator>user</dc:creator><cp:lastModifiedBy>MaxwellRobert</cp:lastModifiedBy><dcterms:modified xsi:type="dcterms:W3CDTF">2025-04-15T02:04:50Z</dcterms:modified><dc:title>苏 州 市 政 府 采 购</dc:title><cp:revision>47</cp:revision></cp:coreProperties>

2
testfile2/docProps/custom.xml Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties" xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="2" name="KSOProductBuildVer"><vt:lpwstr>2052-12.1.0.20305</vt:lpwstr></property><property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="3" name="ICV"><vt:lpwstr>81B56B0885BE464988AB3E9E30F558A6_13</vt:lpwstr></property><property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="4" name="KSOTemplateDocerSaveRecord"><vt:lpwstr>eyJoZGlkIjoiM2Q0OWNiYTJkNzViZjE2ZWQyMGEyYjc4MGIwNGM3YTkiLCJ1c2VySWQiOiIzOTQ4NDE0MjYifQ==</vt:lpwstr></property></Properties>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId9" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml" Target="../customXml/item1.xml"/><Relationship Id="rId8" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering" Target="numbering.xml"/><Relationship Id="rId7" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image2.jpeg"/><Relationship Id="rId6" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme" Target="theme/theme1.xml"/><Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" Target="footer1.xml"/><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" Target="header2.xml"/><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" Target="header1.xml"/><Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings" Target="settings.xml"/><Relationship Id="rId10" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable" Target="fontTable.xml"/><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/></Relationships>

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image1.png"/></Relationships>

2
testfile2/word/document.xml Executable file

File diff suppressed because one or more lines are too long

2
testfile2/word/fontTable.xml Executable file

File diff suppressed because one or more lines are too long

2
testfile2/word/footer1.xml Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:ftr xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:wpsCustomData="http://www.wps.cn/officeDocument/2013/wpsCustomData" mc:Ignorable="w14 w15 wp14"><w:p w14:paraId="550D77A6"><w:pPr><w:pStyle w:val="258"/><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr></w:pPr><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:drawing><wp:inline distT="0" distB="0" distL="0" distR="0"><wp:extent cx="212725" cy="137160"/><wp:effectExtent l="0" t="0" r="0" b="0"/><wp:docPr id="1" name="图片 17"/><wp:cNvGraphicFramePr><a:graphicFrameLocks xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" noChangeAspect="1"/></wp:cNvGraphicFramePr><a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:nvPicPr><pic:cNvPr id="1" 
name="图片 17"/><pic:cNvPicPr><a:picLocks noChangeAspect="1"/></pic:cNvPicPr></pic:nvPicPr><pic:blipFill><a:blip r:embed="rId1"/><a:stretch><a:fillRect/></a:stretch></pic:blipFill><pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="218851" cy="140835"/></a:xfrm><a:prstGeom prst="rect"><a:avLst/></a:prstGeom><a:noFill/></pic:spPr></pic:pic></a:graphicData></a:graphic></wp:inline></w:drawing></w:r><w:r><w:rPr><w:rFonts w:hint="eastAsia" w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t xml:space="preserve">苏州诚和招投标咨询有限公司 </w:t></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t></w:t></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:instrText xml:space="preserve">PAGE \* MERGEFORMAT</w:instrText></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/><w:lang w:val="zh-CN"/></w:rPr><w:t>17</w:t></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t></w:t></w:r></w:p></w:ftr>

2
testfile2/word/header1.xml Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:hdr xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:wpsCustomData="http://www.wps.cn/officeDocument/2013/wpsCustomData" mc:Ignorable="w14 w15 wp14"><w:p w14:paraId="0EEADFAB"><w:pPr><w:pStyle w:val="304"/><w:jc w:val="left"/><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr></w:pPr><w:r><w:rPr><w:rFonts w:hint="eastAsia" w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t>政府采购招标文件 采购编号:</w:t></w:r><w:r><w:rPr><w:rFonts w:ascii="宋体" w:hAnsi="宋体" w:eastAsia="宋体"/></w:rPr><w:t>JSZC-320500-SZCH-G2025-0018</w:t></w:r></w:p></w:hdr>

2
testfile2/word/header2.xml Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:hdr xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:wpsCustomData="http://www.wps.cn/officeDocument/2013/wpsCustomData" mc:Ignorable="w14 w15 wp14"><w:p w14:paraId="41E4E977"><w:pPr><w:pStyle w:val="304"/><w:pBdr><w:bottom w:val="none" w:color="000000" w:sz="0" w:space="0"/></w:pBdr><w:jc w:val="both"/></w:pPr></w:p></w:hdr>

BIN
testfile2/word/media/image1.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

BIN
testfile2/word/media/image2.jpeg Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

2
testfile2/word/numbering.xml Executable file

File diff suppressed because one or more lines are too long

2
testfile2/word/settings.xml Executable file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:settings xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:sl="http://schemas.openxmlformats.org/schemaLibrary/2006/main" xmlns:wpsCustomData="http://www.wps.cn/officeDocument/2013/wpsCustomData" mc:Ignorable="w14"><w:zoom w:percent="160"/><w:doNotDisplayPageBoundaries w:val="1"/><w:documentProtection w:enforcement="0"/><w:defaultTabStop w:val="425"/><w:characterSpacingControl w:val="doNotCompress"/><w:compat><w:balanceSingleByteDoubleByteWidth/><w:useFELayout/><w:compatSetting w:name="compatibilityMode" w:uri="http://schemas.microsoft.com/office/word" w:val="15"/><w:compatSetting w:name="overrideTableStyleFontSizeAndJustification" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="enableOpenTypeFeatures" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/><w:compatSetting w:name="doNotFlipMirrorIndents" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/></w:compat><w:rsids><w:rsidRoot w:val="00000000"/><w:rsid w:val="75446633"/></w:rsids><m:mathPr><m:mathFont m:val="Cambria Math"/><m:brkBin m:val="before"/><m:brkBinSub m:val="--"/><m:smallFrac m:val="0"/><m:dispDef/><m:lMargin m:val="0"/><m:rMargin m:val="0"/><m:defJc m:val="centerGroup"/><m:wrapIndent m:val="1440"/><m:intLim m:val="subSup"/><m:naryLim m:val="undOvr"/></m:mathPr><w:themeFontLang w:val="en-US" w:eastAsia="zh-CN"/><w:clrSchemeMapping w:bg1="light1" w:t1="dark1" w:bg2="light2" w:t2="dark2" w:accent1="accent1" w:accent2="accent2" w:accent3="accent3" w:accent4="accent4" w:accent5="accent5" 
w:accent6="accent6" w:hyperlink="hyperlink" w:followedHyperlink="followedHyperlink"/><w:shapeDefaults><o:shapedefaults fillcolor="#FFFFFF" fill="t" stroke="t"><v:fill on="t" focussize="0,0"/><v:stroke color="#000000"/></o:shapedefaults><o:shapelayout v:ext="edit"><o:idmap v:ext="edit" data="1"/></o:shapelayout></w:shapeDefaults></w:settings>

2
testfile2/word/styles.xml Executable file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<a:theme xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" name="Office"><a:themeElements><a:clrScheme name=""><a:dk1><a:srgbClr val="000000"/></a:dk1><a:lt1><a:srgbClr val="FFFFFF"/></a:lt1><a:dk2><a:srgbClr val="1F497D"/></a:dk2><a:lt2><a:srgbClr val="EEECE1"/></a:lt2><a:accent1><a:srgbClr val="4F81BD"/></a:accent1><a:accent2><a:srgbClr val="C0504D"/></a:accent2><a:accent3><a:srgbClr val="9BBB59"/></a:accent3><a:accent4><a:srgbClr val="8064A2"/></a:accent4><a:accent5><a:srgbClr val="4BACC6"/></a:accent5><a:accent6><a:srgbClr val="F79646"/></a:accent6><a:hlink><a:srgbClr val="0000FF"/></a:hlink><a:folHlink><a:srgbClr val="800080"/></a:folHlink></a:clrScheme><a:fontScheme name=""><a:majorFont><a:latin typeface="Arial"/><a:ea typeface="黑体"/><a:cs typeface="Arial"/></a:majorFont><a:minorFont><a:latin typeface="Arial"/><a:ea typeface="宋体"/><a:cs typeface="Arial"/></a:minorFont></a:fontScheme><a:fmtScheme name=""><a:fillStyleLst><a:solidFill><a:schemeClr val="phClr"/></a:solidFill><a:solidFill><a:srgbClr val="FFFFFF"/></a:solidFill><a:solidFill><a:srgbClr val="FFFFFF"/></a:solidFill></a:fillStyleLst><a:lnStyleLst><a:ln w="9525"><a:solidFill><a:schemeClr val="phClr"><a:shade val="95000"/><a:satMod val="105000"/></a:schemeClr></a:solidFill></a:ln><a:ln w="25400"><a:solidFill><a:schemeClr val="phClr"/></a:solidFill></a:ln><a:ln w="38100"><a:solidFill><a:schemeClr val="phClr"/></a:solidFill></a:ln></a:lnStyleLst><a:effectStyleLst><a:effectStyle><a:effectLst><a:outerShdw blurRad="40000" dist="20000" dir="5400000" rotWithShape="0"><a:srgbClr val="000000"><a:alpha val="38000"/></a:srgbClr></a:outerShdw></a:effectLst></a:effectStyle><a:effectStyle><a:effectLst><a:outerShdw blurRad="40000" dist="23000" dir="5400000" rotWithShape="0"><a:srgbClr val="000000"><a:alpha val="35000"/></a:srgbClr></a:outerShdw></a:effectLst></a:effectStyle><a:effectStyle><a:effectLst><a:outerShdw blurRad="40000" dist="23000" dir="5400000" rotWithShape="0"><a:srgbClr val="000000"><a:alpha 
val="35000"/></a:srgbClr></a:outerShdw></a:effectLst></a:effectStyle></a:effectStyleLst><a:bgFillStyleLst><a:solidFill><a:schemeClr val="phClr"/></a:solidFill><a:solidFill><a:srgbClr val="000000"/></a:solidFill><a:solidFill><a:srgbClr val="000000"/></a:solidFill></a:bgFillStyleLst></a:fmtScheme></a:themeElements><a:objectDefaults/></a:theme>