From d0c8eea5cd38302e32c0bca4efa1136bf154ddb3 Mon Sep 17 00:00:00 2001
From: zws <2985693012@qq.com>
Date: Wed, 29 Oct 2025 14:28:22 +0800
Subject: [PATCH 1/2] Add a tool module for writing Word documents (.docx).
 The tool writes generated report content to a local or specified path in
 .docx format, so users can directly obtain an editable report file. Adds the
 word_writer tool file for exporting text content to a Word document.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PS: This is my first PR. If anything here is inappropriate or does not follow
the project conventions, please point it out! I will gladly revise and keep
improving. Thank you very much for your time and help!
---
 .../tool/common_tool/write_word_tool.py       | 62 +++++++++++++
 .../agent/action/tool/test_write_word_tool.py | 89 +++++++++++++++++++
 2 files changed, 151 insertions(+)
 create mode 100644 agentuniverse/agent/action/tool/common_tool/write_word_tool.py
 create mode 100644 tests/test_agentuniverse/unit/agent/action/tool/test_write_word_tool.py

diff --git a/agentuniverse/agent/action/tool/common_tool/write_word_tool.py b/agentuniverse/agent/action/tool/common_tool/write_word_tool.py
new file mode 100644
index 00000000..4b7cade2
--- /dev/null
+++ b/agentuniverse/agent/action/tool/common_tool/write_word_tool.py
@@ -0,0 +1,62 @@
+import os
+import json
+from typing import Any, Dict
+
+from agentuniverse.agent.action.tool.tool import Tool
+
+
+class WriteWordDocumentTool(Tool):
+    def execute(self, file_path: str, content: str = "", append: bool = False) -> str:
+        directory = os.path.dirname(file_path)
+        if directory and not os.path.exists(directory):
+            try:
+                os.makedirs(directory, exist_ok=True)
+            except Exception as e:
+                return json.dumps(
+                    {"error": f"Failed to create directory: {str(e)}", "file_path": file_path, "status": "error"}
+                )
+
+        try:
+            from docx import Document  # type: ignore
+        except ImportError as e:
+            return json.dumps(
+                {
+                    "error": f"python-docx is required to write Word documents: {str(e)}",
+                    "file_path": file_path,
+                    "status": "error",
+                }
+            )
+
+        if not file_path.lower().endswith(".docx"):
+            return json.dumps(
+                {"error": "The target file must have a .docx extension.", "file_path": file_path, "status": "error"}
+            )
+
+        document = None
+        if append and os.path.exists(file_path):
+            try:
+                document = Document(file_path)
+            except Exception as e:
+                return json.dumps(
+                    {"error": f"Failed to load existing document: {str(e)}", "file_path": file_path, "status": "error"}
+                )
+        else:
+            document = Document()
+
+        try:
+            document.add_paragraph(content)
+            document.save(file_path)
+            file_size = os.path.getsize(file_path)
+            return json.dumps(
+                {
+                    "file_path": file_path,
+                    "bytes_written": len(content.encode("utf-8")),
+                    "file_size": file_size,
+                    "append_mode": append,
+                    "status": "success",
+                }
+            )
+        except Exception as e:
+            return json.dumps(
+                {"error": f"Failed to write document: {str(e)}", "file_path": file_path, "status": "error"}
+            )
diff --git a/tests/test_agentuniverse/unit/agent/action/tool/test_write_word_tool.py b/tests/test_agentuniverse/unit/agent/action/tool/test_write_word_tool.py
new file mode 100644
index 00000000..c987aadd
--- /dev/null
+++ b/tests/test_agentuniverse/unit/agent/action/tool/test_write_word_tool.py
@@ -0,0 +1,89 @@
+import os
+import json
+import tempfile
+import unittest
+from agentuniverse.agent.action.tool.common_tool.write_word_tool import WriteWordDocumentTool
+
+
+class WriteWordDocumentToolTest(unittest.TestCase):
+    def setUp(self):
+        self.tool = WriteWordDocumentTool()
+        self.temp_dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        for root, dirs, files in os.walk(self.temp_dir, topdown=False):
+            for name in files:
+                os.unlink(os.path.join(root, name))
+            for name in dirs:
+                os.rmdir(os.path.join(root, name))
+        os.rmdir(self.temp_dir)
+
+    def test_write_new_word_file(self):
+        file_path = os.path.join(self.temp_dir, "test_new.docx")
+        content = "***This is a test paragraph.***"
+
+        result_json = self.tool.execute(file_path=file_path, content=content, append=False)
+        result = json.loads(result_json)
+
+        self.assertEqual(result["status"], "success")
+        self.assertEqual(result["file_path"], file_path)
+        self.assertTrue(os.path.exists(file_path))
+
+    def test_append_to_word_file(self):
+        file_path = os.path.join(self.temp_dir, "test_append.docx")
+
+        initial_content = "Initial paragraph."
+        self.tool.execute(file_path=file_path, content=initial_content, append=False)
+
+        append_content = "Appended paragraph."
+        result_json = self.tool.execute(file_path=file_path, content=append_content, append=True)
+        result = json.loads(result_json)
+
+        self.assertEqual(result["status"], "success")
+        self.assertEqual(result["append_mode"], True)
+
+    def test_invalid_file_extension(self):
+        file_path = os.path.join(self.temp_dir, "invalid_file.txt")
+        content = "This should fail."
+
+        result_json = self.tool.execute(file_path=file_path, content=content, append=False)
+        result = json.loads(result_json)
+
+        self.assertEqual(result["status"], "error")
+        self.assertIn("The target file must have a .docx extension.", result["error"])
+
+    def test_create_directory_structure(self):
+        file_path = os.path.join(self.temp_dir, "nested/dir/structure/test.docx")
+        content = "Test content in nested directory."
+
+        result_json = self.tool.execute(file_path=file_path, content=content, append=False)
+        result = json.loads(result_json)
+
+        self.assertEqual(result["status"], "success")
+        self.assertTrue(os.path.exists(file_path))
+        self.assertTrue(os.path.isdir(os.path.join(self.temp_dir, "nested/dir/structure")))
+
+    def test_missing_dependency(self):
+        import builtins
+        original_import = builtins.__import__
+        def mock_import(name, *args):
+            if name == "docx":
+                raise ImportError("No module named 'docx'")
+            return original_import(name, *args)
+
+        try:
+            builtins.__import__ = mock_import
+            file_path = os.path.join(self.temp_dir, "test_missing_dependency.docx")
+            content = "This should fail due to missing dependency."
+
+            result_json = self.tool.execute(file_path=file_path, content=content, append=False)
+            result = json.loads(result_json)
+
+            self.assertEqual(result["status"], "error")
+            self.assertIn("python-docx is required to write Word documents", result["error"])
+        finally:
+            builtins.__import__ = original_import
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

From 40b863536621a5b0775e45c4b754dd2499f5ba3c Mon Sep 17 00:00:00 2001
From: Sunflower <129384209+SunFlowerUE@users.noreply.github.com>
Date: Thu, 30 Oct 2025 11:43:25 +0800
Subject: [PATCH 2/2] Enhance reader text handling with encoding detection

---
 .../knowledge/reader/file/csv_reader.py       | 77 +++++++++++--------
 .../knowledge/reader/file/file_reader.py      |  7 ++
 .../knowledge/reader/file/txt_reader.py       | 16 ++--
 .../action/knowledge/reader/reader_manager.py |  3 +
 .../agent/action/knowledge/reader/utils.py    | 69 +++++++++++++++++
 .../reader/file/test_text_encoding.py         | 38 +++++++++
 6 files changed, 168 insertions(+), 42 deletions(-)
 create mode 100644 agentuniverse/agent/action/knowledge/reader/utils.py
 create mode 100644 tests/test_agentuniverse/unit/agent/action/knowledge/reader/file/test_text_encoding.py

diff --git a/agentuniverse/agent/action/knowledge/reader/file/csv_reader.py b/agentuniverse/agent/action/knowledge/reader/file/csv_reader.py
index 4888886a..d37d24c4 100644
--- a/agentuniverse/agent/action/knowledge/reader/file/csv_reader.py
+++ b/agentuniverse/agent/action/knowledge/reader/file/csv_reader.py
@@ -6,71 +6,80 @@
 # @FileName: csv_reader.py
 
 import csv
+import io
 from pathlib import Path
-from typing import List, Union, Optional, Dict
+from typing import List, Union, Optional, Dict, TextIO
 
 from agentuniverse.agent.action.knowledge.reader.reader import Reader
 from agentuniverse.agent.action.knowledge.store.document import Document
+from agentuniverse.agent.action.knowledge.reader.utils import detect_file_encoding
 
 
 class CSVReader(Reader):
     """CSV file reader.
-    
+
     Used to read and parse CSV format files, supports local file paths or file objects as input.
     """
 
-    def _load_data(self,
-                   file: Union[str, Path],
+    def _load_data(self,
+                   file: Union[str, Path, TextIO],
                    delimiter: str = ",",
                    quotechar: str = '"',
                    ext_info: Optional[Dict] = None) -> List[Document]:
-        """Parse CSV file.
-
-        Args:
-            file: CSV file path or file object
-            delimiter: CSV delimiter, default is comma
-            quotechar: Quote character, default is double quote
-            ext_info: Additional metadata information
-
-        Returns:
-            List[Document]: List of documents containing CSV content
-
-        Raises:
-            FileNotFoundError: Raised when file does not exist
-            ValueError: Raised when file reading fails
-        """
+        """Parse CSV file."""

         try:
+            text_stream: TextIO
+            should_close = False
+
             if isinstance(file, str):
                 file = Path(file)
-
+
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"File not found: {file}")
-                file_content = file.open(newline="", mode="r", encoding="utf-8")
+                encoding = detect_file_encoding(file)
+                text_stream = file.open(newline="", mode="r", encoding=encoding)
+                should_close = True
+            elif hasattr(file, "read"):
+                try:
+                    file.seek(0)
+                except (AttributeError, OSError):
+                    pass
+                raw_content = file.read()
+                if isinstance(raw_content, bytes):
+                    encoding = detect_file_encoding(raw_content)
+                    text_stream = io.StringIO(raw_content.decode(encoding))
+                elif isinstance(raw_content, str):
+                    text_stream = io.StringIO(raw_content)
+                else:
+                    raise ValueError("Unsupported file object type")
+                should_close = True
             else:
-                file.seek(0)
-                file_content = file
+                raise TypeError("file must be a path string, Path, or file-like object")

-            csv_content = []
-            with file_content as csvfile:
-                csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
+            csv_content: List[str] = []
+            try:
+                csv_reader = csv.reader(text_stream, delimiter=delimiter, quotechar=quotechar)
                 for row in csv_reader:
-                    # Filter out completely empty rows
                     if any(cell.strip() for cell in row):
-                        # Remove empty values at the end of row
                         while row and not row[-1].strip():
                             row.pop()
-                        # Only add non-empty values to result
                         csv_content.append(", ".join(filter(None, row)))
-
-            # Combine all valid rows into final text
+            finally:
+                if should_close:
+                    text_stream.close()
+
             final_content = "\n".join(csv_content)
-            # Get metadata
-            metadata = {"file_name": getattr(file, 'name', 'unknown')}
+            if isinstance(file, Path):
+                file_name = file.name
+            else:
+                name_attr = getattr(file, 'name', None)
+                file_name = Path(name_attr).name if isinstance(name_attr, str) else 'unknown'
+            metadata = {"file_name": file_name}
             if ext_info:
                 metadata.update(ext_info)
-            # print(f"csv_content: {final_content} \n metadata: {metadata}")
+
             return [Document(text=final_content, metadata=metadata)]
         except Exception as e:
             raise ValueError(f"Failed to read CSV file: {str(e)}") from e

diff --git a/agentuniverse/agent/action/knowledge/reader/file/file_reader.py b/agentuniverse/agent/action/knowledge/reader/file/file_reader.py
index 581d323e..5f7acd07 100644
--- a/agentuniverse/agent/action/knowledge/reader/file/file_reader.py
+++ b/agentuniverse/agent/action/knowledge/reader/file/file_reader.py
@@ -10,8 +10,11 @@ from typing import Dict, Type, List, Optional
 
 from agentuniverse.agent.action.knowledge.reader.file.docx_reader import DocxReader
 from agentuniverse.agent.action.knowledge.reader.file.epub_reader import EpubReader
+from agentuniverse.agent.action.knowledge.reader.file.markdown_reader import MarkdownReader
 from agentuniverse.agent.action.knowledge.reader.file.pdf_reader import PdfReader
 from agentuniverse.agent.action.knowledge.reader.file.pptx_reader import PptxReader
+from agentuniverse.agent.action.knowledge.reader.file.txt_reader import TxtReader
+from agentuniverse.agent.action.knowledge.reader.file.csv_reader import CSVReader
 from agentuniverse.agent.action.knowledge.reader.file.xlsx_reader import XlsxReader
 from agentuniverse.agent.action.knowledge.reader.reader import Reader
 from agentuniverse.agent.action.knowledge.store.document import Document
@@ -22,6 +25,10 @@ DEFAULT_FILE_READERS: Dict[str, Type[Reader]] = {
     ".pptx": PptxReader,
     ".xlsx": XlsxReader,
     ".epub": EpubReader,
+    ".txt": TxtReader,
+    ".md": MarkdownReader,
+    ".markdown": MarkdownReader,
+    ".csv": CSVReader,
 }
 
 
diff --git a/agentuniverse/agent/action/knowledge/reader/file/txt_reader.py b/agentuniverse/agent/action/knowledge/reader/file/txt_reader.py
index 6373902a..d837277c 100644
--- a/agentuniverse/agent/action/knowledge/reader/file/txt_reader.py
+++ b/agentuniverse/agent/action/knowledge/reader/file/txt_reader.py
@@ -1,19 +1,19 @@
-
 from pathlib import Path
 from typing import List, Optional, Dict
 
 from agentuniverse.agent.action.knowledge.reader.reader import Reader
 from agentuniverse.agent.action.knowledge.store.document import Document
+from agentuniverse.agent.action.knowledge.reader.utils import detect_file_encoding
 
 
 class LineTxtReader(Reader):
     def _load_data(self, fpath: Path, ext_info: Optional[Dict] = None) -> List[Document]:
-        dlist = []
+        dlist: List[Document] = []
+        encoding = detect_file_encoding(fpath)
 
-        with open(fpath, 'r', encoding='utf-8') as file:
-
-            metadata = {"file_name": file.name}
+        with open(fpath, 'r', encoding=encoding) as file:
+            metadata = {"file_name": Path(file.name).name}
             if ext_info is not None:
                 metadata.update(ext_info)
 
@@ -27,10 +27,10 @@ class TxtReader(Reader):
     """Txt reader."""
 
     def _load_data(self, fpath: Path, ext_info: Optional[Dict] = None) -> List[Document]:
+        encoding = detect_file_encoding(fpath)
 
-        with open(fpath, 'r', encoding='utf-8') as file:
-
-            metadata = {"file_name": file.name}
+        with open(fpath, 'r', encoding=encoding) as file:
+            metadata = {"file_name": Path(file.name).name}
             if ext_info is not None:
                 metadata.update(ext_info)
 
diff --git a/agentuniverse/agent/action/knowledge/reader/reader_manager.py b/agentuniverse/agent/action/knowledge/reader/reader_manager.py
index 21a95ccf..4eaee2a4 100644
--- a/agentuniverse/agent/action/knowledge/reader/reader_manager.py
+++ b/agentuniverse/agent/action/knowledge/reader/reader_manager.py
@@ -21,6 +21,9 @@ class ReaderManager(ComponentManagerBase[Reader]):
         "pptx": "default_pptx_reader",
         "docx": "default_docx_reader",
         "txt": "default_txt_reader",
+        "md": "default_markdown_reader",
+        "markdown": "default_markdown_reader",
+        "csv": "default_csv_reader",
         # extended defaults for web & images
         "url": "default_web_page_reader",
         "png": "default_image_ocr_reader",
diff --git a/agentuniverse/agent/action/knowledge/reader/utils.py b/agentuniverse/agent/action/knowledge/reader/utils.py
new file mode 100644
index 00000000..48ada34f
--- /dev/null
+++ b/agentuniverse/agent/action/knowledge/reader/utils.py
@@ -0,0 +1,69 @@
+"""Utility helpers for reader implementations."""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import BinaryIO, Iterable, Sequence, Union
+
+# Candidate encodings to try when automatic detection libraries are not available.
+_FALLBACK_ENCODINGS: Sequence[str] = (
+    "utf-8",
+    "utf-8-sig",
+    "gb18030",
+    "gbk",
+    "big5",
+    "shift_jis",
+    "latin-1",
+)
+
+
+def _read_sample_bytes(source: Union[str, Path, BinaryIO, bytes, bytearray],
+                       sample_size: int) -> bytes:
+    """Read a byte sample from the given file path or binary handle."""
+    if isinstance(source, (bytes, bytearray)):
+        return bytes(source[:sample_size])
+    if isinstance(source, (str, Path)):
+        path = Path(source)
+        with path.open("rb") as handle:
+            return handle.read(sample_size)
+
+    # File-like object – preserve the original pointer
+    handle = source
+    current_pos = handle.tell()
+    try:
+        data = handle.read(sample_size)
+    finally:
+        handle.seek(current_pos)
+    return data if data is not None else b""
+
+
+def detect_file_encoding(source: Union[str, Path, BinaryIO, bytes, bytearray],
+                         sample_size: int = 32 * 1024,
+                         fallback_encodings: Iterable[str] = _FALLBACK_ENCODINGS) -> str:
+    """Best-effort detection of the text encoding for the given file."""
+    sample = _read_sample_bytes(source, sample_size)
+    if not sample:
+        return "utf-8"
+
+    # First try decoding with a curated list of encodings
+    for encoding in fallback_encodings:
+        try:
+            sample.decode(encoding)
+            return encoding
+        except UnicodeDecodeError:
+            continue
+
+    # If the curated list fails, fall back to charset_normalizer if available
+    try:  # pragma: no cover - optional dependency
+        from charset_normalizer import from_bytes
+    except ImportError:  # pragma: no cover - handled above
+        best_guess = None
+    else:
+        result = from_bytes(sample).best()
+        best_guess = result.encoding if result is not None else None
+    if best_guess:
+        return best_guess
+
+    return "utf-8"
+
+
+__all__ = ["detect_file_encoding"]
diff --git a/tests/test_agentuniverse/unit/agent/action/knowledge/reader/file/test_text_encoding.py b/tests/test_agentuniverse/unit/agent/action/knowledge/reader/file/test_text_encoding.py
new file mode 100644
index 00000000..2a09e3d6
--- /dev/null
+++ b/tests/test_agentuniverse/unit/agent/action/knowledge/reader/file/test_text_encoding.py
@@ -0,0 +1,38 @@
+from agentuniverse.agent.action.knowledge.reader.file.csv_reader import CSVReader
+from agentuniverse.agent.action.knowledge.reader.file.txt_reader import TxtReader
+from agentuniverse.agent.action.knowledge.reader.utils import detect_file_encoding
+
+
+def test_detect_file_encoding_gb18030(tmp_path):
+    sample_text = "示例文本"
+    file_path = tmp_path / "sample.txt"
+    file_path.write_text(sample_text, encoding="gb18030")
+
+    detected = detect_file_encoding(file_path)
+    assert detected in {"gb18030", "gbk"}
+
+
+def test_txt_reader_handles_gbk(tmp_path):
+    content = "第一行\n第二行"
+    file_path = tmp_path / "gbk.txt"
+    file_path.write_text(content, encoding="gb18030")
+
+    reader = TxtReader()
+    documents = reader.load_data(file_path)
+
+    assert len(documents) == 1
+    assert documents[0].text == content
+    assert documents[0].metadata["file_name"] == file_path.name
+
+
+def test_csv_reader_handles_utf8_bom(tmp_path):
+    rows = ["col1,col2", "值1,值2"]
+    file_path = tmp_path / "data.csv"
+    file_path.write_text("\n".join(rows), encoding="utf-8-sig")
+
+    reader = CSVReader()
+    documents = reader.load_data(file_path)
+
+    assert len(documents) == 1
+    assert "值1" in documents[0].text
+    assert documents[0].metadata["file_name"] == file_path.name
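
A minimal usage sketch of the two additions in this series, not part of either patch: it assumes python-docx is installed, that the tool and readers can be instantiated directly (as the unit tests above do), and that the file paths shown are placeholders.

    import json
    from pathlib import Path

    from agentuniverse.agent.action.tool.common_tool.write_word_tool import WriteWordDocumentTool
    from agentuniverse.agent.action.knowledge.reader.file.txt_reader import TxtReader
    from agentuniverse.agent.action.knowledge.reader.utils import detect_file_encoding

    # Write one paragraph into a new .docx file; the tool returns a JSON status string.
    result = json.loads(
        WriteWordDocumentTool().execute(file_path="./reports/summary.docx",
                                        content="Quarterly summary.",
                                        append=False)
    )
    print(result["status"], result.get("file_size"))

    # Read a GBK/GB18030-encoded text file without hard-coding the encoding.
    sample = Path("./data/sample_gbk.txt")
    print(detect_file_encoding(sample))  # e.g. "gb18030"
    for doc in TxtReader().load_data(sample):
        print(doc.metadata["file_name"], doc.text[:50])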