小学语文老师智能体样例

This commit is contained in:
YIOYI
2025-10-24 11:24:13 +08:00
parent 287d1c9a0d
commit 2fb99dd61e
47 changed files with 889 additions and 0 deletions

View File

@@ -0,0 +1,170 @@
docs/source
# From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
.DS_Store
*.pyc
custom_key.toml

View File

@@ -0,0 +1,2 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,25 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
# @Time : 2024/4/8 20:58
# @Author : jerry.zzw
# @Email : jerry.zzw@antgroup.com
# @FileName: mcp_application.py
from agentuniverse.agent_serve.web.mcp.mcp_server_manager import MCPServerManager
from agentuniverse.base.agentuniverse import AgentUniverse
class ServerApplication:
    """Application entry point: boots agentUniverse, then the MCP server."""

    @classmethod
    def start(cls):
        """Start agentUniverse with ``core_mode=True`` and then start the MCP server."""
        universe = AgentUniverse()
        universe.start(core_mode=True)

        server_manager = MCPServerManager()
        server_manager.start_server()


if __name__ == "__main__":
    ServerApplication.start()

View File

@@ -0,0 +1,24 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
# @Time : 2024/4/8 20:58
# @Author : jerry.zzw
# @Email : jerry.zzw@antgroup.com
# @FileName: server_application.py
from agentuniverse.agent_serve.web.web_booster import start_web_server
from agentuniverse.base.agentuniverse import AgentUniverse
class ServerApplication:
    """Application entry point: boots agentUniverse, then the web server."""

    @classmethod
    def start(cls):
        """Start agentUniverse with its default arguments and then start the web server."""
        universe = AgentUniverse()
        universe.start()

        start_web_server()


if __name__ == "__main__":
    ServerApplication.start()

View File

@@ -0,0 +1,65 @@
[BASE_INFO]
# The app name will be applied to all processes including agent service integration.
appname = 'demo_app'
[CORE_PACKAGE]
# Perform a full component scan and registration for all the paths under this list.
default = ['primary_chinese_teacher_agent.intelligence.agentic']
# Scan and register agent components for all paths under this list, with priority over the default.
agent = ['primary_chinese_teacher_agent.intelligence.agentic.agent']
# Scan and register knowledge components for all paths under this list, with priority over the default.
knowledge = ['primary_chinese_teacher_agent.intelligence.agentic.knowledge']
# Scan and register llm components for all paths under this list, with priority over the default.
llm = ['primary_chinese_teacher_agent.intelligence.agentic.llm']
# Scan and register planner components for all paths under this list, with priority over the default.
planner = []
# Scan and register tool components for all paths under this list, with priority over the default.
tool = ['primary_chinese_teacher_agent.intelligence.agentic.tool']
# Scan and register memory components for all paths under this list, with priority over the default.
memory = ['primary_chinese_teacher_agent.intelligence.agentic.memory']
# Scan and register service components for all paths under this list, with priority over the default.
service = ['primary_chinese_teacher_agent.intelligence.service.agent_service']
# Scan and register prompt components for all paths under this list, with priority over the default.
prompt = ['primary_chinese_teacher_agent.intelligence.agentic.prompt']
# Scan and register product components for all paths under this list, with priority over the default.
#product = ['au_sample_standard_app.platform.difizen.product']
# Scan and register workflow components for all paths under this list, with priority over the default.
#workflow = ['au_sample_standard_app.platform.difizen.workflow']
# Scan and register store components for all paths under this list, with priority over the default.
store = ['primary_chinese_teacher_agent.intelligence.agentic.knowledge.store']
# Scan and register rag_router components for all paths under this list, with priority over the default.
rag_router = ['primary_chinese_teacher_agent.intelligence.agentic.knowledge.rag_router']
# Scan and register doc_processor components for all paths under this list, with priority over the default.
doc_processor = ['primary_chinese_teacher_agent.intelligence.agentic.knowledge.doc_processor']
# Scan and register query_paraphraser components for all paths under this list, with priority over the default.
query_paraphraser = ['primary_chinese_teacher_agent.intelligence.agentic.knowledge.query_paraphraser']
# Scan and register memory_compressor components for all paths under this list, with priority over the default.
memory_compressor = ['primary_chinese_teacher_agent.intelligence.agentic.memory.memory_compressor']
# Scan and register memory_storage components for all paths under this list, with priority over the default.
memory_storage = ['primary_chinese_teacher_agent.intelligence.agentic.memory.memory_storage']
[SUB_CONFIG_PATH]
# Log config file path, an absolute path or a relative path based on the dir where the current config file is located.
log_config_path = './log_config.toml'
# Custom key file path, used to store your own secret keys (e.g. OpenAI API keys). REMEMBER TO ADD IT TO .gitignore.
custom_key_path = './custom_key.toml'
[DB]
# A sqlalchemy db uri used for storing various info, for example, service request, generated during application running.
# If it's empty, agentUniverse will create a local sqlite db as default choice.
system_db_uri = ''
[GUNICORN]
# Use gunicorn as the HTTP server when activate is 'true'; otherwise only Flask is used.
activate = 'false'
# Gunicorn config file path, an absolute path or a relative path based on the dir where the current config file is located.
gunicorn_config_path = './gunicorn_config.toml'
[GRPC]
activate = 'false'
max_workers = 10
server_port = 50051
[MONITOR]
activate = false
dir = './monitor'

View File

@@ -0,0 +1,46 @@
# Example file of custom_key.toml. Rename to custom_key.toml while using.
[KEY_LIST]
# Example entry. Add your own keys below in the form KEY_NAME = 'value'.
example_key = 'AnExampleKey'
# models
#kimi default name: default_kimi_llm
#KIMI_API_KEY='sk-xxxxxxxx'
#
##Qwen default name: default_qwen_llm
#DASHSCOPE_API_KEY='sk-xxxxxx'
#
##OpenAI default name: default_openai_llm
#OPENAI_API_KEY='sk-xxxxxx'
#
##DEEPSEEK default name: default_deepseek_llm
#DEEPSEEK_API_KEY='sk-xxxxxxx'
#DEEPSEEK_API_BASE='https://api.deepseek.com/v1'
#
## WenXin default name: default_wenxin_llm
#QIANFAN_AK='xxxx'
#QIANFAN_SK='xxxx'
#
##Ollama default name: default_ollama_llm
#OLLAMA_BASE_URL='xxxxxx'
#
##claude default name: default_claude_llm
#ANTHROPIC_API_KEY='xxxxxx'
#ANTHROPIC_API_URL='xxxxxx'
#
##baichuan default name: default_baichuan_llm
#BAICHUAN_API_KEY='xxxxxx'
#
##ZHIPU default name: default_zhipu_llm
#ZHIPU_API_KEY='xxxxxx'
#ZHIPU_API_BASE='https://open.bigmodel.cn/api/paas/v4/'
# search
#Google search
#SERPER_API_KEY='xxxxxx'
#
##search api
#SEARCHAPI_API_KEY='xxxxxx'
#
##bing search
#BING_SUBSCRIPTION_KEY='xxxxxx'

View File

@@ -0,0 +1,8 @@
[GUNICORN_CONFIG]
bind = '0.0.0.0:8888'
backlog = 2048
worker_class = 'gthread'
threads = 4
workers = 5
timeout = 60
keepalive = 10

View File

@@ -0,0 +1,32 @@
[LOG_CONFIG]
[LOG_CONFIG.BASIC_CONFIG]
# Loguru log level.
log_level = "INFO"
# Output path of the log file. If value is empty, agentuniverse will create a subdir under your workdir to save logs.
log_path = "./.test_log_dir"
# Specifies the log rotation policy, controlling when a new log file is created. It can be a time period
# (e.g., "1 week"), a file size (e.g., "100 MB"), or a function returning True when rotation should occur.
log_rotation = "100 MB"
# Specifies the duration to keep old log files. It can be a time span (e.g., "30 days") or a function to filter the
# files to be retained. Files outside this policy are purged.
log_retention = "7 days"
[LOG_CONFIG.EXTEND_MODULE]
# Whether to use Aliyun Simple Log Service (SLS). If the value is "True", fill in the ALIYUN_SLS_CONFIG section below.
sls_log = "False"
[LOG_CONFIG.ALIYUN_SLS_CONFIG]
# Aliyun sls endpoint.
sls_endpoint = "mock_endpoint"
# Your sls log project name.
sls_project = "mock_project"
# Your sls log store name.
sls_log_store = "mock_log_store"
# Aliyun sls access_key_id.
access_key_id = "mock_key_id"
# Aliyun sls access_key_secret.
access_key_secret = "mock_key_secret"
# Log queue max size, agentuniverse uses a queue to save the logs to be sent, they will be sent periodically.
sls_log_queue_max_size = 1000
# Interval of sending logs to aliyun sls.
sls_log_send_interval = 3.0

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,2 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,56 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
# @Time : 2024/12/26 17:10
# @Author : wangchongshi
# @Email : wangchongshi.wcs@antgroup.com
# @FileName: primary_chinese_teacher_agent.py
from langchain_core.output_parsers import StrOutputParser
from agentuniverse.agent.agent import Agent
from agentuniverse.agent.input_object import InputObject
from agentuniverse.base.util.prompt_util import process_llm_token
from agentuniverse.llm.llm import LLM
from agentuniverse.prompt.prompt import Prompt
from primary_chinese_teacher_agent.intelligence.utils.constant.prod_description import \
PROD_DESCRIPTION_A
class PrimaryChineseTeacherAgent(Agent):
    """Agent that answers a question using a prompt -> llm -> string-parser chain.

    The user question is read from the ``input`` key and the answer is
    returned under the ``output`` key. ``PROD_DESCRIPTION_A`` is injected as
    the ``background`` value before the prompt is built.
    """

    def input_keys(self) -> list[str]:
        """Return the input keys of this agent."""
        return ['input']

    def output_keys(self) -> list[str]:
        """Return the output keys of this agent."""
        return ['output']

    def parse_input(self, input_object: InputObject, agent_input: dict) -> dict:
        """Copy the ``input`` value of ``input_object`` into ``agent_input``.

        Args:
            input_object (InputObject): input parameters passed by the user.
            agent_input (dict): agent input; updated in place.
        Returns:
            dict: the same ``agent_input`` dict.
        """
        question = input_object.get_data('input')
        agent_input['input'] = question
        return agent_input

    def parse_result(self, agent_result: dict) -> dict:
        """Return a copy of ``agent_result`` with its ``output`` entry set.

        Raises:
            KeyError: if ``agent_result`` has no ``output`` key.
        """
        parsed = dict(agent_result)
        parsed['output'] = agent_result['output']
        return parsed

    def execute(self, input_object: InputObject, agent_input: dict, **kwargs) -> dict:
        """Run the agent once and return its result.

        Args:
            input_object (InputObject): input parameters passed by the user.
            agent_input (dict): agent input parsed from `input_object`;
                a ``background`` entry is added to it in place.
        Returns:
            dict: a copy of ``agent_input`` plus the answer under ``output``.
        """
        # Resolve the llm instance for this run.
        model: LLM = self.process_llm(**kwargs)

        # Provide the fixed background text to the prompt.
        agent_input['background'] = PROD_DESCRIPTION_A

        # Build the prompt and run the framework's token processing on it.
        template: Prompt = self.process_prompt(agent_input, **kwargs)
        process_llm_token(model, template.as_langchain(), self.agent_model.profile, agent_input)

        # Assemble the chain: prompt -> llm -> plain string.
        lc_prompt = template.as_langchain()
        runnable = model.as_langchain_runnable(self.agent_model.llm_params())
        pipeline = lc_prompt | runnable | StrOutputParser()
        answer = self.invoke_chain(pipeline, agent_input, input_object, **kwargs)

        # Return the agent input together with the answer.
        result = dict(agent_input)
        result['output'] = answer
        return result

View File

@@ -0,0 +1,31 @@
info:
name: 'primary_chinese_teacher_agent'
description: '小学一年级语文老师智能体'
profile:
introduction: 你是一位有爱心、耐心、懂教育心理学的小学一年级语文老师。
target: 你的目标是根据学生或家长提出的问题,提供清晰、亲切、专业的语文学习指导。
instruction: |
你需要遵守的规则是:
1. 必须使用中文回答,语气温和、简洁、适合一年级学生理解。
2. 若问题来自家长,应体现专业指导性;若来自学生,应体现引导与鼓励。
3. 回答要结构清晰,必要时分点陈述或使用空行提升可读性。
4. 答案应贴近小学一年级语文教学内容,包括识字、拼音、朗读、写字、课文理解等。
5. 不使用复杂的专业术语或超纲知识。
6. 若提问内容超出一年级语文教学范围,可温和说明,并引导至合适的学习阶段或方法。
7. 保持鼓励与启发性,适当使用温馨提示或小贴士风格。
背景信息是:
{background}
开始!
需要回答的问题是:{input}
llm_model:
name: 'deepseek-llm'
model_name: 'deepseek-chat'
temperature: 0.2
action:
tool:
metadata:
type: 'AGENT'
module: 'primary_chinese_teacher_agent.intelligence.agentic.agent.agent_instance.primary_chinese_teacher_agent'
class: 'PrimaryChineseTeacherAgent'

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,2 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,9 @@
name: 'deepseek-llm'
description: 'deepseek-chat'
model_name: 'deepseek-chat'
max_tokens: 2000
api_key: '${DEEPSEEK_API_KEY}'
metadata:
type: 'LLM'
module: 'agentuniverse.llm.default.deep_seek_openai_style_llm'
class: 'DefaultDeepSeekLLM'

View File

@@ -0,0 +1,103 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
# @Time : 2024/12/25 16:39
# @Author : wangchongshi
# @Email : wangchongshi.wcs@antgroup.com
# @FileName: langchain_instance.py
from typing import Optional, List, Any, Iterator, AsyncIterator
from langchain_core.callbacks import CallbackManagerForLLMRun, AsyncCallbackManagerForLLMRun
from agentuniverse.llm.llm import LLM
from langchain_core.language_models import LLM as LangChainLLM
from agentuniverse.llm.llm_output import LLMOutput
from langchain_core.outputs import GenerationChunk
class LangChainInstance(LangChainLLM):
    """LangChain ``LLM`` adapter that forwards every request to an agentUniverse ``LLM``.

    Completion, streaming and token-counting calls are all delegated to the
    wrapped model stored in ``llm``.
    """

    # Wrapped agentUniverse model; assigned in ``__init__``.
    llm: LLM = None
    # Value reported through the ``_llm_type`` property.
    llm_type: str = "AgentUniverse"
    # Streaming flag used when a call does not pass its own ``streaming`` kwarg.
    streaming: bool = False

    def __init__(self, llm: LLM, llm_type: str, **kwargs):
        """Initialise the base class with ``kwargs`` and bind the wrapped model."""
        super().__init__(**kwargs)
        self.llm = llm
        self.llm_type = llm_type

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any) -> str:
        """Call the wrapped model and return the complete text.

        A ``streaming`` kwarg, when present, is removed from ``kwargs`` and
        overrides ``self.streaming``. In streaming mode the result is consumed
        chunk by chunk and each chunk is reported to ``run_manager``.
        """
        if "streaming" in kwargs:
            stream_requested = kwargs.pop("streaming", False)
        else:
            stream_requested = self.streaming

        response = self.llm.call(prompt=prompt, stop=stop, **kwargs)
        if stream_requested:
            return self.parse_stream_result(response, run_manager)
        return response.text

    async def _acall(
            self,
            prompt: str,
            stop: Optional[List[str]] = None,
            run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> str:
        """Asynchronous counterpart of ``_call``."""
        if "streaming" in kwargs:
            stream_requested = kwargs.pop("streaming", False)
        else:
            stream_requested = self.streaming

        response = await self.llm.acall(prompt=prompt, stop=stop, **kwargs)
        if stream_requested:
            return await self.aparse_stream_result(response, run_manager)
        return response.text

    def _stream(
            self,
            prompt: str,
            stop: Optional[List[str]] = None,
            run_manager: Optional[CallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Call the wrapped model with ``stream=True`` and yield one chunk per item."""
        kwargs['stream'] = True
        response = self.llm.call(prompt=prompt, stop=stop, **kwargs)
        for item in response:
            yield GenerationChunk(text=item.text, generation_info=item.raw)

    async def _astream(
            self,
            prompt: str,
            stop: Optional[List[str]] = None,
            run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        """Asynchronous counterpart of ``_stream``."""
        kwargs['stream'] = True
        response = await self.llm.acall(prompt=prompt, stop=stop, **kwargs)
        async for item in response:
            yield GenerationChunk(text=item.text, generation_info=item.raw)

    @staticmethod
    def parse_stream_result(stream_result: Iterator[LLMOutput],
                            run_manager: Optional[CallbackManagerForLLMRun] = None) -> str:
        """Concatenate the text of every streamed item.

        Each piece of text is also passed to ``run_manager.on_llm_new_token``
        when a run manager is supplied.
        """
        collected = ""
        for item in stream_result:
            collected += item.text
            if run_manager:
                run_manager.on_llm_new_token(item.text)
        return collected

    @staticmethod
    async def aparse_stream_result(stream_result: AsyncIterator[LLMOutput],
                                   run_manager: Optional[AsyncCallbackManagerForLLMRun] = None) -> str:
        """Asynchronous counterpart of ``parse_stream_result``."""
        collected = ""
        async for item in stream_result:
            collected += item.text
            if run_manager:
                await run_manager.on_llm_new_token(item.text)
        return collected

    @property
    def _llm_type(self) -> str:
        """Return the configured ``llm_type`` string."""
        return self.llm_type

    def get_num_tokens(self, text: str) -> int:
        """Return the token count of ``text`` as computed by the wrapped model."""
        return self.llm.get_num_tokens(text)

    def get_token_ids(self, text: str) -> List[int]:
        """Return the token ids of ``text`` as computed by the wrapped model."""
        return self.llm.get_token_ids(text)

View File

@@ -0,0 +1,6 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
# @Time : 2024/12/10 10:51
# @Author : jijiawei
# @Email : jijiawei.jjw@antgroup.com
# @FileName: __init__.py

View File

@@ -0,0 +1,9 @@
name: 'qwen_llm'
description: 'demo qwen llm with spi'
model_name: 'qwen2-72b-instruct'
max_tokens: 2500
api_key: '${DASHSCOPE_API_KEY}'
metadata:
type: 'LLM'
module: 'agentuniverse.llm.default.qwen_openai_style_llm'
class: 'QWenOpenAIStyleLLM'

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,2 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,18 @@
introduction: 你是一位温柔耐心的小学一年级语文老师。
target: 你的目标是根据学生或家长的问题,用简单、生动、易懂的语言讲解小学一年级语文知识,帮助学生理解和学习。
instruction: |
你需要遵守的规则是:
1. 必须使用中文回答,语气要温和亲切,符合小学一年级学生的理解能力。
2. 回答要简洁清晰,避免使用复杂或抽象的词语。
3. 如果讲解知识点,要举出具体例子帮助理解。
4. 对学生要多鼓励和表扬,帮助他们树立学习信心。
5. 回答可以通过分段或留空行的方式,让内容更容易阅读。
背景信息是:
{background}
开始!
需要回答的问题是: {input}
metadata:
type: 'PROMPT'
version: 'primary_chinese_teacher.cn'

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,2 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,5 @@
name: 'primary_chinese_teacher_agent_service'
description: ''
agent: 'primary_chinese_teacher_agent'
metadata:
type: 'SERVICE'

View File

@@ -0,0 +1,2 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,19 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
from agentuniverse.agent.output_object import OutputObject
from agentuniverse.base.agentuniverse import AgentUniverse
from agentuniverse.agent.agent import Agent
from agentuniverse.agent.agent_manager import AgentManager
# Boot agentUniverse in core mode from the sample config before any agent is looked up.
AgentUniverse().start(config_path='../../config/config.toml', core_mode=True)


def chat(question: str):
    """Send ``question`` to the primary Chinese teacher agent and print its answer.

    Args:
        question (str): the question passed to the agent as ``input``.
    """
    teacher: Agent = AgentManager().get_instance_obj('primary_chinese_teacher_agent')
    result: OutputObject = teacher.run(input=question)
    answer = result.get_data('output')
    print(answer)


if __name__ == '__main__':
    chat("我是刚刚幼儿园毕业的小朋友,请问一年级上册要预习哪些课文?")

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,3 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,91 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
# @Time : 2024/7/1 21:09
# @Author : wangchongshi
# @Email : wangchongshi.wcs@antgroup.com
# @FileName: jsonl_file_util.py
import json
import os
import sys
from agentuniverse.base.util.logging.logging_util import LOGGER
# Default output directory prefix used by JsonFileWriter when no `directory` is given.
DATA_DIR = './data/'
# Add the directory obtained by applying os.path.dirname three times to this
# file's path to the module search path.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
class JsonFileOps(object):
    """Helper operations for ``.jsonl`` files."""

    @classmethod
    def is_file_exist(cls, file_path):
        """Tell whether the ``.jsonl`` file at ``file_path`` exists.

        Args:
            file_path: path of the file to check; its extension must be
                ``.jsonl`` (compared case-insensitively).
        Returns:
            bool: True when the path exists, False otherwise.
        Raises:
            ValueError: if the extension is not ``.jsonl``. ``ValueError`` is
                a subclass of ``Exception``, so callers that catch
                ``Exception`` keep working.
        """
        _, ext = os.path.splitext(file_path)
        if ext.lower() != '.jsonl':
            raise ValueError('Unsupported file extension')
        return os.path.exists(file_path)
class JsonFileReader(object):
    """Line-by-line reader for ``.jsonl`` files.

    The file is opened in the constructor when it exists. Call ``close()``
    when done, or use the reader as a context manager so the underlying file
    handle is always released::

        with JsonFileReader('items.jsonl') as reader:
            items = reader.read_json_obj_list()
    """

    def __init__(self, file_path: str):
        """Open ``file_path`` for reading if it exists.

        Args:
            file_path (str): path of the ``.jsonl`` file to read.
        """
        self.file_handler = None
        self.file_name = file_path
        if JsonFileOps.is_file_exist(file_path):
            self.file_handler = open(file_path, 'r', encoding='utf-8')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying file, if any. Safe to call more than once."""
        if self.file_handler is not None:
            self.file_handler.close()
            self.file_handler = None

    def _require_handler(self):
        """Raise when there is no open file to read from."""
        if not self.file_handler:
            raise Exception(f"None json file to read: {self.file_name}")

    @staticmethod
    def _parse_line(json_line):
        """Parse one raw line; log and return ``{}`` when it is not valid JSON."""
        try:
            return json.loads(json_line.strip())
        except Exception as e:
            LOGGER.warn(f"except[read_json_line]>>>{e}:{json_line}")
            return {}

    def read_json_obj(self):
        """Read and parse the next line.

        Returns:
            The parsed JSON value, ``{}`` when the line is not valid JSON, or
            ``None`` at end of file.
        Raises:
            Exception: if no file is open.
        """
        self._require_handler()
        json_line = self.file_handler.readline()
        if not json_line:
            return None
        return self._parse_line(json_line)

    def read_json_obj_list(self):
        """Read and parse every remaining line.

        Lines are iterated directly instead of using ``None`` as an
        end-of-file marker, so a line holding JSON ``null`` no longer
        truncates the result.

        Returns:
            list: one parsed value per remaining line.
        Raises:
            Exception: if no file is open.
        """
        self._require_handler()
        # readline() returns '' only at end of file; blank lines are '\n'.
        return [self._parse_line(line) for line in iter(self.file_handler.readline, '')]
class JsonFileWriter(object):
    """Writer that emits one JSON document per line.

    The output file is opened (and truncated) in the constructor. Call
    ``close()`` when done, or use the writer as a context manager so the
    underlying file handle is always released::

        with JsonFileWriter('answers') as writer:
            writer.write_json_query_answer('q', 'a')
    """

    def __init__(self, output_file_name: str, extension='jsonl', directory=DATA_DIR):
        """Create the output directory if needed and open the output file.

        Args:
            output_file_name (str): file name without extension.
            extension: file extension without the leading dot.
            directory: prefix prepended to the file name as-is.
        """
        self.outfile_path = directory + output_file_name + '.' + extension
        parent_dir = os.path.dirname(self.outfile_path)
        # An empty dirname means the current directory; os.makedirs('') would
        # raise. exist_ok avoids a race between checking and creating.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self.outfile_handler = open(self.outfile_path, 'w', encoding='utf-8')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the output file. Safe to call more than once."""
        if not self.outfile_handler.closed:
            self.outfile_handler.close()

    def write_json_obj(self, json_obj: dict):
        """Serialize ``json_obj`` to one line and flush it.

        Failures are logged and swallowed (best effort).
        """
        try:
            # confirm that it's a json string and then write.
            json_line = json.dumps(json_obj, ensure_ascii=False)
            self.outfile_handler.write(json_line.strip() + '\n')
            self.outfile_handler.flush()
        except Exception as e:
            LOGGER.warn(f"except[write_json_obj]>>>{e}:{json_obj}")
        return

    def write_json_obj_list(self, json_obj_list: list):
        """Write every object of ``json_obj_list`` on its own line."""
        for json_obj in json_obj_list:
            self.write_json_obj(json_obj)
        return

    def write_json_query_answer(self, query: str, answer: str):
        """Write one ``{"query": ..., "answer": ...}`` line."""
        json_obj = {"query": query, "answer": answer}
        self.write_json_obj(json_obj)

    def write_json_query_answer_list(self, query_answer_list: list):
        """Write one query/answer line per item.

        Element 0 of each item is used as the query and element 1 as the answer.
        """
        for pair in query_answer_list:
            self.write_json_query_answer(pair[0], pair[1])

View File

@@ -0,0 +1,50 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
# @Time : 2024/7/1 16:05
# @Author : wangchongshi
# @Email : wangchongshi.wcs@antgroup.com
# @FileName: txt_file_util.py
import os
import sys
# Add the directory obtained by applying os.path.dirname three times to this
# file's path to the module search path.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
class TxtFileOps(object):
    """Helper operations for ``.txt`` files."""

    @classmethod
    def is_file_exist(cls, file_path):
        """Tell whether the ``.txt`` file at ``file_path`` exists.

        Args:
            file_path: path of the file to check; its extension must be
                ``.txt`` (compared case-insensitively).
        Returns:
            bool: True when the path exists, False otherwise.
        Raises:
            ValueError: if the extension is not ``.txt``. ``ValueError`` is a
                subclass of ``Exception``, so callers that catch
                ``Exception`` keep working.
        """
        _, ext = os.path.splitext(file_path)
        if ext.lower() != '.txt':
            raise ValueError('Unsupported file extension')
        return os.path.exists(file_path)
class TxtFileReader(object):
    """Line-by-line reader for ``.txt`` files.

    The file is opened in the constructor when it exists. Call ``close()``
    when done, or use the reader as a context manager so the underlying file
    handle is always released::

        with TxtFileReader('queries.txt') as reader:
            lines = reader.read_txt_obj_list()
    """

    def __init__(self, file_path: str):
        """Open ``file_path`` for reading if it exists.

        Args:
            file_path (str): path of the ``.txt`` file to read.
        """
        self.file_handler = None
        self.file_name = file_path
        if TxtFileOps.is_file_exist(file_path):
            self.file_handler = open(file_path, 'r', encoding='utf-8')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying file, if any. Safe to call more than once."""
        if self.file_handler is not None:
            self.file_handler.close()
            self.file_handler = None

    def _require_handler(self):
        """Raise when there is no open file to read from."""
        if not self.file_handler:
            raise Exception(f"No txt file to read: {self.file_name}")

    def read_txt_obj(self):
        """Read the next line with surrounding whitespace stripped.

        Returns:
            str | None: the stripped line (``''`` for a blank line), or
            ``None`` at end of file.
        Raises:
            Exception: if no file is open.
        """
        self._require_handler()
        txt_line = self.file_handler.readline()
        if not txt_line:
            return None
        return txt_line.strip()

    def read_txt_obj_list(self):
        """Read every remaining line, stripped, into a list.

        Raises:
            Exception: if no file is open.
        """
        self._require_handler()
        # readline() returns '' only at end of file; blank lines are '\n'.
        return [line.strip() for line in iter(self.file_handler.readline, '')]

View File

@@ -0,0 +1,2 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-

View File

@@ -0,0 +1,48 @@
# !/usr/bin/env python3
# -*- coding:utf-8 -*-
# Mock background knowledge used to answer questions about first-grade
# primary-school Chinese teaching.
PROD_DESCRIPTION_A = """
小学一年级语文教学背景知识
1. 教学目标
课程目标:帮助学生认识常用汉字、培养阅读兴趣、掌握基础拼音、学习正确的书写姿势与笔顺。
核心素养:语言积累与运用、阅读理解、表达与交流、文化启蒙。
2. 教学内容
识字与写字学习常见汉字约350个掌握基本笔画和偏旁部首。
拼音学习掌握23个声母、24个韵母、4个声调能正确拼读音节。
阅读与理解:通过课文、童话、儿歌等内容培养阅读兴趣。
口语表达:鼓励学生大胆说话,进行简单的日常交流与表达。
语文活动:朗读比赛、讲故事、识字游戏等。
3. 教学方法
多感官教学:通过听、说、读、写结合的方式增强记忆。
游戏化学习:通过识字卡片、拼音接龙等方式提高学习兴趣。
分层辅导:针对不同学习水平的学生给予个性化指导。
激励评价:采用表扬、贴纸奖励、朗读之星等方式增强积极性。
4. 教学工具与资源
教材:《语文》(人教版一年级上、下册)
辅助材料:识字卡、拼音挂图、绘本、动画朗读课件。
数字资源:小学语文点读应用、在线拼音学习网站。
5. 教师角色与教学理念
教师角色:语文启蒙引导者,激发学生语言兴趣与文化情感。
教学理念:以学生为中心,重视兴趣培养与基础打牢。
课堂氛围:轻松活泼,鼓励表达与思考。
6. 家校配合建议
家庭阅读家长每天陪读10分钟共同朗读课文。
书写习惯:督促孩子正确握笔、端正坐姿、按笔顺书写。
语言交流:多与孩子交流日常生活,引导使用完整句子表达。
7. 评价方式
形成性评价:课堂表现、朗读态度、识字数量、书写整洁度。
终结性评价:期末阅读理解、听写测试、朗读展示。
8. 教学特色活动
汉字趣味日:识字游戏、笔画比赛。
童话故事周:学生分组表演课文内容。
拼音小达人:拼读竞赛与口语展示。
"""