mirror of
https://github.com/agentuniverse-ai/agentUniverse.git
synced 2026-02-09 01:59:19 +08:00
feat: add data agent in the agentUniverse.
This commit is contained in:
@@ -7,7 +7,6 @@
|
||||
# @FileName: dedupe.py
|
||||
from collections import Counter
|
||||
|
||||
from simhash import Simhash
|
||||
from agentuniverse_dataflow.node.data.base.prompt_base import PromptBase
|
||||
|
||||
|
||||
@@ -29,6 +28,13 @@ class DedupeNode(PromptBase):
|
||||
if not self._prompt_list or len(self._prompt_list) == 0:
|
||||
return
|
||||
|
||||
try:
|
||||
from simhash import Simhash
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"simhash is required at the DedupeNode. Please: `pip install simhash`"
|
||||
)
|
||||
|
||||
# calculate the simhash value for each document.
|
||||
simhashes = [(doc, Simhash(doc)) for doc in self._prompt_list]
|
||||
|
||||
|
||||
@@ -51,6 +51,8 @@ dashscope = "^1.19.1"
|
||||
anthropic = "^0.26.0"
|
||||
ollama = '^0.2.1'
|
||||
langchain-anthropic = '^0.1.13'
|
||||
pandas = "^2.2.2"
|
||||
pyarrow = "^16.1.0"
|
||||
|
||||
[tool.poetry.extras]
|
||||
log_ext = ["aliyun-log-python-sdk"]
|
||||
|
||||
Reference in New Issue
Block a user