In Project 2 (the intelligent triage system built on LangGraph), streaming output in the middle layer often takes a long time, so the frontend gets its response slowly and users lose patience. I therefore turned off streaming output in main.py. After running it, the RAG agent receives the question and the answer shows up in main.py's log, but the frontend never receives the answer and keeps showing "generating reply". Could you please advise how the frontend should be adjusted? Is it that the frontend never sees the end of the streamed output and keeps waiting, or is something else going on?
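My guess about the frontend (I do not have its source, so this is only an assumption) is that it consumes an OpenAI-style SSE stream and only stops showing "generating reply" once it sees the `data: [DONE]` terminator, roughly like this minimal Python sketch (the client library and chunk shape here are my assumptions, not taken from the course frontend):

import json
import requests  # assumed client library for the sketch; the real frontend is presumably JavaScript

def read_chat_stream(url: str, payload: dict) -> str:
    """Consume an OpenAI-style SSE stream and return the concatenated answer."""
    answer_parts = []
    with requests.post(url, json=payload, stream=True) as resp:
        for raw_line in resp.iter_lines(decode_unicode=True):
            if not raw_line or not raw_line.startswith("data:"):
                continue  # skip keep-alive blank lines
            data = raw_line[len("data:"):].strip()
            if data == "[DONE]":
                break  # only the terminator ends the wait
            chunk = json.loads(data)
            delta = chunk["choices"][0].get("delta", {})
            answer_parts.append(delta.get("content", ""))
    return "".join(answer_parts)

If that is what the frontend does, a plain JSON body from the non-streaming path would never produce a `data: [DONE]` line, which would match the "stuck waiting" symptom. My current main.py is below.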
import os
import re
import json
from contextlib import asynccontextmanager
from typing import List, Tuple, Optional
from fastapi import FastAPI, HTTPException, Depends
from fastapi.responses import JSONResponse
import uvicorn
import logging
from concurrent_log_handler import ConcurrentRotatingFileHandler
import sys
import time
import uuid
from pydantic import BaseModel, Field
from ragAgent import (
    ToolConfig,
    create_graph,
    save_graph_visualization,
    get_llm,
    get_tools,
    Config,
    ConnectionPool,
    ConnectionPoolError,
    monitor_connection_pool,
)
# Module-level logger writing to a rotating file
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.handlers = []
handler = ConcurrentRotatingFileHandler(
    Config.LOG_FILE,
    maxBytes=Config.MAX_BYTES,
    backupCount=Config.BACKUP_COUNT
)
handler.setLevel(logging.DEBUG)
handler.setFormatter(logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
))
logger.addHandler(handler)
class Message(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    messages: List[Message]
    userId: Optional[str] = None
    conversationId: Optional[str] = None


class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: Message
    finish_reason: Optional[str] = None


class ChatCompletionResponse(BaseModel):
    id: str = Field(default_factory=lambda: uuid.uuid4().hex)
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    choices: List[ChatCompletionResponseChoice]
    system_fingerprint: Optional[str] = None


def format_response(response):
    """Normalize the answer text: keep code blocks intact, put numbered list items on their own lines."""
    paragraphs = re.split(r"\n{2,}", response)
    formatted_paragraphs = []
    for para in paragraphs:
        if "```" in para:
            # Re-wrap fenced code blocks so each keeps its own lines
            parts = para.split("```")
            for i, part in enumerate(parts):
                if i % 2 == 1:
                    parts[i] = f"\n```\n{part.strip()}\n```\n"
            para = "".join(parts)
        else:
            # Move numbered list items ("1. ...") onto their own lines
            para = re.sub(r"(\d+\.)\s*", r"\n\1 ", para)
        formatted_paragraphs.append(para.strip())
    return "\n\n".join(formatted_paragraphs)
# Shared state populated during startup
graph = None
tool_config = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Build the LLMs, tools, DB connection pool and graph on startup; release the pool on shutdown."""
    global graph, tool_config
    db_connection_pool = None
    try:
        llm_chat, llm_embedding = get_llm(Config.LLM_TYPE)
        tools = get_tools(llm_embedding)
        tool_config = ToolConfig(tools)
        connection_kwargs = {"autocommit": True, "prepare_threshold": 0, "connect_timeout": 5}
        db_connection_pool = ConnectionPool(
            conninfo=Config.DB_URI,
            min_size=2,
            max_size=10,
            kwargs=connection_kwargs,
            open=False)
        try:
            db_connection_pool.open()
            logger.info("Database connection pool opened.")
        except Exception as e:
            logger.error(f"Failed to open the connection pool: {e}")
            raise ConnectionPoolError(str(e))
        # Start background monitoring of the pool
        monitor_task = monitor_connection_pool(db_connection_pool, interval=60)
        try:
            graph = create_graph(db_connection_pool, llm_chat, llm_embedding, tool_config)
        except ConnectionPoolError as e:
            logger.error(f"Failed to create the graph: {e}")
            sys.exit(1)
        save_graph_visualization(graph)
        yield
    except ConnectionPoolError as e:
        logger.error(f"Connection pool error during startup: {e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Unexpected error during startup: {e}")
        sys.exit(1)
    finally:
        if db_connection_pool and not db_connection_pool.closed:
            db_connection_pool.close()
            logger.info("Database connection pool closed.")
        logger.info("Application shutdown complete.")


app = FastAPI(lifespan=lifespan)
def generate_response(user_input, graph, tool_config, config):
    """Run the graph (internal streaming) and collect the fragments into one formatted answer."""
    content = ""
    try:
        events = graph.stream(
            {"messages": [{"role": "user", "content": user_input}]},
            config
        )
        content_fragments = []
        for event in events:
            for value in event.values():
                if not isinstance(value["messages"], list):
                    logger.warning("Unexpected message format in graph event.")
                    continue
                last_message = value["messages"][-1]
                if hasattr(last_message, "tool_calls") and last_message.tool_calls:
                    for tool_call in last_message.tool_calls:
                        if isinstance(tool_call, dict) and "name" in tool_call:
                            logger.info(f"Tool call: {tool_call['name']}")
                if hasattr(last_message, "content"):
                    fragment = last_message.content
                    content_fragments.append(fragment)
                    if hasattr(last_message, "name") and last_message.name in tool_config.get_tool_names():
                        tool_name = last_message.name
                        logger.info(f"Tool {tool_name} returned: {fragment}")
                    else:
                        logger.info(f"Model output: {fragment}")
                else:
                    logger.info("Message has no content.")
        content = "".join(content_fragments)
        logger.info(f"Full answer: {content}")
    except ValueError as ve:
        logger.error(f"Value error while generating the answer: {ve}")
        content = "Sorry, the input could not be processed."
    except Exception as e:
        logger.error(f"Error while generating the answer: {e}")
        content = "Sorry, an error occurred while generating the answer."
    formatted_response = format_response(content)
    logger.info(f"Formatted answer: {formatted_response}")
    return formatted_response
def get_dependencies() -> Tuple:
    """Hand the shared graph and tool config to request handlers, failing fast if startup did not finish."""
    if not graph or not tool_config:
        raise HTTPException(status_code=500, detail="Service is not initialized yet")
    return graph, tool_config


@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest, dependencies: Tuple = Depends(get_dependencies)):
    try:
        graph, tool_config = dependencies
        if not request.messages or not request.messages[-1].content:
            logger.error("Invalid request: messages is empty or the last message has no content.")
            raise HTTPException(status_code=400, detail="Invalid request: messages cannot be empty")
        user_input = request.messages[-1].content
        logger.info(f"User input: {user_input}")
        config = {
            "configurable": {
                "thread_id": request.userId or request.conversationId or "default_thread",
                "user_id": request.userId
            }
        }
        response_content = generate_response(user_input, graph, tool_config, config)
        response = ChatCompletionResponse(
            choices=[
                ChatCompletionResponseChoice(
                    index=0,
                    message=Message(role="assistant", content=response_content),
                    finish_reason="stop")
            ]
        )
        logger.info(f"Request handled: {request}")
        return JSONResponse(content=response.model_dump())
    except HTTPException as he:
        raise he
    except Exception as e:
        logger.error(f"Error while handling the request: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
if __name__ == "__main__":
    logger.info(f"Starting server on {Config.HOST}:{Config.PORT}")
    uvicorn.run(app, host=Config.HOST, port=Config.PORT)
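If the frontend really is waiting for the SSE terminator, one idea I am considering is to keep the SSE wire format but send the whole pre-computed answer as a single chunk, so the middle layer still avoids token-by-token streaming. This is a minimal sketch only, assuming the frontend accepts the standard `data: ... / data: [DONE]` framing; the extra route path and the chunk fields are my assumptions, not taken from the course frontend:

from fastapi.responses import StreamingResponse

@app.post("/v1/chat/completions/stream")  # hypothetical extra route, for illustration only
async def chat_completions_stream(request: ChatCompletionRequest,
                                  dependencies: Tuple = Depends(get_dependencies)):
    graph, tool_config = dependencies
    user_input = request.messages[-1].content
    config = {"configurable": {"thread_id": request.userId or request.conversationId or "default_thread",
                               "user_id": request.userId}}
    answer = generate_response(user_input, graph, tool_config, config)

    def sse_events():
        # One SSE chunk carrying the full answer, then the terminator the client waits for.
        chunk = {
            "id": uuid.uuid4().hex,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "choices": [{"index": 0,
                         "delta": {"role": "assistant", "content": answer},
                         "finish_reason": "stop"}],
        }
        yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(sse_events(), media_type="text/event-stream")

Is that the right direction, or should the frontend itself be switched to a plain non-streaming request/response call against the existing /v1/chat/completions endpoint?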