[DOC] Add reasoning capability to vLLM streamlit code (#19557)
@@ -11,6 +11,7 @@ Features:
 - Streaming response display
 - Configurable API endpoint
 - Real-time chat history
+- Reasoning Display: Optional thinking process visualization

 Requirements:
     pip install streamlit openai
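
For orientation, the app talks to a vLLM OpenAI-compatible server through the `openai` client. A minimal sketch of that connection, assuming a local deployment on the default port (the base URL, API key, and port are assumptions about a typical setup, not part of this commit):

    from openai import OpenAI

    # Assumed local vLLM endpoint; vLLM's OpenAI-compatible server
    # listens on port 8000 by default. The API key is a placeholder.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    # The app's model selection must match an ID the server exposes.
    for m in client.models.list().data:
        print(m.id)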
@@ -51,13 +52,33 @@ if "messages" not in st.session_state:
 if "active_session" not in st.session_state:
     st.session_state.active_session = None

+# Add new session state for reasoning
+if "show_reasoning" not in st.session_state:
+    st.session_state.show_reasoning = {}
+
 # Initialize session state for API base URL
 if "api_base_url" not in st.session_state:
     st.session_state.api_base_url = openai_api_base


 def create_new_chat_session():
-    """Create a new chat session with timestamp as ID"""
+    """Create a new chat session with timestamp as unique identifier.
+
+    This function initializes a new chat session by:
+    1. Generating a timestamp-based session ID
+    2. Creating an empty message list for the new session
+    3. Setting the new session as both current and active session
+    4. Resetting the messages list for the new session
+
+    Returns:
+        None
+
+    Session State Updates:
+        - sessions: Adds new empty message list with timestamp key
+        - current_session: Sets to new session ID
+        - active_session: Sets to new session ID
+        - messages: Resets to empty list
+    """
     session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     st.session_state.sessions[session_id] = []
     st.session_state.current_session = session_id
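
The `show_reasoning` dict introduced above is keyed by message index and lives in `st.session_state`, which persists across Streamlit's script reruns. A standalone sketch of that persistence pattern (not from the commit; the key name mirrors it for illustration):

    import streamlit as st

    # st.session_state survives reruns, so a dict created once keeps
    # accumulating entries as the user interacts with the app.
    if "show_reasoning" not in st.session_state:
        st.session_state.show_reasoning = {}

    # Each assistant message's reasoning is stored under its index in
    # the message list, so the history loop can pair them back up.
    st.session_state.show_reasoning[0] = "example reasoning text"
    st.write(st.session_state.show_reasoning)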
@@ -66,30 +87,98 @@ def create_new_chat_session():


 def switch_to_chat_session(session_id):
-    """Switch to a different chat session"""
+    """Switch the active chat context to a different session.
+
+    Args:
+        session_id (str): The timestamp ID of the session to switch to
+
+    This function handles chat session switching by:
+    1. Setting the specified session as current
+    2. Updating the active session marker
+    3. Loading the messages history from the specified session
+
+    Session State Updates:
+        - current_session: Updated to specified session_id
+        - active_session: Updated to specified session_id
+        - messages: Loaded from sessions[session_id]
+    """
     st.session_state.current_session = session_id
     st.session_state.active_session = session_id
     st.session_state.messages = st.session_state.sessions[session_id]


-def get_llm_response(messages, model):
-    """Get streaming response from llm
+def get_llm_response(messages, model, reason, content_ph=None, reasoning_ph=None):
+    """Generate and stream LLM response with optional reasoning process.

     Args:
-        messages: List of message dictionaries
-        model: Name of model
+        messages (list): List of conversation message dicts with 'role' and 'content'
+        model (str): The model identifier to use for generation
+        reason (bool): Whether to enable and display reasoning process
+        content_ph (streamlit.empty): Placeholder for streaming response content
+        reasoning_ph (streamlit.empty): Placeholder for streaming reasoning process

     Returns:
-        Streaming response object or error message string
+        tuple: (str, str)
+            - First string contains the complete response text
+            - Second string contains the complete reasoning text (if enabled)
+
+    Features:
+        - Streams both reasoning and response text in real-time
+        - Handles model API errors gracefully
+        - Supports live updating of thinking process
+        - Maintains separate content and reasoning displays
+
+    Raises:
+        Exception: Wrapped in error message if API call fails
+
+    Note:
+        The function uses streamlit placeholders for live updates.
+        When reason=True, the reasoning process appears above the response.
     """
+    full_text = ""
+    think_text = ""
+    live_think = None
+    # Build request parameters
+    params = {"model": model, "messages": messages, "stream": True}
+    if reason:
+        params["extra_body"] = {"chat_template_kwargs": {"enable_thinking": True}}
+
     try:
-        response = client.chat.completions.create(
-            model=model, messages=messages, stream=True
-        )
-        return response
+        response = client.chat.completions.create(**params)
+        if isinstance(response, str):
+            if content_ph:
+                content_ph.markdown(response)
+            return response, ""
+
+        # Prepare reasoning expander above content
+        if reason and reasoning_ph:
+            exp = reasoning_ph.expander("💭 Thinking Process (live)", expanded=True)
+            live_think = exp.empty()
+
+        # Stream chunks
+        for chunk in response:
+            delta = chunk.choices[0].delta
+            # Stream reasoning first
+            if reason and hasattr(delta, "reasoning_content") and live_think:
+                rc = delta.reasoning_content
+                if rc:
+                    think_text += rc
+                    live_think.markdown(think_text + "▌")
+            # Then stream content
+            if hasattr(delta, "content") and delta.content and content_ph:
+                full_text += delta.content
+                content_ph.markdown(full_text + "▌")
+
+        # Finalize displays: reasoning remains above, content below
+        if reason and live_think:
+            live_think.markdown(think_text)
+        if content_ph:
+            content_ph.markdown(full_text)
+
+        return full_text, think_text
     except Exception as e:
         st.error(f"Error details: {str(e)}")
-        return f"Error: {str(e)}"
+        return f"Error: {str(e)}", ""


 # Sidebar - API Settings first
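
The streaming loop above relies on vLLM attaching a `reasoning_content` field to each delta when the server runs with a reasoning parser enabled. A self-contained sketch of the same consumption pattern outside Streamlit; the base URL, API key, and model name are assumptions, and `enable_thinking` is only honored by models whose chat template supports such a switch:

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    stream = client.chat.completions.create(
        model="Qwen/Qwen3-0.6B",  # illustrative model name
        messages=[{"role": "user", "content": "What is 17 * 23?"}],
        stream=True,
        # Same opt-in the commit sends via params["extra_body"].
        extra_body={"chat_template_kwargs": {"enable_thinking": True}},
    )

    thinking, answer = "", ""
    for chunk in stream:
        delta = chunk.choices[0].delta
        # The reasoning parser splits the output into two streams:
        # reasoning_content arrives first, then the final content.
        if getattr(delta, "reasoning_content", None):
            thinking += delta.reasoning_content
        if getattr(delta, "content", None):
            answer += delta.content

    print("thinking:", thinking)
    print("answer:", answer)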
@@ -108,6 +197,7 @@ st.sidebar.title("Chat Sessions")
 if st.sidebar.button("New Session"):
     create_new_chat_session()

+
 # Display all sessions in reverse chronological order
 for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
     # Mark the active session with a pinned button
@@ -143,47 +233,79 @@ if st.session_state.current_session is None:
     create_new_chat_session()
     st.session_state.active_session = st.session_state.current_session

-# Display chat history for current session
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.write(message["content"])
+# Update the chat history display section
+for idx, msg in enumerate(st.session_state.messages):
+    # Render user messages normally
+    if msg["role"] == "user":
+        with st.chat_message("user"):
+            st.write(msg["content"])
+    # Render assistant messages with reasoning above
+    else:
+        # If reasoning exists for this assistant message, show it above the content
+        if idx in st.session_state.show_reasoning:
+            with st.expander("💭 Thinking Process", expanded=False):
+                st.markdown(st.session_state.show_reasoning[idx])
+        with st.chat_message("assistant"):
+            st.write(msg["content"])

-# Handle user input and generate llm response
+
+# Setup & Cache reasoning support check
+@st.cache_data(show_spinner=False)
+def server_supports_reasoning():
+    """Check if the current model supports reasoning capability.
+
+    Returns:
+        bool: True if the model supports reasoning, False otherwise
+    """
+    resp = client.chat.completions.create(
+        model=model,
+        messages=[{"role": "user", "content": "Hi"}],
+        stream=False,
+    )
+    return hasattr(resp.choices[0].message, "reasoning_content") and bool(
+        resp.choices[0].message.reasoning_content
+    )
+
+
+# Check support
+supports_reasoning = server_supports_reasoning()
+
+# Add reasoning toggle in sidebar if supported
+reason = False  # Default to False
+if supports_reasoning:
+    reason = st.sidebar.checkbox("Enable Reasoning", value=False)
+else:
+    st.sidebar.markdown(
+        "<span style='color:gray;'>Reasoning unavailable for this model.</span>",
+        unsafe_allow_html=True,
+    )
+    # reason remains False
+
+# Update the input handling section
 if prompt := st.chat_input("Type your message here..."):
-    # Save user message to session
+    # Save and display user message
     st.session_state.messages.append({"role": "user", "content": prompt})
     st.session_state.sessions[st.session_state.current_session] = (
         st.session_state.messages
     )
-
-    # Display user message
     with st.chat_message("user"):
         st.write(prompt)

-    # Prepare messages for llm
-    messages_for_llm = [
+    # Prepare LLM messages
+    msgs = [
        {"role": m["role"], "content": m["content"]} for m in st.session_state.messages
     ]

-    # Generate and display llm response
+    # Stream assistant response
     with st.chat_message("assistant"):
-        message_placeholder = st.empty()
-        full_response = ""
-
-        # Get streaming response from llm
-        response = get_llm_response(messages_for_llm, model)
-        if isinstance(response, str):
-            message_placeholder.markdown(response)
-            full_response = response
-        else:
-            for chunk in response:
-                if hasattr(chunk.choices[0].delta, "content"):
-                    content = chunk.choices[0].delta.content
-                    if content:
-                        full_response += content
-                        message_placeholder.markdown(full_response + "▌")
-
-            message_placeholder.markdown(full_response)
-
-    # Save llm response to session history
-    st.session_state.messages.append({"role": "assistant", "content": full_response})
+        # Placeholders: reasoning above, content below
+        reason_ph = st.empty()
+        content_ph = st.empty()
+        full, think = get_llm_response(msgs, model, reason, content_ph, reason_ph)
+        # Determine index for this new assistant message
+        message_index = len(st.session_state.messages)
+        # Save assistant reply
+        st.session_state.messages.append({"role": "assistant", "content": full})
+        # Persist reasoning in session state if any
+        if reason and think:
+            st.session_state.show_reasoning[message_index] = think
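
Two details in the handler above are easy to miss. First, `server_supports_reasoning()` issues one probe request and `@st.cache_data` memoizes the result, so the check does not repeat on every rerun. Second, `message_index` is read before the assistant reply is appended, so it equals the reply's final position in the list and lines up with the `idx` the history loop enumerates. A tiny sketch of that indexing invariant (plain Python, illustrative only):

    # Capturing len() before append yields the appended item's index,
    # matching how show_reasoning keys align with enumerate() above.
    messages = [{"role": "user", "content": "hi"}]
    message_index = len(messages)  # 1
    messages.append({"role": "assistant", "content": "hello"})
    assert messages[message_index]["role"] == "assistant"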