mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-21 06:28:10 +08:00
### What problem does this PR solve? 1. Change the Go admin server port from 9385 to 9383 to avoid conflicts. 2. In entrypoint.sh, start the Go server only after the Python servers have started completely. 3. Fix some database migration issues. 4. Add more API routes in web to be compliant with EE. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Signed-off-by: Jin Hai <haijin.chn@gmail.com>
308 lines
9.2 KiB
Bash
Executable File
308 lines
9.2 KiB
Bash
Executable File
#!/usr/bin/env bash

# Abort the script as soon as an unguarded command exits with a non-zero
# status, so a failed setup step does not go unnoticed.
set -e
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Usage and command-line argument parsing
|
|
# -----------------------------------------------------------------------------
|
|
# Print the command-line help to stdout and terminate the script with
# exit status 1. Called by the argument parser for any unrecognized option.
# The option list includes the MCP-related switches accepted by the parser.
function usage() {
    # Unquoted here-document: $0 is expanded, \` yields a literal backtick.
    cat <<EOF
Usage: $0 [--disable-webserver] [--disable-taskexecutor] [--disable-datasync] [--consumer-no-beg=<num>] [--consumer-no-end=<num>] [--workers=<num>] [--host-id=<string>]

 --disable-webserver Disables the web server (nginx + ragflow_server).
 --disable-taskexecutor Disables task executor workers.
 --disable-datasync Disables synchronization of datasource workers.
 --enable-mcpserver Enables the MCP server.
 --enable-adminserver Enables the Admin server.
 --init-superuser Initializes the superuser.
 --consumer-no-beg=<num> Start range for consumers (if using range-based).
 --consumer-no-end=<num> End range for consumers (if using range-based).
 --workers=<num> Number of task executors to run (if range is not used).
 --host-id=<string> Unique ID for the host (defaults to \`hostname\`).
 --mcp-host=<host> Value passed to the MCP server as --host.
 --mcp-port=<port> Value passed to the MCP server as --port.
 --mcp-base-url=<url> Value passed to the MCP server as --base-url.
 --mcp-mode=<mode> Value passed to the MCP server as --mode.
 --mcp-host-api-key=<key> Value passed to the MCP server as --api-key.
 --mcp-script-path=<path> Path of the MCP server script to run.
 --no-transport-sse-enabled Passes --no-transport-sse-enabled to the MCP server.
 --no-transport-streamable-http-enabled Passes --no-transport-streamable-http-enabled to the MCP server.
 --no-json-response Passes --no-json-response to the MCP server.

Examples:
 $0 --disable-taskexecutor
 $0 --disable-webserver --consumer-no-beg=0 --consumer-no-end=5
 $0 --disable-webserver --workers=2 --host-id=myhost123
 $0 --enable-mcpserver
 $0 --enable-adminserver
 $0 --init-superuser
EOF
    exit 1
}
|
|
|
|
# Component switches (1 = start the component, 0 = skip it). They may be
# overridden by the command-line options parsed further down.
ENABLE_WEBSERVER=1        # Default to enable web server
ENABLE_TASKEXECUTOR=1     # Default to enable task executor
ENABLE_DATASYNC=1
ENABLE_MCP_SERVER=0
ENABLE_ADMIN_SERVER=0     # Admin server is off by default
INIT_SUPERUSER_ARGS=""    # Default to not initialize superuser

# Task executor sizing: an ID range [CONSUMER_NO_BEG, CONSUMER_NO_END) is used
# when END > BEG, otherwise WORKERS executors are started.
CONSUMER_NO_BEG=0
CONSUMER_NO_END=0
WORKERS=1

# MCP server settings, forwarded to the MCP script as command-line arguments.
MCP_HOST="127.0.0.1"
MCP_PORT=9382
MCP_BASE_URL="http://127.0.0.1:9380"
MCP_SCRIPT_PATH="/ragflow/mcp/server/server.py"
MCP_MODE="self-host"
MCP_HOST_API_KEY=""
MCP_TRANSPORT_SSE_FLAG="--transport-sse-enabled"
MCP_TRANSPORT_STREAMABLE_HTTP_FLAG="--transport-streamable-http-enabled"
MCP_JSON_RESPONSE_FLAG="--json-response"
|
|
|
|
# -----------------------------------------------------------------------------
# Host ID logic:
#   1. By default, use the system hostname if its length is <= 32.
#   2. Otherwise, use the full MD5 hash of the hostname (32 hex chars).
# The result may later be replaced by the --host-id=<string> option.
# -----------------------------------------------------------------------------
CURRENT_HOSTNAME="$(hostname)"
if (( ${#CURRENT_HOSTNAME} <= 32 )); then
    DEFAULT_HOST_ID="$CURRENT_HOSTNAME"
else
    # printf '%s' feeds the name without a trailing newline, so the digest
    # covers exactly the hostname characters.
    DEFAULT_HOST_ID="$(printf '%s' "$CURRENT_HOSTNAME" | md5sum | cut -d ' ' -f 1)"
fi

HOST_ID="$DEFAULT_HOST_ID"
|
|
|
|
# Parse command-line arguments.
#
# Each argument is either a bare switch or of the form --name=value; the value
# is everything after the first '='. Recognized options overwrite the
# corresponding global setting. Any unrecognized argument calls usage, which
# prints the help text and exits.
#
# Arguments: the script's command-line arguments ("$@").
function parse_args() {
    local arg
    # "$@" is expanded once before the loop starts, so no shifting is needed
    # to advance through the arguments.
    for arg in "$@"; do
        case "$arg" in
            --disable-webserver)
                ENABLE_WEBSERVER=0
                ;;
            --disable-taskexecutor)
                ENABLE_TASKEXECUTOR=0
                ;;
            --disable-datasync)
                ENABLE_DATASYNC=0
                ;;
            --enable-mcpserver)
                ENABLE_MCP_SERVER=1
                ;;
            --enable-adminserver)
                ENABLE_ADMIN_SERVER=1
                ;;
            --init-superuser)
                INIT_SUPERUSER_ARGS="--init-superuser"
                ;;
            --mcp-host=*)
                MCP_HOST="${arg#*=}"
                ;;
            --mcp-port=*)
                MCP_PORT="${arg#*=}"
                ;;
            --mcp-base-url=*)
                MCP_BASE_URL="${arg#*=}"
                ;;
            --mcp-mode=*)
                MCP_MODE="${arg#*=}"
                ;;
            --mcp-host-api-key=*)
                MCP_HOST_API_KEY="${arg#*=}"
                ;;
            --mcp-script-path=*)
                MCP_SCRIPT_PATH="${arg#*=}"
                ;;
            --no-transport-sse-enabled)
                MCP_TRANSPORT_SSE_FLAG="--no-transport-sse-enabled"
                ;;
            --no-transport-streamable-http-enabled)
                MCP_TRANSPORT_STREAMABLE_HTTP_FLAG="--no-transport-streamable-http-enabled"
                ;;
            --no-json-response)
                MCP_JSON_RESPONSE_FLAG="--no-json-response"
                ;;
            --consumer-no-beg=*)
                CONSUMER_NO_BEG="${arg#*=}"
                ;;
            --consumer-no-end=*)
                CONSUMER_NO_END="${arg#*=}"
                ;;
            --workers=*)
                WORKERS="${arg#*=}"
                ;;
            --host-id=*)
                HOST_ID="${arg#*=}"
                ;;
            *)
                usage
                ;;
        esac
    done
}

parse_args "$@"
|
|
|
|
# -----------------------------------------------------------------------------
# Replace env variables in the service_conf.yaml file
#
# service_conf.yaml is regenerated from service_conf.yaml.template on every
# start. Each template line is rendered by render_conf_line and appended to
# the configuration file.
# -----------------------------------------------------------------------------
CONF_DIR="/ragflow/conf"
TEMPLATE_FILE="${CONF_DIR}/service_conf.yaml.template"
CONF_FILE="${CONF_DIR}/service_conf.yaml"

# ERE matching one ${NAME:-default} placeholder: group 1 is NAME, group 2 the
# default. It is single-quoted so the backslashes reach the regex engine
# untouched; an unescaped leading '$' would act as an end-of-line anchor and
# the pattern could never match.
DEF_ENV_VALUE_PATTERN='\$\{([A-Za-z_][A-Za-z0-9_]*):-([^}]+)\}'

# Render one template line to stdout.
#
#   * A line without a ${NAME:-default} placeholder is expanded by the shell
#     via eval.
#   * A line whose placeholders include at least one variable that is set and
#     non-empty is expanded by the shell as well, so every placeholder on the
#     line receives either its value or its default.
#   * A line whose placeholder variables are all unset or empty gets each
#     placeholder replaced by its default text with sed, without shell
#     evaluation, so the rest of the line is emitted literally.
#
# NOTE(review): eval executes any command substitution present in the
# template, so the template must be trusted content.
#
# Arguments: $1 - the template line.
function render_conf_line() {
    local line="$1"
    local remaining="$line"
    local varname
    local has_override=0

    if [[ ! "$line" =~ $DEF_ENV_VALUE_PATTERN ]]; then
        eval "echo \"$line\""
        return
    fi

    # Inspect every placeholder on the line, not only the first one.
    while [[ "$remaining" =~ $DEF_ENV_VALUE_PATTERN ]]; do
        varname="${BASH_REMATCH[1]}"
        if [[ -n "${!varname}" ]]; then
            has_override=1
            break
        fi
        # Drop everything up to and including the placeholder just examined.
        remaining="${remaining#*"${BASH_REMATCH[0]}"}"
    done

    if (( has_override )); then
        eval "echo \"$line\""
    else
        printf '%s\n' "$line" \
            | sed -E 's/\$\{[A-Za-z_][A-Za-z0-9_]*:-([^}]+)\}/\1/g'
    fi
}

rm -f "${CONF_FILE}"
# The "|| [[ -n ... ]]" part keeps a final line that lacks a trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
    render_conf_line "$line" >> "${CONF_FILE}"
done < "${TEMPLATE_FILE}"
|
|
|
|
# Library search path exported to every process started by this script.
export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu/"

# Python interpreter used to launch all Python services below.
PY=python3
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Function(s)
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Run one task executor and keep restarting it forever.
#
# The executor is started with libjemalloc preloaded (its directory is
# obtained from pkg-config) and is identified as "<host_id>_<consumer_id>".
# When the process ends, it is started again after a one-second pause.
# This function never returns; the script calls it as a background job.
#
# Arguments:
#   $1 - consumer number
#   $2 - host ID
function task_exe() {
    local consumer_id="$1"
    local host_id="$2"
    local jemalloc_path

    jemalloc_path="$(pkg-config --variable=libdir jemalloc)/libjemalloc.so"
    while true; do
        LD_PRELOAD="$jemalloc_path" \
            "$PY" rag/svr/task_executor.py "${host_id}_${consumer_id}" &
        # Block until the executor started above has exited.
        wait
        sleep 1
    done
}
|
|
|
|
# Launch the MCP server script as a background job.
#
# Globals (read): PY, MCP_SCRIPT_PATH, MCP_HOST, MCP_PORT, MCP_BASE_URL,
#                 MCP_MODE, MCP_HOST_API_KEY, MCP_TRANSPORT_SSE_FLAG,
#                 MCP_TRANSPORT_STREAMABLE_HTTP_FLAG, MCP_JSON_RESPONSE_FLAG
# Outputs: one status line on stdout.
function start_mcp_server() {
    # One array element per argument, so values containing spaces stay intact.
    local -a mcp_args=(
        --host="${MCP_HOST}"
        --port="${MCP_PORT}"
        --base-url="${MCP_BASE_URL}"
        --mode="${MCP_MODE}"
        --api-key="${MCP_HOST_API_KEY}"
        "${MCP_TRANSPORT_SSE_FLAG}"
        "${MCP_TRANSPORT_STREAMABLE_HTTP_FLAG}"
        "${MCP_JSON_RESPONSE_FLAG}"
    )

    echo "Starting MCP Server on ${MCP_HOST}:${MCP_PORT} with base URL ${MCP_BASE_URL}..."
    "$PY" "${MCP_SCRIPT_PATH}" "${mcp_args[@]}" &
}
|
|
|
|
# Make sure the docling Python package is importable when USE_DOCLING=true.
#
# If USE_DOCLING is anything other than "true", a notice is printed and
# nothing else happens. Otherwise the interpreter is asked whether the
# 'docling' module can be found; if it cannot, the package is installed with
# "uv pip install". DOCLING_VERSION supplies the version specifier appended
# to the package name (default "==2.71.0").
#
# Returns: 0 when docling is disabled or already present, otherwise the exit
#          status of the install command.
function ensure_docling() {
    if [[ "${USE_DOCLING}" != "true" ]]; then
        echo "[docling] disabled by USE_DOCLING"
        return 0
    fi

    local docling_pin="${DOCLING_VERSION:-==2.71.0}"
    if ! "$PY" -c "import importlib.util,sys; sys.exit(0 if importlib.util.find_spec('docling') else 1)"; then
        uv pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --extra-index-url https://pypi.org/simple --no-cache-dir "docling${docling_pin}"
    fi
}
|
|
|
|
# Create the database tables by calling init_database_tables from
# api.db.db_models through the Python interpreter in $PY.
# Returns the interpreter's exit status.
function ensure_db_init() {
    echo "Initializing database tables..."
    "$PY" -c "from api.db.db_models import init_database_tables as init_web_db; init_web_db()"
}
|
|
|
|
# Poll a URL with curl until it answers successfully or a timeout elapses.
#
# Arguments:
#   $1 - URL to probe (curl -f: HTTP error responses count as "not ready")
#   $2 - server name used in the log messages
#   $3 - optional timeout in seconds (default 90)
#   $4 - optional pause between probes in seconds (default 2)
# Outputs: progress messages on stdout.
# Returns: 0 once the URL responds, 1 when the timeout is exceeded.
function wait_for_server() {
    local url="$1"
    local server_name="$2"
    local timeout="${3:-90}"
    local interval="${4:-2}"
    local start_time

    # Assigned separately from the declaration so that a failing 'date' is not
    # masked by the exit status of 'local'.
    start_time="$(date +%s)"

    echo "Waiting for $server_name to be ready at $url..."
    while ! curl -f -s -o /dev/null "$url"; do
        if (( $(date +%s) - start_time > timeout )); then
            echo "Timeout waiting for $server_name after $timeout seconds"
            return 1
        fi
        sleep "$interval"
    done
    echo "$server_name is ready."
}
|
|
|
|
# -----------------------------------------------------------------------------
# Start components based on flags
# -----------------------------------------------------------------------------
# Preparation steps run before any component is started: the optional docling
# installation first, then the database table initialization.
ensure_docling
ensure_db_init
|
|
|
|
# Web server: nginx plus ragflow_server, the latter kept alive by a background
# restart loop.
if [[ "${ENABLE_WEBSERVER}" -eq 1 ]]; then
    echo "Starting nginx..."
    /usr/sbin/nginx

    echo "Starting ragflow_server..."
    while true; do
        # INIT_SUPERUSER_ARGS is deliberately unquoted: when it is empty no
        # argument at all is passed to the server.
        "$PY" api/ragflow_server.py ${INIT_SUPERUSER_ARGS} &

        if [[ "${API_PROXY_SCHEME}" == "hybrid" ]]; then
            # In hybrid mode bin/server_main is started only after the Python
            # server answers on its health endpoint.
            # NOTE(review): under 'set -e' a health-check timeout presumably
            # ends this restart loop -- confirm that this is intended.
            wait_for_server "http://127.0.0.1:9380/healthz" "ragflow_server"
            echo "Starting RAGFlow server in hybrid mode..."
            bin/server_main &
        fi

        # Wait for every job started in this iteration to exit, then restart.
        wait
        sleep 1
    done &
fi
|
|
|
|
# Data source synchronization worker, restarted one second after each exit.
if [[ "${ENABLE_DATASYNC}" -eq 1 ]]; then
    echo "Starting data sync..."
    while true; do
        "$PY" rag/svr/sync_data_source.py &
        # Block until the worker has exited before starting it again.
        wait
        sleep 1
    done &
fi
|
|
|
|
# Admin server, kept alive by a background restart loop.
if [[ "${ENABLE_ADMIN_SERVER}" -eq 1 ]]; then
    echo "Starting admin_server..."
    while true; do
        "$PY" admin/server/admin_server.py &

        if [[ "${API_PROXY_SCHEME}" == "hybrid" ]]; then
            # In hybrid mode bin/admin_server is started only after the Python
            # admin server answers on its ping endpoint.
            # NOTE(review): under 'set -e' a ping timeout presumably ends this
            # restart loop -- confirm that this is intended.
            wait_for_server "http://127.0.0.1:9381/api/v1/admin/ping" "admin_server"
            echo "Starting Admin server in hybrid mode..."
            bin/admin_server &
        fi

        # Wait for every job started in this iteration to exit, then restart.
        wait
        sleep 1
    done &
fi
|
|
|
|
# MCP server: started once in the background, only when --enable-mcpserver
# was given.
if [[ "${ENABLE_MCP_SERVER}" -eq 1 ]]; then
    start_mcp_server
fi
|
|
|
|
|
|
# Task executors: one background task_exe job per consumer ID.
if [[ "${ENABLE_TASKEXECUTOR}" -eq 1 ]]; then
    if [[ "${CONSUMER_NO_END}" -gt "${CONSUMER_NO_BEG}" ]]; then
        # Range mode: IDs CONSUMER_NO_BEG (inclusive) to CONSUMER_NO_END
        # (exclusive).
        echo "Starting task executors on host '${HOST_ID}' for IDs in [${CONSUMER_NO_BEG}, ${CONSUMER_NO_END})..."
        for (( i=CONSUMER_NO_BEG; i<CONSUMER_NO_END; i++ )); do
            task_exe "${i}" "${HOST_ID}" &
        done
    else
        # Otherwise, start a fixed number of workers with IDs 0..WORKERS-1
        echo "Starting ${WORKERS} task executor(s) on host '${HOST_ID}'..."
        for (( i=0; i<WORKERS; i++ )); do
            task_exe "${i}" "${HOST_ID}" &
        done
    fi
fi

# Keep the script in the foreground for as long as any background job runs.
wait
|