mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-06 08:06:43 +08:00
# RAGFlow Go Implementation Plan 🚀 This repository tracks the progress of porting RAGFlow to Go. We'll implement core features and provide performance comparisons between Python and Go versions. ## Implementation Checklist - [x] User Management APIs - [x] Dataset Management Operations - [x] Retrieval Test - [x] Chat Management Operations - [x] Infinity Go SDK --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Yingfeng Zhang <yingfeng.zhang@gmail.com>
150 lines
4.8 KiB
C++
150 lines
4.8 KiB
C++
// Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "api.h"
|
|
#include "stem_UTF_8_danish.h"
|
|
#include "stem_UTF_8_dutch.h"
|
|
#include "stem_UTF_8_english.h"
|
|
#include "stem_UTF_8_finnish.h"
|
|
#include "stem_UTF_8_french.h"
|
|
#include "stem_UTF_8_german.h"
|
|
#include "stem_UTF_8_hungarian.h"
|
|
#include "stem_UTF_8_italian.h"
|
|
#include "stem_UTF_8_norwegian.h"
|
|
#include "stem_UTF_8_porter.h"
|
|
#include "stem_UTF_8_portuguese.h"
|
|
#include "stem_UTF_8_romanian.h"
|
|
#include "stem_UTF_8_russian.h"
|
|
#include "stem_UTF_8_spanish.h"
|
|
#include "stem_UTF_8_swedish.h"
|
|
#include "stem_UTF_8_turkish.h"
|
|
#include "stemmer.h"
|
|
|
|
#ifdef __cplusplus
extern "C" {
#endif

// Bundle of the three entry points of one stemming algorithm together with
// the environment those entry points operate on.
//
// NOTE: the member order is relied upon by the positional initialisers of
// STEM_FUNCTION; do not reorder the fields.
struct StemFunc {
    // Builds a fresh stemming environment.
    struct SN_env *(*create)(void);
    // Releases an environment previously returned by `create`.
    void (*close)(struct SN_env *);
    // Runs the stemming algorithm on the word currently held by the environment.
    int (*stem)(struct SN_env *);

    // Environment passed to `close` and `stem`; the table entries leave it null.
    struct SN_env *env;
};

#ifdef __cplusplus
}
#endif
|
|
|
|
// Dispatch table indexed by language id. Slot 0 is an empty placeholder
// (Stemmer::Init rejects index 0); every other slot wires up the generated
// UTF-8 stemmer of one language. The `env` member is left null here and is
// filled in per Stemmer instance by Stemmer::Init.
StemFunc STEM_FUNCTION[STEM_LANG_EOS] = {
    {nullptr, nullptr, nullptr, nullptr},
    {danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem, nullptr},
    {dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem, nullptr},
    {english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem, nullptr},
    {finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem, nullptr},
    {french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem, nullptr},
    {german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem, nullptr},
    {hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem, nullptr},
    {italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem, nullptr},
    {norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem, nullptr},
    {porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem, nullptr},
    {portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem, nullptr},
    {romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem, nullptr},
    {russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem, nullptr},
    {spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem, nullptr},
    {swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem, nullptr},
    {turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem, nullptr},
};
|
|
|
|
// Constructs a stemmer with no language selected; Stem() returns false until
// Init() has succeeded.
Stemmer::Stemmer() {
    stem_function_ = nullptr;
}
|
|
|
|
// Releases whatever Init() set up (no-op when nothing is initialised).
Stemmer::~Stemmer() {
    DeInit();
}
|
|
|
|
// Selects the stemming algorithm for `language` and creates its environment.
//
// @param language  Index into STEM_FUNCTION; must satisfy
//                  0 < language < STEM_LANG_EOS.
// @return true when the stemmer is ready for Stem(); false when `language`
//         is out of range or the environment could not be created. On
//         failure the stemmer is left uninitialised.
//
// Calling Init() on an already initialised stemmer first releases the
// previous state, so re-initialisation does not leak the old StemFunc/env.
bool Stemmer::Init(Language language) {
    // Drop any state from an earlier Init(); without this the old StemFunc
    // and its SN_env would be overwritten and leaked.
    DeInit();

    // Validate before allocating anything.
    if (!(language > 0 && language < STEM_LANG_EOS)) {
        return false;
    }

    // Copy the create/close/stem entry points (and the null env) of the
    // selected language. Plain `new` throws on exhaustion instead of
    // returning null, so no null check is needed here.
    StemFunc *func = new StemFunc(STEM_FUNCTION[language]);

    // create env
    func->env = func->create();
    if (func->env == nullptr) {
        delete func;
        return false;
    }

    stem_function_ = static_cast<void *>(func);
    return true;
}
|
|
////////////
|
|
// struct SN_env {
|
|
// symbol *p;
|
|
// int c;
|
|
// int l;
|
|
// int lb;
|
|
// int bra;
|
|
// int ket;
|
|
// symbol **S;
|
|
// int *I;
|
|
// unsigned char *B;
|
|
// };
|
|
////////////
|
|
|
|
void Stemmer::DeInit(void) {
|
|
if (stem_function_) {
|
|
static_cast<StemFunc *>(stem_function_)->close(((StemFunc *)stem_function_)->env);
|
|
delete static_cast<StemFunc *>(stem_function_);
|
|
stem_function_ = 0;
|
|
}
|
|
}
|
|
|
|
bool Stemmer::Stem(const std::string &term, std::string &resultWord) {
|
|
if (!stem_function_) {
|
|
return false;
|
|
}
|
|
|
|
// set environment
|
|
if (SN_set_current(static_cast<StemFunc *>(stem_function_)->env, term.length(), (const symbol *)term.c_str())) {
|
|
static_cast<StemFunc *>(stem_function_)->env->l = 0;
|
|
return false;
|
|
}
|
|
|
|
// stemming
|
|
if (((StemFunc *)stem_function_)->stem(((StemFunc *)stem_function_)->env) < 0) {
|
|
return false;
|
|
}
|
|
|
|
((StemFunc *)stem_function_)->env->p[((StemFunc *)stem_function_)->env->l] = 0;
|
|
|
|
resultWord = (char *)((StemFunc *)stem_function_)->env->p;
|
|
|
|
return true;
|
|
}
|