Files
ragflow/internal/cpp/stemmer/stemmer.cpp
Jin Hai 70e9743ef1 RAGFlow go API server (#13240)
# RAGFlow Go Implementation Plan 🚀

This repository tracks the progress of porting RAGFlow to Go. We'll
implement core features and provide performance comparisons between
Python and Go versions.

## Implementation Checklist

- [x] User Management APIs
- [x] Dataset Management Operations
- [x] Retrieval Test
- [x] Chat Management Operations
- [x] Infinity Go SDK

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Yingfeng Zhang <yingfeng.zhang@gmail.com>
2026-03-04 19:17:16 +08:00

150 lines
4.8 KiB
C++

// Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "api.h"
#include "stem_UTF_8_danish.h"
#include "stem_UTF_8_dutch.h"
#include "stem_UTF_8_english.h"
#include "stem_UTF_8_finnish.h"
#include "stem_UTF_8_french.h"
#include "stem_UTF_8_german.h"
#include "stem_UTF_8_hungarian.h"
#include "stem_UTF_8_italian.h"
#include "stem_UTF_8_norwegian.h"
#include "stem_UTF_8_porter.h"
#include "stem_UTF_8_portuguese.h"
#include "stem_UTF_8_romanian.h"
#include "stem_UTF_8_russian.h"
#include "stem_UTF_8_spanish.h"
#include "stem_UTF_8_swedish.h"
#include "stem_UTF_8_turkish.h"
#include "stemmer.h"
#ifdef __cplusplus
extern "C" {
#endif
// Bundle of the three entry points of one generated Snowball stemmer together
// with the environment those entry points operate on. Stemmer keeps one of
// these per instance behind its untyped stem_function_ pointer.
struct StemFunc {
// Allocates a stemming environment; Stemmer::Init treats a null result as failure.
struct SN_env *(*create)(void);
// Releases an environment; Stemmer::DeInit passes it the env stored below.
void (*close)(struct SN_env *);
// Stems the word currently loaded in the environment; Stemmer::Stem treats a
// negative return value as failure.
int (*stem)(struct SN_env *);
// Environment used by stem/close. Null in the static table; set from create()
// by Stemmer::Init.
struct SN_env *env;
};
#ifdef __cplusplus
}
#endif
// Per-language Snowball entry points, indexed by the language value passed to
// Stemmer::Init. Slot 0 is an empty placeholder: Init rejects language values
// <= 0, so its null function pointers are never called. Every env starts out
// null here; Init fills it in on its own per-instance copy.
// NOTE(review): row order presumably mirrors the Language enum declared in
// stemmer.h — confirm when adding or reordering languages.
StemFunc STEM_FUNCTION[STEM_LANG_EOS] = {
    {nullptr, nullptr, nullptr, nullptr},
    {danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem, nullptr},
    {dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem, nullptr},
    {english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem, nullptr},
    {finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem, nullptr},
    {french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem, nullptr},
    {german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem, nullptr},
    {hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem, nullptr},
    {italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem, nullptr},
    {norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem, nullptr},
    {porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem, nullptr},
    {portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem, nullptr},
    {romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem, nullptr},
    {russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem, nullptr},
    {spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem, nullptr},
    {swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem, nullptr},
    {turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem, nullptr},
};
// Constructs a stemmer with no language selected. stem_function_ stays null
// until Init succeeds, which is what Stem and DeInit check for.
Stemmer::Stemmer() : stem_function_(nullptr) {}
// Releases whatever Init acquired; DeInit is a no-op when nothing is held.
Stemmer::~Stemmer() {
    DeInit();
}
// Prepares this stemmer for `language`.
//
// Anything set up by an earlier successful Init is released first, so Init can
// be called repeatedly without leaking the previous environment.
//
// Returns true when the Snowball environment was created. Returns false, and
// leaves the stemmer uninitialised (Stem will return false), when `language`
// is not strictly between 0 and STEM_LANG_EOS or when the stemmer's create
// function returns null.
bool Stemmer::Init(Language language) {
    // Release state from a previous Init; it would otherwise be overwritten
    // below and never closed or deleted.
    DeInit();

    // Validate before allocating. Slot 0 of STEM_FUNCTION holds no functions.
    if (!(language > 0 && language < STEM_LANG_EOS)) {
        return false;
    }

    // Per-instance copy of the table entry (create/close/stem, env == null).
    // Plain `new` throws on failure rather than returning null, so no null
    // check is needed here.
    StemFunc *funcs = new StemFunc(STEM_FUNCTION[language]);

    // create env
    funcs->env = funcs->create();
    if (funcs->env == nullptr) {
        // Nothing to close: the environment was never created.
        delete funcs;
        return false;
    }

    // Publish only a fully initialised record.
    stem_function_ = static_cast<void *>(funcs);
    return true;
}
////////////
// struct SN_env {
// symbol *p;
// int c;
// int l;
// int lb;
// int bra;
// int ket;
// symbol **S;
// int *I;
// unsigned char *B;
// };
////////////
void Stemmer::DeInit(void) {
if (stem_function_) {
static_cast<StemFunc *>(stem_function_)->close(((StemFunc *)stem_function_)->env);
delete static_cast<StemFunc *>(stem_function_);
stem_function_ = 0;
}
}
bool Stemmer::Stem(const std::string &term, std::string &resultWord) {
if (!stem_function_) {
return false;
}
// set environment
if (SN_set_current(static_cast<StemFunc *>(stem_function_)->env, term.length(), (const symbol *)term.c_str())) {
static_cast<StemFunc *>(stem_function_)->env->l = 0;
return false;
}
// stemming
if (((StemFunc *)stem_function_)->stem(((StemFunc *)stem_function_)->env) < 0) {
return false;
}
((StemFunc *)stem_function_)->env->p[((StemFunc *)stem_function_)->env->l] = 0;
resultWord = (char *)((StemFunc *)stem_function_)->env->p;
return true;
}