Fixes: Tiktoken splitter issue

This commit is contained in:
sauravniraula 2025-05-11 20:58:22 +05:45
parent b055b7e509
commit 5b77b4bbee
No known key found for this signature in database
GPG key ID: 60FCC1B5A5E83326
3 changed files with 27 additions and 21 deletions

View file

@ -1,17 +1,19 @@
require("dotenv").config();
import { app, BrowserWindow } from "electron";
import path from "path";
import { createUserConfig, findTwoUnusedPorts, killProcess } from "./utils";
import { createUserConfig, findTwoUnusedPorts, killProcess, setupEnv } from "./utils";
import { startFastApiServer, startNextJsServer } from "./servers";
import { ChildProcessByStdio } from "child_process";
import { localhost } from "./constants";
var isDev = false;
// var isDev = !app.isPackaged;
var isDev = !app.isPackaged;
// var isDev = false;
var baseDir = app.getAppPath();
var fastapiDir = isDev ? path.join(baseDir, "servers/fastapi") : path.join(baseDir, "resources/fastapi");
var nextjsDir = isDev ? path.join(baseDir, "servers/nextjs") : path.join(baseDir, "resources/nextjs");
var libreofficePath = path.join(baseDir, "dependencies/libreoffice/linux_build/libreoffice.appimage");
var tempDir = app.getPath("temp");
var dataDir = app.getPath("userData");
var userConfigPath = path.join(dataDir, "userConfig.json");
@ -20,8 +22,6 @@ var win: BrowserWindow | undefined;
var fastApiProcess: ChildProcessByStdio<any, any, any> | undefined;
var nextjsProcess: ChildProcessByStdio<any, any, any> | undefined;
const createWindow = () => {
win = new BrowserWindow({
webPreferences: {
@ -35,37 +35,31 @@ const createWindow = () => {
async function startServers(fastApiPort: number, nextjsPort: number) {
try {
process.env.NEXT_PUBLIC_FAST_API = `${localhost}:${fastApiPort}`;
process.env.NEXT_PUBLIC_URL = `${localhost}:${nextjsPort}`;
process.env.TEMP_DIRECTORY = tempDir;
process.env.NEXT_PUBLIC_USER_CONFIG_PATH = userConfigPath;
fastApiProcess = await startFastApiServer(
fastapiDir,
fastApiPort,
{
DEBUG: isDev ? "True" : "False",
LLM: process.env.LLM,
LIBREOFFICE: process.env.LIBREOFFICE,
LIBREOFFICE: libreofficePath,
OPENAI_API_KEY: process.env.OPENAI_API_KEY,
GOOGLE_API_KEY: process.env.GOOGLE_API_KEY,
APP_DATA_DIRECTORY: dataDir,
TEMP_DIRECTORY: tempDir,
USER_CONFIG_PATH: userConfigPath,
},
isDev
isDev,
);
nextjsProcess = await startNextJsServer(
nextjsDir,
nextjsPort,
{
NEXT_PUBLIC_FAST_API: `${localhost}:${fastApiPort}`,
TEMP_DIRECTORY: tempDir,
NEXT_PUBLIC_URL: `${localhost}:${nextjsPort}`,
NEXT_PUBLIC_USER_CONFIG_PATH: userConfigPath,
NEXT_PUBLIC_FAST_API: process.env.NEXT_PUBLIC_FAST_API,
TEMP_DIRECTORY: process.env.TEMP_DIRECTORY,
NEXT_PUBLIC_URL: process.env.NEXT_PUBLIC_URL,
NEXT_PUBLIC_USER_CONFIG_PATH: process.env.NEXT_PUBLIC_USER_CONFIG_PATH,
},
isDev
isDev,
);
} catch (error) {
console.error("Server startup error:", error);
@ -95,6 +89,9 @@ app.whenReady().then(async () => {
const [fastApiPort, nextjsPort] = await findTwoUnusedPorts();
console.log(`FastAPI port: ${fastApiPort}, NextJS port: ${nextjsPort}`);
//? Set up environment variables to be used in the preload.ts file
setupEnv(app, fastApiPort, nextjsPort);
await startServers(fastApiPort, nextjsPort);
win?.loadURL(`${localhost}:${nextjsPort}`);
});

View file

@ -6,6 +6,7 @@ import { platform } from 'os'
import type { App } from "electron"
import fs from 'fs'
import path from 'path'
import { localhost } from './constants'
const execAsync = promisify(exec)
@ -29,6 +30,13 @@ export function createUserConfig(app: App, userConfig: UserConfig) {
fs.writeFileSync(configPath, JSON.stringify(mergedConfig))
}
/**
 * Publishes the locally started servers' URLs and the app's working
 * directories on `process.env`.
 *
 * Variables written:
 * - `NEXT_PUBLIC_FAST_API`: `${localhost}:${fastApiPort}`
 * - `NEXT_PUBLIC_URL`: `${localhost}:${nextjsPort}`
 * - `TEMP_DIRECTORY`: Electron's "temp" path
 * - `NEXT_PUBLIC_USER_CONFIG_PATH`: `userConfig.json` inside Electron's "userData" path
 *
 * @param app Electron application instance, used to resolve the "temp" and "userData" paths.
 * @param fastApiPort Port the FastAPI server listens on.
 * @param nextjsPort Port the Next.js server listens on.
 */
export function setupEnv(app: App, fastApiPort: number, nextjsPort: number) {
  process.env.NEXT_PUBLIC_FAST_API = `${localhost}:${fastApiPort}`;
  process.env.NEXT_PUBLIC_URL = `${localhost}:${nextjsPort}`;
  process.env.TEMP_DIRECTORY = app.getPath("temp");
  // Build the path with path.join rather than a hard-coded "/" so the value
  // uses the platform separator and matches paths built with path.join elsewhere.
  process.env.NEXT_PUBLIC_USER_CONFIG_PATH = path.join(app.getPath("userData"), "userConfig.json");
}
export function killProcess(pid: number) {
return new Promise((resolve, reject) => {

View file

@ -40,9 +40,10 @@ async def generate_document_summary(documents: List[Document]):
if os.getenv("LLM") == "openai"
else ChatGoogleGenerativeAI(model="gemini-2.0-flash", max_output_tokens=8000)
)
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
encoding_name="cl100k_base", chunk_size=200000, chunk_overlap=0
)
# text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
# encoding_name="cl100k_base", chunk_size=200000, chunk_overlap=0
# )
text_splitter = CharacterTextSplitter(chunk_size=200000, chunk_overlap=0)
chain = prompt_template | model
coroutines = []