From 8c972e4e43ea14f77d17c2eda3ec1cefbaf40c6f Mon Sep 17 00:00:00 2001 From: sauravniraula Date: Tue, 5 Aug 2025 21:05:33 +0545 Subject: [PATCH 1/5] fix: removes redis server startup from start.js --- start.js | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/start.js b/start.js index 60f6c438..a9cc9af2 100644 --- a/start.js +++ b/start.js @@ -145,27 +145,12 @@ const startServers = async () => { console.error("Ollama process failed to start:", err); }); - const redisProcess = spawn( - "redis-server", - [], - { - cwd: "/", - stdio: "inherit", - env: process.env, - } - ); - - redisProcess.on("error", err => { - console.error("Redis process failed to start:", err); - }); - // Keep the Node process alive until both servers exit const exitCode = await Promise.race([ new Promise(resolve => fastApiProcess.on("exit", resolve)), new Promise(resolve => nextjsProcess.on("exit", resolve)), new Promise(resolve => ollamaProcess.on("exit", resolve)), - new Promise(resolve => redisProcess.on("exit", resolve)), ]); console.log(`One of the processes exited. Exit code: ${exitCode}`); From d0fd1b3aeddefb2f05e33ef0bb072b13b1b436ab Mon Sep 17 00:00:00 2001 From: sauravniraula Date: Tue, 5 Aug 2025 22:00:08 +0545 Subject: [PATCH 2/5] chore: error handling if generated outline is not valid json --- servers/fastapi/api/v1/ppt/endpoints/outlines.py | 10 +++++++++- .../fastapi/api/v1/ppt/endpoints/presentation.py | 9 ++++++++- .../fastapi/app_mcp/wrapper/generate_outline.py | 14 ++++++++++++-- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/servers/fastapi/api/v1/ppt/endpoints/outlines.py b/servers/fastapi/api/v1/ppt/endpoints/outlines.py index 0638543e..b0ec47af 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/outlines.py +++ b/servers/fastapi/api/v1/ppt/endpoints/outlines.py @@ -68,7 +68,15 @@ async def stream_outlines( ).to_string() presentation_outlines_text += chunk - presentation_outlines_json = json.loads(presentation_outlines_text) + try: + presentation_outlines_json = json.loads(presentation_outlines_text) + except Exception as e: + print(e) + raise HTTPException( + status_code=400, + detail="Failed to generate presentation outlines. Please try again.", + ) + presentation_outlines = PresentationOutlineModel( **presentation_outlines_json ) diff --git a/servers/fastapi/api/v1/ppt/endpoints/presentation.py b/servers/fastapi/api/v1/ppt/endpoints/presentation.py index 215f4b4a..5b4589a2 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/presentation.py +++ b/servers/fastapi/api/v1/ppt/endpoints/presentation.py @@ -355,7 +355,14 @@ async def generate_presentation_api( ): presentation_outlines_text += chunk - presentation_outlines_json = json.loads(presentation_outlines_text) + try: + presentation_outlines_json = json.loads(presentation_outlines_text) + except Exception as e: + print(e) + raise HTTPException( + status_code=400, + detail="Failed to generate presentation outlines. Please try again.", + ) presentation_outlines = PresentationOutlineModel(**presentation_outlines_json) outlines = presentation_outlines.slides[:n_slides] total_outlines = len(outlines) diff --git a/servers/fastapi/app_mcp/wrapper/generate_outline.py b/servers/fastapi/app_mcp/wrapper/generate_outline.py index 0c2d88a7..c2efdc38 100644 --- a/servers/fastapi/app_mcp/wrapper/generate_outline.py +++ b/servers/fastapi/app_mcp/wrapper/generate_outline.py @@ -1,5 +1,6 @@ import json import os +from fastapi import HTTPException from typing import Dict, Any, Optional, List, Annotated from models.presentation_outline_model import PresentationOutlineModel from utils.llm_calls.generate_presentation_outlines import generate_ppt_outline @@ -63,8 +64,17 @@ async def generate_outline( await asyncio.sleep(0) presentation_outlines_text += chunk - presentation_outlines_json = json.loads(presentation_outlines_text) - presentation_outlines = PresentationOutlineModel(**presentation_outlines_json) + try: + presentation_outlines_json = json.loads(presentation_outlines_text) + presentation_outlines = PresentationOutlineModel( + **presentation_outlines_json + ) + except Exception as e: + print(e) + raise HTTPException( + status_code=400, + detail="Failed to generate presentation outlines. Please try again.", + ) # Truncate slides to n_slides presentation_outlines.slides = presentation_outlines.slides[:n_slides] From bf16491c73838f35d1f4bb18b7905e276ba29c9a Mon Sep 17 00:00:00 2001 From: sauravniraula Date: Tue, 5 Aug 2025 23:19:59 +0545 Subject: [PATCH 3/5] fix: nextjs build issues and docling support for Powerpoint and Word --- Dockerfile | 2 +- Dockerfile.dev | 2 +- servers/fastapi/services/docling_service.py | 14 +++-- servers/fastapi/utils/get_dynamic_models.py | 2 +- .../outline/components/GenerateButton.tsx | 3 - .../outline/components/OutlinePage.tsx | 1 - .../outline/hooks/useOutlineManagement.ts | 15 ++--- .../hooks/usePresentationGeneration.ts | 4 +- .../pdf-maker/PdfMakerPage.tsx | 9 +-- .../store/slices/presentationGeneration.ts | 57 +++++++++---------- start.js | 24 ++++---- 11 files changed, 59 insertions(+), 74 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9ba7c3bf..3815c237 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ WORKDIR /app # Set environment variables ENV APP_DATA_DIRECTORY=/app_data ENV TEMP_DIRECTORY=/tmp/presenton -ENV PYTHONPATH="${PYTHONPATH}:/app/servers/fastapi" +# ENV PYTHONPATH="${PYTHONPATH}:/app/servers/fastapi" # Install ollama diff --git a/Dockerfile.dev b/Dockerfile.dev index 20b37e52..f4e860a1 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -19,7 +19,7 @@ RUN ls -a # Set environment variables ENV APP_DATA_DIRECTORY=/app_data ENV TEMP_DIRECTORY=/tmp/presenton -ENV PYTHONPATH="${PYTHONPATH}:/app/servers/fastapi" +# ENV PYTHONPATH="${PYTHONPATH}:/app/servers/fastapi" # Install ollama RUN curl -fsSL http://ollama.com/install.sh | sh diff --git a/servers/fastapi/services/docling_service.py b/servers/fastapi/services/docling_service.py index 6a9220b2..f6ae203e 100644 --- a/servers/fastapi/services/docling_service.py +++ b/servers/fastapi/services/docling_service.py @@ -1,4 +1,9 @@ -from docling.document_converter import DocumentConverter, PdfFormatOption +from docling.document_converter import ( + DocumentConverter, + PdfFormatOption, + PowerpointFormatOption, + WordFormatOption, +) from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.datamodel.base_models import InputFormat @@ -9,17 +14,18 @@ class DoclingService: self.pipeline_options.do_ocr = False self.converter = DocumentConverter( + allowed_formats=[InputFormat.PPTX, InputFormat.PDF, InputFormat.DOCX], format_options={ - InputFormat.DOCX: PdfFormatOption( + InputFormat.DOCX: WordFormatOption( pipeline_options=self.pipeline_options, ), - InputFormat.PPTX: PdfFormatOption( + InputFormat.PPTX: PowerpointFormatOption( pipeline_options=self.pipeline_options, ), InputFormat.PDF: PdfFormatOption( pipeline_options=self.pipeline_options, ), - } + }, ) def parse_to_markdown(self, file_path: str) -> str: diff --git a/servers/fastapi/utils/get_dynamic_models.py b/servers/fastapi/utils/get_dynamic_models.py index c2012217..744a6a5a 100644 --- a/servers/fastapi/utils/get_dynamic_models.py +++ b/servers/fastapi/utils/get_dynamic_models.py @@ -7,7 +7,7 @@ from models.presentation_structure_model import PresentationStructureModel def get_presentation_outline_model_with_n_slides(n_slides: int): class PresentationOutlineModelWithNSlides(PresentationOutlineModel): slides: List[str] = Field( - description="Markdown content for each slide", + description="Markdown content for each slide in about 100 to 200 words", min_items=n_slides, max_items=n_slides, ) diff --git a/servers/nextjs/app/(presentation-generator)/outline/components/GenerateButton.tsx b/servers/nextjs/app/(presentation-generator)/outline/components/GenerateButton.tsx index f0ccc554..bc5ee297 100644 --- a/servers/nextjs/app/(presentation-generator)/outline/components/GenerateButton.tsx +++ b/servers/nextjs/app/(presentation-generator)/outline/components/GenerateButton.tsx @@ -1,12 +1,10 @@ import React from "react"; import { Button } from "@/components/ui/button"; -import { SlideOutline } from "@/store/slices/presentationGeneration"; import { LoadingState, StreamState, LayoutGroup } from "../types/index"; interface GenerateButtonProps { loadingState: LoadingState; streamState: StreamState; - outlines: SlideOutline[] | null; selectedLayoutGroup: LayoutGroup | null; onSubmit: () => void; } @@ -14,7 +12,6 @@ interface GenerateButtonProps { const GenerateButton: React.FC = ({ loadingState, streamState, - outlines, selectedLayoutGroup, onSubmit }) => { diff --git a/servers/nextjs/app/(presentation-generator)/outline/components/OutlinePage.tsx b/servers/nextjs/app/(presentation-generator)/outline/components/OutlinePage.tsx index 3dad4300..eceb950b 100644 --- a/servers/nextjs/app/(presentation-generator)/outline/components/OutlinePage.tsx +++ b/servers/nextjs/app/(presentation-generator)/outline/components/OutlinePage.tsx @@ -85,7 +85,6 @@ const OutlinePage: React.FC = () => { diff --git a/servers/nextjs/app/(presentation-generator)/outline/hooks/useOutlineManagement.ts b/servers/nextjs/app/(presentation-generator)/outline/hooks/useOutlineManagement.ts index db15c4a6..6d27b1e5 100644 --- a/servers/nextjs/app/(presentation-generator)/outline/hooks/useOutlineManagement.ts +++ b/servers/nextjs/app/(presentation-generator)/outline/hooks/useOutlineManagement.ts @@ -1,9 +1,9 @@ import { useCallback } from "react"; import { useDispatch } from "react-redux"; import { arrayMove } from "@dnd-kit/sortable"; -import { setOutlines, SlideOutline } from "@/store/slices/presentationGeneration"; +import { setOutlines } from "@/store/slices/presentationGeneration"; -export const useOutlineManagement = (outlines: SlideOutline[] | null) => { +export const useOutlineManagement = (outlines: string[] | null) => { const dispatch = useDispatch(); const handleDragEnd = useCallback((event: any) => { @@ -12,8 +12,8 @@ export const useOutlineManagement = (outlines: SlideOutline[] | null) => { if (!active || !over || !outlines) return; if (active.id !== over.id) { - const oldIndex = outlines.findIndex((item) => item.title === active.id); - const newIndex = outlines.findIndex((item) => item.title === over.id); + const oldIndex = outlines.findIndex((item) => item === active.id); + const newIndex = outlines.findIndex((item) => item === over.id); const reorderedArray = arrayMove(outlines, oldIndex, newIndex); dispatch(setOutlines(reorderedArray)); } @@ -22,12 +22,7 @@ export const useOutlineManagement = (outlines: SlideOutline[] | null) => { const handleAddSlide = useCallback(() => { if (!outlines) return; - const newSlide: SlideOutline = { - title: "Outline title", - body: "Outline body", - }; - - const updatedOutlines = [...outlines, newSlide]; + const updatedOutlines = [...outlines, "Outline title"]; dispatch(setOutlines(updatedOutlines)); }, [outlines, dispatch]); diff --git a/servers/nextjs/app/(presentation-generator)/outline/hooks/usePresentationGeneration.ts b/servers/nextjs/app/(presentation-generator)/outline/hooks/usePresentationGeneration.ts index def04fe7..726056c9 100644 --- a/servers/nextjs/app/(presentation-generator)/outline/hooks/usePresentationGeneration.ts +++ b/servers/nextjs/app/(presentation-generator)/outline/hooks/usePresentationGeneration.ts @@ -2,7 +2,7 @@ import { useState, useCallback } from "react"; import { useDispatch } from "react-redux"; import { useRouter } from "next/navigation"; import { toast } from "sonner"; -import { clearPresentationData, SlideOutline } from "@/store/slices/presentationGeneration"; +import { clearPresentationData } from "@/store/slices/presentationGeneration"; import { PresentationGenerationApi } from "../../services/api/presentation-generation"; import { LayoutGroup, LoadingState, TABS } from "../types/index"; @@ -15,7 +15,7 @@ const DEFAULT_LOADING_STATE: LoadingState = { export const usePresentationGeneration = ( presentationId: string | null, - outlines: SlideOutline[] | null, + outlines: string[] | null, selectedLayoutGroup: LayoutGroup | null, setActiveTab: (tab: string) => void ) => { diff --git a/servers/nextjs/app/(presentation-generator)/pdf-maker/PdfMakerPage.tsx b/servers/nextjs/app/(presentation-generator)/pdf-maker/PdfMakerPage.tsx index 5fb51ee5..88701da1 100644 --- a/servers/nextjs/app/(presentation-generator)/pdf-maker/PdfMakerPage.tsx +++ b/servers/nextjs/app/(presentation-generator)/pdf-maker/PdfMakerPage.tsx @@ -3,19 +3,12 @@ import React, { useEffect, useState } from "react"; import { useDispatch, useSelector } from "react-redux"; import { RootState } from "@/store/store"; import { Skeleton } from "@/components/ui/skeleton"; - - -import { DashboardApi } from "@/app/(presentation-generator)/dashboard/api/dashboard"; - - import { toast } from "sonner"; - - - import { Button } from "@/components/ui/button"; import { AlertCircle } from "lucide-react"; import { useGroupLayouts } from "../hooks/useGroupLayouts"; import { setPresentationData } from "@/store/slices/presentationGeneration"; +import { DashboardApi } from "../services/api/dashboard"; diff --git a/servers/nextjs/store/slices/presentationGeneration.ts b/servers/nextjs/store/slices/presentationGeneration.ts index 1e0f012e..623496a7 100644 --- a/servers/nextjs/store/slices/presentationGeneration.ts +++ b/servers/nextjs/store/slices/presentationGeneration.ts @@ -1,11 +1,6 @@ import { Slide } from "@/app/(presentation-generator)/types/slide"; import { createSlice, PayloadAction } from "@reduxjs/toolkit"; - - - - - export interface PresentationData { id: string; language: string; @@ -137,7 +132,7 @@ const presentationGenerationSlice = createSlice({ action.payload.slide; } }, - + // Update slide content at specific data path (for Tiptap text editing) updateSlideContent: ( state, @@ -154,12 +149,12 @@ const presentationGenerationSlice = createSlice({ ) { const slide = state.presentationData.slides[action.payload.slideIndex]; const { dataPath, content } = action.payload; - + // Helper function to set nested property value const setNestedValue = (obj: any, path: string, value: string) => { const keys = path.split(/[.\[\]]+/).filter(Boolean); let current = obj; - + // Navigate to the parent object for (let i = 0; i < keys.length - 1; i++) { const key = keys[i]; @@ -178,7 +173,7 @@ const presentationGenerationSlice = createSlice({ current = current[index]; } } - + // Set the final value const finalKey = keys[keys.length - 1]; if (isNaN(Number(finalKey))) { @@ -187,7 +182,7 @@ const presentationGenerationSlice = createSlice({ current[Number(finalKey)] = value; } }; - + // Update the slide content if (dataPath && slide.content) { setNestedValue(slide.content, dataPath, content); @@ -198,8 +193,8 @@ const presentationGenerationSlice = createSlice({ addNewSlide: (state, action: PayloadAction<{ slideData: any; index: number }>) => { if (state.presentationData?.slides) { // Insert the new slide at the specified index + 1 (after current slide) - state.presentationData.slides.splice(action.payload.index +1, 0, action.payload.slideData); - + state.presentationData.slides.splice(action.payload.index + 1, 0, action.payload.slideData); + // Update indices for all slides to ensure they remain sequential state.presentationData.slides = state.presentationData.slides.map( (slide: any, idx: number) => ({ @@ -227,12 +222,12 @@ const presentationGenerationSlice = createSlice({ ) { const slide = state.presentationData.slides[action.payload.slideIndex]; const { dataPath, imageUrl, prompt } = action.payload; - + // Helper function to set nested property value for images const setNestedImageValue = (obj: any, path: string, url: string, promptText?: string) => { const keys = path.split(/[.\[\]]+/).filter(Boolean); let current = obj; - + // Navigate to the parent object for (let i = 0; i < keys.length - 1; i++) { const key = keys[i]; @@ -249,33 +244,33 @@ const presentationGenerationSlice = createSlice({ current = current[index]; } } - + // Set the image properties const finalKey = keys[keys.length - 1]; const target = isNaN(Number(finalKey)) ? current[finalKey] : current[Number(finalKey)]; - + // Preserve existing properties if the target already exists const updatedValue = { ...(target && typeof target === 'object' ? target : {}), __image_url__: url, __image_prompt__: promptText || (target?.__image_prompt__) || '' }; - + if (isNaN(Number(finalKey))) { current[finalKey] = updatedValue; } else { current[Number(finalKey)] = updatedValue; } - + // Add debugging console.log('Redux: Updated slide image at path:', path, 'with URL:', url); }; - + // Update the slide image if (dataPath && slide.content) { setNestedImageValue(slide.content, dataPath, imageUrl, prompt); } - + // Also update the images array if it exists if (slide.images && Array.isArray(slide.images)) { const imageIndex = parseInt(dataPath.split('[')[1]?.split(']')[0]) || 0; @@ -293,7 +288,7 @@ const presentationGenerationSlice = createSlice({ itemIndex: number; properties: any; }> - ) => { + ) => { if ( state.presentationData && state.presentationData.slides && @@ -305,8 +300,8 @@ const presentationGenerationSlice = createSlice({ ...slide.properties, [itemIndex]: properties }; - - } + + } }, // Update slide icon at specific data path @@ -326,12 +321,12 @@ const presentationGenerationSlice = createSlice({ ) { const slide = state.presentationData.slides[action.payload.slideIndex]; const { dataPath, iconUrl, query } = action.payload; - + // Helper function to set nested property value for icons const setNestedIconValue = (obj: any, path: string, url: string, queryText?: string) => { const keys = path.split(/[.\[\]]+/).filter(Boolean); let current = obj; - + // Navigate to the parent object for (let i = 0; i < keys.length - 1; i++) { const key = keys[i]; @@ -348,33 +343,33 @@ const presentationGenerationSlice = createSlice({ current = current[index]; } } - + // Set the icon properties const finalKey = keys[keys.length - 1]; const target = isNaN(Number(finalKey)) ? current[finalKey] : current[Number(finalKey)]; - + // Preserve existing properties if the target already exists const updatedValue = { ...(target && typeof target === 'object' ? target : {}), __icon_url__: url, __icon_query__: queryText || (target?.__icon_query__) || '' }; - + if (isNaN(Number(finalKey))) { current[finalKey] = updatedValue; } else { current[Number(finalKey)] = updatedValue; } - + // Add debugging console.log('Redux: Updated slide icon at path:', path, 'with URL:', url); }; - + // Update the slide icon if (dataPath && slide.content) { setNestedIconValue(slide.content, dataPath, iconUrl, query); } - + // Also update the icons array if it exists if (slide.icons && Array.isArray(slide.icons)) { const iconIndex = parseInt(dataPath.split('[')[1]?.split(']')[0]) || 0; diff --git a/start.js b/start.js index a9cc9af2..f2dfa1b0 100644 --- a/start.js +++ b/start.js @@ -103,19 +103,19 @@ const startServers = async () => { console.error("FastAPI process failed to start:", err); }); - const appmcpProcess = spawn( - "python", - ["mcp_server.py", "--port", appmcpPort.toString()], - { - cwd: fastapiDir, - stdio: "inherit", - env: process.env, - }, - ); + // const appmcpProcess = spawn( + // "python", + // ["mcp_server.py", "--port", appmcpPort.toString()], + // { + // cwd: fastapiDir, + // stdio: "inherit", + // env: process.env, + // }, + // ); - appmcpProcess.on("error", (err) => { - console.error("App MCP process failed to start:", err); - }); + // appmcpProcess.on("error", (err) => { + // console.error("App MCP process failed to start:", err); + // }); const nextjsProcess = spawn( "npm", From 0fe272d82caa4366642f827c4b9b910dedf8c912 Mon Sep 17 00:00:00 2001 From: sauravniraula Date: Tue, 5 Aug 2025 23:54:18 +0545 Subject: [PATCH 4/5] refactor: removes nltk from score based chunker --- Dockerfile | 2 +- Dockerfile.dev | 2 +- .../fastapi/services/score_based_chunker.py | 218 ++++++++---------- 3 files changed, 93 insertions(+), 129 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3815c237..220d6876 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,7 +24,7 @@ RUN curl -fsSL https://ollama.com/install.sh | sh # Install dependencies for FastAPI RUN pip install aiohttp aiomysql aiosqlite asyncpg fastapi[standard] \ - pathvalidate pdfplumber nltk chromadb sqlmodel \ + pathvalidate pdfplumber chromadb sqlmodel \ anthropic google-genai openai fastmcp RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu diff --git a/Dockerfile.dev b/Dockerfile.dev index f4e860a1..4f3e80e5 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -26,7 +26,7 @@ RUN curl -fsSL http://ollama.com/install.sh | sh # Install dependencies for FastAPI RUN pip install aiohttp aiomysql aiosqlite asyncpg fastapi[standard] \ - pathvalidate pdfplumber nltk chromadb sqlmodel \ + pathvalidate pdfplumber chromadb sqlmodel \ anthropic google-genai openai fastmcp RUN pip install docling --extra-index-url https://download.pytorch.org/whl/cpu diff --git a/servers/fastapi/services/score_based_chunker.py b/servers/fastapi/services/score_based_chunker.py index 0af245a2..c67de796 100644 --- a/servers/fastapi/services/score_based_chunker.py +++ b/servers/fastapi/services/score_based_chunker.py @@ -1,133 +1,79 @@ import asyncio from typing import List -import nltk from models.document_chunk import DocumentChunk -try: - nltk.data.find("tokenizers/punkt", paths=["./nltk"]) -except LookupError: - nltk.download("punkt", download_dir="./nltk") - class ScoreBasedChunker: - def extract_sentences(self, text: str, min_sentences: int) -> List[str]: - sentences = self.extract_sentences_markdown(text) - if len(sentences) < min_sentences: - sentences = self.extract_sentences_nltk(text) - if len(sentences) < min_sentences: - sentences = self.extract_sentences_by_stop_words(text) - if len(sentences) < min_sentences: - sentences = self.extract_sentences_by_new_line(text) - if len(sentences) < min_sentences: - raise ValueError( - f"Only {len(sentences)} sentences found, requested {min_sentences}" - ) - return sentences - - def extract_sentences_markdown(self, text: str) -> List[str]: + def extract_headings(self, text: str) -> List[str]: lines = text.split("\n") - sentences = [] - + headings = [] + for line in lines: line = line.strip() - if line: - if line.startswith("#"): - sentences.append(line) - else: - if line.endswith((".", "!", "?")): - sentences.append(line) - else: - sentences.append(line) - - return sentences - - def extract_sentences_nltk(self, text: str) -> List[str]: - sentences = nltk.sent_tokenize(text) - return sentences - - def extract_sentences_by_stop_words(self, text: str) -> List[str]: - sentences = [] - current_sentence = "" - - for char in text: - current_sentence += char - if char in ".!?": - sentences.append(current_sentence.strip()) - current_sentence = "" - - if current_sentence.strip(): - sentences.append(current_sentence.strip()) - - return [s for s in sentences if s] - - def extract_sentences_by_new_line(self, text: str) -> List[str]: - sentences = text.split("\n") - result = [] - for i, sentence in enumerate(sentences): - if i < len(sentences) - 1: - result.append(sentence + "\n") - else: - result.append(sentence) - return result - - def score_sentences_for_heading(self, sentences: List[str]) -> List[float]: - sentences_scores = [] + if line.startswith("#"): + headings.append(line) + + return headings + def score_headings(self, headings: List[str]) -> List[float]: + heading_scores = [] last_heading_index = -1 first_heading_found = False - for i, sentence in enumerate(sentences): + for i, heading in enumerate(headings): score = 0.0 + + heading_level = len(heading) - len(heading.lstrip("#")) + + if heading_level <= 3: + score += 10.0 - (heading_level - 1) * 2.0 + else: + score += 4.0 - (heading_level - 4) * 0.5 - if sentence.strip().startswith("#"): - heading_level = len(sentence) - len(sentence.lstrip("#")) + if not first_heading_found: + score += 5.0 + first_heading_found = True - if heading_level <= 3: - score += 10.0 - (heading_level - 1) * 2.0 - else: - score += 4.0 - (heading_level - 4) * 0.5 + if last_heading_index != -1: + distance = i - last_heading_index + distance_bonus = min(5.0, distance * 0.5) + score += distance_bonus - if not first_heading_found: - score += 5.0 - first_heading_found = True + last_heading_index = i + heading_scores.append(score) - if last_heading_index != -1: - distance = i - last_heading_index - distance_bonus = min(5.0, distance * 0.5) - score += distance_bonus + return heading_scores - last_heading_index = i - - sentences_scores.append(score) - - return sentences_scores - - def get_chunks( - self, sentences: List[str], sentences_scores: List[float], top_k: int = 10 + def get_chunks_from_headings( + self, + text: str, + headings: List[str], + heading_scores: List[float], + top_k: int = 10, ) -> List[DocumentChunk]: - if not sentences_scores: - sentences_scores = self.score_sentences_for_heading(sentences) + if not heading_scores: + heading_scores = self.score_headings(headings) chunks = [] - heading_scores = [] + heading_indices = [] - for i, score in enumerate(sentences_scores): + for i, score in enumerate(heading_scores): if score > 0: - heading_scores.append((i, score)) + heading_indices.append((i, score)) - if len(heading_scores) == 0: + if len(heading_indices) == 0: return chunks - heading_scores.sort(key=lambda x: (-x[1], x[0])) + heading_indices.sort(key=lambda x: (-x[1], x[0])) - if len(heading_scores) <= top_k: - selected_headings = [idx for idx, _ in heading_scores] - selected_headings.sort() + if len(heading_indices) <= top_k: + selected_indices = [idx for idx, _ in heading_indices] + selected_indices.sort() else: score_groups = {} - for idx, score in heading_scores: + for idx, score in heading_indices: rounded_score = round(score) if rounded_score not in score_groups: score_groups[rounded_score] = [] @@ -137,62 +83,80 @@ class ScoreBasedChunker: score_groups.items(), key=lambda x: x[0], reverse=True ) - selected_headings = [] + selected_indices = [] - for score, headings in sorted_groups: - headings.sort() - remaining_needed = top_k - len(selected_headings) + for score, indices in sorted_groups: + indices.sort() + remaining_needed = top_k - len(selected_indices) if remaining_needed <= 0: break - if len(headings) <= remaining_needed: - selected_headings.extend(headings) + if len(indices) <= remaining_needed: + selected_indices.extend(indices) else: if remaining_needed == 1: - mid_idx = len(headings) // 2 - selected_headings.append(headings[mid_idx]) + mid_idx = len(indices) // 2 + selected_indices.append(indices[mid_idx]) elif remaining_needed == 2: - selected_headings.append(headings[0]) - selected_headings.append(headings[-1]) + selected_indices.append(indices[0]) + selected_indices.append(indices[-1]) else: - step = (len(headings) - 1) / (remaining_needed - 1) + step = (len(indices) - 1) / (remaining_needed - 1) for i in range(remaining_needed): index = int(round(i * step)) - if index < len(headings): - selected_headings.append(headings[index]) + if index < len(indices): + selected_indices.append(indices[index]) - selected_headings.sort() + selected_indices.sort() - for i, heading_idx in enumerate(selected_headings): - heading = sentences[heading_idx] - - if i + 1 < len(selected_headings): - next_heading_idx = selected_headings[i + 1] - content_end = next_heading_idx + lines = text.split("\n") + heading_positions = {} + + for i, line in enumerate(lines): + line_stripped = line.strip() + if line_stripped.startswith("#"): + for heading_idx, heading in enumerate(headings): + if heading == line_stripped and heading_idx not in heading_positions: + heading_positions[heading_idx] = i + break + + for i, heading_idx in enumerate(selected_indices): + if heading_idx not in heading_positions: + continue + + heading = headings[heading_idx] + heading_line_idx = heading_positions[heading_idx] + + if i + 1 < len(selected_indices): + next_heading_idx = selected_indices[i + 1] + if next_heading_idx in heading_positions: + next_heading_line_idx = heading_positions[next_heading_idx] + content_end = next_heading_line_idx + else: + content_end = len(lines) else: - content_end = len(sentences) + content_end = len(lines) - content_sentences = sentences[heading_idx + 1 : content_end] - content = " ".join(content_sentences).strip() + content_lines = lines[heading_line_idx + 1 : content_end] + content = "\n".join(content_lines).strip() chunk = DocumentChunk( heading=heading, content=content, heading_index=heading_idx, - score=sentences_scores[heading_idx], + score=heading_scores[heading_idx], ) chunks.append(chunk) + return chunks async def get_n_chunks(self, text: str, n: int) -> List[DocumentChunk]: - sentences = await asyncio.to_thread(self.extract_sentences, text, n) - sentences_scores = await asyncio.to_thread( - self.score_sentences_for_heading, sentences - ) + headings = await asyncio.to_thread(self.extract_headings, text) + heading_scores = await asyncio.to_thread(self.score_headings, headings) chunks = await asyncio.to_thread( - self.get_chunks, sentences, sentences_scores, n + self.get_chunks_from_headings, text, headings, heading_scores, n ) if len(chunks) < n: raise ValueError(f"Only {len(chunks)} chunks found, requested {n}") From 1296c2fc6aca66f886e44992916e4dce4ed682e7 Mon Sep 17 00:00:00 2001 From: shiva raj badu Date: Wed, 6 Aug 2025 01:09:05 +0545 Subject: [PATCH 5/5] fix(Nextjs): improve outline stream & replace router.push with router.replace --- .../components/DocumentPreviewPage.tsx | 2 +- .../outline/hooks/useOutlineStreaming.ts | 6 +++--- .../outline/hooks/usePresentationGeneration.ts | 2 +- .../presentation/hooks/usePresentationStreaming.ts | 13 ++++++++++++- .../upload/components/UploadPage.tsx | 1 + 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/servers/nextjs/app/(presentation-generator)/documents-preview/components/DocumentPreviewPage.tsx b/servers/nextjs/app/(presentation-generator)/documents-preview/components/DocumentPreviewPage.tsx index 9de1244c..f77e9017 100644 --- a/servers/nextjs/app/(presentation-generator)/documents-preview/components/DocumentPreviewPage.tsx +++ b/servers/nextjs/app/(presentation-generator)/documents-preview/components/DocumentPreviewPage.tsx @@ -148,7 +148,7 @@ const DocumentsPreviewPage: React.FC = () => { }); dispatch(setPresentationId(createResponse.id)); - router.push("/outline"); + router.replace("/outline"); } catch (error: any) { console.error("Error in radar presentation creation:", error); toast.error('Error', { diff --git a/servers/nextjs/app/(presentation-generator)/outline/hooks/useOutlineStreaming.ts b/servers/nextjs/app/(presentation-generator)/outline/hooks/useOutlineStreaming.ts index 3b29f113..f5db594e 100644 --- a/servers/nextjs/app/(presentation-generator)/outline/hooks/useOutlineStreaming.ts +++ b/servers/nextjs/app/(presentation-generator)/outline/hooks/useOutlineStreaming.ts @@ -85,11 +85,11 @@ export const useOutlineStreaming = (presentationId: string | null) => { } catch (error) { setStreamState({ isStreaming: false, isLoading: false }); toast.error("Failed to initialize connection"); + }finally{ + setStreamState({ isStreaming: false, isLoading: false }); } }; - - initializeStream(); - + initializeStream(); return () => { if (eventSource) { eventSource.close(); diff --git a/servers/nextjs/app/(presentation-generator)/outline/hooks/usePresentationGeneration.ts b/servers/nextjs/app/(presentation-generator)/outline/hooks/usePresentationGeneration.ts index 726056c9..d3e0604f 100644 --- a/servers/nextjs/app/(presentation-generator)/outline/hooks/usePresentationGeneration.ts +++ b/servers/nextjs/app/(presentation-generator)/outline/hooks/usePresentationGeneration.ts @@ -84,7 +84,7 @@ export const usePresentationGeneration = ( if (response) { dispatch(clearPresentationData()); - router.push(`/presentation?id=${presentationId}&stream=true`); + router.replace(`/presentation?id=${presentationId}&stream=true`); } } catch (error: any) { console.error('Error In Presentation Generation(prepare).', error); diff --git a/servers/nextjs/app/(presentation-generator)/presentation/hooks/usePresentationStreaming.ts b/servers/nextjs/app/(presentation-generator)/presentation/hooks/usePresentationStreaming.ts index 042ba11a..b28fe3ca 100644 --- a/servers/nextjs/app/(presentation-generator)/presentation/hooks/usePresentationStreaming.ts +++ b/servers/nextjs/app/(presentation-generator)/presentation/hooks/usePresentationStreaming.ts @@ -6,7 +6,7 @@ import { setStreaming, } from "@/store/slices/presentationGeneration"; import { jsonrepair } from "jsonrepair"; -import { RootState } from "@/store/store"; +import { toast } from "sonner"; export const usePresentationStreaming = ( presentationId: string, @@ -89,6 +89,17 @@ export const usePresentationStreaming = ( newUrl.searchParams.delete("stream"); window.history.replaceState({}, "", newUrl.toString()); break; + case "error": + eventSource.close(); + toast.error("Error in outline streaming", { + description: + data.detail || + "Failed to connect to the server. Please try again.", + }); + setLoading(false); + dispatch(setStreaming(false)); + setError(true); + break; } }); diff --git a/servers/nextjs/app/(presentation-generator)/upload/components/UploadPage.tsx b/servers/nextjs/app/(presentation-generator)/upload/components/UploadPage.tsx index 0c16b25f..f8799d51 100644 --- a/servers/nextjs/app/(presentation-generator)/upload/components/UploadPage.tsx +++ b/servers/nextjs/app/(presentation-generator)/upload/components/UploadPage.tsx @@ -131,6 +131,7 @@ const UploadPage = () => { config, files: responses, })); + dispatch(clearOutlines()); router.push("/documents-preview"); };