feat: 使用 Gemini 边界框坐标改进文字排版
- 修改 Gemini prompt，提取文本元素的位置坐标（百分比）
- 新增 BoundingBox 和 TextElement 类型定义
- PPTX 生成使用动态位置而非硬编码固定位置
- SlideCard 显示每个文本元素的类型和位置信息

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -68,7 +68,7 @@ const App: React.FC = () => {
|
||||
const initialSlides: ProcessedSlide[] = images.map((img, idx) => ({
|
||||
id: idx + 1,
|
||||
originalImage: img,
|
||||
content: { title: '', bullets: [] },
|
||||
content: { elements: [] },
|
||||
status: 'pending'
|
||||
}));
|
||||
|
||||
@@ -121,36 +121,50 @@ const App: React.FC = () => {
|
||||
const pres = new pptxgen();
|
||||
pres.layout = 'LAYOUT_16x9';
|
||||
|
||||
// 16:9 幻灯片尺寸: 10" x 5.625"
|
||||
const SLIDE_WIDTH = 10;
|
||||
const SLIDE_HEIGHT = 5.625;
|
||||
|
||||
processedSlides.forEach((slide) => {
|
||||
const { title, bullets, notes, footer } = slide.content;
|
||||
|
||||
const { elements, notes } = slide.content;
|
||||
|
||||
const pptSlide = pres.addSlide();
|
||||
|
||||
// Add Background
|
||||
if (slide.cleanedImage) {
|
||||
pptSlide.background = { data: slide.cleanedImage };
|
||||
}
|
||||
|
||||
// Title (Fixed Top Position)
|
||||
pptSlide.addText(title || "Untitled", {
|
||||
x: 0.5, y: 0.5, w: '90%', h: 1,
|
||||
fontSize: 24, bold: true, color: '000000', fontFace: 'Arial'
|
||||
});
|
||||
|
||||
// Body Content (Fixed Body Position)
|
||||
if (bullets && bullets.length > 0) {
|
||||
const bulletText = bullets.map(b => ({ text: b, options: { breakLine: true } }));
|
||||
pptSlide.addText(bulletText, {
|
||||
x: 0.5, y: 1.5, w: '90%', h: 3.5,
|
||||
fontSize: 18, color: '333333', bullet: true, paraSpaceBefore: 10, valign: 'top'
|
||||
});
|
||||
}
|
||||
// 使用动态位置添加每个文本元素
|
||||
if (elements && elements.length > 0) {
|
||||
elements.forEach(element => {
|
||||
const { text, elementType, boundingBox, fontSize, alignment } = element;
|
||||
|
||||
// Footer
|
||||
if (footer) {
|
||||
pptSlide.addText(footer, {
|
||||
x: 0.5, y: 5.2, w: '90%', h: 0.3,
|
||||
fontSize: 10, color: '888888'
|
||||
// 将百分比坐标转换为英寸
|
||||
const x = (boundingBox.x / 100) * SLIDE_WIDTH;
|
||||
const y = (boundingBox.y / 100) * SLIDE_HEIGHT;
|
||||
const w = (boundingBox.width / 100) * SLIDE_WIDTH;
|
||||
const h = (boundingBox.height / 100) * SLIDE_HEIGHT;
|
||||
|
||||
// 根据 fontSize 确定字号
|
||||
const fontSizeValue = fontSize === 'large' ? 24
|
||||
: fontSize === 'medium' ? 18 : 12;
|
||||
|
||||
// 根据元素类型设置样式
|
||||
const isBold = elementType === 'title' || elementType === 'subtitle';
|
||||
const color = elementType === 'footer' || elementType === 'caption' ? '888888' : '000000';
|
||||
const isBullet = elementType === 'bullet';
|
||||
|
||||
pptSlide.addText(text, {
|
||||
x, y, w, h,
|
||||
fontSize: fontSizeValue,
|
||||
bold: isBold,
|
||||
color,
|
||||
fontFace: 'Arial',
|
||||
align: alignment || 'left',
|
||||
valign: 'top',
|
||||
bullet: isBullet ? { type: 'bullet' } : undefined
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -65,35 +65,49 @@ export const SlideCard: React.FC<SlideCardProps> = ({ slide }) => {
|
||||
<span className="text-blue-600 text-sm font-medium">Analyzing layout & text...</span>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-4 h-full flex flex-col">
|
||||
<div className="space-y-3 h-full flex flex-col">
|
||||
<div className="flex justify-between items-start">
|
||||
<div>
|
||||
<span className="text-xs font-bold text-blue-600 uppercase tracking-wide">Title</span>
|
||||
<h3 className="text-lg font-semibold text-gray-900 leading-tight">{slide.content.title || "Untitled Slide"}</h3>
|
||||
</div>
|
||||
<span className="text-xs font-bold text-blue-600 uppercase tracking-wide">
|
||||
{slide.content.elements?.length || 0} Text Elements Detected
|
||||
</span>
|
||||
{hasCleanedBg && (
|
||||
<span className="inline-flex items-center rounded-full bg-green-50 px-2 py-1 text-xs font-medium text-green-700 ring-1 ring-inset ring-green-600/20">
|
||||
Background Cleaned
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="flex-1 overflow-hidden relative border rounded-md p-3 bg-gray-50">
|
||||
<span className="text-xs font-bold text-gray-500 uppercase tracking-wide block mb-2">Content</span>
|
||||
<ul className="list-disc pl-5 text-sm text-gray-700 space-y-1 overflow-y-auto max-h-40">
|
||||
{slide.content.bullets && slide.content.bullets.length > 0 ? (
|
||||
slide.content.bullets.map((bullet, idx) => (
|
||||
<li key={idx}>{bullet}</li>
|
||||
))
|
||||
) : (
|
||||
<li className="text-gray-400 italic list-none">No body text detected.</li>
|
||||
)}
|
||||
</ul>
|
||||
|
||||
<div className="flex-1 overflow-y-auto space-y-2 max-h-60">
|
||||
{slide.content.elements && slide.content.elements.length > 0 ? (
|
||||
slide.content.elements.map((element, idx) => (
|
||||
<div key={idx} className="border rounded-md p-2 bg-gray-50">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span className={`text-xs font-semibold px-1.5 py-0.5 rounded ${
|
||||
element.elementType === 'title' ? 'bg-blue-100 text-blue-700' :
|
||||
element.elementType === 'subtitle' ? 'bg-purple-100 text-purple-700' :
|
||||
element.elementType === 'bullet' ? 'bg-green-100 text-green-700' :
|
||||
element.elementType === 'footer' ? 'bg-gray-200 text-gray-600' :
|
||||
'bg-yellow-100 text-yellow-700'
|
||||
}`}>
|
||||
{element.elementType}
|
||||
</span>
|
||||
<span className="text-xs text-gray-400">
|
||||
({Math.round(element.boundingBox.x)}%, {Math.round(element.boundingBox.y)}%)
|
||||
</span>
|
||||
</div>
|
||||
<p className={`text-sm text-gray-800 ${element.elementType === 'title' ? 'font-semibold' : ''}`}>
|
||||
{element.text.length > 150 ? element.text.slice(0, 150) + '...' : element.text}
|
||||
</p>
|
||||
</div>
|
||||
))
|
||||
) : (
|
||||
<div className="text-gray-400 italic text-sm">No text elements detected.</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{slide.content.footer && (
|
||||
{slide.content.notes && (
|
||||
<div className="text-xs text-gray-400 border-t pt-2">
|
||||
<span className="font-semibold">Footer:</span> {slide.content.footer}
|
||||
<span className="font-semibold">Notes:</span> {slide.content.notes}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -21,16 +21,26 @@ export const analyzeSlideImage = async (apiKey: string, base64Image: string): Pr
|
||||
},
|
||||
},
|
||||
{
|
||||
text: `Analyze this presentation slide image and extract the content into a structured format for recreating it in PowerPoint.
|
||||
|
||||
Task:
|
||||
1. Extract the main 'title' of the slide.
|
||||
2. Extract the body text as a list of 'bullets'. Condense multi-line paragraphs into single distinct points where appropriate.
|
||||
3. If there is small text at the bottom, extract it as 'footer'.
|
||||
4. If there are speaker notes (or text that looks like it belongs in the notes), extract it.
|
||||
|
||||
Do not describe the visual layout (e.g., "blue background"), just extract the text content.
|
||||
`,
|
||||
text: `Analyze this presentation slide image and extract all text elements with their positions.
|
||||
|
||||
For each text element on the slide, provide:
|
||||
1. The exact text content
|
||||
2. The element type: "title", "subtitle", "body", "bullet", "footer", "caption", or "other"
|
||||
3. The bounding box as percentages of slide dimensions (0-100):
|
||||
- x: horizontal position from left edge
|
||||
- y: vertical position from top edge
|
||||
- width: element width
|
||||
- height: element height
|
||||
4. Font size category: "large" (titles, >24pt), "medium" (body, 14-24pt), "small" (<14pt)
|
||||
5. Text alignment: "left", "center", or "right"
|
||||
|
||||
Important:
|
||||
- Return ALL visible text elements, not just main content
|
||||
- Estimate positions visually as percentages of the slide
|
||||
- Group related bullet points together as one element with line breaks
|
||||
- For speaker notes (if visible), include them separately in the "notes" field
|
||||
|
||||
Return the data as a JSON object with an "elements" array.`,
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -39,15 +49,41 @@ export const analyzeSlideImage = async (apiKey: string, base64Image: string): Pr
|
||||
responseSchema: {
|
||||
type: Type.OBJECT,
|
||||
properties: {
|
||||
title: { type: Type.STRING },
|
||||
bullets: {
|
||||
type: Type.ARRAY,
|
||||
items: { type: Type.STRING }
|
||||
elements: {
|
||||
type: Type.ARRAY,
|
||||
items: {
|
||||
type: Type.OBJECT,
|
||||
properties: {
|
||||
text: { type: Type.STRING },
|
||||
elementType: {
|
||||
type: Type.STRING,
|
||||
enum: ["title", "subtitle", "body", "bullet", "footer", "caption", "other"]
|
||||
},
|
||||
boundingBox: {
|
||||
type: Type.OBJECT,
|
||||
properties: {
|
||||
x: { type: Type.NUMBER },
|
||||
y: { type: Type.NUMBER },
|
||||
width: { type: Type.NUMBER },
|
||||
height: { type: Type.NUMBER }
|
||||
},
|
||||
required: ['x', 'y', 'width', 'height']
|
||||
},
|
||||
fontSize: {
|
||||
type: Type.STRING,
|
||||
enum: ["large", "medium", "small"]
|
||||
},
|
||||
alignment: {
|
||||
type: Type.STRING,
|
||||
enum: ["left", "center", "right"]
|
||||
}
|
||||
},
|
||||
required: ['text', 'elementType', 'boundingBox']
|
||||
}
|
||||
},
|
||||
footer: { type: Type.STRING },
|
||||
notes: { type: Type.STRING },
|
||||
notes: { type: Type.STRING }
|
||||
},
|
||||
required: ['title', 'bullets'],
|
||||
required: ['elements'],
|
||||
},
|
||||
},
|
||||
});
|
||||
@@ -60,8 +96,13 @@ export const analyzeSlideImage = async (apiKey: string, base64Image: string): Pr
|
||||
} catch (error) {
|
||||
console.error("Error analyzing slide with Gemini:", error);
|
||||
return {
|
||||
title: "Error analyzing slide",
|
||||
bullets: ["Could not extract content due to an AI error."],
|
||||
elements: [{
|
||||
text: "Error analyzing slide",
|
||||
elementType: "title",
|
||||
boundingBox: { x: 5, y: 5, width: 90, height: 10 },
|
||||
fontSize: "large",
|
||||
alignment: "left"
|
||||
}],
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,9 +1,26 @@
|
||||
/**
 * Rectangle locating a text element on a slide.
 *
 * All four values are percentages (0-100) of the slide dimensions; the PPTX
 * export divides them by 100 and scales by the slide width/height in inches.
 */
export interface BoundingBox {
|
||||
x: number; // percentage 0-100; horizontal position from the left edge
|
||||
y: number; // percentage of slide height; vertical position from the top edge
|
||||
width: number; // element width as a percentage of slide width
|
||||
height: number; // element height as a percentage of slide height
|
||||
}
|
||||
|
||||
/**
 * One piece of text detected on a slide, together with where it sits and how
 * it should be styled when the slide is rebuilt.
 */
export interface TextElement {
|
||||
// The text content of the element.
text: string;
|
||||
// Role of the text on the slide; the export uses it to pick bold, color and bullet styling.
elementType: 'title' | 'subtitle' | 'body' | 'bullet' | 'footer' | 'caption' | 'other';
|
||||
// Position and size of the element as percentages of the slide.
boundingBox: BoundingBox;
|
||||
// Coarse size category; the export maps 'large' to 24pt, 'medium' to 18pt and anything else (including absent) to 12pt.
fontSize?: 'large' | 'medium' | 'small';
|
||||
// Horizontal alignment; the export falls back to 'left' when this is absent.
alignment?: 'left' | 'center' | 'right';
|
||||
}
|
||||
|
||||
/**
 * Content extracted from a single slide.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so
 * `title`, `bullets`, `footer` and `layoutSuggestion` each appear twice.
 * Presumably the first (required/commented) declarations are the removed lines
 * and the optional ones below are the added lines — confirm against the real file.
 */
export interface SlideContent {
|
||||
title: string;
|
||||
bullets: string[];
|
||||
footer?: string;
|
||||
// Positioned text elements detected on the slide.
elements: TextElement[];
|
||||
// Speaker notes, when present.
notes?: string;
|
||||
layoutSuggestion?: string; // e.g., 'Title and Content'
|
||||
// Legacy fields kept for backward compatibility
|
||||
title?: string;
|
||||
bullets?: string[];
|
||||
footer?: string;
|
||||
layoutSuggestion?: string;
|
||||
}
|
||||
|
||||
export interface ProcessedSlide {
|
||||
|
||||
Reference in New Issue
Block a user