feat: add scrape screenshot support
This commit is contained in:
@@ -16,7 +16,7 @@ import { WorkflowFile } from "maxun-core";
|
|||||||
import { addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
|
import { addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
|
||||||
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable";
|
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable";
|
||||||
import { sendWebhook } from "../routes/webhook";
|
import { sendWebhook } from "../routes/webhook";
|
||||||
import { convertPageToHTML, convertPageToMarkdown } from '../markdownify/scrape';
|
import { convertPageToHTML, convertPageToMarkdown, convertPageToScreenshot } from '../markdownify/scrape';
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
@@ -689,7 +689,9 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
|||||||
|
|
||||||
// Override if API request defines formats
|
// Override if API request defines formats
|
||||||
if (requestedFormats && Array.isArray(requestedFormats) && requestedFormats.length > 0) {
|
if (requestedFormats && Array.isArray(requestedFormats) && requestedFormats.length > 0) {
|
||||||
formats = requestedFormats.filter((f): f is 'markdown' | 'html' => ['markdown', 'html'].includes(f));
|
formats = requestedFormats.filter((f): f is 'markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage' =>
|
||||||
|
['markdown', 'html', 'screenshot-visible', 'screenshot-fullpage'].includes(f)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
await run.update({
|
await run.update({
|
||||||
@@ -707,6 +709,7 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
|||||||
let markdown = '';
|
let markdown = '';
|
||||||
let html = '';
|
let html = '';
|
||||||
const serializableOutput: any = {};
|
const serializableOutput: any = {};
|
||||||
|
const binaryOutput: any = {};
|
||||||
|
|
||||||
const SCRAPE_TIMEOUT = 120000;
|
const SCRAPE_TIMEOUT = 120000;
|
||||||
|
|
||||||
@@ -728,14 +731,52 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
|||||||
serializableOutput.html = [{ content: html }];
|
serializableOutput.html = [{ content: html }];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (formats.includes("screenshot-visible")) {
|
||||||
|
const screenshotPromise = convertPageToScreenshot(url, currentPage, false);
|
||||||
|
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||||
|
setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||||
|
});
|
||||||
|
const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
|
||||||
|
|
||||||
|
if (!binaryOutput['screenshot-visible']) {
|
||||||
|
binaryOutput['screenshot-visible'] = {
|
||||||
|
data: screenshotBuffer.toString('base64'),
|
||||||
|
mimeType: 'image/png'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (formats.includes("screenshot-fullpage")) {
|
||||||
|
const screenshotPromise = convertPageToScreenshot(url, currentPage, true);
|
||||||
|
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||||
|
setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||||
|
});
|
||||||
|
const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
|
||||||
|
|
||||||
|
if (!binaryOutput['screenshot-fullpage']) {
|
||||||
|
binaryOutput['screenshot-fullpage'] = {
|
||||||
|
data: screenshotBuffer.toString('base64'),
|
||||||
|
mimeType: 'image/png'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
await run.update({
|
await run.update({
|
||||||
status: 'success',
|
status: 'success',
|
||||||
finishedAt: new Date().toLocaleString(),
|
finishedAt: new Date().toLocaleString(),
|
||||||
log: `${formats.join(', ')} conversion completed successfully`,
|
log: `${formats.join(', ')} conversion completed successfully`,
|
||||||
serializableOutput,
|
serializableOutput,
|
||||||
binaryOutput: {},
|
binaryOutput,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Upload binary output (screenshots) to MinIO if present
|
||||||
|
let uploadedBinaryOutput: Record<string, string> = {};
|
||||||
|
if (Object.keys(binaryOutput).length > 0) {
|
||||||
|
const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
|
||||||
|
uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, binaryOutput);
|
||||||
|
await run.update({ binaryOutput: uploadedBinaryOutput });
|
||||||
|
}
|
||||||
|
|
||||||
logger.log('info', `Markdown robot execution completed for API run ${id}`);
|
logger.log('info', `Markdown robot execution completed for API run ${id}`);
|
||||||
|
|
||||||
// Push success socket event
|
// Push success socket event
|
||||||
@@ -775,6 +816,8 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
|||||||
|
|
||||||
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
||||||
if (formats.includes('html')) webhookPayload.html = html;
|
if (formats.includes('html')) webhookPayload.html = html;
|
||||||
|
if (uploadedBinaryOutput['screenshot-visible']) webhookPayload.screenshot_visible = uploadedBinaryOutput['screenshot-visible'];
|
||||||
|
if (uploadedBinaryOutput['screenshot-fullpage']) webhookPayload.screenshot_fullpage = uploadedBinaryOutput['screenshot-fullpage'];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
||||||
|
|||||||
@@ -123,3 +123,28 @@ export async function convertPageToHTML(url: string, page: Page): Promise<string
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Loads `url` on the supplied page via `gotoWithFallback` and captures a PNG
 * screenshot of it.
 *
 * @param url - The URL to screenshot
 * @param page - Existing Playwright page instance to use
 * @param fullPage - Whether to capture the full scrollable page (true) or just the visible viewport (false)
 * @returns The PNG image data produced by `page.screenshot`
 * @throws Rethrows, after logging, any error raised while loading the page or taking the screenshot
 */
export async function convertPageToScreenshot(url: string, page: Page, fullPage: boolean = false): Promise<Buffer> {
  try {
    const screenshotType = fullPage ? 'full page' : 'visible viewport';
    logger.log('info', `[Scrape] Taking ${screenshotType} screenshot of ${url}`);

    await gotoWithFallback(page, url);

    // `return await` keeps a rejected screenshot inside this try/catch so it is logged.
    return await page.screenshot({
      type: 'png',
      fullPage
    });
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before
    // reading `.message` so the log line never prints "undefined".
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`[Scrape] Error during screenshot: ${message}`);
    throw error;
  }
}
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ interface RobotMeta {
|
|||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } fr
|
|||||||
import { io as serverIo } from "./server";
|
import { io as serverIo } from "./server";
|
||||||
import { sendWebhook } from './routes/webhook';
|
import { sendWebhook } from './routes/webhook';
|
||||||
import { BinaryOutputService } from './storage/mino';
|
import { BinaryOutputService } from './storage/mino';
|
||||||
import { convertPageToMarkdown, convertPageToHTML } from './markdownify/scrape';
|
import { convertPageToMarkdown, convertPageToHTML, convertPageToScreenshot } from './markdownify/scrape';
|
||||||
|
|
||||||
if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) {
|
if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) {
|
||||||
throw new Error('Failed to start pgboss worker: one or more required environment variables are missing.');
|
throw new Error('Failed to start pgboss worker: one or more required environment variables are missing.');
|
||||||
@@ -244,6 +244,7 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
|||||||
let markdown = '';
|
let markdown = '';
|
||||||
let html = '';
|
let html = '';
|
||||||
const serializableOutput: any = {};
|
const serializableOutput: any = {};
|
||||||
|
const binaryOutput: any = {};
|
||||||
|
|
||||||
const SCRAPE_TIMEOUT = 120000;
|
const SCRAPE_TIMEOUT = 120000;
|
||||||
|
|
||||||
@@ -265,15 +266,52 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
|||||||
serializableOutput.html = [{ content: html }];
|
serializableOutput.html = [{ content: html }];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (formats.includes("screenshot-visible")) {
|
||||||
|
const screenshotPromise = convertPageToScreenshot(url, currentPage, false);
|
||||||
|
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||||
|
setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||||
|
});
|
||||||
|
const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
|
||||||
|
|
||||||
|
if (!binaryOutput['screenshot-visible']) {
|
||||||
|
binaryOutput['screenshot-visible'] = {
|
||||||
|
data: screenshotBuffer.toString('base64'),
|
||||||
|
mimeType: 'image/png'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (formats.includes("screenshot-fullpage")) {
|
||||||
|
const screenshotPromise = convertPageToScreenshot(url, currentPage, true);
|
||||||
|
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||||
|
setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||||
|
});
|
||||||
|
const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
|
||||||
|
|
||||||
|
if (!binaryOutput['screenshot-fullpage']) {
|
||||||
|
binaryOutput['screenshot-fullpage'] = {
|
||||||
|
data: screenshotBuffer.toString('base64'),
|
||||||
|
mimeType: 'image/png'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Success update
|
// Success update
|
||||||
await run.update({
|
await run.update({
|
||||||
status: 'success',
|
status: 'success',
|
||||||
finishedAt: new Date().toLocaleString(),
|
finishedAt: new Date().toLocaleString(),
|
||||||
log: `${formats.join(', ').toUpperCase()} conversion completed successfully`,
|
log: `${formats.join(', ').toUpperCase()} conversion completed successfully`,
|
||||||
serializableOutput,
|
serializableOutput,
|
||||||
binaryOutput: {},
|
binaryOutput,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let uploadedBinaryOutput: Record<string, string> = {};
|
||||||
|
if (Object.keys(binaryOutput).length > 0) {
|
||||||
|
const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
|
||||||
|
uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, binaryOutput);
|
||||||
|
await run.update({ binaryOutput: uploadedBinaryOutput });
|
||||||
|
}
|
||||||
|
|
||||||
logger.log('info', `Markdown robot execution completed for run ${data.runId}`);
|
logger.log('info', `Markdown robot execution completed for run ${data.runId}`);
|
||||||
|
|
||||||
// Notify sockets
|
// Notify sockets
|
||||||
@@ -304,6 +342,8 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
|||||||
|
|
||||||
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
||||||
if (formats.includes('html')) webhookPayload.html = html;
|
if (formats.includes('html')) webhookPayload.html = html;
|
||||||
|
if (uploadedBinaryOutput['screenshot-visible']) webhookPayload.screenshot_visible = uploadedBinaryOutput['screenshot-visible'];
|
||||||
|
if (uploadedBinaryOutput['screenshot-fullpage']) webhookPayload.screenshot_fullpage = uploadedBinaryOutput['screenshot-fullpage'];
|
||||||
|
|
||||||
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
||||||
logger.log('info', `Webhooks sent successfully for markdown robot run ${data.runId}`);
|
logger.log('info', `Webhooks sent successfully for markdown robot run ${data.runId}`);
|
||||||
@@ -427,7 +467,7 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
|||||||
|
|
||||||
logger.log('info', `Workflow execution completed for run ${data.runId}`);
|
logger.log('info', `Workflow execution completed for run ${data.runId}`);
|
||||||
|
|
||||||
const binaryOutputService = new BinaryOutputService('maxuncloud-run-screenshots');
|
const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
|
||||||
const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(
|
const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(
|
||||||
run,
|
run,
|
||||||
interpretationInfo.binaryOutput
|
interpretationInfo.binaryOutput
|
||||||
|
|||||||
@@ -456,7 +456,7 @@ router.post('/recordings/scrape', requireSignIn, async (req: AuthenticatedReques
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Validate format
|
// Validate format
|
||||||
const validFormats = ['markdown', 'html'];
|
const validFormats = ['markdown', 'html', 'screenshot-visible', 'screenshot-fullpage'];
|
||||||
|
|
||||||
if (!Array.isArray(formats) || formats.length === 0) {
|
if (!Array.isArray(formats) || formats.length === 0) {
|
||||||
return res.status(400).json({ error: 'At least one output format must be selected.' });
|
return res.status(400).json({ error: 'At least one output format must be selected.' });
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import { WorkflowFile } from "maxun-core";
|
|||||||
import { Page } from "playwright-core";
|
import { Page } from "playwright-core";
|
||||||
import { sendWebhook } from "../../routes/webhook";
|
import { sendWebhook } from "../../routes/webhook";
|
||||||
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
|
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
|
||||||
import { convertPageToMarkdown, convertPageToHTML } from "../../markdownify/scrape";
|
import { convertPageToMarkdown, convertPageToHTML, convertPageToScreenshot } from "../../markdownify/scrape";
|
||||||
|
|
||||||
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
|
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
|
||||||
try {
|
try {
|
||||||
@@ -268,6 +268,7 @@ async function executeRun(id: string, userId: string) {
|
|||||||
let markdown = '';
|
let markdown = '';
|
||||||
let html = '';
|
let html = '';
|
||||||
const serializableOutput: any = {};
|
const serializableOutput: any = {};
|
||||||
|
const binaryOutput: any = {};
|
||||||
|
|
||||||
const SCRAPE_TIMEOUT = 120000;
|
const SCRAPE_TIMEOUT = 120000;
|
||||||
|
|
||||||
@@ -290,13 +291,51 @@ async function executeRun(id: string, userId: string) {
|
|||||||
serializableOutput.html = [{ content: html }];
|
serializableOutput.html = [{ content: html }];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (formats.includes("screenshot-visible")) {
|
||||||
|
const screenshotPromise = convertPageToScreenshot(url, currentPage, false);
|
||||||
|
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||||
|
setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||||
|
});
|
||||||
|
const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
|
||||||
|
|
||||||
|
if (!binaryOutput['screenshot-visible']) {
|
||||||
|
binaryOutput['screenshot-visible'] = {
|
||||||
|
data: screenshotBuffer.toString('base64'),
|
||||||
|
mimeType: 'image/png'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Screenshot - full page
|
||||||
|
if (formats.includes("screenshot-fullpage")) {
|
||||||
|
const screenshotPromise = convertPageToScreenshot(url, currentPage, true);
|
||||||
|
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||||
|
setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||||
|
});
|
||||||
|
const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
|
||||||
|
|
||||||
|
if (!binaryOutput['screenshot-fullpage']) {
|
||||||
|
binaryOutput['screenshot-fullpage'] = {
|
||||||
|
data: screenshotBuffer.toString('base64'),
|
||||||
|
mimeType: 'image/png'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
await run.update({
|
await run.update({
|
||||||
status: 'success',
|
status: 'success',
|
||||||
finishedAt: new Date().toLocaleString(),
|
finishedAt: new Date().toLocaleString(),
|
||||||
log: `${formats.join(', ')} conversion completed successfully`,
|
log: `${formats.join(', ')} conversion completed successfully`,
|
||||||
serializableOutput,
|
serializableOutput,
|
||||||
binaryOutput: {},
|
binaryOutput,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let uploadedBinaryOutput: Record<string, string> = {};
|
||||||
|
if (Object.keys(binaryOutput).length > 0) {
|
||||||
|
const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
|
||||||
|
uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, binaryOutput);
|
||||||
|
await run.update({ binaryOutput: uploadedBinaryOutput });
|
||||||
|
}
|
||||||
|
|
||||||
logger.log('info', `Markdown robot execution completed for scheduled run ${id}`);
|
logger.log('info', `Markdown robot execution completed for scheduled run ${id}`);
|
||||||
|
|
||||||
@@ -335,6 +374,8 @@ async function executeRun(id: string, userId: string) {
|
|||||||
|
|
||||||
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
||||||
if (formats.includes('html')) webhookPayload.html = html;
|
if (formats.includes('html')) webhookPayload.html = html;
|
||||||
|
if (uploadedBinaryOutput['screenshot-visible']) webhookPayload.screenshot_visible = uploadedBinaryOutput['screenshot-visible'];
|
||||||
|
if (uploadedBinaryOutput['screenshot-fullpage']) webhookPayload.screenshot_fullpage = uploadedBinaryOutput['screenshot-fullpage'];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
||||||
|
|||||||
@@ -16,10 +16,10 @@ import {
|
|||||||
CardContent,
|
CardContent,
|
||||||
Tabs,
|
Tabs,
|
||||||
Tab,
|
Tab,
|
||||||
RadioGroup,
|
|
||||||
Radio,
|
|
||||||
FormControl,
|
FormControl,
|
||||||
FormLabel
|
Select,
|
||||||
|
MenuItem,
|
||||||
|
InputLabel
|
||||||
} from '@mui/material';
|
} from '@mui/material';
|
||||||
import { ArrowBack, PlayCircleOutline, Article, Code, Description } from '@mui/icons-material';
|
import { ArrowBack, PlayCircleOutline, Article, Code, Description } from '@mui/icons-material';
|
||||||
import { useGlobalInfoStore } from '../../../context/globalInfo';
|
import { useGlobalInfoStore } from '../../../context/globalInfo';
|
||||||
@@ -376,7 +376,7 @@ const RobotCreate: React.FC = () => {
|
|||||||
/>
|
/>
|
||||||
|
|
||||||
<Typography variant="body2" color="text.secondary" mb={3}>
|
<Typography variant="body2" color="text.secondary" mb={3}>
|
||||||
Turn websites into LLM-ready Markdown & clean HTML for AI apps.
|
Turn websites into LLM-ready Markdown, clean HTML, or screenshots for AI apps.
|
||||||
</Typography>
|
</Typography>
|
||||||
|
|
||||||
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
||||||
@@ -399,40 +399,52 @@ const RobotCreate: React.FC = () => {
|
|||||||
sx={{ mb: 2 }}
|
sx={{ mb: 2 }}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<FormControl component="fieldset" sx={{ width: '100%', textAlign: 'left' }}>
|
<Box sx={{ width: '100%', display: 'flex', justifyContent: 'flex-start' }}>
|
||||||
<p>Output Format (Select at least one)</p>
|
<FormControl sx={{ mb: 2, width: '300px' }}>
|
||||||
<FormControlLabel
|
<InputLabel id="output-formats-label">Output Formats *</InputLabel>
|
||||||
control={
|
<Select
|
||||||
<Checkbox
|
labelId="output-formats-label"
|
||||||
checked={outputFormats.includes('markdown')}
|
id="output-formats"
|
||||||
onChange={(e) => {
|
multiple
|
||||||
if (e.target.checked) {
|
value={outputFormats}
|
||||||
setOutputFormats([...outputFormats, 'markdown']);
|
label="Output Formats *"
|
||||||
} else {
|
onChange={(e) => {
|
||||||
setOutputFormats(outputFormats.filter(f => f !== 'markdown'));
|
const value = typeof e.target.value === 'string' ? e.target.value.split(',') : e.target.value;
|
||||||
}
|
setOutputFormats(value);
|
||||||
}}
|
}}
|
||||||
/>
|
renderValue={(selected) => {
|
||||||
}
|
if (selected.length === 0) {
|
||||||
label="Markdown"
|
return <em style={{ color: '#999' }}>Select formats</em>;
|
||||||
/>
|
}
|
||||||
|
return `${selected.length} format${selected.length > 1 ? 's' : ''} selected`;
|
||||||
<FormControlLabel
|
}}
|
||||||
control={
|
MenuProps={{
|
||||||
<Checkbox
|
PaperProps: {
|
||||||
checked={outputFormats.includes('html')}
|
style: {
|
||||||
onChange={(e) => {
|
maxHeight: 300,
|
||||||
if (e.target.checked) {
|
},
|
||||||
setOutputFormats([...outputFormats, 'html']);
|
},
|
||||||
} else {
|
}}
|
||||||
setOutputFormats(outputFormats.filter(f => f !== 'html'));
|
>
|
||||||
}
|
<MenuItem value="markdown">
|
||||||
}}
|
<Checkbox checked={outputFormats.includes('markdown')} />
|
||||||
/>
|
Markdown
|
||||||
}
|
</MenuItem>
|
||||||
label="HTML"
|
<MenuItem value="html">
|
||||||
/>
|
<Checkbox checked={outputFormats.includes('html')} />
|
||||||
</FormControl>
|
HTML
|
||||||
|
</MenuItem>
|
||||||
|
<MenuItem value="screenshot-visible">
|
||||||
|
<Checkbox checked={outputFormats.includes('screenshot-visible')} />
|
||||||
|
Screenshot - Visible Viewport
|
||||||
|
</MenuItem>
|
||||||
|
<MenuItem value="screenshot-fullpage">
|
||||||
|
<Checkbox checked={outputFormats.includes('screenshot-fullpage')} />
|
||||||
|
Screenshot - Full Page
|
||||||
|
</MenuItem>
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
|
</Box>
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
<Button
|
<Button
|
||||||
@@ -461,7 +473,7 @@ const RobotCreate: React.FC = () => {
|
|||||||
notify('success', `${scrapeRobotName} created successfully!`);
|
notify('success', `${scrapeRobotName} created successfully!`);
|
||||||
navigate('/robots');
|
navigate('/robots');
|
||||||
} else {
|
} else {
|
||||||
notify('error', 'Failed to create markdown robot');
|
notify('error', 'Failed to create scrape robot');
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
disabled={!url.trim() || !scrapeRobotName.trim() || outputFormats.length === 0 || isLoading}
|
disabled={!url.trim() || !scrapeRobotName.trim() || outputFormats.length === 0 || isLoading}
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ interface RobotMeta {
|
|||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ interface RobotMeta {
|
|||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ interface RobotMeta {
|
|||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
@@ -135,16 +135,19 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
const rawKeys = Object.keys(row.binaryOutput);
|
const rawKeys = Object.keys(row.binaryOutput);
|
||||||
|
|
||||||
const isLegacyPattern = rawKeys.every(key => /^item-\d+-\d+$/.test(key));
|
const isLegacyPattern = rawKeys.every(key => /^item-\d+-\d+$/.test(key));
|
||||||
|
|
||||||
let normalizedScreenshotKeys: string[];
|
let normalizedScreenshotKeys: string[];
|
||||||
|
|
||||||
if (isLegacyPattern) {
|
if (isLegacyPattern) {
|
||||||
// Legacy unnamed screenshots → Screenshot 1, Screenshot 2...
|
// Legacy unnamed screenshots → Screenshot 1, Screenshot 2...
|
||||||
normalizedScreenshotKeys = rawKeys.map((_, index) => `Screenshot ${index + 1}`);
|
normalizedScreenshotKeys = rawKeys.map((_, index) => `Screenshot ${index + 1}`);
|
||||||
} else {
|
} else {
|
||||||
// Same rule as captured lists: if name missing or generic, auto-label
|
|
||||||
normalizedScreenshotKeys = rawKeys.map((key, index) => {
|
normalizedScreenshotKeys = rawKeys.map((key, index) => {
|
||||||
if (!key || key.toLowerCase().includes("screenshot")) {
|
if (key === 'screenshot-visible') {
|
||||||
|
return 'Screenshot (Visible)';
|
||||||
|
} else if (key === 'screenshot-fullpage') {
|
||||||
|
return 'Screenshot (Full Page)';
|
||||||
|
} else if (!key || key.toLowerCase().includes("screenshot")) {
|
||||||
return `Screenshot ${index + 1}`;
|
return `Screenshot ${index + 1}`;
|
||||||
}
|
}
|
||||||
return key;
|
return key;
|
||||||
@@ -739,6 +742,67 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
</AccordionDetails>
|
</AccordionDetails>
|
||||||
</Accordion>
|
</Accordion>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{hasScreenshots && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
{t('run_content.captured_screenshot.title', 'Captured Screenshots')}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
{screenshotKeys.length > 1 && (
|
||||||
|
<Box
|
||||||
|
sx={{
|
||||||
|
display: 'flex',
|
||||||
|
borderBottom: '1px solid',
|
||||||
|
borderColor: 'divider',
|
||||||
|
mb: 2,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{screenshotKeys.map((key, idx) => (
|
||||||
|
<Box
|
||||||
|
key={key}
|
||||||
|
onClick={() => setCurrentScreenshotIndex(idx)}
|
||||||
|
sx={{
|
||||||
|
px: 3,
|
||||||
|
py: 1,
|
||||||
|
cursor: 'pointer',
|
||||||
|
backgroundColor:
|
||||||
|
currentScreenshotIndex === idx
|
||||||
|
? (theme) => theme.palette.mode === 'dark'
|
||||||
|
? '#121111ff'
|
||||||
|
: '#e9ecef'
|
||||||
|
: 'transparent',
|
||||||
|
borderBottom: currentScreenshotIndex === idx ? '3px solid #FF00C3' : 'none',
|
||||||
|
color: (theme) => theme.palette.mode === 'dark' ? '#fff' : '#000',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{key}
|
||||||
|
</Box>
|
||||||
|
))}
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<Box sx={{ mt: 1 }}>
|
||||||
|
{screenshotKeys.length > 0 && (
|
||||||
|
<img
|
||||||
|
src={row.binaryOutput[screenshotKeyMap[screenshotKeys[currentScreenshotIndex]]]}
|
||||||
|
alt={`Screenshot ${screenshotKeys[currentScreenshotIndex]}`}
|
||||||
|
style={{
|
||||||
|
maxWidth: '100%',
|
||||||
|
height: 'auto',
|
||||||
|
border: '1px solid #e0e0e0',
|
||||||
|
borderRadius: '4px'
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
</Box>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
</>
|
</>
|
||||||
) : (
|
) : (
|
||||||
// Extract robot output
|
// Extract robot output
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ interface RobotMeta {
|
|||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
Reference in New Issue
Block a user