Merge pull request #889 from getmaxun/markdownify
feat: scrape [html + markdown]
This commit is contained in:
@@ -46,6 +46,7 @@
|
|||||||
"idcac-playwright": "^0.1.3",
|
"idcac-playwright": "^0.1.3",
|
||||||
"ioredis": "^5.4.1",
|
"ioredis": "^5.4.1",
|
||||||
"joi": "^17.6.0",
|
"joi": "^17.6.0",
|
||||||
|
"joplin-turndown-plugin-gfm": "^1.0.12",
|
||||||
"jsonwebtoken": "^9.0.2",
|
"jsonwebtoken": "^9.0.2",
|
||||||
"jwt-decode": "^4.0.0",
|
"jwt-decode": "^4.0.0",
|
||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
@@ -80,6 +81,7 @@
|
|||||||
"styled-components": "^5.3.3",
|
"styled-components": "^5.3.3",
|
||||||
"swagger-jsdoc": "^6.2.8",
|
"swagger-jsdoc": "^6.2.8",
|
||||||
"swagger-ui-express": "^5.0.1",
|
"swagger-ui-express": "^5.0.1",
|
||||||
|
"turndown": "^7.2.2",
|
||||||
"typedoc": "^0.23.8",
|
"typedoc": "^0.23.8",
|
||||||
"typescript": "^5.0.0",
|
"typescript": "^5.0.0",
|
||||||
"uuid": "^8.3.2",
|
"uuid": "^8.3.2",
|
||||||
@@ -126,6 +128,7 @@
|
|||||||
"@types/styled-components": "^5.1.23",
|
"@types/styled-components": "^5.1.23",
|
||||||
"@types/swagger-jsdoc": "^6.0.4",
|
"@types/swagger-jsdoc": "^6.0.4",
|
||||||
"@types/swagger-ui-express": "^4.1.6",
|
"@types/swagger-ui-express": "^4.1.6",
|
||||||
|
"@types/turndown": "^5.0.6",
|
||||||
"@vitejs/plugin-react": "^4.3.3",
|
"@vitejs/plugin-react": "^4.3.3",
|
||||||
"ajv": "^8.8.2",
|
"ajv": "^8.8.2",
|
||||||
"concurrently": "^7.0.0",
|
"concurrently": "^7.0.0",
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import { WorkflowFile } from "maxun-core";
|
|||||||
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
|
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
|
||||||
import { airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable";
|
import { airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable";
|
||||||
import { sendWebhook } from "../routes/webhook";
|
import { sendWebhook } from "../routes/webhook";
|
||||||
|
import { convertPageToHTML, convertPageToMarkdown } from '../markdownify/scrape';
|
||||||
|
|
||||||
chromium.use(stealthPlugin());
|
chromium.use(stealthPlugin());
|
||||||
|
|
||||||
@@ -344,7 +345,9 @@ function formatRunResponse(run: any) {
|
|||||||
runByAPI: run.runByAPI,
|
runByAPI: run.runByAPI,
|
||||||
data: {
|
data: {
|
||||||
textData: {},
|
textData: {},
|
||||||
listData: {}
|
listData: {},
|
||||||
|
markdown: '',
|
||||||
|
html: ''
|
||||||
},
|
},
|
||||||
screenshots: [] as any[],
|
screenshots: [] as any[],
|
||||||
};
|
};
|
||||||
@@ -359,6 +362,14 @@ function formatRunResponse(run: any) {
|
|||||||
formattedRun.data.listData = output.scrapeList;
|
formattedRun.data.listData = output.scrapeList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (output.markdown && Array.isArray(output.markdown)) {
|
||||||
|
formattedRun.data.markdown = output.markdown[0]?.content || '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (output.html && Array.isArray(output.html)) {
|
||||||
|
formattedRun.data.html = output.html[0]?.content || '';
|
||||||
|
}
|
||||||
|
|
||||||
if (run.binaryOutput) {
|
if (run.binaryOutput) {
|
||||||
Object.keys(run.binaryOutput).forEach(key => {
|
Object.keys(run.binaryOutput).forEach(key => {
|
||||||
if (run.binaryOutput[key]) {
|
if (run.binaryOutput[key]) {
|
||||||
@@ -569,9 +580,9 @@ async function triggerIntegrationUpdates(runId: string, robotMetaId: string): Pr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function readyForRunHandler(browserId: string, id: string, userId: string){
|
async function readyForRunHandler(browserId: string, id: string, userId: string, requestedFormats?: string[]){
|
||||||
try {
|
try {
|
||||||
const result = await executeRun(id, userId);
|
const result = await executeRun(id, userId, requestedFormats);
|
||||||
|
|
||||||
if (result && result.success) {
|
if (result && result.success) {
|
||||||
logger.log('info', `Interpretation of ${id} succeeded`);
|
logger.log('info', `Interpretation of ${id} succeeded`);
|
||||||
@@ -608,7 +619,7 @@ function AddGeneratedFlags(workflow: WorkflowFile) {
|
|||||||
return copy;
|
return copy;
|
||||||
};
|
};
|
||||||
|
|
||||||
async function executeRun(id: string, userId: string) {
|
async function executeRun(id: string, userId: string, requestedFormats?: string[]) {
|
||||||
let browser: any = null;
|
let browser: any = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -651,6 +662,166 @@ async function executeRun(id: string, userId: string) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (recording.recording_meta.type === 'scrape') {
|
||||||
|
logger.log('info', `Executing scrape robot for API run ${id}`);
|
||||||
|
|
||||||
|
let formats = recording.recording_meta.formats || ['markdown'];
|
||||||
|
|
||||||
|
// Override if API request defines formats
|
||||||
|
if (requestedFormats && Array.isArray(requestedFormats) && requestedFormats.length > 0) {
|
||||||
|
formats = requestedFormats.filter((f): f is 'markdown' | 'html' => ['markdown', 'html'].includes(f));
|
||||||
|
}
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
status: 'running',
|
||||||
|
log: `Converting page to: ${formats.join(', ')}`
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const url = recording.recording_meta.url;
|
||||||
|
|
||||||
|
if (!url) {
|
||||||
|
throw new Error('No URL specified for markdown robot');
|
||||||
|
}
|
||||||
|
|
||||||
|
let markdown = '';
|
||||||
|
let html = '';
|
||||||
|
const serializableOutput: any = {};
|
||||||
|
|
||||||
|
// Markdown conversion
|
||||||
|
if (formats.includes('markdown')) {
|
||||||
|
markdown = await convertPageToMarkdown(url);
|
||||||
|
serializableOutput.markdown = [{ content: markdown }];
|
||||||
|
}
|
||||||
|
|
||||||
|
// HTML conversion
|
||||||
|
if (formats.includes('html')) {
|
||||||
|
html = await convertPageToHTML(url);
|
||||||
|
serializableOutput.html = [{ content: html }];
|
||||||
|
}
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
status: 'success',
|
||||||
|
finishedAt: new Date().toLocaleString(),
|
||||||
|
log: `${formats.join(', ')} conversion completed successfully`,
|
||||||
|
serializableOutput,
|
||||||
|
binaryOutput: {},
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.log('info', `Markdown robot execution completed for API run ${id}`);
|
||||||
|
|
||||||
|
// Push success socket event
|
||||||
|
try {
|
||||||
|
const completionData = {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
robotMetaId: plainRun.robotMetaId,
|
||||||
|
robotName: recording.recording_meta.name,
|
||||||
|
status: 'success',
|
||||||
|
finishedAt: new Date().toLocaleString()
|
||||||
|
};
|
||||||
|
|
||||||
|
serverIo
|
||||||
|
.of('/queued-run')
|
||||||
|
.to(`user-${userId}`)
|
||||||
|
.emit('run-completed', completionData);
|
||||||
|
} catch (socketError: any) {
|
||||||
|
logger.log(
|
||||||
|
'warn',
|
||||||
|
`Failed to send run-completed notification for markdown robot run ${id}: ${socketError.message}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build webhook payload
|
||||||
|
const webhookPayload: any = {
|
||||||
|
robot_id: plainRun.robotMetaId,
|
||||||
|
run_id: plainRun.runId,
|
||||||
|
robot_name: recording.recording_meta.name,
|
||||||
|
status: 'success',
|
||||||
|
started_at: plainRun.startedAt,
|
||||||
|
finished_at: new Date().toLocaleString(),
|
||||||
|
metadata: {
|
||||||
|
browser_id: plainRun.browserId,
|
||||||
|
user_id: userId,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
||||||
|
if (formats.includes('html')) webhookPayload.html = html;
|
||||||
|
|
||||||
|
try {
|
||||||
|
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
||||||
|
logger.log(
|
||||||
|
'info',
|
||||||
|
`Webhooks sent successfully for markdown robot API run ${plainRun.runId}`
|
||||||
|
);
|
||||||
|
} catch (webhookError: any) {
|
||||||
|
logger.log(
|
||||||
|
'warn',
|
||||||
|
`Failed to send webhooks for markdown robot run ${plainRun.runId}: ${webhookError.message}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
capture("maxun-oss-run-created-api", {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
user_id: userId,
|
||||||
|
status: "success",
|
||||||
|
robot_type: "scrape",
|
||||||
|
formats
|
||||||
|
});
|
||||||
|
|
||||||
|
await destroyRemoteBrowser(plainRun.browserId, userId);
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
interpretationInfo: run.toJSON()
|
||||||
|
};
|
||||||
|
} catch (error: any) {
|
||||||
|
logger.log(
|
||||||
|
'error',
|
||||||
|
`${formats.join(', ')} conversion failed for API run ${id}: ${error.message}`
|
||||||
|
);
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
status: 'failed',
|
||||||
|
finishedAt: new Date().toLocaleString(),
|
||||||
|
log: `${formats.join(', ')} conversion failed: ${error.message}`,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Send failure socket event
|
||||||
|
try {
|
||||||
|
const failureData = {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
robotMetaId: plainRun.robotMetaId,
|
||||||
|
robotName: recording.recording_meta.name,
|
||||||
|
status: 'failed',
|
||||||
|
finishedAt: new Date().toLocaleString()
|
||||||
|
};
|
||||||
|
|
||||||
|
serverIo
|
||||||
|
.of('/queued-run')
|
||||||
|
.to(`user-${userId}`)
|
||||||
|
.emit('run-completed', failureData);
|
||||||
|
} catch (socketError: any) {
|
||||||
|
logger.log(
|
||||||
|
'warn',
|
||||||
|
`Failed to send run-failed notification for markdown robot run ${id}: ${socketError.message}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
capture("maxun-oss-run-created-api", {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
user_id: userId,
|
||||||
|
status: "failed",
|
||||||
|
robot_type: "scrape",
|
||||||
|
formats
|
||||||
|
});
|
||||||
|
|
||||||
|
await destroyRemoteBrowser(plainRun.browserId, userId);
|
||||||
|
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
plainRun.status = 'running';
|
plainRun.status = 'running';
|
||||||
|
|
||||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||||
@@ -848,7 +1019,7 @@ async function executeRun(id: string, userId: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function handleRunRecording(id: string, userId: string) {
|
export async function handleRunRecording(id: string, userId: string, requestedFormats?: string[]) {
|
||||||
try {
|
try {
|
||||||
const result = await createWorkflowAndStoreMetadata(id, userId);
|
const result = await createWorkflowAndStoreMetadata(id, userId);
|
||||||
const { browserId, runId: newRunId } = result;
|
const { browserId, runId: newRunId } = result;
|
||||||
@@ -862,7 +1033,7 @@ export async function handleRunRecording(id: string, userId: string) {
|
|||||||
rejectUnauthorized: false
|
rejectUnauthorized: false
|
||||||
});
|
});
|
||||||
|
|
||||||
socket.on('ready-for-run', () => readyForRunHandler(browserId, newRunId, userId));
|
socket.on('ready-for-run', () => readyForRunHandler(browserId, newRunId, userId, requestedFormats));
|
||||||
|
|
||||||
logger.log('info', `Running Robot: ${id}`);
|
logger.log('info', `Running Robot: ${id}`);
|
||||||
|
|
||||||
@@ -889,12 +1060,11 @@ async function waitForRunCompletion(runId: string, interval: number = 2000) {
|
|||||||
if (!run) throw new Error('Run not found');
|
if (!run) throw new Error('Run not found');
|
||||||
|
|
||||||
if (run.status === 'success') {
|
if (run.status === 'success') {
|
||||||
return run.toJSON();
|
return run;
|
||||||
} else if (run.status === 'failed') {
|
} else if (run.status === 'failed') {
|
||||||
throw new Error('Run failed');
|
throw new Error('Run failed');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for the next polling interval
|
|
||||||
await new Promise(resolve => setTimeout(resolve, interval));
|
await new Promise(resolve => setTimeout(resolve, interval));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -914,6 +1084,21 @@ async function waitForRunCompletion(runId: string, interval: number = 2000) {
|
|||||||
* type: string
|
* type: string
|
||||||
* required: true
|
* required: true
|
||||||
* description: The ID of the robot to run.
|
* description: The ID of the robot to run.
|
||||||
|
* requestBody:
|
||||||
|
* required: false
|
||||||
|
* content:
|
||||||
|
* application/json:
|
||||||
|
* schema:
|
||||||
|
* type: object
|
||||||
|
* properties:
|
||||||
|
* formats:
|
||||||
|
* type: array
|
||||||
|
* items:
|
||||||
|
* type: string
|
||||||
|
* enum: [markdown, html]
|
||||||
|
* description: Optional override formats for this run.
|
||||||
|
* example:
|
||||||
|
* formats: ["html"]
|
||||||
* responses:
|
* responses:
|
||||||
* 200:
|
* 200:
|
||||||
* description: Robot run started successfully.
|
* description: Robot run started successfully.
|
||||||
@@ -972,7 +1157,10 @@ router.post("/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest,
|
|||||||
if (!req.user) {
|
if (!req.user) {
|
||||||
return res.status(401).json({ ok: false, error: 'Unauthorized' });
|
return res.status(401).json({ ok: false, error: 'Unauthorized' });
|
||||||
}
|
}
|
||||||
const runId = await handleRunRecording(req.params.id, req.user.id);
|
|
||||||
|
const requestedFormats = req.body.formats;
|
||||||
|
|
||||||
|
const runId = await handleRunRecording(req.params.id, req.user.id, requestedFormats);
|
||||||
|
|
||||||
if (!runId) {
|
if (!runId) {
|
||||||
throw new Error('Run ID is undefined');
|
throw new Error('Run ID is undefined');
|
||||||
|
|||||||
160
server/src/markdownify/markdown.ts
Normal file
160
server/src/markdownify/markdown.ts
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
/**
 * Converts an HTML string to Markdown using Turndown.
 *
 * The HTML is first passed through `tidyHtml`, then converted with a Turndown
 * instance configured with custom rules for headings, SVGs, paragraphs and
 * links, plus the `gfm` plugin from `joplin-turndown-plugin-gfm`. The result
 * is post-processed by `fixBrokenLinks` and `stripSkipLinks`.
 *
 * @param html    HTML to convert. Falsy input (null, undefined, "") yields "".
 * @param baseUrl Optional base used to turn relative link targets into
 *                absolute URLs. When falsy, hrefs are left as written.
 * @returns The trimmed Markdown, or "" when the conversion throws.
 */
export async function parseMarkdown(
  html: string | null | undefined,
  baseUrl?: string | null
): Promise<string> {
  // Nothing to convert: bail out before loading the conversion libraries.
  if (!html) return "";

  const TurndownService = require("turndown");
  const { gfm } = require("joplin-turndown-plugin-gfm");
  const { URL } = require("url");

  const tidiedHtml = tidyHtml(html);

  const t = new TurndownService({
    headingStyle: "atx", // ensures #### instead of ------
    codeBlockStyle: "fenced",
  });

  // ---------------------------------------------
  // Proper ATX headings #### instead of underline-style
  // ---------------------------------------------
  t.addRule("forceAtxHeadings", {
    filter: ["h1", "h2", "h3", "h4", "h5", "h6"],
    replacement: (content: string, node: any) => {
      // nodeName is "H1".."H6"; the second character is the heading level.
      const level = Number(node.nodeName.charAt(1));
      const clean = content.trim();
      return `\n${"#".repeat(level)} ${clean}\n`;
    },
  });

  // ---------------------------------------------
  // Remove SVGs
  // ---------------------------------------------
  t.addRule("truncate-svg", {
    filter: "svg",
    replacement: () => "",
  });

  // ---------------------------------------------
  // Improved paragraph cleanup
  // ---------------------------------------------
  t.addRule("improved-paragraph", {
    filter: "p",
    replacement: (innerText: string) => {
      const trimmed = innerText.trim();
      if (!trimmed) return "";
      // Collapse runs of 3+ newlines so paragraphs keep a single blank line.
      return `${trimmed.replace(/\n{3,}/g, "\n\n")}\n\n`;
    },
  });

  // ---------------------------------------------
  // Inline link with fallback text
  // ---------------------------------------------
  t.addRule("inlineLink", {
    // Only anchors that actually carry an href are rewritten by this rule.
    filter: (node: any) =>
      node.nodeName === "A" && Boolean(node.getAttribute("href")),

    replacement: (content: string, node: any) => {
      let text = content.trim();

      // Fallback: aria-label → title → domain
      if (!text) {
        text =
          node.getAttribute("aria-label")?.trim() ||
          node.getAttribute("title")?.trim() ||
          getDomainFromUrl(node.getAttribute("href")) ||
          "link";
      }

      let href = node.getAttribute("href").trim();

      // relative → absolute
      if (baseUrl && isRelativeUrl(href)) {
        try {
          href = new URL(href, baseUrl).toString();
        } catch {
          // Best effort: an unresolvable href is emitted exactly as written.
        }
      }

      href = cleanUrl(href);

      return `[${text}](${href})`;
    },
  });

  t.use(gfm);

  // Convert HTML → Markdown
  try {
    // turndown() is synchronous, so there is nothing to await here.
    let out: string = t.turndown(tidiedHtml);
    out = fixBrokenLinks(out);
    out = stripSkipLinks(out);
    return out.trim();
  } catch (err) {
    console.error("HTML→Markdown failed", { err });
    return "";
  }
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// -----------------------------------------------------
|
||||||
|
/**
 * Reports whether `url` is a relative reference, i.e. does not begin with a
 * URI scheme such as `https:`, `mailto:` or `tel:`.
 *
 * Only the start of the string is inspected, so a relative link that embeds
 * an absolute URL in its query or fragment (`/login?next=https://x.test`) is
 * still reported as relative. Scheme-relative references (`//host/path`) are
 * reported as relative as well.
 *
 * @param url Link target to classify.
 * @returns `true` when `url` has no leading scheme, otherwise `false`.
 */
function isRelativeUrl(url: string): boolean {
  // RFC 3986 scheme: a letter followed by letters, digits, "+", "-" or ".", then ":".
  return !/^[a-zA-Z][a-zA-Z\d+.-]*:/.test(url);
}
|
||||||
|
|
||||||
|
/**
 * Extracts the hostname from an absolute URL, dropping a leading `www.`.
 *
 * @param url Absolute URL to inspect.
 * @returns The hostname without a leading `www.` prefix, or `null` when `url`
 *          cannot be parsed as an absolute URL.
 */
function getDomainFromUrl(url: string): string | null {
  try {
    // Anchor the pattern so only a leading "www." label is removed; a plain
    // string replace would also strip "www." from the middle of a hostname.
    return new URL(url).hostname.replace(/^www\./, "");
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * URL clean-up step applied to link targets.
 *
 * Currently a pass-through: the input is returned unchanged.
 *
 * @param u URL to clean.
 * @returns `u`, unmodified.
 */
function cleanUrl(u: string): string {
  const unchanged = u;
  return unchanged;
}
|
||||||
|
|
||||||
|
/**
 * Normalises line breaks inside an attribute value.
 *
 * Each run of newlines (together with the whitespace that follows them) is
 * collapsed to a single newline.
 *
 * @param attr Attribute value; a falsy value yields "".
 * @returns The normalised attribute text.
 */
function cleanAttribute(attr: string) {
  if (!attr) {
    return "";
  }
  return attr.replace(/(\n+\s*)+/g, "\n");
}
|
||||||
|
|
||||||
|
/**
 * Strips non-content elements from an HTML document before conversion.
 *
 * The markup is loaded with cheerio, every element matching one of the tags
 * listed below is removed, and the inner HTML of `<body>` is returned.
 *
 * @param html Raw HTML to tidy.
 * @returns Inner HTML of the document body after the removals.
 */
function tidyHtml(html: string): string {
  const cheerio = require("cheerio");
  const $ = cheerio.load(html);

  // Tags that carry no readable content for the Markdown output.
  const manuallyCleanedElements = [
    "script",
    "style",
    "iframe",
    "noscript",
    "meta",
    "link",
    "object",
    "embed",
    "canvas",
    "audio",
    "video",
  ];

  for (const tag of manuallyCleanedElements) {
    $(tag).remove();
  }

  const body = $("body");
  return body.html();
}
|
||||||
|
|
||||||
|
/**
 * Escapes newlines that occur inside square brackets.
 *
 * The text is scanned character by character while tracking how many `[`
 * are currently open (never dropping below zero). A newline seen while at
 * least one bracket is open is emitted as a backslash followed by the
 * newline; every other character is copied through unchanged.
 *
 * @param md Markdown text to repair.
 * @returns The text with in-bracket newlines backslash-escaped.
 */
function fixBrokenLinks(md: string): string {
  const pieces: string[] = [];
  let openBrackets = 0;

  for (const char of md) {
    switch (char) {
      case "[":
        openBrackets += 1;
        pieces.push(char);
        break;
      case "]":
        if (openBrackets > 0) {
          openBrackets -= 1;
        }
        pieces.push(char);
        break;
      case "\n":
        pieces.push(openBrackets > 0 ? "\\\n" : "\n");
        break;
      default:
        pieces.push(char);
    }
  }

  return pieces.join("");
}
|
||||||
|
|
||||||
|
/**
 * Removes "Skip to Content" accessibility links from Markdown.
 *
 * Matches `[Skip to Content](#...)` case-insensitively, wherever it appears,
 * and deletes every occurrence.
 *
 * @param md Markdown text to filter.
 * @returns The text without skip links.
 */
function stripSkipLinks(md: string): string {
  const skipLinkPattern = /\[Skip to Content\]\(#[^\)]*\)/gi;
  return md.replace(skipLinkPattern, "");
}
|
||||||
111
server/src/markdownify/scrape.ts
Normal file
111
server/src/markdownify/scrape.ts
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
import { chromium } from "playwright";
|
||||||
|
import { parseMarkdown } from "./markdown";
|
||||||
|
|
||||||
|
/**
 * Loads `url` in a freshly launched Chromium instance, removes non-content
 * elements and inline event-handler attributes from the live DOM, and returns
 * the resulting document HTML.
 *
 * The clean-up runs through `page.evaluate` after navigation has finished.
 * An init script registered at that point would only apply to later
 * navigations and so would never touch the document that is serialized here.
 *
 * The browser is always closed, including when navigation or evaluation throws.
 *
 * @param url Page to load.
 * @returns `outerHTML` of the document element after clean-up.
 */
async function fetchCleanedHtml(url: string): Promise<string> {
  const browser = await chromium.launch();

  try {
    const page = await browser.newPage();

    await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });

    // Runs inside the page: strip the DOM, then serialize it in the same call.
    return await page.evaluate(() => {
      const selectors = [
        "script",
        "style",
        "link[rel='stylesheet']",
        "noscript",
        "meta",
        "svg",
        "img",
        "picture",
        "source",
        "video",
        "audio",
        "iframe",
        "object",
        "embed"
      ];

      selectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(e => e.remove());
      });

      // Remove inline event handlers (onclick, onload…)
      document.querySelectorAll("*").forEach(el => {
        // Copy the attribute list first: it is live and shrinks as we remove.
        Array.from(el.attributes).forEach(attr => {
          if (attr.name.startsWith("on")) {
            el.removeAttribute(attr.name);
          }
        });
      });

      return document.documentElement.outerHTML;
    });
  } finally {
    await browser.close();
  }
}

/**
 * Fetches a webpage, strips scripts/styles/images/etc,
 * returns clean Markdown using parser.
 *
 * @param url Page to fetch; also passed to `parseMarkdown` as the base URL.
 * @returns Markdown produced by `parseMarkdown` from the cleaned HTML.
 */
export async function convertPageToMarkdown(url: string): Promise<string> {
  const cleanedHtml = await fetchCleanedHtml(url);

  // Convert cleaned HTML → Markdown
  return parseMarkdown(cleanedHtml, url);
}

/**
 * Fetches a webpage, strips scripts/styles/images/etc,
 * returns clean HTML.
 *
 * @param url Page to fetch.
 * @returns The cleaned document HTML.
 */
export async function convertPageToHTML(url: string): Promise<string> {
  return fetchCleanedHtml(url);
}
|
||||||
6
server/src/markdownify/test.ts
Normal file
6
server/src/markdownify/test.ts
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
import { convertPageToMarkdown } from "./scrape";
|
||||||
|
|
||||||
|
// Manual smoke test: convert a sample page and print the Markdown.
// Failures are reported explicitly and reflected in the process exit code
// instead of being left as an unhandled promise rejection.
(async () => {
  try {
    const md = await convertPageToMarkdown("https://quotes.toscrape.com/");
    console.log(md);
  } catch (err) {
    console.error("Markdown conversion failed", err);
    process.exitCode = 1;
  }
})();
|
||||||
@@ -9,6 +9,9 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
|
type?: 'extract' | 'scrape';
|
||||||
|
url?: string;
|
||||||
|
formats?: ('markdown' | 'html')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import { airtableUpdateTasks, processAirtableUpdates } from './workflow-manageme
|
|||||||
import { io as serverIo } from "./server";
|
import { io as serverIo } from "./server";
|
||||||
import { sendWebhook } from './routes/webhook';
|
import { sendWebhook } from './routes/webhook';
|
||||||
import { BinaryOutputService } from './storage/mino';
|
import { BinaryOutputService } from './storage/mino';
|
||||||
|
import { convertPageToMarkdown, convertPageToHTML } from './markdownify/scrape';
|
||||||
|
|
||||||
if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) {
|
if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) {
|
||||||
throw new Error('Failed to start pgboss worker: one or more required environment variables are missing.');
|
throw new Error('Failed to start pgboss worker: one or more required environment variables are missing.');
|
||||||
@@ -183,11 +184,140 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
|||||||
try {
|
try {
|
||||||
// Find the recording
|
// Find the recording
|
||||||
const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
|
const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
|
||||||
|
|
||||||
if (!recording) {
|
if (!recording) {
|
||||||
throw new Error(`Recording for run ${data.runId} not found`);
|
throw new Error(`Recording for run ${data.runId} not found`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (recording.recording_meta.type === 'scrape') {
|
||||||
|
logger.log('info', `Executing scrape robot for run ${data.runId}`);
|
||||||
|
|
||||||
|
const formats = recording.recording_meta.formats || ['markdown'];
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
status: 'running',
|
||||||
|
log: `Converting page to ${formats.join(', ')}`
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const url = recording.recording_meta.url;
|
||||||
|
|
||||||
|
if (!url) {
|
||||||
|
throw new Error('No URL specified for markdown robot');
|
||||||
|
}
|
||||||
|
|
||||||
|
let markdown = '';
|
||||||
|
let html = '';
|
||||||
|
const serializableOutput: any = {};
|
||||||
|
|
||||||
|
// Markdown conversion
|
||||||
|
if (formats.includes('markdown')) {
|
||||||
|
markdown = await convertPageToMarkdown(url);
|
||||||
|
serializableOutput.markdown = [{ content: markdown }];
|
||||||
|
}
|
||||||
|
|
||||||
|
// HTML conversion
|
||||||
|
if (formats.includes('html')) {
|
||||||
|
html = await convertPageToHTML(url);
|
||||||
|
serializableOutput.html = [{ content: html }];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Success update
|
||||||
|
await run.update({
|
||||||
|
status: 'success',
|
||||||
|
finishedAt: new Date().toLocaleString(),
|
||||||
|
log: `${formats.join(', ').toUpperCase()} conversion completed successfully`,
|
||||||
|
serializableOutput,
|
||||||
|
binaryOutput: {},
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.log('info', `Markdown robot execution completed for run ${data.runId}`);
|
||||||
|
|
||||||
|
// Notify sockets
|
||||||
|
try {
|
||||||
|
const completionData = {
|
||||||
|
runId: data.runId,
|
||||||
|
robotMetaId: plainRun.robotMetaId,
|
||||||
|
robotName: recording.recording_meta.name,
|
||||||
|
status: 'success',
|
||||||
|
finishedAt: new Date().toLocaleString()
|
||||||
|
};
|
||||||
|
|
||||||
|
serverIo.of(browserId).emit('run-completed', completionData);
|
||||||
|
serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', completionData);
|
||||||
|
} catch (socketError: any) {
|
||||||
|
logger.log('warn', `Failed to send run-completed notification for markdown robot run ${data.runId}: ${socketError.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Webhooks
|
||||||
|
try {
|
||||||
|
const webhookPayload: any = {
|
||||||
|
runId: data.runId,
|
||||||
|
robotId: plainRun.robotMetaId,
|
||||||
|
robotName: recording.recording_meta.name,
|
||||||
|
status: 'success',
|
||||||
|
finishedAt: new Date().toLocaleString(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
||||||
|
if (formats.includes('html')) webhookPayload.html = html;
|
||||||
|
|
||||||
|
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
||||||
|
logger.log('info', `Webhooks sent successfully for markdown robot run ${data.runId}`);
|
||||||
|
} catch (webhookError: any) {
|
||||||
|
logger.log('warn', `Failed to send webhooks for markdown robot run ${data.runId}: ${webhookError.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
capture("maxun-oss-run-created-manual", {
|
||||||
|
runId: data.runId,
|
||||||
|
user_id: data.userId,
|
||||||
|
status: "success",
|
||||||
|
robot_type: "scrape",
|
||||||
|
formats,
|
||||||
|
});
|
||||||
|
|
||||||
|
await destroyRemoteBrowser(browserId, data.userId);
|
||||||
|
|
||||||
|
return { success: true };
|
||||||
|
|
||||||
|
} catch (error: any) {
|
||||||
|
logger.log('error', `${formats.join(', ')} conversion failed for run ${data.runId}: ${error.message}`);
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
status: 'failed',
|
||||||
|
finishedAt: new Date().toLocaleString(),
|
||||||
|
log: `${formats.join(', ').toUpperCase()} conversion failed: ${error.message}`,
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const failureData = {
|
||||||
|
runId: data.runId,
|
||||||
|
robotMetaId: plainRun.robotMetaId,
|
||||||
|
robotName: recording.recording_meta.name,
|
||||||
|
status: 'failed',
|
||||||
|
finishedAt: new Date().toLocaleString()
|
||||||
|
};
|
||||||
|
|
||||||
|
serverIo.of(browserId).emit('run-completed', failureData);
|
||||||
|
serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureData);
|
||||||
|
} catch (socketError: any) {
|
||||||
|
logger.log('warn', `Failed to send run-failed notification for markdown robot run ${data.runId}: ${socketError.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
capture("maxun-oss-run-created-manual", {
|
||||||
|
runId: data.runId,
|
||||||
|
user_id: data.userId,
|
||||||
|
status: "failed",
|
||||||
|
robot_type: "scrape",
|
||||||
|
formats,
|
||||||
|
});
|
||||||
|
|
||||||
|
await destroyRemoteBrowser(browserId, data.userId);
|
||||||
|
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const isRunAborted = async (): Promise<boolean> => {
|
const isRunAborted = async (): Promise<boolean> => {
|
||||||
try {
|
try {
|
||||||
const currentRun = await Run.findOne({ where: { runId: data.runId } });
|
const currentRun = await Run.findOne({ where: { runId: data.runId } });
|
||||||
|
|||||||
@@ -274,7 +274,10 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (targetUrl) {
|
if (targetUrl) {
|
||||||
|
robot.set('recording_meta', { ...robot.recording_meta, url: targetUrl });
|
||||||
|
|
||||||
const updatedWorkflow = [...robot.recording.workflow];
|
const updatedWorkflow = [...robot.recording.workflow];
|
||||||
|
let foundGoto = false;
|
||||||
|
|
||||||
for (let i = updatedWorkflow.length - 1; i >= 0; i--) {
|
for (let i = updatedWorkflow.length - 1; i >= 0; i--) {
|
||||||
const step = updatedWorkflow[i];
|
const step = updatedWorkflow[i];
|
||||||
@@ -289,6 +292,7 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r
|
|||||||
|
|
||||||
robot.set('recording', { ...robot.recording, workflow: updatedWorkflow });
|
robot.set('recording', { ...robot.recording, workflow: updatedWorkflow });
|
||||||
robot.changed('recording', true);
|
robot.changed('recording', true);
|
||||||
|
foundGoto = true;
|
||||||
i = -1;
|
i = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -331,10 +335,11 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (name) {
|
if (name || targetUrl) {
|
||||||
updates.recording_meta = {
|
updates.recording_meta = {
|
||||||
...robot.recording_meta,
|
...robot.recording_meta,
|
||||||
name
|
...(name && { name }),
|
||||||
|
...(targetUrl && { url: targetUrl })
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -432,6 +437,91 @@ router.post('/recordings/:id/duplicate', requireSignIn, async (req: Authenticate
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * POST endpoint for creating a scrape robot (markdown and/or HTML output).
 *
 * Request body:
 *  - `url` (required): absolute http(s) URL of the page to scrape.
 *  - `name` (optional): display name; defaults to `Markdown Robot - <hostname>`.
 *  - `formats` (required): non-empty array whose entries are `markdown` and/or `html`.
 *
 * Responds with 201 and the created robot, 400 for invalid input, 401 when the
 * request carries no user, and 500 when creating the robot throws.
 */
router.post('/recordings/scrape', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const { url, name, formats } = req.body;

    if (!url) {
      return res.status(400).json({ error: 'The "url" field is required.' });
    }

    if (!req.user) {
      return res.status(401).send({ error: 'Unauthorized' });
    }

    // Validate URL format. `new URL()` stringifies non-string input, so reject
    // those up front instead of storing something like "[object Object]".
    if (typeof url !== 'string') {
      return res.status(400).json({ error: 'Invalid URL format' });
    }

    let parsedUrl: URL;
    try {
      parsedUrl = new URL(url);
    } catch (err) {
      return res.status(400).json({ error: 'Invalid URL format' });
    }

    // `new URL()` also accepts schemes such as file:, data: and javascript:.
    // The stored URL is later loaded server-side, so only allow web URLs.
    if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
      return res.status(400).json({ error: 'Only http and https URLs are supported.' });
    }

    // Validate format
    const validFormats = ['markdown', 'html'];

    if (!Array.isArray(formats) || formats.length === 0) {
      return res.status(400).json({ error: 'At least one output format must be selected.' });
    }

    const invalid = formats.filter((f: unknown) => typeof f !== 'string' || !validFormats.includes(f));
    if (invalid.length > 0) {
      return res.status(400).json({ error: `Invalid formats: ${invalid.join(', ')}` });
    }

    // Drop repeated entries so each requested conversion is stored once.
    const uniqueFormats: string[] = Array.from(new Set<string>(formats));

    // Only accept a non-blank string as the name; otherwise derive one from the host.
    const trimmedName = typeof name === 'string' ? name.trim() : '';
    const robotName = trimmedName || `Markdown Robot - ${parsedUrl.hostname}`;
    const currentTimestamp = new Date().toLocaleString();
    const robotId = uuid();

    const newRobot = await Robot.create({
      // Note: the row `id` and `recording_meta.id` are two separately generated UUIDs.
      id: uuid(),
      userId: req.user.id,
      recording_meta: {
        name: robotName,
        id: robotId,
        createdAt: currentTimestamp,
        updatedAt: currentTimestamp,
        pairs: 0,
        params: [],
        type: 'scrape',
        url: url,
        formats: uniqueFormats,
      },
      // Scrape robots are created with an empty workflow.
      recording: { workflow: [] },
      google_sheet_email: null,
      google_sheet_name: null,
      google_sheet_id: null,
      google_access_token: null,
      google_refresh_token: null,
      schedule: null,
    });

    logger.log('info', `Markdown robot created with id: ${newRobot.id}`);
    capture(
      'maxun-oss-robot-created',
      {
        robot_meta: newRobot.recording_meta,
        recording: newRobot.recording,
      }
    );

    return res.status(201).json({
      message: 'Markdown robot created successfully.',
      robot: newRobot,
    });
  } catch (error) {
    if (error instanceof Error) {
      logger.log('error', `Error creating markdown robot: ${error.message}`);
      return res.status(500).json({ error: error.message });
    } else {
      logger.log('error', 'Unknown error creating markdown robot');
      return res.status(500).json({ error: 'An unknown error occurred.' });
    }
  }
});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DELETE endpoint for deleting a recording from the storage.
|
* DELETE endpoint for deleting a recording from the storage.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import { WorkflowFile } from "maxun-core";
|
|||||||
import { Page } from "playwright";
|
import { Page } from "playwright";
|
||||||
import { sendWebhook } from "../../routes/webhook";
|
import { sendWebhook } from "../../routes/webhook";
|
||||||
import { airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
|
import { airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
|
||||||
|
import { convertPageToMarkdown, convertPageToHTML } from "../../markdownify/scrape";
|
||||||
chromium.use(stealthPlugin());
|
chromium.use(stealthPlugin());
|
||||||
|
|
||||||
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
|
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
|
||||||
@@ -207,6 +208,172 @@ async function executeRun(id: string, userId: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (recording.recording_meta.type === 'scrape') {
|
||||||
|
logger.log('info', `Executing scrape robot for scheduled run ${id}`);
|
||||||
|
|
||||||
|
const formats = recording.recording_meta.formats || ['markdown'];
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
status: 'running',
|
||||||
|
log: `Converting page to: ${formats.join(', ')}`
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const runStartedData = {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
robotMetaId: plainRun.robotMetaId,
|
||||||
|
robotName: recording.recording_meta.name,
|
||||||
|
status: 'running',
|
||||||
|
startedAt: plainRun.startedAt
|
||||||
|
};
|
||||||
|
|
||||||
|
serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData);
|
||||||
|
logger.log(
|
||||||
|
'info',
|
||||||
|
`Markdown robot run started notification sent for run: ${plainRun.runId} to user-${userId}`
|
||||||
|
);
|
||||||
|
} catch (socketError: any) {
|
||||||
|
logger.log(
|
||||||
|
'warn',
|
||||||
|
`Failed to send run-started notification for markdown robot run ${plainRun.runId}: ${socketError.message}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const url = recording.recording_meta.url;
|
||||||
|
|
||||||
|
if (!url) {
|
||||||
|
throw new Error('No URL specified for markdown robot');
|
||||||
|
}
|
||||||
|
|
||||||
|
let markdown = '';
|
||||||
|
let html = '';
|
||||||
|
const serializableOutput: any = {};
|
||||||
|
|
||||||
|
// Markdown conversion
|
||||||
|
if (formats.includes('markdown')) {
|
||||||
|
markdown = await convertPageToMarkdown(url);
|
||||||
|
serializableOutput.markdown = [{ content: markdown }];
|
||||||
|
}
|
||||||
|
|
||||||
|
// HTML conversion
|
||||||
|
if (formats.includes('html')) {
|
||||||
|
html = await convertPageToHTML(url);
|
||||||
|
serializableOutput.html = [{ content: html }];
|
||||||
|
}
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
status: 'success',
|
||||||
|
finishedAt: new Date().toLocaleString(),
|
||||||
|
log: `${formats.join(', ')} conversion completed successfully`,
|
||||||
|
serializableOutput,
|
||||||
|
binaryOutput: {},
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.log('info', `Markdown robot execution completed for scheduled run ${id}`);
|
||||||
|
|
||||||
|
// Run-completed socket notifications
|
||||||
|
try {
|
||||||
|
const completionData = {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
robotMetaId: plainRun.robotMetaId,
|
||||||
|
robotName: recording.recording_meta.name,
|
||||||
|
status: 'success',
|
||||||
|
finishedAt: new Date().toLocaleString()
|
||||||
|
};
|
||||||
|
|
||||||
|
serverIo.of(plainRun.browserId).emit('run-completed', completionData);
|
||||||
|
serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData);
|
||||||
|
} catch (socketError: any) {
|
||||||
|
logger.log(
|
||||||
|
'warn',
|
||||||
|
`Failed to send run-completed notification for markdown robot run ${id}: ${socketError.message}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Webhook payload
|
||||||
|
const webhookPayload: any = {
|
||||||
|
robot_id: plainRun.robotMetaId,
|
||||||
|
run_id: plainRun.runId,
|
||||||
|
robot_name: recording.recording_meta.name,
|
||||||
|
status: 'success',
|
||||||
|
started_at: plainRun.startedAt,
|
||||||
|
finished_at: new Date().toLocaleString(),
|
||||||
|
metadata: {
|
||||||
|
browser_id: plainRun.browserId,
|
||||||
|
user_id: userId,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if (formats.includes('markdown')) webhookPayload.markdown = markdown;
|
||||||
|
if (formats.includes('html')) webhookPayload.html = html;
|
||||||
|
|
||||||
|
try {
|
||||||
|
await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
|
||||||
|
logger.log(
|
||||||
|
'info',
|
||||||
|
`Webhooks sent successfully for markdown robot scheduled run ${plainRun.runId}`
|
||||||
|
);
|
||||||
|
} catch (webhookError: any) {
|
||||||
|
logger.log(
|
||||||
|
'warn',
|
||||||
|
`Failed to send webhooks for markdown robot run ${plainRun.runId}: ${webhookError.message}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
capture("maxun-oss-run-created-scheduled", {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
user_id: userId,
|
||||||
|
status: "success",
|
||||||
|
robot_type: "scrape",
|
||||||
|
formats
|
||||||
|
});
|
||||||
|
|
||||||
|
await destroyRemoteBrowser(plainRun.browserId, userId);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
|
} catch (error: any) {
|
||||||
|
logger.log('error', `${formats.join(', ')} conversion failed for scheduled run ${id}: ${error.message}`);
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
status: 'failed',
|
||||||
|
finishedAt: new Date().toLocaleString(),
|
||||||
|
log: `${formats.join(', ')} conversion failed: ${error.message}`,
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const failureData = {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
robotMetaId: plainRun.robotMetaId,
|
||||||
|
robotName: recording.recording_meta.name,
|
||||||
|
status: 'failed',
|
||||||
|
finishedAt: new Date().toLocaleString()
|
||||||
|
};
|
||||||
|
|
||||||
|
serverIo.of(plainRun.browserId).emit('run-completed', failureData);
|
||||||
|
serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureData);
|
||||||
|
} catch (socketError: any) {
|
||||||
|
logger.log(
|
||||||
|
'warn',
|
||||||
|
`Failed to send run-failed notification for markdown robot run ${id}: ${socketError.message}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
capture("maxun-oss-run-created-scheduled", {
|
||||||
|
runId: plainRun.runId,
|
||||||
|
user_id: userId,
|
||||||
|
status: "failed",
|
||||||
|
robot_type: "scrape",
|
||||||
|
formats
|
||||||
|
});
|
||||||
|
|
||||||
|
await destroyRemoteBrowser(plainRun.browserId, userId);
|
||||||
|
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
plainRun.status = 'running';
|
plainRun.status = 'running';
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -217,7 +384,7 @@ async function executeRun(id: string, userId: string) {
|
|||||||
status: 'running',
|
status: 'running',
|
||||||
startedAt: plainRun.startedAt
|
startedAt: plainRun.startedAt
|
||||||
};
|
};
|
||||||
|
|
||||||
serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData);
|
serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData);
|
||||||
logger.log('info', `Run started notification sent for run: ${plainRun.runId} to user-${userId}`);
|
logger.log('info', `Run started notification sent for run: ${plainRun.runId} to user-${userId}`);
|
||||||
} catch (socketError: any) {
|
} catch (socketError: any) {
|
||||||
|
|||||||
@@ -28,6 +28,36 @@ export const getStoredRecordings = async (): Promise<string[] | null> => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Asks the backend to create a scrape robot for `url`.
 *
 * @param url - Page the robot should scrape.
 * @param name - Optional display name for the robot.
 * @param formats - Output formats to request; defaults to `['markdown']`.
 * @returns The response body when the server answers 201, otherwise `null`
 *          (the failure is logged to the console rather than rethrown).
 */
export const createScrapeRobot = async (
  url: string,
  name?: string,
  formats: string[] = ['markdown']
): Promise<any> => {
  const endpoint = `${apiUrl}/storage/recordings/scrape`;
  const payload = { url, name, formats };
  const requestConfig = {
    headers: { 'Content-Type': 'application/json' },
    withCredentials: true,
  };

  try {
    const response = await axios.post(endpoint, payload, requestConfig);

    // Anything other than 201 Created is treated as a failure.
    if (response.status !== 201) {
      throw new Error('Failed to create markdown robot');
    }

    return response.data;
  } catch (failure: unknown) {
    console.error('Error creating markdown robot:', failure);
    return null;
  }
};
|
||||||
|
|
||||||
export const updateRecording = async (id: string, data: {
|
export const updateRecording = async (id: string, data: {
|
||||||
name?: string;
|
name?: string;
|
||||||
limits?: Array<{pairIndex: number, actionIndex: number, argIndex: number, limit: number}>;
|
limits?: Array<{pairIndex: number, actionIndex: number, argIndex: number, limit: number}>;
|
||||||
|
|||||||
@@ -121,6 +121,7 @@ const TableRowMemoized = memo(({ row, columns, handlers }: any) => {
|
|||||||
handleEdit={() => handlers.handleEditRobot(row.id, row.name, row.params || [])}
|
handleEdit={() => handlers.handleEditRobot(row.id, row.name, row.params || [])}
|
||||||
handleDuplicate={() => handlers.handleDuplicateRobot(row.id, row.name, row.params || [])}
|
handleDuplicate={() => handlers.handleDuplicateRobot(row.id, row.name, row.params || [])}
|
||||||
handleDelete={() => handlers.handleDelete(row.id)}
|
handleDelete={() => handlers.handleDelete(row.id)}
|
||||||
|
robotType={row.type}
|
||||||
/>
|
/>
|
||||||
</MemoizedTableCell>
|
</MemoizedTableCell>
|
||||||
);
|
);
|
||||||
@@ -742,9 +743,10 @@ interface OptionsButtonProps {
|
|||||||
handleEdit: () => void;
|
handleEdit: () => void;
|
||||||
handleDelete: () => void;
|
handleDelete: () => void;
|
||||||
handleDuplicate: () => void;
|
handleDuplicate: () => void;
|
||||||
|
robotType: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
const OptionsButton = ({ handleRetrain, handleEdit, handleDelete, handleDuplicate }: OptionsButtonProps) => {
|
const OptionsButton = ({ handleRetrain, handleEdit, handleDelete, handleDuplicate, robotType }: OptionsButtonProps) => {
|
||||||
const [anchorEl, setAnchorEl] = React.useState<null | HTMLElement>(null);
|
const [anchorEl, setAnchorEl] = React.useState<null | HTMLElement>(null);
|
||||||
|
|
||||||
const handleClick = (event: React.MouseEvent<HTMLElement>) => {
|
const handleClick = (event: React.MouseEvent<HTMLElement>) => {
|
||||||
@@ -771,34 +773,33 @@ const OptionsButton = ({ handleRetrain, handleEdit, handleDelete, handleDuplicat
|
|||||||
open={Boolean(anchorEl)}
|
open={Boolean(anchorEl)}
|
||||||
onClose={handleClose}
|
onClose={handleClose}
|
||||||
>
|
>
|
||||||
<MenuItem onClick={() => { handleRetrain(); handleClose(); }}>
|
{robotType !== 'scrape' && (
|
||||||
<ListItemIcon>
|
<MenuItem onClick={() => { handleRetrain(); handleClose(); }}>
|
||||||
<Refresh fontSize="small" />
|
<ListItemIcon>
|
||||||
</ListItemIcon>
|
<Refresh fontSize="small" />
|
||||||
<ListItemText>{t('recordingtable.retrain')}</ListItemText>
|
</ListItemIcon>
|
||||||
</MenuItem>
|
<ListItemText>Retrain</ListItemText>
|
||||||
|
</MenuItem>
|
||||||
|
)}
|
||||||
|
|
||||||
<MenuItem onClick={() => { handleEdit(); handleClose(); }}>
|
<MenuItem onClick={() => { handleEdit(); handleClose(); }}>
|
||||||
<ListItemIcon>
|
<ListItemIcon><Edit fontSize="small" /></ListItemIcon>
|
||||||
<Edit fontSize="small" />
|
<ListItemText>Edit</ListItemText>
|
||||||
</ListItemIcon>
|
|
||||||
<ListItemText>{t('recordingtable.edit')}</ListItemText>
|
|
||||||
</MenuItem>
|
</MenuItem>
|
||||||
|
|
||||||
<MenuItem onClick={() => { handleDelete(); handleClose(); }}>
|
<MenuItem onClick={() => { handleDelete(); handleClose(); }}>
|
||||||
<ListItemIcon>
|
<ListItemIcon><DeleteForever fontSize="small" /></ListItemIcon>
|
||||||
<DeleteForever fontSize="small" />
|
<ListItemText>Delete</ListItemText>
|
||||||
</ListItemIcon>
|
|
||||||
<ListItemText>{t('recordingtable.delete')}</ListItemText>
|
|
||||||
</MenuItem>
|
</MenuItem>
|
||||||
|
|
||||||
<MenuItem onClick={() => { handleDuplicate(); handleClose(); }}>
|
{robotType !== 'scrape' && (
|
||||||
<ListItemIcon>
|
<MenuItem onClick={() => { handleDuplicate(); handleClose(); }}>
|
||||||
<ContentCopy fontSize="small" />
|
<ListItemIcon><ContentCopy fontSize="small" /></ListItemIcon>
|
||||||
</ListItemIcon>
|
<ListItemText>Duplicate</ListItemText>
|
||||||
<ListItemText>{t('recordingtable.duplicate')}</ListItemText>
|
</MenuItem>
|
||||||
</MenuItem>
|
)}
|
||||||
</Menu>
|
</Menu>
|
||||||
|
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -13,29 +13,65 @@ import {
|
|||||||
Card,
|
Card,
|
||||||
CircularProgress,
|
CircularProgress,
|
||||||
Container,
|
Container,
|
||||||
CardContent
|
CardContent,
|
||||||
|
Tabs,
|
||||||
|
Tab,
|
||||||
|
RadioGroup,
|
||||||
|
Radio,
|
||||||
|
FormControl,
|
||||||
|
FormLabel
|
||||||
} from '@mui/material';
|
} from '@mui/material';
|
||||||
import { ArrowBack, PlayCircleOutline, Article } from '@mui/icons-material';
|
import { ArrowBack, PlayCircleOutline, Article, Code, Description } from '@mui/icons-material';
|
||||||
import { useGlobalInfoStore } from '../../../context/globalInfo';
|
import { useGlobalInfoStore } from '../../../context/globalInfo';
|
||||||
import { canCreateBrowserInState, getActiveBrowserId, stopRecording } from '../../../api/recording';
|
import { canCreateBrowserInState, getActiveBrowserId, stopRecording } from '../../../api/recording';
|
||||||
|
import { createScrapeRobot } from "../../../api/storage";
|
||||||
import { AuthContext } from '../../../context/auth';
|
import { AuthContext } from '../../../context/auth';
|
||||||
import { GenericModal } from '../../ui/GenericModal';
|
import { GenericModal } from '../../ui/GenericModal';
|
||||||
|
|
||||||
|
|
||||||
|
/** Props for {@link TabPanel}. */
interface TabPanelProps {
  /** Content rendered while this panel is the selected one. */
  children?: React.ReactNode;
  /** Position of this panel among the tabs. */
  index: number;
  /** Index of the currently selected tab. */
  value: number;
}

/**
 * Container for the content of a single tab. The wrapper `div` is always
 * rendered (hidden when not selected); the children are mounted only while
 * `value` equals `index`. Any extra props are forwarded to the wrapper.
 *
 * NOTE(review): the panel ids / `aria-labelledby` values generated here follow
 * a `robot-tabpanel-N` / `robot-tab-N` scheme, while the `Tab` elements in this
 * file use ids such as `extract-robot` — confirm whether they should match.
 */
function TabPanel({ children, value, index, ...rest }: TabPanelProps) {
  const isSelected = value === index;

  return (
    <div
      role="tabpanel"
      hidden={!isSelected}
      id={`robot-tabpanel-${index}`}
      aria-labelledby={`robot-tab-${index}`}
      {...rest}
    >
      {isSelected && <Box>{children}</Box>}
    </div>
  );
}
|
||||||
|
|
||||||
const RobotCreate: React.FC = () => {
|
const RobotCreate: React.FC = () => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
const { setBrowserId, setRecordingUrl, notify, setRecordingId } = useGlobalInfoStore();
|
const { setBrowserId, setRecordingUrl, notify, setRecordingId, setRerenderRobots } = useGlobalInfoStore();
|
||||||
|
|
||||||
|
const [tabValue, setTabValue] = useState(0);
|
||||||
const [url, setUrl] = useState('');
|
const [url, setUrl] = useState('');
|
||||||
|
const [scrapeRobotName, setScrapeRobotName] = useState('');
|
||||||
const [needsLogin, setNeedsLogin] = useState(false);
|
const [needsLogin, setNeedsLogin] = useState(false);
|
||||||
const [isLoading, setIsLoading] = useState(false);
|
const [isLoading, setIsLoading] = useState(false);
|
||||||
const [isWarningModalOpen, setWarningModalOpen] = useState(false);
|
const [isWarningModalOpen, setWarningModalOpen] = useState(false);
|
||||||
const [activeBrowserId, setActiveBrowserId] = useState('');
|
const [activeBrowserId, setActiveBrowserId] = useState('');
|
||||||
|
const [outputFormats, setOutputFormats] = useState<string[]>([]);
|
||||||
|
|
||||||
const { state } = React.useContext(AuthContext);
|
const { state } = React.useContext(AuthContext);
|
||||||
const { user } = state;
|
const { user } = state;
|
||||||
|
|
||||||
|
const handleTabChange = (event: React.SyntheticEvent, newValue: number) => {
|
||||||
|
setTabValue(newValue);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
const handleStartRecording = async () => {
|
const handleStartRecording = async () => {
|
||||||
if (!url.trim()) {
|
if (!url.trim()) {
|
||||||
@@ -146,155 +182,307 @@ const RobotCreate: React.FC = () => {
|
|||||||
<ArrowBack />
|
<ArrowBack />
|
||||||
</IconButton>
|
</IconButton>
|
||||||
<Typography variant="h5" component="h1">
|
<Typography variant="h5" component="h1">
|
||||||
New Data Extraction Robot
|
Create New Robot
|
||||||
</Typography>
|
</Typography>
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
<Card sx={{ mb: 4, p: 4, textAlign: 'center' }}>
|
<Box sx={{ borderBottom: 1, borderColor: 'divider', mb: 2, mt: "-30px" }}>
|
||||||
<Box display="flex" flexDirection="column" alignItems="center">
|
<Tabs
|
||||||
{/* Logo (kept as original) */}
|
value={tabValue}
|
||||||
<img
|
centered
|
||||||
src="https://ik.imagekit.io/ys1blv5kv/maxunlogo.png"
|
onChange={handleTabChange}
|
||||||
width={73}
|
aria-label="robot type tabs"
|
||||||
height={65}
|
sx={{
|
||||||
style={{
|
minHeight: 36,
|
||||||
borderRadius: '5px',
|
'& .MuiTab-root': {
|
||||||
marginBottom: '30px'
|
minHeight: 36,
|
||||||
}}
|
paddingX: 2,
|
||||||
alt="Maxun Logo"
|
paddingY: 1.5,
|
||||||
/>
|
minWidth: 0,
|
||||||
|
},
|
||||||
|
'& .MuiTabs-indicator': {
|
||||||
|
height: 2,
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Tab label="Extract" id="extract-robot" aria-controls="extract-robot" />
|
||||||
|
<Tab label="Scrape" id="scrape-robot" aria-controls="scrape-robot" />
|
||||||
|
</Tabs>
|
||||||
|
</Box>
|
||||||
|
|
||||||
{/* Origin URL Input */}
|
|
||||||
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
<TabPanel value={tabValue} index={0}>
|
||||||
<TextField
|
<Card sx={{ mb: 4, p: 4, textAlign: 'center' }}>
|
||||||
placeholder="Example: https://www.ycombinator.com/companies/"
|
<Box display="flex" flexDirection="column" alignItems="center">
|
||||||
variant="outlined"
|
{/* Logo (kept as original) */}
|
||||||
|
<img
|
||||||
|
src="https://ik.imagekit.io/ys1blv5kv/maxunlogo.png"
|
||||||
|
width={73}
|
||||||
|
height={65}
|
||||||
|
style={{
|
||||||
|
borderRadius: '5px',
|
||||||
|
marginBottom: '30px'
|
||||||
|
}}
|
||||||
|
alt="Maxun Logo"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary" mb={3}>
|
||||||
|
Extract structured data from websites in a few clicks.
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
{/* Origin URL Input */}
|
||||||
|
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
||||||
|
<TextField
|
||||||
|
placeholder="Example: https://www.ycombinator.com/companies/"
|
||||||
|
variant="outlined"
|
||||||
|
fullWidth
|
||||||
|
value={url}
|
||||||
|
onChange={(e) => setUrl(e.target.value)}
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
{/* Checkbox */}
|
||||||
|
<Box sx={{ width: '100%', maxWidth: 700, mb: 3, textAlign: 'left' }}>
|
||||||
|
<FormControlLabel
|
||||||
|
control={
|
||||||
|
<Checkbox
|
||||||
|
checked={needsLogin}
|
||||||
|
onChange={(e) => setNeedsLogin(e.target.checked)}
|
||||||
|
color="primary"
|
||||||
|
/>
|
||||||
|
}
|
||||||
|
label="This website needs logging in."
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
{/* Button */}
|
||||||
|
<Button
|
||||||
|
variant="contained"
|
||||||
fullWidth
|
fullWidth
|
||||||
value={url}
|
onClick={handleStartRecording}
|
||||||
onChange={(e) => setUrl(e.target.value)}
|
disabled={!url.trim() || isLoading}
|
||||||
/>
|
sx={{
|
||||||
|
bgcolor: '#ff00c3',
|
||||||
|
py: 1.4,
|
||||||
|
fontSize: '1rem',
|
||||||
|
textTransform: 'none',
|
||||||
|
maxWidth: 700,
|
||||||
|
borderRadius: 2
|
||||||
|
}}
|
||||||
|
startIcon={isLoading ? <CircularProgress size={20} color="inherit" /> : null}
|
||||||
|
>
|
||||||
|
{isLoading ? 'Starting...' : 'Start Recording'}
|
||||||
|
</Button>
|
||||||
</Box>
|
</Box>
|
||||||
|
</Card>
|
||||||
|
|
||||||
{/* Checkbox */}
|
<Box mt={6} textAlign="center">
|
||||||
<Box sx={{ width: '100%', maxWidth: 700, mb: 3, textAlign: 'left' }}>
|
<Typography variant="h6" gutterBottom>
|
||||||
<FormControlLabel
|
First time creating a robot?
|
||||||
control={
|
</Typography>
|
||||||
<Checkbox
|
<Typography variant="body2" color="text.secondary" mb={3}>
|
||||||
checked={needsLogin}
|
Get help and learn how to use Maxun effectively.
|
||||||
onChange={(e) => setNeedsLogin(e.target.checked)}
|
</Typography>
|
||||||
color="primary"
|
|
||||||
/>
|
|
||||||
}
|
|
||||||
label="This website needs logging in."
|
|
||||||
/>
|
|
||||||
</Box>
|
|
||||||
|
|
||||||
{/* Button */}
|
<Grid container spacing={3} justifyContent="center">
|
||||||
<Button
|
|
||||||
variant="contained"
|
{/* YouTube Tutorials */}
|
||||||
fullWidth
|
<Grid item xs={12} sm={6} md={4}>
|
||||||
onClick={handleStartRecording}
|
<Card
|
||||||
disabled={!url.trim() || isLoading}
|
sx={{
|
||||||
sx={{
|
height: 140,
|
||||||
bgcolor: '#ff00c3',
|
cursor: "pointer",
|
||||||
py: 1.4,
|
}}
|
||||||
fontSize: '1rem',
|
onClick={() => window.open("https://www.youtube.com/@MaxunOSS/videos", "_blank")}
|
||||||
textTransform: 'none',
|
>
|
||||||
maxWidth: 700,
|
<CardContent
|
||||||
borderRadius: 2
|
sx={{
|
||||||
}}
|
display: "flex",
|
||||||
startIcon={isLoading ? <CircularProgress size={20} color="inherit" /> : null}
|
flexDirection: "column",
|
||||||
>
|
alignItems: "center",
|
||||||
{isLoading ? 'Starting...' : 'Start Recording'}
|
justifyContent: "center", // center content
|
||||||
</Button>
|
height: "100%",
|
||||||
|
textAlign: "center",
|
||||||
|
p: 2,
|
||||||
|
color: (theme) =>
|
||||||
|
theme.palette.mode === 'light' ? 'rgba(0, 0, 0, 0.54)' : '',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<PlayCircleOutline sx={{ fontSize: "32px", mb: 2 }} />
|
||||||
|
|
||||||
|
<Box sx={{ textAlign: "center" }}>
|
||||||
|
<Typography variant="body1" fontWeight="600" sx={{ lineHeight: 1.2 }}>
|
||||||
|
Video Tutorials
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" color="text.secondary" sx={{ lineHeight: 1.4, mt: 1 }}>
|
||||||
|
Watch step-by-step guides
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
</Grid>
|
||||||
|
|
||||||
|
{/* Documentation */}
|
||||||
|
<Grid item xs={12} sm={6} md={4}>
|
||||||
|
<Card
|
||||||
|
sx={{
|
||||||
|
height: 140,
|
||||||
|
cursor: "pointer",
|
||||||
|
}}
|
||||||
|
onClick={() => window.open("https://docs.maxun.dev", "_blank")}
|
||||||
|
>
|
||||||
|
<CardContent
|
||||||
|
sx={{
|
||||||
|
display: "flex",
|
||||||
|
flexDirection: "column",
|
||||||
|
alignItems: "center",
|
||||||
|
justifyContent: "center", // center everything
|
||||||
|
height: "100%",
|
||||||
|
textAlign: "center",
|
||||||
|
p: 2,
|
||||||
|
color: (theme) =>
|
||||||
|
theme.palette.mode === 'light' ? 'rgba(0, 0, 0, 0.54)' : '',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Article sx={{ fontSize: "32px", mb: 2 }} />
|
||||||
|
|
||||||
|
<Box sx={{ textAlign: "center" }}>
|
||||||
|
<Typography variant="body1" fontWeight="600" sx={{ lineHeight: 1.2 }}>
|
||||||
|
Documentation
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" color="text.secondary" sx={{ lineHeight: 1.4, mt: 1 }}>
|
||||||
|
Explore detailed guides
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
</Grid>
|
||||||
|
</Grid>
|
||||||
</Box>
|
</Box>
|
||||||
</Card>
|
</TabPanel>
|
||||||
|
|
||||||
|
<TabPanel value={tabValue} index={1}>
|
||||||
|
<Card sx={{ mb: 4, p: 4, textAlign: 'center' }}>
|
||||||
<Box mt={6} textAlign="center">
|
<Box display="flex" flexDirection="column" alignItems="center">
|
||||||
<Typography variant="h6" gutterBottom>
|
<img
|
||||||
First time creating a robot?
|
src="https://ik.imagekit.io/ys1blv5kv/maxunlogo.png"
|
||||||
</Typography>
|
width={73}
|
||||||
<Typography variant="body2" color="text.secondary" mb={3}>
|
height={65}
|
||||||
Get help and learn how to use Maxun effectively.
|
style={{
|
||||||
</Typography>
|
borderRadius: '5px',
|
||||||
|
marginBottom: '30px'
|
||||||
<Grid container spacing={3} justifyContent="center">
|
|
||||||
|
|
||||||
{/* YouTube Tutorials */}
|
|
||||||
<Grid item xs={12} sm={6} md={4}>
|
|
||||||
<Card
|
|
||||||
sx={{
|
|
||||||
height: 140,
|
|
||||||
cursor: "pointer",
|
|
||||||
}}
|
}}
|
||||||
onClick={() => window.open("https://www.youtube.com/@MaxunOSS/videos", "_blank")}
|
alt="Maxun Logo"
|
||||||
>
|
/>
|
||||||
<CardContent
|
|
||||||
sx={{
|
|
||||||
display: "flex",
|
|
||||||
flexDirection: "column",
|
|
||||||
alignItems: "center",
|
|
||||||
justifyContent: "center", // center content
|
|
||||||
height: "100%",
|
|
||||||
textAlign: "center",
|
|
||||||
p: 2,
|
|
||||||
color: (theme) =>
|
|
||||||
theme.palette.mode === 'light' ? 'rgba(0, 0, 0, 0.54)' : '',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<PlayCircleOutline sx={{ fontSize: "32px", mb: 2 }} />
|
|
||||||
|
|
||||||
<Box sx={{ textAlign: "center" }}>
|
<Typography variant="body2" color="text.secondary" mb={3}>
|
||||||
<Typography variant="body1" fontWeight="600" sx={{ lineHeight: 1.2 }}>
|
Turn websites into LLM-ready Markdown & clean HTML for AI apps.
|
||||||
Video Tutorials
|
</Typography>
|
||||||
</Typography>
|
|
||||||
<Typography variant="body2" color="text.secondary" sx={{ lineHeight: 1.4, mt: 1 }}>
|
|
||||||
Watch step-by-step guides
|
|
||||||
</Typography>
|
|
||||||
</Box>
|
|
||||||
</CardContent>
|
|
||||||
</Card>
|
|
||||||
</Grid>
|
|
||||||
|
|
||||||
{/* Documentation */}
|
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
||||||
<Grid item xs={12} sm={6} md={4}>
|
<TextField
|
||||||
<Card
|
placeholder="Example: YC Companies Scraper"
|
||||||
sx={{
|
variant="outlined"
|
||||||
height: 140,
|
fullWidth
|
||||||
cursor: "pointer",
|
value={scrapeRobotName}
|
||||||
|
onChange={(e) => setScrapeRobotName(e.target.value)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
label="Robot Name"
|
||||||
|
/>
|
||||||
|
<TextField
|
||||||
|
placeholder="Example: https://www.ycombinator.com/companies/"
|
||||||
|
variant="outlined"
|
||||||
|
fullWidth
|
||||||
|
value={url}
|
||||||
|
onChange={(e) => setUrl(e.target.value)}
|
||||||
|
label="Website URL"
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<FormControl component="fieldset" sx={{ width: '100%', textAlign: 'left' }}>
|
||||||
|
<p>Output Format (Select at least one)</p>
|
||||||
|
<FormControlLabel
|
||||||
|
control={
|
||||||
|
<Checkbox
|
||||||
|
checked={outputFormats.includes('markdown')}
|
||||||
|
onChange={(e) => {
|
||||||
|
if (e.target.checked) {
|
||||||
|
setOutputFormats([...outputFormats, 'markdown']);
|
||||||
|
} else {
|
||||||
|
setOutputFormats(outputFormats.filter(f => f !== 'markdown'));
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
}
|
||||||
|
label="Markdown"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<FormControlLabel
|
||||||
|
control={
|
||||||
|
<Checkbox
|
||||||
|
checked={outputFormats.includes('html')}
|
||||||
|
onChange={(e) => {
|
||||||
|
if (e.target.checked) {
|
||||||
|
setOutputFormats([...outputFormats, 'html']);
|
||||||
|
} else {
|
||||||
|
setOutputFormats(outputFormats.filter(f => f !== 'html'));
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
}
|
||||||
|
label="HTML"
|
||||||
|
/>
|
||||||
|
</FormControl>
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
<Button
|
||||||
|
variant="contained"
|
||||||
|
fullWidth
|
||||||
|
onClick={async () => {
|
||||||
|
if (!url.trim()) {
|
||||||
|
notify('error', 'Please enter a valid URL');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!scrapeRobotName.trim()) {
|
||||||
|
notify('error', 'Please enter a robot name');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (outputFormats.length === 0) {
|
||||||
|
notify('error', 'Please select at least one output format');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
setIsLoading(true);
|
||||||
|
const result = await createScrapeRobot(url, scrapeRobotName, outputFormats);
|
||||||
|
setIsLoading(false);
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
setRerenderRobots(true);
|
||||||
|
notify('success', `${scrapeRobotName} created successfully!`);
|
||||||
|
navigate('/robots');
|
||||||
|
} else {
|
||||||
|
notify('error', 'Failed to create markdown robot');
|
||||||
|
}
|
||||||
}}
|
}}
|
||||||
onClick={() => window.open("https://docs.maxun.dev", "_blank")}
|
disabled={!url.trim() || !scrapeRobotName.trim() || outputFormats.length === 0 || isLoading}
|
||||||
|
sx={{
|
||||||
|
bgcolor: '#ff00c3',
|
||||||
|
py: 1.4,
|
||||||
|
fontSize: '1rem',
|
||||||
|
textTransform: 'none',
|
||||||
|
maxWidth: 700,
|
||||||
|
borderRadius: 2
|
||||||
|
}}
|
||||||
|
startIcon={isLoading ? <CircularProgress size={20} color="inherit" /> : null}
|
||||||
>
|
>
|
||||||
<CardContent
|
{isLoading
|
||||||
sx={{
|
? "Creating..."
|
||||||
display: "flex",
|
: `Create Robot`
|
||||||
flexDirection: "column",
|
}
|
||||||
alignItems: "center",
|
</Button>
|
||||||
justifyContent: "center", // center everything
|
</Box>
|
||||||
height: "100%",
|
</Card>
|
||||||
textAlign: "center",
|
</TabPanel>
|
||||||
p: 2,
|
|
||||||
color: (theme) =>
|
|
||||||
theme.palette.mode === 'light' ? 'rgba(0, 0, 0, 0.54)' : '',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<Article sx={{ fontSize: "32px", mb: 2 }} />
|
|
||||||
|
|
||||||
<Box sx={{ textAlign: "center" }}>
|
|
||||||
<Typography variant="body1" fontWeight="600" sx={{ lineHeight: 1.2 }}>
|
|
||||||
Documentation
|
|
||||||
</Typography>
|
|
||||||
<Typography variant="body2" color="text.secondary" sx={{ lineHeight: 1.4, mt: 1 }}>
|
|
||||||
Explore detailed guides
|
|
||||||
</Typography>
|
|
||||||
</Box>
|
|
||||||
</CardContent>
|
|
||||||
</Card>
|
|
||||||
</Grid>
|
|
||||||
</Grid>
|
|
||||||
</Box>
|
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -24,13 +24,9 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
type?: string;
|
type?: 'extract' | 'scrape';
|
||||||
description?: string;
|
|
||||||
usedByUsers?: number[];
|
|
||||||
subscriptionLevel?: number;
|
|
||||||
access?: string;
|
|
||||||
sample?: any[];
|
|
||||||
url?: string;
|
url?: string;
|
||||||
|
formats?: ('markdown' | 'html')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
@@ -24,13 +24,9 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
type?: string;
|
type?: 'extract' | 'scrape';
|
||||||
description?: string;
|
|
||||||
usedByUsers?: number[];
|
|
||||||
subscriptionLevel?: number;
|
|
||||||
access?: string;
|
|
||||||
sample?: any[];
|
|
||||||
url?: string;
|
url?: string;
|
||||||
|
formats?: ('markdown' | 'html')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
@@ -795,11 +791,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
navigate(basePath);
|
navigate(basePath);
|
||||||
};
|
};
|
||||||
|
|
||||||
const lastPair =
|
|
||||||
robot?.recording.workflow[robot?.recording.workflow.length - 1];
|
|
||||||
const targetUrl = lastPair?.what.find((action) => action.action === "goto")
|
|
||||||
?.args?.[0];
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<RobotConfigPage
|
<RobotConfigPage
|
||||||
title={t("robot_edit.title")}
|
title={t("robot_edit.title")}
|
||||||
@@ -826,7 +817,7 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
<TextField
|
<TextField
|
||||||
label={t("robot_duplication.fields.target_url")}
|
label={t("robot_duplication.fields.target_url")}
|
||||||
key={t("robot_duplication.fields.target_url")}
|
key={t("robot_duplication.fields.target_url")}
|
||||||
value={targetUrl || ""}
|
value={getTargetUrl() || ""}
|
||||||
onChange={(e) => handleTargetUrlChange(e.target.value)}
|
onChange={(e) => handleTargetUrlChange(e.target.value)}
|
||||||
style={{ marginBottom: "20px" }}
|
style={{ marginBottom: "20px" }}
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -128,6 +128,8 @@ export const RobotIntegrationPage = ({
|
|||||||
"googleSheets" | "airtable" | "webhook" | null
|
"googleSheets" | "airtable" | "webhook" | null
|
||||||
>(integrationType);
|
>(integrationType);
|
||||||
|
|
||||||
|
const isScrapeRobot = recording?.recording_meta?.type === "scrape";
|
||||||
|
|
||||||
const authenticateWithGoogle = () => {
|
const authenticateWithGoogle = () => {
|
||||||
if (!recordingId) {
|
if (!recordingId) {
|
||||||
console.error("Cannot authenticate: recordingId is null");
|
console.error("Cannot authenticate: recordingId is null");
|
||||||
@@ -729,26 +731,61 @@ export const RobotIntegrationPage = ({
|
|||||||
width: "100%",
|
width: "100%",
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
<Button variant="outlined" onClick={() => {
|
{!isScrapeRobot && (
|
||||||
if (!recordingId) return;
|
<Button
|
||||||
setSelectedIntegrationType("googleSheets");
|
variant="outlined"
|
||||||
setSettings({ ...settings, integrationType: "googleSheets" });
|
onClick={() => {
|
||||||
const basePath = robotPath === "prebuilt-robots" ? "/prebuilt-robots" : "/robots";
|
if (!recordingId) return;
|
||||||
navigate(`${basePath}/${recordingId}/integrate/googleSheets`);
|
setSelectedIntegrationType("googleSheets");
|
||||||
}} style={{ display: "flex", flexDirection: "column", alignItems: "center", background: 'white', color: '#ff00c3' }}>
|
setSettings({ ...settings, integrationType: "googleSheets" });
|
||||||
<img src="https://ik.imagekit.io/ys1blv5kv/gsheet.svg" alt="Google Sheets" style={{ margin: "6px" }} />
|
const basePath = robotPath === "prebuilt-robots" ? "/prebuilt-robots" : "/robots";
|
||||||
Google Sheets
|
navigate(`${basePath}/${recordingId}/integrate/googleSheets`);
|
||||||
</Button>
|
}}
|
||||||
<Button variant="outlined" onClick={() => {
|
style={{
|
||||||
if (!recordingId) return;
|
display: "flex",
|
||||||
setSelectedIntegrationType("airtable");
|
flexDirection: "column",
|
||||||
setSettings({ ...settings, integrationType: "airtable" });
|
alignItems: "center",
|
||||||
const basePath = robotPath === "prebuilt-robots" ? "/prebuilt-robots" : "/robots";
|
background: "white",
|
||||||
navigate(`${basePath}/${recordingId}/integrate/airtable`);
|
color: "#ff00c3",
|
||||||
}} style={{ display: "flex", flexDirection: "column", alignItems: "center", background: 'white', color: '#ff00c3' }}>
|
}}
|
||||||
<img src="https://ik.imagekit.io/ys1blv5kv/airtable.svg" alt="Airtable" style={{ margin: "6px" }} />
|
>
|
||||||
Airtable
|
<img
|
||||||
</Button>
|
src="https://ik.imagekit.io/ys1blv5kv/gsheet.svg"
|
||||||
|
alt="Google Sheets"
|
||||||
|
style={{ margin: "6px" }}
|
||||||
|
/>
|
||||||
|
Google Sheets
|
||||||
|
</Button>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{!isScrapeRobot && (
|
||||||
|
<Button
|
||||||
|
variant="outlined"
|
||||||
|
onClick={() => {
|
||||||
|
if (!recordingId) return;
|
||||||
|
setSelectedIntegrationType("airtable");
|
||||||
|
setSettings({ ...settings, integrationType: "airtable" });
|
||||||
|
const basePath = robotPath === "prebuilt-robots" ? "/prebuilt-robots" : "/robots";
|
||||||
|
navigate(`${basePath}/${recordingId}/integrate/airtable`);
|
||||||
|
}}
|
||||||
|
style={{
|
||||||
|
display: "flex",
|
||||||
|
flexDirection: "column",
|
||||||
|
alignItems: "center",
|
||||||
|
background: "white",
|
||||||
|
color: "#ff00c3",
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<img
|
||||||
|
src="https://ik.imagekit.io/ys1blv5kv/airtable.svg"
|
||||||
|
alt="Airtable"
|
||||||
|
style={{ margin: "6px" }}
|
||||||
|
/>
|
||||||
|
Airtable
|
||||||
|
</Button>
|
||||||
|
)}
|
||||||
|
|
||||||
|
|
||||||
<Button variant="outlined" onClick={() => {
|
<Button variant="outlined" onClick={() => {
|
||||||
if (!recordingId) return;
|
if (!recordingId) return;
|
||||||
setSelectedIntegrationType("webhook");
|
setSelectedIntegrationType("webhook");
|
||||||
|
|||||||
@@ -16,7 +16,9 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
|
type?: 'extract' | 'scrape';
|
||||||
url?: string;
|
url?: string;
|
||||||
|
formats?: ('markdown' | 'html')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
@@ -37,6 +37,8 @@ interface RunContentProps {
|
|||||||
export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRef, abortRunHandler }: RunContentProps) => {
|
export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRef, abortRunHandler }: RunContentProps) => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const [tab, setTab] = React.useState<string>('output');
|
const [tab, setTab] = React.useState<string>('output');
|
||||||
|
const [markdownContent, setMarkdownContent] = useState<string>('');
|
||||||
|
const [htmlContent, setHtmlContent] = useState<string>('');
|
||||||
|
|
||||||
const [schemaData, setSchemaData] = useState<any[]>([]);
|
const [schemaData, setSchemaData] = useState<any[]>([]);
|
||||||
const [schemaColumns, setSchemaColumns] = useState<string[]>([]);
|
const [schemaColumns, setSchemaColumns] = useState<string[]>([]);
|
||||||
@@ -63,6 +65,26 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
setTab(tab);
|
setTab(tab);
|
||||||
}, [interpretationInProgress]);
|
}, [interpretationInProgress]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
setMarkdownContent('');
|
||||||
|
setHtmlContent('');
|
||||||
|
|
||||||
|
if (row.serializableOutput?.markdown && Array.isArray(row.serializableOutput.markdown)) {
|
||||||
|
const markdownData = row.serializableOutput.markdown[0];
|
||||||
|
if (markdownData?.content) {
|
||||||
|
setMarkdownContent(markdownData.content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (row.serializableOutput?.html && Array.isArray(row.serializableOutput.html)) {
|
||||||
|
const htmlData = row.serializableOutput.html[0];
|
||||||
|
if (htmlData?.content) {
|
||||||
|
setHtmlContent(htmlData.content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, [row.serializableOutput]);
|
||||||
|
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (row.status === 'running' || row.status === 'queued' || row.status === 'scheduled') {
|
if (row.status === 'running' || row.status === 'queued' || row.status === 'scheduled') {
|
||||||
setSchemaData([]);
|
setSchemaData([]);
|
||||||
@@ -374,6 +396,22 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
}, 100);
|
}, 100);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const downloadMarkdown = (content: string, filename: string) => {
|
||||||
|
const blob = new Blob([content], { type: 'text/markdown;charset=utf-8;' });
|
||||||
|
const url = URL.createObjectURL(blob);
|
||||||
|
|
||||||
|
const link = document.createElement("a");
|
||||||
|
link.href = url;
|
||||||
|
link.setAttribute("download", filename);
|
||||||
|
document.body.appendChild(link);
|
||||||
|
link.click();
|
||||||
|
document.body.removeChild(link);
|
||||||
|
|
||||||
|
setTimeout(() => {
|
||||||
|
URL.revokeObjectURL(url);
|
||||||
|
}, 100);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
const renderDataTable = (
|
const renderDataTable = (
|
||||||
data: any[],
|
data: any[],
|
||||||
@@ -636,11 +674,77 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
|
|
||||||
const hasData = schemaData.length > 0 || listData.length > 0 || legacyData.length > 0;
|
const hasData = schemaData.length > 0 || listData.length > 0 || legacyData.length > 0;
|
||||||
const hasScreenshots = row.binaryOutput && Object.keys(row.binaryOutput).length > 0;
|
const hasScreenshots = row.binaryOutput && Object.keys(row.binaryOutput).length > 0;
|
||||||
|
const hasMarkdown = markdownContent.length > 0;
|
||||||
|
const hasHTML = htmlContent.length > 0;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Box sx={{ width: '100%' }}>
|
<Box sx={{ width: '100%' }}>
|
||||||
<TabContext value={tab}>
|
<TabContext value={tab}>
|
||||||
<TabPanel value='output' sx={{ width: '100%', maxWidth: '900px' }}>
|
<TabPanel value='output' sx={{ width: '100%', maxWidth: '900px' }}>
|
||||||
|
{hasMarkdown || hasHTML ? (
|
||||||
|
<>
|
||||||
|
{hasMarkdown && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Typography variant='h6'>Markdown</Typography>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper sx={{ p: 2, maxHeight: '500px', overflow: 'auto' }}>
|
||||||
|
<Typography component="pre" sx={{ whiteSpace: 'pre-wrap', fontFamily: 'monospace' }}>
|
||||||
|
{markdownContent}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
|
||||||
|
<Box sx={{ mt: 2 }}>
|
||||||
|
<Button
|
||||||
|
onClick={() => downloadMarkdown(markdownContent, 'output.md')}
|
||||||
|
sx={{ color: '#FF00C3', textTransform: 'none' }}
|
||||||
|
>
|
||||||
|
Download
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{hasHTML && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Typography variant='h6'>HTML</Typography>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper sx={{ p: 2, maxHeight: '500px', overflow: 'auto' }}>
|
||||||
|
<Typography
|
||||||
|
component="pre"
|
||||||
|
sx={{ whiteSpace: 'pre-wrap', fontFamily: 'monospace' }}
|
||||||
|
>
|
||||||
|
{htmlContent}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
|
||||||
|
<Box sx={{ mt: 2 }}>
|
||||||
|
<Button
|
||||||
|
onClick={() => {
|
||||||
|
const blob = new Blob([htmlContent], { type: 'text/html;charset=utf-8;' });
|
||||||
|
const url = URL.createObjectURL(blob);
|
||||||
|
const link = document.createElement("a");
|
||||||
|
link.href = url;
|
||||||
|
link.download = "output.html";
|
||||||
|
link.click();
|
||||||
|
setTimeout(() => URL.revokeObjectURL(url), 100);
|
||||||
|
}}
|
||||||
|
sx={{ color: '#FF00C3', textTransform: 'none' }}
|
||||||
|
>
|
||||||
|
Download
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
// Extract robot output
|
||||||
|
<>
|
||||||
{row.status === 'running' || row.status === 'queued' ? (
|
{row.status === 'running' || row.status === 'queued' ? (
|
||||||
<>
|
<>
|
||||||
<Box sx={{ display: 'flex', alignItems: 'center', mb: 2 }}>
|
<Box sx={{ display: 'flex', alignItems: 'center', mb: 2 }}>
|
||||||
@@ -939,6 +1043,8 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
</AccordionDetails>
|
</AccordionDetails>
|
||||||
</Accordion>
|
</Accordion>
|
||||||
)}
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
</TabPanel>
|
</TabPanel>
|
||||||
</TabContext>
|
</TabContext>
|
||||||
</Box>
|
</Box>
|
||||||
|
|||||||
@@ -27,6 +27,9 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
|
type?: 'extract' | 'scrape';
|
||||||
|
url?: string;
|
||||||
|
formats?: ('markdown' | 'html')[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RobotWorkflow {
|
interface RobotWorkflow {
|
||||||
|
|||||||
Reference in New Issue
Block a user