feat: add server side crawl and search logic
This commit is contained in:
@@ -450,13 +450,35 @@ router.post("/sdk/robots/:id/execute", requireAPIKey, async (req: AuthenticatedR
|
||||
}
|
||||
}
|
||||
|
||||
let crawlData: any[] = [];
|
||||
if (run.serializableOutput?.crawl) {
|
||||
const crawl: any = run.serializableOutput.crawl;
|
||||
|
||||
if (Array.isArray(crawl)) {
|
||||
crawlData = crawl;
|
||||
}
|
||||
else if (typeof crawl === 'object') {
|
||||
const crawlValues = Object.values(crawl);
|
||||
if (crawlValues.length > 0 && Array.isArray(crawlValues[0])) {
|
||||
crawlData = crawlValues[0] as any[];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let searchData: any = {};
|
||||
if (run.serializableOutput?.search) {
|
||||
searchData = run.serializableOutput.search;
|
||||
}
|
||||
|
||||
return res.status(200).json({
|
||||
data: {
|
||||
runId: run.runId,
|
||||
status: run.status,
|
||||
data: {
|
||||
textData: run.serializableOutput?.scrapeSchema || {},
|
||||
listData: listData
|
||||
listData: listData,
|
||||
crawlData: crawlData,
|
||||
searchData: searchData
|
||||
},
|
||||
screenshots: Object.values(run.binaryOutput || {})
|
||||
}
|
||||
@@ -640,6 +662,202 @@ router.post("/sdk/robots/:id/runs/:runId/abort", requireAPIKey, async (req: Auth
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Create a crawl robot programmatically
|
||||
* POST /api/sdk/crawl
|
||||
*/
|
||||
router.post("/sdk/crawl", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
|
||||
try {
|
||||
const user = req.user;
|
||||
const { url, name, crawlConfig } = req.body;
|
||||
|
||||
if (!url || !crawlConfig) {
|
||||
return res.status(400).json({
|
||||
error: "URL and crawl configuration are required"
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
new URL(url);
|
||||
} catch (err) {
|
||||
return res.status(400).json({
|
||||
error: "Invalid URL format"
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof crawlConfig !== 'object') {
|
||||
return res.status(400).json({
|
||||
error: "crawlConfig must be an object"
|
||||
});
|
||||
}
|
||||
|
||||
const robotName = name || `Crawl Robot - ${new URL(url).hostname}`;
|
||||
const robotId = uuid();
|
||||
const metaId = uuid();
|
||||
|
||||
const robot = await Robot.create({
|
||||
id: robotId,
|
||||
userId: user.id,
|
||||
recording_meta: {
|
||||
name: robotName,
|
||||
id: metaId,
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
pairs: 1,
|
||||
params: [],
|
||||
type: 'crawl',
|
||||
url: url,
|
||||
},
|
||||
recording: {
|
||||
workflow: [
|
||||
{
|
||||
where: { url },
|
||||
what: [
|
||||
{ action: 'flag', args: ['generated'] },
|
||||
{
|
||||
action: 'crawl',
|
||||
args: [crawlConfig],
|
||||
name: 'Crawl'
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
where: { url: 'about:blank' },
|
||||
what: [
|
||||
{
|
||||
action: 'goto',
|
||||
args: [url]
|
||||
},
|
||||
{
|
||||
action: 'waitForLoadState',
|
||||
args: ['networkidle']
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
logger.info(`[SDK] Crawl robot created: ${metaId} (db: ${robotId}) by user ${user.id}`);
|
||||
|
||||
capture("maxun-oss-robot-created", {
|
||||
userId: user.id.toString(),
|
||||
robotId: metaId,
|
||||
robotName: robotName,
|
||||
url: url,
|
||||
robotType: 'crawl',
|
||||
crawlConfig: crawlConfig,
|
||||
source: 'sdk'
|
||||
});
|
||||
|
||||
return res.status(201).json({
|
||||
data: robot,
|
||||
message: "Crawl robot created successfully"
|
||||
});
|
||||
|
||||
} catch (error: any) {
|
||||
logger.error("[SDK] Error creating crawl robot:", error);
|
||||
return res.status(500).json({
|
||||
error: "Failed to create crawl robot",
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Create a search robot programmatically
|
||||
* POST /api/sdk/search
|
||||
*/
|
||||
router.post("/sdk/search", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
|
||||
try {
|
||||
const user = req.user;
|
||||
const { name, searchConfig } = req.body;
|
||||
|
||||
if (!searchConfig) {
|
||||
return res.status(400).json({
|
||||
error: "Search configuration is required"
|
||||
});
|
||||
}
|
||||
|
||||
if (!searchConfig.query) {
|
||||
return res.status(400).json({
|
||||
error: "searchConfig must include a query"
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof searchConfig !== 'object') {
|
||||
return res.status(400).json({
|
||||
error: "searchConfig must be an object"
|
||||
});
|
||||
}
|
||||
|
||||
if (searchConfig.mode && !['discover', 'scrape'].includes(searchConfig.mode)) {
|
||||
return res.status(400).json({
|
||||
error: "searchConfig.mode must be either 'discover' or 'scrape'"
|
||||
});
|
||||
}
|
||||
|
||||
searchConfig.provider = 'duckduckgo';
|
||||
|
||||
const robotName = name || `Search Robot - ${searchConfig.query}`;
|
||||
const robotId = uuid();
|
||||
const metaId = uuid();
|
||||
|
||||
const robot = await Robot.create({
|
||||
id: robotId,
|
||||
userId: user.id,
|
||||
recording_meta: {
|
||||
name: robotName,
|
||||
id: metaId,
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
pairs: 1,
|
||||
params: [],
|
||||
type: 'search',
|
||||
},
|
||||
recording: {
|
||||
workflow: [
|
||||
{
|
||||
where: { url: 'about:blank' },
|
||||
what: [
|
||||
{
|
||||
action: 'search',
|
||||
args: [searchConfig],
|
||||
name: 'Search'
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
logger.info(`[SDK] Search robot created: ${metaId} (db: ${robotId}) by user ${user.id}`);
|
||||
|
||||
capture("maxun-oss-robot-created", {
|
||||
userId: user.id.toString(),
|
||||
robotId: metaId,
|
||||
robotName: robotName,
|
||||
robotType: 'search',
|
||||
searchQuery: searchConfig.query,
|
||||
searchProvider: searchConfig.provider || 'duckduckgo',
|
||||
searchLimit: searchConfig.limit || 10,
|
||||
source: 'sdk'
|
||||
});
|
||||
|
||||
return res.status(201).json({
|
||||
data: robot,
|
||||
message: "Search robot created successfully"
|
||||
});
|
||||
|
||||
} catch (error: any) {
|
||||
logger.error("[SDK] Error creating search robot:", error);
|
||||
return res.status(500).json({
|
||||
error: "Failed to create search robot",
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* LLM-based extraction - generate workflow from natural language prompt
|
||||
* POST /api/sdk/extract/llm
|
||||
|
||||
Reference in New Issue
Block a user