feat: add crawl and search ui
This commit is contained in:
@@ -17,12 +17,14 @@ import {
|
||||
FormControl,
|
||||
Select,
|
||||
MenuItem,
|
||||
InputLabel
|
||||
InputLabel,
|
||||
Collapse,
|
||||
FormControlLabel
|
||||
} from '@mui/material';
|
||||
import { ArrowBack, AutoAwesome, HighlightAlt } from '@mui/icons-material';
|
||||
import { useGlobalInfoStore, useCacheInvalidation } from '../../../context/globalInfo';
|
||||
import { canCreateBrowserInState, getActiveBrowserId, stopRecording } from '../../../api/recording';
|
||||
import { createScrapeRobot, createLLMRobot, createAndRunRecording } from "../../../api/storage";
|
||||
import { createScrapeRobot, createLLMRobot, createAndRunRecording, createCrawlRobot, createSearchRobot } from "../../../api/storage";
|
||||
import { AuthContext } from '../../../context/auth';
|
||||
import { GenericModal } from '../../ui/GenericModal';
|
||||
|
||||
@@ -72,6 +74,25 @@ const RobotCreate: React.FC = () => {
|
||||
const [llmBaseUrl, setLlmBaseUrl] = useState('');
|
||||
const [aiRobotName, setAiRobotName] = useState('');
|
||||
|
||||
const [crawlRobotName, setCrawlRobotName] = useState('');
|
||||
const [crawlUrl, setCrawlUrl] = useState('');
|
||||
const [crawlMode, setCrawlMode] = useState<'domain' | 'subdomain' | 'path'>('domain');
|
||||
const [crawlLimit, setCrawlLimit] = useState(50);
|
||||
const [crawlMaxDepth, setCrawlMaxDepth] = useState(3);
|
||||
const [crawlIncludePaths, setCrawlIncludePaths] = useState<string>('');
|
||||
const [crawlExcludePaths, setCrawlExcludePaths] = useState<string>('');
|
||||
const [crawlUseSitemap, setCrawlUseSitemap] = useState(true);
|
||||
const [crawlFollowLinks, setCrawlFollowLinks] = useState(true);
|
||||
const [crawlRespectRobots, setCrawlRespectRobots] = useState(true);
|
||||
const [showCrawlAdvanced, setShowCrawlAdvanced] = useState(false);
|
||||
|
||||
const [searchRobotName, setSearchRobotName] = useState('');
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [searchLimit, setSearchLimit] = useState(10);
|
||||
const [searchProvider] = useState<'duckduckgo'>('duckduckgo');
|
||||
const [searchMode, setSearchMode] = useState<'discover' | 'scrape'>('discover');
|
||||
const [searchTimeRange, setSearchTimeRange] = useState<'day' | 'week' | 'month' | 'year' | ''>('');
|
||||
|
||||
const { state } = React.useContext(AuthContext);
|
||||
const { user } = state;
|
||||
const { addOptimisticRobot, removeOptimisticRobot, invalidateRecordings, invalidateRuns, addOptimisticRun } = useCacheInvalidation();
|
||||
@@ -155,6 +176,76 @@ const RobotCreate: React.FC = () => {
|
||||
navigate('/robots');
|
||||
};
|
||||
|
||||
/**
 * Validates the crawl form and asks the backend to create a crawl robot.
 *
 * Shows an error notification and returns early when the starting URL or the
 * robot name is blank. When `createCrawlRobot` yields a truthy result the
 * recordings cache is invalidated, a success notification is shown and the
 * user is navigated to `/robots`; a falsy result produces an error
 * notification instead.
 */
const handleCreateCrawlRobot = async () => {
  if (!crawlUrl.trim()) {
    notify('error', 'Please enter a valid URL');
    return;
  }
  if (!crawlRobotName.trim()) {
    notify('error', 'Please enter a robot name');
    return;
  }

  // Turns "/a, ,/b," into ['/a', '/b']. Blank entries (stray or trailing
  // commas) are dropped so an empty string is never sent as a path.
  const parsePathList = (raw: string): string[] =>
    raw
      .split(',')
      .map((entry) => entry.trim())
      .filter((entry) => entry.length > 0);

  setIsLoading(true);
  let result;
  try {
    result = await createCrawlRobot(crawlUrl, crawlRobotName, {
      mode: crawlMode,
      limit: crawlLimit,
      maxDepth: crawlMaxDepth,
      includePaths: parsePathList(crawlIncludePaths),
      excludePaths: parsePathList(crawlExcludePaths),
      useSitemap: crawlUseSitemap,
      followLinks: crawlFollowLinks,
      respectRobots: crawlRespectRobots
    });
  } finally {
    // Always leave the loading state, even if the request rejects; otherwise
    // the button would stay disabled showing "Creating...".
    setIsLoading(false);
  }

  if (result) {
    invalidateRecordings();
    notify('success', `${crawlRobotName} created successfully!`);
    navigate('/robots');
  } else {
    notify('error', 'Failed to create crawl robot');
  }
};
|
||||
|
||||
/**
 * Validates the search form and asks the backend to create a search robot.
 *
 * Shows an error notification and returns early when the query or the robot
 * name is blank. When `createSearchRobot` yields a truthy result the
 * recordings cache is invalidated, a success notification is shown and the
 * user is navigated to `/robots`; a falsy result produces an error
 * notification instead.
 */
const handleCreateSearchRobot = async () => {
  if (!searchQuery.trim()) {
    notify('error', 'Please enter a search query');
    return;
  }
  if (!searchRobotName.trim()) {
    notify('error', 'Please enter a robot name');
    return;
  }

  setIsLoading(true);
  let result;
  try {
    result = await createSearchRobot(searchRobotName, {
      query: searchQuery,
      limit: searchLimit,
      provider: searchProvider,
      filters: {
        // '' is the "No Filter" choice; it is sent as undefined. The
        // comparison narrows the union, so no type assertion is needed.
        timeRange: searchTimeRange !== '' ? searchTimeRange : undefined
      },
      mode: searchMode
    });
  } finally {
    // Always leave the loading state, even if the request rejects; otherwise
    // the button would stay disabled showing "Creating...".
    setIsLoading(false);
  }

  if (result) {
    invalidateRecordings();
    notify('success', `${searchRobotName} created successfully!`);
    navigate('/robots');
  } else {
    notify('error', 'Failed to create search robot');
  }
};
|
||||
|
||||
return (
|
||||
<Container maxWidth="md" sx={{ py: 4 }}>
|
||||
<Box>
|
||||
@@ -210,6 +301,8 @@ const RobotCreate: React.FC = () => {
|
||||
>
|
||||
<Tab label="Extract" id="extract-robot" aria-controls="extract-robot" />
|
||||
<Tab label="Scrape" id="scrape-robot" aria-controls="scrape-robot" />
|
||||
<Tab label="Crawl" id="crawl-robot" aria-controls="crawl-robot" />
|
||||
<Tab label="Search" id="search-robot" aria-controls="search-robot" />
|
||||
</Tabs>
|
||||
</Box>
|
||||
|
||||
@@ -697,6 +790,262 @@ const RobotCreate: React.FC = () => {
|
||||
</Box>
|
||||
</Card>
|
||||
</TabPanel>
|
||||
|
||||
<TabPanel value={tabValue} index={2}>
|
||||
<Card sx={{ mb: 4, p: 4, textAlign: 'center' }}>
|
||||
<Box display="flex" flexDirection="column" alignItems="center">
|
||||
<img
|
||||
src="https://ik.imagekit.io/ys1blv5kv/maxunlogo.png"
|
||||
width={73}
|
||||
height={65}
|
||||
style={{
|
||||
borderRadius: '5px',
|
||||
marginBottom: '30px'
|
||||
}}
|
||||
alt="Maxun Logo"
|
||||
/>
|
||||
|
||||
<Typography variant="body2" color="text.secondary" mb={3}>
|
||||
Crawl entire websites and gather data from multiple pages automatically.
|
||||
</Typography>
|
||||
|
||||
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
||||
<TextField
|
||||
label="Robot Name"
|
||||
placeholder="Example: YC Companies Crawler"
|
||||
fullWidth
|
||||
value={crawlRobotName}
|
||||
onChange={(e) => setCrawlRobotName(e.target.value)}
|
||||
sx={{ mb: 2 }}
|
||||
/>
|
||||
<TextField
|
||||
label="Starting URL"
|
||||
placeholder="https://www.ycombinator.com/companies"
|
||||
fullWidth
|
||||
value={crawlUrl}
|
||||
onChange={(e) => setCrawlUrl(e.target.value)}
|
||||
sx={{ mb: 2 }}
|
||||
/>
|
||||
|
||||
<TextField
|
||||
label="Max Pages to Crawl"
|
||||
type="number"
|
||||
fullWidth
|
||||
value={crawlLimit}
|
||||
onChange={(e) => setCrawlLimit(parseInt(e.target.value) || 10)}
|
||||
sx={{ mb: 2 }}
|
||||
/>
|
||||
|
||||
<Box sx={{ width: '100%', display: 'flex', justifyContent: 'flex-start', mb: 2 }}>
|
||||
<Button
|
||||
onClick={() => setShowCrawlAdvanced(!showCrawlAdvanced)}
|
||||
sx={{
|
||||
textTransform: 'none',
|
||||
color: '#ff00c3',
|
||||
}}
|
||||
>
|
||||
{showCrawlAdvanced ? 'Hide Advanced Options' : 'Advanced Options'}
|
||||
</Button>
|
||||
</Box>
|
||||
|
||||
<Collapse in={showCrawlAdvanced}>
|
||||
<Box sx={{ mb: 2 }}>
|
||||
<FormControl fullWidth sx={{ mb: 2 }}>
|
||||
<InputLabel>Crawl Scope</InputLabel>
|
||||
<Select
|
||||
value={crawlMode}
|
||||
label="Crawl Scope"
|
||||
onChange={(e) => setCrawlMode(e.target.value as any)}
|
||||
>
|
||||
<MenuItem value="domain">Same Domain Only</MenuItem>
|
||||
<MenuItem value="subdomain">Include Subdomains</MenuItem>
|
||||
<MenuItem value="path">Specific Path Only</MenuItem>
|
||||
</Select>
|
||||
</FormControl>
|
||||
|
||||
<TextField
|
||||
label="Max Depth"
|
||||
type="number"
|
||||
fullWidth
|
||||
value={crawlMaxDepth}
|
||||
onChange={(e) => setCrawlMaxDepth(parseInt(e.target.value) || 3)}
|
||||
sx={{ mb: 2 }}
|
||||
helperText="How many links deep to follow (default: 3)"
|
||||
FormHelperTextProps={{ sx: { ml: 0 } }}
|
||||
/>
|
||||
|
||||
<TextField
|
||||
label="Include Paths"
|
||||
placeholder="Example: /products, /blog"
|
||||
fullWidth
|
||||
value={crawlIncludePaths}
|
||||
onChange={(e) => setCrawlIncludePaths(e.target.value)}
|
||||
sx={{ mb: 2 }}
|
||||
helperText="Only crawl URLs matching these paths (comma-separated)"
|
||||
FormHelperTextProps={{ sx: { ml: 0 } }}
|
||||
/>
|
||||
|
||||
<TextField
|
||||
label="Exclude Paths"
|
||||
placeholder="Example: /admin, /login"
|
||||
fullWidth
|
||||
value={crawlExcludePaths}
|
||||
onChange={(e) => setCrawlExcludePaths(e.target.value)}
|
||||
sx={{ mb: 2 }}
|
||||
helperText="Skip URLs matching these paths (comma-separated)"
|
||||
FormHelperTextProps={{ sx: { ml: 0 } }}
|
||||
/>
|
||||
|
||||
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
|
||||
<FormControlLabel
|
||||
control={
|
||||
<Checkbox
|
||||
checked={crawlUseSitemap}
|
||||
onChange={(e) => setCrawlUseSitemap(e.target.checked)}
|
||||
/>
|
||||
}
|
||||
label="Use sitemap.xml for URL discovery"
|
||||
/>
|
||||
<FormControlLabel
|
||||
control={
|
||||
<Checkbox
|
||||
checked={crawlFollowLinks}
|
||||
onChange={(e) => setCrawlFollowLinks(e.target.checked)}
|
||||
/>
|
||||
}
|
||||
label="Follow links on pages"
|
||||
/>
|
||||
<FormControlLabel
|
||||
control={
|
||||
<Checkbox
|
||||
checked={crawlRespectRobots}
|
||||
onChange={(e) => setCrawlRespectRobots(e.target.checked)}
|
||||
/>
|
||||
}
|
||||
label="Respect robots.txt"
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
</Collapse>
|
||||
</Box>
|
||||
|
||||
<Button
|
||||
variant="contained"
|
||||
fullWidth
|
||||
onClick={handleCreateCrawlRobot}
|
||||
disabled={!crawlUrl.trim() || !crawlRobotName.trim() || isLoading}
|
||||
sx={{
|
||||
bgcolor: '#ff00c3',
|
||||
py: 1.4,
|
||||
fontSize: '1rem',
|
||||
textTransform: 'none',
|
||||
maxWidth: 700,
|
||||
borderRadius: 2
|
||||
}}
|
||||
startIcon={isLoading ? <CircularProgress size={20} color="inherit" /> : null}
|
||||
>
|
||||
{isLoading ? 'Creating...' : 'Create Robot'}
|
||||
</Button>
|
||||
</Box>
|
||||
</Card>
|
||||
</TabPanel>
|
||||
|
||||
<TabPanel value={tabValue} index={3}>
|
||||
<Card sx={{ mb: 4, p: 4, textAlign: 'center' }}>
|
||||
<Box display="flex" flexDirection="column" alignItems="center">
|
||||
<img
|
||||
src="https://ik.imagekit.io/ys1blv5kv/maxunlogo.png"
|
||||
width={73}
|
||||
height={65}
|
||||
style={{
|
||||
borderRadius: '5px',
|
||||
marginBottom: '30px'
|
||||
}}
|
||||
alt="Maxun Logo"
|
||||
/>
|
||||
|
||||
<Typography variant="body2" color="text.secondary" mb={3}>
|
||||
Search the web and gather data from relevant results.
|
||||
</Typography>
|
||||
|
||||
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
||||
<TextField
|
||||
label="Robot Name"
|
||||
placeholder="Example: AI News Monitor"
|
||||
fullWidth
|
||||
value={searchRobotName}
|
||||
onChange={(e) => setSearchRobotName(e.target.value)}
|
||||
sx={{ mb: 2 }}
|
||||
/>
|
||||
|
||||
<TextField
|
||||
label="Search Query"
|
||||
placeholder="Example: latest AI breakthroughs 2025"
|
||||
fullWidth
|
||||
value={searchQuery}
|
||||
onChange={(e) => setSearchQuery(e.target.value)}
|
||||
sx={{ mb: 2 }}
|
||||
/>
|
||||
|
||||
<TextField
|
||||
label="Number of Results"
|
||||
type="number"
|
||||
fullWidth
|
||||
value={searchLimit}
|
||||
onChange={(e) => setSearchLimit(parseInt(e.target.value) || 10)}
|
||||
sx={{ mb: 2 }}
|
||||
/>
|
||||
|
||||
<Box sx={{ display: 'flex', gap: 2 }}>
|
||||
<FormControl fullWidth sx={{ mb: 2 }}>
|
||||
<InputLabel>Mode</InputLabel>
|
||||
<Select
|
||||
value={searchMode}
|
||||
label="Mode"
|
||||
onChange={(e) => setSearchMode(e.target.value as any)}
|
||||
>
|
||||
<MenuItem value="discover">Discover URLs Only</MenuItem>
|
||||
<MenuItem value="scrape">Extract Data from Results</MenuItem>
|
||||
</Select>
|
||||
</FormControl>
|
||||
|
||||
<FormControl fullWidth sx={{ mb: 2 }}>
|
||||
<InputLabel>Time Range</InputLabel>
|
||||
<Select
|
||||
value={searchTimeRange}
|
||||
label="Time Range"
|
||||
onChange={(e) => setSearchTimeRange(e.target.value as 'day' | 'week' | 'month' | 'year' | '')}
|
||||
>
|
||||
<MenuItem value="">No Filter</MenuItem>
|
||||
<MenuItem value="day">Past 24 Hours</MenuItem>
|
||||
<MenuItem value="week">Past Week</MenuItem>
|
||||
<MenuItem value="month">Past Month</MenuItem>
|
||||
<MenuItem value="year">Past Year</MenuItem>
|
||||
</Select>
|
||||
</FormControl>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
<Button
|
||||
variant="contained"
|
||||
fullWidth
|
||||
onClick={handleCreateSearchRobot}
|
||||
disabled={!searchQuery.trim() || !searchRobotName.trim() || isLoading}
|
||||
sx={{
|
||||
bgcolor: '#ff00c3',
|
||||
py: 1.4,
|
||||
fontSize: '1rem',
|
||||
textTransform: 'none',
|
||||
maxWidth: 700,
|
||||
borderRadius: 2
|
||||
}}
|
||||
startIcon={isLoading ? <CircularProgress size={20} color="inherit" /> : null}
|
||||
>
|
||||
{isLoading ? 'Creating...' : 'Create Robot'}
|
||||
</Button>
|
||||
</Box>
|
||||
</Card>
|
||||
</TabPanel>
|
||||
</Box>
|
||||
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ interface RobotMeta {
|
||||
pairs: number;
|
||||
updatedAt: string;
|
||||
params: any[];
|
||||
type?: 'extract' | 'scrape';
|
||||
type?: 'extract' | 'scrape' | 'crawl' | 'search';
|
||||
url?: string;
|
||||
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||
isLLM?: boolean;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useState, useEffect } from "react";
|
||||
import { useState, useEffect } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import {
|
||||
TextField,
|
||||
@@ -7,7 +7,13 @@ import {
|
||||
Button,
|
||||
IconButton,
|
||||
InputAdornment,
|
||||
Divider,
|
||||
FormControl,
|
||||
InputLabel,
|
||||
Select,
|
||||
MenuItem,
|
||||
FormControlLabel,
|
||||
Checkbox,
|
||||
Collapse
|
||||
} from "@mui/material";
|
||||
import { Visibility, VisibilityOff } from "@mui/icons-material";
|
||||
import { useGlobalInfoStore } from "../../../context/globalInfo";
|
||||
@@ -24,7 +30,7 @@ interface RobotMeta {
|
||||
pairs: number;
|
||||
updatedAt: string;
|
||||
params: any[];
|
||||
type?: 'extract' | 'scrape';
|
||||
type?: 'extract' | 'scrape' | 'crawl' | 'search';
|
||||
url?: string;
|
||||
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||
isLLM?: boolean;
|
||||
@@ -97,6 +103,25 @@ interface ScrapeListLimit {
|
||||
currentLimit: number;
|
||||
}
|
||||
|
||||
/**
 * Editable settings of a workflow "crawl" action (its first argument).
 * Every member is optional; the edit form supplies a fallback for any
 * member that is missing.
 */
interface CrawlConfig {
  /** Crawl scope; the edit form offers 'domain', 'subdomain' and 'path'. */
  mode?: string;
  /** Shown in the form as "Max Pages to Crawl". */
  limit?: number;
  /** Shown in the form as "Max Depth". */
  maxDepth?: number;
  /** Shown in the form as "Include Paths". */
  includePaths?: string[];
  /** Shown in the form as "Exclude Paths". */
  excludePaths?: string[];
  /** Shown in the form as "Use sitemap.xml for URL discovery". */
  useSitemap?: boolean;
  /** Shown in the form as "Follow links on pages". */
  followLinks?: boolean;
  /** Shown in the form as "Respect robots.txt". */
  respectRobots?: boolean;
}
|
||||
|
||||
/**
 * Editable settings of a workflow "search" action (its first argument).
 * Every member is optional; the edit form supplies a fallback for any
 * member that is missing.
 */
interface SearchConfig {
  /** Shown in the form as "Search Query". */
  query?: string;
  /** Shown in the form as "Number of Results". */
  limit?: number;
  /** 'discover' is labelled "Discover URLs Only", 'scrape' is "Extract Data from Results". */
  mode?: 'discover' | 'scrape';
  /** Free-form filter bag; the edit form reads and writes its `timeRange` key. */
  filters?: Record<string, any>;
  /** Search provider identifier. */
  provider?: string;
}
|
||||
|
||||
export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
const { t } = useTranslation();
|
||||
const navigate = useNavigate();
|
||||
@@ -115,6 +140,9 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
[]
|
||||
);
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [crawlConfig, setCrawlConfig] = useState<CrawlConfig>({});
|
||||
const [searchConfig, setSearchConfig] = useState<SearchConfig>({});
|
||||
const [showCrawlAdvanced, setShowCrawlAdvanced] = useState(false);
|
||||
|
||||
const isEmailPattern = (value: string): boolean => {
|
||||
return value.includes("@");
|
||||
@@ -163,6 +191,8 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
setCredentialGroups(groupCredentialsByType(extractedCredentials));
|
||||
|
||||
findScrapeListLimits(robot.recording.workflow);
|
||||
extractCrawlConfig(robot.recording.workflow);
|
||||
extractSearchConfig(robot.recording.workflow);
|
||||
}
|
||||
}, [robot]);
|
||||
|
||||
@@ -195,6 +225,36 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
setScrapeListLimits(limits);
|
||||
};
|
||||
|
||||
/**
 * Walks every action of the workflow and copies the first argument of each
 * "crawl" action into the `crawlConfig` state, provided that argument is a
 * non-null object. If several crawl actions qualify, the setter is called
 * once per match in workflow order.
 */
const extractCrawlConfig = (workflow: WhereWhatPair[]) => {
  for (const pair of workflow) {
    if (!pair.what) continue;

    for (const step of pair.what) {
      const { action, args } = step as any;
      if (action !== "crawl" || !args || !(args.length > 0)) continue;

      const candidate = args[0];
      if (!candidate || typeof candidate !== "object") continue;

      setCrawlConfig(candidate as CrawlConfig);
    }
  }
};
|
||||
|
||||
/**
 * Walks every action of the workflow and copies the first argument of each
 * "search" action into the `searchConfig` state, provided that argument is a
 * non-null object. If several search actions qualify, the setter is called
 * once per match in workflow order.
 */
const extractSearchConfig = (workflow: WhereWhatPair[]) => {
  for (const pair of workflow) {
    if (!pair.what) continue;

    for (const step of pair.what) {
      const { action, args } = step as any;
      if (action !== "search" || !args || !(args.length > 0)) continue;

      const candidate = args[0];
      if (!candidate || typeof candidate !== "object") continue;

      setSearchConfig(candidate as SearchConfig);
    }
  }
};
|
||||
|
||||
function extractInitialCredentials(workflow: any[]): Credentials {
|
||||
const credentials: Credentials = {};
|
||||
|
||||
@@ -475,19 +535,17 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
<>
|
||||
{renderCredentialFields(
|
||||
credentialGroups.usernames,
|
||||
t("Username"),
|
||||
"text"
|
||||
t("Username")
|
||||
)}
|
||||
|
||||
{renderCredentialFields(credentialGroups.emails, t("Email"), "text")}
|
||||
{renderCredentialFields(credentialGroups.emails, t("Email"))}
|
||||
|
||||
{renderCredentialFields(
|
||||
credentialGroups.passwords,
|
||||
t("Password"),
|
||||
"password"
|
||||
t("Password")
|
||||
)}
|
||||
|
||||
{renderCredentialFields(credentialGroups.others, t("Other"), "text")}
|
||||
{renderCredentialFields(credentialGroups.others, t("Other"))}
|
||||
</>
|
||||
);
|
||||
};
|
||||
@@ -502,7 +560,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
</Typography>
|
||||
|
||||
{scrapeListLimits.map((limitInfo, index) => {
|
||||
// Get the corresponding scrapeList action to extract its name
|
||||
const scrapeListAction = robot?.recording?.workflow?.[limitInfo.pairIndex]?.what?.[limitInfo.actionIndex];
|
||||
const actionName =
|
||||
scrapeListAction?.name ||
|
||||
@@ -542,7 +599,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
const screenshotInputs: JSX.Element[] = [];
|
||||
const listInputs: JSX.Element[] = [];
|
||||
|
||||
let textCount = 0;
|
||||
let screenshotCount = 0;
|
||||
let listCount = 0;
|
||||
|
||||
@@ -683,7 +739,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
const renderCredentialFields = (
|
||||
selectors: string[],
|
||||
headerText: string,
|
||||
defaultType: "text" | "password" = "text"
|
||||
) => {
|
||||
if (selectors.length === 0) return null;
|
||||
|
||||
@@ -737,6 +792,193 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
return url;
|
||||
};
|
||||
|
||||
/**
 * Renders the editable crawl settings for a robot whose
 * `recording_meta.type` is 'crawl'; returns null for any other robot.
 *
 * The page limit is always visible. Scope, depth, include/exclude paths and
 * the three boolean switches live in a collapsible "Advanced Options"
 * section toggled by `showCrawlAdvanced`. All edits are written into the
 * `crawlConfig` state.
 */
const renderCrawlConfigFields = () => {
  if (robot?.recording_meta.type !== 'crawl') return null;

  type PathListKey = 'includePaths' | 'excludePaths';

  // Separator used to show a stored path list in its text field.
  const PATH_SEPARATOR = ', ';

  const showPathList = (paths?: string[]): string =>
    paths?.join(PATH_SEPARATOR) || '';

  const storePathList = (key: PathListKey, paths: string[]) => {
    setCrawlConfig((prev) =>
      key === 'includePaths'
        ? { ...prev, includePaths: paths }
        : { ...prev, excludePaths: paths }
    );
  };

  // While the user types, split on exactly the separator used for display.
  // split(sep).join(sep) reproduces the typed text verbatim, so the field
  // never rewrites itself mid-edit (trimming here made a trailing ", "
  // impossible to delete with backspace).
  const handlePathListTyping = (key: PathListKey, text: string) => {
    storePathList(key, text ? text.split(PATH_SEPARATOR) : []);
  };

  // When the field loses focus, normalise what was typed: split on commas,
  // trim each entry and drop blank ones so no empty path is stored.
  const handlePathListBlur = (key: PathListKey, text: string) => {
    storePathList(
      key,
      text
        .split(',')
        .map((entry) => entry.trim())
        .filter((entry) => entry.length > 0)
    );
  };

  // Numeric inputs: ignore anything that does not parse to an integer >= 1
  // (NaN fails the comparison), keeping the previous value.
  const handleLimitChange = (raw: string) => {
    const value = parseInt(raw, 10);
    if (value >= 1) {
      setCrawlConfig((prev) => ({ ...prev, limit: value }));
    }
  };

  const handleMaxDepthChange = (raw: string) => {
    const value = parseInt(raw, 10);
    if (value >= 1) {
      setCrawlConfig((prev) => ({ ...prev, maxDepth: value }));
    }
  };

  return (
    <>
      <TextField
        label="Max Pages to Crawl"
        type="number"
        fullWidth
        value={crawlConfig.limit || 10}
        onChange={(e) => handleLimitChange(e.target.value)}
        inputProps={{ min: 1 }}
        style={{ marginBottom: "20px" }}
      />

      <Button
        onClick={() => setShowCrawlAdvanced(!showCrawlAdvanced)}
        sx={{
          mb: 2,
          textTransform: 'none',
          color: '#ff00c3'
        }}
      >
        {showCrawlAdvanced ? 'Hide Advanced Options' : 'Advanced Options'}
      </Button>

      <Collapse in={showCrawlAdvanced}>
        <Box sx={{ mb: 2 }}>
          <FormControl fullWidth sx={{ mb: 2 }}>
            <InputLabel>Crawl Scope</InputLabel>
            <Select
              value={crawlConfig.mode || 'domain'}
              label="Crawl Scope"
              onChange={(e) => setCrawlConfig((prev) => ({ ...prev, mode: e.target.value }))}
            >
              <MenuItem value="domain">Same Domain Only</MenuItem>
              <MenuItem value="subdomain">Include Subdomains</MenuItem>
              <MenuItem value="path">Specific Path Only</MenuItem>
            </Select>
          </FormControl>

          <TextField
            label="Max Depth"
            type="number"
            fullWidth
            value={crawlConfig.maxDepth || 3}
            onChange={(e) => handleMaxDepthChange(e.target.value)}
            inputProps={{ min: 1 }}
            sx={{ mb: 2 }}
            helperText="How many links deep to follow (default: 3)"
          />

          <TextField
            label="Include Paths"
            placeholder="Example: /products, /blog"
            fullWidth
            value={showPathList(crawlConfig.includePaths)}
            onChange={(e) => handlePathListTyping('includePaths', e.target.value)}
            onBlur={(e) => handlePathListBlur('includePaths', e.target.value)}
            sx={{ mb: 2 }}
            helperText="Only crawl URLs matching these paths (comma-separated)"
          />

          <TextField
            label="Exclude Paths"
            placeholder="Example: /admin, /login"
            fullWidth
            value={showPathList(crawlConfig.excludePaths)}
            onChange={(e) => handlePathListTyping('excludePaths', e.target.value)}
            onBlur={(e) => handlePathListBlur('excludePaths', e.target.value)}
            sx={{ mb: 2 }}
            helperText="Skip URLs matching these paths (comma-separated)"
          />

          <Box sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
            <FormControlLabel
              control={
                <Checkbox
                  checked={crawlConfig.useSitemap ?? true}
                  onChange={(e) => setCrawlConfig((prev) => ({ ...prev, useSitemap: e.target.checked }))}
                />
              }
              label="Use sitemap.xml for URL discovery"
            />
            <FormControlLabel
              control={
                <Checkbox
                  checked={crawlConfig.followLinks ?? true}
                  onChange={(e) => setCrawlConfig((prev) => ({ ...prev, followLinks: e.target.checked }))}
                />
              }
              label="Follow links on pages"
            />
            <FormControlLabel
              control={
                <Checkbox
                  checked={crawlConfig.respectRobots ?? true}
                  onChange={(e) => setCrawlConfig((prev) => ({ ...prev, respectRobots: e.target.checked }))}
                />
              }
              label="Respect robots.txt"
            />
          </Box>
        </Box>
      </Collapse>
    </>
  );
};
|
||||
|
||||
/**
 * Renders the editable search settings (query, result count, mode and time
 * range) for a robot whose `recording_meta.type` is 'search'; returns null
 * for any other robot. All edits are written into the `searchConfig` state.
 */
const renderSearchConfigFields = () => {
  if (robot?.recording_meta.type !== 'search') return null;

  const handleQueryChange = (text: string) => {
    setSearchConfig((prev) => ({ ...prev, query: text }));
  };

  // Ignore anything that does not parse to an integer >= 1 (NaN fails the
  // comparison), keeping the previous value.
  const handleLimitChange = (raw: string) => {
    const parsed = parseInt(raw, 10);
    if (parsed >= 1) {
      setSearchConfig((prev) => ({ ...prev, limit: parsed }));
    }
  };

  const handleModeChange = (picked: 'discover' | 'scrape') => {
    setSearchConfig((prev) => ({ ...prev, mode: picked }));
  };

  // The "No Filter" option carries '' and is stored as undefined.
  const handleTimeRangeChange = (picked: '' | 'day' | 'week' | 'month' | 'year') => {
    setSearchConfig((prev) => ({
      ...prev,
      filters: { ...prev.filters, timeRange: picked || undefined }
    }));
  };

  const currentQuery = searchConfig.query || '';
  const currentLimit = searchConfig.limit || 10;
  const currentMode = searchConfig.mode || 'discover';
  const currentTimeRange = searchConfig.filters?.timeRange || '';

  return (
    <>
      <TextField
        label="Search Query"
        placeholder="Example: latest AI breakthroughs 2025"
        fullWidth
        value={currentQuery}
        onChange={(e) => handleQueryChange(e.target.value)}
        sx={{ mb: 2 }}
      />

      <TextField
        label="Number of Results"
        type="number"
        fullWidth
        value={currentLimit}
        onChange={(e) => handleLimitChange(e.target.value)}
        inputProps={{ min: 1 }}
        sx={{ mb: 2 }}
      />

      <FormControl fullWidth sx={{ mb: 2 }}>
        <InputLabel>Mode</InputLabel>
        <Select
          value={currentMode}
          label="Mode"
          onChange={(e) => handleModeChange(e.target.value as 'discover' | 'scrape')}
        >
          <MenuItem value="discover">Discover URLs Only</MenuItem>
          <MenuItem value="scrape">Extract Data from Results</MenuItem>
        </Select>
      </FormControl>

      <FormControl fullWidth sx={{ mb: 2 }}>
        <InputLabel>Time Range</InputLabel>
        <Select
          value={currentTimeRange}
          label="Time Range"
          onChange={(e) =>
            handleTimeRangeChange(e.target.value as '' | 'day' | 'week' | 'month' | 'year')
          }
        >
          <MenuItem value="">No Filter</MenuItem>
          <MenuItem value="day">Past 24 Hours</MenuItem>
          <MenuItem value="week">Past Week</MenuItem>
          <MenuItem value="month">Past Month</MenuItem>
          <MenuItem value="year">Past Year</MenuItem>
        </Select>
      </FormControl>
    </>
  );
};
|
||||
|
||||
const handleSave = async () => {
|
||||
if (!robot) return;
|
||||
|
||||
@@ -757,6 +999,48 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
|
||||
const targetUrl = getTargetUrl();
|
||||
|
||||
let updatedWorkflow = robot.recording.workflow;
|
||||
if (robot.recording_meta.type === 'crawl') {
|
||||
updatedWorkflow = updatedWorkflow.map((pair: any) => {
|
||||
if (!pair.what) return pair;
|
||||
|
||||
return {
|
||||
...pair,
|
||||
what: pair.what.map((action: any) => {
|
||||
if (action.action === 'crawl') {
|
||||
return {
|
||||
...action,
|
||||
args: [{ ...crawlConfig }]
|
||||
};
|
||||
}
|
||||
return action;
|
||||
})
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
if (robot.recording_meta.type === 'search') {
|
||||
updatedWorkflow = updatedWorkflow.map((pair: any) => {
|
||||
if (!pair.what) return pair;
|
||||
|
||||
return {
|
||||
...pair,
|
||||
what: pair.what.map((action: any) => {
|
||||
if (action.action === 'search') {
|
||||
return {
|
||||
...action,
|
||||
args: [{
|
||||
...searchConfig,
|
||||
provider: 'duckduckgo'
|
||||
}]
|
||||
};
|
||||
}
|
||||
return action;
|
||||
})
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
const payload: any = {
|
||||
name: robot.recording_meta.name,
|
||||
limits: scrapeListLimits.map((limit) => ({
|
||||
@@ -767,7 +1051,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
})),
|
||||
credentials: credentialsForPayload,
|
||||
targetUrl: targetUrl,
|
||||
// send the (possibly edited) workflow so backend can persist action name changes
|
||||
workflow: robot.recording.workflow,
|
||||
};
|
||||
|
||||
@@ -825,19 +1108,12 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
onChange={(e) => handleTargetUrlChange(e.target.value)}
|
||||
style={{ marginBottom: "20px" }}
|
||||
/>
|
||||
{renderScrapeListLimitFields() && (
|
||||
<>
|
||||
<Divider />
|
||||
{renderScrapeListLimitFields()}
|
||||
</>
|
||||
)}
|
||||
|
||||
{renderCrawlConfigFields()}
|
||||
{renderSearchConfigFields()}
|
||||
|
||||
{renderActionNameFields() && (
|
||||
<>
|
||||
<Divider />
|
||||
{renderActionNameFields()}
|
||||
</>
|
||||
)}
|
||||
{renderScrapeListLimitFields()}
|
||||
{renderActionNameFields()}
|
||||
</>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import React, { useState, useEffect } from "react";
|
||||
import { useState, useEffect } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { TextField, Typography, Box, Card, CardContent } from "@mui/material";
|
||||
import { Settings, Info } from "@mui/icons-material";
|
||||
import { TextField, Box } from "@mui/material";
|
||||
import { useGlobalInfoStore } from "../../../context/globalInfo";
|
||||
import { getStoredRecording } from "../../../api/storage";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
@@ -16,7 +15,7 @@ interface RobotMeta {
|
||||
pairs: number;
|
||||
updatedAt: string;
|
||||
params: any[];
|
||||
type?: 'extract' | 'scrape';
|
||||
type?: 'extract' | 'scrape' | 'crawl' | 'search';
|
||||
url?: string;
|
||||
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||
isLLM?: boolean;
|
||||
@@ -116,19 +115,11 @@ export const RobotSettingsPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
fetchUserEmail();
|
||||
}, [robot?.userId]);
|
||||
|
||||
const handleCancel = () => {
|
||||
const basePath = location.pathname.includes("/prebuilt-robots")
|
||||
? "/prebuilt-robots"
|
||||
: "/robots";
|
||||
navigate(basePath);
|
||||
};
|
||||
|
||||
const targetUrl = getTargetUrl();
|
||||
|
||||
return (
|
||||
<RobotConfigPage
|
||||
title={t("robot_settings.title")}
|
||||
onCancel={handleCancel}
|
||||
cancelButtonText={t("robot_settings.buttons.close")}
|
||||
showSaveButton={false}
|
||||
showCancelButton={false}
|
||||
@@ -137,15 +128,17 @@ export const RobotSettingsPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
<Box style={{ display: "flex", flexDirection: "column" }}>
|
||||
{robot && (
|
||||
<>
|
||||
<TextField
|
||||
label={t("robot_settings.target_url")}
|
||||
key="Robot Target URL"
|
||||
value={targetUrl}
|
||||
InputProps={{
|
||||
readOnly: true,
|
||||
}}
|
||||
style={{ marginBottom: "20px" }}
|
||||
/>
|
||||
{robot.recording_meta.type !== 'search' && (
|
||||
<TextField
|
||||
label={t("robot_settings.target_url")}
|
||||
key="Robot Target URL"
|
||||
value={targetUrl}
|
||||
InputProps={{
|
||||
readOnly: true,
|
||||
}}
|
||||
style={{ marginBottom: "20px" }}
|
||||
/>
|
||||
)}
|
||||
<TextField
|
||||
label={t("robot_settings.robot_id")}
|
||||
key="Robot ID"
|
||||
|
||||
Reference in New Issue
Block a user